Patchwork [v4,2/3] mm: Add support for kmem caches in DMA32 zone

login
register
mail settings
Submitter Nicolas Boichat
Date Dec. 5, 2018, 5:48 a.m.
Message ID <20181205054828.183476-3-drinkcat@chromium.org>
Download mbox | patch
Permalink /patch/672549/
State New
Headers show

Comments

Nicolas Boichat - Dec. 5, 2018, 5:48 a.m.
In some cases (e.g. IOMMU ARMv7s page allocator), we need to allocate
data structures smaller than a page with GFP_DMA32 flag.

This change makes it possible to create a custom cache in DMA32 zone
using kmem_cache_create, then allocate memory using kmem_cache_alloc.

We do not create a DMA32 kmalloc cache array, as there are currently
no users of kmalloc(..., GFP_DMA32). The new test in check_slab_flags
ensures that such calls still fail (as they do before this change).

Fixes: ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32")
Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
---

Changes since v2:
 - Clarified commit message
 - Add entry in sysfs-kernel-slab to document the new sysfs file

(v3 used the page_frag approach)

Documentation/ABI/testing/sysfs-kernel-slab |  9 +++++++++
 include/linux/slab.h                        |  2 ++
 mm/internal.h                               |  8 ++++++--
 mm/slab.c                                   |  4 +++-
 mm/slab.h                                   |  3 ++-
 mm/slab_common.c                            |  2 +-
 mm/slub.c                                   | 18 +++++++++++++++++-
 7 files changed, 40 insertions(+), 6 deletions(-)
Wei Yang - Dec. 5, 2018, 7:25 a.m.
On Wed, Dec 05, 2018 at 01:48:27PM +0800, Nicolas Boichat wrote:
>In some cases (e.g. IOMMU ARMv7s page allocator), we need to allocate
>data structures smaller than a page with GFP_DMA32 flag.
>
>This change makes it possible to create a custom cache in DMA32 zone
>using kmem_cache_create, then allocate memory using kmem_cache_alloc.
>
>We do not create a DMA32 kmalloc cache array, as there are currently
>no users of kmalloc(..., GFP_DMA32). The new test in check_slab_flags
>ensures that such calls still fail (as they do before this change).
>
>Fixes: ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32")
>Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
>---
>
>Changes since v2:
> - Clarified commit message
> - Add entry in sysfs-kernel-slab to document the new sysfs file
>
>(v3 used the page_frag approach)
>
>Documentation/ABI/testing/sysfs-kernel-slab |  9 +++++++++
> include/linux/slab.h                        |  2 ++
> mm/internal.h                               |  8 ++++++--
> mm/slab.c                                   |  4 +++-
> mm/slab.h                                   |  3 ++-
> mm/slab_common.c                            |  2 +-
> mm/slub.c                                   | 18 +++++++++++++++++-
> 7 files changed, 40 insertions(+), 6 deletions(-)
>
>diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
>index 29601d93a1c2ea..d742c6cfdffbe9 100644
>--- a/Documentation/ABI/testing/sysfs-kernel-slab
>+++ b/Documentation/ABI/testing/sysfs-kernel-slab
>@@ -106,6 +106,15 @@ Description:
> 		are from ZONE_DMA.
> 		Available when CONFIG_ZONE_DMA is enabled.
> 
>+What:		/sys/kernel/slab/cache/cache_dma32
>+Date:		December 2018
>+KernelVersion:	4.21
>+Contact:	Nicolas Boichat <drinkcat@chromium.org>
>+Description:
>+		The cache_dma32 file is read-only and specifies whether objects
>+		are from ZONE_DMA32.
>+		Available when CONFIG_ZONE_DMA32 is enabled.
>+
> What:		/sys/kernel/slab/cache/cpu_slabs
> Date:		May 2007
> KernelVersion:	2.6.22
>diff --git a/include/linux/slab.h b/include/linux/slab.h
>index 11b45f7ae4057c..9449b19c5f107a 100644
>--- a/include/linux/slab.h
>+++ b/include/linux/slab.h
>@@ -32,6 +32,8 @@
> #define SLAB_HWCACHE_ALIGN	((slab_flags_t __force)0x00002000U)
> /* Use GFP_DMA memory */
> #define SLAB_CACHE_DMA		((slab_flags_t __force)0x00004000U)
>+/* Use GFP_DMA32 memory */
>+#define SLAB_CACHE_DMA32	((slab_flags_t __force)0x00008000U)
> /* DEBUG: Store the last owner for bug hunting */
> #define SLAB_STORE_USER		((slab_flags_t __force)0x00010000U)
> /* Panic if kmem_cache_create() fails */
>diff --git a/mm/internal.h b/mm/internal.h
>index a2ee82a0cd44ae..fd244ad716eaf8 100644
>--- a/mm/internal.h
>+++ b/mm/internal.h
>@@ -14,6 +14,7 @@
> #include <linux/fs.h>
> #include <linux/mm.h>
> #include <linux/pagemap.h>
>+#include <linux/slab.h>
> #include <linux/tracepoint-defs.h>
> 
> /*
>@@ -34,9 +35,12 @@
> #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
> 
> /* Check for flags that must not be used with a slab allocator */
>-static inline gfp_t check_slab_flags(gfp_t flags)
>+static inline gfp_t check_slab_flags(gfp_t flags, slab_flags_t slab_flags)
> {
>-	gfp_t bug_mask = __GFP_DMA32 | __GFP_HIGHMEM | ~__GFP_BITS_MASK;
>+	gfp_t bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
>+
>+	if (!IS_ENABLED(CONFIG_ZONE_DMA32) || !(slab_flags & SLAB_CACHE_DMA32))
>+		bug_mask |= __GFP_DMA32;

The original version doesn't check CONFIG_ZONE_DMA32.

Do we need to add this condition here?
Could we just decide the bug_mask based on slab_flags?

> 
> 	if (unlikely(flags & bug_mask)) {
> 		gfp_t invalid_mask = flags & bug_mask;
>diff --git a/mm/slab.c b/mm/slab.c
>index 65a774f05e7836..2fd3b9a996cbe6 100644
>--- a/mm/slab.c
>+++ b/mm/slab.c
>@@ -2109,6 +2109,8 @@ int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags)
> 	cachep->allocflags = __GFP_COMP;
> 	if (flags & SLAB_CACHE_DMA)
> 		cachep->allocflags |= GFP_DMA;
>+	if (flags & SLAB_CACHE_DMA32)
>+		cachep->allocflags |= GFP_DMA32;
> 	if (flags & SLAB_RECLAIM_ACCOUNT)
> 		cachep->allocflags |= __GFP_RECLAIMABLE;
> 	cachep->size = size;
>@@ -2643,7 +2645,7 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep,
> 	 * Be lazy and only check for valid flags here,  keeping it out of the
> 	 * critical path in kmem_cache_alloc().
> 	 */
>-	flags = check_slab_flags(flags);
>+	flags = check_slab_flags(flags, cachep->flags);
> 	WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
> 	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
> 
>diff --git a/mm/slab.h b/mm/slab.h
>index 4190c24ef0e9df..fcf717e12f0a86 100644
>--- a/mm/slab.h
>+++ b/mm/slab.h
>@@ -127,7 +127,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
> 
> 
> /* Legal flag mask for kmem_cache_create(), for various configurations */
>-#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \
>+#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
>+			 SLAB_CACHE_DMA32 | SLAB_PANIC | \
> 			 SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
> 
> #if defined(CONFIG_DEBUG_SLAB)
>diff --git a/mm/slab_common.c b/mm/slab_common.c
>index 70b0cc85db67f8..18b7b809c8d064 100644
>--- a/mm/slab_common.c
>+++ b/mm/slab_common.c
>@@ -53,7 +53,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
> 		SLAB_FAILSLAB | SLAB_KASAN)
> 
> #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
>-			 SLAB_ACCOUNT)
>+			 SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
> 
> /*
>  * Merge control. If this is set then no merging of slab caches will occur.
>diff --git a/mm/slub.c b/mm/slub.c
>index 21a3f6866da472..6d47765a82d150 100644
>--- a/mm/slub.c
>+++ b/mm/slub.c
>@@ -1685,7 +1685,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
> 
> static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
> {
>-	flags = check_slab_flags(flags);
>+	flags = check_slab_flags(flags, s->flags);
> 
> 	return allocate_slab(s,
> 		flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
>@@ -3577,6 +3577,9 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
> 	if (s->flags & SLAB_CACHE_DMA)
> 		s->allocflags |= GFP_DMA;
> 
>+	if (s->flags & SLAB_CACHE_DMA32)
>+		s->allocflags |= GFP_DMA32;
>+
> 	if (s->flags & SLAB_RECLAIM_ACCOUNT)
> 		s->allocflags |= __GFP_RECLAIMABLE;
> 
>@@ -5095,6 +5098,14 @@ static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
> SLAB_ATTR_RO(cache_dma);
> #endif
> 
>+#ifdef CONFIG_ZONE_DMA32
>+static ssize_t cache_dma32_show(struct kmem_cache *s, char *buf)
>+{
>+	return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA32));
>+}
>+SLAB_ATTR_RO(cache_dma32);
>+#endif
>+
> static ssize_t usersize_show(struct kmem_cache *s, char *buf)
> {
> 	return sprintf(buf, "%u\n", s->usersize);
>@@ -5435,6 +5446,9 @@ static struct attribute *slab_attrs[] = {
> #ifdef CONFIG_ZONE_DMA
> 	&cache_dma_attr.attr,
> #endif
>+#ifdef CONFIG_ZONE_DMA32
>+	&cache_dma32_attr.attr,
>+#endif
> #ifdef CONFIG_NUMA
> 	&remote_node_defrag_ratio_attr.attr,
> #endif
>@@ -5665,6 +5679,8 @@ static char *create_unique_id(struct kmem_cache *s)
> 	 */
> 	if (s->flags & SLAB_CACHE_DMA)
> 		*p++ = 'd';
>+	if (s->flags & SLAB_CACHE_DMA32)
>+		*p++ = 'D';
> 	if (s->flags & SLAB_RECLAIM_ACCOUNT)
> 		*p++ = 'a';
> 	if (s->flags & SLAB_CONSISTENCY_CHECKS)
>-- 
>2.20.0.rc1.387.gf8505762e3-goog
>
>_______________________________________________
>iommu mailing list
>iommu@lists.linux-foundation.org
>https://lists.linuxfoundation.org/mailman/listinfo/iommu
Nicolas Boichat - Dec. 5, 2018, 7:39 a.m.
On Wed, Dec 5, 2018 at 3:25 PM Wei Yang <richard.weiyang@gmail.com> wrote:
>
> On Wed, Dec 05, 2018 at 01:48:27PM +0800, Nicolas Boichat wrote:
> >In some cases (e.g. IOMMU ARMv7s page allocator), we need to allocate
> >data structures smaller than a page with GFP_DMA32 flag.
> >
> >This change makes it possible to create a custom cache in DMA32 zone
> >using kmem_cache_create, then allocate memory using kmem_cache_alloc.
> >
> >We do not create a DMA32 kmalloc cache array, as there are currently
> >no users of kmalloc(..., GFP_DMA32). The new test in check_slab_flags
> >ensures that such calls still fail (as they do before this change).
> >
> >Fixes: ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32")
> >Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
> >---
> >
> >Changes since v2:
> > - Clarified commit message
> > - Add entry in sysfs-kernel-slab to document the new sysfs file
> >
> >(v3 used the page_frag approach)
> >
> >Documentation/ABI/testing/sysfs-kernel-slab |  9 +++++++++
> > include/linux/slab.h                        |  2 ++
> > mm/internal.h                               |  8 ++++++--
> > mm/slab.c                                   |  4 +++-
> > mm/slab.h                                   |  3 ++-
> > mm/slab_common.c                            |  2 +-
> > mm/slub.c                                   | 18 +++++++++++++++++-
> > 7 files changed, 40 insertions(+), 6 deletions(-)
> >
> >diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
> >index 29601d93a1c2ea..d742c6cfdffbe9 100644
> >--- a/Documentation/ABI/testing/sysfs-kernel-slab
> >+++ b/Documentation/ABI/testing/sysfs-kernel-slab
> >@@ -106,6 +106,15 @@ Description:
> >               are from ZONE_DMA.
> >               Available when CONFIG_ZONE_DMA is enabled.
> >
> >+What:         /sys/kernel/slab/cache/cache_dma32
> >+Date:         December 2018
> >+KernelVersion:        4.21
> >+Contact:      Nicolas Boichat <drinkcat@chromium.org>
> >+Description:
> >+              The cache_dma32 file is read-only and specifies whether objects
> >+              are from ZONE_DMA32.
> >+              Available when CONFIG_ZONE_DMA32 is enabled.
> >+
> > What:         /sys/kernel/slab/cache/cpu_slabs
> > Date:         May 2007
> > KernelVersion:        2.6.22
> >diff --git a/include/linux/slab.h b/include/linux/slab.h
> >index 11b45f7ae4057c..9449b19c5f107a 100644
> >--- a/include/linux/slab.h
> >+++ b/include/linux/slab.h
> >@@ -32,6 +32,8 @@
> > #define SLAB_HWCACHE_ALIGN    ((slab_flags_t __force)0x00002000U)
> > /* Use GFP_DMA memory */
> > #define SLAB_CACHE_DMA                ((slab_flags_t __force)0x00004000U)
> >+/* Use GFP_DMA32 memory */
> >+#define SLAB_CACHE_DMA32      ((slab_flags_t __force)0x00008000U)
> > /* DEBUG: Store the last owner for bug hunting */
> > #define SLAB_STORE_USER               ((slab_flags_t __force)0x00010000U)
> > /* Panic if kmem_cache_create() fails */
> >diff --git a/mm/internal.h b/mm/internal.h
> >index a2ee82a0cd44ae..fd244ad716eaf8 100644
> >--- a/mm/internal.h
> >+++ b/mm/internal.h
> >@@ -14,6 +14,7 @@
> > #include <linux/fs.h>
> > #include <linux/mm.h>
> > #include <linux/pagemap.h>
> >+#include <linux/slab.h>
> > #include <linux/tracepoint-defs.h>
> >
> > /*
> >@@ -34,9 +35,12 @@
> > #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
> >
> > /* Check for flags that must not be used with a slab allocator */
> >-static inline gfp_t check_slab_flags(gfp_t flags)
> >+static inline gfp_t check_slab_flags(gfp_t flags, slab_flags_t slab_flags)
> > {
> >-      gfp_t bug_mask = __GFP_DMA32 | __GFP_HIGHMEM | ~__GFP_BITS_MASK;
> >+      gfp_t bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
> >+
> >+      if (!IS_ENABLED(CONFIG_ZONE_DMA32) || !(slab_flags & SLAB_CACHE_DMA32))
> >+              bug_mask |= __GFP_DMA32;
>
> The original version doesn't check CONFIG_ZONE_DMA32.
>
> Do we need to add this condition here?
> Could we just decide the bug_mask based on slab_flags?

We can. The reason I did it this way is that when we don't have
CONFIG_ZONE_DMA32, the compiler should be able to simplify to:

bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
if (true || ..) => if (true)
   bug_mask |= __GFP_DMA32;

Then just
bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK | __GFP_DMA32;

And since the function is inline, slab_flags would not even need to be
accessed at all.

> >
> >       if (unlikely(flags & bug_mask)) {
> >               gfp_t invalid_mask = flags & bug_mask;
> >diff --git a/mm/slab.c b/mm/slab.c
> >index 65a774f05e7836..2fd3b9a996cbe6 100644
> >--- a/mm/slab.c
> >+++ b/mm/slab.c
> >@@ -2109,6 +2109,8 @@ int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags)
> >       cachep->allocflags = __GFP_COMP;
> >       if (flags & SLAB_CACHE_DMA)
> >               cachep->allocflags |= GFP_DMA;
> >+      if (flags & SLAB_CACHE_DMA32)
> >+              cachep->allocflags |= GFP_DMA32;
> >       if (flags & SLAB_RECLAIM_ACCOUNT)
> >               cachep->allocflags |= __GFP_RECLAIMABLE;
> >       cachep->size = size;
> >@@ -2643,7 +2645,7 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep,
> >        * Be lazy and only check for valid flags here,  keeping it out of the
> >        * critical path in kmem_cache_alloc().
> >        */
> >-      flags = check_slab_flags(flags);
> >+      flags = check_slab_flags(flags, cachep->flags);
> >       WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
> >       local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
> >
> >diff --git a/mm/slab.h b/mm/slab.h
> >index 4190c24ef0e9df..fcf717e12f0a86 100644
> >--- a/mm/slab.h
> >+++ b/mm/slab.h
> >@@ -127,7 +127,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
> >
> >
> > /* Legal flag mask for kmem_cache_create(), for various configurations */
> >-#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \
> >+#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
> >+                       SLAB_CACHE_DMA32 | SLAB_PANIC | \
> >                        SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
> >
> > #if defined(CONFIG_DEBUG_SLAB)
> >diff --git a/mm/slab_common.c b/mm/slab_common.c
> >index 70b0cc85db67f8..18b7b809c8d064 100644
> >--- a/mm/slab_common.c
> >+++ b/mm/slab_common.c
> >@@ -53,7 +53,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
> >               SLAB_FAILSLAB | SLAB_KASAN)
> >
> > #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
> >-                       SLAB_ACCOUNT)
> >+                       SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
> >
> > /*
> >  * Merge control. If this is set then no merging of slab caches will occur.
> >diff --git a/mm/slub.c b/mm/slub.c
> >index 21a3f6866da472..6d47765a82d150 100644
> >--- a/mm/slub.c
> >+++ b/mm/slub.c
> >@@ -1685,7 +1685,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
> >
> > static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
> > {
> >-      flags = check_slab_flags(flags);
> >+      flags = check_slab_flags(flags, s->flags);
> >
> >       return allocate_slab(s,
> >               flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
> >@@ -3577,6 +3577,9 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
> >       if (s->flags & SLAB_CACHE_DMA)
> >               s->allocflags |= GFP_DMA;
> >
> >+      if (s->flags & SLAB_CACHE_DMA32)
> >+              s->allocflags |= GFP_DMA32;
> >+
> >       if (s->flags & SLAB_RECLAIM_ACCOUNT)
> >               s->allocflags |= __GFP_RECLAIMABLE;
> >
> >@@ -5095,6 +5098,14 @@ static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
> > SLAB_ATTR_RO(cache_dma);
> > #endif
> >
> >+#ifdef CONFIG_ZONE_DMA32
> >+static ssize_t cache_dma32_show(struct kmem_cache *s, char *buf)
> >+{
> >+      return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA32));
> >+}
> >+SLAB_ATTR_RO(cache_dma32);
> >+#endif
> >+
> > static ssize_t usersize_show(struct kmem_cache *s, char *buf)
> > {
> >       return sprintf(buf, "%u\n", s->usersize);
> >@@ -5435,6 +5446,9 @@ static struct attribute *slab_attrs[] = {
> > #ifdef CONFIG_ZONE_DMA
> >       &cache_dma_attr.attr,
> > #endif
> >+#ifdef CONFIG_ZONE_DMA32
> >+      &cache_dma32_attr.attr,
> >+#endif
> > #ifdef CONFIG_NUMA
> >       &remote_node_defrag_ratio_attr.attr,
> > #endif
> >@@ -5665,6 +5679,8 @@ static char *create_unique_id(struct kmem_cache *s)
> >        */
> >       if (s->flags & SLAB_CACHE_DMA)
> >               *p++ = 'd';
> >+      if (s->flags & SLAB_CACHE_DMA32)
> >+              *p++ = 'D';
> >       if (s->flags & SLAB_RECLAIM_ACCOUNT)
> >               *p++ = 'a';
> >       if (s->flags & SLAB_CONSISTENCY_CHECKS)
> >--
> >2.20.0.rc1.387.gf8505762e3-goog
> >
> >_______________________________________________
> >iommu mailing list
> >iommu@lists.linux-foundation.org
> >https://lists.linuxfoundation.org/mailman/listinfo/iommu
>
> --
> Wei Yang
> Help you, Help me
Wei Yang - Dec. 5, 2018, 9:18 a.m.
On Wed, Dec 05, 2018 at 03:39:51PM +0800, Nicolas Boichat wrote:
>On Wed, Dec 5, 2018 at 3:25 PM Wei Yang <richard.weiyang@gmail.com> wrote:
>>
>> On Wed, Dec 05, 2018 at 01:48:27PM +0800, Nicolas Boichat wrote:
>> >In some cases (e.g. IOMMU ARMv7s page allocator), we need to allocate
>> >data structures smaller than a page with GFP_DMA32 flag.
>> >
>> >This change makes it possible to create a custom cache in DMA32 zone
>> >using kmem_cache_create, then allocate memory using kmem_cache_alloc.
>> >
>> >We do not create a DMA32 kmalloc cache array, as there are currently
>> >no users of kmalloc(..., GFP_DMA32). The new test in check_slab_flags
>> >ensures that such calls still fail (as they do before this change).
>> >
>> >Fixes: ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32")
>> >Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
>> >---
>> >
>> >Changes since v2:
>> > - Clarified commit message
>> > - Add entry in sysfs-kernel-slab to document the new sysfs file
>> >
>> >(v3 used the page_frag approach)
>> >
>> >Documentation/ABI/testing/sysfs-kernel-slab |  9 +++++++++
>> > include/linux/slab.h                        |  2 ++
>> > mm/internal.h                               |  8 ++++++--
>> > mm/slab.c                                   |  4 +++-
>> > mm/slab.h                                   |  3 ++-
>> > mm/slab_common.c                            |  2 +-
>> > mm/slub.c                                   | 18 +++++++++++++++++-
>> > 7 files changed, 40 insertions(+), 6 deletions(-)
>> >
>> >diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
>> >index 29601d93a1c2ea..d742c6cfdffbe9 100644
>> >--- a/Documentation/ABI/testing/sysfs-kernel-slab
>> >+++ b/Documentation/ABI/testing/sysfs-kernel-slab
>> >@@ -106,6 +106,15 @@ Description:
>> >               are from ZONE_DMA.
>> >               Available when CONFIG_ZONE_DMA is enabled.
>> >
>> >+What:         /sys/kernel/slab/cache/cache_dma32
>> >+Date:         December 2018
>> >+KernelVersion:        4.21
>> >+Contact:      Nicolas Boichat <drinkcat@chromium.org>
>> >+Description:
>> >+              The cache_dma32 file is read-only and specifies whether objects
>> >+              are from ZONE_DMA32.
>> >+              Available when CONFIG_ZONE_DMA32 is enabled.
>> >+
>> > What:         /sys/kernel/slab/cache/cpu_slabs
>> > Date:         May 2007
>> > KernelVersion:        2.6.22
>> >diff --git a/include/linux/slab.h b/include/linux/slab.h
>> >index 11b45f7ae4057c..9449b19c5f107a 100644
>> >--- a/include/linux/slab.h
>> >+++ b/include/linux/slab.h
>> >@@ -32,6 +32,8 @@
>> > #define SLAB_HWCACHE_ALIGN    ((slab_flags_t __force)0x00002000U)
>> > /* Use GFP_DMA memory */
>> > #define SLAB_CACHE_DMA                ((slab_flags_t __force)0x00004000U)
>> >+/* Use GFP_DMA32 memory */
>> >+#define SLAB_CACHE_DMA32      ((slab_flags_t __force)0x00008000U)
>> > /* DEBUG: Store the last owner for bug hunting */
>> > #define SLAB_STORE_USER               ((slab_flags_t __force)0x00010000U)
>> > /* Panic if kmem_cache_create() fails */
>> >diff --git a/mm/internal.h b/mm/internal.h
>> >index a2ee82a0cd44ae..fd244ad716eaf8 100644
>> >--- a/mm/internal.h
>> >+++ b/mm/internal.h
>> >@@ -14,6 +14,7 @@
>> > #include <linux/fs.h>
>> > #include <linux/mm.h>
>> > #include <linux/pagemap.h>
>> >+#include <linux/slab.h>
>> > #include <linux/tracepoint-defs.h>
>> >
>> > /*
>> >@@ -34,9 +35,12 @@
>> > #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
>> >
>> > /* Check for flags that must not be used with a slab allocator */
>> >-static inline gfp_t check_slab_flags(gfp_t flags)
>> >+static inline gfp_t check_slab_flags(gfp_t flags, slab_flags_t slab_flags)
>> > {
>> >-      gfp_t bug_mask = __GFP_DMA32 | __GFP_HIGHMEM | ~__GFP_BITS_MASK;
>> >+      gfp_t bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
>> >+
>> >+      if (!IS_ENABLED(CONFIG_ZONE_DMA32) || !(slab_flags & SLAB_CACHE_DMA32))
>> >+              bug_mask |= __GFP_DMA32;
>>
>> The original version doesn't check CONFIG_ZONE_DMA32.
>>
>> Do we need to add this condition here?
>> Could we just decide the bug_mask based on slab_flags?
>
>We can. The reason I did it this way is that when we don't have
>CONFIG_ZONE_DMA32, the compiler should be able to simplify to:
>
>bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
>if (true || ..) => if (true)
>   bug_mask |= __GFP_DMA32;
>
>Then just
>bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK | __GFP_DMA32;
>
>And since the function is inline, slab_flags would not even need to be
>accessed at all.
>

Thanks for explanation. This make sense to me.
Michal Hocko - Dec. 5, 2018, 9:55 a.m.
On Wed 05-12-18 13:48:27, Nicolas Boichat wrote:
> In some cases (e.g. IOMMU ARMv7s page allocator), we need to allocate
> data structures smaller than a page with GFP_DMA32 flag.
> 
> This change makes it possible to create a custom cache in DMA32 zone
> using kmem_cache_create, then allocate memory using kmem_cache_alloc.
> 
> We do not create a DMA32 kmalloc cache array, as there are currently
> no users of kmalloc(..., GFP_DMA32). The new test in check_slab_flags
> ensures that such calls still fail (as they do before this change).

The changelog should be much more specific about decisions made here.
First of all it would be nice to mention the usecase.

Secondly, why do we need a new sysfs file? Who is going to consume it?

Then why do we need SLAB_MERGE_SAME to cover GFP_DMA32 as well? I
thought the whole point is to use dedicated slab cache. Who is this
going to merge with?
Nicolas Boichat - Dec. 5, 2018, 11:01 a.m.
On Wed, Dec 5, 2018 at 5:56 PM Michal Hocko <mhocko@kernel.org> wrote:
>
> On Wed 05-12-18 13:48:27, Nicolas Boichat wrote:
> > In some cases (e.g. IOMMU ARMv7s page allocator), we need to allocate
> > data structures smaller than a page with GFP_DMA32 flag.
> >
> > This change makes it possible to create a custom cache in DMA32 zone
> > using kmem_cache_create, then allocate memory using kmem_cache_alloc.
> >
> > We do not create a DMA32 kmalloc cache array, as there are currently
> > no users of kmalloc(..., GFP_DMA32). The new test in check_slab_flags
> > ensures that such calls still fail (as they do before this change).
>
> The changelog should be much more specific about decisions made here.
> First of all it would be nice to mention the usecase.

Ok, I'll copy most of the cover letter text here (i.e. the fact that
IOMMU wants physical memory <4GB for L2 page tables, why it's better
than genalloc/page_frag).

> Secondly, why do we need a new sysfs file? Who is going to consume it?

We have cache_dma, so it seems consistent to add cache_dma32.

I wasn't aware of tools/vm/slabinfo.c, so I can add support for
cache_dma32 in a follow-up patch. Any other user I should take care
of?

> Then why do we need SLAB_MERGE_SAME to cover GFP_DMA32 as well?

SLAB_MERGE_SAME tells us which flags _need_ to be the same for the
slabs to be merged. We don't want slab caches with GFP_DMA32 and
~GFP_DMA32 to be merged, so it should be in there.
(https://elixir.bootlin.com/linux/v4.19.6/source/mm/slab_common.c#L342).

> I
> thought the whole point is to use dedicated slab cache. Who is this
> going to merge with?

Well, if there was another SLAB cache requiring 1KB GFP_DMA32
elements, then I don't see why we would not merge the caches. This is
what happens with this IOMMU L2 tables cache pre-CONFIG_ZONE_DMA32 on
arm64 (output on some 3.18 kernel below), and what would happen on
arm32 since we still use GFP_DMA.

/sys/kernel/slab # ls -l | grep dt-0001024
drwxr-xr-x. 2 root root 0 Dec  5 02:25 :dt-0001024
lrwxrwxrwx. 1 root root 0 Dec  5 02:25 dma-kmalloc-1024 -> :dt-0001024
lrwxrwxrwx. 1 root root 0 Dec  5 02:25 io-pgtable_armv7s_l2 -> :dt-0001024

Thanks!

> --
> Michal Hocko
> SUSE Labs
Michal Hocko - Dec. 5, 2018, 11:37 a.m.
On Wed 05-12-18 19:01:03, Nicolas Boichat wrote:
[...]
> > Secondly, why do we need a new sysfs file? Who is going to consume it?
> 
> We have cache_dma, so it seems consistent to add cache_dma32.

I wouldn't copy a pattern unless there is an explicit usecase for it.
We do expose way too much to userspace and that keeps kicking us later.
Not that I am aware of any specific example for cache_dma but seeing
other examples I would rather be more careful.

> I wasn't aware of tools/vm/slabinfo.c, so I can add support for
> cache_dma32 in a follow-up patch. Any other user I should take care
> of?

In general zones are inernal MM implementation details and the less we
export to userspace the better.

> > Then why do we need SLAB_MERGE_SAME to cover GFP_DMA32 as well?
> 
> SLAB_MERGE_SAME tells us which flags _need_ to be the same for the
> slabs to be merged. We don't want slab caches with GFP_DMA32 and
> ~GFP_DMA32 to be merged, so it should be in there.
> (https://elixir.bootlin.com/linux/v4.19.6/source/mm/slab_common.c#L342).

Ohh, my bad, I have misread the change. Sure we definitely not want to
allow merging here. My bad.
Wei Yang - Dec. 5, 2018, 12:18 p.m.
On Wed, Dec 05, 2018 at 03:39:51PM +0800, Nicolas Boichat wrote:
>On Wed, Dec 5, 2018 at 3:25 PM Wei Yang <richard.weiyang@gmail.com> wrote:
>>
>> On Wed, Dec 05, 2018 at 01:48:27PM +0800, Nicolas Boichat wrote:
>> >In some cases (e.g. IOMMU ARMv7s page allocator), we need to allocate
>> >data structures smaller than a page with GFP_DMA32 flag.
>> >
>> >This change makes it possible to create a custom cache in DMA32 zone
>> >using kmem_cache_create, then allocate memory using kmem_cache_alloc.
>> >
>> >We do not create a DMA32 kmalloc cache array, as there are currently
>> >no users of kmalloc(..., GFP_DMA32). The new test in check_slab_flags
>> >ensures that such calls still fail (as they do before this change).
>> >
>> >Fixes: ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32")
>> >Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
>> >---
>> >
>> >Changes since v2:
>> > - Clarified commit message
>> > - Add entry in sysfs-kernel-slab to document the new sysfs file
>> >
>> >(v3 used the page_frag approach)
>> >
>> >Documentation/ABI/testing/sysfs-kernel-slab |  9 +++++++++
>> > include/linux/slab.h                        |  2 ++
>> > mm/internal.h                               |  8 ++++++--
>> > mm/slab.c                                   |  4 +++-
>> > mm/slab.h                                   |  3 ++-
>> > mm/slab_common.c                            |  2 +-
>> > mm/slub.c                                   | 18 +++++++++++++++++-
>> > 7 files changed, 40 insertions(+), 6 deletions(-)
>> >
>> >diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
>> >index 29601d93a1c2ea..d742c6cfdffbe9 100644
>> >--- a/Documentation/ABI/testing/sysfs-kernel-slab
>> >+++ b/Documentation/ABI/testing/sysfs-kernel-slab
>> >@@ -106,6 +106,15 @@ Description:
>> >               are from ZONE_DMA.
>> >               Available when CONFIG_ZONE_DMA is enabled.
>> >
>> >+What:         /sys/kernel/slab/cache/cache_dma32
>> >+Date:         December 2018
>> >+KernelVersion:        4.21
>> >+Contact:      Nicolas Boichat <drinkcat@chromium.org>
>> >+Description:
>> >+              The cache_dma32 file is read-only and specifies whether objects
>> >+              are from ZONE_DMA32.
>> >+              Available when CONFIG_ZONE_DMA32 is enabled.
>> >+
>> > What:         /sys/kernel/slab/cache/cpu_slabs
>> > Date:         May 2007
>> > KernelVersion:        2.6.22
>> >diff --git a/include/linux/slab.h b/include/linux/slab.h
>> >index 11b45f7ae4057c..9449b19c5f107a 100644
>> >--- a/include/linux/slab.h
>> >+++ b/include/linux/slab.h
>> >@@ -32,6 +32,8 @@
>> > #define SLAB_HWCACHE_ALIGN    ((slab_flags_t __force)0x00002000U)
>> > /* Use GFP_DMA memory */
>> > #define SLAB_CACHE_DMA                ((slab_flags_t __force)0x00004000U)
>> >+/* Use GFP_DMA32 memory */
>> >+#define SLAB_CACHE_DMA32      ((slab_flags_t __force)0x00008000U)
>> > /* DEBUG: Store the last owner for bug hunting */
>> > #define SLAB_STORE_USER               ((slab_flags_t __force)0x00010000U)
>> > /* Panic if kmem_cache_create() fails */
>> >diff --git a/mm/internal.h b/mm/internal.h
>> >index a2ee82a0cd44ae..fd244ad716eaf8 100644
>> >--- a/mm/internal.h
>> >+++ b/mm/internal.h
>> >@@ -14,6 +14,7 @@
>> > #include <linux/fs.h>
>> > #include <linux/mm.h>
>> > #include <linux/pagemap.h>
>> >+#include <linux/slab.h>
>> > #include <linux/tracepoint-defs.h>
>> >
>> > /*
>> >@@ -34,9 +35,12 @@
>> > #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
>> >
>> > /* Check for flags that must not be used with a slab allocator */
>> >-static inline gfp_t check_slab_flags(gfp_t flags)
>> >+static inline gfp_t check_slab_flags(gfp_t flags, slab_flags_t slab_flags)
>> > {
>> >-      gfp_t bug_mask = __GFP_DMA32 | __GFP_HIGHMEM | ~__GFP_BITS_MASK;
>> >+      gfp_t bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
>> >+
>> >+      if (!IS_ENABLED(CONFIG_ZONE_DMA32) || !(slab_flags & SLAB_CACHE_DMA32))
>> >+              bug_mask |= __GFP_DMA32;
>>
>> The original version doesn't check CONFIG_ZONE_DMA32.
>>
>> Do we need to add this condition here?
>> Could we just decide the bug_mask based on slab_flags?
>
>We can. The reason I did it this way is that when we don't have
>CONFIG_ZONE_DMA32, the compiler should be able to simplify to:
>
>bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
>if (true || ..) => if (true)
>   bug_mask |= __GFP_DMA32;
>
>Then just
>bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK | __GFP_DMA32;
>
>And since the function is inline, slab_flags would not even need to be
>accessed at all.
>

Hmm, I get one confusion.

This means if CONFIG_ZONE_DMA32 is not enabled, bug_mask will always
contains __GFP_DMA32. This will check with cachep->flags.

If cachep->flags has GFP_DMA32, this always fail?

Is this possible?
Vlastimil Babka - Dec. 5, 2018, 1:59 p.m.
On 12/5/18 6:48 AM, Nicolas Boichat wrote:
> In some cases (e.g. IOMMU ARMv7s page allocator), we need to allocate
> data structures smaller than a page with GFP_DMA32 flag.
> 
> This change makes it possible to create a custom cache in DMA32 zone
> using kmem_cache_create, then allocate memory using kmem_cache_alloc.
> 
> We do not create a DMA32 kmalloc cache array, as there are currently
> no users of kmalloc(..., GFP_DMA32). The new test in check_slab_flags
> ensures that such calls still fail (as they do before this change).
> 
> Fixes: ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32")

Same as my comment for 1/3.

> Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>

In general,
Acked-by: Vlastimil Babka <vbabka@suse.cz>

Some comments below:

> ---
> 
> Changes since v2:
>  - Clarified commit message
>  - Add entry in sysfs-kernel-slab to document the new sysfs file
> 
> (v3 used the page_frag approach)
> 
> Documentation/ABI/testing/sysfs-kernel-slab |  9 +++++++++
>  include/linux/slab.h                        |  2 ++
>  mm/internal.h                               |  8 ++++++--
>  mm/slab.c                                   |  4 +++-
>  mm/slab.h                                   |  3 ++-
>  mm/slab_common.c                            |  2 +-
>  mm/slub.c                                   | 18 +++++++++++++++++-
>  7 files changed, 40 insertions(+), 6 deletions(-)
> 
> diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
> index 29601d93a1c2ea..d742c6cfdffbe9 100644
> --- a/Documentation/ABI/testing/sysfs-kernel-slab
> +++ b/Documentation/ABI/testing/sysfs-kernel-slab
> @@ -106,6 +106,15 @@ Description:
>  		are from ZONE_DMA.
>  		Available when CONFIG_ZONE_DMA is enabled.
>  
> +What:		/sys/kernel/slab/cache/cache_dma32
> +Date:		December 2018
> +KernelVersion:	4.21
> +Contact:	Nicolas Boichat <drinkcat@chromium.org>
> +Description:
> +		The cache_dma32 file is read-only and specifies whether objects
> +		are from ZONE_DMA32.
> +		Available when CONFIG_ZONE_DMA32 is enabled.

I don't have a strong opinion. It's a new file, yeah, but consistent
with already existing ones. I'd leave the decision with SL*B maintainers.

>  What:		/sys/kernel/slab/cache/cpu_slabs
>  Date:		May 2007
>  KernelVersion:	2.6.22
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 11b45f7ae4057c..9449b19c5f107a 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -32,6 +32,8 @@
>  #define SLAB_HWCACHE_ALIGN	((slab_flags_t __force)0x00002000U)
>  /* Use GFP_DMA memory */
>  #define SLAB_CACHE_DMA		((slab_flags_t __force)0x00004000U)
> +/* Use GFP_DMA32 memory */
> +#define SLAB_CACHE_DMA32	((slab_flags_t __force)0x00008000U)
>  /* DEBUG: Store the last owner for bug hunting */
>  #define SLAB_STORE_USER		((slab_flags_t __force)0x00010000U)
>  /* Panic if kmem_cache_create() fails */
> diff --git a/mm/internal.h b/mm/internal.h
> index a2ee82a0cd44ae..fd244ad716eaf8 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -14,6 +14,7 @@
>  #include <linux/fs.h>
>  #include <linux/mm.h>
>  #include <linux/pagemap.h>
> +#include <linux/slab.h>
>  #include <linux/tracepoint-defs.h>
>  
>  /*
> @@ -34,9 +35,12 @@
>  #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
>  
>  /* Check for flags that must not be used with a slab allocator */
> -static inline gfp_t check_slab_flags(gfp_t flags)
> +static inline gfp_t check_slab_flags(gfp_t flags, slab_flags_t slab_flags)
>  {
> -	gfp_t bug_mask = __GFP_DMA32 | __GFP_HIGHMEM | ~__GFP_BITS_MASK;
> +	gfp_t bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
> +
> +	if (!IS_ENABLED(CONFIG_ZONE_DMA32) || !(slab_flags & SLAB_CACHE_DMA32))
> +		bug_mask |= __GFP_DMA32;

I'll point out that this is not even strictly needed AFAICS, as only
flags passed to kmem_cache_alloc() are checked - the cache->allocflags
derived from SLAB_CACHE_DMA32 are appended only after check_slab_flags()
(in both SLAB and SLUB AFAICS). And for a cache created with
SLAB_CACHE_DMA32, the caller of kmem_cache_alloc() doesn't need to also
include __GFP_DMA32, the allocation will be from ZONE_DMA32 regardless.
So it would be fine even unchanged. The check would anyway need some
more love to catch the same with __GFP_DMA to be consistent and cover
all corner cases.

>  
>  	if (unlikely(flags & bug_mask)) {
>  		gfp_t invalid_mask = flags & bug_mask;
Nicolas Boichat - Dec. 6, 2018, 12:41 a.m.
On Wed, Dec 5, 2018 at 8:18 PM Wei Yang <richard.weiyang@gmail.com> wrote:
>
> On Wed, Dec 05, 2018 at 03:39:51PM +0800, Nicolas Boichat wrote:
> >On Wed, Dec 5, 2018 at 3:25 PM Wei Yang <richard.weiyang@gmail.com> wrote:
> >>
> >> On Wed, Dec 05, 2018 at 01:48:27PM +0800, Nicolas Boichat wrote:
> >> >In some cases (e.g. IOMMU ARMv7s page allocator), we need to allocate
> >> >data structures smaller than a page with GFP_DMA32 flag.
> >> >
> >> >This change makes it possible to create a custom cache in DMA32 zone
> >> >using kmem_cache_create, then allocate memory using kmem_cache_alloc.
> >> >
> >> >We do not create a DMA32 kmalloc cache array, as there are currently
> >> >no users of kmalloc(..., GFP_DMA32). The new test in check_slab_flags
> >> >ensures that such calls still fail (as they do before this change).
> >> >
> >> >Fixes: ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32")
> >> >Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
> >> >---
> >> >
> >> >Changes since v2:
> >> > - Clarified commit message
> >> > - Add entry in sysfs-kernel-slab to document the new sysfs file
> >> >
> >> >(v3 used the page_frag approach)
> >> >
> >> >Documentation/ABI/testing/sysfs-kernel-slab |  9 +++++++++
> >> > include/linux/slab.h                        |  2 ++
> >> > mm/internal.h                               |  8 ++++++--
> >> > mm/slab.c                                   |  4 +++-
> >> > mm/slab.h                                   |  3 ++-
> >> > mm/slab_common.c                            |  2 +-
> >> > mm/slub.c                                   | 18 +++++++++++++++++-
> >> > 7 files changed, 40 insertions(+), 6 deletions(-)
> >> >
> >> >diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
> >> >index 29601d93a1c2ea..d742c6cfdffbe9 100644
> >> >--- a/Documentation/ABI/testing/sysfs-kernel-slab
> >> >+++ b/Documentation/ABI/testing/sysfs-kernel-slab
> >> >@@ -106,6 +106,15 @@ Description:
> >> >               are from ZONE_DMA.
> >> >               Available when CONFIG_ZONE_DMA is enabled.
> >> >
> >> >+What:         /sys/kernel/slab/cache/cache_dma32
> >> >+Date:         December 2018
> >> >+KernelVersion:        4.21
> >> >+Contact:      Nicolas Boichat <drinkcat@chromium.org>
> >> >+Description:
> >> >+              The cache_dma32 file is read-only and specifies whether objects
> >> >+              are from ZONE_DMA32.
> >> >+              Available when CONFIG_ZONE_DMA32 is enabled.
> >> >+
> >> > What:         /sys/kernel/slab/cache/cpu_slabs
> >> > Date:         May 2007
> >> > KernelVersion:        2.6.22
> >> >diff --git a/include/linux/slab.h b/include/linux/slab.h
> >> >index 11b45f7ae4057c..9449b19c5f107a 100644
> >> >--- a/include/linux/slab.h
> >> >+++ b/include/linux/slab.h
> >> >@@ -32,6 +32,8 @@
> >> > #define SLAB_HWCACHE_ALIGN    ((slab_flags_t __force)0x00002000U)
> >> > /* Use GFP_DMA memory */
> >> > #define SLAB_CACHE_DMA                ((slab_flags_t __force)0x00004000U)
> >> >+/* Use GFP_DMA32 memory */
> >> >+#define SLAB_CACHE_DMA32      ((slab_flags_t __force)0x00008000U)
> >> > /* DEBUG: Store the last owner for bug hunting */
> >> > #define SLAB_STORE_USER               ((slab_flags_t __force)0x00010000U)
> >> > /* Panic if kmem_cache_create() fails */
> >> >diff --git a/mm/internal.h b/mm/internal.h
> >> >index a2ee82a0cd44ae..fd244ad716eaf8 100644
> >> >--- a/mm/internal.h
> >> >+++ b/mm/internal.h
> >> >@@ -14,6 +14,7 @@
> >> > #include <linux/fs.h>
> >> > #include <linux/mm.h>
> >> > #include <linux/pagemap.h>
> >> >+#include <linux/slab.h>
> >> > #include <linux/tracepoint-defs.h>
> >> >
> >> > /*
> >> >@@ -34,9 +35,12 @@
> >> > #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
> >> >
> >> > /* Check for flags that must not be used with a slab allocator */
> >> >-static inline gfp_t check_slab_flags(gfp_t flags)
> >> >+static inline gfp_t check_slab_flags(gfp_t flags, slab_flags_t slab_flags)
> >> > {
> >> >-      gfp_t bug_mask = __GFP_DMA32 | __GFP_HIGHMEM | ~__GFP_BITS_MASK;
> >> >+      gfp_t bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
> >> >+
> >> >+      if (!IS_ENABLED(CONFIG_ZONE_DMA32) || !(slab_flags & SLAB_CACHE_DMA32))
> >> >+              bug_mask |= __GFP_DMA32;
> >>
> >> The original version doesn't check CONFIG_ZONE_DMA32.
> >>
> >> Do we need to add this condition here?
> >> Could we just decide the bug_mask based on slab_flags?
> >
> >We can. The reason I did it this way is that when we don't have
> >CONFIG_ZONE_DMA32, the compiler should be able to simplify to:
> >
> >bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
> >if (true || ..) => if (true)
> >   bug_mask |= __GFP_DMA32;
> >
> >Then just
> >bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK | __GFP_DMA32;
> >
> >And since the function is inline, slab_flags would not even need to be
> >accessed at all.
> >
>
> Hmm, I get one confusion.
>
> This means if CONFIG_ZONE_DMA32 is not enabled, bug_mask will always
> contains __GFP_DMA32. This will check with cachep->flags.
>
> If cachep->flags has GFP_DMA32, this always fail?
>
> Is this possible?

Not fully sure to understand the question, but the code is:
if (!IS_ENABLED(CONFIG_ZONE_DMA32) || !(slab_flags & SLAB_CACHE_DMA32))
       bug_mask |= __GFP_DMA32;

IS_ENABLED(CONFIG_ZONE_DMA32) == true:
 - (slab_flags & SLAB_CACHE_DMA32) => bug_mask untouched, __GFP_DMA32
is allowed.
 - !(slab_flags & SLAB_CACHE_DMA32) => bug_mask |= __GFP_DMA32;,
__GFP_DMA32 triggers warning
IS_ENABLED(CONFIG_ZONE_DMA32) == false:
  => bug_mask |= __GFP_DMA32;, __GFP_DMA32 triggers warning (as
expected, GFP_DMA32 does not make sense if there is no DMA32 zone).

Does that clarify?

>
> --
> Wei Yang
> Help you, Help me
Wei Yang - Dec. 6, 2018, 3:32 a.m.
On Thu, Dec 06, 2018 at 08:41:36AM +0800, Nicolas Boichat wrote:
>On Wed, Dec 5, 2018 at 8:18 PM Wei Yang <richard.weiyang@gmail.com> wrote:
>>
>> On Wed, Dec 05, 2018 at 03:39:51PM +0800, Nicolas Boichat wrote:
>> >On Wed, Dec 5, 2018 at 3:25 PM Wei Yang <richard.weiyang@gmail.com> wrote:
>> >>
>> >> On Wed, Dec 05, 2018 at 01:48:27PM +0800, Nicolas Boichat wrote:
>> >> >In some cases (e.g. IOMMU ARMv7s page allocator), we need to allocate
>> >> >data structures smaller than a page with GFP_DMA32 flag.
>> >> >
>> >> >This change makes it possible to create a custom cache in DMA32 zone
>> >> >using kmem_cache_create, then allocate memory using kmem_cache_alloc.
>> >> >
>> >> >We do not create a DMA32 kmalloc cache array, as there are currently
>> >> >no users of kmalloc(..., GFP_DMA32). The new test in check_slab_flags
>> >> >ensures that such calls still fail (as they do before this change).
>> >> >
>> >> >Fixes: ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32")
>> >> >Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
>> >> >---
>> >> >
>> >> >Changes since v2:
>> >> > - Clarified commit message
>> >> > - Add entry in sysfs-kernel-slab to document the new sysfs file
>> >> >
>> >> >(v3 used the page_frag approach)
>> >> >
>> >> >Documentation/ABI/testing/sysfs-kernel-slab |  9 +++++++++
>> >> > include/linux/slab.h                        |  2 ++
>> >> > mm/internal.h                               |  8 ++++++--
>> >> > mm/slab.c                                   |  4 +++-
>> >> > mm/slab.h                                   |  3 ++-
>> >> > mm/slab_common.c                            |  2 +-
>> >> > mm/slub.c                                   | 18 +++++++++++++++++-
>> >> > 7 files changed, 40 insertions(+), 6 deletions(-)
>> >> >
>> >> >diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
>> >> >index 29601d93a1c2ea..d742c6cfdffbe9 100644
>> >> >--- a/Documentation/ABI/testing/sysfs-kernel-slab
>> >> >+++ b/Documentation/ABI/testing/sysfs-kernel-slab
>> >> >@@ -106,6 +106,15 @@ Description:
>> >> >               are from ZONE_DMA.
>> >> >               Available when CONFIG_ZONE_DMA is enabled.
>> >> >
>> >> >+What:         /sys/kernel/slab/cache/cache_dma32
>> >> >+Date:         December 2018
>> >> >+KernelVersion:        4.21
>> >> >+Contact:      Nicolas Boichat <drinkcat@chromium.org>
>> >> >+Description:
>> >> >+              The cache_dma32 file is read-only and specifies whether objects
>> >> >+              are from ZONE_DMA32.
>> >> >+              Available when CONFIG_ZONE_DMA32 is enabled.
>> >> >+
>> >> > What:         /sys/kernel/slab/cache/cpu_slabs
>> >> > Date:         May 2007
>> >> > KernelVersion:        2.6.22
>> >> >diff --git a/include/linux/slab.h b/include/linux/slab.h
>> >> >index 11b45f7ae4057c..9449b19c5f107a 100644
>> >> >--- a/include/linux/slab.h
>> >> >+++ b/include/linux/slab.h
>> >> >@@ -32,6 +32,8 @@
>> >> > #define SLAB_HWCACHE_ALIGN    ((slab_flags_t __force)0x00002000U)
>> >> > /* Use GFP_DMA memory */
>> >> > #define SLAB_CACHE_DMA                ((slab_flags_t __force)0x00004000U)
>> >> >+/* Use GFP_DMA32 memory */
>> >> >+#define SLAB_CACHE_DMA32      ((slab_flags_t __force)0x00008000U)
>> >> > /* DEBUG: Store the last owner for bug hunting */
>> >> > #define SLAB_STORE_USER               ((slab_flags_t __force)0x00010000U)
>> >> > /* Panic if kmem_cache_create() fails */
>> >> >diff --git a/mm/internal.h b/mm/internal.h
>> >> >index a2ee82a0cd44ae..fd244ad716eaf8 100644
>> >> >--- a/mm/internal.h
>> >> >+++ b/mm/internal.h
>> >> >@@ -14,6 +14,7 @@
>> >> > #include <linux/fs.h>
>> >> > #include <linux/mm.h>
>> >> > #include <linux/pagemap.h>
>> >> >+#include <linux/slab.h>
>> >> > #include <linux/tracepoint-defs.h>
>> >> >
>> >> > /*
>> >> >@@ -34,9 +35,12 @@
>> >> > #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
>> >> >
>> >> > /* Check for flags that must not be used with a slab allocator */
>> >> >-static inline gfp_t check_slab_flags(gfp_t flags)
>> >> >+static inline gfp_t check_slab_flags(gfp_t flags, slab_flags_t slab_flags)
>> >> > {
>> >> >-      gfp_t bug_mask = __GFP_DMA32 | __GFP_HIGHMEM | ~__GFP_BITS_MASK;
>> >> >+      gfp_t bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
>> >> >+
>> >> >+      if (!IS_ENABLED(CONFIG_ZONE_DMA32) || !(slab_flags & SLAB_CACHE_DMA32))
>> >> >+              bug_mask |= __GFP_DMA32;
>> >>
>> >> The original version doesn't check CONFIG_ZONE_DMA32.
>> >>
>> >> Do we need to add this condition here?
>> >> Could we just decide the bug_mask based on slab_flags?
>> >
>> >We can. The reason I did it this way is that when we don't have
>> >CONFIG_ZONE_DMA32, the compiler should be able to simplify to:
>> >
>> >bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
>> >if (true || ..) => if (true)
>> >   bug_mask |= __GFP_DMA32;
>> >
>> >Then just
>> >bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK | __GFP_DMA32;
>> >
>> >And since the function is inline, slab_flags would not even need to be
>> >accessed at all.
>> >
>>
>> Hmm, I get one confusion.
>>
>> This means if CONFIG_ZONE_DMA32 is not enabled, bug_mask will always
>> contains __GFP_DMA32. This will check with cachep->flags.
>>
>> If cachep->flags has GFP_DMA32, this always fail?
>>
>> Is this possible?
>
>Not fully sure to understand the question, but the code is:
>if (!IS_ENABLED(CONFIG_ZONE_DMA32) || !(slab_flags & SLAB_CACHE_DMA32))
>       bug_mask |= __GFP_DMA32;
>
>IS_ENABLED(CONFIG_ZONE_DMA32) == true:
> - (slab_flags & SLAB_CACHE_DMA32) => bug_mask untouched, __GFP_DMA32
>is allowed.
> - !(slab_flags & SLAB_CACHE_DMA32) => bug_mask |= __GFP_DMA32;,
>__GFP_DMA32 triggers warning
>IS_ENABLED(CONFIG_ZONE_DMA32) == false:
>  => bug_mask |= __GFP_DMA32;, __GFP_DMA32 triggers warning (as
>expected, GFP_DMA32 does not make sense if there is no DMA32 zone).

This is the case I am thinking.

The warning is reasonable since there is no DMA32. While the
kmem_cache_create() user is not easy to change their code.

For example, one writes code and wants to have a kmem_cache with DMA32
capability, so he writes kmem_cache_create(__GFP_DMA32). The code is
there and not easy to change. But one distro builder decides to disable
DMA32. This will leads to all the kmem_cache_create() through warning?

This behavior is what we expect?

>
>Does that clarify?
>
>>
>> --
>> Wei Yang
>> Help you, Help me
Nicolas Boichat - Dec. 6, 2018, 3:49 a.m.
On Wed, Dec 5, 2018 at 10:02 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 12/5/18 6:48 AM, Nicolas Boichat wrote:
> > In some cases (e.g. IOMMU ARMv7s page allocator), we need to allocate
> > data structures smaller than a page with GFP_DMA32 flag.
> >
> > This change makes it possible to create a custom cache in DMA32 zone
> > using kmem_cache_create, then allocate memory using kmem_cache_alloc.
> >
> > We do not create a DMA32 kmalloc cache array, as there are currently
> > no users of kmalloc(..., GFP_DMA32). The new test in check_slab_flags
> > ensures that such calls still fail (as they do before this change).
> >
> > Fixes: ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32")
>
> Same as my comment for 1/3.

I'll drop.

> > Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
>
> In general,
> Acked-by: Vlastimil Babka <vbabka@suse.cz>
>
> Some comments below:
>
> > ---
> >
> > Changes since v2:
> >  - Clarified commit message
> >  - Add entry in sysfs-kernel-slab to document the new sysfs file
> >
> > (v3 used the page_frag approach)
> >
> > Documentation/ABI/testing/sysfs-kernel-slab |  9 +++++++++
> >  include/linux/slab.h                        |  2 ++
> >  mm/internal.h                               |  8 ++++++--
> >  mm/slab.c                                   |  4 +++-
> >  mm/slab.h                                   |  3 ++-
> >  mm/slab_common.c                            |  2 +-
> >  mm/slub.c                                   | 18 +++++++++++++++++-
> >  7 files changed, 40 insertions(+), 6 deletions(-)
> >
> > diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
> > index 29601d93a1c2ea..d742c6cfdffbe9 100644
> > --- a/Documentation/ABI/testing/sysfs-kernel-slab
> > +++ b/Documentation/ABI/testing/sysfs-kernel-slab
> > @@ -106,6 +106,15 @@ Description:
> >               are from ZONE_DMA.
> >               Available when CONFIG_ZONE_DMA is enabled.
> >
> > +What:                /sys/kernel/slab/cache/cache_dma32
> > +Date:                December 2018
> > +KernelVersion:       4.21
> > +Contact:     Nicolas Boichat <drinkcat@chromium.org>
> > +Description:
> > +             The cache_dma32 file is read-only and specifies whether objects
> > +             are from ZONE_DMA32.
> > +             Available when CONFIG_ZONE_DMA32 is enabled.
>
> I don't have a strong opinion. It's a new file, yeah, but consistent
> with already existing ones. I'd leave the decision with SL*B maintainers.
>
> >  What:                /sys/kernel/slab/cache/cpu_slabs
> >  Date:                May 2007
> >  KernelVersion:       2.6.22
> > diff --git a/include/linux/slab.h b/include/linux/slab.h
> > index 11b45f7ae4057c..9449b19c5f107a 100644
> > --- a/include/linux/slab.h
> > +++ b/include/linux/slab.h
> > @@ -32,6 +32,8 @@
> >  #define SLAB_HWCACHE_ALIGN   ((slab_flags_t __force)0x00002000U)
> >  /* Use GFP_DMA memory */
> >  #define SLAB_CACHE_DMA               ((slab_flags_t __force)0x00004000U)
> > +/* Use GFP_DMA32 memory */
> > +#define SLAB_CACHE_DMA32     ((slab_flags_t __force)0x00008000U)
> >  /* DEBUG: Store the last owner for bug hunting */
> >  #define SLAB_STORE_USER              ((slab_flags_t __force)0x00010000U)
> >  /* Panic if kmem_cache_create() fails */
> > diff --git a/mm/internal.h b/mm/internal.h
> > index a2ee82a0cd44ae..fd244ad716eaf8 100644
> > --- a/mm/internal.h
> > +++ b/mm/internal.h
> > @@ -14,6 +14,7 @@
> >  #include <linux/fs.h>
> >  #include <linux/mm.h>
> >  #include <linux/pagemap.h>
> > +#include <linux/slab.h>
> >  #include <linux/tracepoint-defs.h>
> >
> >  /*
> > @@ -34,9 +35,12 @@
> >  #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
> >
> >  /* Check for flags that must not be used with a slab allocator */
> > -static inline gfp_t check_slab_flags(gfp_t flags)
> > +static inline gfp_t check_slab_flags(gfp_t flags, slab_flags_t slab_flags)
> >  {
> > -     gfp_t bug_mask = __GFP_DMA32 | __GFP_HIGHMEM | ~__GFP_BITS_MASK;
> > +     gfp_t bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
> > +
> > +     if (!IS_ENABLED(CONFIG_ZONE_DMA32) || !(slab_flags & SLAB_CACHE_DMA32))
> > +             bug_mask |= __GFP_DMA32;
>
> I'll point out that this is not even strictly needed AFAICS, as only
> flags passed to kmem_cache_alloc() are checked - the cache->allocflags
> derived from SLAB_CACHE_DMA32 are appended only after check_slab_flags()
> (in both SLAB and SLUB AFAICS). And for a cache created with
> SLAB_CACHE_DMA32, the caller of kmem_cache_alloc() doesn't need to also
> include __GFP_DMA32, the allocation will be from ZONE_DMA32 regardless.

Yes, you're right. I also looked at existing users of SLAB_CACHE_DMA,
and there is one case in drivers/scsi/scsi_lib.c where GFP_DMA is not
be passed (all the other users pass it).

I can drop GFP_DMA32 from my call in io-pgtable-arm-v7s.c.

> So it would be fine even unchanged. The check would anyway need some
> more love to catch the same with __GFP_DMA to be consistent and cover
> all corner cases.

Yes, the test is not complete. If we really wanted this to be
accurate, we'd need to check that GFP_* exactly matches SLAB_CACHE_*.

The only problem with dropping this is test that we should restore
GFP_DMA32 warning/errors somewhere else (as Christopher pointed out
here: https://lkml.org/lkml/2018/11/22/430), especially for kmalloc
case.

Maybe this can be done in kmalloc_slab.

> >
> >       if (unlikely(flags & bug_mask)) {
> >               gfp_t invalid_mask = flags & bug_mask;
Nicolas Boichat - Dec. 6, 2018, 3:55 a.m.
On Thu, Dec 6, 2018 at 11:32 AM Wei Yang <richard.weiyang@gmail.com> wrote:
>
> On Thu, Dec 06, 2018 at 08:41:36AM +0800, Nicolas Boichat wrote:
> >On Wed, Dec 5, 2018 at 8:18 PM Wei Yang <richard.weiyang@gmail.com> wrote:
> >>
> >> On Wed, Dec 05, 2018 at 03:39:51PM +0800, Nicolas Boichat wrote:
> >> >On Wed, Dec 5, 2018 at 3:25 PM Wei Yang <richard.weiyang@gmail.com> wrote:
> >> >>
> >> >> On Wed, Dec 05, 2018 at 01:48:27PM +0800, Nicolas Boichat wrote:
> >> >> >In some cases (e.g. IOMMU ARMv7s page allocator), we need to allocate
> >> >> >data structures smaller than a page with GFP_DMA32 flag.
> >> >> >
> >> >> >This change makes it possible to create a custom cache in DMA32 zone
> >> >> >using kmem_cache_create, then allocate memory using kmem_cache_alloc.
> >> >> >
> >> >> >We do not create a DMA32 kmalloc cache array, as there are currently
> >> >> >no users of kmalloc(..., GFP_DMA32). The new test in check_slab_flags
> >> >> >ensures that such calls still fail (as they do before this change).
> >> >> >
> >> >> >Fixes: ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32")
> >> >> >Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
> >> >> >---
> >> >> >
> >> >> >Changes since v2:
> >> >> > - Clarified commit message
> >> >> > - Add entry in sysfs-kernel-slab to document the new sysfs file
> >> >> >
> >> >> >(v3 used the page_frag approach)
> >> >> >
> >> >> >Documentation/ABI/testing/sysfs-kernel-slab |  9 +++++++++
> >> >> > include/linux/slab.h                        |  2 ++
> >> >> > mm/internal.h                               |  8 ++++++--
> >> >> > mm/slab.c                                   |  4 +++-
> >> >> > mm/slab.h                                   |  3 ++-
> >> >> > mm/slab_common.c                            |  2 +-
> >> >> > mm/slub.c                                   | 18 +++++++++++++++++-
> >> >> > 7 files changed, 40 insertions(+), 6 deletions(-)
> >> >> >
> >> >> >diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
> >> >> >index 29601d93a1c2ea..d742c6cfdffbe9 100644
> >> >> >--- a/Documentation/ABI/testing/sysfs-kernel-slab
> >> >> >+++ b/Documentation/ABI/testing/sysfs-kernel-slab
> >> >> >@@ -106,6 +106,15 @@ Description:
> >> >> >               are from ZONE_DMA.
> >> >> >               Available when CONFIG_ZONE_DMA is enabled.
> >> >> >
> >> >> >+What:         /sys/kernel/slab/cache/cache_dma32
> >> >> >+Date:         December 2018
> >> >> >+KernelVersion:        4.21
> >> >> >+Contact:      Nicolas Boichat <drinkcat@chromium.org>
> >> >> >+Description:
> >> >> >+              The cache_dma32 file is read-only and specifies whether objects
> >> >> >+              are from ZONE_DMA32.
> >> >> >+              Available when CONFIG_ZONE_DMA32 is enabled.
> >> >> >+
> >> >> > What:         /sys/kernel/slab/cache/cpu_slabs
> >> >> > Date:         May 2007
> >> >> > KernelVersion:        2.6.22
> >> >> >diff --git a/include/linux/slab.h b/include/linux/slab.h
> >> >> >index 11b45f7ae4057c..9449b19c5f107a 100644
> >> >> >--- a/include/linux/slab.h
> >> >> >+++ b/include/linux/slab.h
> >> >> >@@ -32,6 +32,8 @@
> >> >> > #define SLAB_HWCACHE_ALIGN    ((slab_flags_t __force)0x00002000U)
> >> >> > /* Use GFP_DMA memory */
> >> >> > #define SLAB_CACHE_DMA                ((slab_flags_t __force)0x00004000U)
> >> >> >+/* Use GFP_DMA32 memory */
> >> >> >+#define SLAB_CACHE_DMA32      ((slab_flags_t __force)0x00008000U)
> >> >> > /* DEBUG: Store the last owner for bug hunting */
> >> >> > #define SLAB_STORE_USER               ((slab_flags_t __force)0x00010000U)
> >> >> > /* Panic if kmem_cache_create() fails */
> >> >> >diff --git a/mm/internal.h b/mm/internal.h
> >> >> >index a2ee82a0cd44ae..fd244ad716eaf8 100644
> >> >> >--- a/mm/internal.h
> >> >> >+++ b/mm/internal.h
> >> >> >@@ -14,6 +14,7 @@
> >> >> > #include <linux/fs.h>
> >> >> > #include <linux/mm.h>
> >> >> > #include <linux/pagemap.h>
> >> >> >+#include <linux/slab.h>
> >> >> > #include <linux/tracepoint-defs.h>
> >> >> >
> >> >> > /*
> >> >> >@@ -34,9 +35,12 @@
> >> >> > #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
> >> >> >
> >> >> > /* Check for flags that must not be used with a slab allocator */
> >> >> >-static inline gfp_t check_slab_flags(gfp_t flags)
> >> >> >+static inline gfp_t check_slab_flags(gfp_t flags, slab_flags_t slab_flags)
> >> >> > {
> >> >> >-      gfp_t bug_mask = __GFP_DMA32 | __GFP_HIGHMEM | ~__GFP_BITS_MASK;
> >> >> >+      gfp_t bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
> >> >> >+
> >> >> >+      if (!IS_ENABLED(CONFIG_ZONE_DMA32) || !(slab_flags & SLAB_CACHE_DMA32))
> >> >> >+              bug_mask |= __GFP_DMA32;
> >> >>
> >> >> The original version doesn't check CONFIG_ZONE_DMA32.
> >> >>
> >> >> Do we need to add this condition here?
> >> >> Could we just decide the bug_mask based on slab_flags?
> >> >
> >> >We can. The reason I did it this way is that when we don't have
> >> >CONFIG_ZONE_DMA32, the compiler should be able to simplify to:
> >> >
> >> >bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
> >> >if (true || ..) => if (true)
> >> >   bug_mask |= __GFP_DMA32;
> >> >
> >> >Then just
> >> >bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK | __GFP_DMA32;
> >> >
> >> >And since the function is inline, slab_flags would not even need to be
> >> >accessed at all.
> >> >
> >>
> >> Hmm, I get one confusion.
> >>
> >> This means if CONFIG_ZONE_DMA32 is not enabled, bug_mask will always
> >> contains __GFP_DMA32. This will check with cachep->flags.
> >>
> >> If cachep->flags has GFP_DMA32, this always fail?
> >>
> >> Is this possible?
> >
> >Not fully sure to understand the question, but the code is:
> >if (!IS_ENABLED(CONFIG_ZONE_DMA32) || !(slab_flags & SLAB_CACHE_DMA32))
> >       bug_mask |= __GFP_DMA32;
> >
> >IS_ENABLED(CONFIG_ZONE_DMA32) == true:
> > - (slab_flags & SLAB_CACHE_DMA32) => bug_mask untouched, __GFP_DMA32
> >is allowed.
> > - !(slab_flags & SLAB_CACHE_DMA32) => bug_mask |= __GFP_DMA32;,
> >__GFP_DMA32 triggers warning
> >IS_ENABLED(CONFIG_ZONE_DMA32) == false:
> >  => bug_mask |= __GFP_DMA32;, __GFP_DMA32 triggers warning (as
> >expected, GFP_DMA32 does not make sense if there is no DMA32 zone).
>
> This is the case I am thinking.
>
> The warning is reasonable since there is no DMA32. While the
> kmem_cache_create() user is not easy to change their code.
>
> For example, one writes code and wants to have a kmem_cache with DMA32
> capability, so he writes kmem_cache_create(__GFP_DMA32). The code is
> there and not easy to change. But one distro builder decides to disable
> DMA32. This will leads to all the kmem_cache_create() through warning?

I don't think CONFIG_ZONE_DMA32 can be enabled/disabled by
distro/user? IIUC this is a property of the architecture, some have it
enabled, some don't.

> This behavior is what we expect?
>
> >
> >Does that clarify?
> >
> >>
> >> --
> >> Wei Yang
> >> Help you, Help me
>
> --
> Wei Yang
> Help you, Help me
Wei Yang - Dec. 6, 2018, 6:29 a.m.
On Thu, Dec 06, 2018 at 11:55:02AM +0800, Nicolas Boichat wrote:
>On Thu, Dec 6, 2018 at 11:32 AM Wei Yang <richard.weiyang@gmail.com> wrote:
>>
>> On Thu, Dec 06, 2018 at 08:41:36AM +0800, Nicolas Boichat wrote:
>> >On Wed, Dec 5, 2018 at 8:18 PM Wei Yang <richard.weiyang@gmail.com> wrote:
>> >>
>> >> On Wed, Dec 05, 2018 at 03:39:51PM +0800, Nicolas Boichat wrote:
>> >> >On Wed, Dec 5, 2018 at 3:25 PM Wei Yang <richard.weiyang@gmail.com> wrote:
>> >> >>
>> >> >> On Wed, Dec 05, 2018 at 01:48:27PM +0800, Nicolas Boichat wrote:
>> >> >> >In some cases (e.g. IOMMU ARMv7s page allocator), we need to allocate
>> >> >> >data structures smaller than a page with GFP_DMA32 flag.
>> >> >> >
>> >> >> >This change makes it possible to create a custom cache in DMA32 zone
>> >> >> >using kmem_cache_create, then allocate memory using kmem_cache_alloc.
>> >> >> >
>> >> >> >We do not create a DMA32 kmalloc cache array, as there are currently
>> >> >> >no users of kmalloc(..., GFP_DMA32). The new test in check_slab_flags
>> >> >> >ensures that such calls still fail (as they do before this change).
>> >> >> >
>> >> >> >Fixes: ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32")
>> >> >> >Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
>> >> >> >---
>> >> >> >
>> >> >> >Changes since v2:
>> >> >> > - Clarified commit message
>> >> >> > - Add entry in sysfs-kernel-slab to document the new sysfs file
>> >> >> >
>> >> >> >(v3 used the page_frag approach)
>> >> >> >
>> >> >> >Documentation/ABI/testing/sysfs-kernel-slab |  9 +++++++++
>> >> >> > include/linux/slab.h                        |  2 ++
>> >> >> > mm/internal.h                               |  8 ++++++--
>> >> >> > mm/slab.c                                   |  4 +++-
>> >> >> > mm/slab.h                                   |  3 ++-
>> >> >> > mm/slab_common.c                            |  2 +-
>> >> >> > mm/slub.c                                   | 18 +++++++++++++++++-
>> >> >> > 7 files changed, 40 insertions(+), 6 deletions(-)
>> >> >> >
>> >> >> >diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
>> >> >> >index 29601d93a1c2ea..d742c6cfdffbe9 100644
>> >> >> >--- a/Documentation/ABI/testing/sysfs-kernel-slab
>> >> >> >+++ b/Documentation/ABI/testing/sysfs-kernel-slab
>> >> >> >@@ -106,6 +106,15 @@ Description:
>> >> >> >               are from ZONE_DMA.
>> >> >> >               Available when CONFIG_ZONE_DMA is enabled.
>> >> >> >
>> >> >> >+What:         /sys/kernel/slab/cache/cache_dma32
>> >> >> >+Date:         December 2018
>> >> >> >+KernelVersion:        4.21
>> >> >> >+Contact:      Nicolas Boichat <drinkcat@chromium.org>
>> >> >> >+Description:
>> >> >> >+              The cache_dma32 file is read-only and specifies whether objects
>> >> >> >+              are from ZONE_DMA32.
>> >> >> >+              Available when CONFIG_ZONE_DMA32 is enabled.
>> >> >> >+
>> >> >> > What:         /sys/kernel/slab/cache/cpu_slabs
>> >> >> > Date:         May 2007
>> >> >> > KernelVersion:        2.6.22
>> >> >> >diff --git a/include/linux/slab.h b/include/linux/slab.h
>> >> >> >index 11b45f7ae4057c..9449b19c5f107a 100644
>> >> >> >--- a/include/linux/slab.h
>> >> >> >+++ b/include/linux/slab.h
>> >> >> >@@ -32,6 +32,8 @@
>> >> >> > #define SLAB_HWCACHE_ALIGN    ((slab_flags_t __force)0x00002000U)
>> >> >> > /* Use GFP_DMA memory */
>> >> >> > #define SLAB_CACHE_DMA                ((slab_flags_t __force)0x00004000U)
>> >> >> >+/* Use GFP_DMA32 memory */
>> >> >> >+#define SLAB_CACHE_DMA32      ((slab_flags_t __force)0x00008000U)
>> >> >> > /* DEBUG: Store the last owner for bug hunting */
>> >> >> > #define SLAB_STORE_USER               ((slab_flags_t __force)0x00010000U)
>> >> >> > /* Panic if kmem_cache_create() fails */
>> >> >> >diff --git a/mm/internal.h b/mm/internal.h
>> >> >> >index a2ee82a0cd44ae..fd244ad716eaf8 100644
>> >> >> >--- a/mm/internal.h
>> >> >> >+++ b/mm/internal.h
>> >> >> >@@ -14,6 +14,7 @@
>> >> >> > #include <linux/fs.h>
>> >> >> > #include <linux/mm.h>
>> >> >> > #include <linux/pagemap.h>
>> >> >> >+#include <linux/slab.h>
>> >> >> > #include <linux/tracepoint-defs.h>
>> >> >> >
>> >> >> > /*
>> >> >> >@@ -34,9 +35,12 @@
>> >> >> > #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
>> >> >> >
>> >> >> > /* Check for flags that must not be used with a slab allocator */
>> >> >> >-static inline gfp_t check_slab_flags(gfp_t flags)
>> >> >> >+static inline gfp_t check_slab_flags(gfp_t flags, slab_flags_t slab_flags)
>> >> >> > {
>> >> >> >-      gfp_t bug_mask = __GFP_DMA32 | __GFP_HIGHMEM | ~__GFP_BITS_MASK;
>> >> >> >+      gfp_t bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
>> >> >> >+
>> >> >> >+      if (!IS_ENABLED(CONFIG_ZONE_DMA32) || !(slab_flags & SLAB_CACHE_DMA32))
>> >> >> >+              bug_mask |= __GFP_DMA32;
>> >> >>
>> >> >> The original version doesn't check CONFIG_ZONE_DMA32.
>> >> >>
>> >> >> Do we need to add this condition here?
>> >> >> Could we just decide the bug_mask based on slab_flags?
>> >> >
>> >> >We can. The reason I did it this way is that when we don't have
>> >> >CONFIG_ZONE_DMA32, the compiler should be able to simplify to:
>> >> >
>> >> >bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
>> >> >if (true || ..) => if (true)
>> >> >   bug_mask |= __GFP_DMA32;
>> >> >
>> >> >Then just
>> >> >bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK | __GFP_DMA32;
>> >> >
>> >> >And since the function is inline, slab_flags would not even need to be
>> >> >accessed at all.
>> >> >
>> >>
>> >> Hmm, I get one confusion.
>> >>
>> >> This means if CONFIG_ZONE_DMA32 is not enabled, bug_mask will always
>> >> contains __GFP_DMA32. This will check with cachep->flags.
>> >>
>> >> If cachep->flags has GFP_DMA32, this always fail?
>> >>
>> >> Is this possible?
>> >
>> >Not fully sure to understand the question, but the code is:
>> >if (!IS_ENABLED(CONFIG_ZONE_DMA32) || !(slab_flags & SLAB_CACHE_DMA32))
>> >       bug_mask |= __GFP_DMA32;
>> >
>> >IS_ENABLED(CONFIG_ZONE_DMA32) == true:
>> > - (slab_flags & SLAB_CACHE_DMA32) => bug_mask untouched, __GFP_DMA32
>> >is allowed.
>> > - !(slab_flags & SLAB_CACHE_DMA32) => bug_mask |= __GFP_DMA32;,
>> >__GFP_DMA32 triggers warning
>> >IS_ENABLED(CONFIG_ZONE_DMA32) == false:
>> >  => bug_mask |= __GFP_DMA32;, __GFP_DMA32 triggers warning (as
>> >expected, GFP_DMA32 does not make sense if there is no DMA32 zone).
>>
>> This is the case I am thinking.
>>
>> The warning is reasonable since there is no DMA32. While the
>> kmem_cache_create() user is not easy to change their code.
>>
>> For example, one writes code and wants to have a kmem_cache with DMA32
>> capability, so he writes kmem_cache_create(__GFP_DMA32). The code is
>> there and not easy to change. But one distro builder decides to disable
>> DMA32. This will leads to all the kmem_cache_create() through warning?
>
>I don't think CONFIG_ZONE_DMA32 can be enabled/disabled by
>distro/user? IIUC this is a property of the architecture, some have it
>enabled, some don't.

Ok, thanks.

>
>> This behavior is what we expect?
>>
>> >
>> >Does that clarify?
>> >
>> >>
>> >> --
>> >> Wei Yang
>> >> Help you, Help me
>>
>> --
>> Wei Yang
>> Help you, Help me
Vlastimil Babka - Dec. 6, 2018, 9:34 a.m.
On 12/6/18 4:49 AM, Nicolas Boichat wrote:
>> So it would be fine even unchanged. The check would anyway need some
>> more love to catch the same with __GFP_DMA to be consistent and cover
>> all corner cases.
> Yes, the test is not complete. If we really wanted this to be
> accurate, we'd need to check that GFP_* exactly matches SLAB_CACHE_*.
> 
> The only problem with dropping this is test that we should restore
> GFP_DMA32 warning/errors somewhere else (as Christopher pointed out
> here: https://lkml.org/lkml/2018/11/22/430), especially for kmalloc
> case.

I meant just dropping that patch hunk, not the whole test. Then the test
stays as it is and will keep warning anyone calling kmalloc(GFP_DMA32).
It would also warn anyone calling kmem_cache_alloc(GFP_DMA32) on
SLAB_CACHE_DMA32 cache, but since the gfp can be just dropped, and you
as the only user of this so far will do that, it's fine?

> Maybe this can be done in kmalloc_slab.
Nicolas Boichat - Dec. 6, 2018, 10:09 a.m.
On Thu, Dec 6, 2018 at 5:37 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 12/6/18 4:49 AM, Nicolas Boichat wrote:
> >> So it would be fine even unchanged. The check would anyway need some
> >> more love to catch the same with __GFP_DMA to be consistent and cover
> >> all corner cases.
> > Yes, the test is not complete. If we really wanted this to be
> > accurate, we'd need to check that GFP_* exactly matches SLAB_CACHE_*.
> >
> > The only problem with dropping this is test that we should restore
> > GFP_DMA32 warning/errors somewhere else (as Christopher pointed out
> > here: https://lkml.org/lkml/2018/11/22/430), especially for kmalloc
> > case.
>
> I meant just dropping that patch hunk, not the whole test. Then the test
> stays as it is and will keep warning anyone calling kmalloc(GFP_DMA32).
> It would also warn anyone calling kmem_cache_alloc(GFP_DMA32) on
> SLAB_CACHE_DMA32 cache, but since the gfp can be just dropped, and you
> as the only user of this so far will do that, it's fine?

I missed your point, this would work fine indeed.

Thanks.

> > Maybe this can be done in kmalloc_slab.

Patch

diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
index 29601d93a1c2ea..d742c6cfdffbe9 100644
--- a/Documentation/ABI/testing/sysfs-kernel-slab
+++ b/Documentation/ABI/testing/sysfs-kernel-slab
@@ -106,6 +106,15 @@  Description:
 		are from ZONE_DMA.
 		Available when CONFIG_ZONE_DMA is enabled.
 
+What:		/sys/kernel/slab/cache/cache_dma32
+Date:		December 2018
+KernelVersion:	4.21
+Contact:	Nicolas Boichat <drinkcat@chromium.org>
+Description:
+		The cache_dma32 file is read-only and specifies whether objects
+		are from ZONE_DMA32.
+		Available when CONFIG_ZONE_DMA32 is enabled.
+
 What:		/sys/kernel/slab/cache/cpu_slabs
 Date:		May 2007
 KernelVersion:	2.6.22
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 11b45f7ae4057c..9449b19c5f107a 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -32,6 +32,8 @@ 
 #define SLAB_HWCACHE_ALIGN	((slab_flags_t __force)0x00002000U)
 /* Use GFP_DMA memory */
 #define SLAB_CACHE_DMA		((slab_flags_t __force)0x00004000U)
+/* Use GFP_DMA32 memory */
+#define SLAB_CACHE_DMA32	((slab_flags_t __force)0x00008000U)
 /* DEBUG: Store the last owner for bug hunting */
 #define SLAB_STORE_USER		((slab_flags_t __force)0x00010000U)
 /* Panic if kmem_cache_create() fails */
diff --git a/mm/internal.h b/mm/internal.h
index a2ee82a0cd44ae..fd244ad716eaf8 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -14,6 +14,7 @@ 
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
+#include <linux/slab.h>
 #include <linux/tracepoint-defs.h>
 
 /*
@@ -34,9 +35,12 @@ 
 #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
 
 /* Check for flags that must not be used with a slab allocator */
-static inline gfp_t check_slab_flags(gfp_t flags)
+static inline gfp_t check_slab_flags(gfp_t flags, slab_flags_t slab_flags)
 {
-	gfp_t bug_mask = __GFP_DMA32 | __GFP_HIGHMEM | ~__GFP_BITS_MASK;
+	gfp_t bug_mask = __GFP_HIGHMEM | ~__GFP_BITS_MASK;
+
+	if (!IS_ENABLED(CONFIG_ZONE_DMA32) || !(slab_flags & SLAB_CACHE_DMA32))
+		bug_mask |= __GFP_DMA32;
 
 	if (unlikely(flags & bug_mask)) {
 		gfp_t invalid_mask = flags & bug_mask;
diff --git a/mm/slab.c b/mm/slab.c
index 65a774f05e7836..2fd3b9a996cbe6 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2109,6 +2109,8 @@  int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags)
 	cachep->allocflags = __GFP_COMP;
 	if (flags & SLAB_CACHE_DMA)
 		cachep->allocflags |= GFP_DMA;
+	if (flags & SLAB_CACHE_DMA32)
+		cachep->allocflags |= GFP_DMA32;
 	if (flags & SLAB_RECLAIM_ACCOUNT)
 		cachep->allocflags |= __GFP_RECLAIMABLE;
 	cachep->size = size;
@@ -2643,7 +2645,7 @@  static struct page *cache_grow_begin(struct kmem_cache *cachep,
 	 * Be lazy and only check for valid flags here,  keeping it out of the
 	 * critical path in kmem_cache_alloc().
 	 */
-	flags = check_slab_flags(flags);
+	flags = check_slab_flags(flags, cachep->flags);
 	WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
 	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
 
diff --git a/mm/slab.h b/mm/slab.h
index 4190c24ef0e9df..fcf717e12f0a86 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -127,7 +127,8 @@  static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
 
 
 /* Legal flag mask for kmem_cache_create(), for various configurations */
-#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \
+#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
+			 SLAB_CACHE_DMA32 | SLAB_PANIC | \
 			 SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
 
 #if defined(CONFIG_DEBUG_SLAB)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 70b0cc85db67f8..18b7b809c8d064 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -53,7 +53,7 @@  static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
 		SLAB_FAILSLAB | SLAB_KASAN)
 
 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
-			 SLAB_ACCOUNT)
+			 SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
 
 /*
  * Merge control. If this is set then no merging of slab caches will occur.
diff --git a/mm/slub.c b/mm/slub.c
index 21a3f6866da472..6d47765a82d150 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1685,7 +1685,7 @@  static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 
 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 {
-	flags = check_slab_flags(flags);
+	flags = check_slab_flags(flags, s->flags);
 
 	return allocate_slab(s,
 		flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
@@ -3577,6 +3577,9 @@  static int calculate_sizes(struct kmem_cache *s, int forced_order)
 	if (s->flags & SLAB_CACHE_DMA)
 		s->allocflags |= GFP_DMA;
 
+	if (s->flags & SLAB_CACHE_DMA32)
+		s->allocflags |= GFP_DMA32;
+
 	if (s->flags & SLAB_RECLAIM_ACCOUNT)
 		s->allocflags |= __GFP_RECLAIMABLE;
 
@@ -5095,6 +5098,14 @@  static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
 SLAB_ATTR_RO(cache_dma);
 #endif
 
+#ifdef CONFIG_ZONE_DMA32
+static ssize_t cache_dma32_show(struct kmem_cache *s, char *buf)
+{
+	return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA32));
+}
+SLAB_ATTR_RO(cache_dma32);
+#endif
+
 static ssize_t usersize_show(struct kmem_cache *s, char *buf)
 {
 	return sprintf(buf, "%u\n", s->usersize);
@@ -5435,6 +5446,9 @@  static struct attribute *slab_attrs[] = {
 #ifdef CONFIG_ZONE_DMA
 	&cache_dma_attr.attr,
 #endif
+#ifdef CONFIG_ZONE_DMA32
+	&cache_dma32_attr.attr,
+#endif
 #ifdef CONFIG_NUMA
 	&remote_node_defrag_ratio_attr.attr,
 #endif
@@ -5665,6 +5679,8 @@  static char *create_unique_id(struct kmem_cache *s)
 	 */
 	if (s->flags & SLAB_CACHE_DMA)
 		*p++ = 'd';
+	if (s->flags & SLAB_CACHE_DMA32)
+		*p++ = 'D';
 	if (s->flags & SLAB_RECLAIM_ACCOUNT)
 		*p++ = 'a';
 	if (s->flags & SLAB_CONSISTENCY_CHECKS)