From 283fd6fe0528ae2fe0222a35ecfdfcbbaeee8159 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Thu, 24 Mar 2022 18:09:58 -0700
Subject: mm/migration: add trace events for THP migrations

Patch series "mm/migration: Add trace events", v3.

This adds trace events for all migration scenarios including base page,
THP and HugeTLB.

This patch (of 3):

This adds two trace events for PMD based THP migration without split.
These events closely follow the implementation details like setting and
removing of PMD migration entries, which are essential operations for
THP migration.

This moves CREATE_TRACE_POINTS into generic THP from powerpc for these
new trace events to be available on other platforms as well.

Link: https://lkml.kernel.org/r/1643368182-9588-1-git-send-email-anshuman.khandual@arm.com
Link: https://lkml.kernel.org/r/1643368182-9588-2-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Anshuman Khandual
Cc: Steven Rostedt
Cc: Ingo Molnar
Cc: Zi Yan
Cc: Naoya Horiguchi
Cc: John Hubbard
Cc: Matthew Wilcox
Cc: Michael Ellerman
Cc: Paul Mackerras
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/powerpc/mm/book3s64/trace.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/mm/book3s64/trace.c b/arch/powerpc/mm/book3s64/trace.c
index b86e7b906257..ccd64b5e6cac 100644
--- a/arch/powerpc/mm/book3s64/trace.c
+++ b/arch/powerpc/mm/book3s64/trace.c
@@ -3,6 +3,5 @@
  * This file is for defining trace points and trace related helpers.
  */
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define CREATE_TRACE_POINTS
 #include <trace/events/thp.h>
 #endif
-- cgit v1.2.3

From 4cc79b3303f224a920f3aff21f3d231749d73384 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Thu, 24 Mar 2022 18:10:01 -0700
Subject: mm/migration: add trace events for base page and HugeTLB migrations

This adds two trace events for base page and HugeTLB page migrations.
These events closely follow the implementation details like setting and
removing of PTE migration entries, which are essential operations for
migration.

The new CREATE_TRACE_POINTS in mm/rmap.c covers both <trace/events/tlb.h>
and <trace/events/migrate.h> based trace events.  Hence drop redundant
CREATE_TRACE_POINTS from other places which could have otherwise
conflicted during build.

Link: https://lkml.kernel.org/r/1643368182-9588-3-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Anshuman Khandual
Reported-by: kernel test robot
Cc: Steven Rostedt
Cc: Ingo Molnar
Cc: Zi Yan
Cc: Naoya Horiguchi
Cc: John Hubbard
Cc: Matthew Wilcox
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86/mm/init.c | 1 -
 include/trace/events/migrate.h | 31 +++++++++++++++++++++++++++++++
 mm/migrate.c | 4 +++-
 mm/rmap.c | 6 ++++++
 4 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 4ba024d5b63a..d8cfce221275 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -31,7 +31,6 @@
  * We need to define the tracepoints somewhere, and tlb.c
  * is only compiled when SMP=y.
  */
-#define CREATE_TRACE_POINTS
 #include <trace/events/tlb.h>
 
 #include "mm_internal.h"
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index 779f3fad9ecd..061b5128f335 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -105,6 +105,37 @@ TRACE_EVENT(mm_migrate_pages_start,
 		__print_symbolic(__entry->reason, MIGRATE_REASON))
 );
 
+DECLARE_EVENT_CLASS(migration_pte,
+
+	TP_PROTO(unsigned long addr, unsigned long pte, int order),
+
+	TP_ARGS(addr, pte, order),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, addr)
+		__field(unsigned long, pte)
+		__field(int, order)
+	),
+
+	TP_fast_assign(
+		__entry->addr = addr;
+		__entry->pte = pte;
+		__entry->order = order;
+	),
+
+	TP_printk("addr=%lx, pte=%lx order=%d", __entry->addr, __entry->pte, __entry->order)
+);
+
+DEFINE_EVENT(migration_pte, set_migration_pte,
+	TP_PROTO(unsigned long addr, unsigned long pte, int order),
+	TP_ARGS(addr, pte, order)
+);
+
+DEFINE_EVENT(migration_pte, remove_migration_pte,
+	TP_PROTO(unsigned long addr, unsigned long pte, int order),
+	TP_ARGS(addr, pte, order)
+);
+
 #endif /* _TRACE_MIGRATE_H */
 
 /* This part must be outside protection */
diff --git a/mm/migrate.c b/mm/migrate.c
index 4f30ed37856f..3d60823afd2d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -53,7 +53,6 @@
 
 #include <asm/tlbflush.h>
 
-#define CREATE_TRACE_POINTS
 #include <trace/events/migrate.h>
 
 #include "internal.h"
@@ -249,6 +248,9 @@ static bool remove_migration_pte(struct folio *folio,
 		if (vma->vm_flags & VM_LOCKED)
 			mlock_page_drain(smp_processor_id());
 
+		trace_remove_migration_pte(pvmw.address, pte_val(pte),
+					   compound_order(new));
+
 		/* No need to invalidate - it was non-present before */
 		update_mmu_cache(vma, pvmw.address, pvmw.pte);
 	}
diff --git a/mm/rmap.c b/mm/rmap.c
index ee1f10df984d..bfcc8e3d412f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -76,7 +76,9 @@
 
 #include <asm/tlbflush.h>
 
+#define CREATE_TRACE_POINTS
 #include <trace/events/tlb.h>
+#include <trace/events/migrate.h>
 
 #include "internal.h"
@@ -1849,6 +1851,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 		if (pte_swp_uffd_wp(pteval))
 			swp_pte = pte_swp_mkuffd_wp(swp_pte);
 		set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
+		trace_set_migration_pte(pvmw.address, pte_val(swp_pte),
+					compound_order(&folio->page));
 		/*
 		 * No need to invalidate here it will synchronize on
 		 * against the special swap migration pte.
@@ -1917,6 +1921,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 		if (pte_uffd_wp(pteval))
 			swp_pte = pte_swp_mkuffd_wp(swp_pte);
 		set_pte_at(mm, address, pvmw.pte, swp_pte);
+		trace_set_migration_pte(address, pte_val(swp_pte),
+					compound_order(&folio->page));
 		/*
 		 * No need to invalidate here it will synchronize on
 		 * against the special swap migration pte.
-- cgit v1.2.3

From 63840de296472f3914bb933b11ba2b764590755e Mon Sep 17 00:00:00 2001
From: Andrey Konovalov
Date: Thu, 24 Mar 2022 18:10:52 -0700
Subject: kasan, x86, arm64, s390: rename functions for modules shadow

Rename kasan_free_shadow to kasan_free_module_shadow and kasan_module_alloc
to kasan_alloc_module_shadow.

These functions are used to allocate/free shadow memory for kernel modules
when KASAN_VMALLOC is not enabled.  The new names better reflect their
purpose.

Also reword the comment next to their declaration to improve clarity.
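
As an aside, a minimal sketch (not part of the series) of the caller pattern
these helpers serve when KASAN_VMALLOC is off; the arch module_alloc() hunks
that follow have this shape, and example_module_alloc() plus the plain
GFP_KERNEL mask are illustrative assumptions:

/*
 * Sketch only: pair kasan_alloc_module_shadow() with the module mapping,
 * and drop the mapping again if shadow allocation fails.
 */
void *example_module_alloc(unsigned long size)
{
	void *p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR,
				       MODULES_END, GFP_KERNEL, PAGE_KERNEL,
				       0, NUMA_NO_NODE,
				       __builtin_return_address(0));

	/* Back the module mapping with real shadow memory. */
	if (p && kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0) {
		vfree(p);
		return NULL;
	}
	return p;
}

kasan_free_module_shadow() is the counterpart on the unmap path for VM_KASAN
areas, as the mm/kasan/shadow.c and mm/vmalloc.c hunks below show.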
Link: https://lkml.kernel.org/r/36db32bde765d5d0b856f77d2d806e838513fe84.1643047180.git.andreyknvl@google.com
Signed-off-by: Andrey Konovalov
Acked-by: Catalin Marinas
Acked-by: Marco Elver
Cc: Alexander Potapenko
Cc: Andrey Ryabinin
Cc: Dmitry Vyukov
Cc: Evgenii Stepanov
Cc: Mark Rutland
Cc: Peter Collingbourne
Cc: Vincenzo Frascino
Cc: Will Deacon
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/arm64/kernel/module.c | 2 +-
 arch/s390/kernel/module.c | 2 +-
 arch/x86/kernel/module.c | 2 +-
 include/linux/kasan.h | 14 +++++++-------
 mm/kasan/shadow.c | 4 ++--
 mm/vmalloc.c | 2 +-
 6 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 309a27553c87..d3a1fa818348 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -58,7 +58,7 @@ void *module_alloc(unsigned long size)
 				PAGE_KERNEL, 0, NUMA_NO_NODE,
 				__builtin_return_address(0));
 
-	if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) {
+	if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
 		vfree(p);
 		return NULL;
 	}
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index b032e556eeb7..a7aefc278909 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -45,7 +45,7 @@ void *module_alloc(unsigned long size)
 	p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
 				 gfp_mask, PAGE_KERNEL_EXEC, VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
 				 __builtin_return_address(0));
-	if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) {
+	if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
 		vfree(p);
 		return NULL;
 	}
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 96d7c27b7093..504ea65987e8 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -78,7 +78,7 @@ void *module_alloc(unsigned long size)
 				    MODULES_END, gfp_mask, PAGE_KERNEL,
 				    VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
 				    __builtin_return_address(0));
-	if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) {
+	if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
 		vfree(p);
 		return NULL;
 	}
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 4ed94616c8f0..b450c7653137 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -433,17 +433,17 @@ static inline void kasan_populate_early_vm_area_shadow(void *start,
 	!defined(CONFIG_KASAN_VMALLOC)
 
 /*
- * These functions provide a special case to support backing module
- * allocations with real shadow memory. With KASAN vmalloc, the special
- * case is unnecessary, as the work is handled in the generic case.
+ * These functions allocate and free shadow memory for kernel modules.
+ * They are only required when KASAN_VMALLOC is not supported, as otherwise
+ * shadow memory is allocated by the generic vmalloc handlers.
  */
-int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask);
-void kasan_free_shadow(const struct vm_struct *vm);
+int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask);
+void kasan_free_module_shadow(const struct vm_struct *vm);
 
 #else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */
 
-static inline int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) { return 0; }
-static inline void kasan_free_shadow(const struct vm_struct *vm) {}
+static inline int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask) { return 0; }
+static inline void kasan_free_module_shadow(const struct vm_struct *vm) {}
 
 #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */
diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c
index 94136f84b449..e5c4393eb861 100644
--- a/mm/kasan/shadow.c
+++ b/mm/kasan/shadow.c
@@ -498,7 +498,7 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end,
 
 #else /* CONFIG_KASAN_VMALLOC */
 
-int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask)
+int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask)
 {
 	void *ret;
 	size_t scaled_size;
@@ -534,7 +534,7 @@ int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask)
 	return -ENOMEM;
 }
 
-void kasan_free_shadow(const struct vm_struct *vm)
+void kasan_free_module_shadow(const struct vm_struct *vm)
 {
 	if (vm->flags & VM_KASAN)
 		vfree(kasan_mem_to_shadow(vm->addr));
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 99e0f3e8d1a5..feecd614bb77 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2547,7 +2547,7 @@ struct vm_struct *remove_vm_area(const void *addr)
 		va->vm = NULL;
 		spin_unlock(&vmap_area_lock);
 
-		kasan_free_shadow(vm);
+		kasan_free_module_shadow(vm);
 		free_unmap_vmap_area(va);
 
 		return vm;
-- cgit v1.2.3

From 51fb34de2a4c8fa0f221246313700bfe3b6c586d Mon Sep 17 00:00:00 2001
From: Andrey Konovalov
Date: Thu, 24 Mar 2022 18:11:10 -0700
Subject: kasan, arm64: reset pointer tags of vmapped stacks

Once tag-based KASAN modes start tagging vmalloc() allocations, kernel
stacks start getting tagged if CONFIG_VMAP_STACK is enabled.

Reset the tag of kernel stack pointers after allocation in
arch_alloc_vmap_stack().

For SW_TAGS KASAN, when CONFIG_KASAN_STACK is enabled, the instrumentation
can't handle the SP register being tagged.

For HW_TAGS KASAN, there are no instrumentation-related issues.  However,
the impact of having a tagged SP register needs to be properly evaluated,
so keep it non-tagged for now.

Note that the memory for the stack allocation still gets tagged to catch
vmalloc-into-stack out-of-bounds accesses.
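
A minimal sketch (not part of the patch) of the resulting allocation flow;
the names mirror the arch_alloc_vmap_stack() hunk that follows, and
sketch_alloc_vmap_stack() is a placeholder name:

/*
 * Sketch only: the stack memory itself keeps its KASAN tag, so
 * vmalloc-into-stack out-of-bounds accesses are still caught, but the
 * pointer that eventually lands in the SP register is untagged.
 */
static unsigned long *sketch_alloc_vmap_stack(size_t stack_size, int node)
{
	void *p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP,
				 node, __builtin_return_address(0));

	return kasan_reset_tag(p);	/* untag before it becomes SP */
}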
[andreyknvl@google.com: fix case when a stack is retrieved from cached_stacks]
Link: https://lkml.kernel.org/r/f50c5f96ef896d7936192c888b0c0a7674e33184.1644943792.git.andreyknvl@google.com
[dan.carpenter@oracle.com: remove unnecessary check in alloc_thread_stack_node()]
Link: https://lkml.kernel.org/r/20220301080706.GB17208@kili
Link: https://lkml.kernel.org/r/698c5ab21743c796d46c15d075b9481825973e34.1643047180.git.andreyknvl@google.com
Signed-off-by: Andrey Konovalov
Signed-off-by: Dan Carpenter
Acked-by: Catalin Marinas
Acked-by: Marco Elver
Reviewed-by: Marco Elver
Cc: Alexander Potapenko
Cc: Andrey Ryabinin
Cc: Dmitry Vyukov
Cc: Evgenii Stepanov
Cc: Mark Rutland
Cc: Peter Collingbourne
Cc: Vincenzo Frascino
Cc: Will Deacon
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/arm64/include/asm/vmap_stack.h | 5 ++++-
 kernel/fork.c | 11 ++++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h
index 894e031b28d2..20873099c035 100644
--- a/arch/arm64/include/asm/vmap_stack.h
+++ b/arch/arm64/include/asm/vmap_stack.h
@@ -17,10 +17,13 @@
  */
 static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node)
 {
+	void *p;
+
 	BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
 
-	return __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
+	p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
 			__builtin_return_address(0));
+	return kasan_reset_tag(p);
 }
 
 #endif /* __ASM_VMAP_STACK_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index ce0ffe48937f..9796897560ab 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -286,11 +286,13 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 		if (!s)
 			continue;
 
-		/* Mark stack accessible for KASAN. */
+		/* Reset stack metadata. */
 		kasan_unpoison_range(s->addr, THREAD_SIZE);
 
+		stack = kasan_reset_tag(s->addr);
+
 		/* Clear stale pointers from reused stack. */
-		memset(s->addr, 0, THREAD_SIZE);
+		memset(stack, 0, THREAD_SIZE);
 
 		if (memcg_charge_kernel_stack(s)) {
 			vfree(s->addr);
@@ -298,7 +300,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 		}
 
 		tsk->stack_vm_area = s;
-		tsk->stack = s->addr;
+		tsk->stack = stack;
 		return 0;
 	}
@@ -326,8 +328,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 	 * so cache the vm_struct.
 	 */
 	tsk->stack_vm_area = vm;
-	if (stack)
-		stack = kasan_reset_tag(stack);
+	stack = kasan_reset_tag(stack);
 	tsk->stack = stack;
 	return 0;
 }
-- cgit v1.2.3

From 01d92c7f358ce892279ca830cf6ccf2862a17d1c Mon Sep 17 00:00:00 2001
From: Andrey Konovalov
Date: Thu, 24 Mar 2022 18:11:16 -0700
Subject: kasan, vmalloc, arm64: mark vmalloc mappings as pgprot_tagged

HW_TAGS KASAN relies on ARM Memory Tagging Extension (MTE).  With MTE, a
memory region must be mapped as MT_NORMAL_TAGGED to allow setting memory
tags via MTE-specific instructions.

Add proper protection bits to vmalloc() allocations.  These allocations
are always backed by page_alloc pages, so the tags will actually be
getting set on the corresponding physical memory.
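
The gist of the vmalloc-side change, annotated as a sketch (the actual hunk
is in mm/vmalloc.c below):

	/*
	 * In __vmalloc_node_range(), before the area is mapped: only plain
	 * PAGE_KERNEL mappings are upgraded, and only when HW_TAGS KASAN
	 * (i.e. MTE) is enabled at runtime.  arch_vmap_pgprot_tagged() is a
	 * no-op unless the architecture overrides it; arm64 returns
	 * pgprot_tagged(prot), giving an MT_NORMAL_TAGGED mapping.
	 */
	if (kasan_hw_tags_enabled() &&
	    pgprot_val(prot) == pgprot_val(PAGE_KERNEL))
		prot = arch_vmap_pgprot_tagged(prot);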
Link: https://lkml.kernel.org/r/983fc33542db2f6b1e77b34ca23448d4640bbb9e.1643047180.git.andreyknvl@google.com
Signed-off-by: Andrey Konovalov
Co-developed-by: Vincenzo Frascino
Signed-off-by: Vincenzo Frascino
Acked-by: Marco Elver
Cc: Alexander Potapenko
Cc: Andrey Ryabinin
Cc: Catalin Marinas
Cc: Dmitry Vyukov
Cc: Evgenii Stepanov
Cc: Mark Rutland
Cc: Peter Collingbourne
Cc: Will Deacon
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/arm64/include/asm/vmalloc.h | 6 ++++++
 include/linux/vmalloc.h | 7 +++++++
 mm/vmalloc.c | 9 +++++++++
 3 files changed, 22 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
index b9185503feae..38fafffe699f 100644
--- a/arch/arm64/include/asm/vmalloc.h
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -25,4 +25,10 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot)
 
 #endif
 
+#define arch_vmap_pgprot_tagged arch_vmap_pgprot_tagged
+static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
+{
+	return pgprot_tagged(prot);
+}
+
 #endif /* _ASM_ARM64_VMALLOC_H */
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 2ca95c7db463..3b1df7da402d 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -115,6 +115,13 @@ static inline int arch_vmap_pte_supported_shift(unsigned long size)
 }
 #endif
 
+#ifndef arch_vmap_pgprot_tagged
+static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
+{
+	return prot;
+}
+#endif
+
 /*
  * Highlevel APIs for driver use
  */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 1ab1f1b2f5b7..8530d86c3e58 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3128,6 +3128,15 @@ again:
 		goto fail;
 	}
 
+	/*
+	 * Modify protection bits to allow tagging.
+	 * This must be done before mapping by __vmalloc_area_node().
+	 */
+	if (kasan_hw_tags_enabled() &&
+	    pgprot_val(prot) == pgprot_val(PAGE_KERNEL))
+		prot = arch_vmap_pgprot_tagged(prot);
+
+	/* Allocate physical pages and map them into vmalloc space. */
 	addr = __vmalloc_area_node(area, gfp_mask, prot, shift, node);
 	if (!addr)
 		goto fail;
-- cgit v1.2.3

From 36c4a73bf8d24721222d9ee4080ac955973fd388 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov
Date: Thu, 24 Mar 2022 18:11:38 -0700
Subject: kasan, arm64: don't tag executable vmalloc allocations

Besides asking vmalloc memory to be executable via the prot argument of
__vmalloc_node_range() (see the previous patch), the kernel can skip that
bit and instead mark memory as executable via set_memory_x().

Once tag-based KASAN modes start tagging vmalloc allocations, executing
code from such allocations will lead to the PC register getting a tag,
which is not tolerated by the kernel.

Generic kernel code typically allocates memory via module_alloc() if it
intends to mark memory as executable.  (On arm64 module_alloc() uses
__vmalloc_node_range() without setting the executable bit).

Thus, reset pointer tags of pointers returned from module_alloc().

However, on arm64 there's an exception: the eBPF subsystem.  Instead of
using module_alloc(), it uses vmalloc() (via bpf_jit_alloc_exec()) to
allocate its JIT region.

Thus, reset pointer tags of pointers returned from bpf_jit_alloc_exec().

Resetting tags for these pointers results in untagged pointers being
passed to set_memory_x().  This causes conflicts in arithmetic checks in
change_memory_common(), as vm_struct->addr pointer returned by
find_vm_area() is tagged.

Reset pointer tag of find_vm_area(addr)->addr in change_memory_common().
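
A short annotated sketch of why the change_memory_common() check needs the
companion fix; the variable names are taken from the arm64 pageattr.c hunk
below:

	/*
	 * Sketch only: 'addr' and 'end' arrive untagged because the
	 * allocator reset the pointer tag, while find_vm_area() hands back
	 * a tagged ->addr.  Untag ->addr before the arithmetic so the
	 * bounds check compares like with like.
	 */
	area = find_vm_area((void *)addr);
	if (!area ||
	    end > (unsigned long)kasan_reset_tag(area->addr) + area->size ||
	    !(area->flags & VM_ALLOC))
		return -EINVAL;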
Link: https://lkml.kernel.org/r/b7b2595423340cd7d76b770e5d519acf3b72f0ab.1643047180.git.andreyknvl@google.com
Signed-off-by: Andrey Konovalov
Acked-by: Catalin Marinas
Acked-by: Marco Elver
Cc: Alexander Potapenko
Cc: Andrey Ryabinin
Cc: Dmitry Vyukov
Cc: Evgenii Stepanov
Cc: Mark Rutland
Cc: Peter Collingbourne
Cc: Vincenzo Frascino
Cc: Will Deacon
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/arm64/kernel/module.c | 3 ++-
 arch/arm64/mm/pageattr.c | 2 +-
 arch/arm64/net/bpf_jit_comp.c | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index d3a1fa818348..f2d4bb14bfab 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -63,7 +63,8 @@ void *module_alloc(unsigned long size)
 		return NULL;
 	}
 
-	return p;
+	/* Memory is intended to be executable, reset the pointer tag. */
+	return kasan_reset_tag(p);
 }
 
 enum aarch64_reloc_op {
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index a3bacd79507a..64e985eaa52d 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -85,7 +85,7 @@ static int change_memory_common(unsigned long addr, int numpages,
 	 */
 	area = find_vm_area((void *)addr);
 	if (!area ||
-	    end > (unsigned long)area->addr + area->size ||
+	    end > (unsigned long)kasan_reset_tag(area->addr) + area->size ||
 	    !(area->flags & VM_ALLOC))
 		return -EINVAL;
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index e850c69e128c..fcc675aa1670 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1304,7 +1304,8 @@ u64 bpf_jit_alloc_exec_limit(void)
 
 void *bpf_jit_alloc_exec(unsigned long size)
 {
-	return vmalloc(size);
+	/* Memory is intended to be executable, reset the pointer tag. */
+	return kasan_reset_tag(vmalloc(size));
 }
 
 void bpf_jit_free_exec(void *addr)
-- cgit v1.2.3

From f6f37d9320a11e9059f11a99fc59dfb8e307c07f Mon Sep 17 00:00:00 2001
From: Andrey Konovalov
Date: Thu, 24 Mar 2022 18:11:53 -0700
Subject: arm64: select KASAN_VMALLOC for SW/HW_TAGS modes

Generic KASAN already selects KASAN_VMALLOC to allow VMAP_STACK to be
selected unconditionally, see commit acc3042d62cb9 ("arm64: Kconfig:
select KASAN_VMALLOC if KANSAN_GENERIC is enabled").

The same change is needed for SW_TAGS KASAN.

HW_TAGS KASAN does not require enabling KASAN_VMALLOC for VMAP_STACK,
they already work together as is.  Still, selecting KASAN_VMALLOC makes
sense to keep vmalloc() always protected.  In case any bugs in KASAN's
vmalloc() support are discovered, the command line kasan.vmalloc flag can
be used to disable vmalloc() checking.

Select KASAN_VMALLOC for all KASAN modes for arm64.
Link: https://lkml.kernel.org/r/99d6b3ebf57fc1930ff71f9a4a71eea19881b270.1643047180.git.andreyknvl@google.com
Signed-off-by: Andrey Konovalov
Acked-by: Catalin Marinas
Acked-by: Marco Elver
Cc: Alexander Potapenko
Cc: Andrey Ryabinin
Cc: Dmitry Vyukov
Cc: Evgenii Stepanov
Cc: Mark Rutland
Cc: Peter Collingbourne
Cc: Vincenzo Frascino
Cc: Will Deacon
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/arm64/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f8680892401b..23048be0333b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -208,7 +208,7 @@ config ARM64
 	select IOMMU_DMA if IOMMU_SUPPORT
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
-	select KASAN_VMALLOC if KASAN_GENERIC
+	select KASAN_VMALLOC if KASAN
 	select MODULES_USE_ELF_RELA
 	select NEED_DMA_MAP_STATE
 	select NEED_SG_DMA_LENGTH
-- cgit v1.2.3

From 24e988c7fd1ee701ea78fd138ed309e5f5f207b4 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Thu, 24 Mar 2022 18:14:05 -0700
Subject: mm: generalize ARCH_HAS_FILTER_PGPROT

ARCH_HAS_FILTER_PGPROT config has duplicate definitions on platforms that
subscribe it.  Instead make it a generic config option which can be
selected on applicable platforms when required.

Link: https://lkml.kernel.org/r/1643004823-16441-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Anshuman Khandual
Acked-by: Catalin Marinas
Cc: Will Deacon
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: "H. Peter Anvin"
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86/Kconfig | 3 ---
 mm/Kconfig | 3 +++
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b037fe7be374..10e4c332e15d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -337,9 +337,6 @@ config GENERIC_CALIBRATE_DELAY
 config ARCH_HAS_CPU_RELAX
 	def_bool y
 
-config ARCH_HAS_FILTER_PGPROT
-	def_bool y
-
 config ARCH_HIBERNATION_POSSIBLE
 	def_bool y
diff --git a/mm/Kconfig b/mm/Kconfig
index 761f5021ba51..034d87953600 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -762,6 +762,9 @@ config ARCH_HAS_CURRENT_STACK_POINTER
 	  register alias named "current_stack_pointer", this config can be
 	  selected.
 
+config ARCH_HAS_FILTER_PGPROT
+	bool
+
 config ARCH_HAS_PTE_DEVMAP
 	bool
-- cgit v1.2.3

From 9457056ac426e5ed0671356509c8dcce69f8dee0 Mon Sep 17 00:00:00 2001
From: Johannes Weiner
Date: Thu, 24 Mar 2022 18:14:12 -0700
Subject: mm: madvise: MADV_DONTNEED_LOCKED

MADV_DONTNEED historically rejects mlocked ranges, but with MLOCK_ONFAULT
and MCL_ONFAULT allowing to mlock without populating, there are valid use
cases for depopulating locked ranges as well.

Users mlock memory to protect secrets.  There are allocators for secure
buffers that want in-use memory generally mlocked, but cleared and
invalidated memory to give up the physical pages.  This could be done
with explicit munlock -> mlock calls on free -> alloc of course, but that
adds two unnecessary syscalls, heavy mmap_sem write locks, vma splits and
re-merges - only to get rid of the backing pages.

Users also mlockall(MCL_ONFAULT) to suppress sustained paging, but are
okay with on-demand initial population.  It seems valid to selectively
free some memory during the lifetime of such a process, without having to
mess with its overall policy.

Why add a separate flag?  Isn't this a pretty niche usecase?

- MADV_DONTNEED has been bailing on locked vmas forever.
  It's at least conceivable that someone, somewhere is relying on mlock
  to protect data from perhaps broader invalidation calls.  Changing
  this behavior now could lead to quiet data corruption.

- It also clarifies expectations around MADV_FREE and maybe MADV_REMOVE.
  It avoids the situation where one quietly behaves different than the
  others.  MADV_FREE_LOCKED can be added later.

- The combination of mlock() and madvise() in the first place is
  probably niche.  But where it happens, I'd say that dropping pages
  from a locked region once they don't contain secrets or won't page
  anymore is much saner than relying on mlock to protect memory from
  speculative or errant invalidation calls.

  It's just that we can't change the default behavior because of the
  two previous points.

Given that, an explicit new flag seems to make the most sense.

[hannes@cmpxchg.org: fix mips build]
Link: https://lkml.kernel.org/r/20220304171912.305060-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner
Acked-by: Michal Hocko
Reviewed-by: Mike Kravetz
Reviewed-by: Shakeel Butt
Acked-by: Vlastimil Babka
Cc: Nadav Amit
Cc: David Hildenbrand
Cc: Dr. David Alan Gilbert
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/alpha/include/uapi/asm/mman.h | 2 ++
 arch/mips/include/uapi/asm/mman.h | 2 ++
 arch/parisc/include/uapi/asm/mman.h | 2 ++
 arch/xtensa/include/uapi/asm/mman.h | 2 ++
 include/uapi/asm-generic/mman-common.h | 2 ++
 mm/madvise.c | 24 ++++++++++++++----------
 6 files changed, 24 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
index 56b4ee5a6c9e..4aa996423b0d 100644
--- a/arch/alpha/include/uapi/asm/mman.h
+++ b/arch/alpha/include/uapi/asm/mman.h
@@ -74,6 +74,8 @@
 #define MADV_POPULATE_READ	22	/* populate (prefault) page tables readable */
 #define MADV_POPULATE_WRITE	23	/* populate (prefault) page tables writable */
 
+#define MADV_DONTNEED_LOCKED	24	/* like DONTNEED, but drop locked pages too */
+
 /* compatibility flags */
 #define MAP_FILE	0
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index 40b210c65a5a..1be428663c10 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -101,6 +101,8 @@
 #define MADV_POPULATE_READ	22	/* populate (prefault) page tables readable */
 #define MADV_POPULATE_WRITE	23	/* populate (prefault) page tables writable */
 
+#define MADV_DONTNEED_LOCKED	24	/* like DONTNEED, but drop locked pages too */
+
 /* compatibility flags */
 #define MAP_FILE	0
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index 9e3c010c0f61..a7ea3204a5fa 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -55,6 +55,8 @@
 #define MADV_POPULATE_READ	22	/* populate (prefault) page tables readable */
 #define MADV_POPULATE_WRITE	23	/* populate (prefault) page tables writable */
 
+#define MADV_DONTNEED_LOCKED	24	/* like DONTNEED, but drop locked pages too */
+
 #define MADV_MERGEABLE		65	/* KSM may merge identical pages */
 #define MADV_UNMERGEABLE	66	/* KSM may not merge identical pages */
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index b3a22095371b..7966a58af472 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -109,6 +109,8 @@
 #define MADV_POPULATE_READ	22	/* populate (prefault) page tables readable */
 #define MADV_POPULATE_WRITE	23	/* populate (prefault) page tables writable */
 
+#define MADV_DONTNEED_LOCKED	24	/* like DONTNEED, but drop locked pages too */
+
 /* compatibility flags */
 #define MAP_FILE	0
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index 1567a3294c3d..6c1aa92a92e4 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -75,6 +75,8 @@
 #define MADV_POPULATE_READ	22	/* populate (prefault) page tables readable */
 #define MADV_POPULATE_WRITE	23	/* populate (prefault) page tables writable */
 
+#define MADV_DONTNEED_LOCKED	24	/* like DONTNEED, but drop locked pages too */
+
 /* compatibility flags */
 #define MAP_FILE	0
diff --git a/mm/madvise.c b/mm/madvise.c
index 2e6213664c52..b41858ee937b 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -52,6 +52,7 @@ static int madvise_need_mmap_write(int behavior)
 	case MADV_REMOVE:
 	case MADV_WILLNEED:
 	case MADV_DONTNEED:
+	case MADV_DONTNEED_LOCKED:
 	case MADV_COLD:
 	case MADV_PAGEOUT:
 	case MADV_FREE:
@@ -502,14 +503,9 @@ static void madvise_cold_page_range(struct mmu_gather *tlb,
 	tlb_end_vma(tlb, vma);
 }
 
-static inline bool can_madv_lru_non_huge_vma(struct vm_area_struct *vma)
-{
-	return !(vma->vm_flags & (VM_LOCKED|VM_PFNMAP));
-}
-
 static inline bool can_madv_lru_vma(struct vm_area_struct *vma)
 {
-	return can_madv_lru_non_huge_vma(vma) && !is_vm_hugetlb_page(vma);
+	return !(vma->vm_flags & (VM_LOCKED|VM_PFNMAP|VM_HUGETLB));
 }
 
 static long madvise_cold(struct vm_area_struct *vma,
@@ -787,10 +783,16 @@ static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma,
 					    unsigned long *end,
 					    int behavior)
 {
-	if (!is_vm_hugetlb_page(vma))
-		return can_madv_lru_non_huge_vma(vma);
+	if (!is_vm_hugetlb_page(vma)) {
+		unsigned int forbidden = VM_PFNMAP;
+
+		if (behavior != MADV_DONTNEED_LOCKED)
+			forbidden |= VM_LOCKED;
+
+		return !(vma->vm_flags & forbidden);
+	}
 
-	if (behavior != MADV_DONTNEED)
+	if (behavior != MADV_DONTNEED && behavior != MADV_DONTNEED_LOCKED)
 		return false;
 	if (start & ~huge_page_mask(hstate_vma(vma)))
 		return false;
@@ -854,7 +856,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
 		VM_WARN_ON(start >= end);
 	}
 
-	if (behavior == MADV_DONTNEED)
+	if (behavior == MADV_DONTNEED || behavior == MADV_DONTNEED_LOCKED)
 		return madvise_dontneed_single_vma(vma, start, end);
 	else if (behavior == MADV_FREE)
 		return madvise_free_single_vma(vma, start, end);
@@ -993,6 +995,7 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
 		return madvise_pageout(vma, prev, start, end);
 	case MADV_FREE:
 	case MADV_DONTNEED:
+	case MADV_DONTNEED_LOCKED:
 		return madvise_dontneed_free(vma, prev, start, end, behavior);
 	case MADV_POPULATE_READ:
 	case MADV_POPULATE_WRITE:
@@ -1123,6 +1126,7 @@ madvise_behavior_valid(int behavior)
 	case MADV_REMOVE:
 	case MADV_WILLNEED:
 	case MADV_DONTNEED:
+	case MADV_DONTNEED_LOCKED:
 	case MADV_FREE:
 	case MADV_COLD:
 	case MADV_PAGEOUT:
-- cgit v1.2.3
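
To illustrate the secure-buffer use case from the MADV_DONTNEED_LOCKED
changelog above, a hedged userspace sketch; it assumes a libc that exposes
mlock2()/MLOCK_ONFAULT and defines MADV_DONTNEED_LOCKED locally for older
kernel headers:

#define _GNU_SOURCE		/* for mlock2() and MLOCK_ONFAULT */
#include <string.h>
#include <sys/mman.h>

#ifndef MADV_DONTNEED_LOCKED
#define MADV_DONTNEED_LOCKED	24	/* value introduced by this patch */
#endif

int main(void)
{
	size_t len = 1 << 20;
	char *buf;

	/* Secure-buffer pattern: lock without populating, fault in on use. */
	buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return 1;
	if (mlock2(buf, len, MLOCK_ONFAULT))
		return 1;

	memset(buf, 0xa5, len);		/* buffer holds secrets while in use */

	/*
	 * "Free" path: clear the secrets, then drop the backing pages while
	 * the VMA stays mlocked - no munlock()/mlock() round trip, no vma
	 * splits or re-merges, just one madvise() call.
	 */
	memset(buf, 0, len);
	if (madvise(buf, len, MADV_DONTNEED_LOCKED))
		return 1;	/* fails with EINVAL on kernels without this patch */

	return 0;
}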