From 0f472d04f59ff89d15b2a1c4eafde7317ddd67a2 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 16 Jul 2019 16:27:33 -0700 Subject: mm/ioremap: probe platform for p4d huge map support Finish up what commit c2febafc6773 ("mm: convert generic code to 5-level paging") started while levelling up P4D huge mapping support at par with PUD and PMD. A new arch call back arch_ioremap_p4d_supported() is added which just maintains status quo (P4D huge map not supported) on x86, arm64 and powerpc. When HAVE_ARCH_HUGE_VMAP is enabled its just a simple check from the arch about the support, hence runtime effects are minimal. Link: http://lkml.kernel.org/r/1561699231-20991-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: Thomas Gleixner Acked-by: Michael Ellerman (powerpc) Cc: Catalin Marinas Cc: Will Deacon Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/book3s64/radix_pgtable.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 65c2ba1e1783..b4ca9e95e678 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1237,3 +1237,8 @@ int radix__ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, return 0; } } + +int __init arch_ioremap_p4d_supported(void) +{ + return 0; +} -- cgit v1.2.3 From b98cca444d287a63dd96df04af7fb9793567599e Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 16 Jul 2019 16:28:00 -0700 Subject: mm, kprobes: generalize and rename notify_page_fault() as kprobe_page_fault() Architectures which support kprobes have very similar boilerplate around calling kprobe_fault_handler(). Use a helper function in kprobes.h to unify them, based on the x86 code. This changes the behaviour for other architectures when preemption is enabled. Previously, they would have disabled preemption while calling the kprobe handler. However, preemption would be disabled if this fault was due to a kprobe, so we know the fault was not due to a kprobe handler and can simply return failure. This behaviour was introduced in commit a980c0ef9f6d ("x86/kprobes: Refactor kprobes_fault() like kprobe_exceptions_notify()") [anshuman.khandual@arm.com: export kprobe_fault_handler()] Link: http://lkml.kernel.org/r/1561133358-8876-1-git-send-email-anshuman.khandual@arm.com Link: http://lkml.kernel.org/r/1560420444-25737-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Reviewed-by: Dave Hansen Cc: Michal Hocko Cc: Matthew Wilcox Cc: Mark Rutland Cc: Christophe Leroy Cc: Stephen Rothwell Cc: Andrey Konovalov Cc: Michael Ellerman Cc: Paul Mackerras Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Tony Luck Cc: Fenghua Yu Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Yoshinori Sato Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Andy Lutomirski Cc: Vineet Gupta Cc: James Hogan Cc: Paul Burton Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/fault.c | 24 +----------------------- arch/arm64/mm/fault.c | 24 +----------------------- arch/ia64/mm/fault.c | 24 +----------------------- arch/mips/include/asm/kprobes.h | 1 + arch/mips/kernel/kprobes.c | 2 +- arch/powerpc/mm/fault.c | 23 ++--------------------- arch/s390/mm/fault.c | 16 +--------------- arch/sh/mm/fault.c | 18 ++---------------- arch/sparc/mm/fault_64.c | 16 +--------------- arch/x86/mm/fault.c | 21 ++------------------- include/linux/kprobes.h | 19 +++++++++++++++++++ 11 files changed, 32 insertions(+), 156 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 0e417233dad7..890eeaac3cbb 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -27,28 +27,6 @@ #ifdef CONFIG_MMU -#ifdef CONFIG_KPROBES -static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr) -{ - int ret = 0; - - if (!user_mode(regs)) { - /* kprobe_running() needs smp_processor_id() */ - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, fsr)) - ret = 1; - preempt_enable(); - } - - return ret; -} -#else -static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr) -{ - return 0; -} -#endif - /* * This is useful to dump out the page tables associated with * 'addr' in mm 'mm'. @@ -265,7 +243,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) vm_fault_t fault; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; - if (notify_page_fault(regs, fsr)) + if (kprobe_page_fault(regs, fsr)) return 0; tsk = current; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c8c61b1eb479..9568c116ac7f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -59,28 +59,6 @@ static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr) return debug_fault_info + DBG_ESR_EVT(esr); } -#ifdef CONFIG_KPROBES -static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) -{ - int ret = 0; - - /* kprobe_running() needs smp_processor_id() */ - if (!user_mode(regs)) { - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, esr)) - ret = 1; - preempt_enable(); - } - - return ret; -} -#else -static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) -{ - return 0; -} -#endif - static void data_abort_decode(unsigned int esr) { pr_alert("Data abort info:\n"); @@ -434,7 +412,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, unsigned long vm_flags = VM_READ | VM_WRITE; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; - if (notify_page_fault(regs, esr)) + if (kprobe_page_fault(regs, esr)) return 0; /* diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 3c3a283d3172..c2f299fe9e04 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -21,28 +21,6 @@ extern int die(char *, struct pt_regs *, long); -#ifdef CONFIG_KPROBES -static inline int notify_page_fault(struct pt_regs *regs, int trap) -{ - int ret = 0; - - if (!user_mode(regs)) { - /* kprobe_running() needs smp_processor_id() */ - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, trap)) - ret = 1; - preempt_enable(); - } - - return ret; -} -#else -static inline int notify_page_fault(struct pt_regs *regs, int trap) -{ - return 0; -} -#endif - /* * Return TRUE if ADDRESS points at a page in the kernel's mapped segment * (inside region 5, on ia64) and that page is present. @@ -116,7 +94,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re /* * This is to handle the kprobes on user space access instructions */ - if (notify_page_fault(regs, TRAP_BRKPT)) + if (kprobe_page_fault(regs, TRAP_BRKPT)) return; if (user_mode(regs)) diff --git a/arch/mips/include/asm/kprobes.h b/arch/mips/include/asm/kprobes.h index 3cf8e4d5fa28..68b1e5d458cf 100644 --- a/arch/mips/include/asm/kprobes.h +++ b/arch/mips/include/asm/kprobes.h @@ -41,6 +41,7 @@ do { \ #define kretprobe_blacklist_size 0 void arch_remove_kprobe(struct kprobe *p); +int kprobe_fault_handler(struct pt_regs *regs, int trapnr); /* Architecture specific copy of original instruction*/ struct arch_specific_insn { diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c index 81ba1d3c367c..6cfae2411c04 100644 --- a/arch/mips/kernel/kprobes.c +++ b/arch/mips/kernel/kprobes.c @@ -398,7 +398,7 @@ out: return 1; } -static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index d989592b6fc8..8432c281de92 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -42,26 +42,6 @@ #include #include -static inline bool notify_page_fault(struct pt_regs *regs) -{ - bool ret = false; - -#ifdef CONFIG_KPROBES - /* kprobe_running() needs smp_processor_id() */ - if (!user_mode(regs)) { - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, 11)) - ret = true; - preempt_enable(); - } -#endif /* CONFIG_KPROBES */ - - if (unlikely(debugger_fault_handler(regs))) - ret = true; - - return ret; -} - /* * Check whether the instruction inst is a store using * an update addressing form which will update r1. @@ -461,8 +441,9 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, int is_write = page_fault_is_write(error_code); vm_fault_t fault, major = 0; bool must_retry = false; + bool kprobe_fault = kprobe_page_fault(regs, 11); - if (notify_page_fault(regs)) + if (unlikely(debugger_fault_handler(regs) || kprobe_fault)) return 0; if (unlikely(page_fault_is_bad(error_code))) { diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 0ba174f779da..63507662828f 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -67,20 +67,6 @@ static int __init fault_init(void) } early_initcall(fault_init); -static inline int notify_page_fault(struct pt_regs *regs) -{ - int ret = 0; - - /* kprobe_running() needs smp_processor_id() */ - if (kprobes_built_in() && !user_mode(regs)) { - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, 14)) - ret = 1; - preempt_enable(); - } - return ret; -} - /* * Find out which address space caused the exception. */ @@ -412,7 +398,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) */ clear_pt_regs_flag(regs, PIF_PER_TRAP); - if (notify_page_fault(regs)) + if (kprobe_page_fault(regs, 14)) return 0; mm = tsk->mm; diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 3093bc372138..5f51456f4fc7 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -24,20 +24,6 @@ #include #include -static inline int notify_page_fault(struct pt_regs *regs, int trap) -{ - int ret = 0; - - if (kprobes_built_in() && !user_mode(regs)) { - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, trap)) - ret = 1; - preempt_enable(); - } - - return ret; -} - static void force_sig_info_fault(int si_signo, int si_code, unsigned long address) { @@ -412,14 +398,14 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, if (unlikely(fault_in_kernel_space(address))) { if (vmalloc_fault(address) >= 0) return; - if (notify_page_fault(regs, vec)) + if (kprobe_page_fault(regs, vec)) return; bad_area_nosemaphore(regs, error_code, address); return; } - if (unlikely(notify_page_fault(regs, vec))) + if (unlikely(kprobe_page_fault(regs, vec))) return; /* Only enable interrupts if they were on before the fault */ diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 83fda4d9c3b2..2371fb6b97e4 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -38,20 +38,6 @@ int show_unhandled_signals = 1; -static inline __kprobes int notify_page_fault(struct pt_regs *regs) -{ - int ret = 0; - - /* kprobe_running() needs smp_processor_id() */ - if (kprobes_built_in() && !user_mode(regs)) { - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, 0)) - ret = 1; - preempt_enable(); - } - return ret; -} - static void __kprobes unhandled_fault(unsigned long address, struct task_struct *tsk, struct pt_regs *regs) @@ -285,7 +271,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) fault_code = get_thread_fault_code(); - if (notify_page_fault(regs)) + if (kprobe_page_fault(regs, 0)) goto exit_exception; si_code = SEGV_MAPERR; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 794f364cb882..d1634c59ed56 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -46,23 +46,6 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr) return 0; } -static nokprobe_inline int kprobes_fault(struct pt_regs *regs) -{ - if (!kprobes_built_in()) - return 0; - if (user_mode(regs)) - return 0; - /* - * To be potentially processing a kprobe fault and to be allowed to call - * kprobe_running(), we have to be non-preemptible. - */ - if (preemptible()) - return 0; - if (!kprobe_running()) - return 0; - return kprobe_fault_handler(regs, X86_TRAP_PF); -} - /* * Prefetch quirks: * @@ -1282,7 +1265,7 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code, return; /* kprobes don't want to hook the spurious faults: */ - if (kprobes_fault(regs)) + if (kprobe_page_fault(regs, X86_TRAP_PF)) return; /* @@ -1313,7 +1296,7 @@ void do_user_addr_fault(struct pt_regs *regs, mm = tsk->mm; /* kprobes don't want to hook the spurious faults: */ - if (unlikely(kprobes_fault(regs))) + if (unlikely(kprobe_page_fault(regs, X86_TRAP_PF))) return; /* diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 443d9800ca3f..04bdaf01112c 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -458,4 +458,23 @@ static inline bool is_kprobe_optinsn_slot(unsigned long addr) } #endif +/* Returns true if kprobes handled the fault */ +static nokprobe_inline bool kprobe_page_fault(struct pt_regs *regs, + unsigned int trap) +{ + if (!kprobes_built_in()) + return false; + if (user_mode(regs)) + return false; + /* + * To be potentially processing a kprobe fault and to be allowed + * to call kprobe_running(), we have to be non-preemptible. + */ + if (preemptible()) + return false; + if (!kprobe_running()) + return false; + return kprobe_fault_handler(regs, trap); +} + #endif /* _LINUX_KPROBES_H */ -- cgit v1.2.3 From f296f1df6e0e5b17654709c05b1821a1b58d329f Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Tue, 16 Jul 2019 16:29:39 -0700 Subject: powerpc: define syscall_get_error() syscall_get_error() is required to be implemented on this architecture in addition to already implemented syscall_get_nr(), syscall_get_arguments(), syscall_get_return_value(), and syscall_get_arch() functions in order to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Link: http://lkml.kernel.org/r/20190510152824.GE28558@altlinux.org Signed-off-by: Dmitry V. Levin Acked-by: Michael Ellerman Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: Greentime Hu Cc: Helge Deller [parisc] Cc: James E.J. Bottomley Cc: James Hogan Cc: kbuild test robot Cc: Kees Cook Cc: Paul Burton Cc: Ralf Baechle Cc: Richard Kuo Cc: Shuah Khan Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/syscall.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 81abcf6a737b..38d62acfdce7 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -35,6 +35,16 @@ static inline void syscall_rollback(struct task_struct *task, regs->gpr[3] = regs->orig_gpr3; } +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + /* + * If the system call failed, + * regs->gpr[3] contains a positive ERRORCODE. + */ + return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0; +} + static inline long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) { -- cgit v1.2.3 From 8aa3c927ec10d1230c3ace8357f624479665f701 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 16 Jul 2019 16:30:41 -0700 Subject: mm/mmap: move common defines to mman-common.h Two architecture that use arch specific MMAP flags are powerpc and sparc. We still have few flag values common across them and other architectures. Consolidate this in mman-common.h. Also update the comment to indicate where to find HugeTLB specific reserved values Link: http://lkml.kernel.org/r/20190604090950.31417-1-aneesh.kumar@linux.ibm.com Signed-off-by: Aneesh Kumar K.V Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/uapi/asm/mman.h | 6 +----- arch/sparc/include/uapi/asm/mman.h | 6 ------ include/uapi/asm-generic/mman-common.h | 6 +++++- include/uapi/asm-generic/mman.h | 9 ++++----- 4 files changed, 10 insertions(+), 17 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h index 65065ce32814..c0c737215b00 100644 --- a/arch/powerpc/include/uapi/asm/mman.h +++ b/arch/powerpc/include/uapi/asm/mman.h @@ -21,15 +21,11 @@ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ + #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ #define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ -#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ -#define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ -#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ - /* Override any generic PKEY permission defines */ #define PKEY_DISABLE_EXECUTE 0x4 #undef PKEY_ACCESS_MASK diff --git a/arch/sparc/include/uapi/asm/mman.h b/arch/sparc/include/uapi/asm/mman.h index f6f99ec65bb3..cec9f4109687 100644 --- a/arch/sparc/include/uapi/asm/mman.h +++ b/arch/sparc/include/uapi/asm/mman.h @@ -22,10 +22,4 @@ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ #define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ -#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ -#define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ -#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ - - #endif /* _UAPI__SPARC_MMAN_H__ */ diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 93a8b420ce1e..63b1f506ea67 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -20,7 +20,11 @@ #define MAP_FIXED 0x10 /* Interpret addr exactly */ #define MAP_ANONYMOUS 0x20 /* don't use a file */ -/* 0x0100 - 0x40000 flags are defined in asm-generic/mman.h */ +/* 0x0100 - 0x4000 flags are defined in asm-generic/mman.h */ +#define MAP_POPULATE 0x008000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x010000 /* do not block on IO */ +#define MAP_STACK 0x020000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x040000 /* create a huge page mapping */ #define MAP_SYNC 0x080000 /* perform synchronous page faults for the mapping */ #define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h index 2dffcbf705b3..57e8195d0b53 100644 --- a/include/uapi/asm-generic/mman.h +++ b/include/uapi/asm-generic/mman.h @@ -9,12 +9,11 @@ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ #define MAP_LOCKED 0x2000 /* pages are locked */ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ -#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ -#define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ -#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ -/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */ +/* + * Bits [26:31] are reserved, see asm-generic/hugetlb_encode.h + * for MAP_HUGETLB usage + */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -- cgit v1.2.3 From 175967318c3018d01931ac950c82adab5deb47ca Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 16 Jul 2019 16:30:47 -0700 Subject: mm: introduce ARCH_HAS_PTE_DEVMAP ARCH_HAS_ZONE_DEVICE is somewhat meaningless in itself, and combined with the long-out-of-date comment can lead to the impression than an architecture may just enable it (since __add_pages() now "comprehends device memory" for itself) and expect things to work. In practice, however, ZONE_DEVICE users have little chance of functioning correctly without __HAVE_ARCH_PTE_DEVMAP, so let's clean that up the same way as ARCH_HAS_PTE_SPECIAL and make it the proper dependency so the real situation is clearer. Link: http://lkml.kernel.org/r/87554aa78478a02a63f2c4cf60a847279ae3eb3b.1558547956.git.robin.murphy@arm.com Signed-off-by: Robin Murphy Acked-by: Dan Williams Reviewed-by: Ira Weiny Acked-by: Oliver O'Halloran Reviewed-by: Anshuman Khandual Cc: Michael Ellerman Cc: Catalin Marinas Cc: David Hildenbrand Cc: Jerome Glisse Cc: Michal Hocko Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 - arch/x86/Kconfig | 2 +- arch/x86/include/asm/pgtable.h | 4 ++-- arch/x86/include/asm/pgtable_types.h | 1 - include/linux/mm.h | 4 ++-- include/linux/pfn_t.h | 4 ++-- mm/Kconfig | 5 ++--- mm/gup.c | 2 +- 9 files changed, 11 insertions(+), 14 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index f516796dd819..d8dcd8820369 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -129,6 +129,7 @@ config PPC select ARCH_HAS_MMIOWB if PPC64 select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API if PPC64 + select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64 @@ -136,7 +137,6 @@ config PPC select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64 select ARCH_HAS_UBSAN_SANITIZE_ALL - select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64 select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_KEEP_MEMBLOCK select ARCH_MIGHT_HAVE_PC_PARPORT diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 62e6ea0a7650..8308f32e9782 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -90,7 +90,6 @@ #define _PAGE_SOFT_DIRTY _RPAGE_SW3 /* software: software dirty tracking */ #define _PAGE_SPECIAL _RPAGE_SW2 /* software: special page */ #define _PAGE_DEVMAP _RPAGE_SW1 /* software: ZONE_DEVICE page */ -#define __HAVE_ARCH_PTE_DEVMAP /* * Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 879741336771..4a55bd01e918 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -70,6 +70,7 @@ config X86 select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PMEM_API if X86_64 + select ARCH_HAS_PTE_DEVMAP if X86_64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 @@ -80,7 +81,6 @@ config X86 select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE select ARCH_HAS_UBSAN_SANITIZE_ALL - select ARCH_HAS_ZONE_DEVICE if X86_64 select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_MIGHT_HAVE_PC_PARPORT diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5e0509b41986..0bc530c4eb13 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -271,7 +271,7 @@ static inline int has_transparent_hugepage(void) return boot_cpu_has(X86_FEATURE_PSE); } -#ifdef __HAVE_ARCH_PTE_DEVMAP +#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pmd_devmap(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_DEVMAP); @@ -732,7 +732,7 @@ static inline int pte_present(pte_t a) return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); } -#ifdef __HAVE_ARCH_PTE_DEVMAP +#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t a) { return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP; diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index d6ff0bbdb394..b5e49e6bac63 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -103,7 +103,6 @@ #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP) -#define __HAVE_ARCH_PTE_DEVMAP #else #define _PAGE_NX (_AT(pteval_t, 0)) #define _PAGE_DEVMAP (_AT(pteval_t, 0)) diff --git a/include/linux/mm.h b/include/linux/mm.h index baa8b8761d8c..f43f4de4de68 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -547,7 +547,7 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma) struct mmu_gather; struct inode; -#if !defined(__HAVE_ARCH_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE) +#if !defined(CONFIG_ARCH_HAS_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE) static inline int pmd_devmap(pmd_t pmd) { return 0; @@ -1750,7 +1750,7 @@ static inline void sync_mm_rss(struct mm_struct *mm) } #endif -#ifndef __HAVE_ARCH_PTE_DEVMAP +#ifndef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t pte) { return 0; diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index 01e8037023f7..2d9148221e9a 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -97,7 +97,7 @@ static inline pud_t pfn_t_pud(pfn_t pfn, pgprot_t pgprot) #endif #endif -#ifdef __HAVE_ARCH_PTE_DEVMAP +#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP static inline bool pfn_t_devmap(pfn_t pfn) { const u64 flags = PFN_DEV|PFN_MAP; @@ -115,7 +115,7 @@ pmd_t pmd_mkdevmap(pmd_t pmd); defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) pud_t pud_mkdevmap(pud_t pud); #endif -#endif /* __HAVE_ARCH_PTE_DEVMAP */ +#endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */ #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL static inline bool pfn_t_special(pfn_t pfn) diff --git a/mm/Kconfig b/mm/Kconfig index 495d7368ced8..56cec636a1fc 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -649,8 +649,7 @@ config IDLE_PAGE_TRACKING See Documentation/admin-guide/mm/idle_page_tracking.rst for more details. -# arch_add_memory() comprehends device memory -config ARCH_HAS_ZONE_DEVICE +config ARCH_HAS_PTE_DEVMAP bool config ZONE_DEVICE @@ -658,7 +657,7 @@ config ZONE_DEVICE depends on MEMORY_HOTPLUG depends on MEMORY_HOTREMOVE depends on SPARSEMEM_VMEMMAP - depends on ARCH_HAS_ZONE_DEVICE + depends on ARCH_HAS_PTE_DEVMAP select XARRAY_MULTI help diff --git a/mm/gup.c b/mm/gup.c index 8bbaa5523116..98f13ab37bac 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1895,7 +1895,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, } #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ -#if defined(__HAVE_ARCH_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) static int __gup_device_huge(unsigned long pfn, unsigned long addr, unsigned long end, struct page **pages, int *nr) { -- cgit v1.2.3 From 79eb597cba06c435b72f220e9d426ae413fc2579 Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Tue, 16 Jul 2019 16:30:54 -0700 Subject: mm: add account_locked_vm utility function locked_vm accounting is done roughly the same way in five places, so unify them in a helper. Include the helper's caller in the debug print to distinguish between callsites. Error codes stay the same, so user-visible behavior does too. The one exception is that the -EPERM case in tce_account_locked_vm is removed because Alexey has never seen it triggered. [daniel.m.jordan@oracle.com: v3] Link: http://lkml.kernel.org/r/20190529205019.20927-1-daniel.m.jordan@oracle.com [sfr@canb.auug.org.au: fix mm/util.c] Link: http://lkml.kernel.org/r/20190524175045.26897-1-daniel.m.jordan@oracle.com Signed-off-by: Daniel Jordan Signed-off-by: Stephen Rothwell Tested-by: Alexey Kardashevskiy Acked-by: Alex Williamson Cc: Alan Tull Cc: Alex Williamson Cc: Benjamin Herrenschmidt Cc: Christoph Lameter Cc: Christophe Leroy Cc: Davidlohr Bueso Cc: Jason Gunthorpe Cc: Mark Rutland Cc: Michael Ellerman Cc: Moritz Fischer Cc: Paul Mackerras Cc: Steve Sistare Cc: Wu Hao Cc: Ira Weiny Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kvm/book3s_64_vio.c | 44 ++------------------- arch/powerpc/mm/book3s64/iommu_api.c | 41 ++------------------ drivers/fpga/dfl-afu-dma-region.c | 53 ++----------------------- drivers/vfio/vfio_iommu_spapr_tce.c | 54 +++----------------------- drivers/vfio/vfio_iommu_type1.c | 17 +------- include/linux/mm.h | 4 ++ mm/util.c | 75 ++++++++++++++++++++++++++++++++++++ 7 files changed, 98 insertions(+), 190 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 5bf05cc774e2..e99a14798ab0 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -45,43 +46,6 @@ static unsigned long kvmppc_stt_pages(unsigned long tce_pages) return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE; } -static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc) -{ - long ret = 0; - - if (!current || !current->mm) - return ret; /* process exited */ - - down_write(¤t->mm->mmap_sem); - - if (inc) { - unsigned long locked, lock_limit; - - locked = current->mm->locked_vm + stt_pages; - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - if (locked > lock_limit && !capable(CAP_IPC_LOCK)) - ret = -ENOMEM; - else - current->mm->locked_vm += stt_pages; - } else { - if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm)) - stt_pages = current->mm->locked_vm; - - current->mm->locked_vm -= stt_pages; - } - - pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid, - inc ? '+' : '-', - stt_pages << PAGE_SHIFT, - current->mm->locked_vm << PAGE_SHIFT, - rlimit(RLIMIT_MEMLOCK), - ret ? " - exceeded" : ""); - - up_write(¤t->mm->mmap_sem); - - return ret; -} - static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head) { struct kvmppc_spapr_tce_iommu_table *stit = container_of(head, @@ -291,7 +255,7 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) kvm_put_kvm(stt->kvm); - kvmppc_account_memlimit( + account_locked_vm(current->mm, kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false); call_rcu(&stt->rcu, release_spapr_tce_table); @@ -316,7 +280,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, return -EINVAL; npages = kvmppc_tce_pages(size); - ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); + ret = account_locked_vm(current->mm, kvmppc_stt_pages(npages), true); if (ret) return ret; @@ -362,7 +326,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, kfree(stt); fail_acct: - kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); + account_locked_vm(current->mm, kvmppc_stt_pages(npages), false); return ret; } diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index 90ee3a89722c..b056cae3388b 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -46,40 +47,6 @@ struct mm_iommu_table_group_mem_t { u64 dev_hpa; /* Device memory base address */ }; -static long mm_iommu_adjust_locked_vm(struct mm_struct *mm, - unsigned long npages, bool incr) -{ - long ret = 0, locked, lock_limit; - - if (!npages) - return 0; - - down_write(&mm->mmap_sem); - - if (incr) { - locked = mm->locked_vm + npages; - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - if (locked > lock_limit && !capable(CAP_IPC_LOCK)) - ret = -ENOMEM; - else - mm->locked_vm += npages; - } else { - if (WARN_ON_ONCE(npages > mm->locked_vm)) - npages = mm->locked_vm; - mm->locked_vm -= npages; - } - - pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n", - current ? current->pid : 0, - incr ? '+' : '-', - npages << PAGE_SHIFT, - mm->locked_vm << PAGE_SHIFT, - rlimit(RLIMIT_MEMLOCK)); - up_write(&mm->mmap_sem); - - return ret; -} - bool mm_iommu_preregistered(struct mm_struct *mm) { return !list_empty(&mm->context.iommu_group_mem_list); @@ -96,7 +63,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, unsigned long entry, chunk; if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) { - ret = mm_iommu_adjust_locked_vm(mm, entries, true); + ret = account_locked_vm(mm, entries, true); if (ret) return ret; @@ -211,7 +178,7 @@ free_exit: kfree(mem); unlock_exit: - mm_iommu_adjust_locked_vm(mm, locked_entries, false); + account_locked_vm(mm, locked_entries, false); return ret; } @@ -311,7 +278,7 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) unlock_exit: mutex_unlock(&mem_list_mutex); - mm_iommu_adjust_locked_vm(mm, unlock_entries, false); + account_locked_vm(mm, unlock_entries, false); return ret; } diff --git a/drivers/fpga/dfl-afu-dma-region.c b/drivers/fpga/dfl-afu-dma-region.c index dcd80b088c7b..62f924489db5 100644 --- a/drivers/fpga/dfl-afu-dma-region.c +++ b/drivers/fpga/dfl-afu-dma-region.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "dfl-afu.h" @@ -31,52 +32,6 @@ void afu_dma_region_init(struct dfl_feature_platform_data *pdata) afu->dma_regions = RB_ROOT; } -/** - * afu_dma_adjust_locked_vm - adjust locked memory - * @dev: port device - * @npages: number of pages - * @incr: increase or decrease locked memory - * - * Increase or decrease the locked memory size with npages input. - * - * Return 0 on success. - * Return -ENOMEM if locked memory size is over the limit and no CAP_IPC_LOCK. - */ -static int afu_dma_adjust_locked_vm(struct device *dev, long npages, bool incr) -{ - unsigned long locked, lock_limit; - int ret = 0; - - /* the task is exiting. */ - if (!current->mm) - return 0; - - down_write(¤t->mm->mmap_sem); - - if (incr) { - locked = current->mm->locked_vm + npages; - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (locked > lock_limit && !capable(CAP_IPC_LOCK)) - ret = -ENOMEM; - else - current->mm->locked_vm += npages; - } else { - if (WARN_ON_ONCE(npages > current->mm->locked_vm)) - npages = current->mm->locked_vm; - current->mm->locked_vm -= npages; - } - - dev_dbg(dev, "[%d] RLIMIT_MEMLOCK %c%ld %ld/%ld%s\n", current->pid, - incr ? '+' : '-', npages << PAGE_SHIFT, - current->mm->locked_vm << PAGE_SHIFT, rlimit(RLIMIT_MEMLOCK), - ret ? "- exceeded" : ""); - - up_write(¤t->mm->mmap_sem); - - return ret; -} - /** * afu_dma_pin_pages - pin pages of given dma memory region * @pdata: feature device platform data @@ -92,7 +47,7 @@ static int afu_dma_pin_pages(struct dfl_feature_platform_data *pdata, struct device *dev = &pdata->dev->dev; int ret, pinned; - ret = afu_dma_adjust_locked_vm(dev, npages, true); + ret = account_locked_vm(current->mm, npages, true); if (ret) return ret; @@ -121,7 +76,7 @@ put_pages: free_pages: kfree(region->pages); unlock_vm: - afu_dma_adjust_locked_vm(dev, npages, false); + account_locked_vm(current->mm, npages, false); return ret; } @@ -141,7 +96,7 @@ static void afu_dma_unpin_pages(struct dfl_feature_platform_data *pdata, put_all_pages(region->pages, npages); kfree(region->pages); - afu_dma_adjust_locked_vm(dev, npages, false); + account_locked_vm(current->mm, npages, false); dev_dbg(dev, "%ld pages unpinned\n", npages); } diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 7048c9198c21..8ce9ad21129f 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -31,51 +32,6 @@ static void tce_iommu_detach_group(void *iommu_data, struct iommu_group *iommu_group); -static long try_increment_locked_vm(struct mm_struct *mm, long npages) -{ - long ret = 0, locked, lock_limit; - - if (WARN_ON_ONCE(!mm)) - return -EPERM; - - if (!npages) - return 0; - - down_write(&mm->mmap_sem); - locked = mm->locked_vm + npages; - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - if (locked > lock_limit && !capable(CAP_IPC_LOCK)) - ret = -ENOMEM; - else - mm->locked_vm += npages; - - pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid, - npages << PAGE_SHIFT, - mm->locked_vm << PAGE_SHIFT, - rlimit(RLIMIT_MEMLOCK), - ret ? " - exceeded" : ""); - - up_write(&mm->mmap_sem); - - return ret; -} - -static void decrement_locked_vm(struct mm_struct *mm, long npages) -{ - if (!mm || !npages) - return; - - down_write(&mm->mmap_sem); - if (WARN_ON_ONCE(npages > mm->locked_vm)) - npages = mm->locked_vm; - mm->locked_vm -= npages; - pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid, - npages << PAGE_SHIFT, - mm->locked_vm << PAGE_SHIFT, - rlimit(RLIMIT_MEMLOCK)); - up_write(&mm->mmap_sem); -} - /* * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation * @@ -333,7 +289,7 @@ static int tce_iommu_enable(struct tce_container *container) return ret; locked = table_group->tce32_size >> PAGE_SHIFT; - ret = try_increment_locked_vm(container->mm, locked); + ret = account_locked_vm(container->mm, locked, true); if (ret) return ret; @@ -352,7 +308,7 @@ static void tce_iommu_disable(struct tce_container *container) container->enabled = false; BUG_ON(!container->mm); - decrement_locked_vm(container->mm, container->locked_pages); + account_locked_vm(container->mm, container->locked_pages, false); } static void *tce_iommu_open(unsigned long arg) @@ -656,7 +612,7 @@ static long tce_iommu_create_table(struct tce_container *container, if (!table_size) return -EINVAL; - ret = try_increment_locked_vm(container->mm, table_size >> PAGE_SHIFT); + ret = account_locked_vm(container->mm, table_size >> PAGE_SHIFT, true); if (ret) return ret; @@ -675,7 +631,7 @@ static void tce_iommu_free_table(struct tce_container *container, unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; iommu_tce_table_put(tbl); - decrement_locked_vm(container->mm, pages); + account_locked_vm(container->mm, pages, false); } static long tce_iommu_create_window(struct tce_container *container, diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index add34adfadc7..054391f30fa8 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -272,21 +272,8 @@ static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async) ret = down_write_killable(&mm->mmap_sem); if (!ret) { - if (npage > 0) { - if (!dma->lock_cap) { - unsigned long limit; - - limit = task_rlimit(dma->task, - RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (mm->locked_vm + npage > limit) - ret = -ENOMEM; - } - } - - if (!ret) - mm->locked_vm += npage; - + ret = __account_locked_vm(mm, abs(npage), npage > 0, dma->task, + dma->lock_cap); up_write(&mm->mmap_sem); } diff --git a/include/linux/mm.h b/include/linux/mm.h index f43f4de4de68..bd6512559bed 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1543,6 +1543,10 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); +int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc); +int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, + struct task_struct *task, bool bypass_rlim); + /* Container for pinned pfns / pages */ struct frame_vector { unsigned int nr_allocated; /* Number of frames we have space for */ diff --git a/mm/util.c b/mm/util.c index 68575a315dc5..e6351a80f248 100644 --- a/mm/util.c +++ b/mm/util.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -300,6 +301,80 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) } #endif +/** + * __account_locked_vm - account locked pages to an mm's locked_vm + * @mm: mm to account against + * @pages: number of pages to account + * @inc: %true if @pages should be considered positive, %false if not + * @task: task used to check RLIMIT_MEMLOCK + * @bypass_rlim: %true if checking RLIMIT_MEMLOCK should be skipped + * + * Assumes @task and @mm are valid (i.e. at least one reference on each), and + * that mmap_sem is held as writer. + * + * Return: + * * 0 on success + * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded. + */ +int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, + struct task_struct *task, bool bypass_rlim) +{ + unsigned long locked_vm, limit; + int ret = 0; + + lockdep_assert_held_write(&mm->mmap_sem); + + locked_vm = mm->locked_vm; + if (inc) { + if (!bypass_rlim) { + limit = task_rlimit(task, RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if (locked_vm + pages > limit) + ret = -ENOMEM; + } + if (!ret) + mm->locked_vm = locked_vm + pages; + } else { + WARN_ON_ONCE(pages > locked_vm); + mm->locked_vm = locked_vm - pages; + } + + pr_debug("%s: [%d] caller %ps %c%lu %lu/%lu%s\n", __func__, task->pid, + (void *)_RET_IP_, (inc) ? '+' : '-', pages << PAGE_SHIFT, + locked_vm << PAGE_SHIFT, task_rlimit(task, RLIMIT_MEMLOCK), + ret ? " - exceeded" : ""); + + return ret; +} +EXPORT_SYMBOL_GPL(__account_locked_vm); + +/** + * account_locked_vm - account locked pages to an mm's locked_vm + * @mm: mm to account against, may be NULL + * @pages: number of pages to account + * @inc: %true if @pages should be considered positive, %false if not + * + * Assumes a non-NULL @mm is valid (i.e. at least one reference on it). + * + * Return: + * * 0 on success, or if mm is NULL + * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded. + */ +int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc) +{ + int ret; + + if (pages == 0 || !mm) + return 0; + + down_write(&mm->mmap_sem); + ret = __account_locked_vm(mm, pages, inc, current, + capable(CAP_IPC_LOCK)); + up_write(&mm->mmap_sem); + + return ret; +} +EXPORT_SYMBOL_GPL(account_locked_vm); + unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long pgoff) -- cgit v1.2.3