From f44d21985eb6af7361d3785e26923355172147bd Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 21 Mar 2012 16:33:56 -0700 Subject: mm: do not reset cached_hole_size when vma is unmapped In the current code, cached_hole_size is set to the maximum value if the unmapped vma is less than free_area_cache so the next search will search from the base address. Actually, we can keep cached_hole_size so that if the next required size is more than cached_hole_size, it can search from free_area_cache. Signed-off-by: Xiao Guangrong Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Michal Hocko Cc: Hillf Danton Cc: Andrea Arcangeli Cc: KAMEZAWA Hiroyuki Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index da15a79b144..4f31764d838 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1426,10 +1426,8 @@ void arch_unmap_area(struct mm_struct *mm, unsigned long addr) /* * Is this a new hole at the lowest possible address? */ - if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) { + if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) mm->free_area_cache = addr; - mm->cached_hole_size = ~0UL; - } } /* -- cgit v1.2.3 From b716ad953a2bc4a543143c1d9836b7007a4b182f Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 21 Mar 2012 16:33:56 -0700 Subject: mm: search from free_area_cache for the bigger size If the required size is bigger than cached_hole_size it is better to search from free_area_cache - it is easier to get a free region, specifically for the 64 bit process whose address space is large enough Do it just as hugetlb_get_unmapped_area_topdown() in arch/x86/mm/hugetlbpage.c Signed-off-by: Xiao Guangrong Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Michal Hocko Cc: Hillf Danton Cc: Andrea Arcangeli Cc: KAMEZAWA Hiroyuki Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/sys_x86_64.c | 34 +++++++++++++++++----------------- mm/mmap.c | 36 +++++++++++++++++++++--------------- 2 files changed, 38 insertions(+), 32 deletions(-) (limited to 'mm/mmap.c') diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 051489082d5..ef59642ff1b 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -195,7 +195,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, { struct vm_area_struct *vma; struct mm_struct *mm = current->mm; - unsigned long addr = addr0; + unsigned long addr = addr0, start_addr; /* requested length too big for entire address space */ if (len > TASK_SIZE) @@ -223,25 +223,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, mm->free_area_cache = mm->mmap_base; } +try_again: /* either no address requested or can't fit in requested address hole */ - addr = mm->free_area_cache; - - /* make sure it can fit in the remaining address space */ - if (addr > len) { - unsigned long tmp_addr = align_addr(addr - len, filp, - ALIGN_TOPDOWN); - - vma = find_vma(mm, tmp_addr); - if (!vma || tmp_addr + len <= vma->vm_start) - /* remember the address as a hint for next time */ - return mm->free_area_cache = tmp_addr; - } - - if (mm->mmap_base < len) - goto bottomup; + start_addr = addr = mm->free_area_cache; - addr = mm->mmap_base-len; + if (addr < len) + goto fail; + addr -= len; do { addr = align_addr(addr, filp, ALIGN_TOPDOWN); @@ -263,6 +252,17 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, addr = vma->vm_start-len; } while (len < vma->vm_start); +fail: + /* + * if hint left us with no space for the requested + * mapping then try again: + */ + if (start_addr != mm->mmap_base) { + mm->free_area_cache = mm->mmap_base; + 
mm->cached_hole_size = 0; + goto try_again; + } + bottomup: /* * A failed mmap() very likely causes application failure, diff --git a/mm/mmap.c b/mm/mmap.c index 4f31764d838..9e0c0de2e7e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1442,7 +1442,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, { struct vm_area_struct *vma; struct mm_struct *mm = current->mm; - unsigned long addr = addr0; + unsigned long addr = addr0, start_addr; /* requested length too big for entire address space */ if (len > TASK_SIZE) @@ -1466,22 +1466,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, mm->free_area_cache = mm->mmap_base; } +try_again: /* either no address requested or can't fit in requested address hole */ - addr = mm->free_area_cache; + start_addr = addr = mm->free_area_cache; - /* make sure it can fit in the remaining address space */ - if (addr > len) { - vma = find_vma(mm, addr-len); - if (!vma || addr <= vma->vm_start) - /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr-len); - } - - if (mm->mmap_base < len) - goto bottomup; - - addr = mm->mmap_base-len; + if (addr < len) + goto fail; + addr -= len; do { /* * Lookup failure means no vma is above this address, @@ -1501,7 +1493,21 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, addr = vma->vm_start-len; } while (len < vma->vm_start); -bottomup: +fail: + /* + * if hint left us with no space for the requested + * mapping then try again: + * + * Note: this differs from the bottom-up case, which + * does a full linear search; here we use find_vma(), + * which causes some holes to be skipped. + */ + if (start_addr != mm->mmap_base) { + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = 0; + goto try_again; + } + /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. 
This scenario -- cgit v1.2.3 From 40716e29243de46720e5773797791466c28904ec Mon Sep 17 00:00:00 2001 From: Steven Truelove Date: Wed, 21 Mar 2012 16:34:14 -0700 Subject: hugetlbfs: fix alignment of huge page requests When calling shmget() with SHM_HUGETLB, shmget aligns the request size to PAGE_SIZE, but this is not sufficient. Modify hugetlb_file_setup() to align requests to the huge page size, and to accept an address argument so that all alignment checks can be performed in hugetlb_file_setup(), rather than in its callers. Change newseg() and mmap_pgoff() to match the new prototype and eliminate a now redundant alignment check. [akpm@linux-foundation.org: fix build] Signed-off-by: Steven Truelove Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 14 +++++++++----- include/linux/hugetlb.h | 6 ++++-- ipc/shm.c | 2 +- mm/mmap.c | 6 +++--- 4 files changed, 17 insertions(+), 11 deletions(-) (limited to 'mm/mmap.c') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 79408159a00..631329f3de6 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -935,8 +935,8 @@ static int can_do_hugetlb_shm(void) return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); } -struct file *hugetlb_file_setup(const char *name, size_t size, - vm_flags_t acctflag, +struct file *hugetlb_file_setup(const char *name, unsigned long addr, + size_t size, vm_flags_t acctflag, struct user_struct **user, int creat_flags) { int error = -ENOMEM; @@ -945,6 +945,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size, struct path path; struct dentry *root; struct qstr quick_string; + struct hstate *hstate; + unsigned long num_pages; *user = NULL; if (!hugetlbfs_vfsmount) @@ -978,10 +980,12 @@ struct file *hugetlb_file_setup(const char *name, size_t size, if (!inode) goto out_dentry; + hstate = hstate_inode(inode); + size += addr & ~huge_page_mask(hstate); + num_pages = ALIGN(size, huge_page_size(hstate)) >> + 
huge_page_shift(hstate); error = -ENOMEM; - if (hugetlb_reserve_pages(inode, 0, - size >> huge_page_shift(hstate_inode(inode)), NULL, - acctflag)) + if (hugetlb_reserve_pages(inode, 0, num_pages, NULL, acctflag)) goto out_inode; d_instantiate(path.dentry, inode); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index cf0181738c9..000837e126e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -152,7 +152,8 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) extern const struct file_operations hugetlbfs_file_operations; extern const struct vm_operations_struct hugetlb_vm_ops; -struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, +struct file *hugetlb_file_setup(const char *name, unsigned long addr, + size_t size, vm_flags_t acct, struct user_struct **user, int creat_flags); static inline int is_file_hugepages(struct file *file) @@ -168,7 +169,8 @@ static inline int is_file_hugepages(struct file *file) #else /* !CONFIG_HUGETLBFS */ #define is_file_hugepages(file) 0 -static inline struct file *hugetlb_file_setup(const char *name, size_t size, +static inline struct file * +hugetlb_file_setup(const char *name, unsigned long addr, size_t size, vm_flags_t acctflag, struct user_struct **user, int creat_flags) { return ERR_PTR(-ENOSYS); diff --git a/ipc/shm.c b/ipc/shm.c index b76be5bda6c..406c5b20819 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -482,7 +482,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) /* hugetlb_file_setup applies strict accounting */ if (shmflg & SHM_NORESERVE) acctflag = VM_NORESERVE; - file = hugetlb_file_setup(name, size, acctflag, + file = hugetlb_file_setup(name, 0, size, acctflag, &shp->mlock_user, HUGETLB_SHMFS_INODE); } else { /* diff --git a/mm/mmap.c b/mm/mmap.c index 9e0c0de2e7e..a19cc271e79 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1099,9 +1099,9 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, * A dummy 
user value is used because we are not locking * memory so no accounting is necessary */ - len = ALIGN(len, huge_page_size(&default_hstate)); - file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE, - &user, HUGETLB_ANONHUGE_INODE); + file = hugetlb_file_setup(HUGETLB_ANON_FILE, addr, len, + VM_NORESERVE, &user, + HUGETLB_ANONHUGE_INODE); if (IS_ERR(file)) return PTR_ERR(file); } -- cgit v1.2.3 From 88f6b4c32e531dc5b06bd05144f790847a1fdaeb Mon Sep 17 00:00:00 2001 From: Kautuk Consul Date: Wed, 21 Mar 2012 16:34:16 -0700 Subject: mmap.c: fix comment for __insert_vm_struct() The comment above __insert_vm_struct seems to suggest that this function is also going to link the VMA with the anon_vma, but this is not true. This function only links the VMA to the mm->mm_rb tree and the mm->mmap linked list. [akpm@linux-foundation.org: improve comment layout and text] Signed-off-by: Kautuk Consul Acked-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index a19cc271e79..230f0bac06b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -451,9 +451,8 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, } /* - * Helper for vma_adjust in the split_vma insert case: - * insert vm structure into list and rbtree and anon_vma, - * but it has already been inserted into prio_tree earlier. + * Helper for vma_adjust() in the split_vma insert case: insert a vma into the + * mm's list and rbtree. It has already been inserted into the prio_tree. */ static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) { -- cgit v1.2.3