From c0ff7453bb5c7c98e0885fb94279f2571946f280 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Mon, 24 May 2010 14:32:08 -0700 Subject: cpuset,mm: fix no node to alloc memory when changing cpuset's mems Before applying this patch, cpuset updates task->mems_allowed and mempolicy by setting all new bits in the nodemask first, and clearing all old unallowed bits later. But in the way, the allocator may find that there is no node to alloc memory. The reason is that cpuset rebinds the task's mempolicy, it cleans the nodes which the allocater can alloc pages on, for example: (mpol: mempolicy) task1 task1's mpol task2 alloc page 1 alloc on node0? NO 1 1 change mems from 1 to 0 1 rebind task1's mpol 0-1 set new bits 0 clear disallowed bits alloc on node1? NO 0 ... can't alloc page goto oom This patch fixes this problem by expanding the nodes range first(set newly allowed bits) and shrink it lazily(clear newly disallowed bits). So we use a variable to tell the write-side task that read-side task is reading nodemask, and the write-side task clears newly disallowed nodes after read-side task ends the current memory allocation. [akpm@linux-foundation.org: fix spello] Signed-off-by: Miao Xie Cc: David Rientjes Cc: Nick Piggin Cc: Paul Menage Cc: Lee Schermerhorn Cc: Hugh Dickins Cc: Ravikiran Thirumalai Cc: KOSAKI Motohiro Cc: Christoph Lameter Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 596180fedd3..f7da2a2934b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1990,10 +1990,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, if (unlikely(!zonelist->_zonerefs->zone)) return NULL; + get_mems_allowed(); /* The preferred zone is used for statistics later */ first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone); - if (!preferred_zone) + if (!preferred_zone) { + put_mems_allowed(); return NULL; + } /* First allocation attempt */ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, @@ -2003,6 +2006,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, page = __alloc_pages_slowpath(gfp_mask, order, zonelist, high_zoneidx, nodemask, preferred_zone, migratetype); + put_mems_allowed(); trace_mm_page_alloc(page, order, gfp_mask, migratetype); return page; -- cgit v1.2.3