drm/i915/gt: Shrink i915_page_directory's slab bucket

kmalloc uses power-of-two slab buckets for small allocations (up to a few pages). Since i915_page_directory is a page of pointers, plus a couple more, this is rounded up to 8K, and we waste nearly 50% of that allocation. In the long term, this leads to poor memory utilisation, bloating the kernel footprint, but the problem is exacerbated by our conservative preallocation scheme for binding VMA. As we are required to allocate all levels for each vma just in case we need to insert them upon binding, this leads to a large multiplication factor for a single page vma. By halving the allocation we need for the page directory structure, we halve the impact of that factor, bringing workloads that once fitted into memory, hopefully back to fitting into memory. We maintain the split between i915_page_directory and i915_page_table as we only need half the allocation for the lowest, most populous, level. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200729164219.5737-3-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
author: Chris Wilson <chris@chris-wilson.co.uk> 2020-07-29 17:42:19 +0100
committer: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> 2020-09-07 14:24:23 +0300
commit: 82adf901138cc0564656dc8dc3a47862a546cb2a (patch)
tree: a91ebb7a3198d28b64c2ef17b78cdfe8e5a6c248 /drivers/gpu/drm/i915/gt/gen8_ppgtt.c
parent: 89351925a477441ae5fdd0136aec80b49ea1c53c (diff)
1 files changed, 14 insertions, 12 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index a40cb7ea8c66..eb64f474a78c 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -181,7 +181,7 @@ static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
 		} while (pde++, --count);
 	}
 
-	free_px(vm, pd);
+	free_px(vm, &pd->pt, lvl);
 }
 
 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
@@ -248,7 +248,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
 		}
 
 		if (release_pd_entry(pd, idx, pt, scratch))
-			free_px(vm, pt);
+			free_px(vm, pt, lvl);
 	} while (idx++, --len);
 
 	return start;
@@ -628,7 +628,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
 		err = pin_pt_dma(vm, pde->pt.base);
 		if (err) {
 			i915_gem_object_put(pde->pt.base);
-			kfree(pde);
+			free_pd(vm, pde);
 			return err;
 		}
 
@@ -648,28 +648,30 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
 	struct i915_page_directory *pd;
 	int err;
 
-	GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
+	GEM_BUG_ON(count > I915_PDES);
 
-	pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
+	pd = __alloc_pd(count);
 	if (unlikely(!pd))
 		return ERR_PTR(-ENOMEM);
 
 	pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
 	if (IS_ERR(pd->pt.base)) {
-		kfree(pd);
-		return ERR_PTR(-ENOMEM);
+		err = PTR_ERR(pd->pt.base);
+		pd->pt.base = NULL;
+		goto err_pd;
 	}
 
 	err = pin_pt_dma(vm, pd->pt.base);
-	if (err) {
-		i915_gem_object_put(pd->pt.base);
-		kfree(pd);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto err_pd;
 
 	fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count);
 	atomic_inc(px_used(pd)); /* mark as pinned */
 	return pd;
+
+err_pd:
+	free_pd(vm, pd);
+	return ERR_PTR(err);
 }
 
 /*
author	Chris Wilson <chris@chris-wilson.co.uk>	2020-07-29 17:42:19 +0100
committer	Joonas Lahtinen <joonas.lahtinen@linux.intel.com>	2020-09-07 14:24:23 +0300
commit	82adf901138cc0564656dc8dc3a47862a546cb2a (patch)
tree	a91ebb7a3198d28b64c2ef17b78cdfe8e5a6c248 /drivers/gpu/drm/i915/gt/gen8_ppgtt.c
parent	89351925a477441ae5fdd0136aec80b49ea1c53c (diff)