author     Lajos Molnar <molnar@ti.com>             2011-04-07 08:41:56 +0100
committer  Andy Green <andy.green@linaro.org>       2011-04-07 08:41:56 +0100
commit     6d80c1675284b35a0ebccac97b3f6ecf291b5089 (patch)
tree       b622d5314ff3e705a27ac555556af59e6897d751 /drivers
parent     8098da93346c8720aabc983cda70f1c2bed6eb2b (diff)
TILER: Cleaned up tiler-reserve.c
Fixed formatting. Added comments. Standardized method parameter order.
Removed can_together flag that is now unneeded.

Signed-off-by: Lajos Molnar <molnar@ti.com>
Signed-off-by: David Sin <davidsin@ti.com>
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/media/video/tiler/_tiler.h          2
-rw-r--r--  drivers/media/video/tiler/tiler-iface.c     6
-rw-r--r--  drivers/media/video/tiler/tiler-reserve.c   355
3 files changed, 221 insertions, 142 deletions
diff --git a/drivers/media/video/tiler/_tiler.h b/drivers/media/video/tiler/_tiler.h
index 4227fa172f7..24031348bf6 100644
--- a/drivers/media/video/tiler/_tiler.h
+++ b/drivers/media/video/tiler/_tiler.h
@@ -91,7 +91,7 @@ struct tiler_ops {
u32 key, u32 gid, struct process_info *pi,
struct mem_info **info, u32 usr_addr);
void (*reserve_nv12) (u32 n, u32 width, u32 height, u32 align, u32 offs,
- u32 gid, struct process_info *pi, bool can_together);
+ u32 gid, struct process_info *pi);
void (*reserve) (u32 n, enum tiler_fmt fmt, u32 width, u32 height,
u32 align, u32 offs, u32 gid, struct process_info *pi);
void (*unreserve) (u32 gid, struct process_info *pi);
diff --git a/drivers/media/video/tiler/tiler-iface.c b/drivers/media/video/tiler/tiler-iface.c
index ff653628a28..688af6c8f71 100644
--- a/drivers/media/video/tiler/tiler-iface.c
+++ b/drivers/media/video/tiler/tiler-iface.c
@@ -504,8 +504,7 @@ static s32 tiler_ioctl(struct inode *ip, struct file *filp, u32 cmd,
block_info.dim.area.height,
block_info.align,
block_info.offs,
- block_info.group_id, pi,
- ops->nv12_packed);
+ block_info.group_id, pi);
} else {
ops->reserve(block_info.key,
block_info.fmt,
@@ -633,8 +632,7 @@ s32 tiler_reservex_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs,
struct process_info *pi = __get_pi(pid, true);
if (pi)
- ops->reserve_nv12(n, width, height, align, offs, gid, pi,
- ops->nv12_packed);
+ ops->reserve_nv12(n, width, height, align, offs, gid, pi);
return 0;
}
EXPORT_SYMBOL(tiler_reservex_nv12);
diff --git a/drivers/media/video/tiler/tiler-reserve.c b/drivers/media/video/tiler/tiler-reserve.c
index 3f87f5af929..6715d3ddd6a 100644
--- a/drivers/media/video/tiler/tiler-reserve.c
+++ b/drivers/media/video/tiler/tiler-reserve.c
@@ -1,7 +1,9 @@
/*
* tiler-reserve.c
*
- * TILER driver area reservation functions for TI OMAP processors.
+ * TILER driver area reservation functions for TI TILER hardware block.
+ *
+ * Author: Lajos Molnar <molnar@ti.com>
*
* Copyright (C) 2009-2010 Texas Instruments, Inc.
*
@@ -14,94 +16,145 @@
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/slab.h>
#include "_tiler.h"
-static struct tiler_ops *ops;
-static int band_8; /* 8-bit band in slots */
-static int band_16; /* 16-bit band in slots */
-
-/* TILER is designed so that a (w * h) * 8bit area is twice as wide as a
- (w/2 * h/2) * 16bit area. Since having pairs of such 8-bit and 16-bit
- blocks is a common usecase for TILER, we optimize packing these into a
- TILER area */
-
-/* we want to find the most effective packing for the smallest area */
-
-/* we have two algorithms for packing nv12 blocks */
+static struct tiler_ops *ops; /* shared methods and variables */
+static int band_8; /* size of 8-bit band in slots */
+static int band_16; /* size of 16-bit band in slots */
-/* we want to find the most effective packing for the smallest area */
-
-static inline u32 nv12_eff(u16 n, u16 w, u16 area, u16 n_need)
-{
- /* rank by total area needed first */
- return 0x10000000 - DIV_ROUND_UP(n_need, n) * area * 32 +
- /* then by efficiency */
- 1024 * n * ((w * 3 + 1) >> 1) / area;
-}
-
-/* This method is used for both 2D and NV12 packing */
-
-/* return maximum buffers that can be packed next to each other */
-/* o(ffset), w(idth), e(ff_width), b(and), n(um blocks), area( needed) */
-/* assumptions: w > 0, o < a <= e */
-static u32 tiler_best2pack(u16 o, u16 w, u16 e, u16 b, u16 *n, u16 *area)
+/**
+ * Calculate the maximum number of buffers that can be packed next to each other,
+ * and the area they occupy. This method is used for both 2D and NV12 packing.
+ *
+ * @author a0194118 (7/16/2010)
+ *
+ * @param o desired offset
+ * @param w width of one block (>0)
+ * @param a desired alignment
+ * @param b band width (each block must occupy the same number of bands)
+ * @param n pointer to the desired number of blocks to pack. It will be
+ * updated with the maximum number of blocks that can be packed.
+ * @param _area pointer to store total area needed
+ *
+ * @return packing efficiency (0-1024)
+ */
+static u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area)
{
u16 m = 0, max_n = *n; /* m is mostly n - 1 */
+ u16 e = ALIGN(w, a); /* effective width of one block */
u32 eff, best_eff = 0; /* best values */
- u16 stride = ALIGN(o + w, b), ar = stride; /* current area */
+ u16 stride = ALIGN(o + w, b); /* block stride */
+ u16 area = stride; /* area needed (for m + 1 blocks) */
- /*
- * blocks must fit in tiler container and
- * block stride must be the same: defined as align(o + w, b)
- *
- * == align(o + (n-1) * e + w, b) - trim((o + (n-1) * e, b) for all n
- */
+ /* NOTE: block #m+1 occupies the range (o + m * e, o + m * e + w) */
+
+ /* see how many blocks we can pack */
while (m < max_n &&
- o + m * e + w <= ops->width &&
- stride == ALIGN(ar - o - m * e, b)) {
- /* get efficiency */
+ /* blocks must fit in tiler container */
+ o + m * e + w <= ops->width &&
+ /* block stride must be correct */
+ stride == ALIGN(area - o - m * e, b)) {
+
m++;
- eff = m * w * 1024 / ar;
+ eff = m * w * 1024 / area;
if (eff > best_eff) {
+ /* store packing for best efficiency & smallest area */
best_eff = eff;
*n = m;
- if (area)
- *area = ar;
+ if (_area)
+ *_area = area;
}
- ar = ALIGN(o + m * e + w, b);
+ /* update area */
+ area = ALIGN(o + m * e + w, b);
}
return best_eff;
}
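As a rough check of the loop above, here is a minimal user-space sketch, not part of the patch, that mirrors the same stride and area arithmetic. The container width of 256 slots and the sample parameters in main() are assumptions chosen only so the numbers can be tried outside the kernel; the driver itself takes the width from ops->width.

#include <stdio.h>

#define ALIGN(x, a)     (((x) + (a) - 1) & ~((a) - 1))  /* a must be a power of 2 */
#define CONTAINER_WIDTH 256                             /* assumed width in slots */

/* mirrors tiler_best2pack(): how many blocks of width w fit for offset o,
   alignment a and band b, and how many slots (area) they need */
static unsigned int best2pack_sketch(unsigned short o, unsigned short a,
                                     unsigned short b, unsigned short w,
                                     unsigned short *n, unsigned short *_area)
{
        unsigned short m = 0, max_n = *n;
        unsigned short e = ALIGN(w, a);                 /* effective block width */
        unsigned int eff, best_eff = 0;
        unsigned short stride = ALIGN(o + w, b);        /* block stride */
        unsigned short area = stride;                   /* area for m + 1 blocks */

        while (m < max_n &&
               o + m * e + w <= CONTAINER_WIDTH &&
               stride == ALIGN(area - o - m * e, b)) {
                m++;
                eff = m * w * 1024 / area;
                if (eff > best_eff) {                   /* best eff, smallest area */
                        best_eff = eff;
                        *n = m;
                        if (_area)
                                *_area = area;
                }
                area = ALIGN(o + m * e + w, b);
        }
        return best_eff;
}

int main(void)
{
        unsigned short n = 8, area = 0;
        unsigned int eff = best2pack_sketch(0, 32, 64, 20, &n, &area);

        printf("pack %u blocks in %u slots (efficiency %u/1024)\n", n, area, eff);
        return 0;
}

With these sample values the loop settles on two 20-slot blocks sharing a single 64-slot band.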
-/* We have two algorithms for packing nv12 blocks: either pack 8 and 16 bit
- blocks separately as 2D, or pack them into same area */
+/*
+ * NV12 Reservation Functions
+ *
+ * TILER is designed so that a (w * h) * 8bit area is twice as wide as a
+ * (w/2 * h/2) * 16bit area. Since having pairs of such 8-bit and 16-bit
+ * blocks is a common usecase for TILER, we optimize packing these into a
+ * TILER area.
+ *
+ * During reservation we want to find the most effective packing (most used area
+ * in the smallest overall area).
+ *
+ * We have two algorithms for packing nv12 blocks: either pack 8- and 16-bit
+ * blocks into separate container areas, or pack them together into the same area.
+ */
-/* nv12 packing algorithm 1: pack 8 and 16 bit block into separate areas */
-/* assumptions: w > 0, o < a, 2 <= a */
-static u16 nv12_separate(u16 o, u16 w, u16 a, u16 n, u16 *area)
+/**
+ * Calculate effectiveness of packing. We weight total area much higher than
+ * packing efficiency to get the smallest overall container use.
+ *
+ * @param w width of one (8-bit) block
+ * @param n buffers in a packing
+ * @param area width of packing area
+ * @param n_total total number of buffers to be packed
+ * @return effectiveness, the higher the better
+ */
+static inline u32 nv12_eff(u16 w, u16 n, u16 area, u16 n_total)
{
- tiler_best2pack(o, w, ALIGN(w, a), band_8, &n, area);
- tiler_best2pack(o / 2, (w + 1) / 2, ALIGN(w, a) / 2, band_16, &n, area);
+ return 0x10000000 -
+ /* weigh against total area needed (for all buffers) */
+ /* 64-slots = -2048 */
+ DIV_ROUND_UP(n_total, n) * area * 32 +
+ /* packing efficiency (0 - 1024) */
+ 1024 * n * ((w * 3 + 1) >> 1) / area;
+}
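To get a feel for this ranking, the hypothetical snippet below, not part of the patch, scores two made-up candidate packings for the same request. The heavy total-area term means that a candidate needing fewer slots across all reservation rounds wins even when its per-area efficiency is similar.

#include <stdio.h>

#define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

/* mirrors nv12_eff(): score a packing of n blocks of 8-bit width w slots in
   'area' slots, when n_total blocks are needed overall */
static unsigned int nv12_eff_sketch(unsigned short w, unsigned short n,
                                    unsigned short area, unsigned short n_total)
{
        return 0x10000000 -
               DIV_ROUND_UP(n_total, n) * area * 32 +  /* total area needed */
               1024 * n * ((w * 3 + 1) >> 1) / area;   /* packing efficiency */
}

int main(void)
{
        unsigned short w = 10, n_total = 6;

        /* hypothetical candidates: 3 blocks per 48-slot area (used twice)
           versus all 6 blocks in one 112-slot area */
        unsigned int a = nv12_eff_sketch(w, 3, 48, n_total);
        unsigned int b = nv12_eff_sketch(w, 6, 112, n_total);

        printf("3-per-48: %u  6-per-112: %u  -> %s wins\n",
               a, b, a > b ? "3-per-48" : "6-per-112");
        return 0;
}

Here the 48-slot candidate scores higher: two 48-slot areas (96 slots in total) still beat a single 112-slot one.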
+
+/**
+ * Fallback nv12 packing algorithm: pack 8 and 16 bit block into separate
+ * areas.
+ *
+ * @author a0194118 (7/16/2010)
+ *
+ * @param o desired offset (<a)
+ * @param a desired alignment (>=2)
+ * @param w block width (>0)
+ * @param n number of blocks desired
+ * @param area pointer to store total area needed
+ *
+ * @return number of blocks that can be allocated
+ */
+static u16 nv12_separate(u16 o, u16 a, u16 w, u16 n, u16 *area)
+{
+ tiler_best2pack(o, a, band_8, w, &n, area);
+ tiler_best2pack(o >> 1, a >> 1, band_16, (w + 1) >> 1, &n, area);
*area *= 3;
return n;
}
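To make the halving explicit, here is a tiny hypothetical illustration, not part of the patch, of the two packing passes made above: the luma (8-bit) pass uses (o, a, w) as given, and the chroma (16-bit) pass halves each value, rounding the width up.

#include <stdio.h>

int main(void)
{
        /* assumed example request: 8-bit block of 5 slots at offset 6, alignment 8 */
        unsigned short o = 6, a = 8, w = 5;

        printf("8-bit pass : o=%u a=%u w=%u\n", o, a, w);
        printf("16-bit pass: o=%u a=%u w=%u\n", o >> 1, a >> 1, (w + 1) >> 1);
        return 0;
}

The final *area *= 3 above then roughly accounts for the 8-bit area being about twice the size of the 16-bit one.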
-/* We use 4 packing methods for same area packing that give the best result
- for most parameters. We pack into a 64-slot area, so that we don't have
- to worry about stride issues (all blocks get 4K stride). For some of the
- algorithms this could be true even if the area was 128. */
+/*
+ * Specialized NV12 Reservation Algorithms
+ *
+ * We use 4 packing methods that pack nv12 blocks into the same area. Together
+ * these 4 methods give the optimal result for most possible input parameters.
+ *
+ * For now we pack into a 64-slot area, so that we don't have to worry about
+ * stride issues (all blocks get 4K stride). For some of the algorithms this
+ * could be true even if the area was 128.
+ */
-/* packing types are marked using a letter sequence, capital letters denoting
- 8-bit blocks, lower case letters denoting corresponding 16-bit blocks. */
+/**
+ * Packing types are marked using a letter sequence, capital letters denoting
+ * 8-bit blocks, lower case letters denoting corresponding 16-bit blocks.
+ *
+ * All methods have the following parameters. They also define the maximum
+ * number of coordinates that could potentially be packed.
+ *
+ * @param o, a, w, n offset, alignment, width, # of blocks as usual
+ * @param area pointer to store area needed for packing
+ * @param p pointer to store packing coordinates
+ * @return number of blocks that can be packed
+ */
-/* progressive packing: AAAAaaaaBBbbCc into 64-slot area */
-/* o(ffset), w(idth), a(lign), area, n(um blocks), p(acking) */
+/* Method A: progressive packing: AAAAaaaaBBbbCc into 64-slot area */
#define MAX_A 21
-static int nv12_A(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
+static int nv12_A(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
{
u16 x = o, u, l, m = 0;
*area = band_8;
@@ -113,6 +166,7 @@ static int nv12_A(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
/* pack until upper bound */
while (x + w <= u && m < n) {
/* save packing */
+ BUG_ON(m + 1 >= MAX_A);
*p++ = x;
*p++ = l;
l = (*area + x + w + 1) >> 1;
@@ -124,11 +178,14 @@ static int nv12_A(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
return m;
}
-/* regressive packing: cCbbBBaaaaAAAA into 64-slot area */
-static int nv12_revA(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
+/* Method -A: regressive packing: cCbbBBaaaaAAAA into 64-slot area */
+static int nv12_revA(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
{
u16 m;
- n = nv12_A((a - (o + w) % a) % a, w, a, area, n, p);
+
+ /* this is a mirrored packing of method A */
+ n = nv12_A((a - (o + w) % a) % a, a, w, n, area, p);
+
/* reverse packing */
for (m = 0; m < n; m++) {
*p = *area - *p - w;
@@ -139,9 +196,9 @@ static int nv12_revA(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
return n;
}
-/* simple layout: aAbcBdeCfgDhEFGH */
+/* Method B: simple layout: aAbcBdeCfgDhEFGH */
#define MAX_B 8
-static int nv12_B(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
+static int nv12_B(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
{
u16 e = (o + w) % a; /* end offset */
u16 o1 = (o >> 1) % a; /* half offset */
@@ -157,6 +214,7 @@ static int nv12_B(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
2nd half can be before or after */
if (w < a && o < e && e1 <= o && (e2 <= o || o2 >= e))
while (o + w <= *area && m < n) {
+ BUG_ON(m + 1 >= MAX_B);
*p++ = o;
*p++ = o >> 1;
m++;
@@ -165,9 +223,9 @@ static int nv12_B(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
return m;
}
-/* butterfly layout: AAbbaaBB */
+/* Method C: butterfly layout: AAbbaaBB */
#define MAX_C 20
-static int nv12_C(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
+static int nv12_C(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
{
int m = 0;
u16 o2, e = ALIGN(w, a), i = 0, j = 0;
@@ -176,6 +234,7 @@ static int nv12_C(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
m = (min(o2 - 2 * o, 2 * o2 - o - *area) / 3 - w) / e + 1;
for (i = j = 0; i < m && j < n; i++, j++) {
+ BUG_ON(j + 1 >= MAX_C);
*p++ = o + i * e;
*p++ = (o + i * e + *area) >> 1;
if (++j < n) {
@@ -186,15 +245,15 @@ static int nv12_C(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
return j;
}
-/* for large allocation: aA or Aa */
+/* Method D: for large allocation: aA or Aa */
#define MAX_D 1
-static int nv12_D(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
+static int nv12_D(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
{
u16 o1, w1 = (w + 1) >> 1, d;
*area = ALIGN(o + w, band_8);
for (d = 0; n > 0 && d + o + w <= *area; d += a) {
- /* fit 16-bit before 8-bit */
+ /* try to fit 16-bit before 8-bit */
o1 = ((o + d) % band_8) >> 1;
if (o1 + w1 <= o + d) {
*p++ = o + d;
@@ -202,7 +261,7 @@ static int nv12_D(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
return 1;
}
- /* fit 16-bit after 8-bit */
+ /* try to fit 16-bit after 8-bit */
o1 += ALIGN(d + o + w - o1, band_16);
if (o1 + w1 <= *area) {
*p++ = o;
@@ -213,25 +272,33 @@ static int nv12_D(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
return 0;
}
-#define MAX_ANY max(max(MAX_A, MAX_B), max(MAX_C, MAX_D))
-
-/* nv12 packing algorithm 2: pack 8 and 16 bit block into same areas */
-/* assumptions: w > 0, o < a, 2 <= a, packing has at least MAX_ANY * 2 bytes */
-static u16 nv12_together(u16 o, u16 w, u16 a, u16 n, u16 *area, u8 *packing)
+/**
+ * Umbrella nv12 packing method. This selects the best packings from the above
+ * methods. It also contains hardcoded packings for parameter combinations
+ * that have more efficient packings. This method is guaranteed to
+ * provide the optimal packing if 2 <= a <= 64 and w <= 64 and n is large.
+ */
+#define MAX_ANY 21 /* must be MAX(method-MAX-s, hardcoded n-s) */
+static u16 nv12_together(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *packing)
{
- u16 n_best, n2, a_, o_, w_;
+ u16 n_best, a_best, n2, a_, o_, w_;
/* algo results (packings) */
u8 pack_A[MAX_A * 2], pack_rA[MAX_A * 2];
u8 pack_B[MAX_B * 2], pack_C[MAX_C * 2];
u8 pack_D[MAX_D * 2];
- /* These packings are sorted by increasing area, and then by decreasing
- n. We may not get the best efficiency as we are trying to minimize
- the area. */
+ /*
+ * Hardcoded packings. They are sorted by increasing area, and then by
+ * decreasing n. We may not get the best efficiency if less than n
+ * blocks are needed as packings are not necessarily sorted in
+ * increasing order. However, for those n-s one of the other 4 methods
+ * may return the optimal packing.
+ */
u8 packings[] = {
/* n=9, o=2, w=4, a=4, area=64 */
9, 2, 4, 4, 64,
+ /* 8-bit, 16-bit block coordinate pairs */
2, 33, 6, 35, 10, 37, 14, 39, 18, 41,
46, 23, 50, 25, 54, 27, 58, 29,
/* o=0, w=12, a=4, n=3 */
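As a sanity check on the coordinate-pair format, the standalone sketch below, not part of the patch, walks the first hardcoded packing listed above (n=9, w=4, area=64) and verifies that no 8-bit block overlaps any 16-bit block inside the 64-slot area.

#include <stdio.h>

int main(void)
{
        /* first hardcoded packing from above: n=9, o=2, w=4, a=4, area=64 */
        const unsigned char pairs[9][2] = {
                { 2, 33}, { 6, 35}, {10, 37}, {14, 39}, {18, 41},
                {46, 23}, {50, 25}, {54, 27}, {58, 29},
        };
        const unsigned int w8 = 4, w16 = (4 + 1) / 2, area = 64;
        unsigned int i, j, bad = 0;

        for (i = 0; i < 9; i++) {
                /* every block must lie inside the 64-slot area */
                if (pairs[i][0] + w8 > area || pairs[i][1] + w16 > area)
                        bad++;
                /* no 8-bit range may overlap any 16-bit range */
                for (j = 0; j < 9; j++)
                        if (pairs[i][0] < pairs[j][1] + w16 &&
                            pairs[j][1] < pairs[i][0] + w8)
                                bad++;
        }
        printf("%s\n", bad ? "overlap found" : "packing is consistent");
        return 0;
}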
@@ -246,27 +313,30 @@ static u16 nv12_together(u16 o, u16 w, u16 a, u16 n, u16 *area, u8 *packing)
/* start with smallest area algorithms A, B & C, stop if we can
pack all buffers */
- n_best = nv12_A(o, w, a, area, n, pack_A);
+ n_best = nv12_A(o, a, w, n, area, pack_A);
p_best = pack_A;
if (n_best < n) {
- n2 = nv12_revA(o, w, a, area, n, pack_rA);
+ n2 = nv12_revA(o, a, w, n, &a_best, pack_rA);
if (n2 > n_best) {
n_best = n2;
p_best = pack_rA;
+ *area = a_best;
}
}
if (n_best < n) {
- n2 = nv12_B(o, w, a, area, n, pack_B);
+ n2 = nv12_B(o, a, w, n, &a_best, pack_B);
if (n2 > n_best) {
n_best = n2;
p_best = pack_B;
+ *area = a_best;
}
}
if (n_best < n) {
- n2 = nv12_C(o, w, a, area, n, pack_C);
+ n2 = nv12_C(o, a, w, n, &a_best, pack_C);
if (n2 > n_best) {
n_best = n2;
p_best = pack_C;
+ *area = a_best;
}
}
@@ -278,47 +348,52 @@ static u16 nv12_together(u16 o, u16 w, u16 a, u16 n, u16 *area, u8 *packing)
a_ = *p++;
/* stop if we already have a better packing */
if (n2 < n_best)
- p = p_end; /* fake stop */
+ break;
/* check if this packing is satisfactory */
- else if (a_ >= a && o + w + ALIGN(o_ - o, a) <= o_ + w_) {
+ if (a_ >= a && o + w + ALIGN(o_ - o, a) <= o_ + w_) {
*area = *p++;
n_best = min(n2, n);
p_best = p;
break;
- } else {
- /* skip to next packing */
- p += 1 + n2 * 2;
}
+
+ /* skip to next packing */
+ p += 1 + n2 * 2;
}
- /* check whether 8 and 16 bit blocks can be co-packed (this will
- actually be done in the end by the normal allocation) to see if
- this is just as good as packing separately */
+ /*
+ * If so far unsuccessful, check whether 8 and 16 bit blocks can be
+ * co-packed. This will actually be done in the end by the normal
+ * allocation, but we need to reserve a big-enough area.
+ */
if (!n_best) {
- n_best = nv12_D(o, w, a, area, n, pack_D);
+ n_best = nv12_D(o, a, w, n, area, pack_D);
p_best = NULL;
}
- if (p_best && n_best)
+ /* store best packing */
+ if (p_best && n_best) {
+ BUG_ON(n_best > MAX_ANY);
memcpy(packing, p_best, n_best * 2 * sizeof(*pack_A));
+ }
return n_best;
}
-/* can_together: 8-bit and 16-bit views are in the same container */
+/* reserve nv12 blocks */
static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs,
- u32 gid, struct process_info *pi, bool can_together)
+ u32 gid, struct process_info *pi)
{
- u16 w, h, band, a = align, o = offs, eff_w;
+ u16 w, h, band, a = align, o = offs;
struct gid_info *gi;
int res = 0, res2, i;
u16 n_t, n_s, area_t, area_s;
- u8 packing[2 * 21];
+ u8 packing[2 * MAX_ANY];
struct list_head reserved = LIST_HEAD_INIT(reserved);
/* adjust alignment to the largest slot width (128 bytes) */
- a = MAX(PAGE_SIZE / MIN(band_8, band_16), a);
+ a = max_t(u16, PAGE_SIZE / min(band_8, band_16), a);
/* Check input parameters for correctness, and support */
if (!width || !height || !n ||
@@ -337,36 +412,33 @@ static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs,
if (!gi)
return;
- eff_w = ALIGN(w, a);
-
+ /* reserve in groups until failed or all is reserved */
for (i = 0; i < n && res >= 0; i += res) {
/* check packing separately vs together */
- n_s = nv12_separate(o, w, a, n - i, &area_s);
- if (can_together)
- n_t = nv12_together(o, w, a, n - i, &area_t, packing);
+ n_s = nv12_separate(o, a, w, n - i, &area_s);
+ if (ops->nv12_packed)
+ n_t = nv12_together(o, a, w, n - i, &area_t, packing);
else
n_t = 0;
/* pack based on better efficiency */
res = -1;
- if (!can_together ||
- nv12_eff(n_s, w, area_s, n - i) >
- nv12_eff(n_t, w, area_t, n - i)) {
-
- /* reserve blocks separately into a temporary list,
- so that we can free them if unsuccessful */
- res = ops->lay_2d(TILFMT_8BIT, n_s, w, h, band, a, o,
+ if (!ops->nv12_packed ||
+ nv12_eff(w, n_s, area_s, n - i) >
+ nv12_eff(w, n_t, area_t, n - i)) {
+
+ /*
+ * Reserve blocks separately into a temporary list, so
+ * that we can free them if unsuccessful. We need to be
+ * able to reserve both 8- and 16-bit blocks as their
+ * offsets must match.
+ */
+ res = ops->lay_2d(TILFMT_8BIT, n_s, w, h, band_8, a, o,
gi, &reserved);
+ res2 = ops->lay_2d(TILFMT_16BIT, n_s, (w + 1) >> 1, h,
+ band_16, a >> 1, o >> 1, gi, &reserved);
- /* only reserve 16-bit blocks if 8-bit was successful,
- as we will try to match 16-bit areas to an already
- reserved 8-bit area, and there is no guarantee that
- an unreserved 8-bit area will match the offset of
- a singly reserved 16-bit area. */
- res2 = (res < 0 ? res :
- ops->lay_2d(TILFMT_16BIT, n_s, (w + 1) / 2, h,
- band / 2, a / 2, o / 2, gi, &reserved));
- if (res2 < 0 || res != res2) {
+ if (res2 < 0 || res < 0 || res != res2) {
/* clean up */
ops->release(&reserved);
res = -1;
@@ -377,7 +449,7 @@ static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs,
}
/* if separate packing failed, still try to pack together */
- if (res < 0 && can_together && n_t) {
+ if (res < 0 && ops->nv12_packed && n_t) {
/* pack together */
res = ops->lay_nv12(n_t, area_t, w, h, gi, packing);
}
@@ -386,13 +458,19 @@ static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs,
ops->release_gi(gi);
}
-/* reserve 2d blocks (if standard allocator is inefficient) */
+/**
+ * We also optimize packing regular 2D areas as the auto-packing may result in
+ * sub-optimal efficiency. This is most pronounced if the area is wider than
+ * half a PAGE_SIZE (e.g. 2048 pixels in 8-bit mode, or 1024 pixels in 16-bit mode).
+ */
+
+/* reserve 2d blocks */
static void reserve_blocks(u32 n, enum tiler_fmt fmt, u32 width, u32 height,
u32 align, u32 offs, u32 gid,
struct process_info *pi)
{
u32 bpt, res = 0, i;
- u16 o = offs, a = align, band, w, h, e, n_try;
+ u16 o = offs, a = align, band, w, h, n_try;
struct gid_info *gi;
const struct tiler_geom *g;
@@ -402,13 +480,14 @@ static void reserve_blocks(u32 n, enum tiler_fmt fmt, u32 width, u32 height,
fmt < TILFMT_8BIT || fmt > TILFMT_32BIT)
return;
- /* tiler page width in pixels, bytes per pixel, tiler page in bytes */
+ /* tiler slot in bytes */
g = ops->geom(fmt);
bpt = g->slot_w * g->bpp;
- /* check offset. Also, if block is less than half the mapping window,
- the default allocation is sufficient. Also check for basic area
- info. */
+ /*
+ * For blocks narrower than half PAGE_SIZE the default allocation is
+ * sufficient. Also check for basic area info.
+ */
if (width * g->bpp * 2 <= PAGE_SIZE ||
ops->analize(fmt, width, height, &w, &h, &band, &a, &o, NULL))
return;
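To see where this threshold lands, a small hypothetical calculation, not from the patch, for the usual 4 KiB PAGE_SIZE:

#include <stdio.h>

#define PAGE_SIZE 4096  /* assumed 4 KiB pages */

int main(void)
{
        /* widths (in pixels) up to which the default allocator is left alone,
           per the width * bpp * 2 <= PAGE_SIZE check above */
        unsigned int bpp;

        for (bpp = 1; bpp <= 4; bpp <<= 1)
                printf("%u bytes/pixel: default allocation up to %u pixels wide\n",
                       bpp, PAGE_SIZE / (2 * bpp));
        return 0;
}

This reproduces the 2048-pixel (8-bit) and 1024-pixel (16-bit) figures mentioned in the comment further up.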
@@ -418,18 +497,17 @@ static void reserve_blocks(u32 n, enum tiler_fmt fmt, u32 width, u32 height,
if (!gi)
return;
- /* effective width of a buffer */
- e = ALIGN(w, a);
-
- for (i = 0; i < n && res >= 0; i += res) {
+ /* reserve in groups until failed or all is reserved */
+ for (i = 0; i < n && res >= 0; i += res + 1) {
/* blocks to allocate in one area */
- n_try = MIN(n - i, ops->width);
- tiler_best2pack(offs, w, e, band, &n_try, NULL);
+ n_try = min(n - i, ops->width);
+ tiler_best2pack(offs, a, band, w, &n_try, NULL);
res = -1;
while (n_try > 1) {
+ /* adjust res so we fail on 0 return value */
res = ops->lay_2d(fmt, n_try, w, h, band, a, o,
- gi, &gi->reserved);
+ gi, &gi->reserved) - 1;
if (res >= 0)
break;
@@ -442,6 +520,7 @@ static void reserve_blocks(u32 n, enum tiler_fmt fmt, u32 width, u32 height,
ops->release_gi(gi);
}
+/* unreserve blocks for a group id */
static void unreserve_blocks(u32 gid, struct process_info *pi)
{
struct gid_info *gi;
@@ -455,6 +534,7 @@ static void unreserve_blocks(u32 gid, struct process_info *pi)
ops->release_gi(gi);
}
+/* initialize shared method pointers and global static variables */
void tiler_reserve_init(struct tiler_ops *tiler)
{
ops = tiler;
@@ -463,7 +543,8 @@ void tiler_reserve_init(struct tiler_ops *tiler)
ops->reserve = reserve_blocks;
ops->unreserve = unreserve_blocks;
- band_8 = PAGE_SIZE / ops->geom(TILFMT_8BIT)->slot_w;
+ band_8 = PAGE_SIZE / ops->geom(TILFMT_8BIT)->slot_w
+ / ops->geom(TILFMT_8BIT)->bpp;
band_16 = PAGE_SIZE / ops->geom(TILFMT_16BIT)->slot_w
/ ops->geom(TILFMT_16BIT)->bpp;
}
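For reference, assuming the usual 4 KiB page and 64-byte-wide TILER slots (64 pixels per slot at 8 bpp, 32 pixels per slot at 16 bpp; these geometry values are assumptions, not stated in this patch), both band sizes come out to 64 slots. Dividing by bpp is what keeps the 16-bit figure from being twice too large.

#include <stdio.h>

#define PAGE_SIZE 4096  /* assumed */

int main(void)
{
        /* assumed geometry: slot_w is the slot width in pixels */
        unsigned int slot_w_8 = 64, bpp_8 = 1;
        unsigned int slot_w_16 = 32, bpp_16 = 2;

        printf("band_8  = %u slots\n", PAGE_SIZE / slot_w_8 / bpp_8);   /* 64 */
        printf("band_16 = %u slots\n", PAGE_SIZE / slot_w_16 / bpp_16); /* 64 */
        return 0;
}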