summaryrefslogtreecommitdiff
path: root/drivers/media/video/tiler
diff options
context:
space:
mode:
authorLajos Molnar <molnar@ti.com>2011-04-07 08:41:18 +0100
committerAndy Green <andy.green@linaro.org>2011-04-07 08:41:18 +0100
commit12fb128efa899550414c0b42b12f170b2b569a01 (patch)
tree1675f0e6d3c0a86553236052a1e98e75734dc010 /drivers/media/video/tiler
parent62638d129f1aa1652ceb269309cd857f83f7d0f1 (diff)
TILER: Added close to optimal NV12 packing reservation logic.
These give the optimal packings for block sizes less than 64 slots wide. Also added reservation ioctls. Signed-off-by: Lajos Molnar <molnar@ti.com> Signed-off-by: David Sin <davidsin@ti.com>
Diffstat (limited to 'drivers/media/video/tiler')
-rw-r--r--drivers/media/video/tiler/tiler.c390
-rw-r--r--drivers/media/video/tiler/tiler_pack.c265
-rw-r--r--drivers/media/video/tiler/tiler_pack.h11
3 files changed, 606 insertions, 60 deletions
diff --git a/drivers/media/video/tiler/tiler.c b/drivers/media/video/tiler/tiler.c
index 903df67b86f..1d7b51e6ccd 100644
--- a/drivers/media/video/tiler/tiler.c
+++ b/drivers/media/video/tiler/tiler.c
@@ -38,6 +38,7 @@
#include "../dmm/tmm.h"
#include "tiler_def.h"
#include "tcm/tcm_sita.h" /* Algo Specific header */
+#include "tiler_pack.h"
#include <linux/syscalls.h>
@@ -1285,6 +1286,310 @@ static s32 alloc_block(enum tiler_fmt fmt, u32 width, u32 height,
u32 align, u32 offs, u32 key, u32 gid, struct process_info *pi,
struct mem_info **info);
+/* we have two algorithms for packing nv12 blocks */
+
+/* we want to find the most effective packing for the smallest area */
+
+/* layout reserved 2d areas in a larger area */
+/* NOTE: band, w, h, a(lign), o(ffs) is in slots */
+/* Reserves one container area wide enough for n blocks of w x h slots at
+   effective stride ALIGN(w, align), then creates a mem_info per block and
+   chains each onto the caller's list at @pos.  Returns the number of
+   blocks actually placed (may be < n if a mem_info allocation fails
+   part-way; already-placed blocks are kept), or -ENOMEM if no area fit. */
+static s32 reserve_2d(enum tiler_fmt fmt, u16 n, u16 w, u16 h, u16 band,
+			u16 align, u16 offs, struct gid_info *gi,
+			struct list_head *pos)
+{
+	/* e: aligned stride between blocks; w_res: total width of all n */
+	u16 x, x0, e = ALIGN(w, align), w_res = (n - 1) * e + w;
+	struct mem_info *mi = NULL;
+	struct area_info *ai = NULL;
+
+	printk(KERN_INFO "packing %u %u buffers into %u width\n",
+		n, w, w_res);
+
+	/* calculate dimensions, band, offs and alignment in slots */
+	/* reserve an area */
+	ai = area_new_m(ALIGN(w_res + offs, max(band, align)), h,
+			max(band, align), TCM(fmt), gi);
+	if (!ai)
+		return -ENOMEM;
+
+	/* lay out blocks in the reserved area */
+	/* n is reused from here on as the count of blocks placed so far */
+	for (n = 0, x = offs; x < w_res; x += e, n++) {
+		/* reserve a block struct */
+		mi = kmalloc(sizeof(*mi), GFP_KERNEL);
+		if (!mi)
+			break;
+
+		memset(mi, 0, sizeof(*mi));
+		x0 = ai->area.p0.x + x;
+		_m_add2area(mi, ai, x0, w, &ai->blocks);
+		list_add(&mi->global, pos);
+	}
+
+	/* NOTE(review): no mutex_lock() in this function -- presumably
+	   area_new_m() returns with &mtx held; confirm against its def. */
+	mutex_unlock(&mtx);
+	return n;
+}
+
+/* reserve nv12 blocks if standard allocator is inefficient */
+/* TILER is designed so that a (w * h) * 8bit area is twice as wide as a
+   (w/2 * h/2) * 16bit area.  Since having pairs of such 8-bit and 16-bit
+   blocks is a common usecase for TILER, we optimize packing these into a
+   TILER area */
+/* Packs n (8-bit, 16-bit) block pairs into ONE w-slot-wide 8-bit area of
+   height h.  @p holds 2*n precomputed x-offsets (8-bit offset, then
+   16-bit offset, per pair) relative to the area origin; w1 is the 8-bit
+   block width, so the 16-bit width is (w1 + 1) / 2. */
+static s32 pack_nv12(int n, u16 w, u16 w1, u16 h, struct gid_info *gi,
+			u8 *p)
+{
+	u16 wh = (w1 + 1) >> 1, width, x0;
+	int m;
+
+	struct mem_info *mi = NULL;
+	struct area_info *ai = NULL;
+	struct list_head *pos;
+
+	/* reserve area */
+	ai = area_new_m(w, h, 64, TCM(TILFMT_8BIT), gi);
+	if (!ai)
+		return -ENOMEM;
+
+	/* lay out blocks in the reserved area */
+	for (m = 0; m < 2 * n; m++) {
+		/* even m: 8-bit block (w1 wide), odd m: 16-bit block */
+		width = (m & 1) ? wh : w1;
+		x0 = ai->area.p0.x + *p++;
+
+		/* get insertion head: keep ai->blocks sorted by x */
+		list_for_each(pos, &ai->blocks) {
+			mi = list_entry(pos, struct mem_info, by_area);
+			if (mi->area.p0.x > x0)
+				break;
+		}
+
+		/* reserve a block struct */
+		mi = kmalloc(sizeof(*mi), GFP_KERNEL);
+		if (!mi)
+			break;
+
+		memset(mi, 0, sizeof(*mi));
+
+		_m_add2area(mi, ai, x0, width, pos);
+		list_add(&mi->global, &gi->reserved);
+	}
+
+	/* NOTE(review): returns the requested n even if the kmalloc above
+	   failed part-way (m < 2 * n); caller cannot detect the shortfall.
+	   Also no mutex_lock() here -- presumably area_new_m() returns with
+	   &mtx held, as in reserve_2d(); confirm. */
+	mutex_unlock(&mtx);
+	return n;
+}
+
+/* Ranking metric for choosing between nv12 packing strategies: a larger
+   value is better.  Primary key: fewer total areas needed for n_need
+   blocks (n per area of the given size); secondary key: slot efficiency
+   of one area.  The 0x10000000 bias keeps the result positive. */
+static inline u32 nv12_eff(u16 n, u16 w, u16 area, u16 n_need)
+{
+	/* rank by total area needed first */
+	return 0x10000000 - DIV_ROUND_UP(n_need, n) * area * 32 +
+		/* then by efficiency */
+		/* (w * 3 + 1) / 2 = slots used per pair: w (8-bit) + w/2 */
+		1024 * n * ((w * 3 + 1) >> 1) / area;
+}
+
+/* Reserve n NV12 buffer pairs (8-bit luma block + half-width 16-bit
+   chroma block) for group @gid, choosing per batch between packing the
+   two formats into separate areas or into one shared area, whichever
+   nv12_eff() ranks better.  Silently returns on bad parameters or
+   allocation failure (void, best-effort semantics). */
+static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs,
+			 u32 gid, struct process_info *pi)
+{
+	/* adjust alignment to at least 128 bytes (16-bit slot width) */
+	u16 w, h, band, a = MAX(128, align), o = offs, eff_w;
+	struct gid_info *gi;
+	int res = 0, res2, i;
+	u16 n_t, n_s, area_t, area_s;
+	/* NOTE(review): 21 here must equal MAX_A in tiler_pack.c (the
+	   largest packing any nv12_* algorithm emits) -- keep in sync */
+	u8 packing[2 * 21];
+	struct list_head reserved = LIST_HEAD_INIT(reserved);
+	struct mem_info *mi, *mi_;
+	/* co-packing only possible if both formats share one TMM backend */
+	bool can_together = TMM(TILFMT_8BIT) == TMM(TILFMT_16BIT);
+
+	/* Check input parameters for correctness, and support */
+	if (!width || !height || !n ||
+	    offs >= (align ? : PAGE_SIZE) || offs & 1 ||
+	    align >= PAGE_SIZE || TCM(TILFMT_8BIT) != TCM(TILFMT_16BIT) ||
+	    n > TILER_WIDTH * TILER_HEIGHT / 2)
+		return;
+
+	/* calculate dimensions, band, offs and alignment in slots */
+	if (__analize_area(TILFMT_8BIT, width, height, &w, &h, &band, &a, &o,
+			   NULL))
+		return;
+
+	/* get group context */
+	mutex_lock(&mtx);
+	gi = _m_get_gi(pi, gid);
+	mutex_unlock(&mtx);
+	if (!gi)
+		return;
+
+	eff_w = ALIGN(w, a);
+
+	/* NOTE(review): if an iteration ever yields res == 0 this loop
+	   never advances i -- presumably reserve_2d/pack_nv12 only return
+	   positive counts or negatives here; verify */
+	for (i = 0; i < n && res >= 0; i += res) {
+		/* check packing separately vs together */
+		n_s = nv12_separate(o, w, a, n - i, &area_s);
+		if (can_together)
+			n_t = nv12_together(o, w, a, n - i, &area_t, packing);
+		else
+			n_t = 0;
+
+		/* pack based on better efficiency */
+		res = -1;
+		if (!can_together ||
+		    nv12_eff(n_s, w, area_s, n - i) >
+		    nv12_eff(n_t, w, area_t, n - i)) {
+			/* pack separately */
+
+			res = reserve_2d(TILFMT_8BIT, n_s, w, h, band, a, o, gi,
+					 &reserved);
+
+			/* only reserve 16-bit blocks if 8-bit was successful,
+			   as we will try to match 16-bit areas to an already
+			   reserved 8-bit area, and there is no guarantee that
+			   an unreserved 8-bit area will match the offset of
+			   a singly reserved 16-bit area. */
+			res2 = (res < 0 ? res :
+				reserve_2d(TILFMT_16BIT, n_s, (w + 1) / 2, h,
+					   band / 2, a / 2, o / 2, gi, &reserved));
+			if (res2 < 0 || res != res2) {
+				/* clean up: drop partial 8/16-bit sets so we
+				   never keep an unmatched half of a pair */
+				mutex_lock(&mtx);
+				list_for_each_entry_safe(mi, mi_, &reserved,
+							 global)
+					_m_free(mi);
+				mutex_unlock(&mtx);
+				res = -1;
+			} else {
+				/* add list to reserved */
+				mutex_lock(&mtx);
+				list_splice_init(&reserved, &gi->reserved);
+				mutex_unlock(&mtx);
+			}
+		}
+
+		/* if separate packing failed, still try to pack together */
+		if (res < 0 && can_together && n_t) {
+			/* pack together: area_t is the shared-area width,
+			   w the 8-bit block width (pack_nv12's w / w1) */
+			res = pack_nv12(n_t, area_t, w, h, gi, packing);
+		}
+	}
+
+	/* drop the reference presumably taken by _m_get_gi(); confirm */
+	mutex_lock(&mtx);
+	gi->refs--;
+	_m_try_free_group(gi);
+	mutex_unlock(&mtx);
+}
+
+/* reserve 2d blocks (if standard allocator is inefficient) */
+/* Reserves n w x h blocks of format @fmt for group @gid, batching as many
+   blocks per container area as tiler_best2pack() deems efficient.  Void,
+   best-effort: partial reservations are kept (see note at loop exit). */
+static void reserve_blocks(u32 n, enum tiler_fmt fmt, u32 width, u32 height,
+			   u32 align, u32 offs, u32 gid,
+			   struct process_info *pi)
+{
+	/* NOTE(review): res is u32 but gets assigned -1 and compared with
+	   ">= 0" below -- the comparison is always true for unsigned, so
+	   the retry loop's "if (res >= 0) break" fires even on failure and
+	   the outer loop only terminates because i += (u32)-1 wraps i past
+	   n.  res should be s32. */
+	u32 til_width, bpp, bpt, res = 0, i;
+	u16 o = offs, a = align, band, w, h, e, n_try;
+	struct gid_info *gi;
+
+	/* Check input parameters for correctness, and support */
+	if (!width || !height || !n ||
+	    align > PAGE_SIZE || offs >= (align ? : PAGE_SIZE) ||
+	    fmt < TILFMT_8BIT || fmt > TILFMT_32BIT)
+		return;
+
+	/* tiler page width in pixels, bytes per pixel, tiler page in bytes */
+	til_width = fmt == TILFMT_32BIT ? 32 : 64;
+	bpp = 1 << (fmt - TILFMT_8BIT);
+	bpt = til_width * bpp;
+
+	/* check offset. Also, if block is less than half the mapping window,
+	   the default allocation is sufficient. Also check for basic area
+	   info. */
+	if (width * bpp * 2 <= PAGE_SIZE ||
+	    __analize_area(fmt, width, height, &w, &h, &band, &a, &o, NULL))
+		return;
+
+	/* get group id */
+	mutex_lock(&mtx);
+	gi = _m_get_gi(pi, gid);
+	mutex_unlock(&mtx);
+	if (!gi)
+		return;
+
+	/* effective width of a buffer */
+	e = ALIGN(w, a);
+
+	for (i = 0; i < n && res >= 0; i += res) {
+		/* blocks to allocate in one area */
+		n_try = MIN(n - i, TILER_WIDTH);
+		tiler_best2pack(offs, w, e, band, &n_try, NULL);
+
+		res = -1;
+		/* n_try == 1 is never attempted: a single block is what the
+		   default allocator would do anyway (see check above) */
+		while (n_try > 1) {
+			res = reserve_2d(fmt, n_try, w, h, band, a, o, gi,
+					 &gi->reserved);
+			if (res >= 0)
+				break;
+
+			/* reduce n if failed to allocate area */
+			n_try--;
+		}
+	}
+	/* keep reserved blocks even if failed to reserve all */
+
+	mutex_lock(&mtx);
+	gi->refs--;
+	_m_try_free_group(gi);
+	mutex_unlock(&mtx);
+}
+
+/* Public API: reserve n 2D blocks on behalf of process @pid.
+   Always returns 0 -- reservation is best-effort and failures (including
+   a missing process context) are silently ignored. */
+s32 tiler_reservex(u32 n, enum tiler_fmt fmt, u32 width, u32 height,
+		   u32 align, u32 offs, u32 gid, pid_t pid)
+{
+	struct process_info *pi = __get_pi(pid, true);
+
+	if (pi)
+		reserve_blocks(n, fmt, width, height, align, offs, gid, pi);
+	return 0;
+}
+EXPORT_SYMBOL(tiler_reservex);
+
+/* Convenience wrapper: reserve for the current process, default group 0 */
+s32 tiler_reserve(u32 n, enum tiler_fmt fmt, u32 width, u32 height,
+		  u32 align, u32 offs)
+{
+	return tiler_reservex(n, fmt, width, height, align, offs,
+			      0, current->tgid);
+}
+EXPORT_SYMBOL(tiler_reserve);
+
+/* reserve area for n identical buffers */
+/* Public API: reserve n NV12 buffer pairs on behalf of process @pid.
+   Always returns 0 -- reservation is best-effort (see reserve_nv12). */
+s32 tiler_reservex_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs,
+			u32 gid, pid_t pid)
+{
+	struct process_info *pi = __get_pi(pid, true);
+
+	if (pi)
+		reserve_nv12(n, width, height, align, offs, gid, pi);
+	return 0;
+}
+EXPORT_SYMBOL(tiler_reservex_nv12);
+
+/* Convenience wrapper: NV12 reserve for current process, default group 0 */
+s32 tiler_reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs)
+{
+	return tiler_reservex_nv12(n, width, height, align, offs,
+				   0, current->tgid);
+}
+EXPORT_SYMBOL(tiler_reserve_nv12);
+
+/* Free every reserved (unallocated, unreferenced) block of group @gid.
+   Called from the TILIOC_URBLK ioctl.  NOTE(review): non-static but no
+   prototype is visible in this patch -- confirm one exists in a header. */
+void unreserve_blocks(struct process_info *pi, u32 gid)
+{
+	struct gid_info *gi;
+	struct mem_info *mi, *mi_;
+
+	mutex_lock(&mtx);
+	gi = _m_get_gi(pi, gid);
+	if (!gi)
+		goto done;
+	/* we have the mutex, so no need to keep reference */
+	gi->refs--;
+
+	/* reserved blocks must never be in use -- catch logic errors hard */
+	list_for_each_entry_safe(mi, mi_, &gi->reserved, global) {
+		BUG_ON(mi->refs || mi->alloced);
+		_m_free(mi);
+	}
+done:
+	mutex_unlock(&mtx);
+}
+
static s32 tiler_ioctl(struct inode *ip, struct file *filp, u32 cmd,
unsigned long arg)
{
@@ -1454,6 +1759,31 @@ static s32 tiler_ioctl(struct inode *ip, struct file *filp, u32 cmd,
mutex_unlock(&mtx);
return -EFAULT;
break;
+ case TILIOC_PRBLK:
+ if (copy_from_user(&block_info, (void __user *)arg,
+ sizeof(block_info)))
+ return -EFAULT;
+
+ if (block_info.fmt == TILFMT_8AND16) {
+ reserve_nv12(block_info.key,
+ block_info.dim.area.width,
+ block_info.dim.area.height,
+ block_info.align,
+ block_info.offs,
+ block_info.group_id, pi);
+ } else {
+ reserve_blocks(block_info.key,
+ block_info.fmt,
+ block_info.dim.area.width,
+ block_info.dim.area.height,
+ block_info.align,
+ block_info.offs,
+ block_info.group_id, pi);
+ }
+ break;
+ case TILIOC_URBLK:
+ unreserve_blocks(pi, arg);
+ break;
case TILIOC_QBLK:
if (copy_from_user(&block_info, (void __user *)arg,
sizeof(block_info)))
@@ -1562,66 +1892,6 @@ s32 tiler_alloc(struct tiler_block_t *blk, enum tiler_fmt fmt,
}
EXPORT_SYMBOL(tiler_alloc);
-static void reserve_nv12_blocks(u32 n, u32 width, u32 height,
- u32 align, u32 offs, u32 gid, pid_t pid)
-{
-}
-
-static void reserve_blocks(u32 n, enum tiler_fmt fmt, u32 width, u32 height,
- u32 align, u32 offs, u32 gid, pid_t pid)
-{
-}
-
-/* reserve area for n identical buffers */
-s32 tiler_reservex(u32 n, struct tiler_buf_info *b, pid_t pid)
-{
- u32 i;
-
- if (b->num_blocks > TILER_MAX_NUM_BLOCKS)
- return -EINVAL;
-
- for (i = 0; i < b->num_blocks; i++) {
- /* check for NV12 reservations */
- if (i + 1 < b->num_blocks &&
- b->blocks[i].fmt == TILFMT_8BIT &&
- b->blocks[i + 1].fmt == TILFMT_16BIT &&
- b->blocks[i].dim.area.height ==
- b->blocks[i + 1].dim.area.height &&
- b->blocks[i].dim.area.width ==
- b->blocks[i + 1].dim.area.width) {
- reserve_nv12_blocks(n,
- b->blocks[i].dim.area.width,
- b->blocks[i].dim.area.height,
- 0, /* align */
- 0, /* offs */
- 0, /* gid */
- pid);
- i++;
- } else if (b->blocks[i].fmt >= TILFMT_8BIT &&
- b->blocks[i].fmt <= TILFMT_32BIT) {
- /* other 2D reservations */
- reserve_blocks(n,
- b->blocks[i].fmt,
- b->blocks[i].dim.area.width,
- b->blocks[i].dim.area.height,
- 0, /* align */
- 0, /* offs */
- 0, /* gid */
- pid);
- } else {
- return -EINVAL;
- }
- }
- return 0;
-}
-EXPORT_SYMBOL(tiler_reservex);
-
-s32 tiler_reserve(u32 n, struct tiler_buf_info *b)
-{
- return tiler_reservex(n, b, current->tgid);
-}
-EXPORT_SYMBOL(tiler_reserve);
-
static void __exit tiler_exit(void)
{
struct process_info *pi = NULL, *pi_ = NULL;
diff --git a/drivers/media/video/tiler/tiler_pack.c b/drivers/media/video/tiler/tiler_pack.c
index 4e20b356ed2..542b789f8f7 100644
--- a/drivers/media/video/tiler/tiler_pack.c
+++ b/drivers/media/video/tiler/tiler_pack.c
@@ -19,4 +19,269 @@
#include <mach/tiler.h>
#include "tiler_def.h"
+/* we want to find the most effective packing for the smallest area */
+
+/* This method is used for both 2D and NV12 packing */
+
+/* return maximum buffers that can be packed next to each other */
+/* o(ffset), w(idth), e(ff_width), b(and), n(um blocks), area( needed) */
+/* assumptions: w > 0, o < a <= e */
+/* Greedy search: grows the run of blocks one at a time while they fit in
+   the container width and keep a constant stride, tracking the count m
+   with the best slots-used/area ratio.  On return *n is that best count
+   (never larger than the *n passed in) and *area, if non-NULL, its area
+   width.  Returns the best efficiency scaled by 1024. */
+u32 tiler_best2pack(u16 o, u16 w, u16 e, u16 b, u16 *n, u16 *area)
+{
+	u16 m = 0, max_n = *n;		/* m is mostly n - 1 */
+	u32 eff, best_eff = 0;		/* best values */
+	u16 stride = ALIGN(o + w, b), ar = stride;	/* current area */
+
+	/*
+	 * blocks must fit in tiler container and
+	 * block stride must be the same: defined as align(o + w, b)
+	 *
+	 * == align(o + (n-1) * e + w, b) - trim((o + (n-1) * e, b) for all n
+	 */
+	while (m < max_n &&
+	       o + m * e + w <= TILER_WIDTH &&
+	       stride == ALIGN(ar - o - m * e, b)) {
+		/* get efficiency */
+		m++;
+		eff = m * w * 1024 / ar;
+		if (eff > best_eff) {
+			best_eff = eff;
+			*n = m;
+			if (area)
+				*area = ar;
+		}
+		ar = ALIGN(o + m * e + w, b);
+	}
+
+	return best_eff;
+}
+
+/* We have two algorithms for packing nv12 blocks: either pack 8 and 16 bit
+   blocks separately as 2D, or pack them into same area */
+
+/* nv12 packing algorithm 1: pack 8 and 16 bit block into separate areas */
+/* assumptions: w > 0, o < a, 2 <= a */
+/* n is threaded through both calls, so the second (16-bit, half-width,
+   32-slot band) pack further constrains the count found by the first;
+   *area ends up as the 16-bit area width, and *3 converts it to the
+   combined total (8-bit area is twice the 16-bit one: 2x + x = 3x). */
+u16 nv12_separate(u16 o, u16 w, u16 a, u16 n, u16 *area)
+{
+	tiler_best2pack(o, w, ALIGN(w, a), 64, &n, area);
+	tiler_best2pack(o / 2, (w + 1) / 2, ALIGN(w, a) / 2, 32, &n, area);
+	*area *= 3;
+	return n;
+}
+
+/* We use 4 packing methods for same area packing that give the best result
+   for most parameters. We pack into a 64-slot area, so that we don't have
+   to worry about stride issues (all blocks get 4K stride). For some of the
+   algorithms this could be true even if the area was 128. */
+
+/* packing types are marked using a letter sequence, capital letters denoting
+   8-bit blocks, lower case letters denoting corresponding 16-bit blocks. */
+
+/* progressive packing: AAAAaaaaBBbbCc into 64-slot area */
+/* o(ffset), w(idth), a(lign), area, n(um blocks), p(acking) */
+/* MAX_A: upper bound on pairs this algorithm can emit; the caller's
+   packing buffer (reserve_nv12) is sized from this constant */
+#define MAX_A 21
+static int nv12_A(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
+{
+	/* x: next 8-bit x; u: 8-bit upper bound; l: next 16-bit x */
+	u16 x = o, u, l, m = 0;
+	*area = 64;
+
+	/* emits (8-bit offset, 16-bit offset) byte pairs into p */
+	while (x + w < *area && m < n) {
+		/* current 8bit upper bound (a) is next 8bit lower bound (B) */
+		l = u = (*area + x) >> 1;
+
+		/* pack until upper bound */
+		while (x + w <= u && m < n) {
+			/* save packing */
+			*p++ = x;
+			*p++ = l;
+			l = (*area + x + w + 1) >> 1;
+			x = ALIGN(x + w - o, a) + o;
+			m++;
+		}
+		x = ALIGN(l - o, a) + o;	/* set new lower bound */
+	}
+	return m;
+}
+
+/* regressive packing: cCbbBBaaaaAAAA into 64-slot area */
+/* Runs nv12_A with a mirrored offset, then mirrors every emitted offset
+   within the area so the layout is the reverse of the progressive one. */
+static int nv12_revA(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
+{
+	u16 m;
+	/* (a - (o + w) % a) % a is the offset that mirrors to o */
+	n = nv12_A((a - (o + w) % a) % a, w, a, area, n, p);
+	/* reverse packing */
+	for (m = 0; m < n; m++) {
+		*p = *area - *p - w;		/* mirror 8-bit offset */
+		p++;
+		*p = *area - *p - ((w + 1) >> 1);	/* mirror 16-bit */
+		p++;
+	}
+	return n;
+}
+
+/* simple layout: aAbcBdeCfgDhEFGH */
+/* Interleaves each 16-bit block in the alignment gap before its 8-bit
+   block (16-bit offset = 8-bit offset / 2); only valid when the modular
+   checks below prove the halves never overlap any 8-bit block. */
+#define MAX_B 8
+static int nv12_B(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
+{
+	u16 e = (o + w) % a;			/* end offset */
+	u16 o1 = (o >> 1) % a;			/* half offset */
+	u16 e1 = ((o + w + 1) >> 1) % a;	/* half end offset */
+	u16 o2 = o1 + (a >> 2);			/* 2nd half offset */
+	u16 e2 = e1 + (a >> 2);			/* 2nd half end offset */
+	u16 m = 0;
+	*area = 64;
+
+	/* ensure 16-bit blocks don't overlap 8-bit blocks */
+
+	/* width cannot wrap around alignment, half block must be before block,
+	   2nd half can be before or after */
+	if (w < a && o < e && e1 <= o && (e2 <= o || o2 >= e))
+		while (o + w <= *area && m < n) {
+			*p++ = o;
+			*p++ = o >> 1;
+			m++;
+			o += a;
+		}
+	return m;
+}
+
+/* butterfly layout: AAbbaaBB */
+/* Packs block pairs inward from both ends of the area symmetrically:
+   each iteration places one pair from the left edge and (if more blocks
+   remain) its mirror pair from the right edge. */
+#define MAX_C 20
+static int nv12_C(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
+{
+	int m = 0;
+	u16 o2, e = ALIGN(w, a), i = 0, j = 0;
+	*area = 64;
+	o2 = *area - (a - (o + w) % a) % a;	/* end of last possible block */
+
+	/* m: max pairs-per-side that fit without the two sides colliding */
+	m = (min(o2 - 2 * o, 2 * o2 - o - *area) / 3 - w) / e + 1;
+	for (i = j = 0; i < m && j < n; i++, j++) {
+		/* left-edge pair: 16-bit half sits in the right half */
+		*p++ = o + i * e;
+		*p++ = (o + i * e + *area) >> 1;
+		if (++j < n) {
+			/* mirrored right-edge pair */
+			*p++ = o2 - i * e - w;
+			*p++ = (o2 - i * e - w) >> 1;
+		}
+	}
+	return j;
+}
+
+/* for large allocation: aA or Aa */
+/* Fallback for a single wide pair: tries to tuck the 16-bit half either
+   before or after the 8-bit block, sliding the 8-bit block right by @a
+   each attempt.  Emits at most one pair (MAX_D). */
+#define MAX_D 1
+static int nv12_D(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p)
+{
+	u16 o1, w1 = (w + 1) >> 1, d;
+	*area = ALIGN(o + w, 64);
+
+	for (d = 0; n > 0 && d + o + w <= *area; d += a) {
+		/* fit 16-bit before 8-bit */
+		o1 = ((o + d) % 64) >> 1;
+		if (o1 + w1 <= o + d) {
+			*p++ = o + d;
+			*p++ = o1;
+			return 1;
+		}
+
+		/* fit 16-bit after 8-bit */
+		o1 += ALIGN(d + o + w - o1, 32);
+		if (o1 + w1 <= *area) {
+			*p++ = o;
+			*p++ = o1;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+#define MAX_ANY max(max(MAX_A, MAX_B), max(MAX_C, MAX_D))
+
+/* nv12 packing algorithm 2: pack 8 and 16 bit block into same areas */
+/* assumptions: w > 0, o < a, 2 <= a, packing has at least MAX_ANY * 2 bytes */
+/* Tries algorithms A, revA, B, C in turn (stopping early once all n fit),
+   then a table of hand-tuned special-case packings, and finally D as a
+   last resort.  Copies the winning offset pairs into @packing and returns
+   how many pairs it holds; *area is the chosen area width. */
+u16 nv12_together(u16 o, u16 w, u16 a, u16 n, u16 *area, u8 *packing)
+{
+	u16 n_best, n2, a_, o_, w_;
+
+	/* algo results (packings) */
+	u8 pack_A[MAX_A * 2], pack_rA[MAX_A * 2];
+	u8 pack_B[MAX_B * 2], pack_C[MAX_C * 2];
+	u8 pack_D[MAX_D * 2];
+
+	/* These packings are sorted by increasing area, and then by decreasing
+	   n. We may not get the best efficiency as we are trying to minimize
+	   the area. */
+	/* table entry format: n, o, w, a, area, then n*2 offset bytes;
+	   a single 0 terminates the table */
+	u8 packings[] = {
+		/* n=9, o=2, w=4, a=4, area=64 */
+		9, 2, 4, 4, 64,
+		2, 33, 6, 35, 10, 37, 14, 39, 18, 41,
+		46, 23, 50, 25, 54, 27, 58, 29,
+		/* o=0, w=12, a=4, n=3 */
+		3, 0, 12, 4, 64,
+		0, 32, 12, 38, 48, 24,
+		/* end */
+		0
+	}, *p = packings, *p_best = NULL, *p_end;
+	p_end = packings + sizeof(packings) - 1;
+
+	/* see which method gives the best packing */
+
+	/* start with smallest area algorithms A, B & C, stop if we can
+	   pack all buffers */
+	n_best = nv12_A(o, w, a, area, n, pack_A);
+	p_best = pack_A;
+	if (n_best < n) {
+		n2 = nv12_revA(o, w, a, area, n, pack_rA);
+		if (n2 > n_best) {
+			n_best = n2;
+			p_best = pack_rA;
+		}
+	}
+	if (n_best < n) {
+		n2 = nv12_B(o, w, a, area, n, pack_B);
+		if (n2 > n_best) {
+			n_best = n2;
+			p_best = pack_B;
+		}
+	}
+	if (n_best < n) {
+		n2 = nv12_C(o, w, a, area, n, pack_C);
+		if (n2 > n_best) {
+			n_best = n2;
+			p_best = pack_C;
+		}
+	}
+
+	/* traverse any special packings */
+	while (*p) {
+		n2 = *p++;
+		o_ = *p++;
+		w_ = *p++;
+		a_ = *p++;
+		/* stop if we already have a better packing */
+		if (n2 < n_best)
+			p = p_end;	/* fake stop: p_end holds the 0 */
+
+		/* check if this packing is satisfactory */
+		else if (a_ >= a && o + w + ALIGN(o_ - o, a) <= o_ + w_) {
+			*area = *p++;
+			n_best = min(n2, n);
+			p_best = p;
+			break;
+		} else {
+			/* skip to next packing */
+			p += 1 + n2 * 2;
+		}
+	}
+
+	/* check whether 8 and 16 bit blocks can be co-packed (this will
+	   actually be done in the end by the normal allocation) to see if
+	   this is just as good as packing separately */
+	if (!n_best) {
+		n_best = nv12_D(o, w, a, area, n, pack_D);
+		p_best = NULL;
+	}
+
+	if (p_best && n_best)
+		memcpy(packing, p_best, n_best * 2 * sizeof(*pack_A));
+
+	return n_best;
+}
diff --git a/drivers/media/video/tiler/tiler_pack.h b/drivers/media/video/tiler/tiler_pack.h
new file mode 100644
index 00000000000..e454b1d240f
--- /dev/null
+++ b/drivers/media/video/tiler/tiler_pack.h
@@ -0,0 +1,11 @@
+#ifndef __TILER_PACK_H__
+#define __TILER_PACK_H__
+
+/* best side-by-side packing of identical blocks; see tiler_pack.c */
+u32 tiler_best2pack(u16 o, u16 w, u16 e, u16 b, u16 *n, u16 *area);
+
+/* pack NV12 8/16-bit block pairs into a shared area */
+u16 nv12_together(u16 o, u16 w, u16 a, u16 n, u16 *area, u8 *packing);
+
+/* pack NV12 8-bit and 16-bit blocks into separate areas */
+/* NOTE(review): declared with "int n" here but defined with "u16 n" in
+   tiler_pack.c -- harmless today but the prototypes should match */
+u16 nv12_separate(u16 o, u16 w, u16 a, int n, u16 *area);
+
+
+#endif