diff options
author | Lajos Molnar <molnar@ti.com> | 2011-04-07 08:41:56 +0100 |
---|---|---|
committer | Andy Green <andy.green@linaro.org> | 2011-04-07 08:41:56 +0100 |
commit | 6d80c1675284b35a0ebccac97b3f6ecf291b5089 (patch) | |
tree | b622d5314ff3e705a27ac555556af59e6897d751 /drivers | |
parent | 8098da93346c8720aabc983cda70f1c2bed6eb2b (diff) |
TILER: Cleaned up tiler-reserve.c
Fixed formatting.
Added comments.
Standardized method parameter order.
Removed can_together flag that is now unneeded.
Signed-off-by: Lajos Molnar <molnar@ti.com>
Signed-off-by: David Sin <davidsin@ti.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/media/video/tiler/_tiler.h | 2 | ||||
-rw-r--r-- | drivers/media/video/tiler/tiler-iface.c | 6 | ||||
-rw-r--r-- | drivers/media/video/tiler/tiler-reserve.c | 355 |
3 files changed, 221 insertions, 142 deletions
diff --git a/drivers/media/video/tiler/_tiler.h b/drivers/media/video/tiler/_tiler.h index 4227fa172f7..24031348bf6 100644 --- a/drivers/media/video/tiler/_tiler.h +++ b/drivers/media/video/tiler/_tiler.h @@ -91,7 +91,7 @@ struct tiler_ops { u32 key, u32 gid, struct process_info *pi, struct mem_info **info, u32 usr_addr); void (*reserve_nv12) (u32 n, u32 width, u32 height, u32 align, u32 offs, - u32 gid, struct process_info *pi, bool can_together); + u32 gid, struct process_info *pi); void (*reserve) (u32 n, enum tiler_fmt fmt, u32 width, u32 height, u32 align, u32 offs, u32 gid, struct process_info *pi); void (*unreserve) (u32 gid, struct process_info *pi); diff --git a/drivers/media/video/tiler/tiler-iface.c b/drivers/media/video/tiler/tiler-iface.c index ff653628a28..688af6c8f71 100644 --- a/drivers/media/video/tiler/tiler-iface.c +++ b/drivers/media/video/tiler/tiler-iface.c @@ -504,8 +504,7 @@ static s32 tiler_ioctl(struct inode *ip, struct file *filp, u32 cmd, block_info.dim.area.height, block_info.align, block_info.offs, - block_info.group_id, pi, - ops->nv12_packed); + block_info.group_id, pi); } else { ops->reserve(block_info.key, block_info.fmt, @@ -633,8 +632,7 @@ s32 tiler_reservex_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs, struct process_info *pi = __get_pi(pid, true); if (pi) - ops->reserve_nv12(n, width, height, align, offs, gid, pi, - ops->nv12_packed); + ops->reserve_nv12(n, width, height, align, offs, gid, pi); return 0; } EXPORT_SYMBOL(tiler_reservex_nv12); diff --git a/drivers/media/video/tiler/tiler-reserve.c b/drivers/media/video/tiler/tiler-reserve.c index 3f87f5af929..6715d3ddd6a 100644 --- a/drivers/media/video/tiler/tiler-reserve.c +++ b/drivers/media/video/tiler/tiler-reserve.c @@ -1,7 +1,9 @@ /* * tiler-reserve.c * - * TILER driver area reservation functions for TI OMAP processors. + * TILER driver area reservation functions for TI TILER hardware block. 
+ * + * Author: Lajos Molnar <molnar@ti.com> * * Copyright (C) 2009-2010 Texas Instruments, Inc. * @@ -14,94 +16,145 @@ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/slab.h> #include "_tiler.h" -static struct tiler_ops *ops; -static int band_8; /* 8-bit band in slots */ -static int band_16; /* 16-bit band in slots */ - -/* TILER is designed so that a (w * h) * 8bit area is twice as wide as a - (w/2 * h/2) * 16bit area. Since having pairs of such 8-bit and 16-bit - blocks is a common usecase for TILER, we optimize packing these into a - TILER area */ - -/* we want to find the most effective packing for the smallest area */ - -/* we have two algorithms for packing nv12 blocks */ +static struct tiler_ops *ops; /* shared methods and variables */ +static int band_8; /* size of 8-bit band in slots */ +static int band_16; /* size of 16-bit band in slots */ -/* we want to find the most effective packing for the smallest area */ - -static inline u32 nv12_eff(u16 n, u16 w, u16 area, u16 n_need) -{ - /* rank by total area needed first */ - return 0x10000000 - DIV_ROUND_UP(n_need, n) * area * 32 + - /* then by efficiency */ - 1024 * n * ((w * 3 + 1) >> 1) / area; -} - -/* This method is used for both 2D and NV12 packing */ - -/* return maximum buffers that can be packed next to each other */ -/* o(ffset), w(idth), e(ff_width), b(and), n(um blocks), area( needed) */ -/* assumptions: w > 0, o < a <= e */ -static u32 tiler_best2pack(u16 o, u16 w, u16 e, u16 b, u16 *n, u16 *area) +/** + * Calculate the maximum number buffers that can be packed next to each other, + * and the area they occupy. This method is used for both 2D and NV12 packing. 
+ * + * @author a0194118 (7/16/2010) + * + * @param o desired offset + * @param w width of one block (>0) + * @param a desired alignment + * @param b band width (each block must occupy the same number of bands) + * @param n pointer to the desired number of blocks to pack. It will be + * updated with the maximum number of blocks that can be packed. + * @param _area pointer to store total area needed + * + * @return packing efficiency (0-1024) + */ +static u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area) { u16 m = 0, max_n = *n; /* m is mostly n - 1 */ + u16 e = ALIGN(w, a); /* effective width of one block */ u32 eff, best_eff = 0; /* best values */ - u16 stride = ALIGN(o + w, b), ar = stride; /* current area */ + u16 stride = ALIGN(o + w, b); /* block stride */ + u16 area = stride; /* area needed (for m + 1 blocks) */ - /* - * blocks must fit in tiler container and - * block stride must be the same: defined as align(o + w, b) - * - * == align(o + (n-1) * e + w, b) - trim((o + (n-1) * e, b) for all n - */ + /* NOTE: block #m+1 occupies the range (o + m * e, o + m * e + w) */ + + /* see how many blocks we can pack */ while (m < max_n && - o + m * e + w <= ops->width && - stride == ALIGN(ar - o - m * e, b)) { - /* get efficiency */ + /* blocks must fit in tiler container */ + o + m * e + w <= ops->width && + /* block stride must be correct */ + stride == ALIGN(area - o - m * e, b)) { + m++; - eff = m * w * 1024 / ar; + eff = m * w * 1024 / area; if (eff > best_eff) { + /* store packing for best efficiency & smallest area */ best_eff = eff; *n = m; - if (area) - *area = ar; + if (_area) + *_area = area; } - ar = ALIGN(o + m * e + w, b); + /* update area */ + area = ALIGN(o + m * e + w, b); } return best_eff; } -/* We have two algorithms for packing nv12 blocks: either pack 8 and 16 bit - blocks separately as 2D, or pack them into same area */ +/* + * NV12 Reservation Functions + * + * TILER is designed so that a (w * h) * 8bit area is twice as wide as 
a + * (w/2 * h/2) * 16bit area. Since having pairs of such 8-bit and 16-bit + * blocks is a common usecase for TILER, we optimize packing these into a + * TILER area. + * + * During reservation we want to find the most effective packing (most used area + * in the smallest overall area) + * + * We have two algorithms for packing nv12 blocks: either pack 8- and 16-bit + * blocks into separate container areas, or pack them together into same area. + */ -/* nv12 packing algorithm 1: pack 8 and 16 bit block into separate areas */ -/* assumptions: w > 0, o < a, 2 <= a */ -static u16 nv12_separate(u16 o, u16 w, u16 a, u16 n, u16 *area) +/** + * Calculate effectiveness of packing. We weight total area much higher than + * packing efficiency to get the smallest overall container use. + * + * @param w width of one (8-bit) block + * @param n buffers in a packing + * @param area width of packing area + * @param n_total total number of buffers to be packed + * @return effectiveness, the higher the better + */ +static inline u32 nv12_eff(u16 w, u16 n, u16 area, u16 n_total) { - tiler_best2pack(o, w, ALIGN(w, a), band_8, &n, area); - tiler_best2pack(o / 2, (w + 1) / 2, ALIGN(w, a) / 2, band_16, &n, area); + return 0x10000000 - + /* weigh against total area needed (for all buffers) */ + /* 64-slots = -2048 */ + DIV_ROUND_UP(n_total, n) * area * 32 + + /* packing efficiency (0 - 1024) */ + 1024 * n * ((w * 3 + 1) >> 1) / area; +} + +/** + * Fallback nv12 packing algorithm: pack 8 and 16 bit block into separate + * areas. 
+ * + * @author a0194118 (7/16/2010) + * + * @param o desired offset (<a) + * @param a desired alignment (>=2) + * @param w block width (>0) + * @param n number of blocks desired + * @param area pointer to store total area needed + * + * @return number of blocks that can be allocated + */ +static u16 nv12_separate(u16 o, u16 a, u16 w, u16 n, u16 *area) +{ + tiler_best2pack(o, a, band_8, w, &n, area); + tiler_best2pack(o >> 1, a >> 1, band_16, (w + 1) >> 1, &n, area); *area *= 3; return n; } -/* We use 4 packing methods for same area packing that give the best result - for most parameters. We pack into a 64-slot area, so that we don't have - to worry about stride issues (all blocks get 4K stride). For some of the - algorithms this could be true even if the area was 128. */ +/* + * Specialized NV12 Reservation Algorithms + * + * We use 4 packing methods that pack nv12 blocks into the same area. Together + * these 4 methods give the optimal result for most possible input parameters. + * + * For now we pack into a 64-slot area, so that we don't have to worry about + * stride issues (all blocks get 4K stride). For some of the algorithms this + * could be true even if the area was 128. + */ -/* packing types are marked using a letter sequence, capital letters denoting - 8-bit blocks, lower case letters denoting corresponding 16-bit blocks. */ +/** + * Packing types are marked using a letter sequence, capital letters denoting + * 8-bit blocks, lower case letters denoting corresponding 16-bit blocks. + * + * All methods have the following parameters. They also define the maximum + * number of coordinates that could potentially be packed. 
+ * + * @param o, a, w, n offset, alignment, width, # of blocks as usual + * @param area pointer to store area needed for packing + * @param p pointer to store packing coordinates + * @return number of blocks that can be packed + */ -/* progressive packing: AAAAaaaaBBbbCc into 64-slot area */ -/* o(ffset), w(idth), a(lign), area, n(um blocks), p(acking) */ +/* Method A: progressive packing: AAAAaaaaBBbbCc into 64-slot area */ #define MAX_A 21 -static int nv12_A(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p) +static int nv12_A(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) { u16 x = o, u, l, m = 0; *area = band_8; @@ -113,6 +166,7 @@ static int nv12_A(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p) /* pack until upper bound */ while (x + w <= u && m < n) { /* save packing */ + BUG_ON(m + 1 >= MAX_A); *p++ = x; *p++ = l; l = (*area + x + w + 1) >> 1; @@ -124,11 +178,14 @@ static int nv12_A(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p) return m; } -/* regressive packing: cCbbBBaaaaAAAA into 64-slot area */ -static int nv12_revA(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p) +/* Method -A: regressive packing: cCbbBBaaaaAAAA into 64-slot area */ +static int nv12_revA(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) { u16 m; - n = nv12_A((a - (o + w) % a) % a, w, a, area, n, p); + + /* this is a mirrored packing of method A */ + n = nv12_A((a - (o + w) % a) % a, a, w, n, area, p); + /* reverse packing */ for (m = 0; m < n; m++) { *p = *area - *p - w; @@ -139,9 +196,9 @@ static int nv12_revA(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p) return n; } -/* simple layout: aAbcBdeCfgDhEFGH */ +/* Method B: simple layout: aAbcBdeCfgDhEFGH */ #define MAX_B 8 -static int nv12_B(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p) +static int nv12_B(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) { u16 e = (o + w) % a; /* end offset */ u16 o1 = (o >> 1) % a; /* half offset */ @@ -157,6 +214,7 @@ static int nv12_B(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p) 2nd half can be before or after */ if (w 
< a && o < e && e1 <= o && (e2 <= o || o2 >= e)) while (o + w <= *area && m < n) { + BUG_ON(m + 1 >= MAX_B); *p++ = o; *p++ = o >> 1; m++; @@ -165,9 +223,9 @@ static int nv12_B(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p) return m; } -/* butterfly layout: AAbbaaBB */ +/* Method C: butterfly layout: AAbbaaBB */ #define MAX_C 20 -static int nv12_C(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p) +static int nv12_C(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) { int m = 0; u16 o2, e = ALIGN(w, a), i = 0, j = 0; @@ -176,6 +234,7 @@ static int nv12_C(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p) m = (min(o2 - 2 * o, 2 * o2 - o - *area) / 3 - w) / e + 1; for (i = j = 0; i < m && j < n; i++, j++) { + BUG_ON(j + 1 >= MAX_C); *p++ = o + i * e; *p++ = (o + i * e + *area) >> 1; if (++j < n) { @@ -186,15 +245,15 @@ static int nv12_C(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p) return j; } -/* for large allocation: aA or Aa */ +/* Method D: for large allocation: aA or Aa */ #define MAX_D 1 -static int nv12_D(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p) +static int nv12_D(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) { u16 o1, w1 = (w + 1) >> 1, d; *area = ALIGN(o + w, band_8); for (d = 0; n > 0 && d + o + w <= *area; d += a) { - /* fit 16-bit before 8-bit */ + /* try to fit 16-bit before 8-bit */ o1 = ((o + d) % band_8) >> 1; if (o1 + w1 <= o + d) { *p++ = o + d; @@ -202,7 +261,7 @@ static int nv12_D(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p) return 1; } - /* fit 16-bit after 8-bit */ + /* try to fit 16-bit after 8-bit */ o1 += ALIGN(d + o + w - o1, band_16); if (o1 + w1 <= *area) { *p++ = o; @@ -213,25 +272,33 @@ static int nv12_D(u16 o, u16 w, u16 a, u16 *area, u16 n, u8 *p) return 0; } -#define MAX_ANY max(max(MAX_A, MAX_B), max(MAX_C, MAX_D)) - -/* nv12 packing algorithm 2: pack 8 and 16 bit block into same areas */ -/* assumptions: w > 0, o < a, 2 <= a, packing has at least MAX_ANY * 2 bytes */ -static u16 nv12_together(u16 o, u16 w, u16 a, u16 n, u16 *area, u8 
*packing) +/** + * Umbrella nv12 packing method. This selects the best packings from the above + * methods. It also contains hardcoded packings for parameter combinations + * that have more efficient packings. This method provides is guaranteed to + * provide the optimal packing if 2 <= a <= 64 and w <= 64 and n is large. + */ +#define MAX_ANY 21 /* must be MAX(method-MAX-s, hardcoded n-s) */ +static u16 nv12_together(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *packing) { - u16 n_best, n2, a_, o_, w_; + u16 n_best, a_best, n2, a_, o_, w_; /* algo results (packings) */ u8 pack_A[MAX_A * 2], pack_rA[MAX_A * 2]; u8 pack_B[MAX_B * 2], pack_C[MAX_C * 2]; u8 pack_D[MAX_D * 2]; - /* These packings are sorted by increasing area, and then by decreasing - n. We may not get the best efficiency as we are trying to minimize - the area. */ + /* + * Hardcoded packings. They are sorted by increasing area, and then by + * decreasing n. We may not get the best efficiency if less than n + * blocks are needed as packings are not necessarily sorted in + * increasing order. However, for those n-s one of the other 4 methods + * may return the optimal packing. 
+ */ u8 packings[] = { /* n=9, o=2, w=4, a=4, area=64 */ 9, 2, 4, 4, 64, + /* 8-bit, 16-bit block coordinate pairs */ 2, 33, 6, 35, 10, 37, 14, 39, 18, 41, 46, 23, 50, 25, 54, 27, 58, 29, /* o=0, w=12, a=4, n=3 */ @@ -246,27 +313,30 @@ static u16 nv12_together(u16 o, u16 w, u16 a, u16 n, u16 *area, u8 *packing) /* start with smallest area algorithms A, B & C, stop if we can pack all buffers */ - n_best = nv12_A(o, w, a, area, n, pack_A); + n_best = nv12_A(o, a, w, n, area, pack_A); p_best = pack_A; if (n_best < n) { - n2 = nv12_revA(o, w, a, area, n, pack_rA); + n2 = nv12_revA(o, a, w, n, &a_best, pack_rA); if (n2 > n_best) { n_best = n2; p_best = pack_rA; + *area = a_best; } } if (n_best < n) { - n2 = nv12_B(o, w, a, area, n, pack_B); + n2 = nv12_B(o, a, w, n, &a_best, pack_B); if (n2 > n_best) { n_best = n2; p_best = pack_B; + *area = a_best; } } if (n_best < n) { - n2 = nv12_C(o, w, a, area, n, pack_C); + n2 = nv12_C(o, a, w, n, &a_best, pack_C); if (n2 > n_best) { n_best = n2; p_best = pack_C; + *area = a_best; } } @@ -278,47 +348,52 @@ static u16 nv12_together(u16 o, u16 w, u16 a, u16 n, u16 *area, u8 *packing) a_ = *p++; /* stop if we already have a better packing */ if (n2 < n_best) - p = p_end; /* fake stop */ + break; /* check if this packing is satisfactory */ - else if (a_ >= a && o + w + ALIGN(o_ - o, a) <= o_ + w_) { + if (a_ >= a && o + w + ALIGN(o_ - o, a) <= o_ + w_) { *area = *p++; n_best = min(n2, n); p_best = p; break; - } else { - /* skip to next packing */ - p += 1 + n2 * 2; } + + /* skip to next packing */ + p += 1 + n2 * 2; } - /* check whether 8 and 16 bit blocks can be co-packed (this will - actually be done in the end by the normal allocation) to see if - this is just as good as packing separately */ + /* + * If so far unsuccessful, check whether 8 and 16 bit blocks can be + * co-packed. This will actually be done in the end by the normal + * allocation, but we need to reserve a big-enough area. 
+ */ if (!n_best) { - n_best = nv12_D(o, w, a, area, n, pack_D); + n_best = nv12_D(o, a, w, n, area, pack_D); p_best = NULL; } - if (p_best && n_best) + /* store best packing */ + if (p_best && n_best) { + BUG_ON(n_best > MAX_ANY); memcpy(packing, p_best, n_best * 2 * sizeof(*pack_A)); + } return n_best; } -/* can_together: 8-bit and 16-bit views are in the same container */ +/* reserve nv12 blocks */ static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs, - u32 gid, struct process_info *pi, bool can_together) + u32 gid, struct process_info *pi) { - u16 w, h, band, a = align, o = offs, eff_w; + u16 w, h, band, a = align, o = offs; struct gid_info *gi; int res = 0, res2, i; u16 n_t, n_s, area_t, area_s; - u8 packing[2 * 21]; + u8 packing[2 * MAX_ANY]; struct list_head reserved = LIST_HEAD_INIT(reserved); /* adjust alignment to the largest slot width (128 bytes) */ - a = MAX(PAGE_SIZE / MIN(band_8, band_16), a); + a = max_t(u16, PAGE_SIZE / min(band_8, band_16), a); /* Check input parameters for correctness, and support */ if (!width || !height || !n || @@ -337,36 +412,33 @@ static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs, if (!gi) return; - eff_w = ALIGN(w, a); - + /* reserve in groups until failed or all is reserved */ for (i = 0; i < n && res >= 0; i += res) { /* check packing separately vs together */ - n_s = nv12_separate(o, w, a, n - i, &area_s); - if (can_together) - n_t = nv12_together(o, w, a, n - i, &area_t, packing); + n_s = nv12_separate(o, a, w, n - i, &area_s); + if (ops->nv12_packed) + n_t = nv12_together(o, a, w, n - i, &area_t, packing); else n_t = 0; /* pack based on better efficiency */ res = -1; - if (!can_together || - nv12_eff(n_s, w, area_s, n - i) > - nv12_eff(n_t, w, area_t, n - i)) { - - /* reserve blocks separately into a temporary list, - so that we can free them if unsuccessful */ - res = ops->lay_2d(TILFMT_8BIT, n_s, w, h, band, a, o, + if (!ops->nv12_packed || + nv12_eff(w, n_s, area_s, n - 
i) > + nv12_eff(w, n_t, area_t, n - i)) { + + /* + * Reserve blocks separately into a temporary list, so + * that we can free them if unsuccessful. We need to be + * able to reserve both 8- and 16-bit blocks as the + * offsets of them must match. + */ + res = ops->lay_2d(TILFMT_8BIT, n_s, w, h, band_8, a, o, gi, &reserved); + res2 = ops->lay_2d(TILFMT_16BIT, n_s, (w + 1) >> 1, h, + band_16, a >> 1, o >> 1, gi, &reserved); - /* only reserve 16-bit blocks if 8-bit was successful, - as we will try to match 16-bit areas to an already - reserved 8-bit area, and there is no guarantee that - an unreserved 8-bit area will match the offset of - a singly reserved 16-bit area. */ - res2 = (res < 0 ? res : - ops->lay_2d(TILFMT_16BIT, n_s, (w + 1) / 2, h, - band / 2, a / 2, o / 2, gi, &reserved)); - if (res2 < 0 || res != res2) { + if (res2 < 0 || res < 0 || res != res2) { /* clean up */ ops->release(&reserved); res = -1; @@ -377,7 +449,7 @@ static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs, } /* if separate packing failed, still try to pack together */ - if (res < 0 && can_together && n_t) { + if (res < 0 && ops->nv12_packed && n_t) { /* pack together */ res = ops->lay_nv12(n_t, area_t, w, h, gi, packing); } @@ -386,13 +458,19 @@ static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs, ops->release_gi(gi); } -/* reserve 2d blocks (if standard allocator is inefficient) */ +/** + * We also optimize packing regular 2D areas as the auto-packing may result in + * sub-optimal efficiency. This is most pronounced if the area is wider than + * half a PAGE_SIZE (e.g. 2048 in 8-bit mode, or 1024 in 16-bit mode). 
+ */ + +/* reserve 2d blocks */ static void reserve_blocks(u32 n, enum tiler_fmt fmt, u32 width, u32 height, u32 align, u32 offs, u32 gid, struct process_info *pi) { u32 bpt, res = 0, i; - u16 o = offs, a = align, band, w, h, e, n_try; + u16 o = offs, a = align, band, w, h, n_try; struct gid_info *gi; const struct tiler_geom *g; @@ -402,13 +480,14 @@ static void reserve_blocks(u32 n, enum tiler_fmt fmt, u32 width, u32 height, fmt < TILFMT_8BIT || fmt > TILFMT_32BIT) return; - /* tiler page width in pixels, bytes per pixel, tiler page in bytes */ + /* tiler slot in bytes */ g = ops->geom(fmt); bpt = g->slot_w * g->bpp; - /* check offset. Also, if block is less than half the mapping window, - the default allocation is sufficient. Also check for basic area - info. */ + /* + * For blocks narrower than half PAGE_SIZE the default allocation is + * sufficient. Also check for basic area info. + */ if (width * g->bpp * 2 <= PAGE_SIZE || ops->analize(fmt, width, height, &w, &h, &band, &a, &o, NULL)) return; @@ -418,18 +497,17 @@ static void reserve_blocks(u32 n, enum tiler_fmt fmt, u32 width, u32 height, if (!gi) return; - /* effective width of a buffer */ - e = ALIGN(w, a); - - for (i = 0; i < n && res >= 0; i += res) { + /* reserve in groups until failed or all is reserved */ + for (i = 0; i < n && res >= 0; i += res + 1) { /* blocks to allocate in one area */ - n_try = MIN(n - i, ops->width); - tiler_best2pack(offs, w, e, band, &n_try, NULL); + n_try = min(n - i, ops->width); + tiler_best2pack(offs, a, band, w, &n_try, NULL); res = -1; while (n_try > 1) { + /* adjust res so we fail on 0 return value */ res = ops->lay_2d(fmt, n_try, w, h, band, a, o, - gi, &gi->reserved); + gi, &gi->reserved) - 1; if (res >= 0) break; @@ -442,6 +520,7 @@ static void reserve_blocks(u32 n, enum tiler_fmt fmt, u32 width, u32 height, ops->release_gi(gi); } +/* unreserve blocks for a group id */ static void unreserve_blocks(u32 gid, struct process_info *pi) { struct gid_info *gi; @@ -455,6 
+534,7 @@ static void unreserve_blocks(u32 gid, struct process_info *pi) ops->release_gi(gi); } +/* initialize shared method pointers and global static variables */ void tiler_reserve_init(struct tiler_ops *tiler) { ops = tiler; @@ -463,7 +543,8 @@ void tiler_reserve_init(struct tiler_ops *tiler) ops->reserve = reserve_blocks; ops->unreserve = unreserve_blocks; - band_8 = PAGE_SIZE / ops->geom(TILFMT_8BIT)->slot_w; + band_8 = PAGE_SIZE / ops->geom(TILFMT_8BIT)->slot_w + / ops->geom(TILFMT_8BIT)->bpp; band_16 = PAGE_SIZE / ops->geom(TILFMT_16BIT)->slot_w / ops->geom(TILFMT_16BIT)->bpp; } |