package/libsquish/0001-kodi.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327

Add Kodi-specific patch

Kodi 15.0 contains an updated version of libsquish:
https://github.com/xbmc/xbmc/tree/master/tools/depends/native/libsquish-native

The OpenElec project provides a separate tarball including the Kodi-
specific patches:
http://sources.openelec.tv/devel/libsquish-1.10-openelec.tar.gz

This patch contains the relevant diff between upstream libsquish 1.13
and the OpenElec tarball.

Signed-off-by: Bernd Kuhls <bernd.kuhls@t-online.de>

diff -uwNr 1.13/squish.cpp libsquish-1.10-openelec/squish.cpp
--- 1.13/squish.cpp	2015-04-30 12:48:49.000000000 +0200
+++ libsquish-1.10-openelec/squish.cpp	2015-01-09 10:58:43.000000000 +0100
@@ -23,6 +23,7 @@
 
    -------------------------------------------------------------------------- */
 
+#include <string.h>
 #include <squish.h>
 #include "colourset.h"
 #include "maths.h"
@@ -39,7 +40,7 @@
     // grab the flag bits
     int method = flags & ( kDxt1 | kDxt3 | kDxt5 );
     int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
-    int extra = flags & kWeightColourByAlpha;
+	int extra = flags & ( kWeightColourByAlpha | kSourceBGRA );
 
     // set defaults
     if( method != kDxt3 && method != kDxt5 )
@@ -124,8 +125,30 @@
     return blockcount*blocksize;
 }
 
+void CopyRGBA( u8 const* source, u8* dest, int flags )
+{
+	if (flags & kSourceBGRA)
+	{
+		// convert from bgra to rgba
+		dest[0] = source[2];
+		dest[1] = source[1];
+		dest[2] = source[0];
+		dest[3] = source[3];
+	}
+	else
+	{
+		for( int i = 0; i < 4; ++i )
+			*dest++ = *source++;
+	}
+}
+
 void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
 {
+	CompressImage(rgba, width, height, width*4, blocks, flags, metric);
+}
+  
+void CompressImage( u8 const* rgba, int width, int height, int pitch, void* blocks, int flags, float* metric )
+{
     // fix any bad flags
     flags = FixFlags( flags );
 
@@ -154,20 +177,14 @@
                     if( sx < width && sy < height )
                     {
                         // copy the rgba value
-                        u8 const* sourcePixel = rgba + 4*( width*sy + sx );
-                        for( int i = 0; i < 4; ++i )
-                            *targetPixel++ = *sourcePixel++;
-
+						u8 const* sourcePixel = rgba + pitch*sy + 4*sx;
+						CopyRGBA(sourcePixel, targetPixel, flags);
                         // enable this pixel
                         mask |= ( 1 << ( 4*py + px ) );
                     }
-                    else
-                    {
-                        // skip this pixel as its outside the image
                         targetPixel += 4;
                     }
                 }
-            }
 
             // compress it into the output
             CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
@@ -180,6 +197,11 @@
 
 void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags )
 {
+	DecompressImage( rgba, width, height, width*4, blocks, flags );
+}
+
+void DecompressImage( u8* rgba, int width, int height, int pitch, void const* blocks, int flags )
+{
     // fix any bad flags
     flags = FixFlags( flags );
 
@@ -207,24 +229,132 @@
                     int sy = y + py;
                     if( sx < width && sy < height )
                     {
-                        u8* targetPixel = rgba + 4*( width*sy + sx );
+						u8* targetPixel = rgba + pitch*sy + 4*sx;
 
                         // copy the rgba value
+						CopyRGBA(sourcePixel, targetPixel, flags);
+					}
+					sourcePixel += 4;
+				}
+			}
+			
+			// advance
+			sourceBlock += bytesPerBlock;
+		}
+	}
+}
+
+static double ErrorSq(double x, double y)
+{
+	return (x - y) * (x - y);
+}
+
+static void ComputeBlockWMSE(u8 const *original, u8 const *compressed, unsigned int w, unsigned int h, double &cmse, double &amse)
+{
+	// Computes the MSE for the block and weights it by the variance of the original block.
+	// If the variance of the original block is less than 4 (i.e. a standard deviation of 1 per channel)
+	// then the block is close to being a single colour. Quantisation errors in single colour blocks
+	// are easier to see than similar errors in blocks that contain more colours, particularly when there
+	// are many such blocks in a large area (eg a blue sky background) as they cause banding.  Given that
+	// banding is easier to see than small errors in "complex" blocks, we weight the errors by a factor
+	// of 5. This implies that images with large, single colour areas will have a higher potential WMSE
+	// than images with lots of detail.
+
+	cmse = amse = 0;
+	unsigned int sum_p[4];  // per channel sum of pixels
+	unsigned int sum_p2[4]; // per channel sum of pixels squared
+	memset(sum_p, 0, sizeof(sum_p));
+	memset(sum_p2, 0, sizeof(sum_p2));
+	for( unsigned int py = 0; py < 4; ++py )
+	{
+		for( unsigned int px = 0; px < 4; ++px )
+		{
+			if( px < w && py < h )
+			{
+				double pixelCMSE = 0;
+				for( int i = 0; i < 3; ++i )
+				{
+					pixelCMSE += ErrorSq(original[i], compressed[i]);
+					sum_p[i] += original[i];
+					sum_p2[i] += (unsigned int)original[i]*original[i];
+				}
+				if( original[3] == 0 && compressed[3] == 0 )
+					pixelCMSE = 0; // transparent in both, so colour is inconsequential
+				amse += ErrorSq(original[3], compressed[3]);
+				cmse += pixelCMSE;
+				sum_p[3] += original[3];
+				sum_p2[3] += (unsigned int)original[3]*original[3];
+			}
+			original += 4;
+			compressed += 4;
+		}
+	}
+	unsigned int variance = 0;
                         for( int i = 0; i < 4; ++i )
-                            *targetPixel++ = *sourcePixel++;
+		variance += w*h*sum_p2[i] - sum_p[i]*sum_p[i];
+	if( variance < 4 * w * w * h * h )
+	{
+		amse *= 5;
+		cmse *= 5;
                     }
-                    else
+}
+  
+void ComputeMSE( u8 const *rgba, int width, int height, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
                     {
-                        // skip this pixel as its outside the image
-                        sourcePixel += 4;
+	ComputeMSE(rgba, width, height, width*4, dxt, flags, colourMSE, alphaMSE);
+}
+                
+void ComputeMSE( u8 const *rgba, int width, int height, int pitch, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
+{
+	// fix any bad flags
+	flags = FixFlags( flags );
+	colourMSE = alphaMSE = 0;
+
+	// initialise the block input
+	squish::u8 const* sourceBlock = dxt;
+	int bytesPerBlock = ( ( flags & squish::kDxt1 ) != 0 ) ? 8 : 16;
+
+	// loop over blocks
+	for( int y = 0; y < height; y += 4 )
+	{
+		for( int x = 0; x < width; x += 4 )
+		{
+			// decompress the block
+			u8 targetRgba[4*16];
+			Decompress( targetRgba, sourceBlock, flags );
+			u8 const* sourcePixel = targetRgba;
+
+			// copy across to a similar pixel block
+			u8 originalRgba[4*16];
+			u8* originalPixel = originalRgba;
+
+			for( int py = 0; py < 4; ++py )
+			{
+				for( int px = 0; px < 4; ++px )
+				{
+					int sx = x + px;
+					int sy = y + py;
+					if( sx < width && sy < height )
+					{
+						u8 const* targetPixel = rgba + pitch*sy + 4*sx;
+						CopyRGBA(targetPixel, originalPixel, flags);
                     }
+					sourcePixel += 4;
+					originalPixel += 4;
                 }
             }
 
+			// compute the weighted MSE of the block
+			double blockCMSE, blockAMSE;
+			ComputeBlockWMSE(originalRgba, targetRgba, std::min(4, width - x), std::min(4, height - y), blockCMSE, blockAMSE);
+			colourMSE += blockCMSE;
+			alphaMSE += blockAMSE;
             // advance
             sourceBlock += bytesPerBlock;
         }
     }
+	colourMSE /= (width * height * 3);
+	alphaMSE /= (width * height);
 }
 
 } // namespace squish
diff -uwNr 1.13/squish.h libsquish-1.10-openelec/squish.h
--- 1.13/squish.h	2015-04-30 12:55:27.000000000 +0200
+++ libsquish-1.10-openelec/squish.h	2015-01-09 10:58:43.000000000 +0100
@@ -57,7 +57,10 @@
     kColourRangeFit = ( 1 << 4 ),
 
     //! Weight the colour by alpha during cluster fit (disabled by default).
-    kWeightColourByAlpha = ( 1 << 7 )
+	kWeightColourByAlpha = ( 1 << 7 ),
+	
+	//! Source is BGRA rather than RGBA
+	kSourceBGRA = ( 1 << 9 ),
 };
 
 // -----------------------------------------------------------------------------
@@ -194,6 +197,7 @@
     @param rgba   The pixels of the source.
     @param width  The width of the source image.
     @param height The height of the source image.
+	@param pitch	The pitch of the source image.
     @param blocks Storage for the compressed output.
     @param flags  Compression flags.
     @param metric An optional perceptual metric.
@@ -231,6 +235,7 @@
     to allocate for the compressed output.
 */
 void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric = 0 );
+void CompressImage( u8 const* rgba, int width, int height, int pitch, void* blocks, int flags, float* metric = 0 );
 
 // -----------------------------------------------------------------------------
 
@@ -239,6 +244,7 @@
     @param rgba   Storage for the decompressed pixels.
     @param width  The width of the source image.
     @param height The height of the source image.
+	@param pitch    The pitch of the decompressed pixels.
     @param blocks The compressed DXT blocks.
     @param flags  Compression flags.
 
@@ -254,6 +260,32 @@
     Internally this function calls squish::Decompress for each block.
 */
 void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags );
+void DecompressImage( u8* rgba, int width, int height, int pitch, void const* blocks, int flags );
+
+// -----------------------------------------------------------------------------
+
+/*! @brief Computes MSE of an compressed image in memory.
+
+	@param rgba		The original image pixels.
+	@param width	The width of the source image.
+	@param height	The height of the source image.
+	@param pitch  	The pitch of the source image.
+	@param dxt		The compressed dxt blocks
+	@param flags	Compression flags.
+	@param colourMSE	The MSE of the colour values.
+	@param alphaMSE	The MSE of the alpha values.
+	
+	The colour MSE and alpha MSE are computed across all pixels. The colour MSE is
+	averaged across all rgb values (i.e. colourMSE = sum sum_k ||dxt.k - rgba.k||/3)
+	
+	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
+	however, DXT1 will be used by default if none is specified. All other flags 
+	are ignored.
+
+	Internally this function calls squish::Decompress for each block.
+*/
+void ComputeMSE(u8 const *rgba, int width, int height, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE);
+void ComputeMSE(u8 const *rgba, int width, int height, int pitch, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE);
 
 // -----------------------------------------------------------------------------
 
diff -uwNr 1.13/squish.pc.in libsquish-1.10-openelec/squish.pc.in
--- 1.13/squish.pc	1970-01-01 01:00:00.000000000 +0100
+++ libsquish-1.10-openelec/squish.pc	2015-01-09 10:58:43.000000000 +0100
@@ -0,0 +1,13 @@
+prefix=/usr
+exec_prefix=${prefix}
+libdir=${prefix}/lib
+sharedlibdir=${libdir}
+includedir=${prefix}/include
+
+Name: squish
+Description: squish DXT lib
+Version: 1.1.3-kodi
+
+Requires:
+Libs: -L${libdir} -L${sharedlibdir} -lsquish
+Cflags: -I${includedir}