summary | refs | log | tree | commit | diff
path: root/package/libfreeimage
diff options
context:
space:
mode:
author Peter Korsgaard <peter@korsgaard.com> 2015-11-24 21:29:09 +0100
committer Peter Korsgaard <peter@korsgaard.com> 2015-11-24 21:36:12 +0100
commit c626998d47e14cc8086917fe17c28045a2f9b09a (patch)
tree c868c26db0ec29dba0529713e55ef17583187100 /package/libfreeimage
parent 7f1ec872cd163ff1b5dd2448f8ff8af1a7178347 (diff)
libfreeimage: fix build issue with gcc 5.x
Fixes:
http://autobuild.buildroot.net/results/fe9/fe9c5ee9cdc52c48df5efe7560ccd5a0d3cde154/
http://autobuild.buildroot.net/results/358/3580d520da6f8d8f4ffeffcd2b969ebb0851ad37/
http://autobuild.buildroot.net/results/0cb/0cb2f0c69301c7a85a354c435da245cd904533fb/
http://autobuild.buildroot.net/results/52c/52c2967015aa79843db05e901a1563447a4029b2/

gcc 5.x gets confused by the string literals used in the inline assembly in
the files from the embedded libwebp library.

This issue is already fixed in upstream libwebp, but that fix is not
directly applicable to the tweaked version embedded in libfreeimage.

I haven't been able to find a public VCS for libfreeimage, but the issue has
been discussed on the freeimage list:

http://sourceforge.net/p/freeimage/discussion/36110/thread/605ef8e4/

Signed-off-by: Peter Korsgaard <peter@korsgaard.com>
Diffstat (limited to 'package/libfreeimage')
-rw-r--r-- package/libfreeimage/0003-LibWebP-fix-compilation-issue-with-GCC-5.x-C-11.patch 1172
1 files changed, 1172 insertions, 0 deletions
diff --git a/package/libfreeimage/0003-LibWebP-fix-compilation-issue-with-GCC-5.x-C-11.patch b/package/libfreeimage/0003-LibWebP-fix-compilation-issue-with-GCC-5.x-C-11.patch
new file mode 100644
index 000000000..6964827cc
--- /dev/null
+++ b/package/libfreeimage/0003-LibWebP-fix-compilation-issue-with-GCC-5.x-C-11.patch
@@ -0,0 +1,1172 @@
+From 44bce1b66c1cdd5308ac3ac773ea0a53d83790fd Mon Sep 17 00:00:00 2001
+From: Peter Korsgaard <peter@korsgaard.com>
+Date: Tue, 24 Nov 2015 21:16:39 +0100
+Subject: [PATCH] LibWebP: fix compilation issue with GCC 5.x / C++11
+
+GCC 5.1 / C++11 gets confused about the "#<TEXT>" in the inline assembly
+code, and dies with errors like:
+
+Source/LibWebP/./src/dsp/dsp.upsampling_mips_dsp_r2.c:37:34: error: invalid
+character ' ' in raw string delimiter
+
+Fix it by introducing white space around the string literals like it has
+been done in upstream webp:
+
+https://chromium.googlesource.com/webm/libwebp/+/eebaf97f5a1cb713d81d311308d8a48c124e5aef
+
+Discussed upstream:
+http://sourceforge.net/p/freeimage/discussion/36110/thread/605ef8e4/
+
+[Scripted by sed -i 's/"\(#[A-Z0-9]*\)"/" \1 "/g' *.c]
+Signed-off-by: Peter Korsgaard <peter@korsgaard.com>
+---
+ Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c | 28 +-
+ Source/LibWebP/src/dsp/dsp.enc_mips32.c | 314 ++++++++++-----------
+ Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c | 288 +++++++++----------
+ Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c | 10 +-
+ Source/LibWebP/src/dsp/dsp.lossless_mips32.c | 34 +--
+ Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c | 8 +-
+ .../LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c | 18 +-
+ Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c | 10 +-
+ 8 files changed, 355 insertions(+), 355 deletions(-)
+
+diff --git a/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c
+index dac2c93..aaa8111 100644
+--- a/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c
++++ b/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c
+@@ -548,10 +548,10 @@ static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+ // TEMP3 = SRC[D + D1 * BPS]
+ #define LOAD_4_BYTES(TEMP0, TEMP1, TEMP2, TEMP3, \
+ A, A1, B, B1, C, C1, D, D1, SRC) \
+- "lbu %["#TEMP0"], "#A"+"#A1"*"XSTR(BPS)"(%["#SRC"]) \n\t" \
+- "lbu %["#TEMP1"], "#B"+"#B1"*"XSTR(BPS)"(%["#SRC"]) \n\t" \
+- "lbu %["#TEMP2"], "#C"+"#C1"*"XSTR(BPS)"(%["#SRC"]) \n\t" \
+- "lbu %["#TEMP3"], "#D"+"#D1"*"XSTR(BPS)"(%["#SRC"]) \n\t" \
++ "lbu %[" #TEMP0 "], " #A "+" #A1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
++ "lbu %[" #TEMP1 "], " #B "+" #B1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
++ "lbu %[" #TEMP2 "], " #C "+" #C1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
++ "lbu %[" #TEMP3 "], " #D "+" #D1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
+
+ static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+ int i;
+@@ -623,8 +623,8 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
+ // DST[A * BPS] = TEMP0
+ // DST[B + C * BPS] = TEMP1
+ #define STORE_8_BYTES(TEMP0, TEMP1, A, B, C, DST) \
+- "usw %["#TEMP0"], "#A"*"XSTR(BPS)"(%["#DST"]) \n\t" \
+- "usw %["#TEMP1"], "#B"+"#C"*"XSTR(BPS)"(%["#DST"]) \n\t"
++ "usw %[" #TEMP0 "], " #A "*"XSTR(BPS)"(%[" #DST "]) \n\t" \
++ "usw %[" #TEMP1 "], " #B "+" #C "*"XSTR(BPS)"(%[" #DST "]) \n\t"
+
+ static void VE4(uint8_t* dst) { // vertical
+ const uint8_t* top = dst - BPS;
+@@ -725,8 +725,8 @@ static void RD4(uint8_t* dst) { // Down-right
+ // TEMP0 = SRC[A * BPS]
+ // TEMP1 = SRC[B + C * BPS]
+ #define LOAD_8_BYTES(TEMP0, TEMP1, A, B, C, SRC) \
+- "ulw %["#TEMP0"], "#A"*"XSTR(BPS)"(%["#SRC"]) \n\t" \
+- "ulw %["#TEMP1"], "#B"+"#C"*"XSTR(BPS)"(%["#SRC"]) \n\t"
++ "ulw %[" #TEMP0 "], " #A "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
++ "ulw %[" #TEMP1 "], " #B "+" #C "*"XSTR(BPS)"(%[" #SRC "]) \n\t"
+
+ static void LD4(uint8_t* dst) { // Down-Left
+ int temp0, temp1, temp2, temp3, temp4;
+@@ -873,24 +873,24 @@ static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
+ #define CLIPPING(SIZE) \
+ "preceu.ph.qbl %[temp2], %[temp0] \n\t" \
+ "preceu.ph.qbr %[temp0], %[temp0] \n\t" \
+-".if "#SIZE" == 8 \n\t" \
++".if " #SIZE " == 8 \n\t" \
+ "preceu.ph.qbl %[temp3], %[temp1] \n\t" \
+ "preceu.ph.qbr %[temp1], %[temp1] \n\t" \
+ ".endif \n\t" \
+ "addu.ph %[temp2], %[temp2], %[dst_1] \n\t" \
+ "addu.ph %[temp0], %[temp0], %[dst_1] \n\t" \
+-".if "#SIZE" == 8 \n\t" \
++".if " #SIZE " == 8 \n\t" \
+ "addu.ph %[temp3], %[temp3], %[dst_1] \n\t" \
+ "addu.ph %[temp1], %[temp1], %[dst_1] \n\t" \
+ ".endif \n\t" \
+ "shll_s.ph %[temp2], %[temp2], 7 \n\t" \
+ "shll_s.ph %[temp0], %[temp0], 7 \n\t" \
+-".if "#SIZE" == 8 \n\t" \
++".if " #SIZE " == 8 \n\t" \
+ "shll_s.ph %[temp3], %[temp3], 7 \n\t" \
+ "shll_s.ph %[temp1], %[temp1], 7 \n\t" \
+ ".endif \n\t" \
+ "precrqu_s.qb.ph %[temp0], %[temp2], %[temp0] \n\t" \
+-".if "#SIZE" == 8 \n\t" \
++".if " #SIZE " == 8 \n\t" \
+ "precrqu_s.qb.ph %[temp1], %[temp3], %[temp1] \n\t" \
+ ".endif \n\t"
+
+@@ -899,7 +899,7 @@ static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
+ int dst_1 = ((int)(DST)[-1] << 16) + (DST)[-1]; \
+ int temp0, temp1, temp2, temp3; \
+ __asm__ volatile ( \
+- ".if "#SIZE" < 8 \n\t" \
++ ".if " #SIZE " < 8 \n\t" \
+ "ulw %[temp0], 0(%[top]) \n\t" \
+ "subu.ph %[dst_1], %[dst_1], %[top_1] \n\t" \
+ CLIPPING(4) \
+@@ -911,7 +911,7 @@ static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
+ CLIPPING(8) \
+ "usw %[temp0], 0(%[dst]) \n\t" \
+ "usw %[temp1], 4(%[dst]) \n\t" \
+- ".if "#SIZE" == 16 \n\t" \
++ ".if " #SIZE " == 16 \n\t" \
+ "ulw %[temp0], 8(%[top]) \n\t" \
+ "ulw %[temp1], 12(%[top]) \n\t" \
+ CLIPPING(8) \
+diff --git a/Source/LibWebP/src/dsp/dsp.enc_mips32.c b/Source/LibWebP/src/dsp/dsp.enc_mips32.c
+index 545aa3a..bf1c16d 100644
+--- a/Source/LibWebP/src/dsp/dsp.enc_mips32.c
++++ b/Source/LibWebP/src/dsp/dsp.enc_mips32.c
+@@ -31,26 +31,26 @@ static const int kC2 = 35468;
+ // TEMP0..TEMP3 - registers for corresponding tmp elements
+ // TEMP4..TEMP5 - temporary registers
+ #define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3) \
+- "lh %[temp16], "#A"(%[temp20]) \n\t" \
+- "lh %[temp18], "#B"(%[temp20]) \n\t" \
+- "lh %[temp17], "#C"(%[temp20]) \n\t" \
+- "lh %[temp19], "#D"(%[temp20]) \n\t" \
+- "addu %["#TEMP4"], %[temp16], %[temp18] \n\t" \
++ "lh %[temp16], " #A "(%[temp20]) \n\t" \
++ "lh %[temp18], " #B "(%[temp20]) \n\t" \
++ "lh %[temp17], " #C "(%[temp20]) \n\t" \
++ "lh %[temp19], " #D "(%[temp20]) \n\t" \
++ "addu %[" #TEMP4 "], %[temp16], %[temp18] \n\t" \
+ "subu %[temp16], %[temp16], %[temp18] \n\t" \
+- "mul %["#TEMP0"], %[temp17], %[kC2] \n\t" \
++ "mul %[" #TEMP0 "], %[temp17], %[kC2] \n\t" \
+ "mul %[temp18], %[temp19], %[kC1] \n\t" \
+ "mul %[temp17], %[temp17], %[kC1] \n\t" \
+ "mul %[temp19], %[temp19], %[kC2] \n\t" \
+- "sra %["#TEMP0"], %["#TEMP0"], 16 \n\n" \
++ "sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\n" \
+ "sra %[temp18], %[temp18], 16 \n\n" \
+ "sra %[temp17], %[temp17], 16 \n\n" \
+ "sra %[temp19], %[temp19], 16 \n\n" \
+- "subu %["#TEMP2"], %["#TEMP0"], %[temp18] \n\t" \
+- "addu %["#TEMP3"], %[temp17], %[temp19] \n\t" \
+- "addu %["#TEMP0"], %["#TEMP4"], %["#TEMP3"] \n\t" \
+- "addu %["#TEMP1"], %[temp16], %["#TEMP2"] \n\t" \
+- "subu %["#TEMP2"], %[temp16], %["#TEMP2"] \n\t" \
+- "subu %["#TEMP3"], %["#TEMP4"], %["#TEMP3"] \n\t"
++ "subu %[" #TEMP2 "], %[" #TEMP0 "], %[temp18] \n\t" \
++ "addu %[" #TEMP3 "], %[temp17], %[temp19] \n\t" \
++ "addu %[" #TEMP0 "], %[" #TEMP4 "], %[" #TEMP3 "] \n\t" \
++ "addu %[" #TEMP1 "], %[temp16], %[" #TEMP2 "] \n\t" \
++ "subu %[" #TEMP2 "], %[temp16], %[" #TEMP2 "] \n\t" \
++ "subu %[" #TEMP3 "], %[" #TEMP4 "], %[" #TEMP3 "] \n\t"
+
+ // macro for one horizontal pass in ITransformOne
+ // MUL and STORE macros inlined
+@@ -59,58 +59,58 @@ static const int kC2 = 35468;
+ // A - offset in bytes to load from ref and store to dst buffer
+ // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
+ #define HORIZONTAL_PASS(A, TEMP0, TEMP4, TEMP8, TEMP12) \
+- "addiu %["#TEMP0"], %["#TEMP0"], 4 \n\t" \
+- "addu %[temp16], %["#TEMP0"], %["#TEMP8"] \n\t" \
+- "subu %[temp17], %["#TEMP0"], %["#TEMP8"] \n\t" \
+- "mul %["#TEMP0"], %["#TEMP4"], %[kC2] \n\t" \
+- "mul %["#TEMP8"], %["#TEMP12"], %[kC1] \n\t" \
+- "mul %["#TEMP4"], %["#TEMP4"], %[kC1] \n\t" \
+- "mul %["#TEMP12"], %["#TEMP12"], %[kC2] \n\t" \
+- "sra %["#TEMP0"], %["#TEMP0"], 16 \n\t" \
+- "sra %["#TEMP8"], %["#TEMP8"], 16 \n\t" \
+- "sra %["#TEMP4"], %["#TEMP4"], 16 \n\t" \
+- "sra %["#TEMP12"], %["#TEMP12"], 16 \n\t" \
+- "subu %[temp18], %["#TEMP0"], %["#TEMP8"] \n\t" \
+- "addu %[temp19], %["#TEMP4"], %["#TEMP12"] \n\t" \
+- "addu %["#TEMP0"], %[temp16], %[temp19] \n\t" \
+- "addu %["#TEMP4"], %[temp17], %[temp18] \n\t" \
+- "subu %["#TEMP8"], %[temp17], %[temp18] \n\t" \
+- "subu %["#TEMP12"], %[temp16], %[temp19] \n\t" \
++ "addiu %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \
++ "addu %[temp16], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
++ "subu %[temp17], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
++ "mul %[" #TEMP0 "], %[" #TEMP4 "], %[kC2] \n\t" \
++ "mul %[" #TEMP8 "], %[" #TEMP12 "], %[kC1] \n\t" \
++ "mul %[" #TEMP4 "], %[" #TEMP4 "], %[kC1] \n\t" \
++ "mul %[" #TEMP12 "], %[" #TEMP12 "], %[kC2] \n\t" \
++ "sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\t" \
++ "sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \
++ "sra %[" #TEMP4 "], %[" #TEMP4 "], 16 \n\t" \
++ "sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \
++ "subu %[temp18], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
++ "addu %[temp19], %[" #TEMP4 "], %[" #TEMP12 "] \n\t" \
++ "addu %[" #TEMP0 "], %[temp16], %[temp19] \n\t" \
++ "addu %[" #TEMP4 "], %[temp17], %[temp18] \n\t" \
++ "subu %[" #TEMP8 "], %[temp17], %[temp18] \n\t" \
++ "subu %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \
+ "lw %[temp20], 0(%[args]) \n\t" \
+- "sra %["#TEMP0"], %["#TEMP0"], 3 \n\t" \
+- "sra %["#TEMP4"], %["#TEMP4"], 3 \n\t" \
+- "sra %["#TEMP8"], %["#TEMP8"], 3 \n\t" \
+- "sra %["#TEMP12"], %["#TEMP12"], 3 \n\t" \
+- "lbu %[temp16], 0+"XSTR(BPS)"*"#A"(%[temp20]) \n\t" \
+- "lbu %[temp17], 1+"XSTR(BPS)"*"#A"(%[temp20]) \n\t" \
+- "lbu %[temp18], 2+"XSTR(BPS)"*"#A"(%[temp20]) \n\t" \
+- "lbu %[temp19], 3+"XSTR(BPS)"*"#A"(%[temp20]) \n\t" \
+- "addu %["#TEMP0"], %[temp16], %["#TEMP0"] \n\t" \
+- "addu %["#TEMP4"], %[temp17], %["#TEMP4"] \n\t" \
+- "addu %["#TEMP8"], %[temp18], %["#TEMP8"] \n\t" \
+- "addu %["#TEMP12"], %[temp19], %["#TEMP12"] \n\t" \
+- "slt %[temp16], %["#TEMP0"], $zero \n\t" \
+- "slt %[temp17], %["#TEMP4"], $zero \n\t" \
+- "slt %[temp18], %["#TEMP8"], $zero \n\t" \
+- "slt %[temp19], %["#TEMP12"], $zero \n\t" \
+- "movn %["#TEMP0"], $zero, %[temp16] \n\t" \
+- "movn %["#TEMP4"], $zero, %[temp17] \n\t" \
+- "movn %["#TEMP8"], $zero, %[temp18] \n\t" \
+- "movn %["#TEMP12"], $zero, %[temp19] \n\t" \
++ "sra %[" #TEMP0 "], %[" #TEMP0 "], 3 \n\t" \
++ "sra %[" #TEMP4 "], %[" #TEMP4 "], 3 \n\t" \
++ "sra %[" #TEMP8 "], %[" #TEMP8 "], 3 \n\t" \
++ "sra %[" #TEMP12 "], %[" #TEMP12 "], 3 \n\t" \
++ "lbu %[temp16], 0+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \
++ "lbu %[temp17], 1+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \
++ "lbu %[temp18], 2+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \
++ "lbu %[temp19], 3+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \
++ "addu %[" #TEMP0 "], %[temp16], %[" #TEMP0 "] \n\t" \
++ "addu %[" #TEMP4 "], %[temp17], %[" #TEMP4 "] \n\t" \
++ "addu %[" #TEMP8 "], %[temp18], %[" #TEMP8 "] \n\t" \
++ "addu %[" #TEMP12 "], %[temp19], %[" #TEMP12 "] \n\t" \
++ "slt %[temp16], %[" #TEMP0 "], $zero \n\t" \
++ "slt %[temp17], %[" #TEMP4 "], $zero \n\t" \
++ "slt %[temp18], %[" #TEMP8 "], $zero \n\t" \
++ "slt %[temp19], %[" #TEMP12 "], $zero \n\t" \
++ "movn %[" #TEMP0 "], $zero, %[temp16] \n\t" \
++ "movn %[" #TEMP4 "], $zero, %[temp17] \n\t" \
++ "movn %[" #TEMP8 "], $zero, %[temp18] \n\t" \
++ "movn %[" #TEMP12 "], $zero, %[temp19] \n\t" \
+ "addiu %[temp20], $zero, 255 \n\t" \
+- "slt %[temp16], %["#TEMP0"], %[temp20] \n\t" \
+- "slt %[temp17], %["#TEMP4"], %[temp20] \n\t" \
+- "slt %[temp18], %["#TEMP8"], %[temp20] \n\t" \
+- "slt %[temp19], %["#TEMP12"], %[temp20] \n\t" \
+- "movz %["#TEMP0"], %[temp20], %[temp16] \n\t" \
+- "movz %["#TEMP4"], %[temp20], %[temp17] \n\t" \
++ "slt %[temp16], %[" #TEMP0 "], %[temp20] \n\t" \
++ "slt %[temp17], %[" #TEMP4 "], %[temp20] \n\t" \
++ "slt %[temp18], %[" #TEMP8 "], %[temp20] \n\t" \
++ "slt %[temp19], %[" #TEMP12 "], %[temp20] \n\t" \
++ "movz %[" #TEMP0 "], %[temp20], %[temp16] \n\t" \
++ "movz %[" #TEMP4 "], %[temp20], %[temp17] \n\t" \
+ "lw %[temp16], 8(%[args]) \n\t" \
+- "movz %["#TEMP8"], %[temp20], %[temp18] \n\t" \
+- "movz %["#TEMP12"], %[temp20], %[temp19] \n\t" \
+- "sb %["#TEMP0"], 0+"XSTR(BPS)"*"#A"(%[temp16]) \n\t" \
+- "sb %["#TEMP4"], 1+"XSTR(BPS)"*"#A"(%[temp16]) \n\t" \
+- "sb %["#TEMP8"], 2+"XSTR(BPS)"*"#A"(%[temp16]) \n\t" \
+- "sb %["#TEMP12"], 3+"XSTR(BPS)"*"#A"(%[temp16]) \n\t"
++ "movz %[" #TEMP8 "], %[temp20], %[temp18] \n\t" \
++ "movz %[" #TEMP12 "], %[temp20], %[temp19] \n\t" \
++ "sb %[" #TEMP0 "], 0+"XSTR(BPS)"*" #A "(%[temp16]) \n\t" \
++ "sb %[" #TEMP4 "], 1+"XSTR(BPS)"*" #A "(%[temp16]) \n\t" \
++ "sb %[" #TEMP8 "], 2+"XSTR(BPS)"*" #A "(%[temp16]) \n\t" \
++ "sb %[" #TEMP12 "], 3+"XSTR(BPS)"*" #A "(%[temp16]) \n\t"
+
+ // Does one or two inverse transforms.
+ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
+@@ -161,9 +161,9 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
+ // K - offset in bytes (kZigzag[n] * 4)
+ // N - offset in bytes (n * 2)
+ #define QUANTIZE_ONE(J, K, N) \
+- "lh %[temp0], "#J"(%[ppin]) \n\t" \
+- "lhu %[temp1], "#J"(%[ppsharpen]) \n\t" \
+- "lw %[temp2], "#K"(%[ppzthresh]) \n\t" \
++ "lh %[temp0], " #J "(%[ppin]) \n\t" \
++ "lhu %[temp1], " #J "(%[ppsharpen]) \n\t" \
++ "lw %[temp2], " #K "(%[ppzthresh]) \n\t" \
+ "sra %[sign], %[temp0], 15 \n\t" \
+ "xor %[coeff], %[temp0], %[sign] \n\t" \
+ "subu %[coeff], %[coeff], %[sign] \n\t" \
+@@ -172,9 +172,9 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
+ "addiu %[temp5], $zero, 0 \n\t" \
+ "addiu %[level], $zero, 0 \n\t" \
+ "beqz %[temp4], 2f \n\t" \
+- "lhu %[temp1], "#J"(%[ppiq]) \n\t" \
+- "lw %[temp2], "#K"(%[ppbias]) \n\t" \
+- "lhu %[temp3], "#J"(%[ppq]) \n\t" \
++ "lhu %[temp1], " #J "(%[ppiq]) \n\t" \
++ "lw %[temp2], " #K "(%[ppbias]) \n\t" \
++ "lhu %[temp3], " #J "(%[ppq]) \n\t" \
+ "mul %[level], %[coeff], %[temp1] \n\t" \
+ "addu %[level], %[level], %[temp2] \n\t" \
+ "sra %[level], %[level], 17 \n\t" \
+@@ -184,8 +184,8 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
+ "subu %[level], %[level], %[sign] \n\t" \
+ "mul %[temp5], %[level], %[temp3] \n\t" \
+ "2: \n\t" \
+- "sh %[temp5], "#J"(%[ppin]) \n\t" \
+- "sh %[level], "#N"(%[pout]) \n\t"
++ "sh %[temp5], " #J "(%[ppin]) \n\t" \
++ "sh %[level], " #N "(%[pout]) \n\t"
+
+ static int QuantizeBlock(int16_t in[16], int16_t out[16],
+ const VP8Matrix* const mtx) {
+@@ -254,14 +254,14 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+ // E..H - offsets in bytes to store first results to tmp buffer
+ // E1..H1 - offsets in bytes to store second results to tmp buffer
+ #define HORIZONTAL_PASS(A, E, F, G, H, E1, F1, G1, H1) \
+- "lbu %[temp0], 0+"XSTR(BPS)"*"#A"(%[a]) \n\t" \
+- "lbu %[temp1], 1+"XSTR(BPS)"*"#A"(%[a]) \n\t" \
+- "lbu %[temp2], 2+"XSTR(BPS)"*"#A"(%[a]) \n\t" \
+- "lbu %[temp3], 3+"XSTR(BPS)"*"#A"(%[a]) \n\t" \
+- "lbu %[temp4], 0+"XSTR(BPS)"*"#A"(%[b]) \n\t" \
+- "lbu %[temp5], 1+"XSTR(BPS)"*"#A"(%[b]) \n\t" \
+- "lbu %[temp6], 2+"XSTR(BPS)"*"#A"(%[b]) \n\t" \
+- "lbu %[temp7], 3+"XSTR(BPS)"*"#A"(%[b]) \n\t" \
++ "lbu %[temp0], 0+"XSTR(BPS)"*" #A "(%[a]) \n\t" \
++ "lbu %[temp1], 1+"XSTR(BPS)"*" #A "(%[a]) \n\t" \
++ "lbu %[temp2], 2+"XSTR(BPS)"*" #A "(%[a]) \n\t" \
++ "lbu %[temp3], 3+"XSTR(BPS)"*" #A "(%[a]) \n\t" \
++ "lbu %[temp4], 0+"XSTR(BPS)"*" #A "(%[b]) \n\t" \
++ "lbu %[temp5], 1+"XSTR(BPS)"*" #A "(%[b]) \n\t" \
++ "lbu %[temp6], 2+"XSTR(BPS)"*" #A "(%[b]) \n\t" \
++ "lbu %[temp7], 3+"XSTR(BPS)"*" #A "(%[b]) \n\t" \
+ "addu %[temp8], %[temp0], %[temp2] \n\t" \
+ "subu %[temp0], %[temp0], %[temp2] \n\t" \
+ "addu %[temp2], %[temp1], %[temp3] \n\t" \
+@@ -278,14 +278,14 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+ "subu %[temp3], %[temp3], %[temp6] \n\t" \
+ "addu %[temp6], %[temp4], %[temp5] \n\t" \
+ "subu %[temp4], %[temp4], %[temp5] \n\t" \
+- "sw %[temp7], "#E"(%[tmp]) \n\t" \
+- "sw %[temp2], "#H"(%[tmp]) \n\t" \
+- "sw %[temp8], "#F"(%[tmp]) \n\t" \
+- "sw %[temp0], "#G"(%[tmp]) \n\t" \
+- "sw %[temp1], "#E1"(%[tmp]) \n\t" \
+- "sw %[temp3], "#H1"(%[tmp]) \n\t" \
+- "sw %[temp6], "#F1"(%[tmp]) \n\t" \
+- "sw %[temp4], "#G1"(%[tmp]) \n\t"
++ "sw %[temp7], " #E "(%[tmp]) \n\t" \
++ "sw %[temp2], " #H "(%[tmp]) \n\t" \
++ "sw %[temp8], " #F "(%[tmp]) \n\t" \
++ "sw %[temp0], " #G "(%[tmp]) \n\t" \
++ "sw %[temp1], " #E1 "(%[tmp]) \n\t" \
++ "sw %[temp3], " #H1 "(%[tmp]) \n\t" \
++ "sw %[temp6], " #F1 "(%[tmp]) \n\t" \
++ "sw %[temp4], " #G1 "(%[tmp]) \n\t"
+
+ // macro for one vertical pass in Disto4x4 (TTransform)
+ // two calls of function TTransform are merged into single one
+@@ -300,10 +300,10 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+ // A1..D1 - offsets in bytes to load second results from tmp buffer
+ // E..H - offsets in bytes to load from w buffer
+ #define VERTICAL_PASS(A, B, C, D, A1, B1, C1, D1, E, F, G, H) \
+- "lw %[temp0], "#A1"(%[tmp]) \n\t" \
+- "lw %[temp1], "#C1"(%[tmp]) \n\t" \
+- "lw %[temp2], "#B1"(%[tmp]) \n\t" \
+- "lw %[temp3], "#D1"(%[tmp]) \n\t" \
++ "lw %[temp0], " #A1 "(%[tmp]) \n\t" \
++ "lw %[temp1], " #C1 "(%[tmp]) \n\t" \
++ "lw %[temp2], " #B1 "(%[tmp]) \n\t" \
++ "lw %[temp3], " #D1 "(%[tmp]) \n\t" \
+ "addu %[temp8], %[temp0], %[temp1] \n\t" \
+ "subu %[temp0], %[temp0], %[temp1] \n\t" \
+ "addu %[temp1], %[temp2], %[temp3] \n\t" \
+@@ -324,18 +324,18 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+ "subu %[temp1], %[temp1], %[temp5] \n\t" \
+ "subu %[temp0], %[temp0], %[temp6] \n\t" \
+ "subu %[temp8], %[temp8], %[temp7] \n\t" \
+- "lhu %[temp4], "#E"(%[w]) \n\t" \
+- "lhu %[temp5], "#F"(%[w]) \n\t" \
+- "lhu %[temp6], "#G"(%[w]) \n\t" \
+- "lhu %[temp7], "#H"(%[w]) \n\t" \
++ "lhu %[temp4], " #E "(%[w]) \n\t" \
++ "lhu %[temp5], " #F "(%[w]) \n\t" \
++ "lhu %[temp6], " #G "(%[w]) \n\t" \
++ "lhu %[temp7], " #H "(%[w]) \n\t" \
+ "madd %[temp4], %[temp3] \n\t" \
+ "madd %[temp5], %[temp1] \n\t" \
+ "madd %[temp6], %[temp0] \n\t" \
+ "madd %[temp7], %[temp8] \n\t" \
+- "lw %[temp0], "#A"(%[tmp]) \n\t" \
+- "lw %[temp1], "#C"(%[tmp]) \n\t" \
+- "lw %[temp2], "#B"(%[tmp]) \n\t" \
+- "lw %[temp3], "#D"(%[tmp]) \n\t" \
++ "lw %[temp0], " #A "(%[tmp]) \n\t" \
++ "lw %[temp1], " #C "(%[tmp]) \n\t" \
++ "lw %[temp2], " #B "(%[tmp]) \n\t" \
++ "lw %[temp3], " #D "(%[tmp]) \n\t" \
+ "addu %[temp8], %[temp0], %[temp1] \n\t" \
+ "subu %[temp0], %[temp0], %[temp1] \n\t" \
+ "addu %[temp1], %[temp2], %[temp3] \n\t" \
+@@ -413,70 +413,70 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
+ // A - offset in bytes to load from src and ref buffers
+ // TEMP0..TEMP3 - registers for corresponding tmp elements
+ #define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3) \
+- "lw %["#TEMP1"], 0(%[args]) \n\t" \
+- "lw %["#TEMP2"], 4(%[args]) \n\t" \
+- "lbu %[temp16], 0+"XSTR(BPS)"*"#A"(%["#TEMP1"]) \n\t" \
+- "lbu %[temp17], 0+"XSTR(BPS)"*"#A"(%["#TEMP2"]) \n\t" \
+- "lbu %[temp18], 1+"XSTR(BPS)"*"#A"(%["#TEMP1"]) \n\t" \
+- "lbu %[temp19], 1+"XSTR(BPS)"*"#A"(%["#TEMP2"]) \n\t" \
++ "lw %[" #TEMP1 "], 0(%[args]) \n\t" \
++ "lw %[" #TEMP2 "], 4(%[args]) \n\t" \
++ "lbu %[temp16], 0+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
++ "lbu %[temp17], 0+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \
++ "lbu %[temp18], 1+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
++ "lbu %[temp19], 1+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \
+ "subu %[temp20], %[temp16], %[temp17] \n\t" \
+- "lbu %[temp16], 2+"XSTR(BPS)"*"#A"(%["#TEMP1"]) \n\t" \
+- "lbu %[temp17], 2+"XSTR(BPS)"*"#A"(%["#TEMP2"]) \n\t" \
+- "subu %["#TEMP0"], %[temp18], %[temp19] \n\t" \
+- "lbu %[temp18], 3+"XSTR(BPS)"*"#A"(%["#TEMP1"]) \n\t" \
+- "lbu %[temp19], 3+"XSTR(BPS)"*"#A"(%["#TEMP2"]) \n\t" \
+- "subu %["#TEMP1"], %[temp16], %[temp17] \n\t" \
+- "subu %["#TEMP2"], %[temp18], %[temp19] \n\t" \
+- "addu %["#TEMP3"], %[temp20], %["#TEMP2"] \n\t" \
+- "subu %["#TEMP2"], %[temp20], %["#TEMP2"] \n\t" \
+- "addu %[temp20], %["#TEMP0"], %["#TEMP1"] \n\t" \
+- "subu %["#TEMP0"], %["#TEMP0"], %["#TEMP1"] \n\t" \
+- "mul %[temp16], %["#TEMP2"], %[c5352] \n\t" \
+- "mul %[temp17], %["#TEMP2"], %[c2217] \n\t" \
+- "mul %[temp18], %["#TEMP0"], %[c5352] \n\t" \
+- "mul %[temp19], %["#TEMP0"], %[c2217] \n\t" \
+- "addu %["#TEMP1"], %["#TEMP3"], %[temp20] \n\t" \
+- "subu %[temp20], %["#TEMP3"], %[temp20] \n\t" \
+- "sll %["#TEMP0"], %["#TEMP1"], 3 \n\t" \
+- "sll %["#TEMP2"], %[temp20], 3 \n\t" \
++ "lbu %[temp16], 2+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
++ "lbu %[temp17], 2+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \
++ "subu %[" #TEMP0 "], %[temp18], %[temp19] \n\t" \
++ "lbu %[temp18], 3+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
++ "lbu %[temp19], 3+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \
++ "subu %[" #TEMP1 "], %[temp16], %[temp17] \n\t" \
++ "subu %[" #TEMP2 "], %[temp18], %[temp19] \n\t" \
++ "addu %[" #TEMP3 "], %[temp20], %[" #TEMP2 "] \n\t" \
++ "subu %[" #TEMP2 "], %[temp20], %[" #TEMP2 "] \n\t" \
++ "addu %[temp20], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
++ "subu %[" #TEMP0 "], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
++ "mul %[temp16], %[" #TEMP2 "], %[c5352] \n\t" \
++ "mul %[temp17], %[" #TEMP2 "], %[c2217] \n\t" \
++ "mul %[temp18], %[" #TEMP0 "], %[c5352] \n\t" \
++ "mul %[temp19], %[" #TEMP0 "], %[c2217] \n\t" \
++ "addu %[" #TEMP1 "], %[" #TEMP3 "], %[temp20] \n\t" \
++ "subu %[temp20], %[" #TEMP3 "], %[temp20] \n\t" \
++ "sll %[" #TEMP0 "], %[" #TEMP1 "], 3 \n\t" \
++ "sll %[" #TEMP2 "], %[temp20], 3 \n\t" \
+ "addiu %[temp16], %[temp16], 1812 \n\t" \
+ "addiu %[temp17], %[temp17], 937 \n\t" \
+ "addu %[temp16], %[temp16], %[temp19] \n\t" \
+ "subu %[temp17], %[temp17], %[temp18] \n\t" \
+- "sra %["#TEMP1"], %[temp16], 9 \n\t" \
+- "sra %["#TEMP3"], %[temp17], 9 \n\t"
++ "sra %[" #TEMP1 "], %[temp16], 9 \n\t" \
++ "sra %[" #TEMP3 "], %[temp17], 9 \n\t"
+
+ // macro for one vertical pass in FTransform
+ // temp0..temp15 holds tmp[0]..tmp[15]
+ // A..D - offsets in bytes to store to out buffer
+ // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
+ #define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \
+- "addu %[temp16], %["#TEMP0"], %["#TEMP12"] \n\t" \
+- "subu %[temp19], %["#TEMP0"], %["#TEMP12"] \n\t" \
+- "addu %[temp17], %["#TEMP4"], %["#TEMP8"] \n\t" \
+- "subu %[temp18], %["#TEMP4"], %["#TEMP8"] \n\t" \
+- "mul %["#TEMP8"], %[temp19], %[c2217] \n\t" \
+- "mul %["#TEMP12"], %[temp18], %[c2217] \n\t" \
+- "mul %["#TEMP4"], %[temp19], %[c5352] \n\t" \
++ "addu %[temp16], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
++ "subu %[temp19], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
++ "addu %[temp17], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
++ "subu %[temp18], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
++ "mul %[" #TEMP8 "], %[temp19], %[c2217] \n\t" \
++ "mul %[" #TEMP12 "], %[temp18], %[c2217] \n\t" \
++ "mul %[" #TEMP4 "], %[temp19], %[c5352] \n\t" \
+ "mul %[temp18], %[temp18], %[c5352] \n\t" \
+ "addiu %[temp16], %[temp16], 7 \n\t" \
+- "addu %["#TEMP0"], %[temp16], %[temp17] \n\t" \
+- "sra %["#TEMP0"], %["#TEMP0"], 4 \n\t" \
+- "addu %["#TEMP12"], %["#TEMP12"], %["#TEMP4"] \n\t" \
+- "subu %["#TEMP4"], %[temp16], %[temp17] \n\t" \
+- "sra %["#TEMP4"], %["#TEMP4"], 4 \n\t" \
+- "addiu %["#TEMP8"], %["#TEMP8"], 30000 \n\t" \
+- "addiu %["#TEMP12"], %["#TEMP12"], 12000 \n\t" \
+- "addiu %["#TEMP8"], %["#TEMP8"], 21000 \n\t" \
+- "subu %["#TEMP8"], %["#TEMP8"], %[temp18] \n\t" \
+- "sra %["#TEMP12"], %["#TEMP12"], 16 \n\t" \
+- "sra %["#TEMP8"], %["#TEMP8"], 16 \n\t" \
+- "addiu %[temp16], %["#TEMP12"], 1 \n\t" \
+- "movn %["#TEMP12"], %[temp16], %[temp19] \n\t" \
+- "sh %["#TEMP0"], "#A"(%[temp20]) \n\t" \
+- "sh %["#TEMP4"], "#C"(%[temp20]) \n\t" \
+- "sh %["#TEMP8"], "#D"(%[temp20]) \n\t" \
+- "sh %["#TEMP12"], "#B"(%[temp20]) \n\t"
++ "addu %[" #TEMP0 "], %[temp16], %[temp17] \n\t" \
++ "sra %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \
++ "addu %[" #TEMP12 "], %[" #TEMP12 "], %[" #TEMP4 "] \n\t" \
++ "subu %[" #TEMP4 "], %[temp16], %[temp17] \n\t" \
++ "sra %[" #TEMP4 "], %[" #TEMP4 "], 4 \n\t" \
++ "addiu %[" #TEMP8 "], %[" #TEMP8 "], 30000 \n\t" \
++ "addiu %[" #TEMP12 "], %[" #TEMP12 "], 12000 \n\t" \
++ "addiu %[" #TEMP8 "], %[" #TEMP8 "], 21000 \n\t" \
++ "subu %[" #TEMP8 "], %[" #TEMP8 "], %[temp18] \n\t" \
++ "sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \
++ "sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \
++ "addiu %[temp16], %[" #TEMP12 "], 1 \n\t" \
++ "movn %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \
++ "sh %[" #TEMP0 "], " #A "(%[temp20]) \n\t" \
++ "sh %[" #TEMP4 "], " #C "(%[temp20]) \n\t" \
++ "sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
++ "sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
+
+ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+ int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
+@@ -516,14 +516,14 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+ #if !defined(WORK_AROUND_GCC)
+
+ #define GET_SSE_INNER(A, B, C, D) \
+- "lbu %[temp0], "#A"(%[a]) \n\t" \
+- "lbu %[temp1], "#A"(%[b]) \n\t" \
+- "lbu %[temp2], "#B"(%[a]) \n\t" \
+- "lbu %[temp3], "#B"(%[b]) \n\t" \
+- "lbu %[temp4], "#C"(%[a]) \n\t" \
+- "lbu %[temp5], "#C"(%[b]) \n\t" \
+- "lbu %[temp6], "#D"(%[a]) \n\t" \
+- "lbu %[temp7], "#D"(%[b]) \n\t" \
++ "lbu %[temp0], " #A "(%[a]) \n\t" \
++ "lbu %[temp1], " #A "(%[b]) \n\t" \
++ "lbu %[temp2], " #B "(%[a]) \n\t" \
++ "lbu %[temp3], " #B "(%[b]) \n\t" \
++ "lbu %[temp4], " #C "(%[a]) \n\t" \
++ "lbu %[temp5], " #C "(%[b]) \n\t" \
++ "lbu %[temp6], " #D "(%[a]) \n\t" \
++ "lbu %[temp7], " #D "(%[b]) \n\t" \
+ "subu %[temp0], %[temp0], %[temp1] \n\t" \
+ "subu %[temp2], %[temp2], %[temp3] \n\t" \
+ "subu %[temp4], %[temp4], %[temp5] \n\t" \
+diff --git a/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c
+index ec58efe..1a3f968 100644
+--- a/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c
++++ b/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c
+@@ -27,25 +27,25 @@ static const int kC2 = 35468;
+ // I - input (macro doesn't change it)
+ #define ADD_SUB_HALVES_X4(O0, O1, O2, O3, O4, O5, O6, O7, \
+ I0, I1, I2, I3, I4, I5, I6, I7) \
+- "addq.ph %["#O0"], %["#I0"], %["#I1"] \n\t" \
+- "subq.ph %["#O1"], %["#I0"], %["#I1"] \n\t" \
+- "addq.ph %["#O2"], %["#I2"], %["#I3"] \n\t" \
+- "subq.ph %["#O3"], %["#I2"], %["#I3"] \n\t" \
+- "addq.ph %["#O4"], %["#I4"], %["#I5"] \n\t" \
+- "subq.ph %["#O5"], %["#I4"], %["#I5"] \n\t" \
+- "addq.ph %["#O6"], %["#I6"], %["#I7"] \n\t" \
+- "subq.ph %["#O7"], %["#I6"], %["#I7"] \n\t"
++ "addq.ph %[" #O0 "], %[" #I0 "], %[" #I1 "] \n\t" \
++ "subq.ph %[" #O1 "], %[" #I0 "], %[" #I1 "] \n\t" \
++ "addq.ph %[" #O2 "], %[" #I2 "], %[" #I3 "] \n\t" \
++ "subq.ph %[" #O3 "], %[" #I2 "], %[" #I3 "] \n\t" \
++ "addq.ph %[" #O4 "], %[" #I4 "], %[" #I5 "] \n\t" \
++ "subq.ph %[" #O5 "], %[" #I4 "], %[" #I5 "] \n\t" \
++ "addq.ph %[" #O6 "], %[" #I6 "], %[" #I7 "] \n\t" \
++ "subq.ph %[" #O7 "], %[" #I6 "], %[" #I7 "] \n\t"
+
+ // IO - input/output
+ #define ABS_X8(IO0, IO1, IO2, IO3, IO4, IO5, IO6, IO7) \
+- "absq_s.ph %["#IO0"], %["#IO0"] \n\t" \
+- "absq_s.ph %["#IO1"], %["#IO1"] \n\t" \
+- "absq_s.ph %["#IO2"], %["#IO2"] \n\t" \
+- "absq_s.ph %["#IO3"], %["#IO3"] \n\t" \
+- "absq_s.ph %["#IO4"], %["#IO4"] \n\t" \
+- "absq_s.ph %["#IO5"], %["#IO5"] \n\t" \
+- "absq_s.ph %["#IO6"], %["#IO6"] \n\t" \
+- "absq_s.ph %["#IO7"], %["#IO7"] \n\t"
++ "absq_s.ph %[" #IO0 "], %[" #IO0 "] \n\t" \
++ "absq_s.ph %[" #IO1 "], %[" #IO1 "] \n\t" \
++ "absq_s.ph %[" #IO2 "], %[" #IO2 "] \n\t" \
++ "absq_s.ph %[" #IO3 "], %[" #IO3 "] \n\t" \
++ "absq_s.ph %[" #IO4 "], %[" #IO4 "] \n\t" \
++ "absq_s.ph %[" #IO5 "], %[" #IO5 "] \n\t" \
++ "absq_s.ph %[" #IO6 "], %[" #IO6 "] \n\t" \
++ "absq_s.ph %[" #IO7 "], %[" #IO7 "] \n\t"
+
+ // dpa.w.ph $ac0 temp0 ,temp1
+ // $ac += temp0[31..16] * temp1[31..16] + temp0[15..0] * temp1[15..0]
+@@ -56,15 +56,15 @@ static const int kC2 = 35468;
+ #define MUL_HALF(O0, I0, I1, I2, I3, I4, I5, I6, I7, \
+ I8, I9, I10, I11, I12, I13, I14, I15) \
+ "mult $ac0, $zero, $zero \n\t" \
+- "dpa.w.ph $ac0, %["#I2"], %["#I0"] \n\t" \
+- "dpax.w.ph $ac0, %["#I5"], %["#I6"] \n\t" \
+- "dpa.w.ph $ac0, %["#I8"], %["#I9"] \n\t" \
+- "dpax.w.ph $ac0, %["#I11"], %["#I4"] \n\t" \
+- "dpa.w.ph $ac0, %["#I12"], %["#I7"] \n\t" \
+- "dpax.w.ph $ac0, %["#I13"], %["#I1"] \n\t" \
+- "dpa.w.ph $ac0, %["#I14"], %["#I3"] \n\t" \
+- "dpax.w.ph $ac0, %["#I15"], %["#I10"] \n\t" \
+- "mflo %["#O0"], $ac0 \n\t"
++ "dpa.w.ph $ac0, %[" #I2 "], %[" #I0 "] \n\t" \
++ "dpax.w.ph $ac0, %[" #I5 "], %[" #I6 "] \n\t" \
++ "dpa.w.ph $ac0, %[" #I8 "], %[" #I9 "] \n\t" \
++ "dpax.w.ph $ac0, %[" #I11 "], %[" #I4 "] \n\t" \
++ "dpa.w.ph $ac0, %[" #I12 "], %[" #I7 "] \n\t" \
++ "dpax.w.ph $ac0, %[" #I13 "], %[" #I1 "] \n\t" \
++ "dpa.w.ph $ac0, %[" #I14 "], %[" #I3 "] \n\t" \
++ "dpax.w.ph $ac0, %[" #I15 "], %[" #I10 "] \n\t" \
++ "mflo %[" #O0 "], $ac0 \n\t"
+
+ #define OUTPUT_EARLY_CLOBBER_REGS_17() \
+ OUTPUT_EARLY_CLOBBER_REGS_10(), \
+@@ -77,69 +77,69 @@ static const int kC2 = 35468;
+ // A - offset in bytes to load from src and ref buffers
+ // TEMP0..TEMP3 - registers for corresponding tmp elements
+ #define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3) \
+- "lw %["#TEMP0"], 0(%[args]) \n\t" \
+- "lw %["#TEMP1"], 4(%[args]) \n\t" \
+- "lw %["#TEMP2"], "XSTR(BPS)"*"#A"(%["#TEMP0"]) \n\t" \
+- "lw %["#TEMP3"], "XSTR(BPS)"*"#A"(%["#TEMP1"]) \n\t" \
+- "preceu.ph.qbl %["#TEMP0"], %["#TEMP2"] \n\t" \
+- "preceu.ph.qbl %["#TEMP1"], %["#TEMP3"] \n\t" \
+- "preceu.ph.qbr %["#TEMP2"], %["#TEMP2"] \n\t" \
+- "preceu.ph.qbr %["#TEMP3"], %["#TEMP3"] \n\t" \
+- "subq.ph %["#TEMP0"], %["#TEMP0"], %["#TEMP1"] \n\t" \
+- "subq.ph %["#TEMP2"], %["#TEMP2"], %["#TEMP3"] \n\t" \
+- "rotr %["#TEMP0"], %["#TEMP0"], 16 \n\t" \
+- "addq.ph %["#TEMP1"], %["#TEMP2"], %["#TEMP0"] \n\t" \
+- "subq.ph %["#TEMP3"], %["#TEMP2"], %["#TEMP0"] \n\t" \
+- "seh %["#TEMP0"], %["#TEMP1"] \n\t" \
+- "sra %[temp16], %["#TEMP1"], 16 \n\t" \
+- "seh %[temp19], %["#TEMP3"] \n\t" \
+- "sra %["#TEMP3"], %["#TEMP3"], 16 \n\t" \
+- "subu %["#TEMP2"], %["#TEMP0"], %[temp16] \n\t" \
+- "addu %["#TEMP0"], %["#TEMP0"], %[temp16] \n\t" \
++ "lw %[" #TEMP0 "], 0(%[args]) \n\t" \
++ "lw %[" #TEMP1 "], 4(%[args]) \n\t" \
++ "lw %[" #TEMP2 "], "XSTR(BPS)"*" #A "(%[" #TEMP0 "]) \n\t" \
++ "lw %[" #TEMP3 "], "XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
++ "preceu.ph.qbl %[" #TEMP0 "], %[" #TEMP2 "] \n\t" \
++ "preceu.ph.qbl %[" #TEMP1 "], %[" #TEMP3 "] \n\t" \
++ "preceu.ph.qbr %[" #TEMP2 "], %[" #TEMP2 "] \n\t" \
++ "preceu.ph.qbr %[" #TEMP3 "], %[" #TEMP3 "] \n\t" \
++ "subq.ph %[" #TEMP0 "], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
++ "subq.ph %[" #TEMP2 "], %[" #TEMP2 "], %[" #TEMP3 "] \n\t" \
++ "rotr %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\t" \
++ "addq.ph %[" #TEMP1 "], %[" #TEMP2 "], %[" #TEMP0 "] \n\t" \
++ "subq.ph %[" #TEMP3 "], %[" #TEMP2 "], %[" #TEMP0 "] \n\t" \
++ "seh %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
++ "sra %[temp16], %[" #TEMP1 "], 16 \n\t" \
++ "seh %[temp19], %[" #TEMP3 "] \n\t" \
++ "sra %[" #TEMP3 "], %[" #TEMP3 "], 16 \n\t" \
++ "subu %[" #TEMP2 "], %[" #TEMP0 "], %[temp16] \n\t" \
++ "addu %[" #TEMP0 "], %[" #TEMP0 "], %[temp16] \n\t" \
+ "mul %[temp17], %[temp19], %[c2217] \n\t" \
+- "mul %[temp18], %["#TEMP3"], %[c5352] \n\t" \
+- "mul %["#TEMP1"], %[temp19], %[c5352] \n\t" \
+- "mul %[temp16], %["#TEMP3"], %[c2217] \n\t" \
+- "sll %["#TEMP2"], %["#TEMP2"], 3 \n\t" \
+- "sll %["#TEMP0"], %["#TEMP0"], 3 \n\t" \
+- "subu %["#TEMP3"], %[temp17], %[temp18] \n\t" \
+- "addu %["#TEMP1"], %[temp16], %["#TEMP1"] \n\t" \
+- "addiu %["#TEMP3"], %["#TEMP3"], 937 \n\t" \
+- "addiu %["#TEMP1"], %["#TEMP1"], 1812 \n\t" \
+- "sra %["#TEMP3"], %["#TEMP3"], 9 \n\t" \
+- "sra %["#TEMP1"], %["#TEMP1"], 9 \n\t"
++ "mul %[temp18], %[" #TEMP3 "], %[c5352] \n\t" \
++ "mul %[" #TEMP1 "], %[temp19], %[c5352] \n\t" \
++ "mul %[temp16], %[" #TEMP3 "], %[c2217] \n\t" \
++ "sll %[" #TEMP2 "], %[" #TEMP2 "], 3 \n\t" \
++ "sll %[" #TEMP0 "], %[" #TEMP0 "], 3 \n\t" \
++ "subu %[" #TEMP3 "], %[temp17], %[temp18] \n\t" \
++ "addu %[" #TEMP1 "], %[temp16], %[" #TEMP1 "] \n\t" \
++ "addiu %[" #TEMP3 "], %[" #TEMP3 "], 937 \n\t" \
++ "addiu %[" #TEMP1 "], %[" #TEMP1 "], 1812 \n\t" \
++ "sra %[" #TEMP3 "], %[" #TEMP3 "], 9 \n\t" \
++ "sra %[" #TEMP1 "], %[" #TEMP1 "], 9 \n\t"
+
+ // macro for one vertical pass in FTransform
+ // temp0..temp15 holds tmp[0]..tmp[15]
+ // A..D - offsets in bytes to store to out buffer
+ // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
+ #define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \
+- "addu %[temp16], %["#TEMP0"], %["#TEMP12"] \n\t" \
+- "subu %[temp19], %["#TEMP0"], %["#TEMP12"] \n\t" \
+- "addu %[temp17], %["#TEMP4"], %["#TEMP8"] \n\t" \
+- "subu %[temp18], %["#TEMP4"], %["#TEMP8"] \n\t" \
+- "mul %["#TEMP8"], %[temp19], %[c2217] \n\t" \
+- "mul %["#TEMP12"], %[temp18], %[c2217] \n\t" \
+- "mul %["#TEMP4"], %[temp19], %[c5352] \n\t" \
++ "addu %[temp16], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
++ "subu %[temp19], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
++ "addu %[temp17], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
++ "subu %[temp18], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
++ "mul %[" #TEMP8 "], %[temp19], %[c2217] \n\t" \
++ "mul %[" #TEMP12 "], %[temp18], %[c2217] \n\t" \
++ "mul %[" #TEMP4 "], %[temp19], %[c5352] \n\t" \
+ "mul %[temp18], %[temp18], %[c5352] \n\t" \
+ "addiu %[temp16], %[temp16], 7 \n\t" \
+- "addu %["#TEMP0"], %[temp16], %[temp17] \n\t" \
+- "sra %["#TEMP0"], %["#TEMP0"], 4 \n\t" \
+- "addu %["#TEMP12"], %["#TEMP12"], %["#TEMP4"] \n\t" \
+- "subu %["#TEMP4"], %[temp16], %[temp17] \n\t" \
+- "sra %["#TEMP4"], %["#TEMP4"], 4 \n\t" \
+- "addiu %["#TEMP8"], %["#TEMP8"], 30000 \n\t" \
+- "addiu %["#TEMP12"], %["#TEMP12"], 12000 \n\t" \
+- "addiu %["#TEMP8"], %["#TEMP8"], 21000 \n\t" \
+- "subu %["#TEMP8"], %["#TEMP8"], %[temp18] \n\t" \
+- "sra %["#TEMP12"], %["#TEMP12"], 16 \n\t" \
+- "sra %["#TEMP8"], %["#TEMP8"], 16 \n\t" \
+- "addiu %[temp16], %["#TEMP12"], 1 \n\t" \
+- "movn %["#TEMP12"], %[temp16], %[temp19] \n\t" \
+- "sh %["#TEMP0"], "#A"(%[temp20]) \n\t" \
+- "sh %["#TEMP4"], "#C"(%[temp20]) \n\t" \
+- "sh %["#TEMP8"], "#D"(%[temp20]) \n\t" \
+- "sh %["#TEMP12"], "#B"(%[temp20]) \n\t"
++ "addu %[" #TEMP0 "], %[temp16], %[temp17] \n\t" \
++ "sra %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \
++ "addu %[" #TEMP12 "], %[" #TEMP12 "], %[" #TEMP4 "] \n\t" \
++ "subu %[" #TEMP4 "], %[temp16], %[temp17] \n\t" \
++ "sra %[" #TEMP4 "], %[" #TEMP4 "], 4 \n\t" \
++ "addiu %[" #TEMP8 "], %[" #TEMP8 "], 30000 \n\t" \
++ "addiu %[" #TEMP12 "], %[" #TEMP12 "], 12000 \n\t" \
++ "addiu %[" #TEMP8 "], %[" #TEMP8 "], 21000 \n\t" \
++ "subu %[" #TEMP8 "], %[" #TEMP8 "], %[temp18] \n\t" \
++ "sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \
++ "sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \
++ "addiu %[temp16], %[" #TEMP12 "], 1 \n\t" \
++ "movn %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \
++ "sh %[" #TEMP0 "], " #A "(%[temp20]) \n\t" \
++ "sh %[" #TEMP4 "], " #C "(%[temp20]) \n\t" \
++ "sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
++ "sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
+
+ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+ const int c2217 = 2217;
+@@ -329,11 +329,11 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
+ // Intra predictions
+
+ #define FILL_PART(J, SIZE) \
+- "usw %[value], 0+"#J"*"XSTR(BPS)"(%[dst]) \n\t" \
+- "usw %[value], 4+"#J"*"XSTR(BPS)"(%[dst]) \n\t" \
+- ".if "#SIZE" == 16 \n\t" \
+- "usw %[value], 8+"#J"*"XSTR(BPS)"(%[dst]) \n\t" \
+- "usw %[value], 12+"#J"*"XSTR(BPS)"(%[dst]) \n\t" \
++ "usw %[value], 0+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \
++ "usw %[value], 4+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \
++ ".if " #SIZE " == 16 \n\t" \
++ "usw %[value], 8+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \
++ "usw %[value], 12+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \
+ ".endif \n\t"
+
+ #define FILL_8_OR_16(DST, VALUE, SIZE) do { \
+@@ -348,7 +348,7 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
+ FILL_PART( 5, SIZE) \
+ FILL_PART( 6, SIZE) \
+ FILL_PART( 7, SIZE) \
+- ".if "#SIZE" == 16 \n\t" \
++ ".if " #SIZE " == 16 \n\t" \
+ FILL_PART( 8, 16) \
+ FILL_PART( 9, 16) \
+ FILL_PART(10, 16) \
+@@ -425,7 +425,7 @@ HORIZONTAL_PRED(dst, left, 16)
+ CLIPPING() \
+ "usw %[temp0], 0(%[dst]) \n\t" \
+ "usw %[temp1], 4(%[dst]) \n\t" \
+- ".if "#SIZE" == 16 \n\t" \
++ ".if " #SIZE " == 16 \n\t" \
+ "ulw %[temp0], 8(%[top]) \n\t" \
+ "ulw %[temp1], 12(%[top]) \n\t" \
+ CLIPPING() \
+@@ -1060,8 +1060,8 @@ static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
+ #if !defined(WORK_AROUND_GCC)
+
+ #define GET_SSE_INNER(A) \
+- "lw %[temp0], "#A"(%[a]) \n\t" \
+- "lw %[temp1], "#A"(%[b]) \n\t" \
++ "lw %[temp0], " #A "(%[a]) \n\t" \
++ "lw %[temp1], " #A "(%[b]) \n\t" \
+ "preceu.ph.qbr %[temp2], %[temp0] \n\t" \
+ "preceu.ph.qbl %[temp0], %[temp0] \n\t" \
+ "preceu.ph.qbr %[temp3], %[temp1] \n\t" \
+@@ -1185,28 +1185,28 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+ // N - offset in bytes (n * 2)
+ // N1 - offset in bytes ((n + 1) * 2)
+ #define QUANTIZE_ONE(J, K, N, N1) \
+- "ulw %[temp1], "#J"(%[ppin]) \n\t" \
+- "ulw %[temp2], "#J"(%[ppsharpen]) \n\t" \
+- "lhu %[temp3], "#K"(%[ppzthresh]) \n\t" \
+- "lhu %[temp6], "#K"+4(%[ppzthresh]) \n\t" \
++ "ulw %[temp1], " #J "(%[ppin]) \n\t" \
++ "ulw %[temp2], " #J "(%[ppsharpen]) \n\t" \
++ "lhu %[temp3], " #K "(%[ppzthresh]) \n\t" \
++ "lhu %[temp6], " #K "+4(%[ppzthresh]) \n\t" \
+ "absq_s.ph %[temp4], %[temp1] \n\t" \
+ "ins %[temp3], %[temp6], 16, 16 \n\t" \
+ "addu.ph %[coeff], %[temp4], %[temp2] \n\t" \
+ "shra.ph %[sign], %[temp1], 15 \n\t" \
+ "li %[level], 0x10001 \n\t" \
+ "cmp.lt.ph %[temp3], %[coeff] \n\t" \
+- "lhu %[temp1], "#J"(%[ppiq]) \n\t" \
++ "lhu %[temp1], " #J "(%[ppiq]) \n\t" \
+ "pick.ph %[temp5], %[level], $0 \n\t" \
+- "lw %[temp2], "#K"(%[ppbias]) \n\t" \
++ "lw %[temp2], " #K "(%[ppbias]) \n\t" \
+ "beqz %[temp5], 0f \n\t" \
+- "lhu %[temp3], "#J"(%[ppq]) \n\t" \
++ "lhu %[temp3], " #J "(%[ppq]) \n\t" \
+ "beq %[temp5], %[level], 1f \n\t" \
+ "andi %[temp5], %[temp5], 0x1 \n\t" \
+ "andi %[temp4], %[coeff], 0xffff \n\t" \
+ "beqz %[temp5], 2f \n\t" \
+ "mul %[level], %[temp4], %[temp1] \n\t" \
+- "sh $0, "#J"+2(%[ppin]) \n\t" \
+- "sh $0, "#N1"(%[pout]) \n\t" \
++ "sh $0, " #J "+2(%[ppin]) \n\t" \
++ "sh $0, " #N1 "(%[pout]) \n\t" \
+ "addu %[level], %[level], %[temp2] \n\t" \
+ "sra %[level], %[level], 17 \n\t" \
+ "slt %[temp4], %[max_level], %[level] \n\t" \
+@@ -1216,15 +1216,15 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+ "subu %[level], %[level], %[temp6] \n\t" \
+ "mul %[temp5], %[level], %[temp3] \n\t" \
+ "or %[ret], %[ret], %[level] \n\t" \
+- "sh %[level], "#N"(%[pout]) \n\t" \
+- "sh %[temp5], "#J"(%[ppin]) \n\t" \
++ "sh %[level], " #N "(%[pout]) \n\t" \
++ "sh %[temp5], " #J "(%[ppin]) \n\t" \
+ "j 3f \n\t" \
+ "2: \n\t" \
+- "lhu %[temp1], "#J"+2(%[ppiq]) \n\t" \
++ "lhu %[temp1], " #J "+2(%[ppiq]) \n\t" \
+ "srl %[temp5], %[coeff], 16 \n\t" \
+ "mul %[level], %[temp5], %[temp1] \n\t" \
+- "lw %[temp2], "#K"+4(%[ppbias]) \n\t" \
+- "lhu %[temp3], "#J"+2(%[ppq]) \n\t" \
++ "lw %[temp2], " #K "+4(%[ppbias]) \n\t" \
++ "lhu %[temp3], " #J "+2(%[ppq]) \n\t" \
+ "addu %[level], %[level], %[temp2] \n\t" \
+ "sra %[level], %[level], 17 \n\t" \
+ "srl %[temp6], %[sign], 16 \n\t" \
+@@ -1233,20 +1233,20 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+ "xor %[level], %[level], %[temp6] \n\t" \
+ "subu %[level], %[level], %[temp6] \n\t" \
+ "mul %[temp5], %[level], %[temp3] \n\t" \
+- "sh $0, "#J"(%[ppin]) \n\t" \
+- "sh $0, "#N"(%[pout]) \n\t" \
++ "sh $0, " #J "(%[ppin]) \n\t" \
++ "sh $0, " #N "(%[pout]) \n\t" \
+ "or %[ret], %[ret], %[level] \n\t" \
+- "sh %[temp5], "#J"+2(%[ppin]) \n\t" \
+- "sh %[level], "#N1"(%[pout]) \n\t" \
++ "sh %[temp5], " #J "+2(%[ppin]) \n\t" \
++ "sh %[level], " #N1 "(%[pout]) \n\t" \
+ "j 3f \n\t" \
+ "1: \n\t" \
+- "lhu %[temp1], "#J"(%[ppiq]) \n\t" \
+- "lw %[temp2], "#K"(%[ppbias]) \n\t" \
+- "ulw %[temp3], "#J"(%[ppq]) \n\t" \
++ "lhu %[temp1], " #J "(%[ppiq]) \n\t" \
++ "lw %[temp2], " #K "(%[ppbias]) \n\t" \
++ "ulw %[temp3], " #J "(%[ppq]) \n\t" \
+ "andi %[temp5], %[coeff], 0xffff \n\t" \
+ "srl %[temp0], %[coeff], 16 \n\t" \
+- "lhu %[temp6], "#J"+2(%[ppiq]) \n\t" \
+- "lw %[coeff], "#K"+4(%[ppbias]) \n\t" \
++ "lhu %[temp6], " #J "+2(%[ppiq]) \n\t" \
++ "lw %[coeff], " #K "+4(%[ppbias]) \n\t" \
+ "mul %[level], %[temp5], %[temp1] \n\t" \
+ "mul %[temp4], %[temp0], %[temp6] \n\t" \
+ "addu %[level], %[level], %[temp2] \n\t" \
+@@ -1259,15 +1259,15 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+ "subu.ph %[level], %[level], %[sign] \n\t" \
+ "mul.ph %[temp3], %[level], %[temp3] \n\t" \
+ "or %[ret], %[ret], %[level] \n\t" \
+- "sh %[level], "#N"(%[pout]) \n\t" \
++ "sh %[level], " #N "(%[pout]) \n\t" \
+ "srl %[level], %[level], 16 \n\t" \
+- "sh %[level], "#N1"(%[pout]) \n\t" \
+- "usw %[temp3], "#J"(%[ppin]) \n\t" \
++ "sh %[level], " #N1 "(%[pout]) \n\t" \
++ "usw %[temp3], " #J "(%[ppin]) \n\t" \
+ "j 3f \n\t" \
+ "0: \n\t" \
+- "sh $0, "#N"(%[pout]) \n\t" \
+- "sh $0, "#N1"(%[pout]) \n\t" \
+- "usw $0, "#J"(%[ppin]) \n\t" \
++ "sh $0, " #N "(%[pout]) \n\t" \
++ "sh $0, " #N1 "(%[pout]) \n\t" \
++ "usw $0, " #J "(%[ppin]) \n\t" \
+ "3: \n\t"
+
+ static int QuantizeBlock(int16_t in[16], int16_t out[16],
+@@ -1326,37 +1326,37 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+ // A, B, C, D - offset in bytes to load from in buffer
+ // TEMP0, TEMP1 - registers for corresponding tmp elements
+ #define HORIZONTAL_PASS_WHT(A, B, C, D, TEMP0, TEMP1) \
+- "lh %["#TEMP0"], "#A"(%[in]) \n\t" \
+- "lh %["#TEMP1"], "#B"(%[in]) \n\t" \
+- "lh %[temp8], "#C"(%[in]) \n\t" \
+- "lh %[temp9], "#D"(%[in]) \n\t" \
+- "ins %["#TEMP1"], %["#TEMP0"], 16, 16 \n\t" \
++ "lh %[" #TEMP0 "], " #A "(%[in]) \n\t" \
++ "lh %[" #TEMP1 "], " #B "(%[in]) \n\t" \
++ "lh %[temp8], " #C "(%[in]) \n\t" \
++ "lh %[temp9], " #D "(%[in]) \n\t" \
++ "ins %[" #TEMP1 "], %[" #TEMP0 "], 16, 16 \n\t" \
+ "ins %[temp9], %[temp8], 16, 16 \n\t" \
+- "subq.ph %[temp8], %["#TEMP1"], %[temp9] \n\t" \
+- "addq.ph %[temp9], %["#TEMP1"], %[temp9] \n\t" \
+- "precrq.ph.w %["#TEMP0"], %[temp8], %[temp9] \n\t" \
++ "subq.ph %[temp8], %[" #TEMP1 "], %[temp9] \n\t" \
++ "addq.ph %[temp9], %[" #TEMP1 "], %[temp9] \n\t" \
++ "precrq.ph.w %[" #TEMP0 "], %[temp8], %[temp9] \n\t" \
+ "append %[temp8], %[temp9], 16 \n\t" \
+- "subq.ph %["#TEMP1"], %["#TEMP0"], %[temp8] \n\t" \
+- "addq.ph %["#TEMP0"], %["#TEMP0"], %[temp8] \n\t" \
+- "rotr %["#TEMP1"], %["#TEMP1"], 16 \n\t"
++ "subq.ph %[" #TEMP1 "], %[" #TEMP0 "], %[temp8] \n\t" \
++ "addq.ph %[" #TEMP0 "], %[" #TEMP0 "], %[temp8] \n\t" \
++ "rotr %[" #TEMP1 "], %[" #TEMP1 "], 16 \n\t"
+
+ // macro for one vertical pass in FTransformWHT
+ // temp0..temp7 holds tmp[0]..tmp[15]
+ // A, B, C, D - offsets in bytes to store to out buffer
+ // TEMP0, TEMP2, TEMP4 and TEMP6 - registers for corresponding tmp elements
+ #define VERTICAL_PASS_WHT(A, B, C, D, TEMP0, TEMP2, TEMP4, TEMP6) \
+- "addq.ph %[temp8], %["#TEMP0"], %["#TEMP4"] \n\t" \
+- "addq.ph %[temp9], %["#TEMP2"], %["#TEMP6"] \n\t" \
+- "subq.ph %["#TEMP2"], %["#TEMP2"], %["#TEMP6"] \n\t" \
+- "subq.ph %["#TEMP6"], %["#TEMP0"], %["#TEMP4"] \n\t" \
+- "addqh.ph %["#TEMP0"], %[temp8], %[temp9] \n\t" \
+- "subqh.ph %["#TEMP4"], %["#TEMP6"], %["#TEMP2"] \n\t" \
+- "addqh.ph %["#TEMP2"], %["#TEMP2"], %["#TEMP6"] \n\t" \
+- "subqh.ph %["#TEMP6"], %[temp8], %[temp9] \n\t" \
+- "usw %["#TEMP0"], "#A"(%[out]) \n\t" \
+- "usw %["#TEMP2"], "#B"(%[out]) \n\t" \
+- "usw %["#TEMP4"], "#C"(%[out]) \n\t" \
+- "usw %["#TEMP6"], "#D"(%[out]) \n\t"
++ "addq.ph %[temp8], %[" #TEMP0 "], %[" #TEMP4 "] \n\t" \
++ "addq.ph %[temp9], %[" #TEMP2 "], %[" #TEMP6 "] \n\t" \
++ "subq.ph %[" #TEMP2 "], %[" #TEMP2 "], %[" #TEMP6 "] \n\t" \
++ "subq.ph %[" #TEMP6 "], %[" #TEMP0 "], %[" #TEMP4 "] \n\t" \
++ "addqh.ph %[" #TEMP0 "], %[temp8], %[temp9] \n\t" \
++ "subqh.ph %[" #TEMP4 "], %[" #TEMP6 "], %[" #TEMP2 "] \n\t" \
++ "addqh.ph %[" #TEMP2 "], %[" #TEMP2 "], %[" #TEMP6 "] \n\t" \
++ "subqh.ph %[" #TEMP6 "], %[temp8], %[temp9] \n\t" \
++ "usw %[" #TEMP0 "], " #A "(%[out]) \n\t" \
++ "usw %[" #TEMP2 "], " #B "(%[out]) \n\t" \
++ "usw %[" #TEMP4 "], " #C "(%[out]) \n\t" \
++ "usw %[" #TEMP6 "], " #D "(%[out]) \n\t"
+
+ static void FTransformWHT(const int16_t* in, int16_t* out) {
+ int temp0, temp1, temp2, temp3, temp4;
+@@ -1385,10 +1385,10 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
+ // convert 8 coeffs at time
+ // A, B, C, D - offsets in bytes to load from out buffer
+ #define CONVERT_COEFFS_TO_BIN(A, B, C, D) \
+- "ulw %[temp0], "#A"(%[out]) \n\t" \
+- "ulw %[temp1], "#B"(%[out]) \n\t" \
+- "ulw %[temp2], "#C"(%[out]) \n\t" \
+- "ulw %[temp3], "#D"(%[out]) \n\t" \
++ "ulw %[temp0], " #A "(%[out]) \n\t" \
++ "ulw %[temp1], " #B "(%[out]) \n\t" \
++ "ulw %[temp2], " #C "(%[out]) \n\t" \
++ "ulw %[temp3], " #D "(%[out]) \n\t" \
+ "absq_s.ph %[temp0], %[temp0] \n\t" \
+ "absq_s.ph %[temp1], %[temp1] \n\t" \
+ "absq_s.ph %[temp2], %[temp2] \n\t" \
+diff --git a/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c
+index 6c34efb..6a1f8f4 100644
+--- a/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c
++++ b/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c
+@@ -48,7 +48,7 @@
+ "srl %[temp0], %[length], 0x2 \n\t" \
+ "beqz %[temp0], 4f \n\t" \
+ " andi %[temp6], %[length], 0x3 \n\t" \
+- ".if "#INVERSE" \n\t" \
++ ".if " #INVERSE " \n\t" \
+ "lbu %[temp1], -1(%[src]) \n\t" \
+ "1: \n\t" \
+ "lbu %[temp2], 0(%[src]) \n\t" \
+@@ -84,7 +84,7 @@
+ "lbu %[temp1], -1(%[src]) \n\t" \
+ "lbu %[temp2], 0(%[src]) \n\t" \
+ "addiu %[src], %[src], 1 \n\t" \
+- ".if "#INVERSE" \n\t" \
++ ".if " #INVERSE " \n\t" \
+ "addu %[temp3], %[temp1], %[temp2] \n\t" \
+ "sb %[temp3], -1(%[src]) \n\t" \
+ ".else \n\t" \
+@@ -131,7 +131,7 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
+ "ulw %[temp3], 4(%[src]) \n\t" \
+ "ulw %[temp4], 4(%[pred]) \n\t" \
+ "addiu %[src], %[src], 8 \n\t" \
+- ".if "#INVERSE" \n\t" \
++ ".if " #INVERSE " \n\t" \
+ "addu.qb %[temp5], %[temp1], %[temp2] \n\t" \
+ "addu.qb %[temp6], %[temp3], %[temp4] \n\t" \
+ ".else \n\t" \
+@@ -152,7 +152,7 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
+ "lbu %[temp2], 0(%[pred]) \n\t" \
+ "addiu %[src], %[src], 1 \n\t" \
+ "addiu %[pred], %[pred], 1 \n\t" \
+- ".if "#INVERSE" \n\t" \
++ ".if " #INVERSE " \n\t" \
+ "addu %[temp3], %[temp1], %[temp2] \n\t" \
+ ".else \n\t" \
+ "subu %[temp3], %[temp1], %[temp2] \n\t" \
+@@ -177,7 +177,7 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
+ __asm__ volatile ( \
+ "lbu %[temp1], 0(%[src]) \n\t" \
+ "lbu %[temp2], 0(%[pred]) \n\t" \
+- ".if "#INVERSE" \n\t" \
++ ".if " #INVERSE " \n\t" \
+ "addu %[temp3], %[temp1], %[temp2] \n\t" \
+ ".else \n\t" \
+ "subu %[temp3], %[temp1], %[temp2] \n\t" \
+diff --git a/Source/LibWebP/src/dsp/dsp.lossless_mips32.c b/Source/LibWebP/src/dsp/dsp.lossless_mips32.c
+index 68fbe85..abe97c1 100644
+--- a/Source/LibWebP/src/dsp/dsp.lossless_mips32.c
++++ b/Source/LibWebP/src/dsp/dsp.lossless_mips32.c
+@@ -285,28 +285,28 @@ static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
+ // literal_ and successive histograms could be unaligned
+ // so we must use ulw and usw
+ #define ADD_TO_OUT(A, B, C, D, E, P0, P1, P2) \
+- "ulw %[temp0], "#A"(%["#P0"]) \n\t" \
+- "ulw %[temp1], "#B"(%["#P0"]) \n\t" \
+- "ulw %[temp2], "#C"(%["#P0"]) \n\t" \
+- "ulw %[temp3], "#D"(%["#P0"]) \n\t" \
+- "ulw %[temp4], "#A"(%["#P1"]) \n\t" \
+- "ulw %[temp5], "#B"(%["#P1"]) \n\t" \
+- "ulw %[temp6], "#C"(%["#P1"]) \n\t" \
+- "ulw %[temp7], "#D"(%["#P1"]) \n\t" \
++ "ulw %[temp0], " #A "(%[" #P0 "]) \n\t" \
++ "ulw %[temp1], " #B "(%[" #P0 "]) \n\t" \
++ "ulw %[temp2], " #C "(%[" #P0 "]) \n\t" \
++ "ulw %[temp3], " #D "(%[" #P0 "]) \n\t" \
++ "ulw %[temp4], " #A "(%[" #P1 "]) \n\t" \
++ "ulw %[temp5], " #B "(%[" #P1 "]) \n\t" \
++ "ulw %[temp6], " #C "(%[" #P1 "]) \n\t" \
++ "ulw %[temp7], " #D "(%[" #P1 "]) \n\t" \
+ "addu %[temp4], %[temp4], %[temp0] \n\t" \
+ "addu %[temp5], %[temp5], %[temp1] \n\t" \
+ "addu %[temp6], %[temp6], %[temp2] \n\t" \
+ "addu %[temp7], %[temp7], %[temp3] \n\t" \
+- "addiu %["#P0"], %["#P0"], 16 \n\t" \
+- ".if "#E" == 1 \n\t" \
+- "addiu %["#P1"], %["#P1"], 16 \n\t" \
++ "addiu %[" #P0 "], %[" #P0 "], 16 \n\t" \
++ ".if " #E " == 1 \n\t" \
++ "addiu %[" #P1 "], %[" #P1 "], 16 \n\t" \
+ ".endif \n\t" \
+- "usw %[temp4], "#A"(%["#P2"]) \n\t" \
+- "usw %[temp5], "#B"(%["#P2"]) \n\t" \
+- "usw %[temp6], "#C"(%["#P2"]) \n\t" \
+- "usw %[temp7], "#D"(%["#P2"]) \n\t" \
+- "addiu %["#P2"], %["#P2"], 16 \n\t" \
+- "bne %["#P0"], %[LoopEnd], 1b \n\t" \
++ "usw %[temp4], " #A "(%[" #P2 "]) \n\t" \
++ "usw %[temp5], " #B "(%[" #P2 "]) \n\t" \
++ "usw %[temp6], " #C "(%[" #P2 "]) \n\t" \
++ "usw %[temp7], " #D "(%[" #P2 "]) \n\t" \
++ "addiu %[" #P2 "], %[" #P2 "], 16 \n\t" \
++ "bne %[" #P0 "], %[LoopEnd], 1b \n\t" \
+ ".set pop \n\t" \
+
+ #define ASM_END_COMMON_0 \
+diff --git a/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c
+index 821cda9..31ac181 100644
+--- a/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c
++++ b/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c
+@@ -29,14 +29,14 @@ static void FUNC_NAME(const TYPE* src, \
+ for (x = 0; x < (width >> 2); ++x) { \
+ int tmp1, tmp2, tmp3, tmp4; \
+ __asm__ volatile ( \
+- ".ifc "#TYPE", uint8_t \n\t" \
++ ".ifc " #TYPE ", uint8_t \n\t" \
+ "lbu %[tmp1], 0(%[src]) \n\t" \
+ "lbu %[tmp2], 1(%[src]) \n\t" \
+ "lbu %[tmp3], 2(%[src]) \n\t" \
+ "lbu %[tmp4], 3(%[src]) \n\t" \
+ "addiu %[src], %[src], 4 \n\t" \
+ ".endif \n\t" \
+- ".ifc "#TYPE", uint32_t \n\t" \
++ ".ifc " #TYPE ", uint32_t \n\t" \
+ "lw %[tmp1], 0(%[src]) \n\t" \
+ "lw %[tmp2], 4(%[src]) \n\t" \
+ "lw %[tmp3], 8(%[src]) \n\t" \
+@@ -55,7 +55,7 @@ static void FUNC_NAME(const TYPE* src, \
+ "lwx %[tmp2], %[tmp2](%[color_map]) \n\t" \
+ "lwx %[tmp3], %[tmp3](%[color_map]) \n\t" \
+ "lwx %[tmp4], %[tmp4](%[color_map]) \n\t" \
+- ".ifc "#TYPE", uint8_t \n\t" \
++ ".ifc " #TYPE ", uint8_t \n\t" \
+ "ext %[tmp1], %[tmp1], 8, 8 \n\t" \
+ "ext %[tmp2], %[tmp2], 8, 8 \n\t" \
+ "ext %[tmp3], %[tmp3], 8, 8 \n\t" \
+@@ -66,7 +66,7 @@ static void FUNC_NAME(const TYPE* src, \
+ "sb %[tmp4], 3(%[dst]) \n\t" \
+ "addiu %[dst], %[dst], 4 \n\t" \
+ ".endif \n\t" \
+- ".ifc "#TYPE", uint32_t \n\t" \
++ ".ifc " #TYPE ", uint32_t \n\t" \
+ "sw %[tmp1], 0(%[dst]) \n\t" \
+ "sw %[tmp2], 4(%[dst]) \n\t" \
+ "sw %[tmp3], 8(%[dst]) \n\t" \
+diff --git a/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c
+index a7864a0..cb3adfe 100644
+--- a/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c
++++ b/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c
+@@ -34,15 +34,15 @@
+ G = G - t2 + kGCst; \
+ B = B + kBCst; \
+ __asm__ volatile ( \
+- "shll_s.w %["#R"], %["#R"], 9 \n\t" \
+- "shll_s.w %["#G"], %["#G"], 9 \n\t" \
+- "shll_s.w %["#B"], %["#B"], 9 \n\t" \
+- "precrqu_s.qb.ph %["#R"], %["#R"], $zero \n\t" \
+- "precrqu_s.qb.ph %["#G"], %["#G"], $zero \n\t" \
+- "precrqu_s.qb.ph %["#B"], %["#B"], $zero \n\t" \
+- "srl %["#R"], %["#R"], 24 \n\t" \
+- "srl %["#G"], %["#G"], 24 \n\t" \
+- "srl %["#B"], %["#B"], 24 \n\t" \
++ "shll_s.w %[" #R "], %[" #R "], 9 \n\t" \
++ "shll_s.w %[" #G "], %[" #G "], 9 \n\t" \
++ "shll_s.w %[" #B "], %[" #B "], 9 \n\t" \
++ "precrqu_s.qb.ph %[" #R "], %[" #R "], $zero \n\t" \
++ "precrqu_s.qb.ph %[" #G "], %[" #G "], $zero \n\t" \
++ "precrqu_s.qb.ph %[" #B "], %[" #B "], $zero \n\t" \
++ "srl %[" #R "], %[" #R "], 24 \n\t" \
++ "srl %[" #G "], %[" #G "], 24 \n\t" \
++ "srl %[" #B "], %[" #B "], 24 \n\t" \
+ : [R]"+r"(R), [G]"+r"(G), [B]"+r"(B) \
+ : \
+ ); \
+diff --git a/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c
+index 66adde5..51cbe9e 100644
+--- a/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c
++++ b/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c
+@@ -39,12 +39,12 @@
+ "addu %[temp5], %[temp0], %[temp1] \n\t" \
+ "subu %[temp6], %[temp0], %[temp2] \n\t" \
+ "addu %[temp7], %[temp0], %[temp4] \n\t" \
+-".if "#K" \n\t" \
++".if " #K " \n\t" \
+ "lbu %[temp0], 1(%[y]) \n\t" \
+ ".endif \n\t" \
+ "shll_s.w %[temp5], %[temp5], 9 \n\t" \
+ "shll_s.w %[temp6], %[temp6], 9 \n\t" \
+-".if "#K" \n\t" \
++".if " #K " \n\t" \
+ "mul %[temp0], %[t_con_5], %[temp0] \n\t" \
+ ".endif \n\t" \
+ "shll_s.w %[temp7], %[temp7], 9 \n\t" \
+@@ -54,9 +54,9 @@
+ "srl %[temp5], %[temp5], 24 \n\t" \
+ "srl %[temp6], %[temp6], 24 \n\t" \
+ "srl %[temp7], %[temp7], 24 \n\t" \
+- "sb %[temp5], "#R"(%[dst]) \n\t" \
+- "sb %[temp6], "#G"(%[dst]) \n\t" \
+- "sb %[temp7], "#B"(%[dst]) \n\t" \
++ "sb %[temp5], " #R "(%[dst]) \n\t" \
++ "sb %[temp6], " #G "(%[dst]) \n\t" \
++ "sb %[temp7], " #B "(%[dst]) \n\t" \
+
+ #define ASM_CLOBBER_LIST() \
+ : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), \
+--
+2.1.4
+