From 799757ccf1d03c33c75bc597cd5ef77741dcb6a7 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 3 Jun 2011 09:17:04 +0000 Subject: Imported upstream 4.91 --- sbc/formats.h | 55 ++ sbc/sbc.c | 1234 +++++++++++++++++++++++++++++++++++++++++++ sbc/sbc.h | 113 ++++ sbc/sbc_math.h | 61 +++ sbc/sbc_primitives.c | 554 +++++++++++++++++++ sbc/sbc_primitives.h | 80 +++ sbc/sbc_primitives_armv6.c | 299 +++++++++++ sbc/sbc_primitives_armv6.h | 52 ++ sbc/sbc_primitives_iwmmxt.c | 304 +++++++++++ sbc/sbc_primitives_iwmmxt.h | 42 ++ sbc/sbc_primitives_mmx.c | 375 +++++++++++++ sbc/sbc_primitives_mmx.h | 41 ++ sbc/sbc_primitives_neon.c | 893 +++++++++++++++++++++++++++++++ sbc/sbc_primitives_neon.h | 41 ++ sbc/sbc_tables.h | 660 +++++++++++++++++++++++ sbc/sbcdec.c | 293 ++++++++++ sbc/sbcenc.c | 308 +++++++++++ sbc/sbcinfo.c | 322 +++++++++++ sbc/sbctester.c | 358 +++++++++++++ 19 files changed, 6085 insertions(+) create mode 100644 sbc/formats.h create mode 100644 sbc/sbc.c create mode 100644 sbc/sbc.h create mode 100644 sbc/sbc_math.h create mode 100644 sbc/sbc_primitives.c create mode 100644 sbc/sbc_primitives.h create mode 100644 sbc/sbc_primitives_armv6.c create mode 100644 sbc/sbc_primitives_armv6.h create mode 100644 sbc/sbc_primitives_iwmmxt.c create mode 100644 sbc/sbc_primitives_iwmmxt.h create mode 100644 sbc/sbc_primitives_mmx.c create mode 100644 sbc/sbc_primitives_mmx.h create mode 100644 sbc/sbc_primitives_neon.c create mode 100644 sbc/sbc_primitives_neon.h create mode 100644 sbc/sbc_tables.h create mode 100644 sbc/sbcdec.c create mode 100644 sbc/sbcenc.c create mode 100644 sbc/sbcinfo.c create mode 100644 sbc/sbctester.c (limited to 'sbc') diff --git a/sbc/formats.h b/sbc/formats.h new file mode 100644 index 0000000..3050b25 --- /dev/null +++ b/sbc/formats.h @@ -0,0 +1,55 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2010 Marcel Holtmann + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define COMPOSE_ID(a,b,c,d) ((a) | ((b)<<8) | ((c)<<16) | ((d)<<24)) +#define LE_SHORT(v) (v) +#define LE_INT(v) (v) +#define BE_SHORT(v) bswap_16(v) +#define BE_INT(v) bswap_32(v) +#elif __BYTE_ORDER == __BIG_ENDIAN +#define COMPOSE_ID(a,b,c,d) ((d) | ((c)<<8) | ((b)<<16) | ((a)<<24)) +#define LE_SHORT(v) bswap_16(v) +#define LE_INT(v) bswap_32(v) +#define BE_SHORT(v) (v) +#define BE_INT(v) (v) +#else +#error "Wrong endian" +#endif + +#define AU_MAGIC COMPOSE_ID('.','s','n','d') + +#define AU_FMT_ULAW 1 +#define AU_FMT_LIN8 2 +#define AU_FMT_LIN16 3 + +struct au_header { + uint32_t magic; /* '.snd' */ + uint32_t hdr_size; /* size of header (min 24) */ + uint32_t data_size; /* size of data */ + uint32_t encoding; /* see to AU_FMT_XXXX */ + uint32_t sample_rate; /* sample rate */ + uint32_t channels; /* number of channels (voices) */ +}; diff --git a/sbc/sbc.c b/sbc/sbc.c new file mode 100644 index 0000000..77fcc5d --- /dev/null +++ b/sbc/sbc.c @@ -0,0 +1,1234 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2008 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* todo items: + + use a log2 table for byte integer scale factors calculation (sum log2 results + for high and low bytes) fill bitpool by 16 bits instead of one at a time in + bits allocation/bitpool generation port to the dsp + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include + +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc.h" +#include "sbc_primitives.h" + +#define SBC_SYNCWORD 0x9C + +/* This structure contains an unpacked SBC frame. + Yes, there is probably quite some unused space herein */ +struct sbc_frame { + uint8_t frequency; + uint8_t block_mode; + uint8_t blocks; + enum { + MONO = SBC_MODE_MONO, + DUAL_CHANNEL = SBC_MODE_DUAL_CHANNEL, + STEREO = SBC_MODE_STEREO, + JOINT_STEREO = SBC_MODE_JOINT_STEREO + } mode; + uint8_t channels; + enum { + LOUDNESS = SBC_AM_LOUDNESS, + SNR = SBC_AM_SNR + } allocation; + uint8_t subband_mode; + uint8_t subbands; + uint8_t bitpool; + uint16_t codesize; + uint8_t length; + + /* bit number x set means joint stereo has been used in subband x */ + uint8_t joint; + + /* only the lower 4 bits of every element are to be used */ + uint32_t SBC_ALIGNED scale_factor[2][8]; + + /* raw integer subband samples in the frame */ + int32_t SBC_ALIGNED sb_sample_f[16][2][8]; + + /* modified subband samples */ + int32_t SBC_ALIGNED sb_sample[16][2][8]; + + /* original pcm audio samples */ + int16_t SBC_ALIGNED pcm_sample[2][16*8]; +}; + +struct sbc_decoder_state { + int subbands; + int32_t V[2][170]; + int offset[2][16]; +}; + +/* + * Calculates the CRC-8 of the first len bits in data + */ +static const uint8_t crc_table[256] = { + 0x00, 0x1D, 0x3A, 0x27, 0x74, 0x69, 0x4E, 0x53, + 0xE8, 0xF5, 0xD2, 0xCF, 0x9C, 0x81, 0xA6, 0xBB, + 0xCD, 0xD0, 0xF7, 0xEA, 0xB9, 0xA4, 0x83, 0x9E, + 0x25, 0x38, 0x1F, 0x02, 0x51, 0x4C, 0x6B, 0x76, + 0x87, 0x9A, 0xBD, 0xA0, 0xF3, 0xEE, 0xC9, 0xD4, + 0x6F, 0x72, 0x55, 0x48, 0x1B, 0x06, 0x21, 0x3C, + 0x4A, 0x57, 0x70, 0x6D, 0x3E, 0x23, 0x04, 0x19, + 0xA2, 0xBF, 0x98, 0x85, 0xD6, 0xCB, 0xEC, 0xF1, + 0x13, 0x0E, 0x29, 0x34, 0x67, 0x7A, 0x5D, 0x40, + 0xFB, 0xE6, 0xC1, 0xDC, 0x8F, 0x92, 0xB5, 0xA8, + 0xDE, 0xC3, 0xE4, 0xF9, 0xAA, 0xB7, 0x90, 0x8D, + 0x36, 0x2B, 0x0C, 0x11, 0x42, 0x5F, 0x78, 0x65, + 0x94, 0x89, 0xAE, 0xB3, 0xE0, 0xFD, 0xDA, 0xC7, + 0x7C, 0x61, 0x46, 0x5B, 0x08, 0x15, 0x32, 0x2F, + 0x59, 0x44, 0x63, 0x7E, 0x2D, 0x30, 0x17, 0x0A, + 0xB1, 0xAC, 0x8B, 0x96, 0xC5, 0xD8, 0xFF, 0xE2, + 0x26, 0x3B, 0x1C, 0x01, 0x52, 0x4F, 0x68, 0x75, + 0xCE, 0xD3, 0xF4, 0xE9, 0xBA, 0xA7, 0x80, 0x9D, + 0xEB, 0xF6, 0xD1, 0xCC, 0x9F, 0x82, 0xA5, 0xB8, + 0x03, 0x1E, 0x39, 0x24, 0x77, 0x6A, 0x4D, 0x50, + 0xA1, 0xBC, 0x9B, 0x86, 0xD5, 0xC8, 0xEF, 0xF2, + 0x49, 0x54, 0x73, 0x6E, 0x3D, 0x20, 0x07, 0x1A, + 0x6C, 0x71, 0x56, 0x4B, 0x18, 0x05, 0x22, 0x3F, + 0x84, 0x99, 0xBE, 0xA3, 0xF0, 0xED, 0xCA, 0xD7, + 0x35, 0x28, 0x0F, 0x12, 0x41, 0x5C, 0x7B, 0x66, + 0xDD, 0xC0, 0xE7, 0xFA, 0xA9, 0xB4, 0x93, 0x8E, + 0xF8, 0xE5, 0xC2, 0xDF, 0x8C, 0x91, 0xB6, 0xAB, + 0x10, 0x0D, 0x2A, 0x37, 0x64, 0x79, 0x5E, 0x43, + 0xB2, 0xAF, 0x88, 0x95, 0xC6, 0xDB, 0xFC, 0xE1, + 0x5A, 0x47, 0x60, 0x7D, 0x2E, 0x33, 0x14, 0x09, + 0x7F, 0x62, 0x45, 0x58, 0x0B, 0x16, 0x31, 0x2C, + 0x97, 0x8A, 0xAD, 0xB0, 0xE3, 0xFE, 0xD9, 0xC4 +}; + +static uint8_t sbc_crc8(const uint8_t *data, size_t len) +{ + uint8_t crc = 0x0f; + size_t i; + uint8_t octet; + + for (i = 0; i < len / 8; i++) + crc = crc_table[crc ^ data[i]]; + + octet = data[i]; + for (i = 0; i < len % 8; i++) { + char bit = ((octet ^ crc) & 0x80) >> 7; + + crc = ((crc & 0x7f) << 1) ^ (bit ? 0x1d : 0); + + octet = octet << 1; + } + + return crc; +} + +/* + * Code straight from the spec to calculate the bits array + * Takes a pointer to the frame in question, a pointer to the bits array and + * the sampling frequency (as 2 bit integer) + */ +static SBC_ALWAYS_INLINE void sbc_calculate_bits_internal( + const struct sbc_frame *frame, int (*bits)[8], int subbands) +{ + uint8_t sf = frame->frequency; + + if (frame->mode == MONO || frame->mode == DUAL_CHANNEL) { + int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice; + int ch, sb; + + for (ch = 0; ch < frame->channels; ch++) { + max_bitneed = 0; + if (frame->allocation == SNR) { + for (sb = 0; sb < subbands; sb++) { + bitneed[ch][sb] = frame->scale_factor[ch][sb]; + if (bitneed[ch][sb] > max_bitneed) + max_bitneed = bitneed[ch][sb]; + } + } else { + for (sb = 0; sb < subbands; sb++) { + if (frame->scale_factor[ch][sb] == 0) + bitneed[ch][sb] = -5; + else { + if (subbands == 4) + loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb]; + else + loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb]; + if (loudness > 0) + bitneed[ch][sb] = loudness / 2; + else + bitneed[ch][sb] = loudness; + } + if (bitneed[ch][sb] > max_bitneed) + max_bitneed = bitneed[ch][sb]; + } + } + + bitcount = 0; + slicecount = 0; + bitslice = max_bitneed + 1; + do { + bitslice--; + bitcount += slicecount; + slicecount = 0; + for (sb = 0; sb < subbands; sb++) { + if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16)) + slicecount++; + else if (bitneed[ch][sb] == bitslice + 1) + slicecount += 2; + } + } while (bitcount + slicecount < frame->bitpool); + + if (bitcount + slicecount == frame->bitpool) { + bitcount += slicecount; + bitslice--; + } + + for (sb = 0; sb < subbands; sb++) { + if (bitneed[ch][sb] < bitslice + 2) + bits[ch][sb] = 0; + else { + bits[ch][sb] = bitneed[ch][sb] - bitslice; + if (bits[ch][sb] > 16) + bits[ch][sb] = 16; + } + } + + for (sb = 0; bitcount < frame->bitpool && + sb < subbands; sb++) { + if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) { + bits[ch][sb]++; + bitcount++; + } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) { + bits[ch][sb] = 2; + bitcount += 2; + } + } + + for (sb = 0; bitcount < frame->bitpool && + sb < subbands; sb++) { + if (bits[ch][sb] < 16) { + bits[ch][sb]++; + bitcount++; + } + } + + } + + } else if (frame->mode == STEREO || frame->mode == JOINT_STEREO) { + int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice; + int ch, sb; + + max_bitneed = 0; + if (frame->allocation == SNR) { + for (ch = 0; ch < 2; ch++) { + for (sb = 0; sb < subbands; sb++) { + bitneed[ch][sb] = frame->scale_factor[ch][sb]; + if (bitneed[ch][sb] > max_bitneed) + max_bitneed = bitneed[ch][sb]; + } + } + } else { + for (ch = 0; ch < 2; ch++) { + for (sb = 0; sb < subbands; sb++) { + if (frame->scale_factor[ch][sb] == 0) + bitneed[ch][sb] = -5; + else { + if (subbands == 4) + loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb]; + else + loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb]; + if (loudness > 0) + bitneed[ch][sb] = loudness / 2; + else + bitneed[ch][sb] = loudness; + } + if (bitneed[ch][sb] > max_bitneed) + max_bitneed = bitneed[ch][sb]; + } + } + } + + bitcount = 0; + slicecount = 0; + bitslice = max_bitneed + 1; + do { + bitslice--; + bitcount += slicecount; + slicecount = 0; + for (ch = 0; ch < 2; ch++) { + for (sb = 0; sb < subbands; sb++) { + if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16)) + slicecount++; + else if (bitneed[ch][sb] == bitslice + 1) + slicecount += 2; + } + } + } while (bitcount + slicecount < frame->bitpool); + + if (bitcount + slicecount == frame->bitpool) { + bitcount += slicecount; + bitslice--; + } + + for (ch = 0; ch < 2; ch++) { + for (sb = 0; sb < subbands; sb++) { + if (bitneed[ch][sb] < bitslice + 2) { + bits[ch][sb] = 0; + } else { + bits[ch][sb] = bitneed[ch][sb] - bitslice; + if (bits[ch][sb] > 16) + bits[ch][sb] = 16; + } + } + } + + ch = 0; + sb = 0; + while (bitcount < frame->bitpool) { + if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) { + bits[ch][sb]++; + bitcount++; + } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) { + bits[ch][sb] = 2; + bitcount += 2; + } + if (ch == 1) { + ch = 0; + sb++; + if (sb >= subbands) + break; + } else + ch = 1; + } + + ch = 0; + sb = 0; + while (bitcount < frame->bitpool) { + if (bits[ch][sb] < 16) { + bits[ch][sb]++; + bitcount++; + } + if (ch == 1) { + ch = 0; + sb++; + if (sb >= subbands) + break; + } else + ch = 1; + } + + } + +} + +static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) +{ + if (frame->subbands == 4) + sbc_calculate_bits_internal(frame, bits, 4); + else + sbc_calculate_bits_internal(frame, bits, 8); +} + +/* + * Unpacks a SBC frame at the beginning of the stream in data, + * which has at most len bytes into frame. + * Returns the length in bytes of the packed frame, or a negative + * value on error. The error codes are: + * + * -1 Data stream too short + * -2 Sync byte incorrect + * -3 CRC8 incorrect + * -4 Bitpool value out of bounds + */ +static int sbc_unpack_frame(const uint8_t *data, struct sbc_frame *frame, + size_t len) +{ + unsigned int consumed; + /* Will copy the parts of the header that are relevant to crc + * calculation here */ + uint8_t crc_header[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + int crc_pos = 0; + int32_t temp; + + int audio_sample; + int ch, sb, blk, bit; /* channel, subband, block and bit standard + counters */ + int bits[2][8]; /* bits distribution */ + uint32_t levels[2][8]; /* levels derived from that */ + + if (len < 4) + return -1; + + if (data[0] != SBC_SYNCWORD) + return -2; + + frame->frequency = (data[1] >> 6) & 0x03; + + frame->block_mode = (data[1] >> 4) & 0x03; + switch (frame->block_mode) { + case SBC_BLK_4: + frame->blocks = 4; + break; + case SBC_BLK_8: + frame->blocks = 8; + break; + case SBC_BLK_12: + frame->blocks = 12; + break; + case SBC_BLK_16: + frame->blocks = 16; + break; + } + + frame->mode = (data[1] >> 2) & 0x03; + switch (frame->mode) { + case MONO: + frame->channels = 1; + break; + case DUAL_CHANNEL: /* fall-through */ + case STEREO: + case JOINT_STEREO: + frame->channels = 2; + break; + } + + frame->allocation = (data[1] >> 1) & 0x01; + + frame->subband_mode = (data[1] & 0x01); + frame->subbands = frame->subband_mode ? 8 : 4; + + frame->bitpool = data[2]; + + if ((frame->mode == MONO || frame->mode == DUAL_CHANNEL) && + frame->bitpool > 16 * frame->subbands) + return -4; + + if ((frame->mode == STEREO || frame->mode == JOINT_STEREO) && + frame->bitpool > 32 * frame->subbands) + return -4; + + /* data[3] is crc, we're checking it later */ + + consumed = 32; + + crc_header[0] = data[1]; + crc_header[1] = data[2]; + crc_pos = 16; + + if (frame->mode == JOINT_STEREO) { + if (len * 8 < consumed + frame->subbands) + return -1; + + frame->joint = 0x00; + for (sb = 0; sb < frame->subbands - 1; sb++) + frame->joint |= ((data[4] >> (7 - sb)) & 0x01) << sb; + if (frame->subbands == 4) + crc_header[crc_pos / 8] = data[4] & 0xf0; + else + crc_header[crc_pos / 8] = data[4]; + + consumed += frame->subbands; + crc_pos += frame->subbands; + } + + if (len * 8 < consumed + (4 * frame->subbands * frame->channels)) + return -1; + + for (ch = 0; ch < frame->channels; ch++) { + for (sb = 0; sb < frame->subbands; sb++) { + /* FIXME assert(consumed % 4 == 0); */ + frame->scale_factor[ch][sb] = + (data[consumed >> 3] >> (4 - (consumed & 0x7))) & 0x0F; + crc_header[crc_pos >> 3] |= + frame->scale_factor[ch][sb] << (4 - (crc_pos & 0x7)); + + consumed += 4; + crc_pos += 4; + } + } + + if (data[3] != sbc_crc8(crc_header, crc_pos)) + return -3; + + sbc_calculate_bits(frame, bits); + + for (ch = 0; ch < frame->channels; ch++) { + for (sb = 0; sb < frame->subbands; sb++) + levels[ch][sb] = (1 << bits[ch][sb]) - 1; + } + + for (blk = 0; blk < frame->blocks; blk++) { + for (ch = 0; ch < frame->channels; ch++) { + for (sb = 0; sb < frame->subbands; sb++) { + if (levels[ch][sb] > 0) { + audio_sample = 0; + for (bit = 0; bit < bits[ch][sb]; bit++) { + if (consumed > len * 8) + return -1; + + if ((data[consumed >> 3] >> (7 - (consumed & 0x7))) & 0x01) + audio_sample |= 1 << (bits[ch][sb] - bit - 1); + + consumed++; + } + + frame->sb_sample[blk][ch][sb] = + (((audio_sample << 1) | 1) << frame->scale_factor[ch][sb]) / + levels[ch][sb] - (1 << frame->scale_factor[ch][sb]); + } else + frame->sb_sample[blk][ch][sb] = 0; + } + } + } + + if (frame->mode == JOINT_STEREO) { + for (blk = 0; blk < frame->blocks; blk++) { + for (sb = 0; sb < frame->subbands; sb++) { + if (frame->joint & (0x01 << sb)) { + temp = frame->sb_sample[blk][0][sb] + + frame->sb_sample[blk][1][sb]; + frame->sb_sample[blk][1][sb] = + frame->sb_sample[blk][0][sb] - + frame->sb_sample[blk][1][sb]; + frame->sb_sample[blk][0][sb] = temp; + } + } + } + } + + if ((consumed & 0x7) != 0) + consumed += 8 - (consumed & 0x7); + + return consumed >> 3; +} + +static void sbc_decoder_init(struct sbc_decoder_state *state, + const struct sbc_frame *frame) +{ + int i, ch; + + memset(state->V, 0, sizeof(state->V)); + state->subbands = frame->subbands; + + for (ch = 0; ch < 2; ch++) + for (i = 0; i < frame->subbands * 2; i++) + state->offset[ch][i] = (10 * i + 10); +} + +static SBC_ALWAYS_INLINE int16_t sbc_clip16(int32_t s) +{ + if (s > 0x7FFF) + return 0x7FFF; + else if (s < -0x8000) + return -0x8000; + else + return s; +} + +static inline void sbc_synthesize_four(struct sbc_decoder_state *state, + struct sbc_frame *frame, int ch, int blk) +{ + int i, k, idx; + int32_t *v = state->V[ch]; + int *offset = state->offset[ch]; + + for (i = 0; i < 8; i++) { + /* Shifting */ + offset[i]--; + if (offset[i] < 0) { + offset[i] = 79; + memcpy(v + 80, v, 9 * sizeof(*v)); + } + + /* Distribute the new matrix value to the shifted position */ + v[offset[i]] = SCALE4_STAGED1( + MULA(synmatrix4[i][0], frame->sb_sample[blk][ch][0], + MULA(synmatrix4[i][1], frame->sb_sample[blk][ch][1], + MULA(synmatrix4[i][2], frame->sb_sample[blk][ch][2], + MUL (synmatrix4[i][3], frame->sb_sample[blk][ch][3]))))); + } + + /* Compute the samples */ + for (idx = 0, i = 0; i < 4; i++, idx += 5) { + k = (i + 4) & 0xf; + + /* Store in output, Q0 */ + frame->pcm_sample[ch][blk * 4 + i] = sbc_clip16(SCALE4_STAGED1( + MULA(v[offset[i] + 0], sbc_proto_4_40m0[idx + 0], + MULA(v[offset[k] + 1], sbc_proto_4_40m1[idx + 0], + MULA(v[offset[i] + 2], sbc_proto_4_40m0[idx + 1], + MULA(v[offset[k] + 3], sbc_proto_4_40m1[idx + 1], + MULA(v[offset[i] + 4], sbc_proto_4_40m0[idx + 2], + MULA(v[offset[k] + 5], sbc_proto_4_40m1[idx + 2], + MULA(v[offset[i] + 6], sbc_proto_4_40m0[idx + 3], + MULA(v[offset[k] + 7], sbc_proto_4_40m1[idx + 3], + MULA(v[offset[i] + 8], sbc_proto_4_40m0[idx + 4], + MUL( v[offset[k] + 9], sbc_proto_4_40m1[idx + 4])))))))))))); + } +} + +static inline void sbc_synthesize_eight(struct sbc_decoder_state *state, + struct sbc_frame *frame, int ch, int blk) +{ + int i, j, k, idx; + int *offset = state->offset[ch]; + + for (i = 0; i < 16; i++) { + /* Shifting */ + offset[i]--; + if (offset[i] < 0) { + offset[i] = 159; + for (j = 0; j < 9; j++) + state->V[ch][j + 160] = state->V[ch][j]; + } + + /* Distribute the new matrix value to the shifted position */ + state->V[ch][offset[i]] = SCALE8_STAGED1( + MULA(synmatrix8[i][0], frame->sb_sample[blk][ch][0], + MULA(synmatrix8[i][1], frame->sb_sample[blk][ch][1], + MULA(synmatrix8[i][2], frame->sb_sample[blk][ch][2], + MULA(synmatrix8[i][3], frame->sb_sample[blk][ch][3], + MULA(synmatrix8[i][4], frame->sb_sample[blk][ch][4], + MULA(synmatrix8[i][5], frame->sb_sample[blk][ch][5], + MULA(synmatrix8[i][6], frame->sb_sample[blk][ch][6], + MUL( synmatrix8[i][7], frame->sb_sample[blk][ch][7]))))))))); + } + + /* Compute the samples */ + for (idx = 0, i = 0; i < 8; i++, idx += 5) { + k = (i + 8) & 0xf; + + /* Store in output, Q0 */ + frame->pcm_sample[ch][blk * 8 + i] = sbc_clip16(SCALE8_STAGED1( + MULA(state->V[ch][offset[i] + 0], sbc_proto_8_80m0[idx + 0], + MULA(state->V[ch][offset[k] + 1], sbc_proto_8_80m1[idx + 0], + MULA(state->V[ch][offset[i] + 2], sbc_proto_8_80m0[idx + 1], + MULA(state->V[ch][offset[k] + 3], sbc_proto_8_80m1[idx + 1], + MULA(state->V[ch][offset[i] + 4], sbc_proto_8_80m0[idx + 2], + MULA(state->V[ch][offset[k] + 5], sbc_proto_8_80m1[idx + 2], + MULA(state->V[ch][offset[i] + 6], sbc_proto_8_80m0[idx + 3], + MULA(state->V[ch][offset[k] + 7], sbc_proto_8_80m1[idx + 3], + MULA(state->V[ch][offset[i] + 8], sbc_proto_8_80m0[idx + 4], + MUL( state->V[ch][offset[k] + 9], sbc_proto_8_80m1[idx + 4])))))))))))); + } +} + +static int sbc_synthesize_audio(struct sbc_decoder_state *state, + struct sbc_frame *frame) +{ + int ch, blk; + + switch (frame->subbands) { + case 4: + for (ch = 0; ch < frame->channels; ch++) { + for (blk = 0; blk < frame->blocks; blk++) + sbc_synthesize_four(state, frame, ch, blk); + } + return frame->blocks * 4; + + case 8: + for (ch = 0; ch < frame->channels; ch++) { + for (blk = 0; blk < frame->blocks; blk++) + sbc_synthesize_eight(state, frame, ch, blk); + } + return frame->blocks * 8; + + default: + return -EIO; + } +} + +static int sbc_analyze_audio(struct sbc_encoder_state *state, + struct sbc_frame *frame) +{ + int ch, blk; + int16_t *x; + + switch (frame->subbands) { + case 4: + for (ch = 0; ch < frame->channels; ch++) { + x = &state->X[ch][state->position - 16 + + frame->blocks * 4]; + for (blk = 0; blk < frame->blocks; blk += 4) { + state->sbc_analyze_4b_4s( + x, + frame->sb_sample_f[blk][ch], + frame->sb_sample_f[blk + 1][ch] - + frame->sb_sample_f[blk][ch]); + x -= 16; + } + } + return frame->blocks * 4; + + case 8: + for (ch = 0; ch < frame->channels; ch++) { + x = &state->X[ch][state->position - 32 + + frame->blocks * 8]; + for (blk = 0; blk < frame->blocks; blk += 4) { + state->sbc_analyze_4b_8s( + x, + frame->sb_sample_f[blk][ch], + frame->sb_sample_f[blk + 1][ch] - + frame->sb_sample_f[blk][ch]); + x -= 32; + } + } + return frame->blocks * 8; + + default: + return -EIO; + } +} + +/* Supplementary bitstream writing macros for 'sbc_pack_frame' */ + +#define PUT_BITS(data_ptr, bits_cache, bits_count, v, n) \ + do { \ + bits_cache = (v) | (bits_cache << (n)); \ + bits_count += (n); \ + if (bits_count >= 16) { \ + bits_count -= 8; \ + *data_ptr++ = (uint8_t) \ + (bits_cache >> bits_count); \ + bits_count -= 8; \ + *data_ptr++ = (uint8_t) \ + (bits_cache >> bits_count); \ + } \ + } while (0) + +#define FLUSH_BITS(data_ptr, bits_cache, bits_count) \ + do { \ + while (bits_count >= 8) { \ + bits_count -= 8; \ + *data_ptr++ = (uint8_t) \ + (bits_cache >> bits_count); \ + } \ + if (bits_count > 0) \ + *data_ptr++ = (uint8_t) \ + (bits_cache << (8 - bits_count)); \ + } while (0) + +/* + * Packs the SBC frame from frame into the memory at data. At most len + * bytes will be used, should more memory be needed an appropriate + * error code will be returned. Returns the length of the packed frame + * on success or a negative value on error. + * + * The error codes are: + * -1 Not enough memory reserved + * -2 Unsupported sampling rate + * -3 Unsupported number of blocks + * -4 Unsupported number of subbands + * -5 Bitpool value out of bounds + * -99 not implemented + */ + +static SBC_ALWAYS_INLINE ssize_t sbc_pack_frame_internal(uint8_t *data, + struct sbc_frame *frame, size_t len, + int frame_subbands, int frame_channels, + int joint) +{ + /* Bitstream writer starts from the fourth byte */ + uint8_t *data_ptr = data + 4; + uint32_t bits_cache = 0; + uint32_t bits_count = 0; + + /* Will copy the header parts for CRC-8 calculation here */ + uint8_t crc_header[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + int crc_pos = 0; + + uint32_t audio_sample; + + int ch, sb, blk; /* channel, subband, block and bit counters */ + int bits[2][8]; /* bits distribution */ + uint32_t levels[2][8]; /* levels are derived from that */ + uint32_t sb_sample_delta[2][8]; + + data[0] = SBC_SYNCWORD; + + data[1] = (frame->frequency & 0x03) << 6; + + data[1] |= (frame->block_mode & 0x03) << 4; + + data[1] |= (frame->mode & 0x03) << 2; + + data[1] |= (frame->allocation & 0x01) << 1; + + switch (frame_subbands) { + case 4: + /* Nothing to do */ + break; + case 8: + data[1] |= 0x01; + break; + default: + return -4; + break; + } + + data[2] = frame->bitpool; + + if ((frame->mode == MONO || frame->mode == DUAL_CHANNEL) && + frame->bitpool > frame_subbands << 4) + return -5; + + if ((frame->mode == STEREO || frame->mode == JOINT_STEREO) && + frame->bitpool > frame_subbands << 5) + return -5; + + /* Can't fill in crc yet */ + + crc_header[0] = data[1]; + crc_header[1] = data[2]; + crc_pos = 16; + + if (frame->mode == JOINT_STEREO) { + PUT_BITS(data_ptr, bits_cache, bits_count, + joint, frame_subbands); + crc_header[crc_pos >> 3] = joint; + crc_pos += frame_subbands; + } + + for (ch = 0; ch < frame_channels; ch++) { + for (sb = 0; sb < frame_subbands; sb++) { + PUT_BITS(data_ptr, bits_cache, bits_count, + frame->scale_factor[ch][sb] & 0x0F, 4); + crc_header[crc_pos >> 3] <<= 4; + crc_header[crc_pos >> 3] |= frame->scale_factor[ch][sb] & 0x0F; + crc_pos += 4; + } + } + + /* align the last crc byte */ + if (crc_pos % 8) + crc_header[crc_pos >> 3] <<= 8 - (crc_pos % 8); + + data[3] = sbc_crc8(crc_header, crc_pos); + + sbc_calculate_bits(frame, bits); + + for (ch = 0; ch < frame_channels; ch++) { + for (sb = 0; sb < frame_subbands; sb++) { + levels[ch][sb] = ((1 << bits[ch][sb]) - 1) << + (32 - (frame->scale_factor[ch][sb] + + SCALE_OUT_BITS + 2)); + sb_sample_delta[ch][sb] = (uint32_t) 1 << + (frame->scale_factor[ch][sb] + + SCALE_OUT_BITS + 1); + } + } + + for (blk = 0; blk < frame->blocks; blk++) { + for (ch = 0; ch < frame_channels; ch++) { + for (sb = 0; sb < frame_subbands; sb++) { + + if (bits[ch][sb] == 0) + continue; + + audio_sample = ((uint64_t) levels[ch][sb] * + (sb_sample_delta[ch][sb] + + frame->sb_sample_f[blk][ch][sb])) >> 32; + + PUT_BITS(data_ptr, bits_cache, bits_count, + audio_sample, bits[ch][sb]); + } + } + } + + FLUSH_BITS(data_ptr, bits_cache, bits_count); + + return data_ptr - data; +} + +static ssize_t sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len, + int joint) +{ + if (frame->subbands == 4) { + if (frame->channels == 1) + return sbc_pack_frame_internal( + data, frame, len, 4, 1, joint); + else + return sbc_pack_frame_internal( + data, frame, len, 4, 2, joint); + } else { + if (frame->channels == 1) + return sbc_pack_frame_internal( + data, frame, len, 8, 1, joint); + else + return sbc_pack_frame_internal( + data, frame, len, 8, 2, joint); + } +} + +static void sbc_encoder_init(struct sbc_encoder_state *state, + const struct sbc_frame *frame) +{ + memset(&state->X, 0, sizeof(state->X)); + state->position = (SBC_X_BUFFER_SIZE - frame->subbands * 9) & ~7; + + sbc_init_primitives(state); +} + +struct sbc_priv { + int init; + struct SBC_ALIGNED sbc_frame frame; + struct SBC_ALIGNED sbc_decoder_state dec_state; + struct SBC_ALIGNED sbc_encoder_state enc_state; +}; + +static void sbc_set_defaults(sbc_t *sbc, unsigned long flags) +{ + sbc->frequency = SBC_FREQ_44100; + sbc->mode = SBC_MODE_STEREO; + sbc->subbands = SBC_SB_8; + sbc->blocks = SBC_BLK_16; + sbc->bitpool = 32; +#if __BYTE_ORDER == __LITTLE_ENDIAN + sbc->endian = SBC_LE; +#elif __BYTE_ORDER == __BIG_ENDIAN + sbc->endian = SBC_BE; +#else +#error "Unknown byte order" +#endif +} + +int sbc_init(sbc_t *sbc, unsigned long flags) +{ + if (!sbc) + return -EIO; + + memset(sbc, 0, sizeof(sbc_t)); + + sbc->priv_alloc_base = malloc(sizeof(struct sbc_priv) + SBC_ALIGN_MASK); + if (!sbc->priv_alloc_base) + return -ENOMEM; + + sbc->priv = (void *) (((uintptr_t) sbc->priv_alloc_base + + SBC_ALIGN_MASK) & ~((uintptr_t) SBC_ALIGN_MASK)); + + memset(sbc->priv, 0, sizeof(struct sbc_priv)); + + sbc_set_defaults(sbc, flags); + + return 0; +} + +ssize_t sbc_parse(sbc_t *sbc, const void *input, size_t input_len) +{ + return sbc_decode(sbc, input, input_len, NULL, 0, NULL); +} + +ssize_t sbc_decode(sbc_t *sbc, const void *input, size_t input_len, + void *output, size_t output_len, size_t *written) +{ + struct sbc_priv *priv; + char *ptr; + int i, ch, framelen, samples; + + if (!sbc || !input) + return -EIO; + + priv = sbc->priv; + + framelen = sbc_unpack_frame(input, &priv->frame, input_len); + + if (!priv->init) { + sbc_decoder_init(&priv->dec_state, &priv->frame); + priv->init = 1; + + sbc->frequency = priv->frame.frequency; + sbc->mode = priv->frame.mode; + sbc->subbands = priv->frame.subband_mode; + sbc->blocks = priv->frame.block_mode; + sbc->allocation = priv->frame.allocation; + sbc->bitpool = priv->frame.bitpool; + + priv->frame.codesize = sbc_get_codesize(sbc); + priv->frame.length = framelen; + } else if (priv->frame.bitpool != sbc->bitpool) { + priv->frame.length = framelen; + sbc->bitpool = priv->frame.bitpool; + } + + if (!output) + return framelen; + + if (written) + *written = 0; + + if (framelen <= 0) + return framelen; + + samples = sbc_synthesize_audio(&priv->dec_state, &priv->frame); + + ptr = output; + + if (output_len < (size_t) (samples * priv->frame.channels * 2)) + samples = output_len / (priv->frame.channels * 2); + + for (i = 0; i < samples; i++) { + for (ch = 0; ch < priv->frame.channels; ch++) { + int16_t s; + s = priv->frame.pcm_sample[ch][i]; + + if (sbc->endian == SBC_BE) { + *ptr++ = (s & 0xff00) >> 8; + *ptr++ = (s & 0x00ff); + } else { + *ptr++ = (s & 0x00ff); + *ptr++ = (s & 0xff00) >> 8; + } + } + } + + if (written) + *written = samples * priv->frame.channels * 2; + + return framelen; +} + +ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len, + void *output, size_t output_len, ssize_t *written) +{ + struct sbc_priv *priv; + int samples; + ssize_t framelen; + int (*sbc_enc_process_input)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + + if (!sbc || !input) + return -EIO; + + priv = sbc->priv; + + if (written) + *written = 0; + + if (!priv->init) { + priv->frame.frequency = sbc->frequency; + priv->frame.mode = sbc->mode; + priv->frame.channels = sbc->mode == SBC_MODE_MONO ? 1 : 2; + priv->frame.allocation = sbc->allocation; + priv->frame.subband_mode = sbc->subbands; + priv->frame.subbands = sbc->subbands ? 8 : 4; + priv->frame.block_mode = sbc->blocks; + priv->frame.blocks = 4 + (sbc->blocks * 4); + priv->frame.bitpool = sbc->bitpool; + priv->frame.codesize = sbc_get_codesize(sbc); + priv->frame.length = sbc_get_frame_length(sbc); + + sbc_encoder_init(&priv->enc_state, &priv->frame); + priv->init = 1; + } else if (priv->frame.bitpool != sbc->bitpool) { + priv->frame.length = sbc_get_frame_length(sbc); + priv->frame.bitpool = sbc->bitpool; + } + + /* input must be large enough to encode a complete frame */ + if (input_len < priv->frame.codesize) + return 0; + + /* output must be large enough to receive the encoded frame */ + if (!output || output_len < priv->frame.length) + return -ENOSPC; + + /* Select the needed input data processing function and call it */ + if (priv->frame.subbands == 8) { + if (sbc->endian == SBC_BE) + sbc_enc_process_input = + priv->enc_state.sbc_enc_process_input_8s_be; + else + sbc_enc_process_input = + priv->enc_state.sbc_enc_process_input_8s_le; + } else { + if (sbc->endian == SBC_BE) + sbc_enc_process_input = + priv->enc_state.sbc_enc_process_input_4s_be; + else + sbc_enc_process_input = + priv->enc_state.sbc_enc_process_input_4s_le; + } + + priv->enc_state.position = sbc_enc_process_input( + priv->enc_state.position, (const uint8_t *) input, + priv->enc_state.X, priv->frame.subbands * priv->frame.blocks, + priv->frame.channels); + + samples = sbc_analyze_audio(&priv->enc_state, &priv->frame); + + if (priv->frame.mode == JOINT_STEREO) { + int j = priv->enc_state.sbc_calc_scalefactors_j( + priv->frame.sb_sample_f, priv->frame.scale_factor, + priv->frame.blocks, priv->frame.subbands); + framelen = sbc_pack_frame(output, &priv->frame, output_len, j); + } else { + priv->enc_state.sbc_calc_scalefactors( + priv->frame.sb_sample_f, priv->frame.scale_factor, + priv->frame.blocks, priv->frame.channels, + priv->frame.subbands); + framelen = sbc_pack_frame(output, &priv->frame, output_len, 0); + } + + if (written) + *written = framelen; + + return samples * priv->frame.channels * 2; +} + +void sbc_finish(sbc_t *sbc) +{ + if (!sbc) + return; + + free(sbc->priv_alloc_base); + + memset(sbc, 0, sizeof(sbc_t)); +} + +size_t sbc_get_frame_length(sbc_t *sbc) +{ + int ret; + uint8_t subbands, channels, blocks, joint, bitpool; + struct sbc_priv *priv; + + priv = sbc->priv; + if (priv->init && priv->frame.bitpool == sbc->bitpool) + return priv->frame.length; + + subbands = sbc->subbands ? 8 : 4; + blocks = 4 + (sbc->blocks * 4); + channels = sbc->mode == SBC_MODE_MONO ? 1 : 2; + joint = sbc->mode == SBC_MODE_JOINT_STEREO ? 1 : 0; + bitpool = sbc->bitpool; + + ret = 4 + (4 * subbands * channels) / 8; + /* This term is not always evenly divide so we round it up */ + if (channels == 1) + ret += ((blocks * channels * bitpool) + 7) / 8; + else + ret += (((joint ? subbands : 0) + blocks * bitpool) + 7) / 8; + + return ret; +} + +unsigned sbc_get_frame_duration(sbc_t *sbc) +{ + uint8_t subbands, blocks; + uint16_t frequency; + struct sbc_priv *priv; + + priv = sbc->priv; + if (!priv->init) { + subbands = sbc->subbands ? 8 : 4; + blocks = 4 + (sbc->blocks * 4); + } else { + subbands = priv->frame.subbands; + blocks = priv->frame.blocks; + } + + switch (sbc->frequency) { + case SBC_FREQ_16000: + frequency = 16000; + break; + + case SBC_FREQ_32000: + frequency = 32000; + break; + + case SBC_FREQ_44100: + frequency = 44100; + break; + + case SBC_FREQ_48000: + frequency = 48000; + break; + default: + return 0; + } + + return (1000000 * blocks * subbands) / frequency; +} + +size_t sbc_get_codesize(sbc_t *sbc) +{ + uint16_t subbands, channels, blocks; + struct sbc_priv *priv; + + priv = sbc->priv; + if (!priv->init) { + subbands = sbc->subbands ? 8 : 4; + blocks = 4 + (sbc->blocks * 4); + channels = sbc->mode == SBC_MODE_MONO ? 1 : 2; + } else { + subbands = priv->frame.subbands; + blocks = priv->frame.blocks; + channels = priv->frame.channels; + } + + return subbands * blocks * channels * 2; +} + +const char *sbc_get_implementation_info(sbc_t *sbc) +{ + struct sbc_priv *priv; + + if (!sbc) + return NULL; + + priv = sbc->priv; + if (!priv) + return NULL; + + return priv->enc_state.implementation_info; +} + +int sbc_reinit(sbc_t *sbc, unsigned long flags) +{ + struct sbc_priv *priv; + + if (!sbc || !sbc->priv) + return -EIO; + + priv = sbc->priv; + + if (priv->init == 1) + memset(sbc->priv, 0, sizeof(struct sbc_priv)); + + sbc_set_defaults(sbc, flags); + + return 0; +} diff --git a/sbc/sbc.h b/sbc/sbc.h new file mode 100644 index 0000000..2f830ad --- /dev/null +++ b/sbc/sbc.h @@ -0,0 +1,113 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_H +#define __SBC_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/* sampling frequency */ +#define SBC_FREQ_16000 0x00 +#define SBC_FREQ_32000 0x01 +#define SBC_FREQ_44100 0x02 +#define SBC_FREQ_48000 0x03 + +/* blocks */ +#define SBC_BLK_4 0x00 +#define SBC_BLK_8 0x01 +#define SBC_BLK_12 0x02 +#define SBC_BLK_16 0x03 + +/* channel mode */ +#define SBC_MODE_MONO 0x00 +#define SBC_MODE_DUAL_CHANNEL 0x01 +#define SBC_MODE_STEREO 0x02 +#define SBC_MODE_JOINT_STEREO 0x03 + +/* allocation method */ +#define SBC_AM_LOUDNESS 0x00 +#define SBC_AM_SNR 0x01 + +/* subbands */ +#define SBC_SB_4 0x00 +#define SBC_SB_8 0x01 + +/* Data endianess */ +#define SBC_LE 0x00 +#define SBC_BE 0x01 + +struct sbc_struct { + unsigned long flags; + + uint8_t frequency; + uint8_t blocks; + uint8_t subbands; + uint8_t mode; + uint8_t allocation; + uint8_t bitpool; + uint8_t endian; + + void *priv; + void *priv_alloc_base; +}; + +typedef struct sbc_struct sbc_t; + +int sbc_init(sbc_t *sbc, unsigned long flags); +int sbc_reinit(sbc_t *sbc, unsigned long flags); + +ssize_t sbc_parse(sbc_t *sbc, const void *input, size_t input_len); + +/* Decodes ONE input block into ONE output block */ +ssize_t sbc_decode(sbc_t *sbc, const void *input, size_t input_len, + void *output, size_t output_len, size_t *written); + +/* Encodes ONE input block into ONE output block */ +ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len, + void *output, size_t output_len, ssize_t *written); + +/* Returns the output block size in bytes */ +size_t sbc_get_frame_length(sbc_t *sbc); + +/* Returns the time one input/output block takes to play in msec*/ +unsigned sbc_get_frame_duration(sbc_t *sbc); + +/* Returns the input block size in bytes */ +size_t sbc_get_codesize(sbc_t *sbc); + +const char *sbc_get_implementation_info(sbc_t *sbc); +void sbc_finish(sbc_t *sbc); + +#ifdef __cplusplus +} +#endif + +#endif /* __SBC_H */ diff --git a/sbc/sbc_math.h b/sbc/sbc_math.h new file mode 100644 index 0000000..5476860 --- /dev/null +++ b/sbc/sbc_math.h @@ -0,0 +1,61 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2008 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#define fabs(x) ((x) < 0 ? -(x) : (x)) +/* C does not provide an explicit arithmetic shift right but this will + always be correct and every compiler *should* generate optimal code */ +#define ASR(val, bits) ((-2 >> 1 == -1) ? \ + ((int32_t)(val)) >> (bits) : ((int32_t) (val)) / (1 << (bits))) + +#define SCALE_SPROTO4_TBL 12 +#define SCALE_SPROTO8_TBL 14 +#define SCALE_NPROTO4_TBL 11 +#define SCALE_NPROTO8_TBL 11 +#define SCALE4_STAGED1_BITS 15 +#define SCALE4_STAGED2_BITS 16 +#define SCALE8_STAGED1_BITS 15 +#define SCALE8_STAGED2_BITS 16 + +typedef int32_t sbc_fixed_t; + +#define SCALE4_STAGED1(src) ASR(src, SCALE4_STAGED1_BITS) +#define SCALE4_STAGED2(src) ASR(src, SCALE4_STAGED2_BITS) +#define SCALE8_STAGED1(src) ASR(src, SCALE8_STAGED1_BITS) +#define SCALE8_STAGED2(src) ASR(src, SCALE8_STAGED2_BITS) + +#define SBC_FIXED_0(val) { val = 0; } +#define MUL(a, b) ((a) * (b)) +#if defined(__arm__) && (!defined(__thumb__) || defined(__thumb2__)) +#define MULA(a, b, res) ({ \ + int tmp = res; \ + __asm__( \ + "mla %0, %2, %3, %0" \ + : "=&r" (tmp) \ + : "0" (tmp), "r" (a), "r" (b)); \ + tmp; }) +#else +#define MULA(a, b, res) ((a) * (b) + (res)) +#endif diff --git a/sbc/sbc_primitives.c b/sbc/sbc_primitives.c new file mode 100644 index 0000000..ad780d0 --- /dev/null +++ b/sbc/sbc_primitives.c @@ -0,0 +1,554 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives.h" +#include "sbc_primitives_mmx.h" +#include "sbc_primitives_iwmmxt.h" +#include "sbc_primitives_neon.h" +#include "sbc_primitives_armv6.h" + +/* + * A reference C code of analysis filter with SIMD-friendly tables + * reordering and code layout. This code can be used to develop platform + * specific SIMD optimizations. Also it may be used as some kind of test + * for compiler autovectorization capabilities (who knows, if the compiler + * is very good at this stuff, hand optimized assembly may be not strictly + * needed for some platform). + * + * Note: It is also possible to make a simple variant of analysis filter, + * which needs only a single constants table without taking care about + * even/odd cases. This simple variant of filter can be implemented without + * input data permutation. The only thing that would be lost is the + * possibility to use pairwise SIMD multiplications. But for some simple + * CPU cores without SIMD extensions it can be useful. If anybody is + * interested in implementing such variant of a filter, sourcecode from + * bluez versions 4.26/4.27 can be used as a reference and the history of + * the changes in git repository done around that time may be worth checking. + */ + +static inline void sbc_analyze_four_simd(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + FIXED_A t1[4]; + FIXED_T t2[4]; + int hop = 0; + + /* rounding coefficient */ + t1[0] = t1[1] = t1[2] = t1[3] = + (FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1); + + /* low pass polyphase filter */ + for (hop = 0; hop < 40; hop += 8) { + t1[0] += (FIXED_A) in[hop] * consts[hop]; + t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; + t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; + t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; + t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; + t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; + t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; + t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; + } + + /* scaling */ + t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE; + t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE; + t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE; + t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE; + + /* do the cos transform */ + t1[0] = (FIXED_A) t2[0] * consts[40 + 0]; + t1[0] += (FIXED_A) t2[1] * consts[40 + 1]; + t1[1] = (FIXED_A) t2[0] * consts[40 + 2]; + t1[1] += (FIXED_A) t2[1] * consts[40 + 3]; + t1[2] = (FIXED_A) t2[0] * consts[40 + 4]; + t1[2] += (FIXED_A) t2[1] * consts[40 + 5]; + t1[3] = (FIXED_A) t2[0] * consts[40 + 6]; + t1[3] += (FIXED_A) t2[1] * consts[40 + 7]; + + t1[0] += (FIXED_A) t2[2] * consts[40 + 8]; + t1[0] += (FIXED_A) t2[3] * consts[40 + 9]; + t1[1] += (FIXED_A) t2[2] * consts[40 + 10]; + t1[1] += (FIXED_A) t2[3] * consts[40 + 11]; + t1[2] += (FIXED_A) t2[2] * consts[40 + 12]; + t1[2] += (FIXED_A) t2[3] * consts[40 + 13]; + t1[3] += (FIXED_A) t2[2] * consts[40 + 14]; + t1[3] += (FIXED_A) t2[3] * consts[40 + 15]; + + out[0] = t1[0] >> + (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); + out[1] = t1[1] >> + (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); + out[2] = t1[2] >> + (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); + out[3] = t1[3] >> + (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); +} + +static inline void sbc_analyze_eight_simd(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + FIXED_A t1[8]; + FIXED_T t2[8]; + int i, hop; + + /* rounding coefficient */ + t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = + (FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1); + + /* low pass polyphase filter */ + for (hop = 0; hop < 80; hop += 16) { + t1[0] += (FIXED_A) in[hop] * consts[hop]; + t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; + t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; + t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; + t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; + t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; + t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; + t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; + t1[4] += (FIXED_A) in[hop + 8] * consts[hop + 8]; + t1[4] += (FIXED_A) in[hop + 9] * consts[hop + 9]; + t1[5] += (FIXED_A) in[hop + 10] * consts[hop + 10]; + t1[5] += (FIXED_A) in[hop + 11] * consts[hop + 11]; + t1[6] += (FIXED_A) in[hop + 12] * consts[hop + 12]; + t1[6] += (FIXED_A) in[hop + 13] * consts[hop + 13]; + t1[7] += (FIXED_A) in[hop + 14] * consts[hop + 14]; + t1[7] += (FIXED_A) in[hop + 15] * consts[hop + 15]; + } + + /* scaling */ + t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE; + t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE; + t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE; + t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE; + t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE; + t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE; + t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE; + t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE; + + + /* do the cos transform */ + t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 0; + + for (i = 0; i < 4; i++) { + t1[0] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 0]; + t1[0] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 1]; + t1[1] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 2]; + t1[1] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 3]; + t1[2] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 4]; + t1[2] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 5]; + t1[3] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 6]; + t1[3] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 7]; + t1[4] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 8]; + t1[4] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 9]; + t1[5] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 10]; + t1[5] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 11]; + t1[6] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 12]; + t1[6] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 13]; + t1[7] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 14]; + t1[7] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 15]; + } + + for (i = 0; i < 8; i++) + out[i] = t1[i] >> + (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS); +} + +static inline void sbc_analyze_4b_4s_simd(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_four_simd(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_simd(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + sbc_analyze_four_simd(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_simd(x + 0, out, analysis_consts_fixed4_simd_even); +} + +static inline void sbc_analyze_4b_8s_simd(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_eight_simd(x + 24, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_simd(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + sbc_analyze_eight_simd(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_simd(x + 0, out, analysis_consts_fixed8_simd_even); +} + +static inline int16_t unaligned16_be(const uint8_t *ptr) +{ + return (int16_t) ((ptr[0] << 8) | ptr[1]); +} + +static inline int16_t unaligned16_le(const uint8_t *ptr) +{ + return (int16_t) (ptr[0] | (ptr[1] << 8)); +} + +/* + * Internal helper functions for input data processing. In order to get + * optimal performance, it is important to have "nsamples", "nchannels" + * and "big_endian" arguments used with this inline function as compile + * time constants. + */ + +static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s4_internal( + int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels, int big_endian) +{ + /* handle X buffer wraparound */ + if (position < nsamples) { + if (nchannels > 0) + memcpy(&X[0][SBC_X_BUFFER_SIZE - 40], &X[0][position], + 36 * sizeof(int16_t)); + if (nchannels > 1) + memcpy(&X[1][SBC_X_BUFFER_SIZE - 40], &X[1][position], + 36 * sizeof(int16_t)); + position = SBC_X_BUFFER_SIZE - 40; + } + + #define PCM(i) (big_endian ? \ + unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2)) + + /* copy/permutate audio samples */ + while ((nsamples -= 8) >= 0) { + position -= 8; + if (nchannels > 0) { + int16_t *x = &X[0][position]; + x[0] = PCM(0 + 7 * nchannels); + x[1] = PCM(0 + 3 * nchannels); + x[2] = PCM(0 + 6 * nchannels); + x[3] = PCM(0 + 4 * nchannels); + x[4] = PCM(0 + 0 * nchannels); + x[5] = PCM(0 + 2 * nchannels); + x[6] = PCM(0 + 1 * nchannels); + x[7] = PCM(0 + 5 * nchannels); + } + if (nchannels > 1) { + int16_t *x = &X[1][position]; + x[0] = PCM(1 + 7 * nchannels); + x[1] = PCM(1 + 3 * nchannels); + x[2] = PCM(1 + 6 * nchannels); + x[3] = PCM(1 + 4 * nchannels); + x[4] = PCM(1 + 0 * nchannels); + x[5] = PCM(1 + 2 * nchannels); + x[6] = PCM(1 + 1 * nchannels); + x[7] = PCM(1 + 5 * nchannels); + } + pcm += 16 * nchannels; + } + #undef PCM + + return position; +} + +static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s8_internal( + int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels, int big_endian) +{ + /* handle X buffer wraparound */ + if (position < nsamples) { + if (nchannels > 0) + memcpy(&X[0][SBC_X_BUFFER_SIZE - 72], &X[0][position], + 72 * sizeof(int16_t)); + if (nchannels > 1) + memcpy(&X[1][SBC_X_BUFFER_SIZE - 72], &X[1][position], + 72 * sizeof(int16_t)); + position = SBC_X_BUFFER_SIZE - 72; + } + + #define PCM(i) (big_endian ? \ + unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2)) + + /* copy/permutate audio samples */ + while ((nsamples -= 16) >= 0) { + position -= 16; + if (nchannels > 0) { + int16_t *x = &X[0][position]; + x[0] = PCM(0 + 15 * nchannels); + x[1] = PCM(0 + 7 * nchannels); + x[2] = PCM(0 + 14 * nchannels); + x[3] = PCM(0 + 8 * nchannels); + x[4] = PCM(0 + 13 * nchannels); + x[5] = PCM(0 + 9 * nchannels); + x[6] = PCM(0 + 12 * nchannels); + x[7] = PCM(0 + 10 * nchannels); + x[8] = PCM(0 + 11 * nchannels); + x[9] = PCM(0 + 3 * nchannels); + x[10] = PCM(0 + 6 * nchannels); + x[11] = PCM(0 + 0 * nchannels); + x[12] = PCM(0 + 5 * nchannels); + x[13] = PCM(0 + 1 * nchannels); + x[14] = PCM(0 + 4 * nchannels); + x[15] = PCM(0 + 2 * nchannels); + } + if (nchannels > 1) { + int16_t *x = &X[1][position]; + x[0] = PCM(1 + 15 * nchannels); + x[1] = PCM(1 + 7 * nchannels); + x[2] = PCM(1 + 14 * nchannels); + x[3] = PCM(1 + 8 * nchannels); + x[4] = PCM(1 + 13 * nchannels); + x[5] = PCM(1 + 9 * nchannels); + x[6] = PCM(1 + 12 * nchannels); + x[7] = PCM(1 + 10 * nchannels); + x[8] = PCM(1 + 11 * nchannels); + x[9] = PCM(1 + 3 * nchannels); + x[10] = PCM(1 + 6 * nchannels); + x[11] = PCM(1 + 0 * nchannels); + x[12] = PCM(1 + 5 * nchannels); + x[13] = PCM(1 + 1 * nchannels); + x[14] = PCM(1 + 4 * nchannels); + x[15] = PCM(1 + 2 * nchannels); + } + pcm += 32 * nchannels; + } + #undef PCM + + return position; +} + +/* + * Input data processing functions. The data is endian converted if needed, + * channels are deintrleaved and audio samples are reordered for use in + * SIMD-friendly analysis filter function. The results are put into "X" + * array, getting appended to the previous data (or it is better to say + * prepended, as the buffer is filled from top to bottom). Old data is + * discarded when neededed, but availability of (10 * nrof_subbands) + * contiguous samples is always guaranteed for the input to the analysis + * filter. This is achieved by copying a sufficient part of old data + * to the top of the buffer on buffer wraparound. + */ + +static int sbc_enc_process_input_4s_le(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + if (nchannels > 1) + return sbc_encoder_process_input_s4_internal( + position, pcm, X, nsamples, 2, 0); + else + return sbc_encoder_process_input_s4_internal( + position, pcm, X, nsamples, 1, 0); +} + +static int sbc_enc_process_input_4s_be(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + if (nchannels > 1) + return sbc_encoder_process_input_s4_internal( + position, pcm, X, nsamples, 2, 1); + else + return sbc_encoder_process_input_s4_internal( + position, pcm, X, nsamples, 1, 1); +} + +static int sbc_enc_process_input_8s_le(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + if (nchannels > 1) + return sbc_encoder_process_input_s8_internal( + position, pcm, X, nsamples, 2, 0); + else + return sbc_encoder_process_input_s8_internal( + position, pcm, X, nsamples, 1, 0); +} + +static int sbc_enc_process_input_8s_be(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + if (nchannels > 1) + return sbc_encoder_process_input_s8_internal( + position, pcm, X, nsamples, 2, 1); + else + return sbc_encoder_process_input_s8_internal( + position, pcm, X, nsamples, 1, 1); +} + +/* Supplementary function to count the number of leading zeros */ + +static inline int sbc_clz(uint32_t x) +{ +#ifdef __GNUC__ + return __builtin_clz(x); +#else + /* TODO: this should be replaced with something better if good + * performance is wanted when using compilers other than gcc */ + int cnt = 0; + while (x) { + cnt++; + x >>= 1; + } + return 32 - cnt; +#endif +} + +static void sbc_calc_scalefactors( + int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int channels, int subbands) +{ + int ch, sb, blk; + for (ch = 0; ch < channels; ch++) { + for (sb = 0; sb < subbands; sb++) { + uint32_t x = 1 << SCALE_OUT_BITS; + for (blk = 0; blk < blocks; blk++) { + int32_t tmp = fabs(sb_sample_f[blk][ch][sb]); + if (tmp != 0) + x |= tmp - 1; + } + scale_factor[ch][sb] = (31 - SCALE_OUT_BITS) - + sbc_clz(x); + } + } +} + +static int sbc_calc_scalefactors_j( + int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int subbands) +{ + int blk, joint = 0; + int32_t tmp0, tmp1; + uint32_t x, y; + + /* last subband does not use joint stereo */ + int sb = subbands - 1; + x = 1 << SCALE_OUT_BITS; + y = 1 << SCALE_OUT_BITS; + for (blk = 0; blk < blocks; blk++) { + tmp0 = fabs(sb_sample_f[blk][0][sb]); + tmp1 = fabs(sb_sample_f[blk][1][sb]); + if (tmp0 != 0) + x |= tmp0 - 1; + if (tmp1 != 0) + y |= tmp1 - 1; + } + scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - sbc_clz(x); + scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - sbc_clz(y); + + /* the rest of subbands can use joint stereo */ + while (--sb >= 0) { + int32_t sb_sample_j[16][2]; + x = 1 << SCALE_OUT_BITS; + y = 1 << SCALE_OUT_BITS; + for (blk = 0; blk < blocks; blk++) { + tmp0 = sb_sample_f[blk][0][sb]; + tmp1 = sb_sample_f[blk][1][sb]; + sb_sample_j[blk][0] = ASR(tmp0, 1) + ASR(tmp1, 1); + sb_sample_j[blk][1] = ASR(tmp0, 1) - ASR(tmp1, 1); + tmp0 = fabs(tmp0); + tmp1 = fabs(tmp1); + if (tmp0 != 0) + x |= tmp0 - 1; + if (tmp1 != 0) + y |= tmp1 - 1; + } + scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - + sbc_clz(x); + scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - + sbc_clz(y); + x = 1 << SCALE_OUT_BITS; + y = 1 << SCALE_OUT_BITS; + for (blk = 0; blk < blocks; blk++) { + tmp0 = fabs(sb_sample_j[blk][0]); + tmp1 = fabs(sb_sample_j[blk][1]); + if (tmp0 != 0) + x |= tmp0 - 1; + if (tmp1 != 0) + y |= tmp1 - 1; + } + x = (31 - SCALE_OUT_BITS) - sbc_clz(x); + y = (31 - SCALE_OUT_BITS) - sbc_clz(y); + + /* decide whether to use joint stereo for this subband */ + if ((scale_factor[0][sb] + scale_factor[1][sb]) > x + y) { + joint |= 1 << (subbands - 1 - sb); + scale_factor[0][sb] = x; + scale_factor[1][sb] = y; + for (blk = 0; blk < blocks; blk++) { + sb_sample_f[blk][0][sb] = sb_sample_j[blk][0]; + sb_sample_f[blk][1][sb] = sb_sample_j[blk][1]; + } + } + } + + /* bitmask with the information about subbands using joint stereo */ + return joint; +} + +/* + * Detect CPU features and setup function pointers + */ +void sbc_init_primitives(struct sbc_encoder_state *state) +{ + /* Default implementation for analyze functions */ + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_simd; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_simd; + + /* Default implementation for input reordering / deinterleaving */ + state->sbc_enc_process_input_4s_le = sbc_enc_process_input_4s_le; + state->sbc_enc_process_input_4s_be = sbc_enc_process_input_4s_be; + state->sbc_enc_process_input_8s_le = sbc_enc_process_input_8s_le; + state->sbc_enc_process_input_8s_be = sbc_enc_process_input_8s_be; + + /* Default implementation for scale factors calculation */ + state->sbc_calc_scalefactors = sbc_calc_scalefactors; + state->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j; + state->implementation_info = "Generic C"; + + /* X86/AMD64 optimizations */ +#ifdef SBC_BUILD_WITH_MMX_SUPPORT + sbc_init_primitives_mmx(state); +#endif + + /* ARM optimizations */ +#ifdef SBC_BUILD_WITH_ARMV6_SUPPORT + sbc_init_primitives_armv6(state); +#endif +#ifdef SBC_BUILD_WITH_IWMMXT_SUPPORT + sbc_init_primitives_iwmmxt(state); +#endif +#ifdef SBC_BUILD_WITH_NEON_SUPPORT + sbc_init_primitives_neon(state); +#endif +} diff --git a/sbc/sbc_primitives.h b/sbc/sbc_primitives.h new file mode 100644 index 0000000..3fec8d5 --- /dev/null +++ b/sbc/sbc_primitives.h @@ -0,0 +1,80 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_H +#define __SBC_PRIMITIVES_H + +#define SCALE_OUT_BITS 15 +#define SBC_X_BUFFER_SIZE 328 + +#ifdef __GNUC__ +#define SBC_ALWAYS_INLINE __attribute__((always_inline)) +#else +#define SBC_ALWAYS_INLINE inline +#endif + +struct sbc_encoder_state { + int position; + int16_t SBC_ALIGNED X[2][SBC_X_BUFFER_SIZE]; + /* Polyphase analysis filter for 4 subbands configuration, + * it handles 4 blocks at once */ + void (*sbc_analyze_4b_4s)(int16_t *x, int32_t *out, int out_stride); + /* Polyphase analysis filter for 8 subbands configuration, + * it handles 4 blocks at once */ + void (*sbc_analyze_4b_8s)(int16_t *x, int32_t *out, int out_stride); + /* Process input data (deinterleave, endian conversion, reordering), + * depending on the number of subbands and input data byte order */ + int (*sbc_enc_process_input_4s_le)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + int (*sbc_enc_process_input_4s_be)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + int (*sbc_enc_process_input_8s_le)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + int (*sbc_enc_process_input_8s_be)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + /* Scale factors calculation */ + void (*sbc_calc_scalefactors)(int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int channels, int subbands); + /* Scale factors calculation with joint stereo support */ + int (*sbc_calc_scalefactors_j)(int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int subbands); + const char *implementation_info; +}; + +/* + * Initialize pointers to the functions which are the basic "building bricks" + * of SBC codec. Best implementation is selected based on target CPU + * capabilities. + */ +void sbc_init_primitives(struct sbc_encoder_state *encoder_state); + +#endif diff --git a/sbc/sbc_primitives_armv6.c b/sbc/sbc_primitives_armv6.c new file mode 100644 index 0000000..9586098 --- /dev/null +++ b/sbc/sbc_primitives_armv6.c @@ -0,0 +1,299 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives_armv6.h" + +/* + * ARMv6 optimizations. The instructions are scheduled for ARM11 pipeline. + */ + +#ifdef SBC_BUILD_WITH_ARMV6_SUPPORT + +static void __attribute__((naked)) sbc_analyze_four_armv6() +{ + /* r0 = in, r1 = out, r2 = consts */ + asm volatile ( + "push {r1, r4-r7, lr}\n" + "push {r8-r11}\n" + "ldrd r4, r5, [r0, #0]\n" + "ldrd r6, r7, [r2, #0]\n" + "ldrd r8, r9, [r0, #16]\n" + "ldrd r10, r11, [r2, #16]\n" + "mov r14, #0x8000\n" + "smlad r3, r4, r6, r14\n" + "smlad r12, r5, r7, r14\n" + "ldrd r4, r5, [r0, #32]\n" + "ldrd r6, r7, [r2, #32]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #48]\n" + "ldrd r10, r11, [r2, #48]\n" + "smlad r3, r4, r6, r3\n" + "smlad r12, r5, r7, r12\n" + "ldrd r4, r5, [r0, #64]\n" + "ldrd r6, r7, [r2, #64]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #8]\n" + "ldrd r10, r11, [r2, #8]\n" + "smlad r3, r4, r6, r3\n" /* t1[0] is done */ + "smlad r12, r5, r7, r12\n" /* t1[1] is done */ + "ldrd r4, r5, [r0, #24]\n" + "ldrd r6, r7, [r2, #24]\n" + "pkhtb r3, r12, r3, asr #16\n" /* combine t1[0] and t1[1] */ + "smlad r12, r8, r10, r14\n" + "smlad r14, r9, r11, r14\n" + "ldrd r8, r9, [r0, #40]\n" + "ldrd r10, r11, [r2, #40]\n" + "smlad r12, r4, r6, r12\n" + "smlad r14, r5, r7, r14\n" + "ldrd r4, r5, [r0, #56]\n" + "ldrd r6, r7, [r2, #56]\n" + "smlad r12, r8, r10, r12\n" + "smlad r14, r9, r11, r14\n" + "ldrd r8, r9, [r0, #72]\n" + "ldrd r10, r11, [r2, #72]\n" + "smlad r12, r4, r6, r12\n" + "smlad r14, r5, r7, r14\n" + "ldrd r4, r5, [r2, #80]\n" /* start loading cos table */ + "smlad r12, r8, r10, r12\n" /* t1[2] is done */ + "smlad r14, r9, r11, r14\n" /* t1[3] is done */ + "ldrd r6, r7, [r2, #88]\n" + "ldrd r8, r9, [r2, #96]\n" + "ldrd r10, r11, [r2, #104]\n" /* cos table fully loaded */ + "pkhtb r12, r14, r12, asr #16\n" /* combine t1[2] and t1[3] */ + "smuad r4, r3, r4\n" + "smuad r5, r3, r5\n" + "smlad r4, r12, r8, r4\n" + "smlad r5, r12, r9, r5\n" + "smuad r6, r3, r6\n" + "smuad r7, r3, r7\n" + "smlad r6, r12, r10, r6\n" + "smlad r7, r12, r11, r7\n" + "pop {r8-r11}\n" + "stmia r1, {r4, r5, r6, r7}\n" + "pop {r1, r4-r7, pc}\n" + ); +} + +#define sbc_analyze_four(in, out, consts) \ + ((void (*)(int16_t *, int32_t *, const FIXED_T*)) \ + sbc_analyze_four_armv6)((in), (out), (consts)) + +static void __attribute__((naked)) sbc_analyze_eight_armv6() +{ + /* r0 = in, r1 = out, r2 = consts */ + asm volatile ( + "push {r1, r4-r7, lr}\n" + "push {r8-r11}\n" + "ldrd r4, r5, [r0, #24]\n" + "ldrd r6, r7, [r2, #24]\n" + "ldrd r8, r9, [r0, #56]\n" + "ldrd r10, r11, [r2, #56]\n" + "mov r14, #0x8000\n" + "smlad r3, r4, r6, r14\n" + "smlad r12, r5, r7, r14\n" + "ldrd r4, r5, [r0, #88]\n" + "ldrd r6, r7, [r2, #88]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #120]\n" + "ldrd r10, r11, [r2, #120]\n" + "smlad r3, r4, r6, r3\n" + "smlad r12, r5, r7, r12\n" + "ldrd r4, r5, [r0, #152]\n" + "ldrd r6, r7, [r2, #152]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #16]\n" + "ldrd r10, r11, [r2, #16]\n" + "smlad r3, r4, r6, r3\n" /* t1[6] is done */ + "smlad r12, r5, r7, r12\n" /* t1[7] is done */ + "ldrd r4, r5, [r0, #48]\n" + "ldrd r6, r7, [r2, #48]\n" + "pkhtb r3, r12, r3, asr #16\n" /* combine t1[6] and t1[7] */ + "str r3, [sp, #-4]!\n" /* save to stack */ + "smlad r3, r8, r10, r14\n" + "smlad r12, r9, r11, r14\n" + "ldrd r8, r9, [r0, #80]\n" + "ldrd r10, r11, [r2, #80]\n" + "smlad r3, r4, r6, r3\n" + "smlad r12, r5, r7, r12\n" + "ldrd r4, r5, [r0, #112]\n" + "ldrd r6, r7, [r2, #112]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #144]\n" + "ldrd r10, r11, [r2, #144]\n" + "smlad r3, r4, r6, r3\n" + "smlad r12, r5, r7, r12\n" + "ldrd r4, r5, [r0, #0]\n" + "ldrd r6, r7, [r2, #0]\n" + "smlad r3, r8, r10, r3\n" /* t1[4] is done */ + "smlad r12, r9, r11, r12\n" /* t1[5] is done */ + "ldrd r8, r9, [r0, #32]\n" + "ldrd r10, r11, [r2, #32]\n" + "pkhtb r3, r12, r3, asr #16\n" /* combine t1[4] and t1[5] */ + "str r3, [sp, #-4]!\n" /* save to stack */ + "smlad r3, r4, r6, r14\n" + "smlad r12, r5, r7, r14\n" + "ldrd r4, r5, [r0, #64]\n" + "ldrd r6, r7, [r2, #64]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #96]\n" + "ldrd r10, r11, [r2, #96]\n" + "smlad r3, r4, r6, r3\n" + "smlad r12, r5, r7, r12\n" + "ldrd r4, r5, [r0, #128]\n" + "ldrd r6, r7, [r2, #128]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #8]\n" + "ldrd r10, r11, [r2, #8]\n" + "smlad r3, r4, r6, r3\n" /* t1[0] is done */ + "smlad r12, r5, r7, r12\n" /* t1[1] is done */ + "ldrd r4, r5, [r0, #40]\n" + "ldrd r6, r7, [r2, #40]\n" + "pkhtb r3, r12, r3, asr #16\n" /* combine t1[0] and t1[1] */ + "smlad r12, r8, r10, r14\n" + "smlad r14, r9, r11, r14\n" + "ldrd r8, r9, [r0, #72]\n" + "ldrd r10, r11, [r2, #72]\n" + "smlad r12, r4, r6, r12\n" + "smlad r14, r5, r7, r14\n" + "ldrd r4, r5, [r0, #104]\n" + "ldrd r6, r7, [r2, #104]\n" + "smlad r12, r8, r10, r12\n" + "smlad r14, r9, r11, r14\n" + "ldrd r8, r9, [r0, #136]\n" + "ldrd r10, r11, [r2, #136]!\n" + "smlad r12, r4, r6, r12\n" + "smlad r14, r5, r7, r14\n" + "ldrd r4, r5, [r2, #(160 - 136 + 0)]\n" + "smlad r12, r8, r10, r12\n" /* t1[2] is done */ + "smlad r14, r9, r11, r14\n" /* t1[3] is done */ + "ldrd r6, r7, [r2, #(160 - 136 + 8)]\n" + "smuad r4, r3, r4\n" + "smuad r5, r3, r5\n" + "pkhtb r12, r14, r12, asr #16\n" /* combine t1[2] and t1[3] */ + /* r3 = t2[0:1] */ + /* r12 = t2[2:3] */ + "pop {r0, r14}\n" /* t2[4:5], t2[6:7] */ + "ldrd r8, r9, [r2, #(160 - 136 + 32)]\n" + "smuad r6, r3, r6\n" + "smuad r7, r3, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 40)]\n" + "smlad r4, r12, r8, r4\n" + "smlad r5, r12, r9, r5\n" + "ldrd r8, r9, [r2, #(160 - 136 + 64)]\n" + "smlad r6, r12, r10, r6\n" + "smlad r7, r12, r11, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 72)]\n" + "smlad r4, r0, r8, r4\n" + "smlad r5, r0, r9, r5\n" + "ldrd r8, r9, [r2, #(160 - 136 + 96)]\n" + "smlad r6, r0, r10, r6\n" + "smlad r7, r0, r11, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 104)]\n" + "smlad r4, r14, r8, r4\n" + "smlad r5, r14, r9, r5\n" + "ldrd r8, r9, [r2, #(160 - 136 + 16 + 0)]\n" + "smlad r6, r14, r10, r6\n" + "smlad r7, r14, r11, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 16 + 8)]\n" + "stmia r1!, {r4, r5}\n" + "smuad r4, r3, r8\n" + "smuad r5, r3, r9\n" + "ldrd r8, r9, [r2, #(160 - 136 + 16 + 32)]\n" + "stmia r1!, {r6, r7}\n" + "smuad r6, r3, r10\n" + "smuad r7, r3, r11\n" + "ldrd r10, r11, [r2, #(160 - 136 + 16 + 40)]\n" + "smlad r4, r12, r8, r4\n" + "smlad r5, r12, r9, r5\n" + "ldrd r8, r9, [r2, #(160 - 136 + 16 + 64)]\n" + "smlad r6, r12, r10, r6\n" + "smlad r7, r12, r11, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 16 + 72)]\n" + "smlad r4, r0, r8, r4\n" + "smlad r5, r0, r9, r5\n" + "ldrd r8, r9, [r2, #(160 - 136 + 16 + 96)]\n" + "smlad r6, r0, r10, r6\n" + "smlad r7, r0, r11, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 16 + 104)]\n" + "smlad r4, r14, r8, r4\n" + "smlad r5, r14, r9, r5\n" + "smlad r6, r14, r10, r6\n" + "smlad r7, r14, r11, r7\n" + "pop {r8-r11}\n" + "stmia r1!, {r4, r5, r6, r7}\n" + "pop {r1, r4-r7, pc}\n" + ); +} + +#define sbc_analyze_eight(in, out, consts) \ + ((void (*)(int16_t *, int32_t *, const FIXED_T*)) \ + sbc_analyze_eight_armv6)((in), (out), (consts)) + +static void sbc_analyze_4b_4s_armv6(int16_t *x, int32_t *out, int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_four(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + sbc_analyze_four(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four(x + 0, out, analysis_consts_fixed4_simd_even); +} + +static void sbc_analyze_4b_8s_armv6(int16_t *x, int32_t *out, int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_eight(x + 24, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + sbc_analyze_eight(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight(x + 0, out, analysis_consts_fixed8_simd_even); +} + +void sbc_init_primitives_armv6(struct sbc_encoder_state *state) +{ + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_armv6; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_armv6; + state->implementation_info = "ARMv6 SIMD"; +} + +#endif diff --git a/sbc/sbc_primitives_armv6.h b/sbc/sbc_primitives_armv6.h new file mode 100644 index 0000000..6a9efe5 --- /dev/null +++ b/sbc/sbc_primitives_armv6.h @@ -0,0 +1,52 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_ARMV6_H +#define __SBC_PRIMITIVES_ARMV6_H + +#include "sbc_primitives.h" + +#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_7M__) +#define SBC_HAVE_ARMV6 1 +#endif + +#if !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) && \ + defined(__GNUC__) && defined(SBC_HAVE_ARMV6) && \ + defined(__ARM_EABI__) && !defined(__ARM_NEON__) && \ + (!defined(__thumb__) || defined(__thumb2__)) + +#define SBC_BUILD_WITH_ARMV6_SUPPORT + +void sbc_init_primitives_armv6(struct sbc_encoder_state *encoder_state); + +#endif + +#endif diff --git a/sbc/sbc_primitives_iwmmxt.c b/sbc/sbc_primitives_iwmmxt.c new file mode 100644 index 0000000..213967e --- /dev/null +++ b/sbc/sbc_primitives_iwmmxt.c @@ -0,0 +1,304 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2010 Keith Mok + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives_iwmmxt.h" + +/* + * IWMMXT optimizations + */ + +#ifdef SBC_BUILD_WITH_IWMMXT_SUPPORT + +static inline void sbc_analyze_four_iwmmxt(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + asm volatile ( + "wldrd wr0, [%0]\n" + "tbcstw wr4, %2\n" + "wldrd wr2, [%1]\n" + "wldrd wr1, [%0, #8]\n" + "wldrd wr3, [%1, #8]\n" + "wmadds wr0, wr2, wr0\n" + " wldrd wr6, [%0, #16]\n" + "wmadds wr1, wr3, wr1\n" + " wldrd wr7, [%0, #24]\n" + "waddwss wr0, wr0, wr4\n" + " wldrd wr8, [%1, #16]\n" + "waddwss wr1, wr1, wr4\n" + " wldrd wr9, [%1, #24]\n" + " wmadds wr6, wr8, wr6\n" + " wldrd wr2, [%0, #32]\n" + " wmadds wr7, wr9, wr7\n" + " wldrd wr3, [%0, #40]\n" + " waddwss wr0, wr6, wr0\n" + " wldrd wr4, [%1, #32]\n" + " waddwss wr1, wr7, wr1\n" + " wldrd wr5, [%1, #40]\n" + " wmadds wr2, wr4, wr2\n" + "wldrd wr6, [%0, #48]\n" + " wmadds wr3, wr5, wr3\n" + "wldrd wr7, [%0, #56]\n" + " waddwss wr0, wr2, wr0\n" + "wldrd wr8, [%1, #48]\n" + " waddwss wr1, wr3, wr1\n" + "wldrd wr9, [%1, #56]\n" + "wmadds wr6, wr8, wr6\n" + " wldrd wr2, [%0, #64]\n" + "wmadds wr7, wr9, wr7\n" + " wldrd wr3, [%0, #72]\n" + "waddwss wr0, wr6, wr0\n" + " wldrd wr4, [%1, #64]\n" + "waddwss wr1, wr7, wr1\n" + " wldrd wr5, [%1, #72]\n" + " wmadds wr2, wr4, wr2\n" + "tmcr wcgr0, %4\n" + " wmadds wr3, wr5, wr3\n" + " waddwss wr0, wr2, wr0\n" + " waddwss wr1, wr3, wr1\n" + "\n" + "wsrawg wr0, wr0, wcgr0\n" + " wldrd wr4, [%1, #80]\n" + "wsrawg wr1, wr1, wcgr0\n" + " wldrd wr5, [%1, #88]\n" + "wpackwss wr0, wr0, wr0\n" + " wldrd wr6, [%1, #96]\n" + "wpackwss wr1, wr1, wr1\n" + "wmadds wr2, wr5, wr0\n" + " wldrd wr7, [%1, #104]\n" + "wmadds wr0, wr4, wr0\n" + "\n" + " wmadds wr3, wr7, wr1\n" + " wmadds wr1, wr6, wr1\n" + " waddwss wr2, wr3, wr2\n" + " waddwss wr0, wr1, wr0\n" + "\n" + "wstrd wr0, [%3]\n" + "wstrd wr2, [%3, #8]\n" + : + : "r" (in), "r" (consts), + "r" (1 << (SBC_PROTO_FIXED4_SCALE - 1)), "r" (out), + "r" (SBC_PROTO_FIXED4_SCALE) + : "wr0", "wr1", "wr2", "wr3", "wr4", "wr5", "wr6", "wr7", + "wr8", "wr9", "wcgr0", "memory"); +} + +static inline void sbc_analyze_eight_iwmmxt(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + asm volatile ( + "wldrd wr0, [%0]\n" + "tbcstw wr15, %2\n" + "wldrd wr1, [%0, #8]\n" + "wldrd wr2, [%0, #16]\n" + "wldrd wr3, [%0, #24]\n" + "wldrd wr4, [%1]\n" + "wldrd wr5, [%1, #8]\n" + "wldrd wr6, [%1, #16]\n" + "wldrd wr7, [%1, #24]\n" + "wmadds wr0, wr0, wr4\n" + " wldrd wr8, [%1, #32]\n" + "wmadds wr1, wr1, wr5\n" + " wldrd wr9, [%1, #40]\n" + "wmadds wr2, wr2, wr6\n" + " wldrd wr10, [%1, #48]\n" + "wmadds wr3, wr3, wr7\n" + " wldrd wr11, [%1, #56]\n" + "waddwss wr0, wr0, wr15\n" + " wldrd wr4, [%0, #32]\n" + "waddwss wr1, wr1, wr15\n" + " wldrd wr5, [%0, #40]\n" + "waddwss wr2, wr2, wr15\n" + " wldrd wr6, [%0, #48]\n" + "waddwss wr3, wr3, wr15\n" + " wldrd wr7, [%0, #56]\n" + " wmadds wr4, wr4, wr8\n" + " wldrd wr12, [%0, #64]\n" + " wmadds wr5, wr5, wr9\n" + " wldrd wr13, [%0, #72]\n" + " wmadds wr6, wr6, wr10\n" + " wldrd wr14, [%0, #80]\n" + " wmadds wr7, wr7, wr11\n" + " wldrd wr15, [%0, #88]\n" + " waddwss wr0, wr4, wr0\n" + " wldrd wr8, [%1, #64]\n" + " waddwss wr1, wr5, wr1\n" + " wldrd wr9, [%1, #72]\n" + " waddwss wr2, wr6, wr2\n" + " wldrd wr10, [%1, #80]\n" + " waddwss wr3, wr7, wr3\n" + " wldrd wr11, [%1, #88]\n" + " wmadds wr12, wr12, wr8\n" + "wldrd wr4, [%0, #96]\n" + " wmadds wr13, wr13, wr9\n" + "wldrd wr5, [%0, #104]\n" + " wmadds wr14, wr14, wr10\n" + "wldrd wr6, [%0, #112]\n" + " wmadds wr15, wr15, wr11\n" + "wldrd wr7, [%0, #120]\n" + " waddwss wr0, wr12, wr0\n" + "wldrd wr8, [%1, #96]\n" + " waddwss wr1, wr13, wr1\n" + "wldrd wr9, [%1, #104]\n" + " waddwss wr2, wr14, wr2\n" + "wldrd wr10, [%1, #112]\n" + " waddwss wr3, wr15, wr3\n" + "wldrd wr11, [%1, #120]\n" + "wmadds wr4, wr4, wr8\n" + " wldrd wr12, [%0, #128]\n" + "wmadds wr5, wr5, wr9\n" + " wldrd wr13, [%0, #136]\n" + "wmadds wr6, wr6, wr10\n" + " wldrd wr14, [%0, #144]\n" + "wmadds wr7, wr7, wr11\n" + " wldrd wr15, [%0, #152]\n" + "waddwss wr0, wr4, wr0\n" + " wldrd wr8, [%1, #128]\n" + "waddwss wr1, wr5, wr1\n" + " wldrd wr9, [%1, #136]\n" + "waddwss wr2, wr6, wr2\n" + " wldrd wr10, [%1, #144]\n" + " waddwss wr3, wr7, wr3\n" + " wldrd wr11, [%1, #152]\n" + " wmadds wr12, wr12, wr8\n" + "tmcr wcgr0, %4\n" + " wmadds wr13, wr13, wr9\n" + " wmadds wr14, wr14, wr10\n" + " wmadds wr15, wr15, wr11\n" + " waddwss wr0, wr12, wr0\n" + " waddwss wr1, wr13, wr1\n" + " waddwss wr2, wr14, wr2\n" + " waddwss wr3, wr15, wr3\n" + "\n" + "wsrawg wr0, wr0, wcgr0\n" + "wsrawg wr1, wr1, wcgr0\n" + "wsrawg wr2, wr2, wcgr0\n" + "wsrawg wr3, wr3, wcgr0\n" + "\n" + "wpackwss wr0, wr0, wr0\n" + "wpackwss wr1, wr1, wr1\n" + " wldrd wr4, [%1, #160]\n" + "wpackwss wr2, wr2, wr2\n" + " wldrd wr5, [%1, #168]\n" + "wpackwss wr3, wr3, wr3\n" + " wldrd wr6, [%1, #192]\n" + " wmadds wr4, wr4, wr0\n" + " wldrd wr7, [%1, #200]\n" + " wmadds wr5, wr5, wr0\n" + " wldrd wr8, [%1, #224]\n" + " wmadds wr6, wr6, wr1\n" + " wldrd wr9, [%1, #232]\n" + " wmadds wr7, wr7, wr1\n" + " waddwss wr4, wr6, wr4\n" + " waddwss wr5, wr7, wr5\n" + " wmadds wr8, wr8, wr2\n" + "wldrd wr6, [%1, #256]\n" + " wmadds wr9, wr9, wr2\n" + "wldrd wr7, [%1, #264]\n" + "waddwss wr4, wr8, wr4\n" + " waddwss wr5, wr9, wr5\n" + "wmadds wr6, wr6, wr3\n" + "wmadds wr7, wr7, wr3\n" + "waddwss wr4, wr6, wr4\n" + "waddwss wr5, wr7, wr5\n" + "\n" + "wstrd wr4, [%3]\n" + "wstrd wr5, [%3, #8]\n" + "\n" + "wldrd wr6, [%1, #176]\n" + "wldrd wr5, [%1, #184]\n" + "wmadds wr5, wr5, wr0\n" + "wldrd wr8, [%1, #208]\n" + "wmadds wr0, wr6, wr0\n" + "wldrd wr9, [%1, #216]\n" + "wmadds wr9, wr9, wr1\n" + "wldrd wr6, [%1, #240]\n" + "wmadds wr1, wr8, wr1\n" + "wldrd wr7, [%1, #248]\n" + "waddwss wr0, wr1, wr0\n" + "waddwss wr5, wr9, wr5\n" + "wmadds wr7, wr7, wr2\n" + "wldrd wr8, [%1, #272]\n" + "wmadds wr2, wr6, wr2\n" + "wldrd wr9, [%1, #280]\n" + "waddwss wr0, wr2, wr0\n" + "waddwss wr5, wr7, wr5\n" + "wmadds wr9, wr9, wr3\n" + "wmadds wr3, wr8, wr3\n" + "waddwss wr0, wr3, wr0\n" + "waddwss wr5, wr9, wr5\n" + "\n" + "wstrd wr0, [%3, #16]\n" + "wstrd wr5, [%3, #24]\n" + : + : "r" (in), "r" (consts), + "r" (1 << (SBC_PROTO_FIXED8_SCALE - 1)), "r" (out), + "r" (SBC_PROTO_FIXED8_SCALE) + : "wr0", "wr1", "wr2", "wr3", "wr4", "wr5", "wr6", "wr7", + "wr8", "wr9", "wr10", "wr11", "wr12", "wr13", "wr14", "wr15", + "wcgr0", "memory"); +} + +static inline void sbc_analyze_4b_4s_iwmmxt(int16_t *x, int32_t *out, + int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_four_iwmmxt(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_iwmmxt(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + sbc_analyze_four_iwmmxt(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_iwmmxt(x + 0, out, analysis_consts_fixed4_simd_even); +} + +static inline void sbc_analyze_4b_8s_iwmmxt(int16_t *x, int32_t *out, + int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_eight_iwmmxt(x + 24, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_iwmmxt(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + sbc_analyze_eight_iwmmxt(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_iwmmxt(x + 0, out, analysis_consts_fixed8_simd_even); +} + +void sbc_init_primitives_iwmmxt(struct sbc_encoder_state *state) +{ + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_iwmmxt; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_iwmmxt; + state->implementation_info = "IWMMXT"; +} + +#endif diff --git a/sbc/sbc_primitives_iwmmxt.h b/sbc/sbc_primitives_iwmmxt.h new file mode 100644 index 0000000..b535e68 --- /dev/null +++ b/sbc/sbc_primitives_iwmmxt.h @@ -0,0 +1,42 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2010 Keith Mok + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_IWMMXT_H +#define __SBC_PRIMITIVES_IWMMXT_H + +#include "sbc_primitives.h" + +#if defined(__GNUC__) && defined(__IWMMXT__) && \ + !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) + +#define SBC_BUILD_WITH_IWMMXT_SUPPORT + +void sbc_init_primitives_iwmmxt(struct sbc_encoder_state *encoder_state); + +#endif + +#endif diff --git a/sbc/sbc_primitives_mmx.c b/sbc/sbc_primitives_mmx.c new file mode 100644 index 0000000..7f2fbc3 --- /dev/null +++ b/sbc/sbc_primitives_mmx.c @@ -0,0 +1,375 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives_mmx.h" + +/* + * MMX optimizations + */ + +#ifdef SBC_BUILD_WITH_MMX_SUPPORT + +static inline void sbc_analyze_four_mmx(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + static const SBC_ALIGNED int32_t round_c[2] = { + 1 << (SBC_PROTO_FIXED4_SCALE - 1), + 1 << (SBC_PROTO_FIXED4_SCALE - 1), + }; + asm volatile ( + "movq (%0), %%mm0\n" + "movq 8(%0), %%mm1\n" + "pmaddwd (%1), %%mm0\n" + "pmaddwd 8(%1), %%mm1\n" + "paddd (%2), %%mm0\n" + "paddd (%2), %%mm1\n" + "\n" + "movq 16(%0), %%mm2\n" + "movq 24(%0), %%mm3\n" + "pmaddwd 16(%1), %%mm2\n" + "pmaddwd 24(%1), %%mm3\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm3, %%mm1\n" + "\n" + "movq 32(%0), %%mm2\n" + "movq 40(%0), %%mm3\n" + "pmaddwd 32(%1), %%mm2\n" + "pmaddwd 40(%1), %%mm3\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm3, %%mm1\n" + "\n" + "movq 48(%0), %%mm2\n" + "movq 56(%0), %%mm3\n" + "pmaddwd 48(%1), %%mm2\n" + "pmaddwd 56(%1), %%mm3\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm3, %%mm1\n" + "\n" + "movq 64(%0), %%mm2\n" + "movq 72(%0), %%mm3\n" + "pmaddwd 64(%1), %%mm2\n" + "pmaddwd 72(%1), %%mm3\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm3, %%mm1\n" + "\n" + "psrad %4, %%mm0\n" + "psrad %4, %%mm1\n" + "packssdw %%mm0, %%mm0\n" + "packssdw %%mm1, %%mm1\n" + "\n" + "movq %%mm0, %%mm2\n" + "pmaddwd 80(%1), %%mm0\n" + "pmaddwd 88(%1), %%mm2\n" + "\n" + "movq %%mm1, %%mm3\n" + "pmaddwd 96(%1), %%mm1\n" + "pmaddwd 104(%1), %%mm3\n" + "paddd %%mm1, %%mm0\n" + "paddd %%mm3, %%mm2\n" + "\n" + "movq %%mm0, (%3)\n" + "movq %%mm2, 8(%3)\n" + : + : "r" (in), "r" (consts), "r" (&round_c), "r" (out), + "i" (SBC_PROTO_FIXED4_SCALE) + : "cc", "memory"); +} + +static inline void sbc_analyze_eight_mmx(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + static const SBC_ALIGNED int32_t round_c[2] = { + 1 << (SBC_PROTO_FIXED8_SCALE - 1), + 1 << (SBC_PROTO_FIXED8_SCALE - 1), + }; + asm volatile ( + "movq (%0), %%mm0\n" + "movq 8(%0), %%mm1\n" + "movq 16(%0), %%mm2\n" + "movq 24(%0), %%mm3\n" + "pmaddwd (%1), %%mm0\n" + "pmaddwd 8(%1), %%mm1\n" + "pmaddwd 16(%1), %%mm2\n" + "pmaddwd 24(%1), %%mm3\n" + "paddd (%2), %%mm0\n" + "paddd (%2), %%mm1\n" + "paddd (%2), %%mm2\n" + "paddd (%2), %%mm3\n" + "\n" + "movq 32(%0), %%mm4\n" + "movq 40(%0), %%mm5\n" + "movq 48(%0), %%mm6\n" + "movq 56(%0), %%mm7\n" + "pmaddwd 32(%1), %%mm4\n" + "pmaddwd 40(%1), %%mm5\n" + "pmaddwd 48(%1), %%mm6\n" + "pmaddwd 56(%1), %%mm7\n" + "paddd %%mm4, %%mm0\n" + "paddd %%mm5, %%mm1\n" + "paddd %%mm6, %%mm2\n" + "paddd %%mm7, %%mm3\n" + "\n" + "movq 64(%0), %%mm4\n" + "movq 72(%0), %%mm5\n" + "movq 80(%0), %%mm6\n" + "movq 88(%0), %%mm7\n" + "pmaddwd 64(%1), %%mm4\n" + "pmaddwd 72(%1), %%mm5\n" + "pmaddwd 80(%1), %%mm6\n" + "pmaddwd 88(%1), %%mm7\n" + "paddd %%mm4, %%mm0\n" + "paddd %%mm5, %%mm1\n" + "paddd %%mm6, %%mm2\n" + "paddd %%mm7, %%mm3\n" + "\n" + "movq 96(%0), %%mm4\n" + "movq 104(%0), %%mm5\n" + "movq 112(%0), %%mm6\n" + "movq 120(%0), %%mm7\n" + "pmaddwd 96(%1), %%mm4\n" + "pmaddwd 104(%1), %%mm5\n" + "pmaddwd 112(%1), %%mm6\n" + "pmaddwd 120(%1), %%mm7\n" + "paddd %%mm4, %%mm0\n" + "paddd %%mm5, %%mm1\n" + "paddd %%mm6, %%mm2\n" + "paddd %%mm7, %%mm3\n" + "\n" + "movq 128(%0), %%mm4\n" + "movq 136(%0), %%mm5\n" + "movq 144(%0), %%mm6\n" + "movq 152(%0), %%mm7\n" + "pmaddwd 128(%1), %%mm4\n" + "pmaddwd 136(%1), %%mm5\n" + "pmaddwd 144(%1), %%mm6\n" + "pmaddwd 152(%1), %%mm7\n" + "paddd %%mm4, %%mm0\n" + "paddd %%mm5, %%mm1\n" + "paddd %%mm6, %%mm2\n" + "paddd %%mm7, %%mm3\n" + "\n" + "psrad %4, %%mm0\n" + "psrad %4, %%mm1\n" + "psrad %4, %%mm2\n" + "psrad %4, %%mm3\n" + "\n" + "packssdw %%mm0, %%mm0\n" + "packssdw %%mm1, %%mm1\n" + "packssdw %%mm2, %%mm2\n" + "packssdw %%mm3, %%mm3\n" + "\n" + "movq %%mm0, %%mm4\n" + "movq %%mm0, %%mm5\n" + "pmaddwd 160(%1), %%mm4\n" + "pmaddwd 168(%1), %%mm5\n" + "\n" + "movq %%mm1, %%mm6\n" + "movq %%mm1, %%mm7\n" + "pmaddwd 192(%1), %%mm6\n" + "pmaddwd 200(%1), %%mm7\n" + "paddd %%mm6, %%mm4\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm2, %%mm6\n" + "movq %%mm2, %%mm7\n" + "pmaddwd 224(%1), %%mm6\n" + "pmaddwd 232(%1), %%mm7\n" + "paddd %%mm6, %%mm4\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm3, %%mm6\n" + "movq %%mm3, %%mm7\n" + "pmaddwd 256(%1), %%mm6\n" + "pmaddwd 264(%1), %%mm7\n" + "paddd %%mm6, %%mm4\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm4, (%3)\n" + "movq %%mm5, 8(%3)\n" + "\n" + "movq %%mm0, %%mm5\n" + "pmaddwd 176(%1), %%mm0\n" + "pmaddwd 184(%1), %%mm5\n" + "\n" + "movq %%mm1, %%mm7\n" + "pmaddwd 208(%1), %%mm1\n" + "pmaddwd 216(%1), %%mm7\n" + "paddd %%mm1, %%mm0\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm2, %%mm7\n" + "pmaddwd 240(%1), %%mm2\n" + "pmaddwd 248(%1), %%mm7\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm3, %%mm7\n" + "pmaddwd 272(%1), %%mm3\n" + "pmaddwd 280(%1), %%mm7\n" + "paddd %%mm3, %%mm0\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm0, 16(%3)\n" + "movq %%mm5, 24(%3)\n" + : + : "r" (in), "r" (consts), "r" (&round_c), "r" (out), + "i" (SBC_PROTO_FIXED8_SCALE) + : "cc", "memory"); +} + +static inline void sbc_analyze_4b_4s_mmx(int16_t *x, int32_t *out, + int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_four_mmx(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_mmx(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + sbc_analyze_four_mmx(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_mmx(x + 0, out, analysis_consts_fixed4_simd_even); + + asm volatile ("emms\n"); +} + +static inline void sbc_analyze_4b_8s_mmx(int16_t *x, int32_t *out, + int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_eight_mmx(x + 24, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_mmx(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + sbc_analyze_eight_mmx(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_mmx(x + 0, out, analysis_consts_fixed8_simd_even); + + asm volatile ("emms\n"); +} + +static void sbc_calc_scalefactors_mmx( + int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int channels, int subbands) +{ + static const SBC_ALIGNED int32_t consts[2] = { + 1 << SCALE_OUT_BITS, + 1 << SCALE_OUT_BITS, + }; + int ch, sb; + intptr_t blk; + for (ch = 0; ch < channels; ch++) { + for (sb = 0; sb < subbands; sb += 2) { + blk = (blocks - 1) * (((char *) &sb_sample_f[1][0][0] - + (char *) &sb_sample_f[0][0][0])); + asm volatile ( + "movq (%4), %%mm0\n" + "1:\n" + "movq (%1, %0), %%mm1\n" + "pxor %%mm2, %%mm2\n" + "pcmpgtd %%mm2, %%mm1\n" + "paddd (%1, %0), %%mm1\n" + "pcmpgtd %%mm1, %%mm2\n" + "pxor %%mm2, %%mm1\n" + + "por %%mm1, %%mm0\n" + + "sub %2, %0\n" + "jns 1b\n" + + "movd %%mm0, %k0\n" + "psrlq $32, %%mm0\n" + "bsrl %k0, %k0\n" + "subl %5, %k0\n" + "movl %k0, (%3)\n" + + "movd %%mm0, %k0\n" + "bsrl %k0, %k0\n" + "subl %5, %k0\n" + "movl %k0, 4(%3)\n" + : "+r" (blk) + : "r" (&sb_sample_f[0][ch][sb]), + "i" ((char *) &sb_sample_f[1][0][0] - + (char *) &sb_sample_f[0][0][0]), + "r" (&scale_factor[ch][sb]), + "r" (&consts), + "i" (SCALE_OUT_BITS) + : "cc", "memory"); + } + } + asm volatile ("emms\n"); +} + +static int check_mmx_support(void) +{ +#ifdef __amd64__ + return 1; /* We assume that all 64-bit processors have MMX support */ +#else + int cpuid_feature_information; + asm volatile ( + /* According to Intel manual, CPUID instruction is supported + * if the value of ID bit (bit 21) in EFLAGS can be modified */ + "pushf\n" + "movl (%%esp), %0\n" + "xorl $0x200000, (%%esp)\n" /* try to modify ID bit */ + "popf\n" + "pushf\n" + "xorl (%%esp), %0\n" /* check if ID bit changed */ + "jz 1f\n" + "push %%eax\n" + "push %%ebx\n" + "push %%ecx\n" + "mov $1, %%eax\n" + "cpuid\n" + "pop %%ecx\n" + "pop %%ebx\n" + "pop %%eax\n" + "1:\n" + "popf\n" + : "=d" (cpuid_feature_information) + : + : "cc"); + return cpuid_feature_information & (1 << 23); +#endif +} + +void sbc_init_primitives_mmx(struct sbc_encoder_state *state) +{ + if (check_mmx_support()) { + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx; + state->sbc_calc_scalefactors = sbc_calc_scalefactors_mmx; + state->implementation_info = "MMX"; + } +} + +#endif diff --git a/sbc/sbc_primitives_mmx.h b/sbc/sbc_primitives_mmx.h new file mode 100644 index 0000000..e0e728b --- /dev/null +++ b/sbc/sbc_primitives_mmx.h @@ -0,0 +1,41 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_MMX_H +#define __SBC_PRIMITIVES_MMX_H + +#include "sbc_primitives.h" + +#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__)) && \ + !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) + +#define SBC_BUILD_WITH_MMX_SUPPORT + +void sbc_init_primitives_mmx(struct sbc_encoder_state *encoder_state); + +#endif + +#endif diff --git a/sbc/sbc_primitives_neon.c b/sbc/sbc_primitives_neon.c new file mode 100644 index 0000000..0572158 --- /dev/null +++ b/sbc/sbc_primitives_neon.c @@ -0,0 +1,893 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives_neon.h" + +/* + * ARM NEON optimizations + */ + +#ifdef SBC_BUILD_WITH_NEON_SUPPORT + +static inline void _sbc_analyze_four_neon(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + /* TODO: merge even and odd cases (or even merge all four calls to this + * function) in order to have only aligned reads from 'in' array + * and reduce number of load instructions */ + asm volatile ( + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmull.s16 q0, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmull.s16 q1, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + + "vmlal.s16 q0, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q1, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q0, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q1, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + + "vmlal.s16 q0, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q1, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q0, d4, d8\n" + "vmlal.s16 q1, d5, d9\n" + + "vpadd.s32 d0, d0, d1\n" + "vpadd.s32 d1, d2, d3\n" + + "vrshrn.s32 d0, q0, %3\n" + + "vld1.16 {d2, d3, d4, d5}, [%1, :128]!\n" + + "vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */ + "vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */ + + "vmull.s16 q3, d2, d0\n" + "vmull.s16 q4, d3, d0\n" + "vmlal.s16 q3, d4, d1\n" + "vmlal.s16 q4, d5, d1\n" + + "vpadd.s32 d0, d6, d7\n" /* TODO: can be eliminated */ + "vpadd.s32 d1, d8, d9\n" /* TODO: can be eliminated */ + + "vst1.32 {d0, d1}, [%2, :128]\n" + : "+r" (in), "+r" (consts) + : "r" (out), + "i" (SBC_PROTO_FIXED4_SCALE) + : "memory", + "d0", "d1", "d2", "d3", "d4", "d5", + "d6", "d7", "d8", "d9", "d10", "d11"); +} + +static inline void _sbc_analyze_eight_neon(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + /* TODO: merge even and odd cases (or even merge all four calls to this + * function) in order to have only aligned reads from 'in' array + * and reduce number of load instructions */ + asm volatile ( + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmull.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmull.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmull.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmull.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmlal.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmlal.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmlal.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + + "vmlal.s16 q8, d6, d10\n" + "vmlal.s16 q9, d7, d11\n" + + "vpadd.s32 d0, d12, d13\n" + "vpadd.s32 d1, d14, d15\n" + "vpadd.s32 d2, d16, d17\n" + "vpadd.s32 d3, d18, d19\n" + + "vrshr.s32 q0, q0, %3\n" + "vrshr.s32 q1, q1, %3\n" + "vmovn.s32 d0, q0\n" + "vmovn.s32 d1, q1\n" + + "vdup.i32 d3, d1[1]\n" /* TODO: can be eliminated */ + "vdup.i32 d2, d1[0]\n" /* TODO: can be eliminated */ + "vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */ + "vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */ + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmull.s16 q6, d4, d0\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmull.s16 q7, d5, d0\n" + "vmull.s16 q8, d6, d0\n" + "vmull.s16 q9, d7, d0\n" + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmlal.s16 q6, d4, d1\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmlal.s16 q7, d5, d1\n" + "vmlal.s16 q8, d6, d1\n" + "vmlal.s16 q9, d7, d1\n" + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmlal.s16 q6, d4, d2\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmlal.s16 q7, d5, d2\n" + "vmlal.s16 q8, d6, d2\n" + "vmlal.s16 q9, d7, d2\n" + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmlal.s16 q6, d4, d3\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmlal.s16 q7, d5, d3\n" + "vmlal.s16 q8, d6, d3\n" + "vmlal.s16 q9, d7, d3\n" + + "vpadd.s32 d0, d12, d13\n" /* TODO: can be eliminated */ + "vpadd.s32 d1, d14, d15\n" /* TODO: can be eliminated */ + "vpadd.s32 d2, d16, d17\n" /* TODO: can be eliminated */ + "vpadd.s32 d3, d18, d19\n" /* TODO: can be eliminated */ + + "vst1.32 {d0, d1, d2, d3}, [%2, :128]\n" + : "+r" (in), "+r" (consts) + : "r" (out), + "i" (SBC_PROTO_FIXED8_SCALE) + : "memory", + "d0", "d1", "d2", "d3", "d4", "d5", + "d6", "d7", "d8", "d9", "d10", "d11", + "d12", "d13", "d14", "d15", "d16", "d17", + "d18", "d19"); +} + +static inline void sbc_analyze_4b_4s_neon(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + _sbc_analyze_four_neon(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + _sbc_analyze_four_neon(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + _sbc_analyze_four_neon(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + _sbc_analyze_four_neon(x + 0, out, analysis_consts_fixed4_simd_even); +} + +static inline void sbc_analyze_4b_8s_neon(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + _sbc_analyze_eight_neon(x + 24, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + _sbc_analyze_eight_neon(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + _sbc_analyze_eight_neon(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + _sbc_analyze_eight_neon(x + 0, out, analysis_consts_fixed8_simd_even); +} + +static void sbc_calc_scalefactors_neon( + int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int channels, int subbands) +{ + int ch, sb; + for (ch = 0; ch < channels; ch++) { + for (sb = 0; sb < subbands; sb += 4) { + int blk = blocks; + int32_t *in = &sb_sample_f[0][ch][sb]; + asm volatile ( + "vmov.s32 q0, #0\n" + "vmov.s32 q1, %[c1]\n" + "vmov.s32 q14, #1\n" + "vmov.s32 q15, %[c2]\n" + "vadd.s32 q1, q1, q14\n" + "1:\n" + "vld1.32 {d16, d17}, [%[in], :128], %[inc]\n" + "vabs.s32 q8, q8\n" + "vld1.32 {d18, d19}, [%[in], :128], %[inc]\n" + "vabs.s32 q9, q9\n" + "vld1.32 {d20, d21}, [%[in], :128], %[inc]\n" + "vabs.s32 q10, q10\n" + "vld1.32 {d22, d23}, [%[in], :128], %[inc]\n" + "vabs.s32 q11, q11\n" + "vmax.s32 q0, q0, q8\n" + "vmax.s32 q1, q1, q9\n" + "vmax.s32 q0, q0, q10\n" + "vmax.s32 q1, q1, q11\n" + "subs %[blk], %[blk], #4\n" + "bgt 1b\n" + "vmax.s32 q0, q0, q1\n" + "vsub.s32 q0, q0, q14\n" + "vclz.s32 q0, q0\n" + "vsub.s32 q0, q15, q0\n" + "vst1.32 {d0, d1}, [%[out], :128]\n" + : + [blk] "+r" (blk), + [in] "+r" (in) + : + [inc] "r" ((char *) &sb_sample_f[1][0][0] - + (char *) &sb_sample_f[0][0][0]), + [out] "r" (&scale_factor[ch][sb]), + [c1] "i" (1 << SCALE_OUT_BITS), + [c2] "i" (31 - SCALE_OUT_BITS) + : "d0", "d1", "d2", "d3", "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23", "d24", "d25", "d26", + "d27", "d28", "d29", "d30", "d31", "cc", "memory"); + } + } +} + +int sbc_calc_scalefactors_j_neon( + int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int subbands) +{ + static SBC_ALIGNED int32_t joint_bits_mask[8] = { + 8, 4, 2, 1, 128, 64, 32, 16 + }; + int joint, i; + int32_t *in0, *in1; + int32_t *in = &sb_sample_f[0][0][0]; + uint32_t *out0, *out1; + uint32_t *out = &scale_factor[0][0]; + int32_t *consts = joint_bits_mask; + + i = subbands; + + asm volatile ( + /* + * constants: q13 = (31 - SCALE_OUT_BITS), q14 = 1 + * input: q0 = ((1 << SCALE_OUT_BITS) + 1) + * %[in0] - samples for channel 0 + * %[in1] - samples for shannel 1 + * output: q0, q1 - scale factors without joint stereo + * q2, q3 - scale factors with joint stereo + * q15 - joint stereo selection mask + */ + ".macro calc_scalefactors\n" + "vmov.s32 q1, q0\n" + "vmov.s32 q2, q0\n" + "vmov.s32 q3, q0\n" + "mov %[i], %[blocks]\n" + "1:\n" + "vld1.32 {d18, d19}, [%[in1], :128], %[inc]\n" + "vbic.s32 q11, q9, q14\n" + "vld1.32 {d16, d17}, [%[in0], :128], %[inc]\n" + "vhadd.s32 q10, q8, q11\n" + "vhsub.s32 q11, q8, q11\n" + "vabs.s32 q8, q8\n" + "vabs.s32 q9, q9\n" + "vabs.s32 q10, q10\n" + "vabs.s32 q11, q11\n" + "vmax.s32 q0, q0, q8\n" + "vmax.s32 q1, q1, q9\n" + "vmax.s32 q2, q2, q10\n" + "vmax.s32 q3, q3, q11\n" + "subs %[i], %[i], #1\n" + "bgt 1b\n" + "vsub.s32 q0, q0, q14\n" + "vsub.s32 q1, q1, q14\n" + "vsub.s32 q2, q2, q14\n" + "vsub.s32 q3, q3, q14\n" + "vclz.s32 q0, q0\n" + "vclz.s32 q1, q1\n" + "vclz.s32 q2, q2\n" + "vclz.s32 q3, q3\n" + "vsub.s32 q0, q13, q0\n" + "vsub.s32 q1, q13, q1\n" + "vsub.s32 q2, q13, q2\n" + "vsub.s32 q3, q13, q3\n" + ".endm\n" + /* + * constants: q14 = 1 + * input: q15 - joint stereo selection mask + * %[in0] - value set by calc_scalefactors macro + * %[in1] - value set by calc_scalefactors macro + */ + ".macro update_joint_stereo_samples\n" + "sub %[out1], %[in1], %[inc]\n" + "sub %[out0], %[in0], %[inc]\n" + "sub %[in1], %[in1], %[inc], asl #1\n" + "sub %[in0], %[in0], %[inc], asl #1\n" + "vld1.32 {d18, d19}, [%[in1], :128]\n" + "vbic.s32 q11, q9, q14\n" + "vld1.32 {d16, d17}, [%[in0], :128]\n" + "vld1.32 {d2, d3}, [%[out1], :128]\n" + "vbic.s32 q3, q1, q14\n" + "vld1.32 {d0, d1}, [%[out0], :128]\n" + "vhsub.s32 q10, q8, q11\n" + "vhadd.s32 q11, q8, q11\n" + "vhsub.s32 q2, q0, q3\n" + "vhadd.s32 q3, q0, q3\n" + "vbif.s32 q10, q9, q15\n" + "vbif.s32 d22, d16, d30\n" + "sub %[inc], %[zero], %[inc], asl #1\n" + "sub %[i], %[blocks], #2\n" + "2:\n" + "vbif.s32 d23, d17, d31\n" + "vst1.32 {d20, d21}, [%[in1], :128], %[inc]\n" + "vbif.s32 d4, d2, d30\n" + "vld1.32 {d18, d19}, [%[in1], :128]\n" + "vbif.s32 d5, d3, d31\n" + "vst1.32 {d22, d23}, [%[in0], :128], %[inc]\n" + "vbif.s32 d6, d0, d30\n" + "vld1.32 {d16, d17}, [%[in0], :128]\n" + "vbif.s32 d7, d1, d31\n" + "vst1.32 {d4, d5}, [%[out1], :128], %[inc]\n" + "vbic.s32 q11, q9, q14\n" + "vld1.32 {d2, d3}, [%[out1], :128]\n" + "vst1.32 {d6, d7}, [%[out0], :128], %[inc]\n" + "vbic.s32 q3, q1, q14\n" + "vld1.32 {d0, d1}, [%[out0], :128]\n" + "vhsub.s32 q10, q8, q11\n" + "vhadd.s32 q11, q8, q11\n" + "vhsub.s32 q2, q0, q3\n" + "vhadd.s32 q3, q0, q3\n" + "vbif.s32 q10, q9, q15\n" + "vbif.s32 d22, d16, d30\n" + "subs %[i], %[i], #2\n" + "bgt 2b\n" + "sub %[inc], %[zero], %[inc], asr #1\n" + "vbif.s32 d23, d17, d31\n" + "vst1.32 {d20, d21}, [%[in1], :128]\n" + "vbif.s32 q2, q1, q15\n" + "vst1.32 {d22, d23}, [%[in0], :128]\n" + "vbif.s32 q3, q0, q15\n" + "vst1.32 {d4, d5}, [%[out1], :128]\n" + "vst1.32 {d6, d7}, [%[out0], :128]\n" + ".endm\n" + + "vmov.s32 q14, #1\n" + "vmov.s32 q13, %[c2]\n" + + "cmp %[i], #4\n" + "bne 8f\n" + + "4:\n" /* 4 subbands */ + "add %[in0], %[in], #0\n" + "add %[in1], %[in], #32\n" + "add %[out0], %[out], #0\n" + "add %[out1], %[out], #32\n" + "vmov.s32 q0, %[c1]\n" + "vadd.s32 q0, q0, q14\n" + + "calc_scalefactors\n" + + /* check whether to use joint stereo for subbands 0, 1, 2 */ + "vadd.s32 q15, q0, q1\n" + "vadd.s32 q9, q2, q3\n" + "vmov.s32 d31[1], %[zero]\n" /* last subband -> no joint */ + "vld1.32 {d16, d17}, [%[consts], :128]!\n" + "vcgt.s32 q15, q15, q9\n" + + /* calculate and save to memory 'joint' variable */ + /* update and save scale factors to memory */ + " vand.s32 q8, q8, q15\n" + "vbit.s32 q0, q2, q15\n" + " vpadd.s32 d16, d16, d17\n" + "vbit.s32 q1, q3, q15\n" + " vpadd.s32 d16, d16, d16\n" + "vst1.32 {d0, d1}, [%[out0], :128]\n" + "vst1.32 {d2, d3}, [%[out1], :128]\n" + " vst1.32 {d16[0]}, [%[joint]]\n" + + "update_joint_stereo_samples\n" + "b 9f\n" + + "8:\n" /* 8 subbands */ + "add %[in0], %[in], #16\n\n" + "add %[in1], %[in], #48\n" + "add %[out0], %[out], #16\n\n" + "add %[out1], %[out], #48\n" + "vmov.s32 q0, %[c1]\n" + "vadd.s32 q0, q0, q14\n" + + "calc_scalefactors\n" + + /* check whether to use joint stereo for subbands 4, 5, 6 */ + "vadd.s32 q15, q0, q1\n" + "vadd.s32 q9, q2, q3\n" + "vmov.s32 d31[1], %[zero]\n" /* last subband -> no joint */ + "vld1.32 {d16, d17}, [%[consts], :128]!\n" + "vcgt.s32 q15, q15, q9\n" + + /* calculate part of 'joint' variable and save it to d24 */ + /* update and save scale factors to memory */ + " vand.s32 q8, q8, q15\n" + "vbit.s32 q0, q2, q15\n" + " vpadd.s32 d16, d16, d17\n" + "vbit.s32 q1, q3, q15\n" + "vst1.32 {d0, d1}, [%[out0], :128]\n" + "vst1.32 {d2, d3}, [%[out1], :128]\n" + " vpadd.s32 d24, d16, d16\n" + + "update_joint_stereo_samples\n" + + "add %[in0], %[in], #0\n" + "add %[in1], %[in], #32\n" + "add %[out0], %[out], #0\n\n" + "add %[out1], %[out], #32\n" + "vmov.s32 q0, %[c1]\n" + "vadd.s32 q0, q0, q14\n" + + "calc_scalefactors\n" + + /* check whether to use joint stereo for subbands 0, 1, 2, 3 */ + "vadd.s32 q15, q0, q1\n" + "vadd.s32 q9, q2, q3\n" + "vld1.32 {d16, d17}, [%[consts], :128]!\n" + "vcgt.s32 q15, q15, q9\n" + + /* combine last part of 'joint' with d24 and save to memory */ + /* update and save scale factors to memory */ + " vand.s32 q8, q8, q15\n" + "vbit.s32 q0, q2, q15\n" + " vpadd.s32 d16, d16, d17\n" + "vbit.s32 q1, q3, q15\n" + " vpadd.s32 d16, d16, d16\n" + "vst1.32 {d0, d1}, [%[out0], :128]\n" + " vadd.s32 d16, d16, d24\n" + "vst1.32 {d2, d3}, [%[out1], :128]\n" + " vst1.32 {d16[0]}, [%[joint]]\n" + + "update_joint_stereo_samples\n" + "9:\n" + ".purgem calc_scalefactors\n" + ".purgem update_joint_stereo_samples\n" + : + [i] "+&r" (i), + [in] "+&r" (in), + [in0] "=&r" (in0), + [in1] "=&r" (in1), + [out] "+&r" (out), + [out0] "=&r" (out0), + [out1] "=&r" (out1), + [consts] "+&r" (consts) + : + [inc] "r" ((char *) &sb_sample_f[1][0][0] - + (char *) &sb_sample_f[0][0][0]), + [blocks] "r" (blocks), + [joint] "r" (&joint), + [c1] "i" (1 << SCALE_OUT_BITS), + [c2] "i" (31 - SCALE_OUT_BITS), + [zero] "r" (0) + : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", + "d23", "d24", "d25", "d26", "d27", "d28", "d29", + "d30", "d31", "cc", "memory"); + + return joint; +} + +#define PERM_BE(a, b, c, d) { \ + (a * 2) + 1, (a * 2) + 0, \ + (b * 2) + 1, (b * 2) + 0, \ + (c * 2) + 1, (c * 2) + 0, \ + (d * 2) + 1, (d * 2) + 0 \ + } +#define PERM_LE(a, b, c, d) { \ + (a * 2) + 0, (a * 2) + 1, \ + (b * 2) + 0, (b * 2) + 1, \ + (c * 2) + 0, (c * 2) + 1, \ + (d * 2) + 0, (d * 2) + 1 \ + } + +static SBC_ALWAYS_INLINE int sbc_enc_process_input_4s_neon_internal( + int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels, int big_endian) +{ + static SBC_ALIGNED uint8_t perm_be[2][8] = { + PERM_BE(7, 3, 6, 4), + PERM_BE(0, 2, 1, 5) + }; + static SBC_ALIGNED uint8_t perm_le[2][8] = { + PERM_LE(7, 3, 6, 4), + PERM_LE(0, 2, 1, 5) + }; + /* handle X buffer wraparound */ + if (position < nsamples) { + int16_t *dst = &X[0][SBC_X_BUFFER_SIZE - 40]; + int16_t *src = &X[0][position]; + asm volatile ( + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0}, [%[src], :64]!\n" + "vst1.16 {d0}, [%[dst], :64]!\n" + : + [dst] "+r" (dst), + [src] "+r" (src) + : : "memory", "d0", "d1", "d2", "d3"); + if (nchannels > 1) { + dst = &X[1][SBC_X_BUFFER_SIZE - 40]; + src = &X[1][position]; + asm volatile ( + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0}, [%[src], :64]!\n" + "vst1.16 {d0}, [%[dst], :64]!\n" + : + [dst] "+r" (dst), + [src] "+r" (src) + : : "memory", "d0", "d1", "d2", "d3"); + } + position = SBC_X_BUFFER_SIZE - 40; + } + + if ((nchannels > 1) && ((uintptr_t)pcm & 1)) { + /* poor 'pcm' alignment */ + int16_t *x = &X[0][position]; + int16_t *y = &X[1][position]; + asm volatile ( + "vld1.8 {d0, d1}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #16\n" + "sub %[y], %[y], #16\n" + "sub %[position], %[position], #8\n" + "vld1.8 {d4, d5}, [%[pcm]]!\n" + "vuzp.16 d4, d5\n" + "vld1.8 {d20, d21}, [%[pcm]]!\n" + "vuzp.16 d20, d21\n" + "vswp d5, d20\n" + "vtbl.8 d16, {d4, d5}, d0\n" + "vtbl.8 d17, {d4, d5}, d1\n" + "vtbl.8 d18, {d20, d21}, d0\n" + "vtbl.8 d19, {d20, d21}, d1\n" + "vst1.16 {d16, d17}, [%[x], :128]\n" + "vst1.16 {d18, d19}, [%[y], :128]\n" + "subs %[nsamples], %[nsamples], #8\n" + "bgt 1b\n" + : + [x] "+r" (x), + [y] "+r" (y), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23"); + } else if (nchannels > 1) { + /* proper 'pcm' alignment */ + int16_t *x = &X[0][position]; + int16_t *y = &X[1][position]; + asm volatile ( + "vld1.8 {d0, d1}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #16\n" + "sub %[y], %[y], #16\n" + "sub %[position], %[position], #8\n" + "vld2.16 {d4, d5}, [%[pcm]]!\n" + "vld2.16 {d20, d21}, [%[pcm]]!\n" + "vswp d5, d20\n" + "vtbl.8 d16, {d4, d5}, d0\n" + "vtbl.8 d17, {d4, d5}, d1\n" + "vtbl.8 d18, {d20, d21}, d0\n" + "vtbl.8 d19, {d20, d21}, d1\n" + "vst1.16 {d16, d17}, [%[x], :128]\n" + "vst1.16 {d18, d19}, [%[y], :128]\n" + "subs %[nsamples], %[nsamples], #8\n" + "bgt 1b\n" + : + [x] "+r" (x), + [y] "+r" (y), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23"); + } else { + int16_t *x = &X[0][position]; + asm volatile ( + "vld1.8 {d0, d1}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #16\n" + "sub %[position], %[position], #8\n" + "vld1.8 {d4, d5}, [%[pcm]]!\n" + "vtbl.8 d16, {d4, d5}, d0\n" + "vtbl.8 d17, {d4, d5}, d1\n" + "vst1.16 {d16, d17}, [%[x], :128]\n" + "subs %[nsamples], %[nsamples], #8\n" + "bgt 1b\n" + : + [x] "+r" (x), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19"); + } + return position; +} + +static SBC_ALWAYS_INLINE int sbc_enc_process_input_8s_neon_internal( + int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels, int big_endian) +{ + static SBC_ALIGNED uint8_t perm_be[4][8] = { + PERM_BE(15, 7, 14, 8), + PERM_BE(13, 9, 12, 10), + PERM_BE(11, 3, 6, 0), + PERM_BE(5, 1, 4, 2) + }; + static SBC_ALIGNED uint8_t perm_le[4][8] = { + PERM_LE(15, 7, 14, 8), + PERM_LE(13, 9, 12, 10), + PERM_LE(11, 3, 6, 0), + PERM_LE(5, 1, 4, 2) + }; + /* handle X buffer wraparound */ + if (position < nsamples) { + int16_t *dst = &X[0][SBC_X_BUFFER_SIZE - 72]; + int16_t *src = &X[0][position]; + asm volatile ( + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1}, [%[src], :128]!\n" + "vst1.16 {d0, d1}, [%[dst], :128]!\n" + : + [dst] "+r" (dst), + [src] "+r" (src) + : : "memory", "d0", "d1", "d2", "d3"); + if (nchannels > 1) { + dst = &X[1][SBC_X_BUFFER_SIZE - 72]; + src = &X[1][position]; + asm volatile ( + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1}, [%[src], :128]!\n" + "vst1.16 {d0, d1}, [%[dst], :128]!\n" + : + [dst] "+r" (dst), + [src] "+r" (src) + : : "memory", "d0", "d1", "d2", "d3"); + } + position = SBC_X_BUFFER_SIZE - 72; + } + + if ((nchannels > 1) && ((uintptr_t)pcm & 1)) { + /* poor 'pcm' alignment */ + int16_t *x = &X[0][position]; + int16_t *y = &X[1][position]; + asm volatile ( + "vld1.8 {d0, d1, d2, d3}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #32\n" + "sub %[y], %[y], #32\n" + "sub %[position], %[position], #16\n" + "vld1.8 {d4, d5, d6, d7}, [%[pcm]]!\n" + "vuzp.16 q2, q3\n" + "vld1.8 {d20, d21, d22, d23}, [%[pcm]]!\n" + "vuzp.16 q10, q11\n" + "vswp q3, q10\n" + "vtbl.8 d16, {d4, d5, d6, d7}, d0\n" + "vtbl.8 d17, {d4, d5, d6, d7}, d1\n" + "vtbl.8 d18, {d4, d5, d6, d7}, d2\n" + "vtbl.8 d19, {d4, d5, d6, d7}, d3\n" + "vst1.16 {d16, d17, d18, d19}, [%[x], :128]\n" + "vtbl.8 d16, {d20, d21, d22, d23}, d0\n" + "vtbl.8 d17, {d20, d21, d22, d23}, d1\n" + "vtbl.8 d18, {d20, d21, d22, d23}, d2\n" + "vtbl.8 d19, {d20, d21, d22, d23}, d3\n" + "vst1.16 {d16, d17, d18, d19}, [%[y], :128]\n" + "subs %[nsamples], %[nsamples], #16\n" + "bgt 1b\n" + : + [x] "+r" (x), + [y] "+r" (y), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23"); + } else if (nchannels > 1) { + /* proper 'pcm' alignment */ + int16_t *x = &X[0][position]; + int16_t *y = &X[1][position]; + asm volatile ( + "vld1.8 {d0, d1, d2, d3}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #32\n" + "sub %[y], %[y], #32\n" + "sub %[position], %[position], #16\n" + "vld2.16 {d4, d5, d6, d7}, [%[pcm]]!\n" + "vld2.16 {d20, d21, d22, d23}, [%[pcm]]!\n" + "vswp q3, q10\n" + "vtbl.8 d16, {d4, d5, d6, d7}, d0\n" + "vtbl.8 d17, {d4, d5, d6, d7}, d1\n" + "vtbl.8 d18, {d4, d5, d6, d7}, d2\n" + "vtbl.8 d19, {d4, d5, d6, d7}, d3\n" + "vst1.16 {d16, d17, d18, d19}, [%[x], :128]\n" + "vtbl.8 d16, {d20, d21, d22, d23}, d0\n" + "vtbl.8 d17, {d20, d21, d22, d23}, d1\n" + "vtbl.8 d18, {d20, d21, d22, d23}, d2\n" + "vtbl.8 d19, {d20, d21, d22, d23}, d3\n" + "vst1.16 {d16, d17, d18, d19}, [%[y], :128]\n" + "subs %[nsamples], %[nsamples], #16\n" + "bgt 1b\n" + : + [x] "+r" (x), + [y] "+r" (y), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23"); + } else { + int16_t *x = &X[0][position]; + asm volatile ( + "vld1.8 {d0, d1, d2, d3}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #32\n" + "sub %[position], %[position], #16\n" + "vld1.8 {d4, d5, d6, d7}, [%[pcm]]!\n" + "vtbl.8 d16, {d4, d5, d6, d7}, d0\n" + "vtbl.8 d17, {d4, d5, d6, d7}, d1\n" + "vtbl.8 d18, {d4, d5, d6, d7}, d2\n" + "vtbl.8 d19, {d4, d5, d6, d7}, d3\n" + "vst1.16 {d16, d17, d18, d19}, [%[x], :128]\n" + "subs %[nsamples], %[nsamples], #16\n" + "bgt 1b\n" + : + [x] "+r" (x), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19"); + } + return position; +} + +#undef PERM_BE +#undef PERM_LE + +static int sbc_enc_process_input_4s_be_neon(int position, const uint8_t *pcm, + int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + return sbc_enc_process_input_4s_neon_internal( + position, pcm, X, nsamples, nchannels, 1); +} + +static int sbc_enc_process_input_4s_le_neon(int position, const uint8_t *pcm, + int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + return sbc_enc_process_input_4s_neon_internal( + position, pcm, X, nsamples, nchannels, 0); +} + +static int sbc_enc_process_input_8s_be_neon(int position, const uint8_t *pcm, + int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + return sbc_enc_process_input_8s_neon_internal( + position, pcm, X, nsamples, nchannels, 1); +} + +static int sbc_enc_process_input_8s_le_neon(int position, const uint8_t *pcm, + int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + return sbc_enc_process_input_8s_neon_internal( + position, pcm, X, nsamples, nchannels, 0); +} + +void sbc_init_primitives_neon(struct sbc_encoder_state *state) +{ + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_neon; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_neon; + state->sbc_calc_scalefactors = sbc_calc_scalefactors_neon; + state->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j_neon; + state->sbc_enc_process_input_4s_le = sbc_enc_process_input_4s_le_neon; + state->sbc_enc_process_input_4s_be = sbc_enc_process_input_4s_be_neon; + state->sbc_enc_process_input_8s_le = sbc_enc_process_input_8s_le_neon; + state->sbc_enc_process_input_8s_be = sbc_enc_process_input_8s_be_neon; + state->implementation_info = "NEON"; +} + +#endif diff --git a/sbc/sbc_primitives_neon.h b/sbc/sbc_primitives_neon.h new file mode 100644 index 0000000..ea3da06 --- /dev/null +++ b/sbc/sbc_primitives_neon.h @@ -0,0 +1,41 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_NEON_H +#define __SBC_PRIMITIVES_NEON_H + +#include "sbc_primitives.h" + +#if defined(__GNUC__) && defined(__ARM_NEON__) && \ + !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) + +#define SBC_BUILD_WITH_NEON_SUPPORT + +void sbc_init_primitives_neon(struct sbc_encoder_state *encoder_state); + +#endif + +#endif diff --git a/sbc/sbc_tables.h b/sbc/sbc_tables.h new file mode 100644 index 0000000..28c0d54 --- /dev/null +++ b/sbc/sbc_tables.h @@ -0,0 +1,660 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* A2DP specification: Appendix B, page 69 */ +static const int sbc_offset4[4][4] = { + { -1, 0, 0, 0 }, + { -2, 0, 0, 1 }, + { -2, 0, 0, 1 }, + { -2, 0, 0, 1 } +}; + +/* A2DP specification: Appendix B, page 69 */ +static const int sbc_offset8[4][8] = { + { -2, 0, 0, 0, 0, 0, 0, 1 }, + { -3, 0, 0, 0, 0, 0, 1, 2 }, + { -4, 0, 0, 0, 0, 0, 1, 2 }, + { -4, 0, 0, 0, 0, 0, 1, 2 } +}; + + +#define SS4(val) ASR(val, SCALE_SPROTO4_TBL) +#define SS8(val) ASR(val, SCALE_SPROTO8_TBL) +#define SN4(val) ASR(val, SCALE_NPROTO4_TBL) +#define SN8(val) ASR(val, SCALE_NPROTO8_TBL) + +static const int32_t sbc_proto_4_40m0[] = { + SS4(0x00000000), SS4(0xffa6982f), SS4(0xfba93848), SS4(0x0456c7b8), + SS4(0x005967d1), SS4(0xfffb9ac7), SS4(0xff589157), SS4(0xf9c2a8d8), + SS4(0x027c1434), SS4(0x0019118b), SS4(0xfff3c74c), SS4(0xff137330), + SS4(0xf81b8d70), SS4(0x00ec1b8b), SS4(0xfff0b71a), SS4(0xffe99b00), + SS4(0xfef84470), SS4(0xf6fb4370), SS4(0xffcdc351), SS4(0xffe01dc7) +}; + +static const int32_t sbc_proto_4_40m1[] = { + SS4(0xffe090ce), SS4(0xff2c0475), SS4(0xf694f800), SS4(0xff2c0475), + SS4(0xffe090ce), SS4(0xffe01dc7), SS4(0xffcdc351), SS4(0xf6fb4370), + SS4(0xfef84470), SS4(0xffe99b00), SS4(0xfff0b71a), SS4(0x00ec1b8b), + SS4(0xf81b8d70), SS4(0xff137330), SS4(0xfff3c74c), SS4(0x0019118b), + SS4(0x027c1434), SS4(0xf9c2a8d8), SS4(0xff589157), SS4(0xfffb9ac7) +}; + +static const int32_t sbc_proto_8_80m0[] = { + SS8(0x00000000), SS8(0xfe8d1970), SS8(0xee979f00), SS8(0x11686100), + SS8(0x0172e690), SS8(0xfff5bd1a), SS8(0xfdf1c8d4), SS8(0xeac182c0), + SS8(0x0d9daee0), SS8(0x00e530da), SS8(0xffe9811d), SS8(0xfd52986c), + SS8(0xe7054ca0), SS8(0x0a00d410), SS8(0x006c1de4), SS8(0xffdba705), + SS8(0xfcbc98e8), SS8(0xe3889d20), SS8(0x06af2308), SS8(0x000bb7db), + SS8(0xffca00ed), SS8(0xfc3fbb68), SS8(0xe071bc00), SS8(0x03bf7948), + SS8(0xffc4e05c), SS8(0xffb54b3b), SS8(0xfbedadc0), SS8(0xdde26200), + SS8(0x0142291c), SS8(0xff960e94), SS8(0xff9f3e17), SS8(0xfbd8f358), + SS8(0xdbf79400), SS8(0xff405e01), SS8(0xff7d4914), SS8(0xff8b1a31), + SS8(0xfc1417b8), SS8(0xdac7bb40), SS8(0xfdbb828c), SS8(0xff762170) +}; + +static const int32_t sbc_proto_8_80m1[] = { + SS8(0xff7c272c), SS8(0xfcb02620), SS8(0xda612700), SS8(0xfcb02620), + SS8(0xff7c272c), SS8(0xff762170), SS8(0xfdbb828c), SS8(0xdac7bb40), + SS8(0xfc1417b8), SS8(0xff8b1a31), SS8(0xff7d4914), SS8(0xff405e01), + SS8(0xdbf79400), SS8(0xfbd8f358), SS8(0xff9f3e17), SS8(0xff960e94), + SS8(0x0142291c), SS8(0xdde26200), SS8(0xfbedadc0), SS8(0xffb54b3b), + SS8(0xffc4e05c), SS8(0x03bf7948), SS8(0xe071bc00), SS8(0xfc3fbb68), + SS8(0xffca00ed), SS8(0x000bb7db), SS8(0x06af2308), SS8(0xe3889d20), + SS8(0xfcbc98e8), SS8(0xffdba705), SS8(0x006c1de4), SS8(0x0a00d410), + SS8(0xe7054ca0), SS8(0xfd52986c), SS8(0xffe9811d), SS8(0x00e530da), + SS8(0x0d9daee0), SS8(0xeac182c0), SS8(0xfdf1c8d4), SS8(0xfff5bd1a) +}; + +static const int32_t synmatrix4[8][4] = { + { SN4(0x05a82798), SN4(0xfa57d868), SN4(0xfa57d868), SN4(0x05a82798) }, + { SN4(0x030fbc54), SN4(0xf89be510), SN4(0x07641af0), SN4(0xfcf043ac) }, + { SN4(0x00000000), SN4(0x00000000), SN4(0x00000000), SN4(0x00000000) }, + { SN4(0xfcf043ac), SN4(0x07641af0), SN4(0xf89be510), SN4(0x030fbc54) }, + { SN4(0xfa57d868), SN4(0x05a82798), SN4(0x05a82798), SN4(0xfa57d868) }, + { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) }, + { SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000) }, + { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) } +}; + +static const int32_t synmatrix8[16][8] = { + { SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798), + SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798) }, + { SN8(0x0471ced0), SN8(0xf8275a10), SN8(0x018f8b84), SN8(0x06a6d988), + SN8(0xf9592678), SN8(0xfe70747c), SN8(0x07d8a5f0), SN8(0xfb8e3130) }, + { SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac), + SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54) }, + { SN8(0x018f8b84), SN8(0xfb8e3130), SN8(0x06a6d988), SN8(0xf8275a10), + SN8(0x07d8a5f0), SN8(0xf9592678), SN8(0x0471ced0), SN8(0xfe70747c) }, + { SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), + SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000) }, + { SN8(0xfe70747c), SN8(0x0471ced0), SN8(0xf9592678), SN8(0x07d8a5f0), + SN8(0xf8275a10), SN8(0x06a6d988), SN8(0xfb8e3130), SN8(0x018f8b84) }, + { SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54), + SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac) }, + { SN8(0xfb8e3130), SN8(0x07d8a5f0), SN8(0xfe70747c), SN8(0xf9592678), + SN8(0x06a6d988), SN8(0x018f8b84), SN8(0xf8275a10), SN8(0x0471ced0) }, + { SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868), + SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868) }, + { SN8(0xf9592678), SN8(0x018f8b84), SN8(0x07d8a5f0), SN8(0x0471ced0), + SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) }, + { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0), + SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) }, + { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c), + SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) }, + { SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), + SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000) }, + { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c), + SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) }, + { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0), + SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) }, + { SN8(0xf9592678), SN8(0x018f8b84), SN8(0x07d8a5f0), SN8(0x0471ced0), + SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) } +}; + +/* Uncomment the following line to enable high precision build of SBC encoder */ + +/* #define SBC_HIGH_PRECISION */ + +#ifdef SBC_HIGH_PRECISION +#define FIXED_A int64_t /* data type for fixed point accumulator */ +#define FIXED_T int32_t /* data type for fixed point constants */ +#define SBC_FIXED_EXTRA_BITS 16 +#else +#define FIXED_A int32_t /* data type for fixed point accumulator */ +#define FIXED_T int16_t /* data type for fixed point constants */ +#define SBC_FIXED_EXTRA_BITS 0 +#endif + +/* A2DP specification: Section 12.8 Tables + * + * Original values are premultiplied by 2 for better precision (that is the + * maximum which is possible without overflows) + * + * Note: in each block of 8 numbers sign was changed for elements 2 and 7 + * in order to compensate the same change applied to cos_table_fixed_4 + */ +#define SBC_PROTO_FIXED4_SCALE \ + ((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 1) +#define F_PROTO4(x) (FIXED_A) ((x * 2) * \ + ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_PROTO4(x) +static const FIXED_T _sbc_proto_fixed4[40] = { + F(0.00000000E+00), F(5.36548976E-04), + -F(1.49188357E-03), F(2.73370904E-03), + F(3.83720193E-03), F(3.89205149E-03), + F(1.86581691E-03), F(3.06012286E-03), + + F(1.09137620E-02), F(2.04385087E-02), + -F(2.88757392E-02), F(3.21939290E-02), + F(2.58767811E-02), F(6.13245186E-03), + -F(2.88217274E-02), F(7.76463494E-02), + + F(1.35593274E-01), F(1.94987841E-01), + -F(2.46636662E-01), F(2.81828203E-01), + F(2.94315332E-01), F(2.81828203E-01), + F(2.46636662E-01), -F(1.94987841E-01), + + -F(1.35593274E-01), -F(7.76463494E-02), + F(2.88217274E-02), F(6.13245186E-03), + F(2.58767811E-02), F(3.21939290E-02), + F(2.88757392E-02), -F(2.04385087E-02), + + -F(1.09137620E-02), -F(3.06012286E-03), + -F(1.86581691E-03), F(3.89205149E-03), + F(3.83720193E-03), F(2.73370904E-03), + F(1.49188357E-03), -F(5.36548976E-04), +}; +#undef F + +/* + * To produce this cosine matrix in Octave: + * + * b = zeros(4, 8); + * for i = 0:3 + * for j = 0:7 b(i+1, j+1) = cos((i + 0.5) * (j - 2) * (pi/4)) + * endfor + * endfor; + * printf("%.10f, ", b'); + * + * Note: in each block of 8 numbers sign was changed for elements 2 and 7 + * + * Change of sign for element 2 allows to replace constant 1.0 (not + * representable in Q15 format) with -1.0 (fine with Q15). + * Changed sign for element 7 allows to have more similar constants + * and simplify subband filter function code. + */ +#define SBC_COS_TABLE_FIXED4_SCALE \ + ((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS) +#define F_COS4(x) (FIXED_A) ((x) * \ + ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_COS4(x) +static const FIXED_T cos_table_fixed_4[32] = { + F(0.7071067812), F(0.9238795325), -F(1.0000000000), F(0.9238795325), + F(0.7071067812), F(0.3826834324), F(0.0000000000), F(0.3826834324), + + -F(0.7071067812), F(0.3826834324), -F(1.0000000000), F(0.3826834324), + -F(0.7071067812), -F(0.9238795325), -F(0.0000000000), -F(0.9238795325), + + -F(0.7071067812), -F(0.3826834324), -F(1.0000000000), -F(0.3826834324), + -F(0.7071067812), F(0.9238795325), F(0.0000000000), F(0.9238795325), + + F(0.7071067812), -F(0.9238795325), -F(1.0000000000), -F(0.9238795325), + F(0.7071067812), -F(0.3826834324), -F(0.0000000000), -F(0.3826834324), +}; +#undef F + +/* A2DP specification: Section 12.8 Tables + * + * Original values are premultiplied by 4 for better precision (that is the + * maximum which is possible without overflows) + * + * Note: in each block of 16 numbers sign was changed for elements 4, 13, 14, 15 + * in order to compensate the same change applied to cos_table_fixed_8 + */ +#define SBC_PROTO_FIXED8_SCALE \ + ((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 1) +#define F_PROTO8(x) (FIXED_A) ((x * 2) * \ + ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_PROTO8(x) +static const FIXED_T _sbc_proto_fixed8[80] = { + F(0.00000000E+00), F(1.56575398E-04), + F(3.43256425E-04), F(5.54620202E-04), + -F(8.23919506E-04), F(1.13992507E-03), + F(1.47640169E-03), F(1.78371725E-03), + F(2.01182542E-03), F(2.10371989E-03), + F(1.99454554E-03), F(1.61656283E-03), + F(9.02154502E-04), F(1.78805361E-04), + F(1.64973098E-03), F(3.49717454E-03), + + F(5.65949473E-03), F(8.02941163E-03), + F(1.04584443E-02), F(1.27472335E-02), + -F(1.46525263E-02), F(1.59045603E-02), + F(1.62208471E-02), F(1.53184106E-02), + F(1.29371806E-02), F(8.85757540E-03), + F(2.92408442E-03), -F(4.91578024E-03), + -F(1.46404076E-02), F(2.61098752E-02), + F(3.90751381E-02), F(5.31873032E-02), + + F(6.79989431E-02), F(8.29847578E-02), + F(9.75753918E-02), F(1.11196689E-01), + -F(1.23264548E-01), F(1.33264415E-01), + F(1.40753505E-01), F(1.45389847E-01), + F(1.46955068E-01), F(1.45389847E-01), + F(1.40753505E-01), F(1.33264415E-01), + F(1.23264548E-01), -F(1.11196689E-01), + -F(9.75753918E-02), -F(8.29847578E-02), + + -F(6.79989431E-02), -F(5.31873032E-02), + -F(3.90751381E-02), -F(2.61098752E-02), + F(1.46404076E-02), -F(4.91578024E-03), + F(2.92408442E-03), F(8.85757540E-03), + F(1.29371806E-02), F(1.53184106E-02), + F(1.62208471E-02), F(1.59045603E-02), + F(1.46525263E-02), -F(1.27472335E-02), + -F(1.04584443E-02), -F(8.02941163E-03), + + -F(5.65949473E-03), -F(3.49717454E-03), + -F(1.64973098E-03), -F(1.78805361E-04), + -F(9.02154502E-04), F(1.61656283E-03), + F(1.99454554E-03), F(2.10371989E-03), + F(2.01182542E-03), F(1.78371725E-03), + F(1.47640169E-03), F(1.13992507E-03), + F(8.23919506E-04), -F(5.54620202E-04), + -F(3.43256425E-04), -F(1.56575398E-04), +}; +#undef F + +/* + * To produce this cosine matrix in Octave: + * + * b = zeros(8, 16); + * for i = 0:7 + * for j = 0:15 b(i+1, j+1) = cos((i + 0.5) * (j - 4) * (pi/8)) + * endfor endfor; + * printf("%.10f, ", b'); + * + * Note: in each block of 16 numbers sign was changed for elements 4, 13, 14, 15 + * + * Change of sign for element 4 allows to replace constant 1.0 (not + * representable in Q15 format) with -1.0 (fine with Q15). + * Changed signs for elements 13, 14, 15 allow to have more similar constants + * and simplify subband filter function code. + */ +#define SBC_COS_TABLE_FIXED8_SCALE \ + ((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS) +#define F_COS8(x) (FIXED_A) ((x) * \ + ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_COS8(x) +static const FIXED_T cos_table_fixed_8[128] = { + F(0.7071067812), F(0.8314696123), F(0.9238795325), F(0.9807852804), + -F(1.0000000000), F(0.9807852804), F(0.9238795325), F(0.8314696123), + F(0.7071067812), F(0.5555702330), F(0.3826834324), F(0.1950903220), + F(0.0000000000), F(0.1950903220), F(0.3826834324), F(0.5555702330), + + -F(0.7071067812), -F(0.1950903220), F(0.3826834324), F(0.8314696123), + -F(1.0000000000), F(0.8314696123), F(0.3826834324), -F(0.1950903220), + -F(0.7071067812), -F(0.9807852804), -F(0.9238795325), -F(0.5555702330), + -F(0.0000000000), -F(0.5555702330), -F(0.9238795325), -F(0.9807852804), + + -F(0.7071067812), -F(0.9807852804), -F(0.3826834324), F(0.5555702330), + -F(1.0000000000), F(0.5555702330), -F(0.3826834324), -F(0.9807852804), + -F(0.7071067812), F(0.1950903220), F(0.9238795325), F(0.8314696123), + F(0.0000000000), F(0.8314696123), F(0.9238795325), F(0.1950903220), + + F(0.7071067812), -F(0.5555702330), -F(0.9238795325), F(0.1950903220), + -F(1.0000000000), F(0.1950903220), -F(0.9238795325), -F(0.5555702330), + F(0.7071067812), F(0.8314696123), -F(0.3826834324), -F(0.9807852804), + -F(0.0000000000), -F(0.9807852804), -F(0.3826834324), F(0.8314696123), + + F(0.7071067812), F(0.5555702330), -F(0.9238795325), -F(0.1950903220), + -F(1.0000000000), -F(0.1950903220), -F(0.9238795325), F(0.5555702330), + F(0.7071067812), -F(0.8314696123), -F(0.3826834324), F(0.9807852804), + F(0.0000000000), F(0.9807852804), -F(0.3826834324), -F(0.8314696123), + + -F(0.7071067812), F(0.9807852804), -F(0.3826834324), -F(0.5555702330), + -F(1.0000000000), -F(0.5555702330), -F(0.3826834324), F(0.9807852804), + -F(0.7071067812), -F(0.1950903220), F(0.9238795325), -F(0.8314696123), + -F(0.0000000000), -F(0.8314696123), F(0.9238795325), -F(0.1950903220), + + -F(0.7071067812), F(0.1950903220), F(0.3826834324), -F(0.8314696123), + -F(1.0000000000), -F(0.8314696123), F(0.3826834324), F(0.1950903220), + -F(0.7071067812), F(0.9807852804), -F(0.9238795325), F(0.5555702330), + -F(0.0000000000), F(0.5555702330), -F(0.9238795325), F(0.9807852804), + + F(0.7071067812), -F(0.8314696123), F(0.9238795325), -F(0.9807852804), + -F(1.0000000000), -F(0.9807852804), F(0.9238795325), -F(0.8314696123), + F(0.7071067812), -F(0.5555702330), F(0.3826834324), -F(0.1950903220), + -F(0.0000000000), -F(0.1950903220), F(0.3826834324), -F(0.5555702330), +}; +#undef F + +/* + * Enforce 16 byte alignment for the data, which is supposed to be used + * with SIMD optimized code. + */ + +#define SBC_ALIGN_BITS 4 +#define SBC_ALIGN_MASK ((1 << (SBC_ALIGN_BITS)) - 1) + +#ifdef __GNUC__ +#define SBC_ALIGNED __attribute__((aligned(1 << (SBC_ALIGN_BITS)))) +#else +#define SBC_ALIGNED +#endif + +/* + * Constant tables for the use in SIMD optimized analysis filters + * Each table consists of two parts: + * 1. reordered "proto" table + * 2. reordered "cos" table + * + * Due to non-symmetrical reordering, separate tables for "even" + * and "odd" cases are needed + */ + +static const FIXED_T SBC_ALIGNED analysis_consts_fixed4_simd_even[40 + 16] = { +#define C0 1.0932568993 +#define C1 1.3056875580 +#define C2 1.3056875580 +#define C3 1.6772280856 + +#define F(x) F_PROTO4(x) + F(0.00000000E+00 * C0), F(3.83720193E-03 * C0), + F(5.36548976E-04 * C1), F(2.73370904E-03 * C1), + F(3.06012286E-03 * C2), F(3.89205149E-03 * C2), + F(0.00000000E+00 * C3), -F(1.49188357E-03 * C3), + F(1.09137620E-02 * C0), F(2.58767811E-02 * C0), + F(2.04385087E-02 * C1), F(3.21939290E-02 * C1), + F(7.76463494E-02 * C2), F(6.13245186E-03 * C2), + F(0.00000000E+00 * C3), -F(2.88757392E-02 * C3), + F(1.35593274E-01 * C0), F(2.94315332E-01 * C0), + F(1.94987841E-01 * C1), F(2.81828203E-01 * C1), + -F(1.94987841E-01 * C2), F(2.81828203E-01 * C2), + F(0.00000000E+00 * C3), -F(2.46636662E-01 * C3), + -F(1.35593274E-01 * C0), F(2.58767811E-02 * C0), + -F(7.76463494E-02 * C1), F(6.13245186E-03 * C1), + -F(2.04385087E-02 * C2), F(3.21939290E-02 * C2), + F(0.00000000E+00 * C3), F(2.88217274E-02 * C3), + -F(1.09137620E-02 * C0), F(3.83720193E-03 * C0), + -F(3.06012286E-03 * C1), F(3.89205149E-03 * C1), + -F(5.36548976E-04 * C2), F(2.73370904E-03 * C2), + F(0.00000000E+00 * C3), -F(1.86581691E-03 * C3), +#undef F +#define F(x) F_COS4(x) + F(0.7071067812 / C0), F(0.9238795325 / C1), + -F(0.7071067812 / C0), F(0.3826834324 / C1), + -F(0.7071067812 / C0), -F(0.3826834324 / C1), + F(0.7071067812 / C0), -F(0.9238795325 / C1), + F(0.3826834324 / C2), -F(1.0000000000 / C3), + -F(0.9238795325 / C2), -F(1.0000000000 / C3), + F(0.9238795325 / C2), -F(1.0000000000 / C3), + -F(0.3826834324 / C2), -F(1.0000000000 / C3), +#undef F + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +}; + +static const FIXED_T SBC_ALIGNED analysis_consts_fixed4_simd_odd[40 + 16] = { +#define C0 1.3056875580 +#define C1 1.6772280856 +#define C2 1.0932568993 +#define C3 1.3056875580 + +#define F(x) F_PROTO4(x) + F(2.73370904E-03 * C0), F(5.36548976E-04 * C0), + -F(1.49188357E-03 * C1), F(0.00000000E+00 * C1), + F(3.83720193E-03 * C2), F(1.09137620E-02 * C2), + F(3.89205149E-03 * C3), F(3.06012286E-03 * C3), + F(3.21939290E-02 * C0), F(2.04385087E-02 * C0), + -F(2.88757392E-02 * C1), F(0.00000000E+00 * C1), + F(2.58767811E-02 * C2), F(1.35593274E-01 * C2), + F(6.13245186E-03 * C3), F(7.76463494E-02 * C3), + F(2.81828203E-01 * C0), F(1.94987841E-01 * C0), + -F(2.46636662E-01 * C1), F(0.00000000E+00 * C1), + F(2.94315332E-01 * C2), -F(1.35593274E-01 * C2), + F(2.81828203E-01 * C3), -F(1.94987841E-01 * C3), + F(6.13245186E-03 * C0), -F(7.76463494E-02 * C0), + F(2.88217274E-02 * C1), F(0.00000000E+00 * C1), + F(2.58767811E-02 * C2), -F(1.09137620E-02 * C2), + F(3.21939290E-02 * C3), -F(2.04385087E-02 * C3), + F(3.89205149E-03 * C0), -F(3.06012286E-03 * C0), + -F(1.86581691E-03 * C1), F(0.00000000E+00 * C1), + F(3.83720193E-03 * C2), F(0.00000000E+00 * C2), + F(2.73370904E-03 * C3), -F(5.36548976E-04 * C3), +#undef F +#define F(x) F_COS4(x) + F(0.9238795325 / C0), -F(1.0000000000 / C1), + F(0.3826834324 / C0), -F(1.0000000000 / C1), + -F(0.3826834324 / C0), -F(1.0000000000 / C1), + -F(0.9238795325 / C0), -F(1.0000000000 / C1), + F(0.7071067812 / C2), F(0.3826834324 / C3), + -F(0.7071067812 / C2), -F(0.9238795325 / C3), + -F(0.7071067812 / C2), F(0.9238795325 / C3), + F(0.7071067812 / C2), -F(0.3826834324 / C3), +#undef F + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +}; + +static const FIXED_T SBC_ALIGNED analysis_consts_fixed8_simd_even[80 + 64] = { +#define C0 2.7906148894 +#define C1 2.4270044280 +#define C2 2.8015616024 +#define C3 3.1710363741 +#define C4 2.5377944043 +#define C5 2.4270044280 +#define C6 2.8015616024 +#define C7 3.1710363741 + +#define F(x) F_PROTO8(x) + F(0.00000000E+00 * C0), F(2.01182542E-03 * C0), + F(1.56575398E-04 * C1), F(1.78371725E-03 * C1), + F(3.43256425E-04 * C2), F(1.47640169E-03 * C2), + F(5.54620202E-04 * C3), F(1.13992507E-03 * C3), + -F(8.23919506E-04 * C4), F(0.00000000E+00 * C4), + F(2.10371989E-03 * C5), F(3.49717454E-03 * C5), + F(1.99454554E-03 * C6), F(1.64973098E-03 * C6), + F(1.61656283E-03 * C7), F(1.78805361E-04 * C7), + F(5.65949473E-03 * C0), F(1.29371806E-02 * C0), + F(8.02941163E-03 * C1), F(1.53184106E-02 * C1), + F(1.04584443E-02 * C2), F(1.62208471E-02 * C2), + F(1.27472335E-02 * C3), F(1.59045603E-02 * C3), + -F(1.46525263E-02 * C4), F(0.00000000E+00 * C4), + F(8.85757540E-03 * C5), F(5.31873032E-02 * C5), + F(2.92408442E-03 * C6), F(3.90751381E-02 * C6), + -F(4.91578024E-03 * C7), F(2.61098752E-02 * C7), + F(6.79989431E-02 * C0), F(1.46955068E-01 * C0), + F(8.29847578E-02 * C1), F(1.45389847E-01 * C1), + F(9.75753918E-02 * C2), F(1.40753505E-01 * C2), + F(1.11196689E-01 * C3), F(1.33264415E-01 * C3), + -F(1.23264548E-01 * C4), F(0.00000000E+00 * C4), + F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5), + F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6), + F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7), + -F(6.79989431E-02 * C0), F(1.29371806E-02 * C0), + -F(5.31873032E-02 * C1), F(8.85757540E-03 * C1), + -F(3.90751381E-02 * C2), F(2.92408442E-03 * C2), + -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3), + F(1.46404076E-02 * C4), F(0.00000000E+00 * C4), + F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5), + F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6), + F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7), + -F(5.65949473E-03 * C0), F(2.01182542E-03 * C0), + -F(3.49717454E-03 * C1), F(2.10371989E-03 * C1), + -F(1.64973098E-03 * C2), F(1.99454554E-03 * C2), + -F(1.78805361E-04 * C3), F(1.61656283E-03 * C3), + -F(9.02154502E-04 * C4), F(0.00000000E+00 * C4), + F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5), + F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6), + F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7), +#undef F +#define F(x) F_COS8(x) + F(0.7071067812 / C0), F(0.8314696123 / C1), + -F(0.7071067812 / C0), -F(0.1950903220 / C1), + -F(0.7071067812 / C0), -F(0.9807852804 / C1), + F(0.7071067812 / C0), -F(0.5555702330 / C1), + F(0.7071067812 / C0), F(0.5555702330 / C1), + -F(0.7071067812 / C0), F(0.9807852804 / C1), + -F(0.7071067812 / C0), F(0.1950903220 / C1), + F(0.7071067812 / C0), -F(0.8314696123 / C1), + F(0.9238795325 / C2), F(0.9807852804 / C3), + F(0.3826834324 / C2), F(0.8314696123 / C3), + -F(0.3826834324 / C2), F(0.5555702330 / C3), + -F(0.9238795325 / C2), F(0.1950903220 / C3), + -F(0.9238795325 / C2), -F(0.1950903220 / C3), + -F(0.3826834324 / C2), -F(0.5555702330 / C3), + F(0.3826834324 / C2), -F(0.8314696123 / C3), + F(0.9238795325 / C2), -F(0.9807852804 / C3), + -F(1.0000000000 / C4), F(0.5555702330 / C5), + -F(1.0000000000 / C4), -F(0.9807852804 / C5), + -F(1.0000000000 / C4), F(0.1950903220 / C5), + -F(1.0000000000 / C4), F(0.8314696123 / C5), + -F(1.0000000000 / C4), -F(0.8314696123 / C5), + -F(1.0000000000 / C4), -F(0.1950903220 / C5), + -F(1.0000000000 / C4), F(0.9807852804 / C5), + -F(1.0000000000 / C4), -F(0.5555702330 / C5), + F(0.3826834324 / C6), F(0.1950903220 / C7), + -F(0.9238795325 / C6), -F(0.5555702330 / C7), + F(0.9238795325 / C6), F(0.8314696123 / C7), + -F(0.3826834324 / C6), -F(0.9807852804 / C7), + -F(0.3826834324 / C6), F(0.9807852804 / C7), + F(0.9238795325 / C6), -F(0.8314696123 / C7), + -F(0.9238795325 / C6), F(0.5555702330 / C7), + F(0.3826834324 / C6), -F(0.1950903220 / C7), +#undef F + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 +}; + +static const FIXED_T SBC_ALIGNED analysis_consts_fixed8_simd_odd[80 + 64] = { +#define C0 2.5377944043 +#define C1 2.4270044280 +#define C2 2.8015616024 +#define C3 3.1710363741 +#define C4 2.7906148894 +#define C5 2.4270044280 +#define C6 2.8015616024 +#define C7 3.1710363741 + +#define F(x) F_PROTO8(x) + F(0.00000000E+00 * C0), -F(8.23919506E-04 * C0), + F(1.56575398E-04 * C1), F(1.78371725E-03 * C1), + F(3.43256425E-04 * C2), F(1.47640169E-03 * C2), + F(5.54620202E-04 * C3), F(1.13992507E-03 * C3), + F(2.01182542E-03 * C4), F(5.65949473E-03 * C4), + F(2.10371989E-03 * C5), F(3.49717454E-03 * C5), + F(1.99454554E-03 * C6), F(1.64973098E-03 * C6), + F(1.61656283E-03 * C7), F(1.78805361E-04 * C7), + F(0.00000000E+00 * C0), -F(1.46525263E-02 * C0), + F(8.02941163E-03 * C1), F(1.53184106E-02 * C1), + F(1.04584443E-02 * C2), F(1.62208471E-02 * C2), + F(1.27472335E-02 * C3), F(1.59045603E-02 * C3), + F(1.29371806E-02 * C4), F(6.79989431E-02 * C4), + F(8.85757540E-03 * C5), F(5.31873032E-02 * C5), + F(2.92408442E-03 * C6), F(3.90751381E-02 * C6), + -F(4.91578024E-03 * C7), F(2.61098752E-02 * C7), + F(0.00000000E+00 * C0), -F(1.23264548E-01 * C0), + F(8.29847578E-02 * C1), F(1.45389847E-01 * C1), + F(9.75753918E-02 * C2), F(1.40753505E-01 * C2), + F(1.11196689E-01 * C3), F(1.33264415E-01 * C3), + F(1.46955068E-01 * C4), -F(6.79989431E-02 * C4), + F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5), + F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6), + F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7), + F(0.00000000E+00 * C0), F(1.46404076E-02 * C0), + -F(5.31873032E-02 * C1), F(8.85757540E-03 * C1), + -F(3.90751381E-02 * C2), F(2.92408442E-03 * C2), + -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3), + F(1.29371806E-02 * C4), -F(5.65949473E-03 * C4), + F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5), + F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6), + F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7), + F(0.00000000E+00 * C0), -F(9.02154502E-04 * C0), + -F(3.49717454E-03 * C1), F(2.10371989E-03 * C1), + -F(1.64973098E-03 * C2), F(1.99454554E-03 * C2), + -F(1.78805361E-04 * C3), F(1.61656283E-03 * C3), + F(2.01182542E-03 * C4), F(0.00000000E+00 * C4), + F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5), + F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6), + F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7), +#undef F +#define F(x) F_COS8(x) + -F(1.0000000000 / C0), F(0.8314696123 / C1), + -F(1.0000000000 / C0), -F(0.1950903220 / C1), + -F(1.0000000000 / C0), -F(0.9807852804 / C1), + -F(1.0000000000 / C0), -F(0.5555702330 / C1), + -F(1.0000000000 / C0), F(0.5555702330 / C1), + -F(1.0000000000 / C0), F(0.9807852804 / C1), + -F(1.0000000000 / C0), F(0.1950903220 / C1), + -F(1.0000000000 / C0), -F(0.8314696123 / C1), + F(0.9238795325 / C2), F(0.9807852804 / C3), + F(0.3826834324 / C2), F(0.8314696123 / C3), + -F(0.3826834324 / C2), F(0.5555702330 / C3), + -F(0.9238795325 / C2), F(0.1950903220 / C3), + -F(0.9238795325 / C2), -F(0.1950903220 / C3), + -F(0.3826834324 / C2), -F(0.5555702330 / C3), + F(0.3826834324 / C2), -F(0.8314696123 / C3), + F(0.9238795325 / C2), -F(0.9807852804 / C3), + F(0.7071067812 / C4), F(0.5555702330 / C5), + -F(0.7071067812 / C4), -F(0.9807852804 / C5), + -F(0.7071067812 / C4), F(0.1950903220 / C5), + F(0.7071067812 / C4), F(0.8314696123 / C5), + F(0.7071067812 / C4), -F(0.8314696123 / C5), + -F(0.7071067812 / C4), -F(0.1950903220 / C5), + -F(0.7071067812 / C4), F(0.9807852804 / C5), + F(0.7071067812 / C4), -F(0.5555702330 / C5), + F(0.3826834324 / C6), F(0.1950903220 / C7), + -F(0.9238795325 / C6), -F(0.5555702330 / C7), + F(0.9238795325 / C6), F(0.8314696123 / C7), + -F(0.3826834324 / C6), -F(0.9807852804 / C7), + -F(0.3826834324 / C6), F(0.9807852804 / C7), + F(0.9238795325 / C6), -F(0.8314696123 / C7), + -F(0.9238795325 / C6), F(0.5555702330 / C7), + F(0.3826834324 / C6), -F(0.1950903220 / C7), +#undef F + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 +}; diff --git a/sbc/sbcdec.c b/sbc/sbcdec.c new file mode 100644 index 0000000..7008e88 --- /dev/null +++ b/sbc/sbcdec.c @@ -0,0 +1,293 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) decoder + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sbc.h" +#include "formats.h" + +#define BUF_SIZE 8192 + +static int verbose = 0; + +static void decode(char *filename, char *output, int tofile) +{ + unsigned char buf[BUF_SIZE], *stream; + struct stat st; + sbc_t sbc; + int fd, ad, pos, streamlen, framelen, count; + size_t len; + int format = AFMT_S16_BE, frequency, channels; + ssize_t written; + + if (stat(filename, &st) < 0) { + fprintf(stderr, "Can't get size of file %s: %s\n", + filename, strerror(errno)); + return; + } + + stream = malloc(st.st_size); + + if (!stream) { + fprintf(stderr, "Can't allocate memory for %s: %s\n", + filename, strerror(errno)); + return; + } + + fd = open(filename, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Can't open file %s: %s\n", + filename, strerror(errno)); + goto free; + } + + if (read(fd, stream, st.st_size) != st.st_size) { + fprintf(stderr, "Can't read content of %s: %s\n", + filename, strerror(errno)); + close(fd); + goto free; + } + + close(fd); + + pos = 0; + streamlen = st.st_size; + + if (tofile) + ad = open(output, O_WRONLY | O_CREAT | O_TRUNC, 0644); + else + ad = open(output, O_WRONLY, 0); + + if (ad < 0) { + fprintf(stderr, "Can't open output %s: %s\n", + output, strerror(errno)); + goto free; + } + + sbc_init(&sbc, 0L); + sbc.endian = SBC_BE; + + framelen = sbc_decode(&sbc, stream, streamlen, buf, sizeof(buf), &len); + channels = sbc.mode == SBC_MODE_MONO ? 1 : 2; + switch (sbc.frequency) { + case SBC_FREQ_16000: + frequency = 16000; + break; + + case SBC_FREQ_32000: + frequency = 32000; + break; + + case SBC_FREQ_44100: + frequency = 44100; + break; + + case SBC_FREQ_48000: + frequency = 48000; + break; + default: + frequency = 0; + } + + if (verbose) { + fprintf(stderr,"decoding %s with rate %d, %d subbands, " + "%d bits, allocation method %s and mode %s\n", + filename, frequency, sbc.subbands * 4 + 4, sbc.bitpool, + sbc.allocation == SBC_AM_SNR ? "SNR" : "LOUDNESS", + sbc.mode == SBC_MODE_MONO ? "MONO" : + sbc.mode == SBC_MODE_STEREO ? + "STEREO" : "JOINTSTEREO"); + } + + if (tofile) { + struct au_header au_hdr; + + au_hdr.magic = AU_MAGIC; + au_hdr.hdr_size = BE_INT(24); + au_hdr.data_size = BE_INT(0); + au_hdr.encoding = BE_INT(AU_FMT_LIN16); + au_hdr.sample_rate = BE_INT(frequency); + au_hdr.channels = BE_INT(channels); + + written = write(ad, &au_hdr, sizeof(au_hdr)); + if (written < (ssize_t) sizeof(au_hdr)) { + fprintf(stderr, "Failed to write header\n"); + goto close; + } + } else { + if (ioctl(ad, SNDCTL_DSP_SETFMT, &format) < 0) { + fprintf(stderr, "Can't set audio format on %s: %s\n", + output, strerror(errno)); + goto close; + } + + if (ioctl(ad, SNDCTL_DSP_CHANNELS, &channels) < 0) { + fprintf(stderr, "Can't set number of channels on %s: %s\n", + output, strerror(errno)); + goto close; + } + + if (ioctl(ad, SNDCTL_DSP_SPEED, &frequency) < 0) { + fprintf(stderr, "Can't set audio rate on %s: %s\n", + output, strerror(errno)); + goto close; + } + } + + count = len; + + while (framelen > 0) { + /* we have completed an sbc_decode at this point sbc.len is the + * length of the frame we just decoded count is the number of + * decoded bytes yet to be written */ + + if (count + len >= BUF_SIZE) { + /* buffer is too full to stuff decoded audio in so it + * must be written to the device */ + written = write(ad, buf, count); + if (written > 0) + count -= written; + } + + /* sanity check */ + if (count + len >= BUF_SIZE) { + fprintf(stderr, + "buffer size of %d is too small for decoded" + " data (%lu)\n", BUF_SIZE, (unsigned long) (len + count)); + exit(1); + } + + /* push the pointer in the file forward to the next bit to be + * decoded tell the decoder to decode up to the remaining + * length of the file (!) */ + pos += framelen; + framelen = sbc_decode(&sbc, stream + pos, streamlen - pos, + buf + count, sizeof(buf) - count, &len); + + /* increase the count */ + count += len; + } + + if (count > 0) { + written = write(ad, buf, count); + if (written > 0) + count -= written; + } + +close: + sbc_finish(&sbc); + + close(ad); + +free: + free(stream); +} + +static void usage(void) +{ + printf("SBC decoder utility ver %s\n", VERSION); + printf("Copyright (c) 2004-2010 Marcel Holtmann\n\n"); + + printf("Usage:\n" + "\tsbcdec [options] file(s)\n" + "\n"); + + printf("Options:\n" + "\t-h, --help Display help\n" + "\t-v, --verbose Verbose mode\n" + "\t-d, --device Sound device\n" + "\t-f, --file Decode to a file\n" + "\n"); +} + +static struct option main_options[] = { + { "help", 0, 0, 'h' }, + { "device", 1, 0, 'd' }, + { "verbose", 0, 0, 'v' }, + { "file", 1, 0, 'f' }, + { 0, 0, 0, 0 } +}; + +int main(int argc, char *argv[]) +{ + char *output = NULL; + int i, opt, tofile = 0; + + while ((opt = getopt_long(argc, argv, "+hvd:f:", + main_options, NULL)) != -1) { + switch(opt) { + case 'h': + usage(); + exit(0); + + case 'v': + verbose = 1; + break; + + case 'd': + free(output); + output = strdup(optarg); + tofile = 0; + break; + + case 'f' : + free(output); + output = strdup(optarg); + tofile = 1; + break; + + default: + exit(1); + } + } + + argc -= optind; + argv += optind; + optind = 0; + + if (argc < 1) { + usage(); + exit(1); + } + + for (i = 0; i < argc; i++) + decode(argv[i], output ? output : "/dev/dsp", tofile); + + free(output); + + return 0; +} diff --git a/sbc/sbcenc.c b/sbc/sbcenc.c new file mode 100644 index 0000000..3d3a7dc --- /dev/null +++ b/sbc/sbcenc.c @@ -0,0 +1,308 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) encoder + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sbc.h" +#include "formats.h" + +static int verbose = 0; + +#define BUF_SIZE 32768 +static unsigned char input[BUF_SIZE], output[BUF_SIZE + BUF_SIZE / 4]; + +static void encode(char *filename, int subbands, int bitpool, int joint, + int dualchannel, int snr, int blocks) +{ + struct au_header au_hdr; + sbc_t sbc; + int fd, size, srate, codesize, nframes; + ssize_t encoded; + ssize_t len; + + if (sizeof(au_hdr) != 24) { + /* Sanity check just in case */ + fprintf(stderr, "FIXME: sizeof(au_hdr) != 24\n"); + return; + } + + if (strcmp(filename, "-")) { + fd = open(filename, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Can't open file %s: %s\n", + filename, strerror(errno)); + return; + } + } else + fd = fileno(stdin); + + len = read(fd, &au_hdr, sizeof(au_hdr)); + if (len < (ssize_t) sizeof(au_hdr)) { + if (fd > fileno(stderr)) + fprintf(stderr, "Can't read header from file %s: %s\n", + filename, strerror(errno)); + else + perror("Can't read audio header"); + goto done; + } + + if (au_hdr.magic != AU_MAGIC || + BE_INT(au_hdr.hdr_size) > 128 || + BE_INT(au_hdr.hdr_size) < sizeof(au_hdr) || + BE_INT(au_hdr.encoding) != AU_FMT_LIN16) { + fprintf(stderr, "Not in Sun/NeXT audio S16_BE format\n"); + goto done; + } + + sbc_init(&sbc, 0L); + + switch (BE_INT(au_hdr.sample_rate)) { + case 16000: + sbc.frequency = SBC_FREQ_16000; + break; + case 32000: + sbc.frequency = SBC_FREQ_32000; + break; + case 44100: + sbc.frequency = SBC_FREQ_44100; + break; + case 48000: + sbc.frequency = SBC_FREQ_48000; + break; + } + + srate = BE_INT(au_hdr.sample_rate); + + sbc.subbands = subbands == 4 ? SBC_SB_4 : SBC_SB_8; + + if (BE_INT(au_hdr.channels) == 1) { + sbc.mode = SBC_MODE_MONO; + if (joint || dualchannel) { + fprintf(stderr, "Audio is mono but joint or " + "dualchannel mode has been specified\n"); + goto done; + } + } else if (joint && !dualchannel) + sbc.mode = SBC_MODE_JOINT_STEREO; + else if (!joint && dualchannel) + sbc.mode = SBC_MODE_DUAL_CHANNEL; + else if (!joint && !dualchannel) + sbc.mode = SBC_MODE_STEREO; + else { + fprintf(stderr, "Both joint and dualchannel mode have been " + "specified\n"); + goto done; + } + + sbc.endian = SBC_BE; + /* Skip extra bytes of the header if any */ + if (read(fd, input, BE_INT(au_hdr.hdr_size) - len) < 0) + goto done; + + sbc.bitpool = bitpool; + sbc.allocation = snr ? SBC_AM_SNR : SBC_AM_LOUDNESS; + sbc.blocks = blocks == 4 ? SBC_BLK_4 : + blocks == 8 ? SBC_BLK_8 : + blocks == 12 ? SBC_BLK_12 : SBC_BLK_16; + + if (verbose) { + fprintf(stderr, "encoding %s with rate %d, %d blocks, " + "%d subbands, %d bits, allocation method %s, " + "and mode %s\n", + filename, srate, blocks, subbands, bitpool, + sbc.allocation == SBC_AM_SNR ? "SNR" : "LOUDNESS", + sbc.mode == SBC_MODE_MONO ? "MONO" : + sbc.mode == SBC_MODE_STEREO ? + "STEREO" : "JOINTSTEREO"); + } + + codesize = sbc_get_codesize(&sbc); + nframes = sizeof(input) / codesize; + while (1) { + unsigned char *inp, *outp; + /* read data for up to 'nframes' frames of input data */ + size = read(fd, input, codesize * nframes); + if (size < 0) { + /* Something really bad happened */ + perror("Can't read audio data"); + break; + } + if (size < codesize) { + /* Not enough data for encoding even a single frame */ + break; + } + /* encode all the data from the input buffer in a loop */ + inp = input; + outp = output; + while (size >= codesize) { + len = sbc_encode(&sbc, inp, codesize, + outp, sizeof(output) - (outp - output), + &encoded); + if (len != codesize || encoded <= 0) { + fprintf(stderr, + "sbc_encode fail, len=%zd, encoded=%lu\n", + len, (unsigned long) encoded); + break; + } + size -= len; + inp += len; + outp += encoded; + } + len = write(fileno(stdout), output, outp - output); + if (len != outp - output) { + perror("Can't write SBC output"); + break; + } + if (size != 0) { + /* + * sbc_encode failure has been detected earlier or end + * of file reached (have trailing partial data which is + * insufficient to encode SBC frame) + */ + break; + } + } + + sbc_finish(&sbc); + +done: + if (fd > fileno(stderr)) + close(fd); +} + +static void usage(void) +{ + printf("SBC encoder utility ver %s\n", VERSION); + printf("Copyright (c) 2004-2010 Marcel Holtmann\n\n"); + + printf("Usage:\n" + "\tsbcenc [options] file(s)\n" + "\n"); + + printf("Options:\n" + "\t-h, --help Display help\n" + "\t-v, --verbose Verbose mode\n" + "\t-s, --subbands Number of subbands to use (4 or 8)\n" + "\t-b, --bitpool Bitpool value (default is 32)\n" + "\t-j, --joint Joint stereo\n" + "\t-d, --dualchannel Dual channel\n" + "\t-S, --snr Use SNR mode (default is loudness)\n" + "\t-B, --blocks Number of blocks (4, 8, 12 or 16)\n" + "\n"); +} + +static struct option main_options[] = { + { "help", 0, 0, 'h' }, + { "verbose", 0, 0, 'v' }, + { "subbands", 1, 0, 's' }, + { "bitpool", 1, 0, 'b' }, + { "joint", 0, 0, 'j' }, + { "dualchannel",0, 0, 'd' }, + { "snr", 0, 0, 'S' }, + { "blocks", 1, 0, 'B' }, + { 0, 0, 0, 0 } +}; + +int main(int argc, char *argv[]) +{ + int i, opt, subbands = 8, bitpool = 32, joint = 0, dualchannel = 0; + int snr = 0, blocks = 16; + + while ((opt = getopt_long(argc, argv, "+hvs:b:jdSB:", + main_options, NULL)) != -1) { + switch(opt) { + case 'h': + usage(); + exit(0); + + case 'v': + verbose = 1; + break; + + case 's': + subbands = atoi(optarg); + if (subbands != 8 && subbands != 4) { + fprintf(stderr, "Invalid subbands\n"); + exit(1); + } + break; + + case 'b': + bitpool = atoi(optarg); + break; + + case 'j': + joint = 1; + break; + + case 'd': + dualchannel = 1; + break; + + case 'S': + snr = 1; + break; + + case 'B': + blocks = atoi(optarg); + if (blocks != 16 && blocks != 12 && + blocks != 8 && blocks != 4) { + fprintf(stderr, "Invalid blocks\n"); + exit(1); + } + break; + + default: + usage(); + exit(1); + } + } + + argc -= optind; + argv += optind; + optind = 0; + + if (argc < 1) { + usage(); + exit(1); + } + + for (i = 0; i < argc; i++) + encode(argv[i], subbands, bitpool, joint, dualchannel, + snr, blocks); + + return 0; +} diff --git a/sbc/sbcinfo.c b/sbc/sbcinfo.c new file mode 100644 index 0000000..6d92679 --- /dev/null +++ b/sbc/sbcinfo.c @@ -0,0 +1,322 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#if __BYTE_ORDER == __LITTLE_ENDIAN +struct sbc_frame_hdr { + uint8_t syncword:8; /* Sync word */ + uint8_t subbands:1; /* Subbands */ + uint8_t allocation_method:1; /* Allocation method */ + uint8_t channel_mode:2; /* Channel mode */ + uint8_t blocks:2; /* Blocks */ + uint8_t sampling_frequency:2; /* Sampling frequency */ + uint8_t bitpool:8; /* Bitpool */ + uint8_t crc_check:8; /* CRC check */ +} __attribute__ ((packed)); +#elif __BYTE_ORDER == __BIG_ENDIAN +struct sbc_frame_hdr { + uint8_t syncword:8; /* Sync word */ + uint8_t sampling_frequency:2; /* Sampling frequency */ + uint8_t blocks:2; /* Blocks */ + uint8_t channel_mode:2; /* Channel mode */ + uint8_t allocation_method:1; /* Allocation method */ + uint8_t subbands:1; /* Subbands */ + uint8_t bitpool:8; /* Bitpool */ + uint8_t crc_check:8; /* CRC check */ +} __attribute__ ((packed)); +#else +#error "Unknown byte order" +#endif + +static int calc_frame_len(struct sbc_frame_hdr *hdr) +{ + int tmp, nrof_subbands, nrof_blocks; + + nrof_subbands = (hdr->subbands + 1) * 4; + nrof_blocks = (hdr->blocks + 1) * 4; + + switch (hdr->channel_mode) { + case 0x00: + nrof_subbands /= 2; + tmp = nrof_blocks * hdr->bitpool; + break; + case 0x01: + tmp = nrof_blocks * hdr->bitpool * 2; + break; + case 0x02: + tmp = nrof_blocks * hdr->bitpool; + break; + case 0x03: + tmp = nrof_blocks * hdr->bitpool + nrof_subbands; + break; + default: + return 0; + } + + return (nrof_subbands + ((tmp + 7) / 8)); +} + +static double calc_bit_rate(struct sbc_frame_hdr *hdr) +{ + int nrof_subbands, nrof_blocks; + double f; + + nrof_subbands = (hdr->subbands + 1) * 4; + nrof_blocks = (hdr->blocks + 1) * 4; + + switch (hdr->sampling_frequency) { + case 0: + f = 16; + break; + case 1: + f = 32; + break; + case 2: + f = 44.1; + break; + case 3: + f = 48; + break; + default: + return 0; + } + + return ((8 * (calc_frame_len(hdr) + 4) * f) / + (nrof_subbands * nrof_blocks)); +} + +static char *freq2str(uint8_t freq) +{ + switch (freq) { + case 0: + return "16 kHz"; + case 1: + return "32 kHz"; + case 2: + return "44.1 kHz"; + case 3: + return "48 kHz"; + default: + return "Unknown"; + } +} + +static char *mode2str(uint8_t mode) +{ + switch (mode) { + case 0: + return "Mono"; + case 1: + return "Dual Channel"; + case 2: + return "Stereo"; + case 3: + return "Joint Stereo"; + default: + return "Unknown"; + } +} + +static ssize_t __read(int fd, void *buf, size_t count) +{ + ssize_t len, pos = 0; + + while (count > 0) { + len = read(fd, buf + pos, count); + if (len <= 0) + return len; + + count -= len; + pos += len; + } + + return pos; +} + +#define SIZE 32 + +static int analyze_file(char *filename) +{ + struct sbc_frame_hdr hdr; + unsigned char buf[64]; + double rate; + int bitpool[SIZE], frame_len[SIZE]; + int subbands, blocks, freq, mode, method; + int n, p1, p2, fd, size, num; + ssize_t len; + unsigned int count; + + if (strcmp(filename, "-")) { + printf("Filename\t\t%s\n", basename(filename)); + + fd = open(filename, O_RDONLY); + if (fd < 0) { + perror("Can't open file"); + return -1; + } + } else + fd = fileno(stdin); + + len = __read(fd, &hdr, sizeof(hdr)); + if (len != sizeof(hdr) || hdr.syncword != 0x9c) { + fprintf(stderr, "Not a SBC audio file\n"); + return -1; + } + + subbands = (hdr.subbands + 1) * 4; + blocks = (hdr.blocks + 1) * 4; + freq = hdr.sampling_frequency; + mode = hdr.channel_mode; + method = hdr.allocation_method; + + count = calc_frame_len(&hdr); + + bitpool[0] = hdr.bitpool; + frame_len[0] = count + 4; + + for (n = 1; n < SIZE; n++) { + bitpool[n] = 0; + frame_len[n] = 0; + } + + if (lseek(fd, 0, SEEK_SET) < 0) { + num = 1; + rate = calc_bit_rate(&hdr); + while (count) { + size = count > sizeof(buf) ? sizeof(buf) : count; + len = __read(fd, buf, size); + if (len < 0) + break; + count -= len; + } + } else { + num = 0; + rate = 0; + } + + while (1) { + len = __read(fd, &hdr, sizeof(hdr)); + if (len < 0) { + fprintf(stderr, "Unable to read frame header" + " (error %d)\n", errno); + break; + } + + if (len == 0) + break; + + if ((size_t) len < sizeof(hdr) || hdr.syncword != 0x9c) { + fprintf(stderr, "Corrupted SBC stream " + "(len %zd syncword 0x%02x)\n", + len, hdr.syncword); + break; + } + + count = calc_frame_len(&hdr); + len = count + 4; + + p1 = -1; + p2 = -1; + for (n = 0; n < SIZE; n++) { + if (p1 < 0 && (bitpool[n] == 0 || bitpool[n] == hdr.bitpool)) + p1 = n; + if (p2 < 0 && (frame_len[n] == 0 || frame_len[n] == len)) + p2 = n; + } + if (p1 >= 0) + bitpool[p1] = hdr.bitpool; + if (p2 >= 0) + frame_len[p2] = len; + + while (count) { + size = count > sizeof(buf) ? sizeof(buf) : count; + + len = __read(fd, buf, size); + if (len != size) { + fprintf(stderr, "Unable to read frame data " + "(error %d)\n", errno); + break; + } + + count -= len; + } + + rate += calc_bit_rate(&hdr); + num++; + } + + printf("Subbands\t\t%d\n", subbands); + printf("Block length\t\t%d\n", blocks); + printf("Sampling frequency\t%s\n", freq2str(freq)); + printf("Channel mode\t\t%s\n", mode2str(hdr.channel_mode)); + printf("Allocation method\t%s\n", method ? "SNR" : "Loudness"); + printf("Bitpool\t\t\t%d", bitpool[0]); + for (n = 1; n < SIZE; n++) + if (bitpool[n] > 0) + printf(", %d", bitpool[n]); + printf("\n"); + printf("Number of frames\t%d\n", num); + printf("Frame length\t\t%d", frame_len[0]); + for (n = 1; n < SIZE; n++) + if (frame_len[n] > 0) + printf(", %d", frame_len[n]); + printf(" Bytes\n"); + if (num > 0) + printf("Bit rate\t\t%.3f kbps\n", rate / num); + + if (fd > fileno(stderr)) + close(fd); + + printf("\n"); + + return 0; +} + +int main(int argc, char *argv[]) +{ + int i; + + if (argc < 2) { + fprintf(stderr, "Usage: sbcinfo \n"); + exit(1); + } + + for (i = 0; i < argc - 1; i++) + if (analyze_file(argv[i + 1]) < 0) + exit(1); + + return 0; +} diff --git a/sbc/sbctester.c b/sbc/sbctester.c new file mode 100644 index 0000000..b1e3608 --- /dev/null +++ b/sbc/sbctester.c @@ -0,0 +1,358 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2007-2010 Marcel Holtmann + * Copyright (C) 2007-2008 Frederic Dalleau + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include + +#define MAXCHANNELS 2 +#define DEFACCURACY 7 + +static double sampletobits(short sample16, int verbose) +{ + double bits = 0; + unsigned short bit; + int i; + + if (verbose) + printf("===> sampletobits(%hd, %04hX)\n", sample16, sample16); + + /* Bit 0 is MSB */ + if (sample16 < 0) + bits = -1; + + if (verbose) + printf("%d", (sample16 < 0) ? 1 : 0); + + /* Bit 15 is LSB */ + for (i = 1; i < 16; i++) { + bit = (unsigned short) sample16; + bit >>= 15 - i; + bit %= 2; + + if (verbose) + printf("%d", bit); + + if (bit) + bits += (1.0 / pow(2.0, i)); + } + + if (verbose) + printf("\n"); + + return bits; +} + +static int calculate_rms_level(SNDFILE * sndref, SF_INFO * infosref, + SNDFILE * sndtst, SF_INFO * infostst, + int accuracy, char *csvname) +{ + short refsample[MAXCHANNELS], tstsample[MAXCHANNELS]; + double refbits, tstbits; + double rms_accu[MAXCHANNELS]; + double rms_level[MAXCHANNELS]; + double rms_limit = 1.0 / (pow(2.0, accuracy - 1) * pow(12.0, 0.5)); + FILE *csv = NULL; + int i, j, r1, r2, verdict; + + if (csvname) + csv = fopen(csvname, "wt"); + + if (csv) { + fprintf(csv, "num;"); + for (j = 0; j < infostst->channels; j++) + fprintf(csv, "ref channel %d;tst channel %d;", j, j); + fprintf(csv, "\r\n"); + } + + sf_seek(sndref, 0, SEEK_SET); + sf_seek(sndtst, 0, SEEK_SET); + + memset(rms_accu, 0, sizeof(rms_accu)); + memset(rms_level, 0, sizeof(rms_level)); + + for (i = 0; i < infostst->frames; i++) { + if (csv) + fprintf(csv, "%d;", i); + + r1 = sf_read_short(sndref, refsample, infostst->channels); + if (r1 != infostst->channels) { + printf("Failed to read reference data: %s " + "(r1=%d, channels=%d)", + sf_strerror(sndref), r1, + infostst->channels); + if (csv) + fclose(csv); + return -1; + } + + r2 = sf_read_short(sndtst, tstsample, infostst->channels); + if (r2 != infostst->channels) { + printf("Failed to read test data: %s " + "(r2=%d, channels=%d)\n", + sf_strerror(sndtst), r2, + infostst->channels); + if (csv) + fclose(csv); + return -1; + } + + for (j = 0; j < infostst->channels; j++) { + if (csv) + fprintf(csv, "%d;%d;", refsample[j], + tstsample[j]); + + refbits = sampletobits(refsample[j], 0); + tstbits = sampletobits(tstsample[j], 0); + + rms_accu[j] += pow(tstbits - refbits, 2.0); + } + + if (csv) + fprintf(csv, "\r\n"); + } + + printf("Limit: %f\n", rms_limit); + + for (j = 0; j < infostst->channels; j++) { + printf("Channel %d\n", j); + printf("Accumulated %f\n", rms_accu[j]); + rms_accu[j] /= (double) infostst->frames; + printf("Accumulated / %f = %f\n", (double) infostst->frames, + rms_accu[j]); + rms_level[j] = sqrt(rms_accu[j]); + printf("Level = %f (%f x %f = %f)\n", + rms_level[j], rms_level[j], rms_level[j], + rms_level[j] * rms_level[j]); + } + + verdict = 1; + + for (j = 0; j < infostst->channels; j++) { + printf("Channel %d: %f\n", j, rms_level[j]); + + if (rms_level[j] > rms_limit) + verdict = 0; + } + + printf("%s return %d\n", __FUNCTION__, verdict); + + return verdict; +} + +static int check_absolute_diff(SNDFILE * sndref, SF_INFO * infosref, + SNDFILE * sndtst, SF_INFO * infostst, + int accuracy) +{ + short refsample[MAXCHANNELS], tstsample[MAXCHANNELS]; + short refmax[MAXCHANNELS], tstmax[MAXCHANNELS]; + double refbits, tstbits; + double rms_absolute = 1.0 / (pow(2, accuracy - 2)); + double calc_max[MAXCHANNELS]; + int calc_count = 0; + short r1, r2; + double cur_diff; + int i, j, verdict; + + memset(&refmax, 0, sizeof(refmax)); + memset(&tstmax, 0, sizeof(tstmax)); + memset(&calc_max, 0, sizeof(calc_max)); + memset(&refsample, 0, sizeof(refsample)); + memset(&tstsample, 0, sizeof(tstsample)); + + sf_seek(sndref, 0, SEEK_SET); + sf_seek(sndtst, 0, SEEK_SET); + + verdict = 1; + + printf("Absolute max: %f\n", rms_absolute); + for (i = 0; i < infostst->frames; i++) { + r1 = sf_read_short(sndref, refsample, infostst->channels); + + if (r1 != infostst->channels) { + printf("Failed to read reference data: %s " + "(r1=%d, channels=%d)", + sf_strerror(sndref), r1, + infostst->channels); + return -1; + } + + r2 = sf_read_short(sndtst, tstsample, infostst->channels); + if (r2 != infostst->channels) { + printf("Failed to read test data: %s " + "(r2=%d, channels=%d)\n", + sf_strerror(sndtst), r2, + infostst->channels); + return -1; + } + + for (j = 0; j < infostst->channels; j++) { + refbits = sampletobits(refsample[j], 0); + tstbits = sampletobits(tstsample[j], 0); + + cur_diff = fabs(tstbits - refbits); + + if (cur_diff > rms_absolute) { + calc_count++; + /* printf("Channel %d exceeded : fabs(%f - %f) = %f > %f\n", j, tstbits, refbits, cur_diff, rms_absolute); */ + verdict = 0; + } + + if (cur_diff > calc_max[j]) { + calc_max[j] = cur_diff; + refmax[j] = refsample[j]; + tstmax[j] = tstsample[j]; + } + } + } + + for (j = 0; j < infostst->channels; j++) { + printf("Calculated max: %f (%hd-%hd=%hd)\n", + calc_max[j], tstmax[j], refmax[j], + tstmax[j] - refmax[j]); + } + + printf("%s return %d\n", __FUNCTION__, verdict); + + return verdict; +} + +static void usage() +{ + printf("SBC conformance test ver %s\n", VERSION); + printf("Copyright (c) 2007-2010 Marcel Holtmann\n"); + printf("Copyright (c) 2007-2008 Frederic Dalleau\n\n"); + + printf("Usage:\n" + "\tsbctester reference.wav checkfile.wav\n" + "\tsbctester integer\n" + "\n"); + + printf("To test the encoder:\n"); + printf("\tUse a reference codec to encode original.wav to reference.sbc\n"); + printf("\tUse sbcenc to encode original.wav to checkfile.sbc\n"); + printf("\tDecode both file using the reference decoder\n"); + printf("\tRun sbctester with these two wav files to get the result\n\n"); + + printf("\tA file called out.csv is generated to use the data in a\n"); + printf("\tspreadsheet application or database.\n\n"); +} + +int main(int argc, char *argv[]) +{ + SNDFILE *sndref = NULL; + SNDFILE *sndtst = NULL; + SF_INFO infosref; + SF_INFO infostst; + char *ref; + char *tst; + int pass_rms, pass_absolute, pass, accuracy; + + if (argc == 2) { + double db; + + printf("Test sampletobits\n"); + db = sampletobits((short) atoi(argv[1]), 1); + printf("db = %f\n", db); + exit(0); + } + + if (argc < 3) { + usage(); + exit(1); + } + + ref = argv[1]; + tst = argv[2]; + + printf("opening reference %s\n", ref); + + sndref = sf_open(ref, SFM_READ, &infosref); + if (!sndref) { + printf("Failed to open reference file\n"); + exit(1); + } + + printf("opening testfile %s\n", tst); + sndtst = sf_open(tst, SFM_READ, &infostst); + if (!sndtst) { + printf("Failed to open test file\n"); + sf_close(sndref); + exit(1); + } + + printf("reference:\n\t%d frames,\n\t%d hz,\n\t%d channels\n", + (int) infosref.frames, (int) infosref.samplerate, + (int) infosref.channels); + printf("testfile:\n\t%d frames,\n\t%d hz,\n\t%d channels\n", + (int) infostst.frames, (int) infostst.samplerate, + (int) infostst.channels); + + /* check number of channels */ + if (infosref.channels > 2 || infostst.channels > 2) { + printf("Too many channels\n"); + goto error; + } + + /* compare number of samples */ + if (infosref.samplerate != infostst.samplerate || + infosref.channels != infostst.channels) { + printf("Cannot compare files with different charasteristics\n"); + goto error; + } + + accuracy = DEFACCURACY; + printf("Accuracy: %d\n", accuracy); + + /* Condition 1 rms level */ + pass_rms = calculate_rms_level(sndref, &infosref, sndtst, &infostst, + accuracy, "out.csv"); + if (pass_rms < 0) + goto error; + + /* Condition 2 absolute difference */ + pass_absolute = check_absolute_diff(sndref, &infosref, sndtst, + &infostst, accuracy); + if (pass_absolute < 0) + goto error; + + /* Verdict */ + pass = pass_rms && pass_absolute; + printf("Verdict: %s\n", pass ? "pass" : "fail"); + + return 0; + +error: + sf_close(sndref); + sf_close(sndtst); + + exit(1); +} -- cgit v1.2.3