author    | Stephen Rothwell <sfr@canb.auug.org.au> | 2017-02-09 12:11:17 +1100
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2017-02-09 12:11:17 +1100
commit    | 3a3dce991d22806222fe83aad9a91b1c2d044f3a
tree      | cb8f7af785a2208b67aaff66ca642c81e10583a4
parent    | d5b460329b9e758d10a5a7b925636329f39ee306
parent    | 1a20b96612656b3ff2d6967c3111bec0e21904a8
Merge remote-tracking branch 'crypto/master'
133 files changed, 11489 insertions, 8400 deletions
diff --git a/Documentation/crypto/api-digest.rst b/Documentation/crypto/api-digest.rst
index 07356fa99200..7a1e670d6ce1 100644
--- a/Documentation/crypto/api-digest.rst
+++ b/Documentation/crypto/api-digest.rst
@@ -14,7 +14,7 @@ Asynchronous Message Digest API
    :doc: Asynchronous Message Digest API
 
 .. kernel-doc:: include/crypto/hash.h
-   :functions: crypto_alloc_ahash crypto_free_ahash crypto_ahash_init crypto_ahash_digestsize crypto_ahash_reqtfm crypto_ahash_reqsize crypto_ahash_setkey crypto_ahash_finup crypto_ahash_final crypto_ahash_digest crypto_ahash_export crypto_ahash_import
+   :functions: crypto_alloc_ahash crypto_free_ahash crypto_ahash_init crypto_ahash_digestsize crypto_ahash_reqtfm crypto_ahash_reqsize crypto_ahash_statesize crypto_ahash_setkey crypto_ahash_finup crypto_ahash_final crypto_ahash_digest crypto_ahash_export crypto_ahash_import
 
 Asynchronous Hash Request Handle
 --------------------------------
diff --git a/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt b/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt
new file mode 100644
index 000000000000..c204725e5873
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/mediatek-crypto.txt
@@ -0,0 +1,27 @@
+MediaTek cryptographic accelerators
+
+Required properties:
+- compatible: Should be "mediatek,eip97-crypto"
+- reg: Address and length of the register set for the device
+- interrupts: Should contain the five crypto engines interrupts in numeric
+  order. These are global system and four descriptor rings.
+- clocks: the clock used by the core
+- clock-names: the names of the clock listed in the clocks property. These are
+  "ethif", "cryp"
+- power-domains: Must contain a reference to the PM domain.
+
+
+Example:
+	crypto: crypto@1b240000 {
+		compatible = "mediatek,eip97-crypto";
+		reg = <0 0x1b240000 0 0x20000>;
+		interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 83 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 84 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 91 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 97 IRQ_TYPE_LEVEL_LOW>;
+		clocks = <&topckgen CLK_TOP_ETHIF_SEL>,
+			 <&ethsys CLK_ETHSYS_CRYPTO>;
+		clock-names = "ethif","cryp";
+		power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>;
+	};
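[Editor's note] The api-digest.rst hunk above adds crypto_ahash_statesize() to the list of documented functions. For context, a minimal sketch of the export/import pattern that helper serves, assuming the standard include/crypto/hash.h API; this snippet is illustrative only, is not part of the patch, and elides error handling:

    #include <crypto/hash.h>
    #include <linux/scatterlist.h>
    #include <linux/slab.h>

    /* Hash some data, snapshot the partial state, and resume later.
     * crypto_ahash_statesize() sizes the export buffer. */
    static int sketch_partial_hash(struct scatterlist *sg, unsigned int len)
    {
            struct crypto_ahash *tfm = crypto_alloc_ahash("sha256", 0, 0);
            struct ahash_request *req;
            void *state;
            int err;

            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            req = ahash_request_alloc(tfm, GFP_KERNEL);
            state = kmalloc(crypto_ahash_statesize(tfm), GFP_KERNEL);

            ahash_request_set_crypt(req, sg, NULL, len);
            err = crypto_ahash_init(req) ?:
                  crypto_ahash_update(req) ?:
                  crypto_ahash_export(req, state);  /* snapshot partial state */

            /* ... later, crypto_ahash_import(req, state) resumes hashing
             * from the exported state ... */

            kfree(state);
            ahash_request_free(req);
            crypto_free_ahash(tfm);
            return err;
    }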
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 13f1b4c289d4..a8fce93137fb 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -62,35 +62,18 @@ config CRYPTO_SHA512_ARM
 	  using optimized ARM assembler and NEON, when available.
 
 config CRYPTO_AES_ARM
-	tristate "AES cipher algorithms (ARM-asm)"
-	depends on ARM
+	tristate "Scalar AES cipher for ARM"
 	select CRYPTO_ALGAPI
 	select CRYPTO_AES
 	help
 	  Use optimized AES assembler routines for ARM platforms.
 
-	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
-	  algorithm.
-
-	  Rijndael appears to be consistently a very good performer in
-	  both hardware and software across a wide range of computing
-	  environments regardless of its use in feedback or non-feedback
-	  modes. Its key setup time is excellent, and its key agility is
-	  good. Rijndael's very low memory requirements make it very well
-	  suited for restricted-space environments, in which it also
-	  demonstrates excellent performance. Rijndael's operations are
-	  among the easiest to defend against power and timing attacks.
-
-	  The AES specifies three key sizes: 128, 192 and 256 bits
-
-	  See <http://csrc.nist.gov/encryption/aes/> for more information.
-
 config CRYPTO_AES_ARM_BS
 	tristate "Bit sliced AES using NEON instructions"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_AES_ARM
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_SIMD
+	select CRYPTO_AES_ARM
 	help
 	  Use a faster and more secure NEON based implementation of AES in CBC,
 	  CTR and XTS modes
@@ -130,4 +113,10 @@ config CRYPTO_CRC32_ARM_CE
 	depends on KERNEL_MODE_NEON && CRC32
 	select CRYPTO_HASH
 
+config CRYPTO_CHACHA20_NEON
+	tristate "NEON accelerated ChaCha20 symmetric cipher"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_CHACHA20
+
 endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index b578a1820ab1..1822c4697278 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
+obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
 
 ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@@ -26,8 +27,8 @@ $(warning $(ce-obj-y) $(ce-obj-m))
 endif
 endif
 
-aes-arm-y := aes-armv4.o aes_glue.o
-aes-arm-bs-y := aesbs-core.o aesbs-glue.o
+aes-arm-y := aes-cipher-core.o aes-cipher-glue.o
+aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
 sha1-arm-y := sha1-armv4-large.o sha1_glue.o
 sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
 sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
@@ -40,17 +41,15 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
 ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
 crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
 crc32-arm-ce-y := crc32-ce-core.o crc32-ce-glue.o
+chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
 
 quiet_cmd_perl = PERL $@
       cmd_perl = $(PERL) $(<) > $(@)
 
-$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
-	$(call cmd,perl)
-
 $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
 	$(call cmd,perl)
 
 $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
 	$(call cmd,perl)
 
-.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S $(obj)/sha512-core.S
+.PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S
diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S
deleted file mode 100644
index ebb9761fb572..000000000000
--- a/arch/arm/crypto/aes-armv4.S
+++ /dev/null
@@ -1,1089 +0,0 @@
-#define __ARM_ARCH__ __LINUX_ARM_ARCH__
-@ ====================================================================
-@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-@ project. The module is, however, dual licensed under OpenSSL and
-@ CRYPTOGAMS licenses depending on where you obtain it. For further
-@ details see http://www.openssl.org/~appro/cryptogams/.
-@ ====================================================================
-
-@ AES for ARMv4
-
-@ January 2007.
-@
-@ Code uses single 1K S-box and is >2 times faster than code generated
-@ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which
-@ allows to merge logical or arithmetic operation with shift or rotate
-@ in one instruction and emit combined result every cycle. The module
-@ is endian-neutral. The performance is ~42 cycles/byte for 128-bit
-@ key [on single-issue Xscale PXA250 core].
-
-@ May 2007.
-@
-@ AES_set_[en|de]crypt_key is added.
-
-@ July 2010.
-@
-@ Rescheduling for dual-issue pipeline resulted in 12% improvement on
-@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
-
-@ February 2011.
-@ -@ Profiler-assisted and platform-specific optimization resulted in 16% -@ improvement on Cortex A8 core and ~21.5 cycles per byte. - -@ A little glue here to select the correct code below for the ARM CPU -@ that is being targetted. - -#include <linux/linkage.h> -#include <asm/assembler.h> - -.text - -.type AES_Te,%object -.align 5 -AES_Te: -.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d -.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554 -.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d -.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a -.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87 -.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b -.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea -.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b -.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a -.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f -.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108 -.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f -.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e -.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5 -.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d -.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f -.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e -.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb -.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce -.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497 -.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c -.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed -.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b -.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a -.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16 -.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594 -.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81 -.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3 -.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a -.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504 -.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163 -.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d -.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f -.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739 -.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47 -.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395 -.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f -.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883 -.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c -.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76 -.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e -.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4 -.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6 -.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b -.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7 -.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0 -.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25 -.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818 -.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72 -.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651 -.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21 -.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85 -.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa -.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12 -.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0 -.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9 -.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133 -.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7 -.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920 -.word 0x87cece49, 
0xaa5555ff, 0x50282878, 0xa5dfdf7a -.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17 -.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8 -.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11 -.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a -@ Te4[256] -.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 -.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 -.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 -.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 -.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc -.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 -.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a -.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 -.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 -.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 -.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b -.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf -.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 -.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 -.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 -.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 -.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 -.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 -.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 -.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb -.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c -.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 -.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 -.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 -.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 -.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a -.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e -.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e -.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 -.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf -.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 -.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -@ rcon[] -.word 0x01000000, 0x02000000, 0x04000000, 0x08000000 -.word 0x10000000, 0x20000000, 0x40000000, 0x80000000 -.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 -.size AES_Te,.-AES_Te - -@ void AES_encrypt(const unsigned char *in, unsigned char *out, -@ const AES_KEY *key) { -.align 5 -ENTRY(AES_encrypt) - adr r3,AES_encrypt - stmdb sp!,{r1,r4-r12,lr} - mov r12,r0 @ inp - mov r11,r2 - sub r10,r3,#AES_encrypt-AES_Te @ Te -#if __ARM_ARCH__<7 - ldrb r0,[r12,#3] @ load input data in endian-neutral - ldrb r4,[r12,#2] @ manner... - ldrb r5,[r12,#1] - ldrb r6,[r12,#0] - orr r0,r0,r4,lsl#8 - ldrb r1,[r12,#7] - orr r0,r0,r5,lsl#16 - ldrb r4,[r12,#6] - orr r0,r0,r6,lsl#24 - ldrb r5,[r12,#5] - ldrb r6,[r12,#4] - orr r1,r1,r4,lsl#8 - ldrb r2,[r12,#11] - orr r1,r1,r5,lsl#16 - ldrb r4,[r12,#10] - orr r1,r1,r6,lsl#24 - ldrb r5,[r12,#9] - ldrb r6,[r12,#8] - orr r2,r2,r4,lsl#8 - ldrb r3,[r12,#15] - orr r2,r2,r5,lsl#16 - ldrb r4,[r12,#14] - orr r2,r2,r6,lsl#24 - ldrb r5,[r12,#13] - ldrb r6,[r12,#12] - orr r3,r3,r4,lsl#8 - orr r3,r3,r5,lsl#16 - orr r3,r3,r6,lsl#24 -#else - ldr r0,[r12,#0] - ldr r1,[r12,#4] - ldr r2,[r12,#8] - ldr r3,[r12,#12] -#ifdef __ARMEL__ - rev r0,r0 - rev r1,r1 - rev r2,r2 - rev r3,r3 -#endif -#endif - bl _armv4_AES_encrypt - - ldr r12,[sp],#4 @ pop out -#if __ARM_ARCH__>=7 -#ifdef __ARMEL__ - rev r0,r0 - rev r1,r1 - rev r2,r2 - rev r3,r3 -#endif - str r0,[r12,#0] - str r1,[r12,#4] - str r2,[r12,#8] - str r3,[r12,#12] -#else - mov r4,r0,lsr#24 @ write output in endian-neutral - mov r5,r0,lsr#16 @ manner... 
- mov r6,r0,lsr#8 - strb r4,[r12,#0] - strb r5,[r12,#1] - mov r4,r1,lsr#24 - strb r6,[r12,#2] - mov r5,r1,lsr#16 - strb r0,[r12,#3] - mov r6,r1,lsr#8 - strb r4,[r12,#4] - strb r5,[r12,#5] - mov r4,r2,lsr#24 - strb r6,[r12,#6] - mov r5,r2,lsr#16 - strb r1,[r12,#7] - mov r6,r2,lsr#8 - strb r4,[r12,#8] - strb r5,[r12,#9] - mov r4,r3,lsr#24 - strb r6,[r12,#10] - mov r5,r3,lsr#16 - strb r2,[r12,#11] - mov r6,r3,lsr#8 - strb r4,[r12,#12] - strb r5,[r12,#13] - strb r6,[r12,#14] - strb r3,[r12,#15] -#endif - ldmia sp!,{r4-r12,pc} -ENDPROC(AES_encrypt) - -.type _armv4_AES_encrypt,%function -.align 2 -_armv4_AES_encrypt: - str lr,[sp,#-4]! @ push lr - ldmia r11!,{r4-r7} - eor r0,r0,r4 - ldr r12,[r11,#240-16] - eor r1,r1,r5 - eor r2,r2,r6 - eor r3,r3,r7 - sub r12,r12,#1 - mov lr,#255 - - and r7,lr,r0 - and r8,lr,r0,lsr#8 - and r9,lr,r0,lsr#16 - mov r0,r0,lsr#24 -.Lenc_loop: - ldr r4,[r10,r7,lsl#2] @ Te3[s0>>0] - and r7,lr,r1,lsr#16 @ i0 - ldr r5,[r10,r8,lsl#2] @ Te2[s0>>8] - and r8,lr,r1 - ldr r6,[r10,r9,lsl#2] @ Te1[s0>>16] - and r9,lr,r1,lsr#8 - ldr r0,[r10,r0,lsl#2] @ Te0[s0>>24] - mov r1,r1,lsr#24 - - ldr r7,[r10,r7,lsl#2] @ Te1[s1>>16] - ldr r8,[r10,r8,lsl#2] @ Te3[s1>>0] - ldr r9,[r10,r9,lsl#2] @ Te2[s1>>8] - eor r0,r0,r7,ror#8 - ldr r1,[r10,r1,lsl#2] @ Te0[s1>>24] - and r7,lr,r2,lsr#8 @ i0 - eor r5,r5,r8,ror#8 - and r8,lr,r2,lsr#16 @ i1 - eor r6,r6,r9,ror#8 - and r9,lr,r2 - ldr r7,[r10,r7,lsl#2] @ Te2[s2>>8] - eor r1,r1,r4,ror#24 - ldr r8,[r10,r8,lsl#2] @ Te1[s2>>16] - mov r2,r2,lsr#24 - - ldr r9,[r10,r9,lsl#2] @ Te3[s2>>0] - eor r0,r0,r7,ror#16 - ldr r2,[r10,r2,lsl#2] @ Te0[s2>>24] - and r7,lr,r3 @ i0 - eor r1,r1,r8,ror#8 - and r8,lr,r3,lsr#8 @ i1 - eor r6,r6,r9,ror#16 - and r9,lr,r3,lsr#16 @ i2 - ldr r7,[r10,r7,lsl#2] @ Te3[s3>>0] - eor r2,r2,r5,ror#16 - ldr r8,[r10,r8,lsl#2] @ Te2[s3>>8] - mov r3,r3,lsr#24 - - ldr r9,[r10,r9,lsl#2] @ Te1[s3>>16] - eor r0,r0,r7,ror#24 - ldr r7,[r11],#16 - eor r1,r1,r8,ror#16 - ldr r3,[r10,r3,lsl#2] @ Te0[s3>>24] - eor r2,r2,r9,ror#8 - ldr r4,[r11,#-12] - eor r3,r3,r6,ror#8 - - ldr r5,[r11,#-8] - eor r0,r0,r7 - ldr r6,[r11,#-4] - and r7,lr,r0 - eor r1,r1,r4 - and r8,lr,r0,lsr#8 - eor r2,r2,r5 - and r9,lr,r0,lsr#16 - eor r3,r3,r6 - mov r0,r0,lsr#24 - - subs r12,r12,#1 - bne .Lenc_loop - - add r10,r10,#2 - - ldrb r4,[r10,r7,lsl#2] @ Te4[s0>>0] - and r7,lr,r1,lsr#16 @ i0 - ldrb r5,[r10,r8,lsl#2] @ Te4[s0>>8] - and r8,lr,r1 - ldrb r6,[r10,r9,lsl#2] @ Te4[s0>>16] - and r9,lr,r1,lsr#8 - ldrb r0,[r10,r0,lsl#2] @ Te4[s0>>24] - mov r1,r1,lsr#24 - - ldrb r7,[r10,r7,lsl#2] @ Te4[s1>>16] - ldrb r8,[r10,r8,lsl#2] @ Te4[s1>>0] - ldrb r9,[r10,r9,lsl#2] @ Te4[s1>>8] - eor r0,r7,r0,lsl#8 - ldrb r1,[r10,r1,lsl#2] @ Te4[s1>>24] - and r7,lr,r2,lsr#8 @ i0 - eor r5,r8,r5,lsl#8 - and r8,lr,r2,lsr#16 @ i1 - eor r6,r9,r6,lsl#8 - and r9,lr,r2 - ldrb r7,[r10,r7,lsl#2] @ Te4[s2>>8] - eor r1,r4,r1,lsl#24 - ldrb r8,[r10,r8,lsl#2] @ Te4[s2>>16] - mov r2,r2,lsr#24 - - ldrb r9,[r10,r9,lsl#2] @ Te4[s2>>0] - eor r0,r7,r0,lsl#8 - ldrb r2,[r10,r2,lsl#2] @ Te4[s2>>24] - and r7,lr,r3 @ i0 - eor r1,r1,r8,lsl#16 - and r8,lr,r3,lsr#8 @ i1 - eor r6,r9,r6,lsl#8 - and r9,lr,r3,lsr#16 @ i2 - ldrb r7,[r10,r7,lsl#2] @ Te4[s3>>0] - eor r2,r5,r2,lsl#24 - ldrb r8,[r10,r8,lsl#2] @ Te4[s3>>8] - mov r3,r3,lsr#24 - - ldrb r9,[r10,r9,lsl#2] @ Te4[s3>>16] - eor r0,r7,r0,lsl#8 - ldr r7,[r11,#0] - ldrb r3,[r10,r3,lsl#2] @ Te4[s3>>24] - eor r1,r1,r8,lsl#8 - ldr r4,[r11,#4] - eor r2,r2,r9,lsl#16 - ldr r5,[r11,#8] - eor r3,r6,r3,lsl#24 - ldr r6,[r11,#12] - - eor r0,r0,r7 - eor r1,r1,r4 - eor r2,r2,r5 - eor r3,r3,r6 - - 
sub r10,r10,#2 - ldr pc,[sp],#4 @ pop and return -.size _armv4_AES_encrypt,.-_armv4_AES_encrypt - -.align 5 -ENTRY(private_AES_set_encrypt_key) -_armv4_AES_set_encrypt_key: - adr r3,_armv4_AES_set_encrypt_key - teq r0,#0 - moveq r0,#-1 - beq .Labrt - teq r2,#0 - moveq r0,#-1 - beq .Labrt - - teq r1,#128 - beq .Lok - teq r1,#192 - beq .Lok - teq r1,#256 - movne r0,#-1 - bne .Labrt - -.Lok: stmdb sp!,{r4-r12,lr} - sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 - - mov r12,r0 @ inp - mov lr,r1 @ bits - mov r11,r2 @ key - -#if __ARM_ARCH__<7 - ldrb r0,[r12,#3] @ load input data in endian-neutral - ldrb r4,[r12,#2] @ manner... - ldrb r5,[r12,#1] - ldrb r6,[r12,#0] - orr r0,r0,r4,lsl#8 - ldrb r1,[r12,#7] - orr r0,r0,r5,lsl#16 - ldrb r4,[r12,#6] - orr r0,r0,r6,lsl#24 - ldrb r5,[r12,#5] - ldrb r6,[r12,#4] - orr r1,r1,r4,lsl#8 - ldrb r2,[r12,#11] - orr r1,r1,r5,lsl#16 - ldrb r4,[r12,#10] - orr r1,r1,r6,lsl#24 - ldrb r5,[r12,#9] - ldrb r6,[r12,#8] - orr r2,r2,r4,lsl#8 - ldrb r3,[r12,#15] - orr r2,r2,r5,lsl#16 - ldrb r4,[r12,#14] - orr r2,r2,r6,lsl#24 - ldrb r5,[r12,#13] - ldrb r6,[r12,#12] - orr r3,r3,r4,lsl#8 - str r0,[r11],#16 - orr r3,r3,r5,lsl#16 - str r1,[r11,#-12] - orr r3,r3,r6,lsl#24 - str r2,[r11,#-8] - str r3,[r11,#-4] -#else - ldr r0,[r12,#0] - ldr r1,[r12,#4] - ldr r2,[r12,#8] - ldr r3,[r12,#12] -#ifdef __ARMEL__ - rev r0,r0 - rev r1,r1 - rev r2,r2 - rev r3,r3 -#endif - str r0,[r11],#16 - str r1,[r11,#-12] - str r2,[r11,#-8] - str r3,[r11,#-4] -#endif - - teq lr,#128 - bne .Lnot128 - mov r12,#10 - str r12,[r11,#240-16] - add r6,r10,#256 @ rcon - mov lr,#255 - -.L128_loop: - and r5,lr,r3,lsr#24 - and r7,lr,r3,lsr#16 - ldrb r5,[r10,r5] - and r8,lr,r3,lsr#8 - ldrb r7,[r10,r7] - and r9,lr,r3 - ldrb r8,[r10,r8] - orr r5,r5,r7,lsl#24 - ldrb r9,[r10,r9] - orr r5,r5,r8,lsl#16 - ldr r4,[r6],#4 @ rcon[i++] - orr r5,r5,r9,lsl#8 - eor r5,r5,r4 - eor r0,r0,r5 @ rk[4]=rk[0]^... - eor r1,r1,r0 @ rk[5]=rk[1]^rk[4] - str r0,[r11],#16 - eor r2,r2,r1 @ rk[6]=rk[2]^rk[5] - str r1,[r11,#-12] - eor r3,r3,r2 @ rk[7]=rk[3]^rk[6] - str r2,[r11,#-8] - subs r12,r12,#1 - str r3,[r11,#-4] - bne .L128_loop - sub r2,r11,#176 - b .Ldone - -.Lnot128: -#if __ARM_ARCH__<7 - ldrb r8,[r12,#19] - ldrb r4,[r12,#18] - ldrb r5,[r12,#17] - ldrb r6,[r12,#16] - orr r8,r8,r4,lsl#8 - ldrb r9,[r12,#23] - orr r8,r8,r5,lsl#16 - ldrb r4,[r12,#22] - orr r8,r8,r6,lsl#24 - ldrb r5,[r12,#21] - ldrb r6,[r12,#20] - orr r9,r9,r4,lsl#8 - orr r9,r9,r5,lsl#16 - str r8,[r11],#8 - orr r9,r9,r6,lsl#24 - str r9,[r11,#-4] -#else - ldr r8,[r12,#16] - ldr r9,[r12,#20] -#ifdef __ARMEL__ - rev r8,r8 - rev r9,r9 -#endif - str r8,[r11],#8 - str r9,[r11,#-4] -#endif - - teq lr,#192 - bne .Lnot192 - mov r12,#12 - str r12,[r11,#240-24] - add r6,r10,#256 @ rcon - mov lr,#255 - mov r12,#8 - -.L192_loop: - and r5,lr,r9,lsr#24 - and r7,lr,r9,lsr#16 - ldrb r5,[r10,r5] - and r8,lr,r9,lsr#8 - ldrb r7,[r10,r7] - and r9,lr,r9 - ldrb r8,[r10,r8] - orr r5,r5,r7,lsl#24 - ldrb r9,[r10,r9] - orr r5,r5,r8,lsl#16 - ldr r4,[r6],#4 @ rcon[i++] - orr r5,r5,r9,lsl#8 - eor r9,r5,r4 - eor r0,r0,r9 @ rk[6]=rk[0]^... 
- eor r1,r1,r0 @ rk[7]=rk[1]^rk[6] - str r0,[r11],#24 - eor r2,r2,r1 @ rk[8]=rk[2]^rk[7] - str r1,[r11,#-20] - eor r3,r3,r2 @ rk[9]=rk[3]^rk[8] - str r2,[r11,#-16] - subs r12,r12,#1 - str r3,[r11,#-12] - subeq r2,r11,#216 - beq .Ldone - - ldr r7,[r11,#-32] - ldr r8,[r11,#-28] - eor r7,r7,r3 @ rk[10]=rk[4]^rk[9] - eor r9,r8,r7 @ rk[11]=rk[5]^rk[10] - str r7,[r11,#-8] - str r9,[r11,#-4] - b .L192_loop - -.Lnot192: -#if __ARM_ARCH__<7 - ldrb r8,[r12,#27] - ldrb r4,[r12,#26] - ldrb r5,[r12,#25] - ldrb r6,[r12,#24] - orr r8,r8,r4,lsl#8 - ldrb r9,[r12,#31] - orr r8,r8,r5,lsl#16 - ldrb r4,[r12,#30] - orr r8,r8,r6,lsl#24 - ldrb r5,[r12,#29] - ldrb r6,[r12,#28] - orr r9,r9,r4,lsl#8 - orr r9,r9,r5,lsl#16 - str r8,[r11],#8 - orr r9,r9,r6,lsl#24 - str r9,[r11,#-4] -#else - ldr r8,[r12,#24] - ldr r9,[r12,#28] -#ifdef __ARMEL__ - rev r8,r8 - rev r9,r9 -#endif - str r8,[r11],#8 - str r9,[r11,#-4] -#endif - - mov r12,#14 - str r12,[r11,#240-32] - add r6,r10,#256 @ rcon - mov lr,#255 - mov r12,#7 - -.L256_loop: - and r5,lr,r9,lsr#24 - and r7,lr,r9,lsr#16 - ldrb r5,[r10,r5] - and r8,lr,r9,lsr#8 - ldrb r7,[r10,r7] - and r9,lr,r9 - ldrb r8,[r10,r8] - orr r5,r5,r7,lsl#24 - ldrb r9,[r10,r9] - orr r5,r5,r8,lsl#16 - ldr r4,[r6],#4 @ rcon[i++] - orr r5,r5,r9,lsl#8 - eor r9,r5,r4 - eor r0,r0,r9 @ rk[8]=rk[0]^... - eor r1,r1,r0 @ rk[9]=rk[1]^rk[8] - str r0,[r11],#32 - eor r2,r2,r1 @ rk[10]=rk[2]^rk[9] - str r1,[r11,#-28] - eor r3,r3,r2 @ rk[11]=rk[3]^rk[10] - str r2,[r11,#-24] - subs r12,r12,#1 - str r3,[r11,#-20] - subeq r2,r11,#256 - beq .Ldone - - and r5,lr,r3 - and r7,lr,r3,lsr#8 - ldrb r5,[r10,r5] - and r8,lr,r3,lsr#16 - ldrb r7,[r10,r7] - and r9,lr,r3,lsr#24 - ldrb r8,[r10,r8] - orr r5,r5,r7,lsl#8 - ldrb r9,[r10,r9] - orr r5,r5,r8,lsl#16 - ldr r4,[r11,#-48] - orr r5,r5,r9,lsl#24 - - ldr r7,[r11,#-44] - ldr r8,[r11,#-40] - eor r4,r4,r5 @ rk[12]=rk[4]^... - ldr r9,[r11,#-36] - eor r7,r7,r4 @ rk[13]=rk[5]^rk[12] - str r4,[r11,#-16] - eor r8,r8,r7 @ rk[14]=rk[6]^rk[13] - str r7,[r11,#-12] - eor r9,r9,r8 @ rk[15]=rk[7]^rk[14] - str r8,[r11,#-8] - str r9,[r11,#-4] - b .L256_loop - -.Ldone: mov r0,#0 - ldmia sp!,{r4-r12,lr} -.Labrt: ret lr -ENDPROC(private_AES_set_encrypt_key) - -.align 5 -ENTRY(private_AES_set_decrypt_key) - str lr,[sp,#-4]! @ push lr -#if 0 - @ kernel does both of these in setkey so optimise this bit out by - @ expecting the key to already have the enc_key work done (see aes_glue.c) - bl _armv4_AES_set_encrypt_key -#else - mov r0,#0 -#endif - teq r0,#0 - ldrne lr,[sp],#4 @ pop lr - bne .Labrt - - stmdb sp!,{r4-r12} - - ldr r12,[r2,#240] @ AES_set_encrypt_key preserves r2, - mov r11,r2 @ which is AES_KEY *key - mov r7,r2 - add r8,r2,r12,lsl#4 - -.Linv: ldr r0,[r7] - ldr r1,[r7,#4] - ldr r2,[r7,#8] - ldr r3,[r7,#12] - ldr r4,[r8] - ldr r5,[r8,#4] - ldr r6,[r8,#8] - ldr r9,[r8,#12] - str r0,[r8],#-16 - str r1,[r8,#16+4] - str r2,[r8,#16+8] - str r3,[r8,#16+12] - str r4,[r7],#16 - str r5,[r7,#-12] - str r6,[r7,#-8] - str r9,[r7,#-4] - teq r7,r8 - bne .Linv - ldr r0,[r11,#16]! 
@ prefetch tp1 - mov r7,#0x80 - mov r8,#0x1b - orr r7,r7,#0x8000 - orr r8,r8,#0x1b00 - orr r7,r7,r7,lsl#16 - orr r8,r8,r8,lsl#16 - sub r12,r12,#1 - mvn r9,r7 - mov r12,r12,lsl#2 @ (rounds-1)*4 - -.Lmix: and r4,r0,r7 - and r1,r0,r9 - sub r4,r4,r4,lsr#7 - and r4,r4,r8 - eor r1,r4,r1,lsl#1 @ tp2 - - and r4,r1,r7 - and r2,r1,r9 - sub r4,r4,r4,lsr#7 - and r4,r4,r8 - eor r2,r4,r2,lsl#1 @ tp4 - - and r4,r2,r7 - and r3,r2,r9 - sub r4,r4,r4,lsr#7 - and r4,r4,r8 - eor r3,r4,r3,lsl#1 @ tp8 - - eor r4,r1,r2 - eor r5,r0,r3 @ tp9 - eor r4,r4,r3 @ tpe - eor r4,r4,r1,ror#24 - eor r4,r4,r5,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8) - eor r4,r4,r2,ror#16 - eor r4,r4,r5,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16) - eor r4,r4,r5,ror#8 @ ^= ROTATE(tp9,24) - - ldr r0,[r11,#4] @ prefetch tp1 - str r4,[r11],#4 - subs r12,r12,#1 - bne .Lmix - - mov r0,#0 - ldmia sp!,{r4-r12,pc} -ENDPROC(private_AES_set_decrypt_key) - -.type AES_Td,%object -.align 5 -AES_Td: -.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 -.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393 -.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25 -.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f -.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1 -.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6 -.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da -.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844 -.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd -.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4 -.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45 -.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94 -.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7 -.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a -.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5 -.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c -.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1 -.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a -.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75 -.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051 -.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46 -.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff -.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77 -.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb -.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000 -.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e -.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927 -.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a -.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e -.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16 -.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d -.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8 -.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd -.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34 -.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163 -.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120 -.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d -.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0 -.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422 -.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef -.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36 -.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4 -.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662 -.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5 -.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3 -.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b -.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8 -.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6 -.word 0xbae79bd9, 0x4a6f36ce, 
0xea9f09d4, 0x29b07cd6 -.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0 -.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815 -.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f -.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df -.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f -.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e -.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713 -.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89 -.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c -.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf -.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86 -.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f -.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541 -.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190 -.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 -@ Td4[256] -.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 -.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb -.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 -.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb -.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d -.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e -.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 -.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 -.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 -.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 -.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda -.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 -.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a -.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 -.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 -.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b -.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea -.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 -.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 -.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e -.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 -.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b -.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 -.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 -.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 -.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f -.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d -.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef -.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 -.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 -.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 -.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d -.size AES_Td,.-AES_Td - -@ void AES_decrypt(const unsigned char *in, unsigned char *out, -@ const AES_KEY *key) { -.align 5 -ENTRY(AES_decrypt) - adr r3,AES_decrypt - stmdb sp!,{r1,r4-r12,lr} - mov r12,r0 @ inp - mov r11,r2 - sub r10,r3,#AES_decrypt-AES_Td @ Td -#if __ARM_ARCH__<7 - ldrb r0,[r12,#3] @ load input data in endian-neutral - ldrb r4,[r12,#2] @ manner... 
- ldrb r5,[r12,#1] - ldrb r6,[r12,#0] - orr r0,r0,r4,lsl#8 - ldrb r1,[r12,#7] - orr r0,r0,r5,lsl#16 - ldrb r4,[r12,#6] - orr r0,r0,r6,lsl#24 - ldrb r5,[r12,#5] - ldrb r6,[r12,#4] - orr r1,r1,r4,lsl#8 - ldrb r2,[r12,#11] - orr r1,r1,r5,lsl#16 - ldrb r4,[r12,#10] - orr r1,r1,r6,lsl#24 - ldrb r5,[r12,#9] - ldrb r6,[r12,#8] - orr r2,r2,r4,lsl#8 - ldrb r3,[r12,#15] - orr r2,r2,r5,lsl#16 - ldrb r4,[r12,#14] - orr r2,r2,r6,lsl#24 - ldrb r5,[r12,#13] - ldrb r6,[r12,#12] - orr r3,r3,r4,lsl#8 - orr r3,r3,r5,lsl#16 - orr r3,r3,r6,lsl#24 -#else - ldr r0,[r12,#0] - ldr r1,[r12,#4] - ldr r2,[r12,#8] - ldr r3,[r12,#12] -#ifdef __ARMEL__ - rev r0,r0 - rev r1,r1 - rev r2,r2 - rev r3,r3 -#endif -#endif - bl _armv4_AES_decrypt - - ldr r12,[sp],#4 @ pop out -#if __ARM_ARCH__>=7 -#ifdef __ARMEL__ - rev r0,r0 - rev r1,r1 - rev r2,r2 - rev r3,r3 -#endif - str r0,[r12,#0] - str r1,[r12,#4] - str r2,[r12,#8] - str r3,[r12,#12] -#else - mov r4,r0,lsr#24 @ write output in endian-neutral - mov r5,r0,lsr#16 @ manner... - mov r6,r0,lsr#8 - strb r4,[r12,#0] - strb r5,[r12,#1] - mov r4,r1,lsr#24 - strb r6,[r12,#2] - mov r5,r1,lsr#16 - strb r0,[r12,#3] - mov r6,r1,lsr#8 - strb r4,[r12,#4] - strb r5,[r12,#5] - mov r4,r2,lsr#24 - strb r6,[r12,#6] - mov r5,r2,lsr#16 - strb r1,[r12,#7] - mov r6,r2,lsr#8 - strb r4,[r12,#8] - strb r5,[r12,#9] - mov r4,r3,lsr#24 - strb r6,[r12,#10] - mov r5,r3,lsr#16 - strb r2,[r12,#11] - mov r6,r3,lsr#8 - strb r4,[r12,#12] - strb r5,[r12,#13] - strb r6,[r12,#14] - strb r3,[r12,#15] -#endif - ldmia sp!,{r4-r12,pc} -ENDPROC(AES_decrypt) - -.type _armv4_AES_decrypt,%function -.align 2 -_armv4_AES_decrypt: - str lr,[sp,#-4]! @ push lr - ldmia r11!,{r4-r7} - eor r0,r0,r4 - ldr r12,[r11,#240-16] - eor r1,r1,r5 - eor r2,r2,r6 - eor r3,r3,r7 - sub r12,r12,#1 - mov lr,#255 - - and r7,lr,r0,lsr#16 - and r8,lr,r0,lsr#8 - and r9,lr,r0 - mov r0,r0,lsr#24 -.Ldec_loop: - ldr r4,[r10,r7,lsl#2] @ Td1[s0>>16] - and r7,lr,r1 @ i0 - ldr r5,[r10,r8,lsl#2] @ Td2[s0>>8] - and r8,lr,r1,lsr#16 - ldr r6,[r10,r9,lsl#2] @ Td3[s0>>0] - and r9,lr,r1,lsr#8 - ldr r0,[r10,r0,lsl#2] @ Td0[s0>>24] - mov r1,r1,lsr#24 - - ldr r7,[r10,r7,lsl#2] @ Td3[s1>>0] - ldr r8,[r10,r8,lsl#2] @ Td1[s1>>16] - ldr r9,[r10,r9,lsl#2] @ Td2[s1>>8] - eor r0,r0,r7,ror#24 - ldr r1,[r10,r1,lsl#2] @ Td0[s1>>24] - and r7,lr,r2,lsr#8 @ i0 - eor r5,r8,r5,ror#8 - and r8,lr,r2 @ i1 - eor r6,r9,r6,ror#8 - and r9,lr,r2,lsr#16 - ldr r7,[r10,r7,lsl#2] @ Td2[s2>>8] - eor r1,r1,r4,ror#8 - ldr r8,[r10,r8,lsl#2] @ Td3[s2>>0] - mov r2,r2,lsr#24 - - ldr r9,[r10,r9,lsl#2] @ Td1[s2>>16] - eor r0,r0,r7,ror#16 - ldr r2,[r10,r2,lsl#2] @ Td0[s2>>24] - and r7,lr,r3,lsr#16 @ i0 - eor r1,r1,r8,ror#24 - and r8,lr,r3,lsr#8 @ i1 - eor r6,r9,r6,ror#8 - and r9,lr,r3 @ i2 - ldr r7,[r10,r7,lsl#2] @ Td1[s3>>16] - eor r2,r2,r5,ror#8 - ldr r8,[r10,r8,lsl#2] @ Td2[s3>>8] - mov r3,r3,lsr#24 - - ldr r9,[r10,r9,lsl#2] @ Td3[s3>>0] - eor r0,r0,r7,ror#8 - ldr r7,[r11],#16 - eor r1,r1,r8,ror#16 - ldr r3,[r10,r3,lsl#2] @ Td0[s3>>24] - eor r2,r2,r9,ror#24 - - ldr r4,[r11,#-12] - eor r0,r0,r7 - ldr r5,[r11,#-8] - eor r3,r3,r6,ror#8 - ldr r6,[r11,#-4] - and r7,lr,r0,lsr#16 - eor r1,r1,r4 - and r8,lr,r0,lsr#8 - eor r2,r2,r5 - and r9,lr,r0 - eor r3,r3,r6 - mov r0,r0,lsr#24 - - subs r12,r12,#1 - bne .Ldec_loop - - add r10,r10,#1024 - - ldr r5,[r10,#0] @ prefetch Td4 - ldr r6,[r10,#32] - ldr r4,[r10,#64] - ldr r5,[r10,#96] - ldr r6,[r10,#128] - ldr r4,[r10,#160] - ldr r5,[r10,#192] - ldr r6,[r10,#224] - - ldrb r0,[r10,r0] @ Td4[s0>>24] - ldrb r4,[r10,r7] @ Td4[s0>>16] - and r7,lr,r1 @ i0 - ldrb 
r5,[r10,r8] @ Td4[s0>>8] - and r8,lr,r1,lsr#16 - ldrb r6,[r10,r9] @ Td4[s0>>0] - and r9,lr,r1,lsr#8 - - ldrb r7,[r10,r7] @ Td4[s1>>0] - ARM( ldrb r1,[r10,r1,lsr#24] ) @ Td4[s1>>24] - THUMB( add r1,r10,r1,lsr#24 ) @ Td4[s1>>24] - THUMB( ldrb r1,[r1] ) - ldrb r8,[r10,r8] @ Td4[s1>>16] - eor r0,r7,r0,lsl#24 - ldrb r9,[r10,r9] @ Td4[s1>>8] - eor r1,r4,r1,lsl#8 - and r7,lr,r2,lsr#8 @ i0 - eor r5,r5,r8,lsl#8 - and r8,lr,r2 @ i1 - ldrb r7,[r10,r7] @ Td4[s2>>8] - eor r6,r6,r9,lsl#8 - ldrb r8,[r10,r8] @ Td4[s2>>0] - and r9,lr,r2,lsr#16 - - ARM( ldrb r2,[r10,r2,lsr#24] ) @ Td4[s2>>24] - THUMB( add r2,r10,r2,lsr#24 ) @ Td4[s2>>24] - THUMB( ldrb r2,[r2] ) - eor r0,r0,r7,lsl#8 - ldrb r9,[r10,r9] @ Td4[s2>>16] - eor r1,r8,r1,lsl#16 - and r7,lr,r3,lsr#16 @ i0 - eor r2,r5,r2,lsl#16 - and r8,lr,r3,lsr#8 @ i1 - ldrb r7,[r10,r7] @ Td4[s3>>16] - eor r6,r6,r9,lsl#16 - ldrb r8,[r10,r8] @ Td4[s3>>8] - and r9,lr,r3 @ i2 - - ldrb r9,[r10,r9] @ Td4[s3>>0] - ARM( ldrb r3,[r10,r3,lsr#24] ) @ Td4[s3>>24] - THUMB( add r3,r10,r3,lsr#24 ) @ Td4[s3>>24] - THUMB( ldrb r3,[r3] ) - eor r0,r0,r7,lsl#16 - ldr r7,[r11,#0] - eor r1,r1,r8,lsl#8 - ldr r4,[r11,#4] - eor r2,r9,r2,lsl#8 - ldr r5,[r11,#8] - eor r3,r6,r3,lsl#24 - ldr r6,[r11,#12] - - eor r0,r0,r7 - eor r1,r1,r4 - eor r2,r2,r5 - eor r3,r3,r6 - - sub r10,r10,#1024 - ldr pc,[sp],#4 @ pop and return -.size _armv4_AES_decrypt,.-_armv4_AES_decrypt -.asciz "AES for ARMv4, CRYPTOGAMS by <appro@openssl.org>" -.align 2 diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index 987aa632c9f0..ba8e6a32fdc9 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -169,19 +169,19 @@ ENTRY(ce_aes_ecb_encrypt) .Lecbencloop3x: subs r4, r4, #3 bmi .Lecbenc1x - vld1.8 {q0-q1}, [r1, :64]! - vld1.8 {q2}, [r1, :64]! + vld1.8 {q0-q1}, [r1]! + vld1.8 {q2}, [r1]! bl aes_encrypt_3x - vst1.8 {q0-q1}, [r0, :64]! - vst1.8 {q2}, [r0, :64]! + vst1.8 {q0-q1}, [r0]! + vst1.8 {q2}, [r0]! b .Lecbencloop3x .Lecbenc1x: adds r4, r4, #3 beq .Lecbencout .Lecbencloop: - vld1.8 {q0}, [r1, :64]! + vld1.8 {q0}, [r1]! bl aes_encrypt - vst1.8 {q0}, [r0, :64]! + vst1.8 {q0}, [r0]! subs r4, r4, #1 bne .Lecbencloop .Lecbencout: @@ -195,19 +195,19 @@ ENTRY(ce_aes_ecb_decrypt) .Lecbdecloop3x: subs r4, r4, #3 bmi .Lecbdec1x - vld1.8 {q0-q1}, [r1, :64]! - vld1.8 {q2}, [r1, :64]! + vld1.8 {q0-q1}, [r1]! + vld1.8 {q2}, [r1]! bl aes_decrypt_3x - vst1.8 {q0-q1}, [r0, :64]! - vst1.8 {q2}, [r0, :64]! + vst1.8 {q0-q1}, [r0]! + vst1.8 {q2}, [r0]! b .Lecbdecloop3x .Lecbdec1x: adds r4, r4, #3 beq .Lecbdecout .Lecbdecloop: - vld1.8 {q0}, [r1, :64]! + vld1.8 {q0}, [r1]! bl aes_decrypt - vst1.8 {q0}, [r0, :64]! + vst1.8 {q0}, [r0]! subs r4, r4, #1 bne .Lecbdecloop .Lecbdecout: @@ -226,10 +226,10 @@ ENTRY(ce_aes_cbc_encrypt) vld1.8 {q0}, [r5] prepare_key r2, r3 .Lcbcencloop: - vld1.8 {q1}, [r1, :64]! @ get next pt block + vld1.8 {q1}, [r1]! @ get next pt block veor q0, q0, q1 @ ..and xor with iv bl aes_encrypt - vst1.8 {q0}, [r0, :64]! + vst1.8 {q0}, [r0]! subs r4, r4, #1 bne .Lcbcencloop vst1.8 {q0}, [r5] @@ -244,8 +244,8 @@ ENTRY(ce_aes_cbc_decrypt) .Lcbcdecloop3x: subs r4, r4, #3 bmi .Lcbcdec1x - vld1.8 {q0-q1}, [r1, :64]! - vld1.8 {q2}, [r1, :64]! + vld1.8 {q0-q1}, [r1]! + vld1.8 {q2}, [r1]! vmov q3, q0 vmov q4, q1 vmov q5, q2 @@ -254,19 +254,19 @@ ENTRY(ce_aes_cbc_decrypt) veor q1, q1, q3 veor q2, q2, q4 vmov q6, q5 - vst1.8 {q0-q1}, [r0, :64]! - vst1.8 {q2}, [r0, :64]! + vst1.8 {q0-q1}, [r0]! + vst1.8 {q2}, [r0]! 
 	b	.Lcbcdecloop3x
 .Lcbcdec1x:
 	adds	r4, r4, #3
 	beq	.Lcbcdecout
 	vmov	q15, q14		@ preserve last round key
 .Lcbcdecloop:
-	vld1.8	{q0}, [r1, :64]!	@ get next ct block
+	vld1.8	{q0}, [r1]!		@ get next ct block
 	veor	q14, q15, q6		@ combine prev ct with last key
 	vmov	q6, q0
 	bl	aes_decrypt
-	vst1.8	{q0}, [r0, :64]!
+	vst1.8	{q0}, [r0]!
 	subs	r4, r4, #1
 	bne	.Lcbcdecloop
 .Lcbcdecout:
@@ -300,15 +300,15 @@ ENTRY(ce_aes_ctr_encrypt)
 	rev	ip, r6
 	add	r6, r6, #1
 	vmov	s11, ip
-	vld1.8	{q3-q4}, [r1, :64]!
-	vld1.8	{q5}, [r1, :64]!
+	vld1.8	{q3-q4}, [r1]!
+	vld1.8	{q5}, [r1]!
 	bl	aes_encrypt_3x
 	veor	q0, q0, q3
 	veor	q1, q1, q4
 	veor	q2, q2, q5
 	rev	ip, r6
-	vst1.8	{q0-q1}, [r0, :64]!
-	vst1.8	{q2}, [r0, :64]!
+	vst1.8	{q0-q1}, [r0]!
+	vst1.8	{q2}, [r0]!
 	vmov	s27, ip
 	b	.Lctrloop3x
 .Lctr1x:
@@ -318,10 +318,10 @@ ENTRY(ce_aes_ctr_encrypt)
 	vmov	q0, q6
 	bl	aes_encrypt
 	subs	r4, r4, #1
-	bmi	.Lctrhalfblock		@ blocks < 0 means 1/2 block
-	vld1.8	{q3}, [r1, :64]!
+	bmi	.Lctrtailblock		@ blocks < 0 means tail block
+	vld1.8	{q3}, [r1]!
 	veor	q3, q0, q3
-	vst1.8	{q3}, [r0, :64]!
+	vst1.8	{q3}, [r0]!
 
 	adds	r6, r6, #1		@ increment BE ctr
 	rev	ip, r6
@@ -333,10 +333,8 @@ ENTRY(ce_aes_ctr_encrypt)
 	vst1.8	{q6}, [r5]
 	pop	{r4-r6, pc}
 
-.Lctrhalfblock:
-	vld1.8	{d1}, [r1, :64]
-	veor	d0, d0, d1
-	vst1.8	{d0}, [r0, :64]
+.Lctrtailblock:
+	vst1.8	{q0}, [r0, :64]		@ return just the key stream
 	pop	{r4-r6, pc}
 
 .Lctrcarry:
@@ -405,8 +403,8 @@ ENTRY(ce_aes_xts_encrypt)
 .Lxtsenc3x:
 	subs	r4, r4, #3
 	bmi	.Lxtsenc1x
-	vld1.8	{q0-q1}, [r1, :64]!	@ get 3 pt blocks
-	vld1.8	{q2}, [r1, :64]!
+	vld1.8	{q0-q1}, [r1]!		@ get 3 pt blocks
+	vld1.8	{q2}, [r1]!
 	next_tweak	q4, q3, q7, q6
 	veor	q0, q0, q3
 	next_tweak	q5, q4, q7, q6
@@ -416,8 +414,8 @@ ENTRY(ce_aes_xts_encrypt)
 	veor	q0, q0, q3
 	veor	q1, q1, q4
 	veor	q2, q2, q5
-	vst1.8	{q0-q1}, [r0, :64]!	@ write 3 ct blocks
-	vst1.8	{q2}, [r0, :64]!
+	vst1.8	{q0-q1}, [r0]!		@ write 3 ct blocks
+	vst1.8	{q2}, [r0]!
 	vmov	q3, q5
 	teq	r4, #0
 	beq	.Lxtsencout
@@ -426,11 +424,11 @@ ENTRY(ce_aes_xts_encrypt)
 	adds	r4, r4, #3
 	beq	.Lxtsencout
 .Lxtsencloop:
-	vld1.8	{q0}, [r1, :64]!
+	vld1.8	{q0}, [r1]!
 	veor	q0, q0, q3
 	bl	aes_encrypt
 	veor	q0, q0, q3
-	vst1.8	{q0}, [r0, :64]!
+	vst1.8	{q0}, [r0]!
 	subs	r4, r4, #1
 	beq	.Lxtsencout
 	next_tweak	q3, q3, q7, q6
@@ -456,8 +454,8 @@ ENTRY(ce_aes_xts_decrypt)
 .Lxtsdec3x:
 	subs	r4, r4, #3
 	bmi	.Lxtsdec1x
-	vld1.8	{q0-q1}, [r1, :64]!	@ get 3 ct blocks
-	vld1.8	{q2}, [r1, :64]!
+	vld1.8	{q0-q1}, [r1]!		@ get 3 ct blocks
+	vld1.8	{q2}, [r1]!
 	next_tweak	q4, q3, q7, q6
 	veor	q0, q0, q3
 	next_tweak	q5, q4, q7, q6
@@ -467,8 +465,8 @@ ENTRY(ce_aes_xts_decrypt)
 	veor	q0, q0, q3
 	veor	q1, q1, q4
 	veor	q2, q2, q5
-	vst1.8	{q0-q1}, [r0, :64]!	@ write 3 pt blocks
-	vst1.8	{q2}, [r0, :64]!
+	vst1.8	{q0-q1}, [r0]!		@ write 3 pt blocks
+	vst1.8	{q2}, [r0]!
 	vmov	q3, q5
 	teq	r4, #0
 	beq	.Lxtsdecout
@@ -477,12 +475,12 @@ ENTRY(ce_aes_xts_decrypt)
 	adds	r4, r4, #3
 	beq	.Lxtsdecout
 .Lxtsdecloop:
-	vld1.8	{q0}, [r1, :64]!
+	vld1.8	{q0}, [r1]!
 	veor	q0, q0, q3
 	add	ip, r2, #32		@ 3rd round key
 	bl	aes_decrypt
 	veor	q0, q0, q3
-	vst1.8	{q0}, [r0, :64]!
+	vst1.8	{q0}, [r0]!
 	subs	r4, r4, #1
 	beq	.Lxtsdecout
 	next_tweak	q3, q3, q7, q6
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index 8857531915bf..883b84d828c5 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -278,14 +278,15 @@ static int ctr_encrypt(struct skcipher_request *req)
 		u8 *tsrc = walk.src.virt.addr;
 
 		/*
-		 * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
-		 * to tell aes_ctr_encrypt() to only read half a block.
+		 * Tell aes_ctr_encrypt() to process a tail block.
 		 */
-		blocks = (nbytes <= 8) ? -1 : 1;
+		blocks = -1;
 
-		ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc,
+		ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc,
 				   num_rounds(ctx), blocks, walk.iv);
-		memcpy(tdst, tail, nbytes);
+		if (tdst != tsrc)
+			memcpy(tdst, tsrc, nbytes);
+		crypto_xor(tdst, tail, nbytes);
 		err = skcipher_walk_done(&walk, 0);
 	}
 	kernel_neon_end();
@@ -345,7 +346,6 @@ static struct skcipher_alg aes_algs[] = { {
 		.cra_flags		= CRYPTO_ALG_INTERNAL,
 		.cra_blocksize		= AES_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-		.cra_alignmask		= 7,
 		.cra_module		= THIS_MODULE,
 	},
 	.min_keysize	= AES_MIN_KEY_SIZE,
@@ -361,7 +361,6 @@ static struct skcipher_alg aes_algs[] = { {
 		.cra_flags		= CRYPTO_ALG_INTERNAL,
 		.cra_blocksize		= AES_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-		.cra_alignmask		= 7,
 		.cra_module		= THIS_MODULE,
 	},
 	.min_keysize	= AES_MIN_KEY_SIZE,
@@ -378,7 +377,6 @@ static struct skcipher_alg aes_algs[] = { {
 		.cra_flags		= CRYPTO_ALG_INTERNAL,
 		.cra_blocksize		= 1,
 		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-		.cra_alignmask		= 7,
 		.cra_module		= THIS_MODULE,
 	},
 	.min_keysize	= AES_MIN_KEY_SIZE,
@@ -396,7 +394,6 @@ static struct skcipher_alg aes_algs[] = { {
 		.cra_flags		= CRYPTO_ALG_INTERNAL,
 		.cra_blocksize		= AES_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
-		.cra_alignmask		= 7,
 		.cra_module		= THIS_MODULE,
 	},
 	.min_keysize	= 2 * AES_MIN_KEY_SIZE,
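[Editor's note] With the ctr_encrypt() change above, the assembly now writes one full block of raw keystream into tail and the C glue performs the partial-block XOR itself via crypto_xor(). An illustrative C equivalent of that final step, with made-up names; this is not part of the patch:

    #include <stdint.h>
    #include <string.h>

    /* XOR 'nbytes' (< 16) of keystream into the output; mirrors the
     * memcpy + crypto_xor pair added in the hunk above. */
    static void ctr_tail(uint8_t *dst, const uint8_t *src, size_t nbytes,
                         const uint8_t keystream[16])
    {
            size_t i;

            if (dst != src)
                    memcpy(dst, src, nbytes);
            for (i = 0; i < nbytes; i++)
                    dst[i] ^= keystream[i];
    }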
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
new file mode 100644
index 000000000000..c817a86c4ca8
--- /dev/null
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -0,0 +1,179 @@
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd.
+ * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+	.text
+	.align		5
+
+	rk		.req	r0
+	rounds		.req	r1
+	in		.req	r2
+	out		.req	r3
+	ttab		.req	ip
+
+	t0		.req	lr
+	t1		.req	r2
+	t2		.req	r3
+
+	.macro		__select, out, in, idx
+	.if		__LINUX_ARM_ARCH__ < 7
+	and		\out, \in, #0xff << (8 * \idx)
+	.else
+	ubfx		\out, \in, #(8 * \idx), #8
+	.endif
+	.endm
+
+	.macro		__load, out, in, idx
+	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
+	ldr		\out, [ttab, \in, lsr #(8 * \idx) - 2]
+	.else
+	ldr		\out, [ttab, \in, lsl #2]
+	.endif
+	.endm
+
+	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc
+	__select	\out0, \in0, 0
+	__select	t0, \in1, 1
+	__load		\out0, \out0, 0
+	__load		t0, t0, 1
+
+	.if		\enc
+	__select	\out1, \in1, 0
+	__select	t1, \in2, 1
+	.else
+	__select	\out1, \in3, 0
+	__select	t1, \in0, 1
+	.endif
+	__load		\out1, \out1, 0
+	__select	t2, \in2, 2
+	__load		t1, t1, 1
+	__load		t2, t2, 2
+
+	eor		\out0, \out0, t0, ror #24
+
+	__select	t0, \in3, 3
+	.if		\enc
+	__select	\t3, \in3, 2
+	__select	\t4, \in0, 3
+	.else
+	__select	\t3, \in1, 2
+	__select	\t4, \in2, 3
+	.endif
+	__load		\t3, \t3, 2
+	__load		t0, t0, 3
+	__load		\t4, \t4, 3
+
+	eor		\out1, \out1, t1, ror #24
+	eor		\out0, \out0, t2, ror #16
+	ldm		rk!, {t1, t2}
+	eor		\out1, \out1, \t3, ror #16
+	eor		\out0, \out0, t0, ror #8
+	eor		\out1, \out1, \t4, ror #8
+	eor		\out0, \out0, t1
+	eor		\out1, \out1, t2
+	.endm
+
+	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3
+	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
+	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+	.endm
+
+	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3
+	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
+	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+	.endm
+
+	.macro		__rev, out, in
+	.if		__LINUX_ARM_ARCH__ < 6
+	lsl		t0, \in, #24
+	and		t1, \in, #0xff00
+	and		t2, \in, #0xff0000
+	orr		\out, t0, \in, lsr #24
+	orr		\out, \out, t1, lsl #8
+	orr		\out, \out, t2, lsr #8
+	.else
+	rev		\out, \in
+	.endif
+	.endm
+
+	.macro		__adrl, out, sym, c
+	.if		__LINUX_ARM_ARCH__ < 7
+	ldr\c		\out, =\sym
+	.else
+	movw\c		\out, #:lower16:\sym
+	movt\c		\out, #:upper16:\sym
+	.endif
+	.endm
+
+	.macro		do_crypt, round, ttab, ltab
+	push		{r3-r11, lr}
+
+	ldr		r4, [in]
+	ldr		r5, [in, #4]
+	ldr		r6, [in, #8]
+	ldr		r7, [in, #12]
+
+	ldm		rk!, {r8-r11}
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	__rev		r4, r4
+	__rev		r5, r5
+	__rev		r6, r6
+	__rev		r7, r7
+#endif
+
+	eor		r4, r4, r8
+	eor		r5, r5, r9
+	eor		r6, r6, r10
+	eor		r7, r7, r11
+
+	__adrl		ttab, \ttab
+
+	tst		rounds, #2
+	bne		1f
+
+0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
+	\round		r4, r5, r6, r7, r8, r9, r10, r11
+
+1:	subs		rounds, rounds, #4
+	\round		r8, r9, r10, r11, r4, r5, r6, r7
+	__adrl		ttab, \ltab, ls
+	\round		r4, r5, r6, r7, r8, r9, r10, r11
+	bhi		0b
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	__rev		r4, r4
+	__rev		r5, r5
+	__rev		r6, r6
+	__rev		r7, r7
+#endif
+
+	ldr		out, [sp]
+
+	str		r4, [out]
+	str		r5, [out, #4]
+	str		r6, [out, #8]
+	str		r7, [out, #12]
+
+	pop		{r3-r11, pc}
+
+	.align		3
+	.ltorg
+	.endm
+
+ENTRY(__aes_arm_encrypt)
+	do_crypt	fround, crypto_ft_tab, crypto_fl_tab
+ENDPROC(__aes_arm_encrypt)
+
+ENTRY(__aes_arm_decrypt)
+	do_crypt	iround, crypto_it_tab, crypto_il_tab
+ENDPROC(__aes_arm_decrypt)
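[Editor's note] For orientation, the fround/__hround macros above build each output column from one byte of each input column, using a single 256-word table plus rotations instead of four separate T-tables. A hedged C model of one forward-round column; 'ft' stands in for the table the code addresses through crypto_ft_tab, and this snippet is illustrative rather than a drop-in equivalent:

    #include <stdint.h>

    static inline uint32_t ror32(uint32_t v, unsigned int n)
    {
            return (v >> n) | (v << (32 - n));
    }

    /* One column of a forward AES round, as __hround computes it:
     * a table lookup per byte, rotations standing in for extra
     * tables, and the round key folded in at the end. */
    static uint32_t fround_col(const uint32_t ft[256], uint32_t rk,
                               uint32_t s0, uint32_t s1,
                               uint32_t s2, uint32_t s3)
    {
            return ft[s0 & 0xff] ^
                   ror32(ft[(s1 >> 8) & 0xff], 24) ^
                   ror32(ft[(s2 >> 16) & 0xff], 16) ^
                   ror32(ft[s3 >> 24], 8) ^
                   rk;
    }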
diff --git a/arch/arm/crypto/aes-cipher-glue.c b/arch/arm/crypto/aes-cipher-glue.c
new file mode 100644
index 000000000000..c222f6e072ad
--- /dev/null
+++ b/arch/arm/crypto/aes-cipher-glue.c
@@ -0,0 +1,74 @@
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd.
+ * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
+EXPORT_SYMBOL(__aes_arm_encrypt);
+
+asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
+EXPORT_SYMBOL(__aes_arm_decrypt);
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	int rounds = 6 + ctx->key_length / 4;
+
+	__aes_arm_encrypt(ctx->key_enc, rounds, in, out);
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	int rounds = 6 + ctx->key_length / 4;
+
+	__aes_arm_decrypt(ctx->key_dec, rounds, in, out);
+}
+
+static struct crypto_alg aes_alg = {
+	.cra_name			= "aes",
+	.cra_driver_name		= "aes-arm",
+	.cra_priority			= 200,
+	.cra_flags			= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize			= AES_BLOCK_SIZE,
+	.cra_ctxsize			= sizeof(struct crypto_aes_ctx),
+	.cra_module			= THIS_MODULE,
+
+	.cra_cipher.cia_min_keysize	= AES_MIN_KEY_SIZE,
+	.cra_cipher.cia_max_keysize	= AES_MAX_KEY_SIZE,
+	.cra_cipher.cia_setkey		= crypto_aes_set_key,
+	.cra_cipher.cia_encrypt		= aes_encrypt,
+	.cra_cipher.cia_decrypt		= aes_decrypt,
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	.cra_alignmask			= 3,
+#endif
+};
+
+static int __init aes_init(void)
+{
+	return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_fini(void)
+{
+	crypto_unregister_alg(&aes_alg);
+}
+
+module_init(aes_init);
+module_exit(aes_fini);
+
+MODULE_DESCRIPTION("Scalar AES cipher for ARM");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("aes");
diff --git a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S
new file mode 100644
index 000000000000..2b625c6d4712
--- /dev/null
+++ b/arch/arm/crypto/aes-neonbs-core.S
@@ -0,0 +1,1023 @@
+/*
+ * Bit sliced AES using NEON instructions
+ *
+ * Copyright (C) 2017 Linaro Ltd.
+ * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */ + +/* + * The algorithm implemented here is described in detail by the paper + * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and + * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf) + * + * This implementation is based primarily on the OpenSSL implementation + * for 32-bit ARM written by Andy Polyakov <appro@openssl.org> + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .fpu neon + + rounds .req ip + bskey .req r4 + + q0l .req d0 + q0h .req d1 + q1l .req d2 + q1h .req d3 + q2l .req d4 + q2h .req d5 + q3l .req d6 + q3h .req d7 + q4l .req d8 + q4h .req d9 + q5l .req d10 + q5h .req d11 + q6l .req d12 + q6h .req d13 + q7l .req d14 + q7h .req d15 + q8l .req d16 + q8h .req d17 + q9l .req d18 + q9h .req d19 + q10l .req d20 + q10h .req d21 + q11l .req d22 + q11h .req d23 + q12l .req d24 + q12h .req d25 + q13l .req d26 + q13h .req d27 + q14l .req d28 + q14h .req d29 + q15l .req d30 + q15h .req d31 + + .macro __tbl, out, tbl, in, tmp + .ifc \out, \tbl + .ifb \tmp + .error __tbl needs temp register if out == tbl + .endif + vmov \tmp, \out + .endif + vtbl.8 \out\()l, {\tbl}, \in\()l + .ifc \out, \tbl + vtbl.8 \out\()h, {\tmp}, \in\()h + .else + vtbl.8 \out\()h, {\tbl}, \in\()h + .endif + .endm + + .macro __ldr, out, sym + vldr \out\()l, \sym + vldr \out\()h, \sym + 8 + .endm + + .macro __adr, reg, lbl + adr \reg, \lbl +THUMB( orr \reg, \reg, #1 ) + .endm + + .macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 + veor \b2, \b2, \b1 + veor \b5, \b5, \b6 + veor \b3, \b3, \b0 + veor \b6, \b6, \b2 + veor \b5, \b5, \b0 + veor \b6, \b6, \b3 + veor \b3, \b3, \b7 + veor \b7, \b7, \b5 + veor \b3, \b3, \b4 + veor \b4, \b4, \b5 + veor \b2, \b2, \b7 + veor \b3, \b3, \b1 + veor \b1, \b1, \b5 + .endm + + .macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 + veor \b0, \b0, \b6 + veor \b1, \b1, \b4 + veor \b4, \b4, \b6 + veor \b2, \b2, \b0 + veor \b6, \b6, \b1 + veor \b1, \b1, \b5 + veor \b5, \b5, \b3 + veor \b3, \b3, \b7 + veor \b7, \b7, \b5 + veor \b2, \b2, \b5 + veor \b4, \b4, \b7 + .endm + + .macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5 + veor \b1, \b1, \b7 + veor \b4, \b4, \b7 + veor \b7, \b7, \b5 + veor \b1, \b1, \b3 + veor \b2, \b2, \b5 + veor \b3, \b3, \b7 + veor \b6, \b6, \b1 + veor \b2, \b2, \b0 + veor \b5, \b5, \b3 + veor \b4, \b4, \b6 + veor \b0, \b0, \b6 + veor \b1, \b1, \b4 + .endm + + .macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2 + veor \b1, \b1, \b5 + veor \b2, \b2, \b7 + veor \b3, \b3, \b1 + veor \b4, \b4, \b5 + veor \b7, \b7, \b5 + veor \b3, \b3, \b4 + veor \b5, \b5, \b0 + veor \b3, \b3, \b7 + veor \b6, \b6, \b2 + veor \b2, \b2, \b1 + veor \b6, \b6, \b3 + veor \b3, \b3, \b0 + veor \b5, \b5, \b6 + .endm + + .macro mul_gf4, x0, x1, y0, y1, t0, t1 + veor \t0, \y0, \y1 + vand \t0, \t0, \x0 + veor \x0, \x0, \x1 + vand \t1, \x1, \y0 + vand \x0, \x0, \y1 + veor \x1, \t1, \t0 + veor \x0, \x0, \t1 + .endm + + .macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1 + veor \t0, \y0, \y1 + veor \t1, \y2, \y3 + vand \t0, \t0, \x0 + vand \t1, \t1, \x2 + veor \x0, \x0, \x1 + veor \x2, \x2, \x3 + vand \x1, \x1, \y0 + vand \x3, \x3, \y2 + vand \x0, \x0, \y1 + vand \x2, \x2, \y3 + veor \x1, \x1, \x0 + veor \x2, \x2, \x3 + veor \x0, \x0, \t0 + veor \x3, \x3, \t1 + .endm + + .macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, y1, y2, y3, t0, t1, t2, t3 + veor \t0, \x0, \x2 + veor \t1, \x1, \x3 + mul_gf4 \x0, \x1, \y0, \y1, \t2, \t3 + veor \y0, \y0, \y2 + veor \y1, \y1, \y3 + mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2 + veor \x0, 
\x0, \t0 + veor \x2, \x2, \t0 + veor \x1, \x1, \t1 + veor \x3, \x3, \t1 + veor \t0, \x4, \x6 + veor \t1, \x5, \x7 + mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2 + veor \y0, \y0, \y2 + veor \y1, \y1, \y3 + mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3 + veor \x4, \x4, \t0 + veor \x6, \x6, \t0 + veor \x5, \x5, \t1 + veor \x7, \x7, \t1 + .endm + + .macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, s0, s1, s2, s3 + veor \t3, \x4, \x6 + veor \t0, \x5, \x7 + veor \t1, \x1, \x3 + veor \s1, \x7, \x6 + veor \s0, \x0, \x2 + veor \s3, \t3, \t0 + vorr \t2, \t0, \t1 + vand \s2, \t3, \s0 + vorr \t3, \t3, \s0 + veor \s0, \s0, \t1 + vand \t0, \t0, \t1 + veor \t1, \x3, \x2 + vand \s3, \s3, \s0 + vand \s1, \s1, \t1 + veor \t1, \x4, \x5 + veor \s0, \x1, \x0 + veor \t3, \t3, \s1 + veor \t2, \t2, \s1 + vand \s1, \t1, \s0 + vorr \t1, \t1, \s0 + veor \t3, \t3, \s3 + veor \t0, \t0, \s1 + veor \t2, \t2, \s2 + veor \t1, \t1, \s3 + veor \t0, \t0, \s2 + vand \s0, \x7, \x3 + veor \t1, \t1, \s2 + vand \s1, \x6, \x2 + vand \s2, \x5, \x1 + vorr \s3, \x4, \x0 + veor \t3, \t3, \s0 + veor \t1, \t1, \s2 + veor \s0, \t0, \s3 + veor \t2, \t2, \s1 + vand \s2, \t3, \t1 + veor \s1, \t2, \s2 + veor \s3, \s0, \s2 + vbsl \s1, \t1, \s0 + vmvn \t0, \s0 + vbsl \s0, \s1, \s3 + vbsl \t0, \s1, \s3 + vbsl \s3, \t3, \t2 + veor \t3, \t3, \t2 + vand \s2, \s0, \s3 + veor \t1, \t1, \t0 + veor \s2, \s2, \t3 + mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ + \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 + .endm + + .macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ + t0, t1, t2, t3, s0, s1, s2, s3 + in_bs_ch \b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7 + inv_gf256 \b6, \b5, \b0, \b3, \b7, \b1, \b4, \b2, \ + \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 + out_bs_ch \b7, \b1, \b4, \b2, \b6, \b5, \b0, \b3 + .endm + + .macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ + t0, t1, t2, t3, s0, s1, s2, s3 + inv_in_bs_ch \b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7 + inv_gf256 \b5, \b1, \b2, \b6, \b3, \b7, \b0, \b4, \ + \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 + inv_out_bs_ch \b3, \b7, \b0, \b4, \b5, \b1, \b2, \b6 + .endm + + .macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, mask + vld1.8 {\t0-\t1}, [bskey, :256]! + veor \t0, \t0, \x0 + vld1.8 {\t2-\t3}, [bskey, :256]! + veor \t1, \t1, \x1 + __tbl \x0, \t0, \mask + veor \t2, \t2, \x2 + __tbl \x1, \t1, \mask + vld1.8 {\t0-\t1}, [bskey, :256]! + veor \t3, \t3, \x3 + __tbl \x2, \t2, \mask + __tbl \x3, \t3, \mask + vld1.8 {\t2-\t3}, [bskey, :256]! 
+ veor \t0, \t0, \x4 + veor \t1, \t1, \x5 + __tbl \x4, \t0, \mask + veor \t2, \t2, \x6 + __tbl \x5, \t1, \mask + veor \t3, \t3, \x7 + __tbl \x6, \t2, \mask + __tbl \x7, \t3, \mask + .endm + + .macro inv_shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, mask + __tbl \x0, \x0, \mask, \t0 + __tbl \x1, \x1, \mask, \t1 + __tbl \x2, \x2, \mask, \t2 + __tbl \x3, \x3, \mask, \t3 + __tbl \x4, \x4, \mask, \t0 + __tbl \x5, \x5, \mask, \t1 + __tbl \x6, \x6, \mask, \t2 + __tbl \x7, \x7, \mask, \t3 + .endm + + .macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, t4, t5, t6, t7, inv + vext.8 \t0, \x0, \x0, #12 + vext.8 \t1, \x1, \x1, #12 + veor \x0, \x0, \t0 + vext.8 \t2, \x2, \x2, #12 + veor \x1, \x1, \t1 + vext.8 \t3, \x3, \x3, #12 + veor \x2, \x2, \t2 + vext.8 \t4, \x4, \x4, #12 + veor \x3, \x3, \t3 + vext.8 \t5, \x5, \x5, #12 + veor \x4, \x4, \t4 + vext.8 \t6, \x6, \x6, #12 + veor \x5, \x5, \t5 + vext.8 \t7, \x7, \x7, #12 + veor \x6, \x6, \t6 + veor \t1, \t1, \x0 + veor.8 \x7, \x7, \t7 + vext.8 \x0, \x0, \x0, #8 + veor \t2, \t2, \x1 + veor \t0, \t0, \x7 + veor \t1, \t1, \x7 + vext.8 \x1, \x1, \x1, #8 + veor \t5, \t5, \x4 + veor \x0, \x0, \t0 + veor \t6, \t6, \x5 + veor \x1, \x1, \t1 + vext.8 \t0, \x4, \x4, #8 + veor \t4, \t4, \x3 + vext.8 \t1, \x5, \x5, #8 + veor \t7, \t7, \x6 + vext.8 \x4, \x3, \x3, #8 + veor \t3, \t3, \x2 + vext.8 \x5, \x7, \x7, #8 + veor \t4, \t4, \x7 + vext.8 \x3, \x6, \x6, #8 + veor \t3, \t3, \x7 + vext.8 \x6, \x2, \x2, #8 + veor \x7, \t1, \t5 + .ifb \inv + veor \x2, \t0, \t4 + veor \x4, \x4, \t3 + veor \x5, \x5, \t7 + veor \x3, \x3, \t6 + veor \x6, \x6, \t2 + .else + veor \t3, \t3, \x4 + veor \x5, \x5, \t7 + veor \x2, \x3, \t6 + veor \x3, \t0, \t4 + veor \x4, \x6, \t2 + vmov \x6, \t3 + .endif + .endm + + .macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, t4, t5, t6, t7 + vld1.8 {\t0-\t1}, [bskey, :256]! + veor \x0, \x0, \t0 + vld1.8 {\t2-\t3}, [bskey, :256]! + veor \x1, \x1, \t1 + vld1.8 {\t4-\t5}, [bskey, :256]! 
+ veor \x2, \x2, \t2 + vld1.8 {\t6-\t7}, [bskey, :256] + sub bskey, bskey, #224 + veor \x3, \x3, \t3 + veor \x4, \x4, \t4 + veor \x5, \x5, \t5 + veor \x6, \x6, \t6 + veor \x7, \x7, \t7 + vext.8 \t0, \x0, \x0, #8 + vext.8 \t6, \x6, \x6, #8 + vext.8 \t7, \x7, \x7, #8 + veor \t0, \t0, \x0 + vext.8 \t1, \x1, \x1, #8 + veor \t6, \t6, \x6 + vext.8 \t2, \x2, \x2, #8 + veor \t7, \t7, \x7 + vext.8 \t3, \x3, \x3, #8 + veor \t1, \t1, \x1 + vext.8 \t4, \x4, \x4, #8 + veor \t2, \t2, \x2 + vext.8 \t5, \x5, \x5, #8 + veor \t3, \t3, \x3 + veor \t4, \t4, \x4 + veor \t5, \t5, \x5 + veor \x0, \x0, \t6 + veor \x1, \x1, \t6 + veor \x2, \x2, \t0 + veor \x4, \x4, \t2 + veor \x3, \x3, \t1 + veor \x1, \x1, \t7 + veor \x2, \x2, \t7 + veor \x4, \x4, \t6 + veor \x5, \x5, \t3 + veor \x3, \x3, \t6 + veor \x6, \x6, \t4 + veor \x4, \x4, \t7 + veor \x5, \x5, \t7 + veor \x7, \x7, \t5 + mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ + \t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1 + .endm + + .macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1 + vshr.u64 \t0, \b0, #\n + vshr.u64 \t1, \b1, #\n + veor \t0, \t0, \a0 + veor \t1, \t1, \a1 + vand \t0, \t0, \mask + vand \t1, \t1, \mask + veor \a0, \a0, \t0 + vshl.s64 \t0, \t0, #\n + veor \a1, \a1, \t1 + vshl.s64 \t1, \t1, #\n + veor \b0, \b0, \t0 + veor \b1, \b1, \t1 + .endm + + .macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3 + vmov.i8 \t0, #0x55 + vmov.i8 \t1, #0x33 + swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3 + swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3 + vmov.i8 \t0, #0x0f + swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3 + swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3 + swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3 + swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3 + .endm + + .align 4 +M0: .quad 0x02060a0e03070b0f, 0x0004080c0105090d + + /* + * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds) + */ +ENTRY(aesbs_convert_key) + vld1.32 {q7}, [r1]! // load round 0 key + vld1.32 {q15}, [r1]! // load round 1 key + + vmov.i8 q8, #0x01 // bit masks + vmov.i8 q9, #0x02 + vmov.i8 q10, #0x04 + vmov.i8 q11, #0x08 + vmov.i8 q12, #0x10 + vmov.i8 q13, #0x20 + __ldr q14, M0 + + sub r2, r2, #1 + vst1.8 {q7}, [r0, :128]! // save round 0 key + +.Lkey_loop: + __tbl q7, q15, q14 + vmov.i8 q6, #0x40 + vmov.i8 q15, #0x80 + + vtst.8 q0, q7, q8 + vtst.8 q1, q7, q9 + vtst.8 q2, q7, q10 + vtst.8 q3, q7, q11 + vtst.8 q4, q7, q12 + vtst.8 q5, q7, q13 + vtst.8 q6, q7, q6 + vtst.8 q7, q7, q15 + vld1.32 {q15}, [r1]! // load next round key + vmvn q0, q0 + vmvn q1, q1 + vmvn q5, q5 + vmvn q6, q6 + + subs r2, r2, #1 + vst1.8 {q0-q1}, [r0, :256]! + vst1.8 {q2-q3}, [r0, :256]! + vst1.8 {q4-q5}, [r0, :256]! + vst1.8 {q6-q7}, [r0, :256]! + bne .Lkey_loop + + vmov.i8 q7, #0x63 // compose .L63 + veor q15, q15, q7 + vst1.8 {q15}, [r0, :128] + bx lr +ENDPROC(aesbs_convert_key) + + .align 4 +M0SR: .quad 0x0a0e02060f03070b, 0x0004080c05090d01 + +aesbs_encrypt8: + vld1.8 {q9}, [bskey, :128]! 
// round 0 key + __ldr q8, M0SR + + veor q10, q0, q9 // xor with round0 key + veor q11, q1, q9 + __tbl q0, q10, q8 + veor q12, q2, q9 + __tbl q1, q11, q8 + veor q13, q3, q9 + __tbl q2, q12, q8 + veor q14, q4, q9 + __tbl q3, q13, q8 + veor q15, q5, q9 + __tbl q4, q14, q8 + veor q10, q6, q9 + __tbl q5, q15, q8 + veor q11, q7, q9 + __tbl q6, q10, q8 + __tbl q7, q11, q8 + + bitslice q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11 + + sub rounds, rounds, #1 + b .Lenc_sbox + + .align 5 +SR: .quad 0x0504070600030201, 0x0f0e0d0c0a09080b +SRM0: .quad 0x0304090e00050a0f, 0x01060b0c0207080d + +.Lenc_last: + __ldr q12, SRM0 +.Lenc_loop: + shift_rows q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12 +.Lenc_sbox: + sbox q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \ + q13, q14, q15 + subs rounds, rounds, #1 + bcc .Lenc_done + + mix_cols q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11, q12, \ + q13, q14, q15 + + beq .Lenc_last + __ldr q12, SR + b .Lenc_loop + +.Lenc_done: + vld1.8 {q12}, [bskey, :128] // last round key + + bitslice q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11 + + veor q0, q0, q12 + veor q1, q1, q12 + veor q4, q4, q12 + veor q6, q6, q12 + veor q3, q3, q12 + veor q7, q7, q12 + veor q2, q2, q12 + veor q5, q5, q12 + bx lr +ENDPROC(aesbs_encrypt8) + + .align 4 +M0ISR: .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 + +aesbs_decrypt8: + add bskey, bskey, rounds, lsl #7 + sub bskey, bskey, #112 + vld1.8 {q9}, [bskey, :128] // round 0 key + sub bskey, bskey, #128 + __ldr q8, M0ISR + + veor q10, q0, q9 // xor with round0 key + veor q11, q1, q9 + __tbl q0, q10, q8 + veor q12, q2, q9 + __tbl q1, q11, q8 + veor q13, q3, q9 + __tbl q2, q12, q8 + veor q14, q4, q9 + __tbl q3, q13, q8 + veor q15, q5, q9 + __tbl q4, q14, q8 + veor q10, q6, q9 + __tbl q5, q15, q8 + veor q11, q7, q9 + __tbl q6, q10, q8 + __tbl q7, q11, q8 + + bitslice q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11 + + sub rounds, rounds, #1 + b .Ldec_sbox + + .align 5 +ISR: .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +ISRM0: .quad 0x01040b0e0205080f, 0x0306090c00070a0d + +.Ldec_last: + __ldr q12, ISRM0 +.Ldec_loop: + inv_shift_rows q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12 +.Ldec_sbox: + inv_sbox q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \ + q13, q14, q15 + subs rounds, rounds, #1 + bcc .Ldec_done + + inv_mix_cols q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11, q12, \ + q13, q14, q15 + + beq .Ldec_last + __ldr q12, ISR + b .Ldec_loop + +.Ldec_done: + add bskey, bskey, #112 + vld1.8 {q12}, [bskey, :128] // last round key + + bitslice q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11 + + veor q0, q0, q12 + veor q1, q1, q12 + veor q6, q6, q12 + veor q4, q4, q12 + veor q2, q2, q12 + veor q7, q7, q12 + veor q3, q3, q12 + veor q5, q5, q12 + bx lr +ENDPROC(aesbs_decrypt8) + + /* + * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks) + * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks) + */ + .macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 + push {r4-r6, lr} + ldr r5, [sp, #16] // number of blocks + +99: __adr ip, 0f + and lr, r5, #7 + cmp r5, #8 + sub ip, ip, lr, lsl #2 + bxlt ip // computed goto if blocks < 8 + + vld1.8 {q0}, [r1]! + vld1.8 {q1}, [r1]! + vld1.8 {q2}, [r1]! + vld1.8 {q3}, [r1]! + vld1.8 {q4}, [r1]! + vld1.8 {q5}, [r1]! + vld1.8 {q6}, [r1]! + vld1.8 {q7}, [r1]! 
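
For illustration only (not part of the patch): the "__adr ip, 0f / sub ip, ip, lr, lsl #2 / bxlt ip" sequence above is a computed goto. When fewer than eight blocks remain, it branches partway into the run of eight vld1.8 instructions so that only the last (blocks & 7) loads execute, leaving the data in the high-numbered q registers; the store side after the cipher call uses the same offset trick, so the slots stay matched. A minimal C analogue of this Duff's-device-style partial unroll, with load_blocks() as a hypothetical helper:

#include <stdint.h>
#include <string.h>

#define AES_BLOCK_SIZE 16

/* Load 1..8 blocks into the *last* n of eight slots, mirroring the asm. */
static void load_blocks(uint8_t slot[8][AES_BLOCK_SIZE],
			const uint8_t *in, unsigned int n)
{
	switch (8 - n) {			/* jump past the first 8 - n loads */
	case 0: memcpy(slot[0], in, 16); in += 16; /* fall through */
	case 1: memcpy(slot[1], in, 16); in += 16; /* fall through */
	case 2: memcpy(slot[2], in, 16); in += 16; /* fall through */
	case 3: memcpy(slot[3], in, 16); in += 16; /* fall through */
	case 4: memcpy(slot[4], in, 16); in += 16; /* fall through */
	case 5: memcpy(slot[5], in, 16); in += 16; /* fall through */
	case 6: memcpy(slot[6], in, 16); in += 16; /* fall through */
	case 7: memcpy(slot[7], in, 16);
	}
}
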
+ +0: mov bskey, r2 + mov rounds, r3 + bl \do8 + + __adr ip, 1f + and lr, r5, #7 + cmp r5, #8 + sub ip, ip, lr, lsl #2 + bxlt ip // computed goto if blocks < 8 + + vst1.8 {\o0}, [r0]! + vst1.8 {\o1}, [r0]! + vst1.8 {\o2}, [r0]! + vst1.8 {\o3}, [r0]! + vst1.8 {\o4}, [r0]! + vst1.8 {\o5}, [r0]! + vst1.8 {\o6}, [r0]! + vst1.8 {\o7}, [r0]! + +1: subs r5, r5, #8 + bgt 99b + + pop {r4-r6, pc} + .endm + + .align 4 +ENTRY(aesbs_ecb_encrypt) + __ecb_crypt aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5 +ENDPROC(aesbs_ecb_encrypt) + + .align 4 +ENTRY(aesbs_ecb_decrypt) + __ecb_crypt aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5 +ENDPROC(aesbs_ecb_decrypt) + + /* + * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], + * int rounds, int blocks, u8 iv[]) + */ + .align 4 +ENTRY(aesbs_cbc_decrypt) + mov ip, sp + push {r4-r6, lr} + ldm ip, {r5-r6} // load args 4-5 + +99: __adr ip, 0f + and lr, r5, #7 + cmp r5, #8 + sub ip, ip, lr, lsl #2 + mov lr, r1 + bxlt ip // computed goto if blocks < 8 + + vld1.8 {q0}, [lr]! + vld1.8 {q1}, [lr]! + vld1.8 {q2}, [lr]! + vld1.8 {q3}, [lr]! + vld1.8 {q4}, [lr]! + vld1.8 {q5}, [lr]! + vld1.8 {q6}, [lr]! + vld1.8 {q7}, [lr] + +0: mov bskey, r2 + mov rounds, r3 + bl aesbs_decrypt8 + + vld1.8 {q8}, [r6] + vmov q9, q8 + vmov q10, q8 + vmov q11, q8 + vmov q12, q8 + vmov q13, q8 + vmov q14, q8 + vmov q15, q8 + + __adr ip, 1f + and lr, r5, #7 + cmp r5, #8 + sub ip, ip, lr, lsl #2 + bxlt ip // computed goto if blocks < 8 + + vld1.8 {q9}, [r1]! + vld1.8 {q10}, [r1]! + vld1.8 {q11}, [r1]! + vld1.8 {q12}, [r1]! + vld1.8 {q13}, [r1]! + vld1.8 {q14}, [r1]! + vld1.8 {q15}, [r1]! + W(nop) + +1: __adr ip, 2f + sub ip, ip, lr, lsl #3 + bxlt ip // computed goto if blocks < 8 + + veor q0, q0, q8 + vst1.8 {q0}, [r0]! + veor q1, q1, q9 + vst1.8 {q1}, [r0]! + veor q6, q6, q10 + vst1.8 {q6}, [r0]! + veor q4, q4, q11 + vst1.8 {q4}, [r0]! + veor q2, q2, q12 + vst1.8 {q2}, [r0]! + veor q7, q7, q13 + vst1.8 {q7}, [r0]! + veor q3, q3, q14 + vst1.8 {q3}, [r0]! + veor q5, q5, q15 + vld1.8 {q8}, [r1]! // load next round's iv +2: vst1.8 {q5}, [r0]! + + subs r5, r5, #8 + vst1.8 {q8}, [r6] // store next round's iv + bgt 99b + + pop {r4-r6, pc} +ENDPROC(aesbs_cbc_decrypt) + + .macro next_ctr, q + vmov.32 \q\()h[1], r10 + adds r10, r10, #1 + vmov.32 \q\()h[0], r9 + adcs r9, r9, #0 + vmov.32 \q\()l[1], r8 + adcs r8, r8, #0 + vmov.32 \q\()l[0], r7 + adc r7, r7, #0 + vrev32.8 \q, \q + .endm + + /* + * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], + * int rounds, int blocks, u8 ctr[], u8 final[]) + */ +ENTRY(aesbs_ctr_encrypt) + mov ip, sp + push {r4-r10, lr} + + ldm ip, {r5-r7} // load args 4-6 + teq r7, #0 + addne r5, r5, #1 // one extra block if final != 0 + + vld1.8 {q0}, [r6] // load counter + vrev32.8 q1, q0 + vmov r9, r10, d3 + vmov r7, r8, d2 + + adds r10, r10, #1 + adcs r9, r9, #0 + adcs r8, r8, #0 + adc r7, r7, #0 + +99: vmov q1, q0 + vmov q2, q0 + vmov q3, q0 + vmov q4, q0 + vmov q5, q0 + vmov q6, q0 + vmov q7, q0 + + __adr ip, 0f + sub lr, r5, #1 + and lr, lr, #7 + cmp r5, #8 + sub ip, ip, lr, lsl #5 + sub ip, ip, lr, lsl #2 + bxlt ip // computed goto if blocks < 8 + + next_ctr q1 + next_ctr q2 + next_ctr q3 + next_ctr q4 + next_ctr q5 + next_ctr q6 + next_ctr q7 + +0: mov bskey, r2 + mov rounds, r3 + bl aesbs_encrypt8 + + __adr ip, 1f + and lr, r5, #7 + cmp r5, #8 + movgt r4, #0 + ldrle r4, [sp, #40] // load final in the last round + sub ip, ip, lr, lsl #2 + bxlt ip // computed goto if blocks < 8 + + vld1.8 {q8}, [r1]! + vld1.8 {q9}, [r1]! + vld1.8 {q10}, [r1]! 
+ vld1.8 {q11}, [r1]! + vld1.8 {q12}, [r1]! + vld1.8 {q13}, [r1]! + vld1.8 {q14}, [r1]! + teq r4, #0 // skip last block if 'final' +1: bne 2f + vld1.8 {q15}, [r1]! + +2: __adr ip, 3f + cmp r5, #8 + sub ip, ip, lr, lsl #3 + bxlt ip // computed goto if blocks < 8 + + veor q0, q0, q8 + vst1.8 {q0}, [r0]! + veor q1, q1, q9 + vst1.8 {q1}, [r0]! + veor q4, q4, q10 + vst1.8 {q4}, [r0]! + veor q6, q6, q11 + vst1.8 {q6}, [r0]! + veor q3, q3, q12 + vst1.8 {q3}, [r0]! + veor q7, q7, q13 + vst1.8 {q7}, [r0]! + veor q2, q2, q14 + vst1.8 {q2}, [r0]! + teq r4, #0 // skip last block if 'final' + W(bne) 5f +3: veor q5, q5, q15 + vst1.8 {q5}, [r0]! + +4: next_ctr q0 + + subs r5, r5, #8 + bgt 99b + + vst1.8 {q0}, [r6] + pop {r4-r10, pc} + +5: vst1.8 {q5}, [r4] + b 4b +ENDPROC(aesbs_ctr_encrypt) + + .macro next_tweak, out, in, const, tmp + vshr.s64 \tmp, \in, #63 + vand \tmp, \tmp, \const + vadd.u64 \out, \in, \in + vext.8 \tmp, \tmp, \tmp, #8 + veor \out, \out, \tmp + .endm + + .align 4 +.Lxts_mul_x: + .quad 1, 0x87 + + /* + * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + */ +__xts_prepare8: + vld1.8 {q14}, [r7] // load iv + __ldr q15, .Lxts_mul_x // load tweak mask + vmov q12, q14 + + __adr ip, 0f + and r4, r6, #7 + cmp r6, #8 + sub ip, ip, r4, lsl #5 + mov r4, sp + bxlt ip // computed goto if blocks < 8 + + vld1.8 {q0}, [r1]! + next_tweak q12, q14, q15, q13 + veor q0, q0, q14 + vst1.8 {q14}, [r4, :128]! + + vld1.8 {q1}, [r1]! + next_tweak q14, q12, q15, q13 + veor q1, q1, q12 + vst1.8 {q12}, [r4, :128]! + + vld1.8 {q2}, [r1]! + next_tweak q12, q14, q15, q13 + veor q2, q2, q14 + vst1.8 {q14}, [r4, :128]! + + vld1.8 {q3}, [r1]! + next_tweak q14, q12, q15, q13 + veor q3, q3, q12 + vst1.8 {q12}, [r4, :128]! + + vld1.8 {q4}, [r1]! + next_tweak q12, q14, q15, q13 + veor q4, q4, q14 + vst1.8 {q14}, [r4, :128]! + + vld1.8 {q5}, [r1]! + next_tweak q14, q12, q15, q13 + veor q5, q5, q12 + vst1.8 {q12}, [r4, :128]! + + vld1.8 {q6}, [r1]! + next_tweak q12, q14, q15, q13 + veor q6, q6, q14 + vst1.8 {q14}, [r4, :128]! + + vld1.8 {q7}, [r1]! + next_tweak q14, q12, q15, q13 + veor q7, q7, q12 + vst1.8 {q12}, [r4, :128] + +0: vst1.8 {q14}, [r7] // store next iv + bx lr +ENDPROC(__xts_prepare8) + + .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 + push {r4-r8, lr} + mov r5, sp // preserve sp + ldrd r6, r7, [sp, #24] // get blocks and iv args + sub ip, sp, #128 // make room for 8x tweak + bic ip, ip, #0xf // align sp to 16 bytes + mov sp, ip + +99: bl __xts_prepare8 + + mov bskey, r2 + mov rounds, r3 + bl \do8 + + __adr ip, 0f + and lr, r6, #7 + cmp r6, #8 + sub ip, ip, lr, lsl #2 + mov r4, sp + bxlt ip // computed goto if blocks < 8 + + vld1.8 {q8}, [r4, :128]! + vld1.8 {q9}, [r4, :128]! + vld1.8 {q10}, [r4, :128]! + vld1.8 {q11}, [r4, :128]! + vld1.8 {q12}, [r4, :128]! + vld1.8 {q13}, [r4, :128]! + vld1.8 {q14}, [r4, :128]! + vld1.8 {q15}, [r4, :128] + +0: __adr ip, 1f + sub ip, ip, lr, lsl #3 + bxlt ip // computed goto if blocks < 8 + + veor \o0, \o0, q8 + vst1.8 {\o0}, [r0]! + veor \o1, \o1, q9 + vst1.8 {\o1}, [r0]! + veor \o2, \o2, q10 + vst1.8 {\o2}, [r0]! + veor \o3, \o3, q11 + vst1.8 {\o3}, [r0]! + veor \o4, \o4, q12 + vst1.8 {\o4}, [r0]! + veor \o5, \o5, q13 + vst1.8 {\o5}, [r0]! + veor \o6, \o6, q14 + vst1.8 {\o6}, [r0]! + veor \o7, \o7, q15 + vst1.8 {\o7}, [r0]! 
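
For illustration only (not part of the patch): the next_tweak macro above multiplies the 128-bit XTS tweak by x in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1. The vshr.s64 by #63 replicates each 64-bit half's top bit into a full mask, the {1, 0x87} constant at .Lxts_mul_x selects the carry value for each half, and the vext.8 #8 swaps the halves so each carry lands in the other one. The same update in plain C, treating the tweak as two little-endian 64-bit halves:

#include <stdint.h>

static void next_tweak(uint64_t out[2], const uint64_t in[2])
{
	/* arithmetic shift: 0 or all-ones depending on each half's MSB */
	uint64_t lo_carry = (uint64_t)((int64_t)in[0] >> 63) & 1;    /* into bit 64 */
	uint64_t hi_carry = (uint64_t)((int64_t)in[1] >> 63) & 0x87; /* fold-back   */

	out[0] = (in[0] << 1) ^ hi_carry;
	out[1] = (in[1] << 1) ^ lo_carry;
}
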
+ +1: subs r6, r6, #8 + bgt 99b + + mov sp, r5 + pop {r4-r8, pc} + .endm + +ENTRY(aesbs_xts_encrypt) + __xts_crypt aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5 +ENDPROC(aesbs_xts_encrypt) + +ENTRY(aesbs_xts_decrypt) + __xts_crypt aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5 +ENDPROC(aesbs_xts_decrypt) diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c new file mode 100644 index 000000000000..2920b96dbd36 --- /dev/null +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -0,0 +1,406 @@ +/* + * Bit sliced AES using NEON instructions + * + * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <asm/neon.h> +#include <crypto/aes.h> +#include <crypto/cbc.h> +#include <crypto/internal/simd.h> +#include <crypto/internal/skcipher.h> +#include <crypto/xts.h> +#include <linux/module.h> + +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +MODULE_ALIAS_CRYPTO("ecb(aes)"); +MODULE_ALIAS_CRYPTO("cbc(aes)"); +MODULE_ALIAS_CRYPTO("ctr(aes)"); +MODULE_ALIAS_CRYPTO("xts(aes)"); + +asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds); + +asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks); +asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks); + +asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); + +asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 ctr[], u8 final[]); + +asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); +asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); + +asmlinkage void __aes_arm_encrypt(const u32 rk[], int rounds, const u8 in[], + u8 out[]); + +struct aesbs_ctx { + int rounds; + u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32] __aligned(AES_BLOCK_SIZE); +}; + +struct aesbs_cbc_ctx { + struct aesbs_ctx key; + u32 enc[AES_MAX_KEYLENGTH_U32]; +}; + +struct aesbs_xts_ctx { + struct aesbs_ctx key; + u32 twkey[AES_MAX_KEYLENGTH_U32]; +}; + +static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_aes_ctx rk; + int err; + + err = crypto_aes_expand_key(&rk, in_key, key_len); + if (err) + return err; + + ctx->rounds = 6 + key_len / 4; + + kernel_neon_begin(); + aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds); + kernel_neon_end(); + + return 0; +} + +static int __ecb_crypt(struct skcipher_request *req, + void (*fn)(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks)) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + while (walk.nbytes >= AES_BLOCK_SIZE) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + + if (walk.nbytes < walk.total) + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + + fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, + ctx->rounds, blocks); + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + 
return err; +} + +static int ecb_encrypt(struct skcipher_request *req) +{ + return __ecb_crypt(req, aesbs_ecb_encrypt); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + return __ecb_crypt(req, aesbs_ecb_decrypt); +} + +static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_aes_ctx rk; + int err; + + err = crypto_aes_expand_key(&rk, in_key, key_len); + if (err) + return err; + + ctx->key.rounds = 6 + key_len / 4; + + memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc)); + + kernel_neon_begin(); + aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); + kernel_neon_end(); + + return 0; +} + +static void cbc_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) +{ + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + + __aes_arm_encrypt(ctx->enc, ctx->key.rounds, src, dst); +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + return crypto_cbc_encrypt_walk(req, cbc_encrypt_one); +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + while (walk.nbytes >= AES_BLOCK_SIZE) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + + if (walk.nbytes < walk.total) + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + + aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key.rk, ctx->key.rounds, blocks, + walk.iv); + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int ctr_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + u8 buf[AES_BLOCK_SIZE]; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + while (walk.nbytes > 0) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + u8 *final = (walk.total % AES_BLOCK_SIZE) ? 
buf : NULL; + + if (walk.nbytes < walk.total) { + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + final = NULL; + } + + aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->rk, ctx->rounds, blocks, walk.iv, final); + + if (final) { + u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; + u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + + if (dst != src) + memcpy(dst, src, walk.total % AES_BLOCK_SIZE); + crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE); + + err = skcipher_walk_done(&walk, 0); + break; + } + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_aes_ctx rk; + int err; + + err = xts_verify_key(tfm, in_key, key_len); + if (err) + return err; + + key_len /= 2; + err = crypto_aes_expand_key(&rk, in_key + key_len, key_len); + if (err) + return err; + + memcpy(ctx->twkey, rk.key_enc, sizeof(ctx->twkey)); + + return aesbs_setkey(tfm, in_key, key_len); +} + +static int __xts_crypt(struct skcipher_request *req, + void (*fn)(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[])) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + __aes_arm_encrypt(ctx->twkey, ctx->key.rounds, walk.iv, walk.iv); + + kernel_neon_begin(); + while (walk.nbytes >= AES_BLOCK_SIZE) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + + if (walk.nbytes < walk.total) + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + + fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk, + ctx->key.rounds, blocks, walk.iv); + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int xts_encrypt(struct skcipher_request *req) +{ + return __xts_crypt(req, aesbs_xts_encrypt); +} + +static int xts_decrypt(struct skcipher_request *req) +{ + return __xts_crypt(req, aesbs_xts_decrypt); +} + +static struct skcipher_alg aes_algs[] = { { + .base.cra_name = "__ecb(aes)", + .base.cra_driver_name = "__ecb-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aesbs_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .setkey = aesbs_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, +}, { + .base.cra_name = "__cbc(aes)", + .base.cra_driver_name = "__cbc-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_cbc_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, +}, { + .base.cra_name = "__ctr(aes)", + .base.cra_driver_name = "__ctr-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct aesbs_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = 
AES_MAX_KEY_SIZE, + .chunksize = AES_BLOCK_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, +}, { + .base.cra_name = "__xts(aes)", + .base.cra_driver_name = "__xts-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aesbs_xts_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_xts_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, +} }; + +static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)]; + +static void aes_exit(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++) + if (aes_simd_algs[i]) + simd_skcipher_free(aes_simd_algs[i]); + + crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); +} + +static int __init aes_init(void) +{ + struct simd_skcipher_alg *simd; + const char *basename; + const char *algname; + const char *drvname; + int err; + int i; + + if (!(elf_hwcap & HWCAP_NEON)) + return -ENODEV; + + err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL)) + continue; + + algname = aes_algs[i].base.cra_name + 2; + drvname = aes_algs[i].base.cra_driver_name + 2; + basename = aes_algs[i].base.cra_driver_name; + simd = simd_skcipher_create_compat(algname, drvname, basename); + err = PTR_ERR(simd); + if (IS_ERR(simd)) + goto unregister_simds; + + aes_simd_algs[i] = simd; + } + return 0; + +unregister_simds: + aes_exit(); + return err; +} + +module_init(aes_init); +module_exit(aes_exit); diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c deleted file mode 100644 index 0409b8f89782..000000000000 --- a/arch/arm/crypto/aes_glue.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Glue Code for the asm optimized version of the AES Cipher Algorithm - */ - -#include <linux/module.h> -#include <linux/crypto.h> -#include <crypto/aes.h> - -#include "aes_glue.h" - -EXPORT_SYMBOL(AES_encrypt); -EXPORT_SYMBOL(AES_decrypt); -EXPORT_SYMBOL(private_AES_set_encrypt_key); -EXPORT_SYMBOL(private_AES_set_decrypt_key); - -static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct AES_CTX *ctx = crypto_tfm_ctx(tfm); - AES_encrypt(src, dst, &ctx->enc_key); -} - -static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct AES_CTX *ctx = crypto_tfm_ctx(tfm); - AES_decrypt(src, dst, &ctx->dec_key); -} - -static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len) -{ - struct AES_CTX *ctx = crypto_tfm_ctx(tfm); - - switch (key_len) { - case AES_KEYSIZE_128: - key_len = 128; - break; - case AES_KEYSIZE_192: - key_len = 192; - break; - case AES_KEYSIZE_256: - key_len = 256; - break; - default: - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - - if (private_AES_set_encrypt_key(in_key, key_len, &ctx->enc_key) == -1) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - /* private_AES_set_decrypt_key expects an encryption key as input */ - ctx->dec_key = ctx->enc_key; - if (private_AES_set_decrypt_key(in_key, key_len, &ctx->dec_key) == -1) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - return 0; -} - -static struct crypto_alg aes_alg = { 
- .cra_name = "aes", - .cra_driver_name = "aes-asm", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct AES_CTX), - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = aes_set_key, - .cia_encrypt = aes_encrypt, - .cia_decrypt = aes_decrypt - } - } -}; - -static int __init aes_init(void) -{ - return crypto_register_alg(&aes_alg); -} - -static void __exit aes_fini(void) -{ - crypto_unregister_alg(&aes_alg); -} - -module_init(aes_init); -module_exit(aes_fini); - -MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_CRYPTO("aes"); -MODULE_ALIAS_CRYPTO("aes-asm"); -MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>"); diff --git a/arch/arm/crypto/aes_glue.h b/arch/arm/crypto/aes_glue.h deleted file mode 100644 index cca3e51eb606..000000000000 --- a/arch/arm/crypto/aes_glue.h +++ /dev/null @@ -1,19 +0,0 @@ - -#define AES_MAXNR 14 - -struct AES_KEY { - unsigned int rd_key[4 * (AES_MAXNR + 1)]; - int rounds; -}; - -struct AES_CTX { - struct AES_KEY enc_key; - struct AES_KEY dec_key; -}; - -asmlinkage void AES_encrypt(const u8 *in, u8 *out, struct AES_KEY *ctx); -asmlinkage void AES_decrypt(const u8 *in, u8 *out, struct AES_KEY *ctx); -asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey, - const int bits, struct AES_KEY *key); -asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey, - const int bits, struct AES_KEY *key); diff --git a/arch/arm/crypto/aesbs-core.S_shipped b/arch/arm/crypto/aesbs-core.S_shipped deleted file mode 100644 index 1d1800f71c5b..000000000000 --- a/arch/arm/crypto/aesbs-core.S_shipped +++ /dev/null @@ -1,2548 +0,0 @@ - -@ ==================================================================== -@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -@ project. The module is, however, dual licensed under OpenSSL and -@ CRYPTOGAMS licenses depending on where you obtain it. For further -@ details see http://www.openssl.org/~appro/cryptogams/. -@ -@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel -@ <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is -@ granted. -@ ==================================================================== - -@ Bit-sliced AES for ARM NEON -@ -@ February 2012. -@ -@ This implementation is direct adaptation of bsaes-x86_64 module for -@ ARM NEON. Except that this module is endian-neutral [in sense that -@ it can be compiled for either endianness] by courtesy of vld1.8's -@ neutrality. Initial version doesn't implement interface to OpenSSL, -@ only low-level primitives and unsupported entry points, just enough -@ to collect performance results, which for Cortex-A8 core are: -@ -@ encrypt 19.5 cycles per byte processed with 128-bit key -@ decrypt 22.1 cycles per byte processed with 128-bit key -@ key conv. 440 cycles per 128-bit key/0.18 of 8x block -@ -@ Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, -@ which is [much] worse than anticipated (for further details see -@ http://www.openssl.org/~appro/Snapdragon-S4.html). -@ -@ Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code -@ manages in 20.0 cycles]. -@ -@ When comparing to x86_64 results keep in mind that NEON unit is -@ [mostly] single-issue and thus can't [fully] benefit from -@ instruction-level parallelism. 
And when comparing to aes-armv4 -@ results keep in mind key schedule conversion overhead (see -@ bsaes-x86_64.pl for further details)... -@ -@ <appro@openssl.org> - -@ April-August 2013 -@ -@ Add CBC, CTR and XTS subroutines, adapt for kernel use. -@ -@ <ard.biesheuvel@linaro.org> - -#ifndef __KERNEL__ -# include "arm_arch.h" - -# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} -# define VFP_ABI_POP vldmia sp!,{d8-d15} -# define VFP_ABI_FRAME 0x40 -#else -# define VFP_ABI_PUSH -# define VFP_ABI_POP -# define VFP_ABI_FRAME 0 -# define BSAES_ASM_EXTENDED_KEY -# define XTS_CHAIN_TWEAK -# define __ARM_ARCH__ __LINUX_ARM_ARCH__ -# define __ARM_MAX_ARCH__ 7 -#endif - -#ifdef __thumb__ -# define adrl adr -#endif - -#if __ARM_MAX_ARCH__>=7 -.arch armv7-a -.fpu neon - -.text -.syntax unified @ ARMv7-capable assembler is expected to handle this -#ifdef __thumb2__ -.thumb -#else -.code 32 -#endif - -.type _bsaes_decrypt8,%function -.align 4 -_bsaes_decrypt8: - adr r6,_bsaes_decrypt8 - vldmia r4!, {q9} @ round 0 key - add r6,r6,#.LM0ISR-_bsaes_decrypt8 - - vldmia r6!, {q8} @ .LM0ISR - veor q10, q0, q9 @ xor with round0 key - veor q11, q1, q9 - vtbl.8 d0, {q10}, d16 - vtbl.8 d1, {q10}, d17 - veor q12, q2, q9 - vtbl.8 d2, {q11}, d16 - vtbl.8 d3, {q11}, d17 - veor q13, q3, q9 - vtbl.8 d4, {q12}, d16 - vtbl.8 d5, {q12}, d17 - veor q14, q4, q9 - vtbl.8 d6, {q13}, d16 - vtbl.8 d7, {q13}, d17 - veor q15, q5, q9 - vtbl.8 d8, {q14}, d16 - vtbl.8 d9, {q14}, d17 - veor q10, q6, q9 - vtbl.8 d10, {q15}, d16 - vtbl.8 d11, {q15}, d17 - veor q11, q7, q9 - vtbl.8 d12, {q10}, d16 - vtbl.8 d13, {q10}, d17 - vtbl.8 d14, {q11}, d16 - vtbl.8 d15, {q11}, d17 - vmov.i8 q8,#0x55 @ compose .LBS0 - vmov.i8 q9,#0x33 @ compose .LBS1 - vshr.u64 q10, q6, #1 - vshr.u64 q11, q4, #1 - veor q10, q10, q7 - veor q11, q11, q5 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #1 - veor q5, q5, q11 - vshl.u64 q11, q11, #1 - veor q6, q6, q10 - veor q4, q4, q11 - vshr.u64 q10, q2, #1 - vshr.u64 q11, q0, #1 - veor q10, q10, q3 - veor q11, q11, q1 - vand q10, q10, q8 - vand q11, q11, q8 - veor q3, q3, q10 - vshl.u64 q10, q10, #1 - veor q1, q1, q11 - vshl.u64 q11, q11, #1 - veor q2, q2, q10 - veor q0, q0, q11 - vmov.i8 q8,#0x0f @ compose .LBS2 - vshr.u64 q10, q5, #2 - vshr.u64 q11, q4, #2 - veor q10, q10, q7 - veor q11, q11, q6 - vand q10, q10, q9 - vand q11, q11, q9 - veor q7, q7, q10 - vshl.u64 q10, q10, #2 - veor q6, q6, q11 - vshl.u64 q11, q11, #2 - veor q5, q5, q10 - veor q4, q4, q11 - vshr.u64 q10, q1, #2 - vshr.u64 q11, q0, #2 - veor q10, q10, q3 - veor q11, q11, q2 - vand q10, q10, q9 - vand q11, q11, q9 - veor q3, q3, q10 - vshl.u64 q10, q10, #2 - veor q2, q2, q11 - vshl.u64 q11, q11, #2 - veor q1, q1, q10 - veor q0, q0, q11 - vshr.u64 q10, q3, #4 - vshr.u64 q11, q2, #4 - veor q10, q10, q7 - veor q11, q11, q6 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #4 - veor q6, q6, q11 - vshl.u64 q11, q11, #4 - veor q3, q3, q10 - veor q2, q2, q11 - vshr.u64 q10, q1, #4 - vshr.u64 q11, q0, #4 - veor q10, q10, q5 - veor q11, q11, q4 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #4 - veor q4, q4, q11 - vshl.u64 q11, q11, #4 - veor q1, q1, q10 - veor q0, q0, q11 - sub r5,r5,#1 - b .Ldec_sbox -.align 4 -.Ldec_loop: - vldmia r4!, {q8-q11} - veor q8, q8, q0 - veor q9, q9, q1 - vtbl.8 d0, {q8}, d24 - vtbl.8 d1, {q8}, d25 - vldmia r4!, {q8} - veor q10, q10, q2 - vtbl.8 d2, {q9}, d24 - vtbl.8 d3, {q9}, d25 - vldmia r4!, {q9} - veor q11, q11, q3 - vtbl.8 d4, {q10}, d24 
- vtbl.8 d5, {q10}, d25 - vldmia r4!, {q10} - vtbl.8 d6, {q11}, d24 - vtbl.8 d7, {q11}, d25 - vldmia r4!, {q11} - veor q8, q8, q4 - veor q9, q9, q5 - vtbl.8 d8, {q8}, d24 - vtbl.8 d9, {q8}, d25 - veor q10, q10, q6 - vtbl.8 d10, {q9}, d24 - vtbl.8 d11, {q9}, d25 - veor q11, q11, q7 - vtbl.8 d12, {q10}, d24 - vtbl.8 d13, {q10}, d25 - vtbl.8 d14, {q11}, d24 - vtbl.8 d15, {q11}, d25 -.Ldec_sbox: - veor q1, q1, q4 - veor q3, q3, q4 - - veor q4, q4, q7 - veor q1, q1, q6 - veor q2, q2, q7 - veor q6, q6, q4 - - veor q0, q0, q1 - veor q2, q2, q5 - veor q7, q7, q6 - veor q3, q3, q0 - veor q5, q5, q0 - veor q1, q1, q3 - veor q11, q3, q0 - veor q10, q7, q4 - veor q9, q1, q6 - veor q13, q4, q0 - vmov q8, q10 - veor q12, q5, q2 - - vorr q10, q10, q9 - veor q15, q11, q8 - vand q14, q11, q12 - vorr q11, q11, q12 - veor q12, q12, q9 - vand q8, q8, q9 - veor q9, q6, q2 - vand q15, q15, q12 - vand q13, q13, q9 - veor q9, q3, q7 - veor q12, q1, q5 - veor q11, q11, q13 - veor q10, q10, q13 - vand q13, q9, q12 - vorr q9, q9, q12 - veor q11, q11, q15 - veor q8, q8, q13 - veor q10, q10, q14 - veor q9, q9, q15 - veor q8, q8, q14 - vand q12, q4, q6 - veor q9, q9, q14 - vand q13, q0, q2 - vand q14, q7, q1 - vorr q15, q3, q5 - veor q11, q11, q12 - veor q9, q9, q14 - veor q8, q8, q15 - veor q10, q10, q13 - - @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 - - @ new smaller inversion - - vand q14, q11, q9 - vmov q12, q8 - - veor q13, q10, q14 - veor q15, q8, q14 - veor q14, q8, q14 @ q14=q15 - - vbsl q13, q9, q8 - vbsl q15, q11, q10 - veor q11, q11, q10 - - vbsl q12, q13, q14 - vbsl q8, q14, q13 - - vand q14, q12, q15 - veor q9, q9, q8 - - veor q14, q14, q11 - veor q12, q5, q2 - veor q8, q1, q6 - veor q10, q15, q14 - vand q10, q10, q5 - veor q5, q5, q1 - vand q11, q1, q15 - vand q5, q5, q14 - veor q1, q11, q10 - veor q5, q5, q11 - veor q15, q15, q13 - veor q14, q14, q9 - veor q11, q15, q14 - veor q10, q13, q9 - vand q11, q11, q12 - vand q10, q10, q2 - veor q12, q12, q8 - veor q2, q2, q6 - vand q8, q8, q15 - vand q6, q6, q13 - vand q12, q12, q14 - vand q2, q2, q9 - veor q8, q8, q12 - veor q2, q2, q6 - veor q12, q12, q11 - veor q6, q6, q10 - veor q5, q5, q12 - veor q2, q2, q12 - veor q1, q1, q8 - veor q6, q6, q8 - - veor q12, q3, q0 - veor q8, q7, q4 - veor q11, q15, q14 - veor q10, q13, q9 - vand q11, q11, q12 - vand q10, q10, q0 - veor q12, q12, q8 - veor q0, q0, q4 - vand q8, q8, q15 - vand q4, q4, q13 - vand q12, q12, q14 - vand q0, q0, q9 - veor q8, q8, q12 - veor q0, q0, q4 - veor q12, q12, q11 - veor q4, q4, q10 - veor q15, q15, q13 - veor q14, q14, q9 - veor q10, q15, q14 - vand q10, q10, q3 - veor q3, q3, q7 - vand q11, q7, q15 - vand q3, q3, q14 - veor q7, q11, q10 - veor q3, q3, q11 - veor q3, q3, q12 - veor q0, q0, q12 - veor q7, q7, q8 - veor q4, q4, q8 - veor q1, q1, q7 - veor q6, q6, q5 - - veor q4, q4, q1 - veor q2, q2, q7 - veor q5, q5, q7 - veor q4, q4, q2 - veor q7, q7, q0 - veor q4, q4, q5 - veor q3, q3, q6 - veor q6, q6, q1 - veor q3, q3, q4 - - veor q4, q4, q0 - veor q7, q7, q3 - subs r5,r5,#1 - bcc .Ldec_done - @ multiplication by 0x05-0x00-0x04-0x00 - vext.8 q8, q0, q0, #8 - vext.8 q14, q3, q3, #8 - vext.8 q15, q5, q5, #8 - veor q8, q8, q0 - vext.8 q9, q1, q1, #8 - veor q14, q14, q3 - vext.8 q10, q6, q6, #8 - veor q15, q15, q5 - vext.8 q11, q4, q4, #8 - veor q9, q9, q1 - vext.8 q12, q2, q2, #8 - veor q10, q10, q6 - vext.8 q13, q7, q7, #8 - veor q11, q11, q4 - veor q12, q12, q2 - veor q13, q13, q7 - - veor q0, q0, q14 - veor q1, q1, q14 - veor q6, q6, q8 - veor q2, q2, q10 - veor q4, q4, q9 - veor q1, q1, 
q15 - veor q6, q6, q15 - veor q2, q2, q14 - veor q7, q7, q11 - veor q4, q4, q14 - veor q3, q3, q12 - veor q2, q2, q15 - veor q7, q7, q15 - veor q5, q5, q13 - vext.8 q8, q0, q0, #12 @ x0 <<< 32 - vext.8 q9, q1, q1, #12 - veor q0, q0, q8 @ x0 ^ (x0 <<< 32) - vext.8 q10, q6, q6, #12 - veor q1, q1, q9 - vext.8 q11, q4, q4, #12 - veor q6, q6, q10 - vext.8 q12, q2, q2, #12 - veor q4, q4, q11 - vext.8 q13, q7, q7, #12 - veor q2, q2, q12 - vext.8 q14, q3, q3, #12 - veor q7, q7, q13 - vext.8 q15, q5, q5, #12 - veor q3, q3, q14 - - veor q9, q9, q0 - veor q5, q5, q15 - vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) - veor q10, q10, q1 - veor q8, q8, q5 - veor q9, q9, q5 - vext.8 q1, q1, q1, #8 - veor q13, q13, q2 - veor q0, q0, q8 - veor q14, q14, q7 - veor q1, q1, q9 - vext.8 q8, q2, q2, #8 - veor q12, q12, q4 - vext.8 q9, q7, q7, #8 - veor q15, q15, q3 - vext.8 q2, q4, q4, #8 - veor q11, q11, q6 - vext.8 q7, q5, q5, #8 - veor q12, q12, q5 - vext.8 q4, q3, q3, #8 - veor q11, q11, q5 - vext.8 q3, q6, q6, #8 - veor q5, q9, q13 - veor q11, q11, q2 - veor q7, q7, q15 - veor q6, q4, q14 - veor q4, q8, q12 - veor q2, q3, q10 - vmov q3, q11 - @ vmov q5, q9 - vldmia r6, {q12} @ .LISR - ite eq @ Thumb2 thing, sanity check in ARM - addeq r6,r6,#0x10 - bne .Ldec_loop - vldmia r6, {q12} @ .LISRM0 - b .Ldec_loop -.align 4 -.Ldec_done: - vmov.i8 q8,#0x55 @ compose .LBS0 - vmov.i8 q9,#0x33 @ compose .LBS1 - vshr.u64 q10, q3, #1 - vshr.u64 q11, q2, #1 - veor q10, q10, q5 - veor q11, q11, q7 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #1 - veor q7, q7, q11 - vshl.u64 q11, q11, #1 - veor q3, q3, q10 - veor q2, q2, q11 - vshr.u64 q10, q6, #1 - vshr.u64 q11, q0, #1 - veor q10, q10, q4 - veor q11, q11, q1 - vand q10, q10, q8 - vand q11, q11, q8 - veor q4, q4, q10 - vshl.u64 q10, q10, #1 - veor q1, q1, q11 - vshl.u64 q11, q11, #1 - veor q6, q6, q10 - veor q0, q0, q11 - vmov.i8 q8,#0x0f @ compose .LBS2 - vshr.u64 q10, q7, #2 - vshr.u64 q11, q2, #2 - veor q10, q10, q5 - veor q11, q11, q3 - vand q10, q10, q9 - vand q11, q11, q9 - veor q5, q5, q10 - vshl.u64 q10, q10, #2 - veor q3, q3, q11 - vshl.u64 q11, q11, #2 - veor q7, q7, q10 - veor q2, q2, q11 - vshr.u64 q10, q1, #2 - vshr.u64 q11, q0, #2 - veor q10, q10, q4 - veor q11, q11, q6 - vand q10, q10, q9 - vand q11, q11, q9 - veor q4, q4, q10 - vshl.u64 q10, q10, #2 - veor q6, q6, q11 - vshl.u64 q11, q11, #2 - veor q1, q1, q10 - veor q0, q0, q11 - vshr.u64 q10, q4, #4 - vshr.u64 q11, q6, #4 - veor q10, q10, q5 - veor q11, q11, q3 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #4 - veor q3, q3, q11 - vshl.u64 q11, q11, #4 - veor q4, q4, q10 - veor q6, q6, q11 - vshr.u64 q10, q1, #4 - vshr.u64 q11, q0, #4 - veor q10, q10, q7 - veor q11, q11, q2 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #4 - veor q2, q2, q11 - vshl.u64 q11, q11, #4 - veor q1, q1, q10 - veor q0, q0, q11 - vldmia r4, {q8} @ last round key - veor q6, q6, q8 - veor q4, q4, q8 - veor q2, q2, q8 - veor q7, q7, q8 - veor q3, q3, q8 - veor q5, q5, q8 - veor q0, q0, q8 - veor q1, q1, q8 - bx lr -.size _bsaes_decrypt8,.-_bsaes_decrypt8 - -.type _bsaes_const,%object -.align 6 -_bsaes_const: -.LM0ISR: @ InvShiftRows constants - .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 -.LISR: - .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 -.LISRM0: - .quad 0x01040b0e0205080f, 0x0306090c00070a0d -.LM0SR: @ ShiftRows constants - .quad 0x0a0e02060f03070b, 0x0004080c05090d01 -.LSR: - .quad 0x0504070600030201, 0x0f0e0d0c0a09080b 
-.LSRM0: - .quad 0x0304090e00050a0f, 0x01060b0c0207080d -.LM0: - .quad 0x02060a0e03070b0f, 0x0004080c0105090d -.LREVM0SR: - .quad 0x090d01050c000408, 0x03070b0f060a0e02 -.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro@openssl.org>" -.align 6 -.size _bsaes_const,.-_bsaes_const - -.type _bsaes_encrypt8,%function -.align 4 -_bsaes_encrypt8: - adr r6,_bsaes_encrypt8 - vldmia r4!, {q9} @ round 0 key - sub r6,r6,#_bsaes_encrypt8-.LM0SR - - vldmia r6!, {q8} @ .LM0SR -_bsaes_encrypt8_alt: - veor q10, q0, q9 @ xor with round0 key - veor q11, q1, q9 - vtbl.8 d0, {q10}, d16 - vtbl.8 d1, {q10}, d17 - veor q12, q2, q9 - vtbl.8 d2, {q11}, d16 - vtbl.8 d3, {q11}, d17 - veor q13, q3, q9 - vtbl.8 d4, {q12}, d16 - vtbl.8 d5, {q12}, d17 - veor q14, q4, q9 - vtbl.8 d6, {q13}, d16 - vtbl.8 d7, {q13}, d17 - veor q15, q5, q9 - vtbl.8 d8, {q14}, d16 - vtbl.8 d9, {q14}, d17 - veor q10, q6, q9 - vtbl.8 d10, {q15}, d16 - vtbl.8 d11, {q15}, d17 - veor q11, q7, q9 - vtbl.8 d12, {q10}, d16 - vtbl.8 d13, {q10}, d17 - vtbl.8 d14, {q11}, d16 - vtbl.8 d15, {q11}, d17 -_bsaes_encrypt8_bitslice: - vmov.i8 q8,#0x55 @ compose .LBS0 - vmov.i8 q9,#0x33 @ compose .LBS1 - vshr.u64 q10, q6, #1 - vshr.u64 q11, q4, #1 - veor q10, q10, q7 - veor q11, q11, q5 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #1 - veor q5, q5, q11 - vshl.u64 q11, q11, #1 - veor q6, q6, q10 - veor q4, q4, q11 - vshr.u64 q10, q2, #1 - vshr.u64 q11, q0, #1 - veor q10, q10, q3 - veor q11, q11, q1 - vand q10, q10, q8 - vand q11, q11, q8 - veor q3, q3, q10 - vshl.u64 q10, q10, #1 - veor q1, q1, q11 - vshl.u64 q11, q11, #1 - veor q2, q2, q10 - veor q0, q0, q11 - vmov.i8 q8,#0x0f @ compose .LBS2 - vshr.u64 q10, q5, #2 - vshr.u64 q11, q4, #2 - veor q10, q10, q7 - veor q11, q11, q6 - vand q10, q10, q9 - vand q11, q11, q9 - veor q7, q7, q10 - vshl.u64 q10, q10, #2 - veor q6, q6, q11 - vshl.u64 q11, q11, #2 - veor q5, q5, q10 - veor q4, q4, q11 - vshr.u64 q10, q1, #2 - vshr.u64 q11, q0, #2 - veor q10, q10, q3 - veor q11, q11, q2 - vand q10, q10, q9 - vand q11, q11, q9 - veor q3, q3, q10 - vshl.u64 q10, q10, #2 - veor q2, q2, q11 - vshl.u64 q11, q11, #2 - veor q1, q1, q10 - veor q0, q0, q11 - vshr.u64 q10, q3, #4 - vshr.u64 q11, q2, #4 - veor q10, q10, q7 - veor q11, q11, q6 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #4 - veor q6, q6, q11 - vshl.u64 q11, q11, #4 - veor q3, q3, q10 - veor q2, q2, q11 - vshr.u64 q10, q1, #4 - vshr.u64 q11, q0, #4 - veor q10, q10, q5 - veor q11, q11, q4 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #4 - veor q4, q4, q11 - vshl.u64 q11, q11, #4 - veor q1, q1, q10 - veor q0, q0, q11 - sub r5,r5,#1 - b .Lenc_sbox -.align 4 -.Lenc_loop: - vldmia r4!, {q8-q11} - veor q8, q8, q0 - veor q9, q9, q1 - vtbl.8 d0, {q8}, d24 - vtbl.8 d1, {q8}, d25 - vldmia r4!, {q8} - veor q10, q10, q2 - vtbl.8 d2, {q9}, d24 - vtbl.8 d3, {q9}, d25 - vldmia r4!, {q9} - veor q11, q11, q3 - vtbl.8 d4, {q10}, d24 - vtbl.8 d5, {q10}, d25 - vldmia r4!, {q10} - vtbl.8 d6, {q11}, d24 - vtbl.8 d7, {q11}, d25 - vldmia r4!, {q11} - veor q8, q8, q4 - veor q9, q9, q5 - vtbl.8 d8, {q8}, d24 - vtbl.8 d9, {q8}, d25 - veor q10, q10, q6 - vtbl.8 d10, {q9}, d24 - vtbl.8 d11, {q9}, d25 - veor q11, q11, q7 - vtbl.8 d12, {q10}, d24 - vtbl.8 d13, {q10}, d25 - vtbl.8 d14, {q11}, d24 - vtbl.8 d15, {q11}, d25 -.Lenc_sbox: - veor q2, q2, q1 - veor q5, q5, q6 - veor q3, q3, q0 - veor q6, q6, q2 - veor q5, q5, q0 - - veor q6, q6, q3 - veor q3, q3, q7 - veor q7, q7, q5 - veor q3, q3, q4 - 
veor q4, q4, q5 - - veor q2, q2, q7 - veor q3, q3, q1 - veor q1, q1, q5 - veor q11, q7, q4 - veor q10, q1, q2 - veor q9, q5, q3 - veor q13, q2, q4 - vmov q8, q10 - veor q12, q6, q0 - - vorr q10, q10, q9 - veor q15, q11, q8 - vand q14, q11, q12 - vorr q11, q11, q12 - veor q12, q12, q9 - vand q8, q8, q9 - veor q9, q3, q0 - vand q15, q15, q12 - vand q13, q13, q9 - veor q9, q7, q1 - veor q12, q5, q6 - veor q11, q11, q13 - veor q10, q10, q13 - vand q13, q9, q12 - vorr q9, q9, q12 - veor q11, q11, q15 - veor q8, q8, q13 - veor q10, q10, q14 - veor q9, q9, q15 - veor q8, q8, q14 - vand q12, q2, q3 - veor q9, q9, q14 - vand q13, q4, q0 - vand q14, q1, q5 - vorr q15, q7, q6 - veor q11, q11, q12 - veor q9, q9, q14 - veor q8, q8, q15 - veor q10, q10, q13 - - @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 - - @ new smaller inversion - - vand q14, q11, q9 - vmov q12, q8 - - veor q13, q10, q14 - veor q15, q8, q14 - veor q14, q8, q14 @ q14=q15 - - vbsl q13, q9, q8 - vbsl q15, q11, q10 - veor q11, q11, q10 - - vbsl q12, q13, q14 - vbsl q8, q14, q13 - - vand q14, q12, q15 - veor q9, q9, q8 - - veor q14, q14, q11 - veor q12, q6, q0 - veor q8, q5, q3 - veor q10, q15, q14 - vand q10, q10, q6 - veor q6, q6, q5 - vand q11, q5, q15 - vand q6, q6, q14 - veor q5, q11, q10 - veor q6, q6, q11 - veor q15, q15, q13 - veor q14, q14, q9 - veor q11, q15, q14 - veor q10, q13, q9 - vand q11, q11, q12 - vand q10, q10, q0 - veor q12, q12, q8 - veor q0, q0, q3 - vand q8, q8, q15 - vand q3, q3, q13 - vand q12, q12, q14 - vand q0, q0, q9 - veor q8, q8, q12 - veor q0, q0, q3 - veor q12, q12, q11 - veor q3, q3, q10 - veor q6, q6, q12 - veor q0, q0, q12 - veor q5, q5, q8 - veor q3, q3, q8 - - veor q12, q7, q4 - veor q8, q1, q2 - veor q11, q15, q14 - veor q10, q13, q9 - vand q11, q11, q12 - vand q10, q10, q4 - veor q12, q12, q8 - veor q4, q4, q2 - vand q8, q8, q15 - vand q2, q2, q13 - vand q12, q12, q14 - vand q4, q4, q9 - veor q8, q8, q12 - veor q4, q4, q2 - veor q12, q12, q11 - veor q2, q2, q10 - veor q15, q15, q13 - veor q14, q14, q9 - veor q10, q15, q14 - vand q10, q10, q7 - veor q7, q7, q1 - vand q11, q1, q15 - vand q7, q7, q14 - veor q1, q11, q10 - veor q7, q7, q11 - veor q7, q7, q12 - veor q4, q4, q12 - veor q1, q1, q8 - veor q2, q2, q8 - veor q7, q7, q0 - veor q1, q1, q6 - veor q6, q6, q0 - veor q4, q4, q7 - veor q0, q0, q1 - - veor q1, q1, q5 - veor q5, q5, q2 - veor q2, q2, q3 - veor q3, q3, q5 - veor q4, q4, q5 - - veor q6, q6, q3 - subs r5,r5,#1 - bcc .Lenc_done - vext.8 q8, q0, q0, #12 @ x0 <<< 32 - vext.8 q9, q1, q1, #12 - veor q0, q0, q8 @ x0 ^ (x0 <<< 32) - vext.8 q10, q4, q4, #12 - veor q1, q1, q9 - vext.8 q11, q6, q6, #12 - veor q4, q4, q10 - vext.8 q12, q3, q3, #12 - veor q6, q6, q11 - vext.8 q13, q7, q7, #12 - veor q3, q3, q12 - vext.8 q14, q2, q2, #12 - veor q7, q7, q13 - vext.8 q15, q5, q5, #12 - veor q2, q2, q14 - - veor q9, q9, q0 - veor q5, q5, q15 - vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) - veor q10, q10, q1 - veor q8, q8, q5 - veor q9, q9, q5 - vext.8 q1, q1, q1, #8 - veor q13, q13, q3 - veor q0, q0, q8 - veor q14, q14, q7 - veor q1, q1, q9 - vext.8 q8, q3, q3, #8 - veor q12, q12, q6 - vext.8 q9, q7, q7, #8 - veor q15, q15, q2 - vext.8 q3, q6, q6, #8 - veor q11, q11, q4 - vext.8 q7, q5, q5, #8 - veor q12, q12, q5 - vext.8 q6, q2, q2, #8 - veor q11, q11, q5 - vext.8 q2, q4, q4, #8 - veor q5, q9, q13 - veor q4, q8, q12 - veor q3, q3, q11 - veor q7, q7, q15 - veor q6, q6, q14 - @ vmov q4, q8 - veor q2, q2, q10 - @ vmov q5, q9 - vldmia r6, {q12} @ .LSR - ite eq @ Thumb2 thing, samity check in ARM - addeq 
r6,r6,#0x10 - bne .Lenc_loop - vldmia r6, {q12} @ .LSRM0 - b .Lenc_loop -.align 4 -.Lenc_done: - vmov.i8 q8,#0x55 @ compose .LBS0 - vmov.i8 q9,#0x33 @ compose .LBS1 - vshr.u64 q10, q2, #1 - vshr.u64 q11, q3, #1 - veor q10, q10, q5 - veor q11, q11, q7 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #1 - veor q7, q7, q11 - vshl.u64 q11, q11, #1 - veor q2, q2, q10 - veor q3, q3, q11 - vshr.u64 q10, q4, #1 - vshr.u64 q11, q0, #1 - veor q10, q10, q6 - veor q11, q11, q1 - vand q10, q10, q8 - vand q11, q11, q8 - veor q6, q6, q10 - vshl.u64 q10, q10, #1 - veor q1, q1, q11 - vshl.u64 q11, q11, #1 - veor q4, q4, q10 - veor q0, q0, q11 - vmov.i8 q8,#0x0f @ compose .LBS2 - vshr.u64 q10, q7, #2 - vshr.u64 q11, q3, #2 - veor q10, q10, q5 - veor q11, q11, q2 - vand q10, q10, q9 - vand q11, q11, q9 - veor q5, q5, q10 - vshl.u64 q10, q10, #2 - veor q2, q2, q11 - vshl.u64 q11, q11, #2 - veor q7, q7, q10 - veor q3, q3, q11 - vshr.u64 q10, q1, #2 - vshr.u64 q11, q0, #2 - veor q10, q10, q6 - veor q11, q11, q4 - vand q10, q10, q9 - vand q11, q11, q9 - veor q6, q6, q10 - vshl.u64 q10, q10, #2 - veor q4, q4, q11 - vshl.u64 q11, q11, #2 - veor q1, q1, q10 - veor q0, q0, q11 - vshr.u64 q10, q6, #4 - vshr.u64 q11, q4, #4 - veor q10, q10, q5 - veor q11, q11, q2 - vand q10, q10, q8 - vand q11, q11, q8 - veor q5, q5, q10 - vshl.u64 q10, q10, #4 - veor q2, q2, q11 - vshl.u64 q11, q11, #4 - veor q6, q6, q10 - veor q4, q4, q11 - vshr.u64 q10, q1, #4 - vshr.u64 q11, q0, #4 - veor q10, q10, q7 - veor q11, q11, q3 - vand q10, q10, q8 - vand q11, q11, q8 - veor q7, q7, q10 - vshl.u64 q10, q10, #4 - veor q3, q3, q11 - vshl.u64 q11, q11, #4 - veor q1, q1, q10 - veor q0, q0, q11 - vldmia r4, {q8} @ last round key - veor q4, q4, q8 - veor q6, q6, q8 - veor q3, q3, q8 - veor q7, q7, q8 - veor q2, q2, q8 - veor q5, q5, q8 - veor q0, q0, q8 - veor q1, q1, q8 - bx lr -.size _bsaes_encrypt8,.-_bsaes_encrypt8 -.type _bsaes_key_convert,%function -.align 4 -_bsaes_key_convert: - adr r6,_bsaes_key_convert - vld1.8 {q7}, [r4]! @ load round 0 key - sub r6,r6,#_bsaes_key_convert-.LM0 - vld1.8 {q15}, [r4]! @ load round 1 key - - vmov.i8 q8, #0x01 @ bit masks - vmov.i8 q9, #0x02 - vmov.i8 q10, #0x04 - vmov.i8 q11, #0x08 - vmov.i8 q12, #0x10 - vmov.i8 q13, #0x20 - vldmia r6, {q14} @ .LM0 - -#ifdef __ARMEL__ - vrev32.8 q7, q7 - vrev32.8 q15, q15 -#endif - sub r5,r5,#1 - vstmia r12!, {q7} @ save round 0 key - b .Lkey_loop - -.align 4 -.Lkey_loop: - vtbl.8 d14,{q15},d28 - vtbl.8 d15,{q15},d29 - vmov.i8 q6, #0x40 - vmov.i8 q15, #0x80 - - vtst.8 q0, q7, q8 - vtst.8 q1, q7, q9 - vtst.8 q2, q7, q10 - vtst.8 q3, q7, q11 - vtst.8 q4, q7, q12 - vtst.8 q5, q7, q13 - vtst.8 q6, q7, q6 - vtst.8 q7, q7, q15 - vld1.8 {q15}, [r4]! 
@ load next round key - vmvn q0, q0 @ "pnot" - vmvn q1, q1 - vmvn q5, q5 - vmvn q6, q6 -#ifdef __ARMEL__ - vrev32.8 q15, q15 -#endif - subs r5,r5,#1 - vstmia r12!,{q0-q7} @ write bit-sliced round key - bne .Lkey_loop - - vmov.i8 q7,#0x63 @ compose .L63 - @ don't save last round key - bx lr -.size _bsaes_key_convert,.-_bsaes_key_convert -.extern AES_cbc_encrypt -.extern AES_decrypt - -.global bsaes_cbc_encrypt -.type bsaes_cbc_encrypt,%function -.align 5 -bsaes_cbc_encrypt: -#ifndef __KERNEL__ - cmp r2, #128 -#ifndef __thumb__ - blo AES_cbc_encrypt -#else - bhs 1f - b AES_cbc_encrypt -1: -#endif -#endif - - @ it is up to the caller to make sure we are called with enc == 0 - - mov ip, sp - stmdb sp!, {r4-r10, lr} - VFP_ABI_PUSH - ldr r8, [ip] @ IV is 1st arg on the stack - mov r2, r2, lsr#4 @ len in 16 byte blocks - sub sp, #0x10 @ scratch space to carry over the IV - mov r9, sp @ save sp - - ldr r10, [r3, #240] @ get # of rounds -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key - add r12, #96 @ sifze of bit-slices key schedule - - @ populate the key schedule - mov r4, r3 @ pass key - mov r5, r10 @ pass # of rounds - mov sp, r12 @ sp is sp - bl _bsaes_key_convert - vldmia sp, {q6} - vstmia r12, {q15} @ save last round key - veor q7, q7, q6 @ fix up round 0 key - vstmia sp, {q7} -#else - ldr r12, [r3, #244] - eors r12, #1 - beq 0f - - @ populate the key schedule - str r12, [r3, #244] - mov r4, r3 @ pass key - mov r5, r10 @ pass # of rounds - add r12, r3, #248 @ pass key schedule - bl _bsaes_key_convert - add r4, r3, #248 - vldmia r4, {q6} - vstmia r12, {q15} @ save last round key - veor q7, q7, q6 @ fix up round 0 key - vstmia r4, {q7} - -.align 2 -0: -#endif - - vld1.8 {q15}, [r8] @ load IV - b .Lcbc_dec_loop - -.align 4 -.Lcbc_dec_loop: - subs r2, r2, #0x8 - bmi .Lcbc_dec_loop_finish - - vld1.8 {q0-q1}, [r0]! @ load input - vld1.8 {q2-q3}, [r0]! -#ifndef BSAES_ASM_EXTENDED_KEY - mov r4, sp @ pass the key -#else - add r4, r3, #248 -#endif - vld1.8 {q4-q5}, [r0]! - mov r5, r10 - vld1.8 {q6-q7}, [r0] - sub r0, r0, #0x60 - vstmia r9, {q15} @ put aside IV - - bl _bsaes_decrypt8 - - vldmia r9, {q14} @ reload IV - vld1.8 {q8-q9}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q10-q11}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vld1.8 {q12-q13}, [r0]! - veor q4, q4, q10 - veor q2, q2, q11 - vld1.8 {q14-q15}, [r0]! - veor q7, q7, q12 - vst1.8 {q0-q1}, [r1]! @ write output - veor q3, q3, q13 - vst1.8 {q6}, [r1]! - veor q5, q5, q14 - vst1.8 {q4}, [r1]! - vst1.8 {q2}, [r1]! - vst1.8 {q7}, [r1]! - vst1.8 {q3}, [r1]! - vst1.8 {q5}, [r1]! - - b .Lcbc_dec_loop - -.Lcbc_dec_loop_finish: - adds r2, r2, #8 - beq .Lcbc_dec_done - - vld1.8 {q0}, [r0]! @ load input - cmp r2, #2 - blo .Lcbc_dec_one - vld1.8 {q1}, [r0]! -#ifndef BSAES_ASM_EXTENDED_KEY - mov r4, sp @ pass the key -#else - add r4, r3, #248 -#endif - mov r5, r10 - vstmia r9, {q15} @ put aside IV - beq .Lcbc_dec_two - vld1.8 {q2}, [r0]! - cmp r2, #4 - blo .Lcbc_dec_three - vld1.8 {q3}, [r0]! - beq .Lcbc_dec_four - vld1.8 {q4}, [r0]! - cmp r2, #6 - blo .Lcbc_dec_five - vld1.8 {q5}, [r0]! - beq .Lcbc_dec_six - vld1.8 {q6}, [r0]! - sub r0, r0, #0x70 - - bl _bsaes_decrypt8 - - vldmia r9, {q14} @ reload IV - vld1.8 {q8-q9}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q10-q11}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vld1.8 {q12-q13}, [r0]! - veor q4, q4, q10 - veor q2, q2, q11 - vld1.8 {q15}, [r0]! - veor q7, q7, q12 - vst1.8 {q0-q1}, [r1]! 
@ write output - veor q3, q3, q13 - vst1.8 {q6}, [r1]! - vst1.8 {q4}, [r1]! - vst1.8 {q2}, [r1]! - vst1.8 {q7}, [r1]! - vst1.8 {q3}, [r1]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_six: - sub r0, r0, #0x60 - bl _bsaes_decrypt8 - vldmia r9,{q14} @ reload IV - vld1.8 {q8-q9}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q10-q11}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vld1.8 {q12}, [r0]! - veor q4, q4, q10 - veor q2, q2, q11 - vld1.8 {q15}, [r0]! - veor q7, q7, q12 - vst1.8 {q0-q1}, [r1]! @ write output - vst1.8 {q6}, [r1]! - vst1.8 {q4}, [r1]! - vst1.8 {q2}, [r1]! - vst1.8 {q7}, [r1]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_five: - sub r0, r0, #0x50 - bl _bsaes_decrypt8 - vldmia r9, {q14} @ reload IV - vld1.8 {q8-q9}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q10-q11}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vld1.8 {q15}, [r0]! - veor q4, q4, q10 - vst1.8 {q0-q1}, [r1]! @ write output - veor q2, q2, q11 - vst1.8 {q6}, [r1]! - vst1.8 {q4}, [r1]! - vst1.8 {q2}, [r1]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_four: - sub r0, r0, #0x40 - bl _bsaes_decrypt8 - vldmia r9, {q14} @ reload IV - vld1.8 {q8-q9}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q10}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vld1.8 {q15}, [r0]! - veor q4, q4, q10 - vst1.8 {q0-q1}, [r1]! @ write output - vst1.8 {q6}, [r1]! - vst1.8 {q4}, [r1]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_three: - sub r0, r0, #0x30 - bl _bsaes_decrypt8 - vldmia r9, {q14} @ reload IV - vld1.8 {q8-q9}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q15}, [r0]! - veor q1, q1, q8 - veor q6, q6, q9 - vst1.8 {q0-q1}, [r1]! @ write output - vst1.8 {q6}, [r1]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_two: - sub r0, r0, #0x20 - bl _bsaes_decrypt8 - vldmia r9, {q14} @ reload IV - vld1.8 {q8}, [r0]! @ reload input - veor q0, q0, q14 @ ^= IV - vld1.8 {q15}, [r0]! @ reload input - veor q1, q1, q8 - vst1.8 {q0-q1}, [r1]! 
@ write output - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_one: - sub r0, r0, #0x10 - mov r10, r1 @ save original out pointer - mov r1, r9 @ use the iv scratch space as out buffer - mov r2, r3 - vmov q4,q15 @ just in case ensure that IV - vmov q5,q0 @ and input are preserved - bl AES_decrypt - vld1.8 {q0}, [r9,:64] @ load result - veor q0, q0, q4 @ ^= IV - vmov q15, q5 @ q5 holds input - vst1.8 {q0}, [r10] @ write output - -.Lcbc_dec_done: -#ifndef BSAES_ASM_EXTENDED_KEY - vmov.i32 q0, #0 - vmov.i32 q1, #0 -.Lcbc_dec_bzero: @ wipe key schedule [if any] - vstmia sp!, {q0-q1} - cmp sp, r9 - bne .Lcbc_dec_bzero -#endif - - mov sp, r9 - add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb - vst1.8 {q15}, [r8] @ return IV - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} -.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt -.extern AES_encrypt -.global bsaes_ctr32_encrypt_blocks -.type bsaes_ctr32_encrypt_blocks,%function -.align 5 -bsaes_ctr32_encrypt_blocks: - cmp r2, #8 @ use plain AES for - blo .Lctr_enc_short @ small sizes - - mov ip, sp - stmdb sp!, {r4-r10, lr} - VFP_ABI_PUSH - ldr r8, [ip] @ ctr is 1st arg on the stack - sub sp, sp, #0x10 @ scratch space to carry over the ctr - mov r9, sp @ save sp - - ldr r10, [r3, #240] @ get # of rounds -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key - add r12, #96 @ size of bit-sliced key schedule - - @ populate the key schedule - mov r4, r3 @ pass key - mov r5, r10 @ pass # of rounds - mov sp, r12 @ sp is sp - bl _bsaes_key_convert - veor q7,q7,q15 @ fix up last round key - vstmia r12, {q7} @ save last round key - - vld1.8 {q0}, [r8] @ load counter - add r8, r6, #.LREVM0SR-.LM0 @ borrow r8 - vldmia sp, {q4} @ load round0 key -#else - ldr r12, [r3, #244] - eors r12, #1 - beq 0f - - @ populate the key schedule - str r12, [r3, #244] - mov r4, r3 @ pass key - mov r5, r10 @ pass # of rounds - add r12, r3, #248 @ pass key schedule - bl _bsaes_key_convert - veor q7,q7,q15 @ fix up last round key - vstmia r12, {q7} @ save last round key - -.align 2 -0: add r12, r3, #248 - vld1.8 {q0}, [r8] @ load counter - adrl r8, .LREVM0SR @ borrow r8 - vldmia r12, {q4} @ load round0 key - sub sp, #0x10 @ place for adjusted round0 key -#endif - - vmov.i32 q8,#1 @ compose 1<<96 - veor q9,q9,q9 - vrev32.8 q0,q0 - vext.8 q8,q9,q8,#4 - vrev32.8 q4,q4 - vadd.u32 q9,q8,q8 @ compose 2<<96 - vstmia sp, {q4} @ save adjusted round0 key - b .Lctr_enc_loop - -.align 4 -.Lctr_enc_loop: - vadd.u32 q10, q8, q9 @ compose 3<<96 - vadd.u32 q1, q0, q8 @ +1 - vadd.u32 q2, q0, q9 @ +2 - vadd.u32 q3, q0, q10 @ +3 - vadd.u32 q4, q1, q10 - vadd.u32 q5, q2, q10 - vadd.u32 q6, q3, q10 - vadd.u32 q7, q4, q10 - vadd.u32 q10, q5, q10 @ next counter - - @ Borrow prologue from _bsaes_encrypt8 to use the opportunity - @ to flip byte order in 32-bit counter - - vldmia sp, {q9} @ load round0 key -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x10 @ pass next round key -#else - add r4, r3, #264 -#endif - vldmia r8, {q8} @ .LREVM0SR - mov r5, r10 @ pass rounds - vstmia r9, {q10} @ save next counter - sub r6, r8, #.LREVM0SR-.LSR @ pass constants - - bl _bsaes_encrypt8_alt - - subs r2, r2, #8 - blo .Lctr_enc_loop_done - - vld1.8 {q8-q9}, [r0]! @ load input - vld1.8 {q10-q11}, [r0]! - veor q0, q8 - veor q1, q9 - vld1.8 {q12-q13}, [r0]! - veor q4, q10 - veor q6, q11 - vld1.8 {q14-q15}, [r0]! - veor q3, q12 - vst1.8 {q0-q1}, [r1]! @ write output - veor q7, q13 - veor q2, q14 - vst1.8 {q4}, [r1]! - veor q5, q15 - vst1.8 {q6}, [r1]! 
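
For illustration only (not part of the patch): in the bsaes_ctr32_encrypt_blocks code being removed here, the counter block was byte-reversed per 32-bit word (vrev32.8), so the big-endian AES-CTR counter sits in the most significant word as a native integer; the "compose 1<<96" instructions below rebuild the {0, 0, 0, 1} vector that vadd.u32 adds to step it. The net effect on the stored counter block, sketched in C:

#include <stdint.h>
#include <string.h>

/* Add n to the 32-bit big-endian counter in bytes 12..15, mod 2^32. */
static void ctr32_add(uint8_t block[16], uint32_t n)
{
	uint32_t ctr;

	memcpy(&ctr, block + 12, sizeof(ctr));
	ctr = __builtin_bswap32(__builtin_bswap32(ctr) + n);
	memcpy(block + 12, &ctr, sizeof(ctr));
}
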
- vmov.i32 q8, #1 @ compose 1<<96 - vst1.8 {q3}, [r1]! - veor q9, q9, q9 - vst1.8 {q7}, [r1]! - vext.8 q8, q9, q8, #4 - vst1.8 {q2}, [r1]! - vadd.u32 q9,q8,q8 @ compose 2<<96 - vst1.8 {q5}, [r1]! - vldmia r9, {q0} @ load counter - - bne .Lctr_enc_loop - b .Lctr_enc_done - -.align 4 -.Lctr_enc_loop_done: - add r2, r2, #8 - vld1.8 {q8}, [r0]! @ load input - veor q0, q8 - vst1.8 {q0}, [r1]! @ write output - cmp r2, #2 - blo .Lctr_enc_done - vld1.8 {q9}, [r0]! - veor q1, q9 - vst1.8 {q1}, [r1]! - beq .Lctr_enc_done - vld1.8 {q10}, [r0]! - veor q4, q10 - vst1.8 {q4}, [r1]! - cmp r2, #4 - blo .Lctr_enc_done - vld1.8 {q11}, [r0]! - veor q6, q11 - vst1.8 {q6}, [r1]! - beq .Lctr_enc_done - vld1.8 {q12}, [r0]! - veor q3, q12 - vst1.8 {q3}, [r1]! - cmp r2, #6 - blo .Lctr_enc_done - vld1.8 {q13}, [r0]! - veor q7, q13 - vst1.8 {q7}, [r1]! - beq .Lctr_enc_done - vld1.8 {q14}, [r0] - veor q2, q14 - vst1.8 {q2}, [r1]! - -.Lctr_enc_done: - vmov.i32 q0, #0 - vmov.i32 q1, #0 -#ifndef BSAES_ASM_EXTENDED_KEY -.Lctr_enc_bzero: @ wipe key schedule [if any] - vstmia sp!, {q0-q1} - cmp sp, r9 - bne .Lctr_enc_bzero -#else - vstmia sp, {q0-q1} -#endif - - mov sp, r9 - add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} @ return - -.align 4 -.Lctr_enc_short: - ldr ip, [sp] @ ctr pointer is passed on stack - stmdb sp!, {r4-r8, lr} - - mov r4, r0 @ copy arguments - mov r5, r1 - mov r6, r2 - mov r7, r3 - ldr r8, [ip, #12] @ load counter LSW - vld1.8 {q1}, [ip] @ load whole counter value -#ifdef __ARMEL__ - rev r8, r8 -#endif - sub sp, sp, #0x10 - vst1.8 {q1}, [sp,:64] @ copy counter value - sub sp, sp, #0x10 - -.Lctr_enc_short_loop: - add r0, sp, #0x10 @ input counter value - mov r1, sp @ output on the stack - mov r2, r7 @ key - - bl AES_encrypt - - vld1.8 {q0}, [r4]! @ load input - vld1.8 {q1}, [sp,:64] @ load encrypted counter - add r8, r8, #1 -#ifdef __ARMEL__ - rev r0, r8 - str r0, [sp, #0x1c] @ next counter value -#else - str r8, [sp, #0x1c] @ next counter value -#endif - veor q0,q0,q1 - vst1.8 {q0}, [r5]! 
@ store output - subs r6, r6, #1 - bne .Lctr_enc_short_loop - - vmov.i32 q0, #0 - vmov.i32 q1, #0 - vstmia sp!, {q0-q1} - - ldmia sp!, {r4-r8, pc} -.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks -.globl bsaes_xts_encrypt -.type bsaes_xts_encrypt,%function -.align 4 -bsaes_xts_encrypt: - mov ip, sp - stmdb sp!, {r4-r10, lr} @ 0x20 - VFP_ABI_PUSH - mov r6, sp @ future r3 - - mov r7, r0 - mov r8, r1 - mov r9, r2 - mov r10, r3 - - sub r0, sp, #0x10 @ 0x10 - bic r0, #0xf @ align at 16 bytes - mov sp, r0 - -#ifdef XTS_CHAIN_TWEAK - ldr r0, [ip] @ pointer to input tweak -#else - @ generate initial tweak - ldr r0, [ip, #4] @ iv[] - mov r1, sp - ldr r2, [ip, #0] @ key2 - bl AES_encrypt - mov r0,sp @ pointer to initial tweak -#endif - - ldr r1, [r10, #240] @ get # of rounds - mov r3, r6 -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key - @ add r12, #96 @ size of bit-sliced key schedule - sub r12, #48 @ place for tweak[9] - - @ populate the key schedule - mov r4, r10 @ pass key - mov r5, r1 @ pass # of rounds - mov sp, r12 - add r12, #0x90 @ pass key schedule - bl _bsaes_key_convert - veor q7, q7, q15 @ fix up last round key - vstmia r12, {q7} @ save last round key -#else - ldr r12, [r10, #244] - eors r12, #1 - beq 0f - - str r12, [r10, #244] - mov r4, r10 @ pass key - mov r5, r1 @ pass # of rounds - add r12, r10, #248 @ pass key schedule - bl _bsaes_key_convert - veor q7, q7, q15 @ fix up last round key - vstmia r12, {q7} - -.align 2 -0: sub sp, #0x90 @ place for tweak[9] -#endif - - vld1.8 {q8}, [r0] @ initial tweak - adr r2, .Lxts_magic - - subs r9, #0x80 - blo .Lxts_enc_short - b .Lxts_enc_loop - -.align 4 -.Lxts_enc_loop: - vldmia r2, {q5} @ load XTS magic - vshr.s64 q6, q8, #63 - mov r0, sp - vand q6, q6, q5 - vadd.u64 q9, q8, q8 - vst1.64 {q8}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q9, #63 - veor q9, q9, q6 - vand q7, q7, q5 - vadd.u64 q10, q9, q9 - vst1.64 {q9}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q10, #63 - veor q10, q10, q7 - vand q6, q6, q5 - vld1.8 {q0}, [r7]! - vadd.u64 q11, q10, q10 - vst1.64 {q10}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q11, #63 - veor q11, q11, q6 - vand q7, q7, q5 - vld1.8 {q1}, [r7]! - veor q0, q0, q8 - vadd.u64 q12, q11, q11 - vst1.64 {q11}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q12, #63 - veor q12, q12, q7 - vand q6, q6, q5 - vld1.8 {q2}, [r7]! - veor q1, q1, q9 - vadd.u64 q13, q12, q12 - vst1.64 {q12}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q13, #63 - veor q13, q13, q6 - vand q7, q7, q5 - vld1.8 {q3}, [r7]! - veor q2, q2, q10 - vadd.u64 q14, q13, q13 - vst1.64 {q13}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q14, #63 - veor q14, q14, q7 - vand q6, q6, q5 - vld1.8 {q4}, [r7]! - veor q3, q3, q11 - vadd.u64 q15, q14, q14 - vst1.64 {q14}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q15, #63 - veor q15, q15, q6 - vand q7, q7, q5 - vld1.8 {q5}, [r7]! - veor q4, q4, q12 - vadd.u64 q8, q15, q15 - vst1.64 {q15}, [r0,:128]! - vswp d15,d14 - veor q8, q8, q7 - vst1.64 {q8}, [r0,:128] @ next round tweak - - vld1.8 {q6-q7}, [r7]! - veor q5, q5, q13 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q6, q6, q14 - mov r5, r1 @ pass rounds - veor q7, q7, q15 - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! - veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! 
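[Editorial aside, not part of the diff: the vshr.s64/vand/vadd.u64/vswp/veor cluster repeated above is one multiplication of the 128-bit tweak by x in GF(2^128). The arithmetic right shift by 63 turns each 64-bit lane's sign bit into a mask, the vand against .Lxts_magic ({1, 0x87}) selects the cross-lane carry and the reduction constant, and the vswp routes each to the opposite lane before the final xor. A scalar sketch of the same update, assuming the XTS little-endian two-word view of the tweak:]

	#include <stdint.h>

	/* Sketch only: t[0] = low 64 bits, t[1] = high 64 bits. */
	static void xts_mul_x(uint64_t t[2])
	{
		uint64_t hi_carry = (t[1] >> 63) ? 0x87 : 0; /* reduction */
		uint64_t lo_carry = t[0] >> 63;   /* cross-lane carry */

		t[1] = (t[1] << 1) | lo_carry;	/* asm uses add + xor */
		t[0] = (t[0] << 1) ^ hi_carry;
	}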
- veor q9, q6, q11 - vld1.64 {q14-q15}, [r0,:128]! - veor q10, q3, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - veor q12, q2, q14 - vst1.8 {q10-q11}, [r8]! - veor q13, q5, q15 - vst1.8 {q12-q13}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - - subs r9, #0x80 - bpl .Lxts_enc_loop - -.Lxts_enc_short: - adds r9, #0x70 - bmi .Lxts_enc_done - - vldmia r2, {q5} @ load XTS magic - vshr.s64 q7, q8, #63 - mov r0, sp - vand q7, q7, q5 - vadd.u64 q9, q8, q8 - vst1.64 {q8}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q9, #63 - veor q9, q9, q7 - vand q6, q6, q5 - vadd.u64 q10, q9, q9 - vst1.64 {q9}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q10, #63 - veor q10, q10, q6 - vand q7, q7, q5 - vld1.8 {q0}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_1 - vadd.u64 q11, q10, q10 - vst1.64 {q10}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q11, #63 - veor q11, q11, q7 - vand q6, q6, q5 - vld1.8 {q1}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_2 - veor q0, q0, q8 - vadd.u64 q12, q11, q11 - vst1.64 {q11}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q12, #63 - veor q12, q12, q6 - vand q7, q7, q5 - vld1.8 {q2}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_3 - veor q1, q1, q9 - vadd.u64 q13, q12, q12 - vst1.64 {q12}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q13, #63 - veor q13, q13, q7 - vand q6, q6, q5 - vld1.8 {q3}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_4 - veor q2, q2, q10 - vadd.u64 q14, q13, q13 - vst1.64 {q13}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q14, #63 - veor q14, q14, q6 - vand q7, q7, q5 - vld1.8 {q4}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_5 - veor q3, q3, q11 - vadd.u64 q15, q14, q14 - vst1.64 {q14}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q15, #63 - veor q15, q15, q7 - vand q6, q6, q5 - vld1.8 {q5}, [r7]! - subs r9, #0x10 - bmi .Lxts_enc_6 - veor q4, q4, q12 - sub r9, #0x10 - vst1.64 {q15}, [r0,:128] @ next round tweak - - vld1.8 {q6}, [r7]! - veor q5, q5, q13 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q6, q6, q14 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! - veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q6, q11 - vld1.64 {q14}, [r0,:128]! - veor q10, q3, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - veor q12, q2, q14 - vst1.8 {q10-q11}, [r8]! - vst1.8 {q12}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_6: - vst1.64 {q14}, [r0,:128] @ next round tweak - - veor q4, q4, q12 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q5, q5, q13 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! - veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q6, q11 - veor q10, q3, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - vst1.8 {q10-q11}, [r8]! 
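[Editorial note, not part of the diff: the interleaved veor/vst1.8 sequences above look scrambled because _bsaes_encrypt8 and _bsaes_decrypt8 return block i in a permuted q register, a side effect of the bit-slice conversion (compare the "output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb" comment in bsaes-armv7.pl below). The store order encodes that permutation; as an illustrative mapping from block index to the q register holding it on return:]

	/* Illustrative only, derived from the store order above. */
	static const int bsaes_enc_out[8] = { 0, 1, 4, 6, 3, 7, 2, 5 };
	static const int bsaes_dec_out[8] = { 0, 1, 6, 4, 2, 7, 3, 5 };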
- - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done - -@ put this in range for both ARM and Thumb mode adr instructions -.align 5 -.Lxts_magic: - .quad 1, 0x87 - -.align 5 -.Lxts_enc_5: - vst1.64 {q13}, [r0,:128] @ next round tweak - - veor q3, q3, q11 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q4, q4, q12 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12}, [r0,:128]! - veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q6, q11 - veor q10, q3, q12 - vst1.8 {q8-q9}, [r8]! - vst1.8 {q10}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_4: - vst1.64 {q12}, [r0,:128] @ next round tweak - - veor q2, q2, q10 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q3, q3, q11 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q6, q11 - vst1.8 {q8-q9}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_3: - vst1.64 {q11}, [r0,:128] @ next round tweak - - veor q1, q1, q9 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q2, q2, q10 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - veor q8, q4, q10 - vst1.8 {q0-q1}, [r8]! - vst1.8 {q8}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_2: - vst1.64 {q10}, [r0,:128] @ next round tweak - - veor q0, q0, q8 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q1, q1, q9 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - vst1.8 {q0-q1}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_1: - mov r0, sp - veor q0, q8 - mov r1, sp - vst1.8 {q0}, [sp,:128] - mov r2, r10 - mov r4, r3 @ preserve fp - - bl AES_encrypt - - vld1.8 {q0}, [sp,:128] - veor q0, q0, q8 - vst1.8 {q0}, [r8]! 
- mov r3, r4 - - vmov q8, q9 @ next round tweak - -.Lxts_enc_done: -#ifndef XTS_CHAIN_TWEAK - adds r9, #0x10 - beq .Lxts_enc_ret - sub r6, r8, #0x10 - -.Lxts_enc_steal: - ldrb r0, [r7], #1 - ldrb r1, [r8, #-0x10] - strb r0, [r8, #-0x10] - strb r1, [r8], #1 - - subs r9, #1 - bhi .Lxts_enc_steal - - vld1.8 {q0}, [r6] - mov r0, sp - veor q0, q0, q8 - mov r1, sp - vst1.8 {q0}, [sp,:128] - mov r2, r10 - mov r4, r3 @ preserve fp - - bl AES_encrypt - - vld1.8 {q0}, [sp,:128] - veor q0, q0, q8 - vst1.8 {q0}, [r6] - mov r3, r4 -#endif - -.Lxts_enc_ret: - bic r0, r3, #0xf - vmov.i32 q0, #0 - vmov.i32 q1, #0 -#ifdef XTS_CHAIN_TWEAK - ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak -#endif -.Lxts_enc_bzero: @ wipe key schedule [if any] - vstmia sp!, {q0-q1} - cmp sp, r0 - bne .Lxts_enc_bzero - - mov sp, r3 -#ifdef XTS_CHAIN_TWEAK - vst1.8 {q8}, [r1] -#endif - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} @ return - -.size bsaes_xts_encrypt,.-bsaes_xts_encrypt - -.globl bsaes_xts_decrypt -.type bsaes_xts_decrypt,%function -.align 4 -bsaes_xts_decrypt: - mov ip, sp - stmdb sp!, {r4-r10, lr} @ 0x20 - VFP_ABI_PUSH - mov r6, sp @ future r3 - - mov r7, r0 - mov r8, r1 - mov r9, r2 - mov r10, r3 - - sub r0, sp, #0x10 @ 0x10 - bic r0, #0xf @ align at 16 bytes - mov sp, r0 - -#ifdef XTS_CHAIN_TWEAK - ldr r0, [ip] @ pointer to input tweak -#else - @ generate initial tweak - ldr r0, [ip, #4] @ iv[] - mov r1, sp - ldr r2, [ip, #0] @ key2 - bl AES_encrypt - mov r0, sp @ pointer to initial tweak -#endif - - ldr r1, [r10, #240] @ get # of rounds - mov r3, r6 -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key - @ add r12, #96 @ size of bit-sliced key schedule - sub r12, #48 @ place for tweak[9] - - @ populate the key schedule - mov r4, r10 @ pass key - mov r5, r1 @ pass # of rounds - mov sp, r12 - add r12, #0x90 @ pass key schedule - bl _bsaes_key_convert - add r4, sp, #0x90 - vldmia r4, {q6} - vstmia r12, {q15} @ save last round key - veor q7, q7, q6 @ fix up round 0 key - vstmia r4, {q7} -#else - ldr r12, [r10, #244] - eors r12, #1 - beq 0f - - str r12, [r10, #244] - mov r4, r10 @ pass key - mov r5, r1 @ pass # of rounds - add r12, r10, #248 @ pass key schedule - bl _bsaes_key_convert - add r4, r10, #248 - vldmia r4, {q6} - vstmia r12, {q15} @ save last round key - veor q7, q7, q6 @ fix up round 0 key - vstmia r4, {q7} - -.align 2 -0: sub sp, #0x90 @ place for tweak[9] -#endif - vld1.8 {q8}, [r0] @ initial tweak - adr r2, .Lxts_magic - -#ifndef XTS_CHAIN_TWEAK - tst r9, #0xf @ if not multiple of 16 - it ne @ Thumb2 thing, sanity check in ARM - subne r9, #0x10 @ subtract another 16 bytes -#endif - subs r9, #0x80 - - blo .Lxts_dec_short - b .Lxts_dec_loop - -.align 4 -.Lxts_dec_loop: - vldmia r2, {q5} @ load XTS magic - vshr.s64 q6, q8, #63 - mov r0, sp - vand q6, q6, q5 - vadd.u64 q9, q8, q8 - vst1.64 {q8}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q9, #63 - veor q9, q9, q6 - vand q7, q7, q5 - vadd.u64 q10, q9, q9 - vst1.64 {q9}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q10, #63 - veor q10, q10, q7 - vand q6, q6, q5 - vld1.8 {q0}, [r7]! - vadd.u64 q11, q10, q10 - vst1.64 {q10}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q11, #63 - veor q11, q11, q6 - vand q7, q7, q5 - vld1.8 {q1}, [r7]! - veor q0, q0, q8 - vadd.u64 q12, q11, q11 - vst1.64 {q11}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q12, #63 - veor q12, q12, q7 - vand q6, q6, q5 - vld1.8 {q2}, [r7]! - veor q1, q1, q9 - vadd.u64 q13, q12, q12 - vst1.64 {q12}, [r0,:128]! 
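[Editorial sketch, not part of the diff: .Lxts_enc_steal above implements XTS ciphertext stealing for a trailing partial block of r bytes. The last r ciphertext bytes are handed out as the tail, the plaintext tail is spliced into the last full block, and the patched block is encrypted again with the final tweak. The decrypt side, .Lxts_dec_steal further down, does the analogous splice but must first decrypt the last full block with one extra tweak doubling, per the "extra tweak for the stolen ciphertext" comment there. In scalar C, with aes_enc_block() as a hypothetical single-block helper:]

	#include <stdint.h>
	#include <stddef.h>

	/* Hypothetical single-block AES helper, for illustration only. */
	extern void aes_enc_block(const uint8_t in[16], uint8_t out[16]);

	static void xor16(uint8_t *d, const uint8_t *s)
	{
		int i;

		for (i = 0; i < 16; i++)
			d[i] ^= s[i];
	}

	/* last_ct: last full ciphertext block, updated in place. */
	static void xts_steal_enc(uint8_t last_ct[16], const uint8_t *tail_pt,
				  size_t r, uint8_t *tail_ct,
				  const uint8_t tweak[16])
	{
		size_t i;

		for (i = 0; i < r; i++) {
			tail_ct[i] = last_ct[i];  /* steal ciphertext bytes */
			last_ct[i] = tail_pt[i];  /* splice in plaintext    */
		}
		xor16(last_ct, tweak);
		aes_enc_block(last_ct, last_ct);
		xor16(last_ct, tweak);
	}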
- vswp d13,d12 - vshr.s64 q7, q13, #63 - veor q13, q13, q6 - vand q7, q7, q5 - vld1.8 {q3}, [r7]! - veor q2, q2, q10 - vadd.u64 q14, q13, q13 - vst1.64 {q13}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q14, #63 - veor q14, q14, q7 - vand q6, q6, q5 - vld1.8 {q4}, [r7]! - veor q3, q3, q11 - vadd.u64 q15, q14, q14 - vst1.64 {q14}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q15, #63 - veor q15, q15, q6 - vand q7, q7, q5 - vld1.8 {q5}, [r7]! - veor q4, q4, q12 - vadd.u64 q8, q15, q15 - vst1.64 {q15}, [r0,:128]! - vswp d15,d14 - veor q8, q8, q7 - vst1.64 {q8}, [r0,:128] @ next round tweak - - vld1.8 {q6-q7}, [r7]! - veor q5, q5, q13 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q6, q6, q14 - mov r5, r1 @ pass rounds - veor q7, q7, q15 - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q4, q11 - vld1.64 {q14-q15}, [r0,:128]! - veor q10, q2, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - veor q12, q3, q14 - vst1.8 {q10-q11}, [r8]! - veor q13, q5, q15 - vst1.8 {q12-q13}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - - subs r9, #0x80 - bpl .Lxts_dec_loop - -.Lxts_dec_short: - adds r9, #0x70 - bmi .Lxts_dec_done - - vldmia r2, {q5} @ load XTS magic - vshr.s64 q7, q8, #63 - mov r0, sp - vand q7, q7, q5 - vadd.u64 q9, q8, q8 - vst1.64 {q8}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q9, #63 - veor q9, q9, q7 - vand q6, q6, q5 - vadd.u64 q10, q9, q9 - vst1.64 {q9}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q10, #63 - veor q10, q10, q6 - vand q7, q7, q5 - vld1.8 {q0}, [r7]! - subs r9, #0x10 - bmi .Lxts_dec_1 - vadd.u64 q11, q10, q10 - vst1.64 {q10}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q11, #63 - veor q11, q11, q7 - vand q6, q6, q5 - vld1.8 {q1}, [r7]! - subs r9, #0x10 - bmi .Lxts_dec_2 - veor q0, q0, q8 - vadd.u64 q12, q11, q11 - vst1.64 {q11}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q12, #63 - veor q12, q12, q6 - vand q7, q7, q5 - vld1.8 {q2}, [r7]! - subs r9, #0x10 - bmi .Lxts_dec_3 - veor q1, q1, q9 - vadd.u64 q13, q12, q12 - vst1.64 {q12}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q13, #63 - veor q13, q13, q7 - vand q6, q6, q5 - vld1.8 {q3}, [r7]! - subs r9, #0x10 - bmi .Lxts_dec_4 - veor q2, q2, q10 - vadd.u64 q14, q13, q13 - vst1.64 {q13}, [r0,:128]! - vswp d13,d12 - vshr.s64 q7, q14, #63 - veor q14, q14, q6 - vand q7, q7, q5 - vld1.8 {q4}, [r7]! - subs r9, #0x10 - bmi .Lxts_dec_5 - veor q3, q3, q11 - vadd.u64 q15, q14, q14 - vst1.64 {q14}, [r0,:128]! - vswp d15,d14 - vshr.s64 q6, q15, #63 - veor q15, q15, q7 - vand q6, q6, q5 - vld1.8 {q5}, [r7]! - subs r9, #0x10 - bmi .Lxts_dec_6 - veor q4, q4, q12 - sub r9, #0x10 - vst1.64 {q15}, [r0,:128] @ next round tweak - - vld1.8 {q6}, [r7]! - veor q5, q5, q13 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q6, q6, q14 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q4, q11 - vld1.64 {q14}, [r0,:128]! - veor q10, q2, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - veor q12, q3, q14 - vst1.8 {q10-q11}, [r8]! - vst1.8 {q12}, [r8]! 
- - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_6: - vst1.64 {q14}, [r0,:128] @ next round tweak - - veor q4, q4, q12 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q5, q5, q13 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12-q13}, [r0,:128]! - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q4, q11 - veor q10, q2, q12 - vst1.8 {q8-q9}, [r8]! - veor q11, q7, q13 - vst1.8 {q10-q11}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_5: - vst1.64 {q13}, [r0,:128] @ next round tweak - - veor q3, q3, q11 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q4, q4, q12 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - vld1.64 {q12}, [r0,:128]! - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q4, q11 - veor q10, q2, q12 - vst1.8 {q8-q9}, [r8]! - vst1.8 {q10}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_4: - vst1.64 {q12}, [r0,:128] @ next round tweak - - veor q2, q2, q10 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q3, q3, q11 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10-q11}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - veor q9, q4, q11 - vst1.8 {q8-q9}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_3: - vst1.64 {q11}, [r0,:128] @ next round tweak - - veor q1, q1, q9 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q2, q2, q10 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - vld1.64 {q10}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - veor q8, q6, q10 - vst1.8 {q0-q1}, [r8]! - vst1.8 {q8}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_2: - vst1.64 {q10}, [r0,:128] @ next round tweak - - veor q0, q0, q8 -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, r10, #248 @ pass key schedule -#endif - veor q1, q1, q9 - mov r5, r1 @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {q8-q9}, [r0,:128]! - veor q0, q0, q8 - veor q1, q1, q9 - vst1.8 {q0-q1}, [r8]! - - vld1.64 {q8}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_1: - mov r0, sp - veor q0, q8 - mov r1, sp - vst1.8 {q0}, [sp,:128] - mov r2, r10 - mov r4, r3 @ preserve fp - mov r5, r2 @ preserve magic - - bl AES_decrypt - - vld1.8 {q0}, [sp,:128] - veor q0, q0, q8 - vst1.8 {q0}, [r8]! - mov r3, r4 - mov r2, r5 - - vmov q8, q9 @ next round tweak - -.Lxts_dec_done: -#ifndef XTS_CHAIN_TWEAK - adds r9, #0x10 - beq .Lxts_dec_ret - - @ calculate one round of extra tweak for the stolen ciphertext - vldmia r2, {q5} - vshr.s64 q6, q8, #63 - vand q6, q6, q5 - vadd.u64 q9, q8, q8 - vswp d13,d12 - veor q9, q9, q6 - - @ perform the final decryption with the last tweak value - vld1.8 {q0}, [r7]! 
- mov r0, sp - veor q0, q0, q9 - mov r1, sp - vst1.8 {q0}, [sp,:128] - mov r2, r10 - mov r4, r3 @ preserve fp - - bl AES_decrypt - - vld1.8 {q0}, [sp,:128] - veor q0, q0, q9 - vst1.8 {q0}, [r8] - - mov r6, r8 -.Lxts_dec_steal: - ldrb r1, [r8] - ldrb r0, [r7], #1 - strb r1, [r8, #0x10] - strb r0, [r8], #1 - - subs r9, #1 - bhi .Lxts_dec_steal - - vld1.8 {q0}, [r6] - mov r0, sp - veor q0, q8 - mov r1, sp - vst1.8 {q0}, [sp,:128] - mov r2, r10 - - bl AES_decrypt - - vld1.8 {q0}, [sp,:128] - veor q0, q0, q8 - vst1.8 {q0}, [r6] - mov r3, r4 -#endif - -.Lxts_dec_ret: - bic r0, r3, #0xf - vmov.i32 q0, #0 - vmov.i32 q1, #0 -#ifdef XTS_CHAIN_TWEAK - ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak -#endif -.Lxts_dec_bzero: @ wipe key schedule [if any] - vstmia sp!, {q0-q1} - cmp sp, r0 - bne .Lxts_dec_bzero - - mov sp, r3 -#ifdef XTS_CHAIN_TWEAK - vst1.8 {q8}, [r1] -#endif - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} @ return - -.size bsaes_xts_decrypt,.-bsaes_xts_decrypt -#endif diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c deleted file mode 100644 index d8e06de72ef3..000000000000 --- a/arch/arm/crypto/aesbs-glue.c +++ /dev/null @@ -1,367 +0,0 @@ -/* - * linux/arch/arm/crypto/aesbs-glue.c - glue code for NEON bit sliced AES - * - * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <asm/neon.h> -#include <crypto/aes.h> -#include <crypto/cbc.h> -#include <crypto/internal/simd.h> -#include <crypto/internal/skcipher.h> -#include <linux/module.h> -#include <crypto/xts.h> - -#include "aes_glue.h" - -#define BIT_SLICED_KEY_MAXSIZE (128 * (AES_MAXNR - 1) + 2 * AES_BLOCK_SIZE) - -struct BS_KEY { - struct AES_KEY rk; - int converted; - u8 __aligned(8) bs[BIT_SLICED_KEY_MAXSIZE]; -} __aligned(8); - -asmlinkage void bsaes_enc_key_convert(u8 out[], struct AES_KEY const *in); -asmlinkage void bsaes_dec_key_convert(u8 out[], struct AES_KEY const *in); - -asmlinkage void bsaes_cbc_encrypt(u8 const in[], u8 out[], u32 bytes, - struct BS_KEY *key, u8 iv[]); - -asmlinkage void bsaes_ctr32_encrypt_blocks(u8 const in[], u8 out[], u32 blocks, - struct BS_KEY *key, u8 const iv[]); - -asmlinkage void bsaes_xts_encrypt(u8 const in[], u8 out[], u32 bytes, - struct BS_KEY *key, u8 tweak[]); - -asmlinkage void bsaes_xts_decrypt(u8 const in[], u8 out[], u32 bytes, - struct BS_KEY *key, u8 tweak[]); - -struct aesbs_cbc_ctx { - struct AES_KEY enc; - struct BS_KEY dec; -}; - -struct aesbs_ctr_ctx { - struct BS_KEY enc; -}; - -struct aesbs_xts_ctx { - struct BS_KEY enc; - struct BS_KEY dec; - struct AES_KEY twkey; -}; - -static int aesbs_cbc_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) -{ - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - int bits = key_len * 8; - - if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc)) { - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - ctx->dec.rk = ctx->enc; - private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk); - ctx->dec.converted = 0; - return 0; -} - -static int aesbs_ctr_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) -{ - struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - int bits = key_len * 8; - - if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) { - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return 
-EINVAL; - } - ctx->enc.converted = 0; - return 0; -} - -static int aesbs_xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) -{ - struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - int bits = key_len * 4; - int err; - - err = xts_verify_key(tfm, in_key, key_len); - if (err) - return err; - - if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) { - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - ctx->dec.rk = ctx->enc.rk; - private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk); - private_AES_set_encrypt_key(in_key + key_len / 2, bits, &ctx->twkey); - ctx->enc.converted = ctx->dec.converted = 0; - return 0; -} - -static inline void aesbs_encrypt_one(struct crypto_skcipher *tfm, - const u8 *src, u8 *dst) -{ - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - - AES_encrypt(src, dst, &ctx->enc); -} - -static int aesbs_cbc_encrypt(struct skcipher_request *req) -{ - return crypto_cbc_encrypt_walk(req, aesbs_encrypt_one); -} - -static inline void aesbs_decrypt_one(struct crypto_skcipher *tfm, - const u8 *src, u8 *dst) -{ - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - - AES_decrypt(src, dst, &ctx->dec.rk); -} - -static int aesbs_cbc_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - for (err = skcipher_walk_virt(&walk, req, false); - (nbytes = walk.nbytes); err = skcipher_walk_done(&walk, nbytes)) { - u32 blocks = nbytes / AES_BLOCK_SIZE; - u8 *dst = walk.dst.virt.addr; - u8 *src = walk.src.virt.addr; - u8 *iv = walk.iv; - - if (blocks >= 8) { - kernel_neon_begin(); - bsaes_cbc_encrypt(src, dst, nbytes, &ctx->dec, iv); - kernel_neon_end(); - nbytes %= AES_BLOCK_SIZE; - continue; - } - - nbytes = crypto_cbc_decrypt_blocks(&walk, tfm, - aesbs_decrypt_one); - } - return err; -} - -static void inc_be128_ctr(__be32 ctr[], u32 addend) -{ - int i; - - for (i = 3; i >= 0; i--, addend = 1) { - u32 n = be32_to_cpu(ctr[i]) + addend; - - ctr[i] = cpu_to_be32(n); - if (n >= addend) - break; - } -} - -static int aesbs_ctr_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - u32 blocks; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) { - u32 tail = walk.nbytes % AES_BLOCK_SIZE; - __be32 *ctr = (__be32 *)walk.iv; - u32 headroom = UINT_MAX - be32_to_cpu(ctr[3]); - - /* avoid 32 bit counter overflow in the NEON code */ - if (unlikely(headroom < blocks)) { - blocks = headroom + 1; - tail = walk.nbytes - blocks * AES_BLOCK_SIZE; - } - kernel_neon_begin(); - bsaes_ctr32_encrypt_blocks(walk.src.virt.addr, - walk.dst.virt.addr, blocks, - &ctx->enc, walk.iv); - kernel_neon_end(); - inc_be128_ctr(ctr, blocks); - - err = skcipher_walk_done(&walk, tail); - } - if (walk.nbytes) { - u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; - u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; - u8 ks[AES_BLOCK_SIZE]; - - AES_encrypt(walk.iv, ks, &ctx->enc.rk); - if (tdst != tsrc) - memcpy(tdst, tsrc, walk.nbytes); - crypto_xor(tdst, ks, walk.nbytes); - err = skcipher_walk_done(&walk, 0); - } - return err; -} - -static int aesbs_xts_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_xts_ctx 
*ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - /* generate the initial tweak */ - AES_encrypt(walk.iv, walk.iv, &ctx->twkey); - - while (walk.nbytes) { - kernel_neon_begin(); - bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, - walk.nbytes, &ctx->enc, walk.iv); - kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); - } - return err; -} - -static int aesbs_xts_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - /* generate the initial tweak */ - AES_encrypt(walk.iv, walk.iv, &ctx->twkey); - - while (walk.nbytes) { - kernel_neon_begin(); - bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, - walk.nbytes, &ctx->dec, walk.iv); - kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); - } - return err; -} - -static struct skcipher_alg aesbs_algs[] = { { - .base = { - .cra_name = "__cbc(aes)", - .cra_driver_name = "__cbc-aes-neonbs", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aesbs_cbc_ctx), - .cra_alignmask = 7, - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aesbs_cbc_set_key, - .encrypt = aesbs_cbc_encrypt, - .decrypt = aesbs_cbc_decrypt, -}, { - .base = { - .cra_name = "__ctr(aes)", - .cra_driver_name = "__ctr-aes-neonbs", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aesbs_ctr_ctx), - .cra_alignmask = 7, - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .chunksize = AES_BLOCK_SIZE, - .setkey = aesbs_ctr_set_key, - .encrypt = aesbs_ctr_encrypt, - .decrypt = aesbs_ctr_encrypt, -}, { - .base = { - .cra_name = "__xts(aes)", - .cra_driver_name = "__xts-aes-neonbs", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aesbs_xts_ctx), - .cra_alignmask = 7, - .cra_module = THIS_MODULE, - }, - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aesbs_xts_set_key, - .encrypt = aesbs_xts_encrypt, - .decrypt = aesbs_xts_decrypt, -} }; - -struct simd_skcipher_alg *aesbs_simd_algs[ARRAY_SIZE(aesbs_algs)]; - -static void aesbs_mod_exit(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(aesbs_simd_algs) && aesbs_simd_algs[i]; i++) - simd_skcipher_free(aesbs_simd_algs[i]); - - crypto_unregister_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs)); -} - -static int __init aesbs_mod_init(void) -{ - struct simd_skcipher_alg *simd; - const char *basename; - const char *algname; - const char *drvname; - int err; - int i; - - if (!cpu_has_neon()) - return -ENODEV; - - err = crypto_register_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs)); - if (err) - return err; - - for (i = 0; i < ARRAY_SIZE(aesbs_algs); i++) { - algname = aesbs_algs[i].base.cra_name + 2; - drvname = aesbs_algs[i].base.cra_driver_name + 2; - basename = aesbs_algs[i].base.cra_driver_name; - simd = simd_skcipher_create_compat(algname, drvname, basename); - err = PTR_ERR(simd); - if (IS_ERR(simd)) - goto unregister_simds; - - aesbs_simd_algs[i] = simd; - } - 
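	/*
	 * Editorial note, not part of the diff: the "+ 2" in the loop
	 * above strips the "__" prefix that marks these
	 * CRYPTO_ALG_INTERNAL implementations, so each simd wrapper is
	 * registered under the user-visible name, e.g. the internal
	 * "__cbc(aes)"/"__cbc-aes-neonbs" pair gains a usable
	 * "cbc(aes)"/"cbc-aes-neonbs" front end that invokes the NEON
	 * code only in contexts where the FPU may be used.
	 */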
- return 0; - -unregister_simds: - aesbs_mod_exit(); - return err; -} - -module_init(aesbs_mod_init); -module_exit(aesbs_mod_exit); - -MODULE_DESCRIPTION("Bit sliced AES in CBC/CTR/XTS modes using NEON"); -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_LICENSE("GPL"); diff --git a/arch/arm/crypto/bsaes-armv7.pl b/arch/arm/crypto/bsaes-armv7.pl deleted file mode 100644 index a4d3856e7d24..000000000000 --- a/arch/arm/crypto/bsaes-armv7.pl +++ /dev/null @@ -1,2471 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# -# Specific modes and adaptation for Linux kernel by Ard Biesheuvel -# <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is -# granted. -# ==================================================================== - -# Bit-sliced AES for ARM NEON -# -# February 2012. -# -# This implementation is direct adaptation of bsaes-x86_64 module for -# ARM NEON. Except that this module is endian-neutral [in sense that -# it can be compiled for either endianness] by courtesy of vld1.8's -# neutrality. Initial version doesn't implement interface to OpenSSL, -# only low-level primitives and unsupported entry points, just enough -# to collect performance results, which for Cortex-A8 core are: -# -# encrypt 19.5 cycles per byte processed with 128-bit key -# decrypt 22.1 cycles per byte processed with 128-bit key -# key conv. 440 cycles per 128-bit key/0.18 of 8x block -# -# Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, -# which is [much] worse than anticipated (for further details see -# http://www.openssl.org/~appro/Snapdragon-S4.html). -# -# Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code -# manages in 20.0 cycles]. -# -# When comparing to x86_64 results keep in mind that NEON unit is -# [mostly] single-issue and thus can't [fully] benefit from -# instruction-level parallelism. And when comparing to aes-armv4 -# results keep in mind key schedule conversion overhead (see -# bsaes-x86_64.pl for further details)... -# -# <appro@openssl.org> - -# April-August 2013 -# -# Add CBC, CTR and XTS subroutines, adapt for kernel use. 
-# -# <ard.biesheuvel@linaro.org> - -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; - -my ($inp,$out,$len,$key)=("r0","r1","r2","r3"); -my @XMM=map("q$_",(0..15)); - -{ -my ($key,$rounds,$const)=("r4","r5","r6"); - -sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } -sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } - -sub Sbox { -# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb -my @b=@_[0..7]; -my @t=@_[8..11]; -my @s=@_[12..15]; - &InBasisChange (@b); - &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s); - &OutBasisChange (@b[7,1,4,2,6,5,0,3]); -} - -sub InBasisChange { -# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb -my @b=@_[0..7]; -$code.=<<___; - veor @b[2], @b[2], @b[1] - veor @b[5], @b[5], @b[6] - veor @b[3], @b[3], @b[0] - veor @b[6], @b[6], @b[2] - veor @b[5], @b[5], @b[0] - - veor @b[6], @b[6], @b[3] - veor @b[3], @b[3], @b[7] - veor @b[7], @b[7], @b[5] - veor @b[3], @b[3], @b[4] - veor @b[4], @b[4], @b[5] - - veor @b[2], @b[2], @b[7] - veor @b[3], @b[3], @b[1] - veor @b[1], @b[1], @b[5] -___ -} - -sub OutBasisChange { -# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb -my @b=@_[0..7]; -$code.=<<___; - veor @b[0], @b[0], @b[6] - veor @b[1], @b[1], @b[4] - veor @b[4], @b[4], @b[6] - veor @b[2], @b[2], @b[0] - veor @b[6], @b[6], @b[1] - - veor @b[1], @b[1], @b[5] - veor @b[5], @b[5], @b[3] - veor @b[3], @b[3], @b[7] - veor @b[7], @b[7], @b[5] - veor @b[2], @b[2], @b[5] - - veor @b[4], @b[4], @b[7] -___ -} - -sub InvSbox { -# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb -my @b=@_[0..7]; -my @t=@_[8..11]; -my @s=@_[12..15]; - &InvInBasisChange (@b); - &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s); - &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]); -} - -sub InvInBasisChange { # OutBasisChange in reverse (with twist) -my @b=@_[5,1,2,6,3,7,0,4]; -$code.=<<___ - veor @b[1], @b[1], @b[7] - veor @b[4], @b[4], @b[7] - - veor @b[7], @b[7], @b[5] - veor @b[1], @b[1], @b[3] - veor @b[2], @b[2], @b[5] - veor @b[3], @b[3], @b[7] - - veor @b[6], @b[6], @b[1] - veor @b[2], @b[2], @b[0] - veor @b[5], @b[5], @b[3] - veor @b[4], @b[4], @b[6] - veor @b[0], @b[0], @b[6] - veor @b[1], @b[1], @b[4] -___ -} - -sub InvOutBasisChange { # InBasisChange in reverse -my @b=@_[2,5,7,3,6,1,0,4]; -$code.=<<___; - veor @b[1], @b[1], @b[5] - veor @b[2], @b[2], @b[7] - - veor @b[3], @b[3], @b[1] - veor @b[4], @b[4], @b[5] - veor @b[7], @b[7], @b[5] - veor @b[3], @b[3], @b[4] - veor @b[5], @b[5], @b[0] - veor @b[3], @b[3], @b[7] - veor @b[6], @b[6], @b[2] - veor @b[2], @b[2], @b[1] - veor @b[6], @b[6], @b[3] - - veor @b[3], @b[3], @b[0] - veor @b[5], @b[5], @b[6] -___ -} - -sub Mul_GF4 { -#;************************************************************* -#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) * -#;************************************************************* -my ($x0,$x1,$y0,$y1,$t0,$t1)=@_; -$code.=<<___; - veor $t0, $y0, $y1 - vand $t0, $t0, $x0 - veor $x0, $x0, $x1 - vand $t1, $x1, $y0 - vand $x0, $x0, $y1 - veor $x1, $t1, $t0 - veor $x0, $x0, $t1 -___ -} - -sub Mul_GF4_N { # not used, see next subroutine -# multiply and scale by N -my ($x0,$x1,$y0,$y1,$t0)=@_; -$code.=<<___; - veor $t0, $y0, $y1 - vand $t0, $t0, $x0 - veor $x0, $x0, $x1 - vand $x1, $x1, $y0 - vand $x0, $x0, $y1 - veor $x1, $x1, $x0 - veor $x0, $x0, $t0 
-___ -} - -sub Mul_GF4_N_GF4 { -# interleaved Mul_GF4_N and Mul_GF4 -my ($x0,$x1,$y0,$y1,$t0, - $x2,$x3,$y2,$y3,$t1)=@_; -$code.=<<___; - veor $t0, $y0, $y1 - veor $t1, $y2, $y3 - vand $t0, $t0, $x0 - vand $t1, $t1, $x2 - veor $x0, $x0, $x1 - veor $x2, $x2, $x3 - vand $x1, $x1, $y0 - vand $x3, $x3, $y2 - vand $x0, $x0, $y1 - vand $x2, $x2, $y3 - veor $x1, $x1, $x0 - veor $x2, $x2, $x3 - veor $x0, $x0, $t0 - veor $x3, $x3, $t1 -___ -} -sub Mul_GF16_2 { -my @x=@_[0..7]; -my @y=@_[8..11]; -my @t=@_[12..15]; -$code.=<<___; - veor @t[0], @x[0], @x[2] - veor @t[1], @x[1], @x[3] -___ - &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2..3]); -$code.=<<___; - veor @y[0], @y[0], @y[2] - veor @y[1], @y[1], @y[3] -___ - Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], - @x[2], @x[3], @y[2], @y[3], @t[2]); -$code.=<<___; - veor @x[0], @x[0], @t[0] - veor @x[2], @x[2], @t[0] - veor @x[1], @x[1], @t[1] - veor @x[3], @x[3], @t[1] - - veor @t[0], @x[4], @x[6] - veor @t[1], @x[5], @x[7] -___ - &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], - @x[6], @x[7], @y[2], @y[3], @t[2]); -$code.=<<___; - veor @y[0], @y[0], @y[2] - veor @y[1], @y[1], @y[3] -___ - &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[2..3]); -$code.=<<___; - veor @x[4], @x[4], @t[0] - veor @x[6], @x[6], @t[0] - veor @x[5], @x[5], @t[1] - veor @x[7], @x[7], @t[1] -___ -} -sub Inv_GF256 { -#;******************************************************************** -#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) * -#;******************************************************************** -my @x=@_[0..7]; -my @t=@_[8..11]; -my @s=@_[12..15]; -# direct optimizations from hardware -$code.=<<___; - veor @t[3], @x[4], @x[6] - veor @t[2], @x[5], @x[7] - veor @t[1], @x[1], @x[3] - veor @s[1], @x[7], @x[6] - vmov @t[0], @t[2] - veor @s[0], @x[0], @x[2] - - vorr @t[2], @t[2], @t[1] - veor @s[3], @t[3], @t[0] - vand @s[2], @t[3], @s[0] - vorr @t[3], @t[3], @s[0] - veor @s[0], @s[0], @t[1] - vand @t[0], @t[0], @t[1] - veor @t[1], @x[3], @x[2] - vand @s[3], @s[3], @s[0] - vand @s[1], @s[1], @t[1] - veor @t[1], @x[4], @x[5] - veor @s[0], @x[1], @x[0] - veor @t[3], @t[3], @s[1] - veor @t[2], @t[2], @s[1] - vand @s[1], @t[1], @s[0] - vorr @t[1], @t[1], @s[0] - veor @t[3], @t[3], @s[3] - veor @t[0], @t[0], @s[1] - veor @t[2], @t[2], @s[2] - veor @t[1], @t[1], @s[3] - veor @t[0], @t[0], @s[2] - vand @s[0], @x[7], @x[3] - veor @t[1], @t[1], @s[2] - vand @s[1], @x[6], @x[2] - vand @s[2], @x[5], @x[1] - vorr @s[3], @x[4], @x[0] - veor @t[3], @t[3], @s[0] - veor @t[1], @t[1], @s[2] - veor @t[0], @t[0], @s[3] - veor @t[2], @t[2], @s[1] - - @ Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 - - @ new smaller inversion - - vand @s[2], @t[3], @t[1] - vmov @s[0], @t[0] - - veor @s[1], @t[2], @s[2] - veor @s[3], @t[0], @s[2] - veor @s[2], @t[0], @s[2] @ @s[2]=@s[3] - - vbsl @s[1], @t[1], @t[0] - vbsl @s[3], @t[3], @t[2] - veor @t[3], @t[3], @t[2] - - vbsl @s[0], @s[1], @s[2] - vbsl @t[0], @s[2], @s[1] - - vand @s[2], @s[0], @s[3] - veor @t[1], @t[1], @t[0] - - veor @s[2], @s[2], @t[3] -___ -# output in s3, s2, s1, t1 - -# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3 - -# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 - &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]); - -### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb -} - -# AES linear components - -sub ShiftRows { -my @x=@_[0..7]; -my @t=@_[8..11]; -my $mask=pop; -$code.=<<___; - vldmia $key!, {@t[0]-@t[3]} - veor @t[0], @t[0], @x[0] - 
veor @t[1], @t[1], @x[1] - vtbl.8 `&Dlo(@x[0])`, {@t[0]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[0])`, {@t[0]}, `&Dhi($mask)` - vldmia $key!, {@t[0]} - veor @t[2], @t[2], @x[2] - vtbl.8 `&Dlo(@x[1])`, {@t[1]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[1])`, {@t[1]}, `&Dhi($mask)` - vldmia $key!, {@t[1]} - veor @t[3], @t[3], @x[3] - vtbl.8 `&Dlo(@x[2])`, {@t[2]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[2])`, {@t[2]}, `&Dhi($mask)` - vldmia $key!, {@t[2]} - vtbl.8 `&Dlo(@x[3])`, {@t[3]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[3])`, {@t[3]}, `&Dhi($mask)` - vldmia $key!, {@t[3]} - veor @t[0], @t[0], @x[4] - veor @t[1], @t[1], @x[5] - vtbl.8 `&Dlo(@x[4])`, {@t[0]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[4])`, {@t[0]}, `&Dhi($mask)` - veor @t[2], @t[2], @x[6] - vtbl.8 `&Dlo(@x[5])`, {@t[1]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[5])`, {@t[1]}, `&Dhi($mask)` - veor @t[3], @t[3], @x[7] - vtbl.8 `&Dlo(@x[6])`, {@t[2]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[6])`, {@t[2]}, `&Dhi($mask)` - vtbl.8 `&Dlo(@x[7])`, {@t[3]}, `&Dlo($mask)` - vtbl.8 `&Dhi(@x[7])`, {@t[3]}, `&Dhi($mask)` -___ -} - -sub MixColumns { -# modified to emit output in order suitable for feeding back to aesenc[last] -my @x=@_[0..7]; -my @t=@_[8..15]; -my $inv=@_[16]; # optional -$code.=<<___; - vext.8 @t[0], @x[0], @x[0], #12 @ x0 <<< 32 - vext.8 @t[1], @x[1], @x[1], #12 - veor @x[0], @x[0], @t[0] @ x0 ^ (x0 <<< 32) - vext.8 @t[2], @x[2], @x[2], #12 - veor @x[1], @x[1], @t[1] - vext.8 @t[3], @x[3], @x[3], #12 - veor @x[2], @x[2], @t[2] - vext.8 @t[4], @x[4], @x[4], #12 - veor @x[3], @x[3], @t[3] - vext.8 @t[5], @x[5], @x[5], #12 - veor @x[4], @x[4], @t[4] - vext.8 @t[6], @x[6], @x[6], #12 - veor @x[5], @x[5], @t[5] - vext.8 @t[7], @x[7], @x[7], #12 - veor @x[6], @x[6], @t[6] - - veor @t[1], @t[1], @x[0] - veor @x[7], @x[7], @t[7] - vext.8 @x[0], @x[0], @x[0], #8 @ (x0 ^ (x0 <<< 32)) <<< 64) - veor @t[2], @t[2], @x[1] - veor @t[0], @t[0], @x[7] - veor @t[1], @t[1], @x[7] - vext.8 @x[1], @x[1], @x[1], #8 - veor @t[5], @t[5], @x[4] - veor @x[0], @x[0], @t[0] - veor @t[6], @t[6], @x[5] - veor @x[1], @x[1], @t[1] - vext.8 @t[0], @x[4], @x[4], #8 - veor @t[4], @t[4], @x[3] - vext.8 @t[1], @x[5], @x[5], #8 - veor @t[7], @t[7], @x[6] - vext.8 @x[4], @x[3], @x[3], #8 - veor @t[3], @t[3], @x[2] - vext.8 @x[5], @x[7], @x[7], #8 - veor @t[4], @t[4], @x[7] - vext.8 @x[3], @x[6], @x[6], #8 - veor @t[3], @t[3], @x[7] - vext.8 @x[6], @x[2], @x[2], #8 - veor @x[7], @t[1], @t[5] -___ -$code.=<<___ if (!$inv); - veor @x[2], @t[0], @t[4] - veor @x[4], @x[4], @t[3] - veor @x[5], @x[5], @t[7] - veor @x[3], @x[3], @t[6] - @ vmov @x[2], @t[0] - veor @x[6], @x[6], @t[2] - @ vmov @x[7], @t[1] -___ -$code.=<<___ if ($inv); - veor @t[3], @t[3], @x[4] - veor @x[5], @x[5], @t[7] - veor @x[2], @x[3], @t[6] - veor @x[3], @t[0], @t[4] - veor @x[4], @x[6], @t[2] - vmov @x[6], @t[3] - @ vmov @x[7], @t[1] -___ -} - -sub InvMixColumns_orig { -my @x=@_[0..7]; -my @t=@_[8..15]; - -$code.=<<___; - @ multiplication by 0x0e - vext.8 @t[7], @x[7], @x[7], #12 - vmov @t[2], @x[2] - veor @x[2], @x[2], @x[5] @ 2 5 - veor @x[7], @x[7], @x[5] @ 7 5 - vext.8 @t[0], @x[0], @x[0], #12 - vmov @t[5], @x[5] - veor @x[5], @x[5], @x[0] @ 5 0 [1] - veor @x[0], @x[0], @x[1] @ 0 1 - vext.8 @t[1], @x[1], @x[1], #12 - veor @x[1], @x[1], @x[2] @ 1 25 - veor @x[0], @x[0], @x[6] @ 01 6 [2] - vext.8 @t[3], @x[3], @x[3], #12 - veor @x[1], @x[1], @x[3] @ 125 3 [4] - veor @x[2], @x[2], @x[0] @ 25 016 [3] - veor @x[3], @x[3], @x[7] @ 3 75 - veor @x[7], @x[7], @x[6] @ 75 6 [0] - vext.8 @t[6], @x[6], @x[6], #12 - vmov @t[4], @x[4] - veor @x[6], @x[6], 
@x[4] @ 6 4 - veor @x[4], @x[4], @x[3] @ 4 375 [6] - veor @x[3], @x[3], @x[7] @ 375 756=36 - veor @x[6], @x[6], @t[5] @ 64 5 [7] - veor @x[3], @x[3], @t[2] @ 36 2 - vext.8 @t[5], @t[5], @t[5], #12 - veor @x[3], @x[3], @t[4] @ 362 4 [5] -___ - my @y = @x[7,5,0,2,1,3,4,6]; -$code.=<<___; - @ multiplication by 0x0b - veor @y[1], @y[1], @y[0] - veor @y[0], @y[0], @t[0] - vext.8 @t[2], @t[2], @t[2], #12 - veor @y[1], @y[1], @t[1] - veor @y[0], @y[0], @t[5] - vext.8 @t[4], @t[4], @t[4], #12 - veor @y[1], @y[1], @t[6] - veor @y[0], @y[0], @t[7] - veor @t[7], @t[7], @t[6] @ clobber t[7] - - veor @y[3], @y[3], @t[0] - veor @y[1], @y[1], @y[0] - vext.8 @t[0], @t[0], @t[0], #12 - veor @y[2], @y[2], @t[1] - veor @y[4], @y[4], @t[1] - vext.8 @t[1], @t[1], @t[1], #12 - veor @y[2], @y[2], @t[2] - veor @y[3], @y[3], @t[2] - veor @y[5], @y[5], @t[2] - veor @y[2], @y[2], @t[7] - vext.8 @t[2], @t[2], @t[2], #12 - veor @y[3], @y[3], @t[3] - veor @y[6], @y[6], @t[3] - veor @y[4], @y[4], @t[3] - veor @y[7], @y[7], @t[4] - vext.8 @t[3], @t[3], @t[3], #12 - veor @y[5], @y[5], @t[4] - veor @y[7], @y[7], @t[7] - veor @t[7], @t[7], @t[5] @ clobber t[7] even more - veor @y[3], @y[3], @t[5] - veor @y[4], @y[4], @t[4] - - veor @y[5], @y[5], @t[7] - vext.8 @t[4], @t[4], @t[4], #12 - veor @y[6], @y[6], @t[7] - veor @y[4], @y[4], @t[7] - - veor @t[7], @t[7], @t[5] - vext.8 @t[5], @t[5], @t[5], #12 - - @ multiplication by 0x0d - veor @y[4], @y[4], @y[7] - veor @t[7], @t[7], @t[6] @ restore t[7] - veor @y[7], @y[7], @t[4] - vext.8 @t[6], @t[6], @t[6], #12 - veor @y[2], @y[2], @t[0] - veor @y[7], @y[7], @t[5] - vext.8 @t[7], @t[7], @t[7], #12 - veor @y[2], @y[2], @t[2] - - veor @y[3], @y[3], @y[1] - veor @y[1], @y[1], @t[1] - veor @y[0], @y[0], @t[0] - veor @y[3], @y[3], @t[0] - veor @y[1], @y[1], @t[5] - veor @y[0], @y[0], @t[5] - vext.8 @t[0], @t[0], @t[0], #12 - veor @y[1], @y[1], @t[7] - veor @y[0], @y[0], @t[6] - veor @y[3], @y[3], @y[1] - veor @y[4], @y[4], @t[1] - vext.8 @t[1], @t[1], @t[1], #12 - - veor @y[7], @y[7], @t[7] - veor @y[4], @y[4], @t[2] - veor @y[5], @y[5], @t[2] - veor @y[2], @y[2], @t[6] - veor @t[6], @t[6], @t[3] @ clobber t[6] - vext.8 @t[2], @t[2], @t[2], #12 - veor @y[4], @y[4], @y[7] - veor @y[3], @y[3], @t[6] - - veor @y[6], @y[6], @t[6] - veor @y[5], @y[5], @t[5] - vext.8 @t[5], @t[5], @t[5], #12 - veor @y[6], @y[6], @t[4] - vext.8 @t[4], @t[4], @t[4], #12 - veor @y[5], @y[5], @t[6] - veor @y[6], @y[6], @t[7] - vext.8 @t[7], @t[7], @t[7], #12 - veor @t[6], @t[6], @t[3] @ restore t[6] - vext.8 @t[3], @t[3], @t[3], #12 - - @ multiplication by 0x09 - veor @y[4], @y[4], @y[1] - veor @t[1], @t[1], @y[1] @ t[1]=y[1] - veor @t[0], @t[0], @t[5] @ clobber t[0] - vext.8 @t[6], @t[6], @t[6], #12 - veor @t[1], @t[1], @t[5] - veor @y[3], @y[3], @t[0] - veor @t[0], @t[0], @y[0] @ t[0]=y[0] - veor @t[1], @t[1], @t[6] - veor @t[6], @t[6], @t[7] @ clobber t[6] - veor @y[4], @y[4], @t[1] - veor @y[7], @y[7], @t[4] - veor @y[6], @y[6], @t[3] - veor @y[5], @y[5], @t[2] - veor @t[4], @t[4], @y[4] @ t[4]=y[4] - veor @t[3], @t[3], @y[3] @ t[3]=y[3] - veor @t[5], @t[5], @y[5] @ t[5]=y[5] - veor @t[2], @t[2], @y[2] @ t[2]=y[2] - veor @t[3], @t[3], @t[7] - veor @XMM[5], @t[5], @t[6] - veor @XMM[6], @t[6], @y[6] @ t[6]=y[6] - veor @XMM[2], @t[2], @t[6] - veor @XMM[7], @t[7], @y[7] @ t[7]=y[7] - - vmov @XMM[0], @t[0] - vmov @XMM[1], @t[1] - @ vmov @XMM[2], @t[2] - vmov @XMM[3], @t[3] - vmov @XMM[4], @t[4] - @ vmov @XMM[5], @t[5] - @ vmov @XMM[6], @t[6] - @ vmov @XMM[7], @t[7] -___ -} - -sub InvMixColumns { -my @x=@_[0..7]; 
-my @t=@_[8..15]; - -# Thanks to Jussi Kivilinna for providing pointer to -# -# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 | -# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 | -# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 | -# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 | - -$code.=<<___; - @ multiplication by 0x05-0x00-0x04-0x00 - vext.8 @t[0], @x[0], @x[0], #8 - vext.8 @t[6], @x[6], @x[6], #8 - vext.8 @t[7], @x[7], @x[7], #8 - veor @t[0], @t[0], @x[0] - vext.8 @t[1], @x[1], @x[1], #8 - veor @t[6], @t[6], @x[6] - vext.8 @t[2], @x[2], @x[2], #8 - veor @t[7], @t[7], @x[7] - vext.8 @t[3], @x[3], @x[3], #8 - veor @t[1], @t[1], @x[1] - vext.8 @t[4], @x[4], @x[4], #8 - veor @t[2], @t[2], @x[2] - vext.8 @t[5], @x[5], @x[5], #8 - veor @t[3], @t[3], @x[3] - veor @t[4], @t[4], @x[4] - veor @t[5], @t[5], @x[5] - - veor @x[0], @x[0], @t[6] - veor @x[1], @x[1], @t[6] - veor @x[2], @x[2], @t[0] - veor @x[4], @x[4], @t[2] - veor @x[3], @x[3], @t[1] - veor @x[1], @x[1], @t[7] - veor @x[2], @x[2], @t[7] - veor @x[4], @x[4], @t[6] - veor @x[5], @x[5], @t[3] - veor @x[3], @x[3], @t[6] - veor @x[6], @x[6], @t[4] - veor @x[4], @x[4], @t[7] - veor @x[5], @x[5], @t[7] - veor @x[7], @x[7], @t[5] -___ - &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6 -} - -sub swapmove { -my ($a,$b,$n,$mask,$t)=@_; -$code.=<<___; - vshr.u64 $t, $b, #$n - veor $t, $t, $a - vand $t, $t, $mask - veor $a, $a, $t - vshl.u64 $t, $t, #$n - veor $b, $b, $t -___ -} -sub swapmove2x { -my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_; -$code.=<<___; - vshr.u64 $t0, $b0, #$n - vshr.u64 $t1, $b1, #$n - veor $t0, $t0, $a0 - veor $t1, $t1, $a1 - vand $t0, $t0, $mask - vand $t1, $t1, $mask - veor $a0, $a0, $t0 - vshl.u64 $t0, $t0, #$n - veor $a1, $a1, $t1 - vshl.u64 $t1, $t1, #$n - veor $b0, $b0, $t0 - veor $b1, $b1, $t1 -___ -} - -sub bitslice { -my @x=reverse(@_[0..7]); -my ($t0,$t1,$t2,$t3)=@_[8..11]; -$code.=<<___; - vmov.i8 $t0,#0x55 @ compose .LBS0 - vmov.i8 $t1,#0x33 @ compose .LBS1 -___ - &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3); - &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); -$code.=<<___; - vmov.i8 $t0,#0x0f @ compose .LBS2 -___ - &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3); - &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); - - &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3); - &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3); -} - -$code.=<<___; -#ifndef __KERNEL__ -# include "arm_arch.h" - -# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} -# define VFP_ABI_POP vldmia sp!,{d8-d15} -# define VFP_ABI_FRAME 0x40 -#else -# define VFP_ABI_PUSH -# define VFP_ABI_POP -# define VFP_ABI_FRAME 0 -# define BSAES_ASM_EXTENDED_KEY -# define XTS_CHAIN_TWEAK -# define __ARM_ARCH__ __LINUX_ARM_ARCH__ -# define __ARM_MAX_ARCH__ 7 -#endif - -#ifdef __thumb__ -# define adrl adr -#endif - -#if __ARM_MAX_ARCH__>=7 -.arch armv7-a -.fpu neon - -.text -.syntax unified @ ARMv7-capable assembler is expected to handle this -#ifdef __thumb2__ -.thumb -#else -.code 32 -#endif - -.type _bsaes_decrypt8,%function -.align 4 -_bsaes_decrypt8: - adr $const,_bsaes_decrypt8 - vldmia $key!, {@XMM[9]} @ round 0 key - add $const,$const,#.LM0ISR-_bsaes_decrypt8 - - vldmia $const!, {@XMM[8]} @ .LM0ISR - veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key - veor @XMM[11], @XMM[1], @XMM[9] - vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` - veor @XMM[12], @XMM[2], @XMM[9] - vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` - veor @XMM[13], @XMM[3], @XMM[9] - vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, 
`&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` - veor @XMM[14], @XMM[4], @XMM[9] - vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` - veor @XMM[15], @XMM[5], @XMM[9] - vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` - veor @XMM[10], @XMM[6], @XMM[9] - vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` - veor @XMM[11], @XMM[7], @XMM[9] - vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` - vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` -___ - &bitslice (@XMM[0..7, 8..11]); -$code.=<<___; - sub $rounds,$rounds,#1 - b .Ldec_sbox -.align 4 -.Ldec_loop: -___ - &ShiftRows (@XMM[0..7, 8..12]); -$code.=".Ldec_sbox:\n"; - &InvSbox (@XMM[0..7, 8..15]); -$code.=<<___; - subs $rounds,$rounds,#1 - bcc .Ldec_done -___ - &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]); -$code.=<<___; - vldmia $const, {@XMM[12]} @ .LISR - ite eq @ Thumb2 thing, sanity check in ARM - addeq $const,$const,#0x10 - bne .Ldec_loop - vldmia $const, {@XMM[12]} @ .LISRM0 - b .Ldec_loop -.align 4 -.Ldec_done: -___ - &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]); -$code.=<<___; - vldmia $key, {@XMM[8]} @ last round key - veor @XMM[6], @XMM[6], @XMM[8] - veor @XMM[4], @XMM[4], @XMM[8] - veor @XMM[2], @XMM[2], @XMM[8] - veor @XMM[7], @XMM[7], @XMM[8] - veor @XMM[3], @XMM[3], @XMM[8] - veor @XMM[5], @XMM[5], @XMM[8] - veor @XMM[0], @XMM[0], @XMM[8] - veor @XMM[1], @XMM[1], @XMM[8] - bx lr -.size _bsaes_decrypt8,.-_bsaes_decrypt8 - -.type _bsaes_const,%object -.align 6 -_bsaes_const: -.LM0ISR: @ InvShiftRows constants - .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 -.LISR: - .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 -.LISRM0: - .quad 0x01040b0e0205080f, 0x0306090c00070a0d -.LM0SR: @ ShiftRows constants - .quad 0x0a0e02060f03070b, 0x0004080c05090d01 -.LSR: - .quad 0x0504070600030201, 0x0f0e0d0c0a09080b -.LSRM0: - .quad 0x0304090e00050a0f, 0x01060b0c0207080d -.LM0: - .quad 0x02060a0e03070b0f, 0x0004080c0105090d -.LREVM0SR: - .quad 0x090d01050c000408, 0x03070b0f060a0e02 -.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro\@openssl.org>" -.align 6 -.size _bsaes_const,.-_bsaes_const - -.type _bsaes_encrypt8,%function -.align 4 -_bsaes_encrypt8: - adr $const,_bsaes_encrypt8 - vldmia $key!, {@XMM[9]} @ round 0 key - sub $const,$const,#_bsaes_encrypt8-.LM0SR - - vldmia $const!, {@XMM[8]} @ .LM0SR -_bsaes_encrypt8_alt: - veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key - veor @XMM[11], @XMM[1], @XMM[9] - vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` - veor @XMM[12], @XMM[2], @XMM[9] - vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` - veor @XMM[13], @XMM[3], @XMM[9] - vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` - veor @XMM[14], @XMM[4], @XMM[9] - vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` - veor @XMM[15], @XMM[5], @XMM[9] - vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` - veor @XMM[10], @XMM[6], @XMM[9] - vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` - veor @XMM[11], @XMM[7], @XMM[9] - 
vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` - vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` - vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` -_bsaes_encrypt8_bitslice: -___ - &bitslice (@XMM[0..7, 8..11]); -$code.=<<___; - sub $rounds,$rounds,#1 - b .Lenc_sbox -.align 4 -.Lenc_loop: -___ - &ShiftRows (@XMM[0..7, 8..12]); -$code.=".Lenc_sbox:\n"; - &Sbox (@XMM[0..7, 8..15]); -$code.=<<___; - subs $rounds,$rounds,#1 - bcc .Lenc_done -___ - &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]); -$code.=<<___; - vldmia $const, {@XMM[12]} @ .LSR - ite eq @ Thumb2 thing, samity check in ARM - addeq $const,$const,#0x10 - bne .Lenc_loop - vldmia $const, {@XMM[12]} @ .LSRM0 - b .Lenc_loop -.align 4 -.Lenc_done: -___ - # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb - &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]); -$code.=<<___; - vldmia $key, {@XMM[8]} @ last round key - veor @XMM[4], @XMM[4], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[8] - veor @XMM[3], @XMM[3], @XMM[8] - veor @XMM[7], @XMM[7], @XMM[8] - veor @XMM[2], @XMM[2], @XMM[8] - veor @XMM[5], @XMM[5], @XMM[8] - veor @XMM[0], @XMM[0], @XMM[8] - veor @XMM[1], @XMM[1], @XMM[8] - bx lr -.size _bsaes_encrypt8,.-_bsaes_encrypt8 -___ -} -{ -my ($out,$inp,$rounds,$const)=("r12","r4","r5","r6"); - -sub bitslice_key { -my @x=reverse(@_[0..7]); -my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12]; - - &swapmove (@x[0,1],1,$bs0,$t2,$t3); -$code.=<<___; - @ &swapmove(@x[2,3],1,$t0,$t2,$t3); - vmov @x[2], @x[0] - vmov @x[3], @x[1] -___ - #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); - - &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3); -$code.=<<___; - @ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); - vmov @x[4], @x[0] - vmov @x[6], @x[2] - vmov @x[5], @x[1] - vmov @x[7], @x[3] -___ - &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3); - &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3); -} - -$code.=<<___; -.type _bsaes_key_convert,%function -.align 4 -_bsaes_key_convert: - adr $const,_bsaes_key_convert - vld1.8 {@XMM[7]}, [$inp]! @ load round 0 key - sub $const,$const,#_bsaes_key_convert-.LM0 - vld1.8 {@XMM[15]}, [$inp]! @ load round 1 key - - vmov.i8 @XMM[8], #0x01 @ bit masks - vmov.i8 @XMM[9], #0x02 - vmov.i8 @XMM[10], #0x04 - vmov.i8 @XMM[11], #0x08 - vmov.i8 @XMM[12], #0x10 - vmov.i8 @XMM[13], #0x20 - vldmia $const, {@XMM[14]} @ .LM0 - -#ifdef __ARMEL__ - vrev32.8 @XMM[7], @XMM[7] - vrev32.8 @XMM[15], @XMM[15] -#endif - sub $rounds,$rounds,#1 - vstmia $out!, {@XMM[7]} @ save round 0 key - b .Lkey_loop - -.align 4 -.Lkey_loop: - vtbl.8 `&Dlo(@XMM[7])`,{@XMM[15]},`&Dlo(@XMM[14])` - vtbl.8 `&Dhi(@XMM[7])`,{@XMM[15]},`&Dhi(@XMM[14])` - vmov.i8 @XMM[6], #0x40 - vmov.i8 @XMM[15], #0x80 - - vtst.8 @XMM[0], @XMM[7], @XMM[8] - vtst.8 @XMM[1], @XMM[7], @XMM[9] - vtst.8 @XMM[2], @XMM[7], @XMM[10] - vtst.8 @XMM[3], @XMM[7], @XMM[11] - vtst.8 @XMM[4], @XMM[7], @XMM[12] - vtst.8 @XMM[5], @XMM[7], @XMM[13] - vtst.8 @XMM[6], @XMM[7], @XMM[6] - vtst.8 @XMM[7], @XMM[7], @XMM[15] - vld1.8 {@XMM[15]}, [$inp]! @ load next round key - vmvn @XMM[0], @XMM[0] @ "pnot" - vmvn @XMM[1], @XMM[1] - vmvn @XMM[5], @XMM[5] - vmvn @XMM[6], @XMM[6] -#ifdef __ARMEL__ - vrev32.8 @XMM[15], @XMM[15] -#endif - subs $rounds,$rounds,#1 - vstmia $out!,{@XMM[0]-@XMM[7]} @ write bit-sliced round key - bne .Lkey_loop - - vmov.i8 @XMM[7],#0x63 @ compose .L63 - @ don't save last round key - bx lr -.size _bsaes_key_convert,.-_bsaes_key_convert -___ -} - -if (0) { # following four functions are unsupported interface - # used for benchmarking... 
-$code.=<<___; -.globl bsaes_enc_key_convert -.type bsaes_enc_key_convert,%function -.align 4 -bsaes_enc_key_convert: - stmdb sp!,{r4-r6,lr} - vstmdb sp!,{d8-d15} @ ABI specification says so - - ldr r5,[$inp,#240] @ pass rounds - mov r4,$inp @ pass key - mov r12,$out @ pass key schedule - bl _bsaes_key_convert - veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key - vstmia r12, {@XMM[7]} @ save last round key - - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r6,pc} -.size bsaes_enc_key_convert,.-bsaes_enc_key_convert - -.globl bsaes_encrypt_128 -.type bsaes_encrypt_128,%function -.align 4 -bsaes_encrypt_128: - stmdb sp!,{r4-r6,lr} - vstmdb sp!,{d8-d15} @ ABI specification says so -.Lenc128_loop: - vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input - vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! - mov r4,$key @ pass the key - vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! - mov r5,#10 @ pass rounds - vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! - - bl _bsaes_encrypt8 - - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[3]}, [$out]! - vst1.8 {@XMM[7]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - subs $len,$len,#0x80 - vst1.8 {@XMM[5]}, [$out]! - bhi .Lenc128_loop - - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r6,pc} -.size bsaes_encrypt_128,.-bsaes_encrypt_128 - -.globl bsaes_dec_key_convert -.type bsaes_dec_key_convert,%function -.align 4 -bsaes_dec_key_convert: - stmdb sp!,{r4-r6,lr} - vstmdb sp!,{d8-d15} @ ABI specification says so - - ldr r5,[$inp,#240] @ pass rounds - mov r4,$inp @ pass key - mov r12,$out @ pass key schedule - bl _bsaes_key_convert - vldmia $out, {@XMM[6]} - vstmia r12, {@XMM[15]} @ save last round key - veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key - vstmia $out, {@XMM[7]} - - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r6,pc} -.size bsaes_dec_key_convert,.-bsaes_dec_key_convert - -.globl bsaes_decrypt_128 -.type bsaes_decrypt_128,%function -.align 4 -bsaes_decrypt_128: - stmdb sp!,{r4-r6,lr} - vstmdb sp!,{d8-d15} @ ABI specification says so -.Ldec128_loop: - vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input - vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! - mov r4,$key @ pass the key - vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! - mov r5,#10 @ pass rounds - vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! - - bl _bsaes_decrypt8 - - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - vst1.8 {@XMM[7]}, [$out]! - vst1.8 {@XMM[3]}, [$out]! - subs $len,$len,#0x80 - vst1.8 {@XMM[5]}, [$out]! 
- bhi .Ldec128_loop - - vldmia sp!,{d8-d15} - ldmia sp!,{r4-r6,pc} -.size bsaes_decrypt_128,.-bsaes_decrypt_128 -___ -} -{ -my ($inp,$out,$len,$key, $ivp,$fp,$rounds)=map("r$_",(0..3,8..10)); -my ($keysched)=("sp"); - -$code.=<<___; -.extern AES_cbc_encrypt -.extern AES_decrypt - -.global bsaes_cbc_encrypt -.type bsaes_cbc_encrypt,%function -.align 5 -bsaes_cbc_encrypt: -#ifndef __KERNEL__ - cmp $len, #128 -#ifndef __thumb__ - blo AES_cbc_encrypt -#else - bhs 1f - b AES_cbc_encrypt -1: -#endif -#endif - - @ it is up to the caller to make sure we are called with enc == 0 - - mov ip, sp - stmdb sp!, {r4-r10, lr} - VFP_ABI_PUSH - ldr $ivp, [ip] @ IV is 1st arg on the stack - mov $len, $len, lsr#4 @ len in 16 byte blocks - sub sp, #0x10 @ scratch space to carry over the IV - mov $fp, sp @ save sp - - ldr $rounds, [$key, #240] @ get # of rounds -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key - add r12, #`128-32` @ sifze of bit-slices key schedule - - @ populate the key schedule - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - mov sp, r12 @ sp is $keysched - bl _bsaes_key_convert - vldmia $keysched, {@XMM[6]} - vstmia r12, {@XMM[15]} @ save last round key - veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key - vstmia $keysched, {@XMM[7]} -#else - ldr r12, [$key, #244] - eors r12, #1 - beq 0f - - @ populate the key schedule - str r12, [$key, #244] - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - add r12, $key, #248 @ pass key schedule - bl _bsaes_key_convert - add r4, $key, #248 - vldmia r4, {@XMM[6]} - vstmia r12, {@XMM[15]} @ save last round key - veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key - vstmia r4, {@XMM[7]} - -.align 2 -0: -#endif - - vld1.8 {@XMM[15]}, [$ivp] @ load IV - b .Lcbc_dec_loop - -.align 4 -.Lcbc_dec_loop: - subs $len, $len, #0x8 - bmi .Lcbc_dec_loop_finish - - vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input - vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! -#ifndef BSAES_ASM_EXTENDED_KEY - mov r4, $keysched @ pass the key -#else - add r4, $key, #248 -#endif - vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! - mov r5, $rounds - vld1.8 {@XMM[6]-@XMM[7]}, [$inp] - sub $inp, $inp, #0x60 - vstmia $fp, {@XMM[15]} @ put aside IV - - bl _bsaes_decrypt8 - - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! - veor @XMM[4], @XMM[4], @XMM[10] - veor @XMM[2], @XMM[2], @XMM[11] - vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! - veor @XMM[7], @XMM[7], @XMM[12] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - veor @XMM[3], @XMM[3], @XMM[13] - vst1.8 {@XMM[6]}, [$out]! - veor @XMM[5], @XMM[5], @XMM[14] - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - vst1.8 {@XMM[7]}, [$out]! - vst1.8 {@XMM[3]}, [$out]! - vst1.8 {@XMM[5]}, [$out]! - - b .Lcbc_dec_loop - -.Lcbc_dec_loop_finish: - adds $len, $len, #8 - beq .Lcbc_dec_done - - vld1.8 {@XMM[0]}, [$inp]! @ load input - cmp $len, #2 - blo .Lcbc_dec_one - vld1.8 {@XMM[1]}, [$inp]! -#ifndef BSAES_ASM_EXTENDED_KEY - mov r4, $keysched @ pass the key -#else - add r4, $key, #248 -#endif - mov r5, $rounds - vstmia $fp, {@XMM[15]} @ put aside IV - beq .Lcbc_dec_two - vld1.8 {@XMM[2]}, [$inp]! - cmp $len, #4 - blo .Lcbc_dec_three - vld1.8 {@XMM[3]}, [$inp]! - beq .Lcbc_dec_four - vld1.8 {@XMM[4]}, [$inp]! 
- cmp $len, #6 - blo .Lcbc_dec_five - vld1.8 {@XMM[5]}, [$inp]! - beq .Lcbc_dec_six - vld1.8 {@XMM[6]}, [$inp]! - sub $inp, $inp, #0x70 - - bl _bsaes_decrypt8 - - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! - veor @XMM[4], @XMM[4], @XMM[10] - veor @XMM[2], @XMM[2], @XMM[11] - vld1.8 {@XMM[15]}, [$inp]! - veor @XMM[7], @XMM[7], @XMM[12] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - veor @XMM[3], @XMM[3], @XMM[13] - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - vst1.8 {@XMM[7]}, [$out]! - vst1.8 {@XMM[3]}, [$out]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_six: - sub $inp, $inp, #0x60 - bl _bsaes_decrypt8 - vldmia $fp,{@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vld1.8 {@XMM[12]}, [$inp]! - veor @XMM[4], @XMM[4], @XMM[10] - veor @XMM[2], @XMM[2], @XMM[11] - vld1.8 {@XMM[15]}, [$inp]! - veor @XMM[7], @XMM[7], @XMM[12] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - vst1.8 {@XMM[7]}, [$out]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_five: - sub $inp, $inp, #0x50 - bl _bsaes_decrypt8 - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vld1.8 {@XMM[15]}, [$inp]! - veor @XMM[4], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - veor @XMM[2], @XMM[2], @XMM[11] - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[4]}, [$out]! - vst1.8 {@XMM[2]}, [$out]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_four: - sub $inp, $inp, #0x40 - bl _bsaes_decrypt8 - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[10]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vld1.8 {@XMM[15]}, [$inp]! - veor @XMM[4], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - vst1.8 {@XMM[6]}, [$out]! - vst1.8 {@XMM[4]}, [$out]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_three: - sub $inp, $inp, #0x30 - bl _bsaes_decrypt8 - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[15]}, [$inp]! - veor @XMM[1], @XMM[1], @XMM[8] - veor @XMM[6], @XMM[6], @XMM[9] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - vst1.8 {@XMM[6]}, [$out]! - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_two: - sub $inp, $inp, #0x20 - bl _bsaes_decrypt8 - vldmia $fp, {@XMM[14]} @ reload IV - vld1.8 {@XMM[8]}, [$inp]! @ reload input - veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV - vld1.8 {@XMM[15]}, [$inp]! @ reload input - veor @XMM[1], @XMM[1], @XMM[8] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! 
@ write output - b .Lcbc_dec_done -.align 4 -.Lcbc_dec_one: - sub $inp, $inp, #0x10 - mov $rounds, $out @ save original out pointer - mov $out, $fp @ use the iv scratch space as out buffer - mov r2, $key - vmov @XMM[4],@XMM[15] @ just in case ensure that IV - vmov @XMM[5],@XMM[0] @ and input are preserved - bl AES_decrypt - vld1.8 {@XMM[0]}, [$fp,:64] @ load result - veor @XMM[0], @XMM[0], @XMM[4] @ ^= IV - vmov @XMM[15], @XMM[5] @ @XMM[5] holds input - vst1.8 {@XMM[0]}, [$rounds] @ write output - -.Lcbc_dec_done: -#ifndef BSAES_ASM_EXTENDED_KEY - vmov.i32 q0, #0 - vmov.i32 q1, #0 -.Lcbc_dec_bzero: @ wipe key schedule [if any] - vstmia $keysched!, {q0-q1} - cmp $keysched, $fp - bne .Lcbc_dec_bzero -#endif - - mov sp, $fp - add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb - vst1.8 {@XMM[15]}, [$ivp] @ return IV - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} -.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt -___ -} -{ -my ($inp,$out,$len,$key, $ctr,$fp,$rounds)=(map("r$_",(0..3,8..10))); -my $const = "r6"; # shared with _bsaes_encrypt8_alt -my $keysched = "sp"; - -$code.=<<___; -.extern AES_encrypt -.global bsaes_ctr32_encrypt_blocks -.type bsaes_ctr32_encrypt_blocks,%function -.align 5 -bsaes_ctr32_encrypt_blocks: - cmp $len, #8 @ use plain AES for - blo .Lctr_enc_short @ small sizes - - mov ip, sp - stmdb sp!, {r4-r10, lr} - VFP_ABI_PUSH - ldr $ctr, [ip] @ ctr is 1st arg on the stack - sub sp, sp, #0x10 @ scratch space to carry over the ctr - mov $fp, sp @ save sp - - ldr $rounds, [$key, #240] @ get # of rounds -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key - add r12, #`128-32` @ size of bit-sliced key schedule - - @ populate the key schedule - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - mov sp, r12 @ sp is $keysched - bl _bsaes_key_convert - veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key - vstmia r12, {@XMM[7]} @ save last round key - - vld1.8 {@XMM[0]}, [$ctr] @ load counter - add $ctr, $const, #.LREVM0SR-.LM0 @ borrow $ctr - vldmia $keysched, {@XMM[4]} @ load round0 key -#else - ldr r12, [$key, #244] - eors r12, #1 - beq 0f - - @ populate the key schedule - str r12, [$key, #244] - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - add r12, $key, #248 @ pass key schedule - bl _bsaes_key_convert - veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key - vstmia r12, {@XMM[7]} @ save last round key - -.align 2 -0: add r12, $key, #248 - vld1.8 {@XMM[0]}, [$ctr] @ load counter - adrl $ctr, .LREVM0SR @ borrow $ctr - vldmia r12, {@XMM[4]} @ load round0 key - sub sp, #0x10 @ place for adjusted round0 key -#endif - - vmov.i32 @XMM[8],#1 @ compose 1<<96 - veor @XMM[9],@XMM[9],@XMM[9] - vrev32.8 @XMM[0],@XMM[0] - vext.8 @XMM[8],@XMM[9],@XMM[8],#4 - vrev32.8 @XMM[4],@XMM[4] - vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 - vstmia $keysched, {@XMM[4]} @ save adjusted round0 key - b .Lctr_enc_loop - -.align 4 -.Lctr_enc_loop: - vadd.u32 @XMM[10], @XMM[8], @XMM[9] @ compose 3<<96 - vadd.u32 @XMM[1], @XMM[0], @XMM[8] @ +1 - vadd.u32 @XMM[2], @XMM[0], @XMM[9] @ +2 - vadd.u32 @XMM[3], @XMM[0], @XMM[10] @ +3 - vadd.u32 @XMM[4], @XMM[1], @XMM[10] - vadd.u32 @XMM[5], @XMM[2], @XMM[10] - vadd.u32 @XMM[6], @XMM[3], @XMM[10] - vadd.u32 @XMM[7], @XMM[4], @XMM[10] - vadd.u32 @XMM[10], @XMM[5], @XMM[10] @ next counter - - @ Borrow prologue from _bsaes_encrypt8 to use the opportunity - @ to flip byte order in 32-bit counter - - vldmia $keysched, {@XMM[9]} @ load round0 key -#ifndef 
BSAES_ASM_EXTENDED_KEY - add r4, $keysched, #0x10 @ pass next round key -#else - add r4, $key, #`248+16` -#endif - vldmia $ctr, {@XMM[8]} @ .LREVM0SR - mov r5, $rounds @ pass rounds - vstmia $fp, {@XMM[10]} @ save next counter - sub $const, $ctr, #.LREVM0SR-.LSR @ pass constants - - bl _bsaes_encrypt8_alt - - subs $len, $len, #8 - blo .Lctr_enc_loop_done - - vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ load input - vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! - veor @XMM[0], @XMM[8] - veor @XMM[1], @XMM[9] - vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! - veor @XMM[4], @XMM[10] - veor @XMM[6], @XMM[11] - vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! - veor @XMM[3], @XMM[12] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output - veor @XMM[7], @XMM[13] - veor @XMM[2], @XMM[14] - vst1.8 {@XMM[4]}, [$out]! - veor @XMM[5], @XMM[15] - vst1.8 {@XMM[6]}, [$out]! - vmov.i32 @XMM[8], #1 @ compose 1<<96 - vst1.8 {@XMM[3]}, [$out]! - veor @XMM[9], @XMM[9], @XMM[9] - vst1.8 {@XMM[7]}, [$out]! - vext.8 @XMM[8], @XMM[9], @XMM[8], #4 - vst1.8 {@XMM[2]}, [$out]! - vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 - vst1.8 {@XMM[5]}, [$out]! - vldmia $fp, {@XMM[0]} @ load counter - - bne .Lctr_enc_loop - b .Lctr_enc_done - -.align 4 -.Lctr_enc_loop_done: - add $len, $len, #8 - vld1.8 {@XMM[8]}, [$inp]! @ load input - veor @XMM[0], @XMM[8] - vst1.8 {@XMM[0]}, [$out]! @ write output - cmp $len, #2 - blo .Lctr_enc_done - vld1.8 {@XMM[9]}, [$inp]! - veor @XMM[1], @XMM[9] - vst1.8 {@XMM[1]}, [$out]! - beq .Lctr_enc_done - vld1.8 {@XMM[10]}, [$inp]! - veor @XMM[4], @XMM[10] - vst1.8 {@XMM[4]}, [$out]! - cmp $len, #4 - blo .Lctr_enc_done - vld1.8 {@XMM[11]}, [$inp]! - veor @XMM[6], @XMM[11] - vst1.8 {@XMM[6]}, [$out]! - beq .Lctr_enc_done - vld1.8 {@XMM[12]}, [$inp]! - veor @XMM[3], @XMM[12] - vst1.8 {@XMM[3]}, [$out]! - cmp $len, #6 - blo .Lctr_enc_done - vld1.8 {@XMM[13]}, [$inp]! - veor @XMM[7], @XMM[13] - vst1.8 {@XMM[7]}, [$out]! - beq .Lctr_enc_done - vld1.8 {@XMM[14]}, [$inp] - veor @XMM[2], @XMM[14] - vst1.8 {@XMM[2]}, [$out]! - -.Lctr_enc_done: - vmov.i32 q0, #0 - vmov.i32 q1, #0 -#ifndef BSAES_ASM_EXTENDED_KEY -.Lctr_enc_bzero: @ wipe key schedule [if any] - vstmia $keysched!, {q0-q1} - cmp $keysched, $fp - bne .Lctr_enc_bzero -#else - vstmia $keysched, {q0-q1} -#endif - - mov sp, $fp - add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} @ return - -.align 4 -.Lctr_enc_short: - ldr ip, [sp] @ ctr pointer is passed on stack - stmdb sp!, {r4-r8, lr} - - mov r4, $inp @ copy arguments - mov r5, $out - mov r6, $len - mov r7, $key - ldr r8, [ip, #12] @ load counter LSW - vld1.8 {@XMM[1]}, [ip] @ load whole counter value -#ifdef __ARMEL__ - rev r8, r8 -#endif - sub sp, sp, #0x10 - vst1.8 {@XMM[1]}, [sp,:64] @ copy counter value - sub sp, sp, #0x10 - -.Lctr_enc_short_loop: - add r0, sp, #0x10 @ input counter value - mov r1, sp @ output on the stack - mov r2, r7 @ key - - bl AES_encrypt - - vld1.8 {@XMM[0]}, [r4]! @ load input - vld1.8 {@XMM[1]}, [sp,:64] @ load encrypted counter - add r8, r8, #1 -#ifdef __ARMEL__ - rev r0, r8 - str r0, [sp, #0x1c] @ next counter value -#else - str r8, [sp, #0x1c] @ next counter value -#endif - veor @XMM[0],@XMM[0],@XMM[1] - vst1.8 {@XMM[0]}, [r5]! 
@ store output - subs r6, r6, #1 - bne .Lctr_enc_short_loop - - vmov.i32 q0, #0 - vmov.i32 q1, #0 - vstmia sp!, {q0-q1} - - ldmia sp!, {r4-r8, pc} -.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks -___ -} -{ -###################################################################### -# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len, -# const AES_KEY *key1, const AES_KEY *key2, -# const unsigned char iv[16]); -# -my ($inp,$out,$len,$key,$rounds,$magic,$fp)=(map("r$_",(7..10,1..3))); -my $const="r6"; # returned by _bsaes_key_convert -my $twmask=@XMM[5]; -my @T=@XMM[6..7]; - -$code.=<<___; -.globl bsaes_xts_encrypt -.type bsaes_xts_encrypt,%function -.align 4 -bsaes_xts_encrypt: - mov ip, sp - stmdb sp!, {r4-r10, lr} @ 0x20 - VFP_ABI_PUSH - mov r6, sp @ future $fp - - mov $inp, r0 - mov $out, r1 - mov $len, r2 - mov $key, r3 - - sub r0, sp, #0x10 @ 0x10 - bic r0, #0xf @ align at 16 bytes - mov sp, r0 - -#ifdef XTS_CHAIN_TWEAK - ldr r0, [ip] @ pointer to input tweak -#else - @ generate initial tweak - ldr r0, [ip, #4] @ iv[] - mov r1, sp - ldr r2, [ip, #0] @ key2 - bl AES_encrypt - mov r0,sp @ pointer to initial tweak -#endif - - ldr $rounds, [$key, #240] @ get # of rounds - mov $fp, r6 -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key - @ add r12, #`128-32` @ size of bit-sliced key schedule - sub r12, #`32+16` @ place for tweak[9] - - @ populate the key schedule - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - mov sp, r12 - add r12, #0x90 @ pass key schedule - bl _bsaes_key_convert - veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key - vstmia r12, {@XMM[7]} @ save last round key -#else - ldr r12, [$key, #244] - eors r12, #1 - beq 0f - - str r12, [$key, #244] - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - add r12, $key, #248 @ pass key schedule - bl _bsaes_key_convert - veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key - vstmia r12, {@XMM[7]} - -.align 2 -0: sub sp, #0x90 @ place for tweak[9] -#endif - - vld1.8 {@XMM[8]}, [r0] @ initial tweak - adr $magic, .Lxts_magic - - subs $len, #0x80 - blo .Lxts_enc_short - b .Lxts_enc_loop - -.align 4 -.Lxts_enc_loop: - vldmia $magic, {$twmask} @ load XTS magic - vshr.s64 @T[0], @XMM[8], #63 - mov r0, sp - vand @T[0], @T[0], $twmask -___ -for($i=9;$i<16;$i++) { -$code.=<<___; - vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] - vst1.64 {@XMM[$i-1]}, [r0,:128]! - vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - vshr.s64 @T[1], @XMM[$i], #63 - veor @XMM[$i], @XMM[$i], @T[0] - vand @T[1], @T[1], $twmask -___ - @T=reverse(@T); - -$code.=<<___ if ($i>=10); - vld1.8 {@XMM[$i-10]}, [$inp]! -___ -$code.=<<___ if ($i>=11); - veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] -___ -} -$code.=<<___; - vadd.u64 @XMM[8], @XMM[15], @XMM[15] - vst1.64 {@XMM[15]}, [r0,:128]! - vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - veor @XMM[8], @XMM[8], @T[0] - vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak - - vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! - veor @XMM[5], @XMM[5], @XMM[13] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[6], @XMM[6], @XMM[14] - mov r5, $rounds @ pass rounds - veor @XMM[7], @XMM[7], @XMM[15] - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! 
- veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[6], @XMM[11] - vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! - veor @XMM[10], @XMM[3], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - veor @XMM[12], @XMM[2], @XMM[14] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! - veor @XMM[13], @XMM[5], @XMM[15] - vst1.8 {@XMM[12]-@XMM[13]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - - subs $len, #0x80 - bpl .Lxts_enc_loop - -.Lxts_enc_short: - adds $len, #0x70 - bmi .Lxts_enc_done - - vldmia $magic, {$twmask} @ load XTS magic - vshr.s64 @T[0], @XMM[8], #63 - mov r0, sp - vand @T[0], @T[0], $twmask -___ -for($i=9;$i<16;$i++) { -$code.=<<___; - vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] - vst1.64 {@XMM[$i-1]}, [r0,:128]! - vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - vshr.s64 @T[1], @XMM[$i], #63 - veor @XMM[$i], @XMM[$i], @T[0] - vand @T[1], @T[1], $twmask -___ - @T=reverse(@T); - -$code.=<<___ if ($i>=10); - vld1.8 {@XMM[$i-10]}, [$inp]! - subs $len, #0x10 - bmi .Lxts_enc_`$i-9` -___ -$code.=<<___ if ($i>=11); - veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] -___ -} -$code.=<<___; - sub $len, #0x10 - vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak - - vld1.8 {@XMM[6]}, [$inp]! - veor @XMM[5], @XMM[5], @XMM[13] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[6], @XMM[6], @XMM[14] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[6], @XMM[11] - vld1.64 {@XMM[14]}, [r0,:128]! - veor @XMM[10], @XMM[3], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - veor @XMM[12], @XMM[2], @XMM[14] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! - vst1.8 {@XMM[12]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_6: - vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak - - veor @XMM[4], @XMM[4], @XMM[12] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[5], @XMM[5], @XMM[13] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[6], @XMM[11] - veor @XMM[10], @XMM[3], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done - -@ put this in range for both ARM and Thumb mode adr instructions -.align 5 -.Lxts_magic: - .quad 1, 0x87 - -.align 5 -.Lxts_enc_5: - vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak - - veor @XMM[3], @XMM[3], @XMM[11] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[4], @XMM[4], @XMM[12] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! 
- veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[6], @XMM[11] - veor @XMM[10], @XMM[3], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - vst1.8 {@XMM[10]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_4: - vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak - - veor @XMM[2], @XMM[2], @XMM[10] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[3], @XMM[3], @XMM[11] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[6], @XMM[11] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_3: - vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak - - veor @XMM[1], @XMM[1], @XMM[9] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[2], @XMM[2], @XMM[10] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! - vld1.64 {@XMM[10]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[4], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - vst1.8 {@XMM[8]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_2: - vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak - - veor @XMM[0], @XMM[0], @XMM[8] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[1], @XMM[1], @XMM[9] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_encrypt8 - - vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_enc_done -.align 4 -.Lxts_enc_1: - mov r0, sp - veor @XMM[0], @XMM[8] - mov r1, sp - vst1.8 {@XMM[0]}, [sp,:128] - mov r2, $key - mov r4, $fp @ preserve fp - - bl AES_encrypt - - vld1.8 {@XMM[0]}, [sp,:128] - veor @XMM[0], @XMM[0], @XMM[8] - vst1.8 {@XMM[0]}, [$out]! 
- mov $fp, r4 - - vmov @XMM[8], @XMM[9] @ next round tweak - -.Lxts_enc_done: -#ifndef XTS_CHAIN_TWEAK - adds $len, #0x10 - beq .Lxts_enc_ret - sub r6, $out, #0x10 - -.Lxts_enc_steal: - ldrb r0, [$inp], #1 - ldrb r1, [$out, #-0x10] - strb r0, [$out, #-0x10] - strb r1, [$out], #1 - - subs $len, #1 - bhi .Lxts_enc_steal - - vld1.8 {@XMM[0]}, [r6] - mov r0, sp - veor @XMM[0], @XMM[0], @XMM[8] - mov r1, sp - vst1.8 {@XMM[0]}, [sp,:128] - mov r2, $key - mov r4, $fp @ preserve fp - - bl AES_encrypt - - vld1.8 {@XMM[0]}, [sp,:128] - veor @XMM[0], @XMM[0], @XMM[8] - vst1.8 {@XMM[0]}, [r6] - mov $fp, r4 -#endif - -.Lxts_enc_ret: - bic r0, $fp, #0xf - vmov.i32 q0, #0 - vmov.i32 q1, #0 -#ifdef XTS_CHAIN_TWEAK - ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak -#endif -.Lxts_enc_bzero: @ wipe key schedule [if any] - vstmia sp!, {q0-q1} - cmp sp, r0 - bne .Lxts_enc_bzero - - mov sp, $fp -#ifdef XTS_CHAIN_TWEAK - vst1.8 {@XMM[8]}, [r1] -#endif - VFP_ABI_POP - ldmia sp!, {r4-r10, pc} @ return - -.size bsaes_xts_encrypt,.-bsaes_xts_encrypt - -.globl bsaes_xts_decrypt -.type bsaes_xts_decrypt,%function -.align 4 -bsaes_xts_decrypt: - mov ip, sp - stmdb sp!, {r4-r10, lr} @ 0x20 - VFP_ABI_PUSH - mov r6, sp @ future $fp - - mov $inp, r0 - mov $out, r1 - mov $len, r2 - mov $key, r3 - - sub r0, sp, #0x10 @ 0x10 - bic r0, #0xf @ align at 16 bytes - mov sp, r0 - -#ifdef XTS_CHAIN_TWEAK - ldr r0, [ip] @ pointer to input tweak -#else - @ generate initial tweak - ldr r0, [ip, #4] @ iv[] - mov r1, sp - ldr r2, [ip, #0] @ key2 - bl AES_encrypt - mov r0, sp @ pointer to initial tweak -#endif - - ldr $rounds, [$key, #240] @ get # of rounds - mov $fp, r6 -#ifndef BSAES_ASM_EXTENDED_KEY - @ allocate the key schedule on the stack - sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key - @ add r12, #`128-32` @ size of bit-sliced key schedule - sub r12, #`32+16` @ place for tweak[9] - - @ populate the key schedule - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - mov sp, r12 - add r12, #0x90 @ pass key schedule - bl _bsaes_key_convert - add r4, sp, #0x90 - vldmia r4, {@XMM[6]} - vstmia r12, {@XMM[15]} @ save last round key - veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key - vstmia r4, {@XMM[7]} -#else - ldr r12, [$key, #244] - eors r12, #1 - beq 0f - - str r12, [$key, #244] - mov r4, $key @ pass key - mov r5, $rounds @ pass # of rounds - add r12, $key, #248 @ pass key schedule - bl _bsaes_key_convert - add r4, $key, #248 - vldmia r4, {@XMM[6]} - vstmia r12, {@XMM[15]} @ save last round key - veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key - vstmia r4, {@XMM[7]} - -.align 2 -0: sub sp, #0x90 @ place for tweak[9] -#endif - vld1.8 {@XMM[8]}, [r0] @ initial tweak - adr $magic, .Lxts_magic - -#ifndef XTS_CHAIN_TWEAK - tst $len, #0xf @ if not multiple of 16 - it ne @ Thumb2 thing, sanity check in ARM - subne $len, #0x10 @ subtract another 16 bytes -#endif - subs $len, #0x80 - - blo .Lxts_dec_short - b .Lxts_dec_loop - -.align 4 -.Lxts_dec_loop: - vldmia $magic, {$twmask} @ load XTS magic - vshr.s64 @T[0], @XMM[8], #63 - mov r0, sp - vand @T[0], @T[0], $twmask -___ -for($i=9;$i<16;$i++) { -$code.=<<___; - vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] - vst1.64 {@XMM[$i-1]}, [r0,:128]! - vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - vshr.s64 @T[1], @XMM[$i], #63 - veor @XMM[$i], @XMM[$i], @T[0] - vand @T[1], @T[1], $twmask -___ - @T=reverse(@T); - -$code.=<<___ if ($i>=10); - vld1.8 {@XMM[$i-10]}, [$inp]! 
-___ -$code.=<<___ if ($i>=11); - veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] -___ -} -$code.=<<___; - vadd.u64 @XMM[8], @XMM[15], @XMM[15] - vst1.64 {@XMM[15]}, [r0,:128]! - vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - veor @XMM[8], @XMM[8], @T[0] - vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak - - vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! - veor @XMM[5], @XMM[5], @XMM[13] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[6], @XMM[6], @XMM[14] - mov r5, $rounds @ pass rounds - veor @XMM[7], @XMM[7], @XMM[15] - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[4], @XMM[11] - vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! - veor @XMM[10], @XMM[2], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - veor @XMM[12], @XMM[3], @XMM[14] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! - veor @XMM[13], @XMM[5], @XMM[15] - vst1.8 {@XMM[12]-@XMM[13]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - - subs $len, #0x80 - bpl .Lxts_dec_loop - -.Lxts_dec_short: - adds $len, #0x70 - bmi .Lxts_dec_done - - vldmia $magic, {$twmask} @ load XTS magic - vshr.s64 @T[0], @XMM[8], #63 - mov r0, sp - vand @T[0], @T[0], $twmask -___ -for($i=9;$i<16;$i++) { -$code.=<<___; - vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] - vst1.64 {@XMM[$i-1]}, [r0,:128]! - vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` - vshr.s64 @T[1], @XMM[$i], #63 - veor @XMM[$i], @XMM[$i], @T[0] - vand @T[1], @T[1], $twmask -___ - @T=reverse(@T); - -$code.=<<___ if ($i>=10); - vld1.8 {@XMM[$i-10]}, [$inp]! - subs $len, #0x10 - bmi .Lxts_dec_`$i-9` -___ -$code.=<<___ if ($i>=11); - veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] -___ -} -$code.=<<___; - sub $len, #0x10 - vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak - - vld1.8 {@XMM[6]}, [$inp]! - veor @XMM[5], @XMM[5], @XMM[13] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[6], @XMM[6], @XMM[14] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[4], @XMM[11] - vld1.64 {@XMM[14]}, [r0,:128]! - veor @XMM[10], @XMM[2], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - veor @XMM[12], @XMM[3], @XMM[14] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! - vst1.8 {@XMM[12]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_6: - vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak - - veor @XMM[4], @XMM[4], @XMM[12] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[5], @XMM[5], @XMM[13] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! 
- veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[4], @XMM[11] - veor @XMM[10], @XMM[2], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - veor @XMM[11], @XMM[7], @XMM[13] - vst1.8 {@XMM[10]-@XMM[11]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_5: - vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak - - veor @XMM[3], @XMM[3], @XMM[11] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[4], @XMM[4], @XMM[12] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - vld1.64 {@XMM[12]}, [r0,:128]! - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[4], @XMM[11] - veor @XMM[10], @XMM[2], @XMM[12] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - vst1.8 {@XMM[10]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_4: - vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak - - veor @XMM[2], @XMM[2], @XMM[10] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[3], @XMM[3], @XMM[11] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! - vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - veor @XMM[9], @XMM[4], @XMM[11] - vst1.8 {@XMM[8]-@XMM[9]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_3: - vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak - - veor @XMM[1], @XMM[1], @XMM[9] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[2], @XMM[2], @XMM[10] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! - vld1.64 {@XMM[10]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - veor @XMM[8], @XMM[6], @XMM[10] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - vst1.8 {@XMM[8]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_2: - vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak - - veor @XMM[0], @XMM[0], @XMM[8] -#ifndef BSAES_ASM_EXTENDED_KEY - add r4, sp, #0x90 @ pass key schedule -#else - add r4, $key, #248 @ pass key schedule -#endif - veor @XMM[1], @XMM[1], @XMM[9] - mov r5, $rounds @ pass rounds - mov r0, sp - - bl _bsaes_decrypt8 - - vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! - veor @XMM[0], @XMM[0], @XMM[ 8] - veor @XMM[1], @XMM[1], @XMM[ 9] - vst1.8 {@XMM[0]-@XMM[1]}, [$out]! - - vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak - b .Lxts_dec_done -.align 4 -.Lxts_dec_1: - mov r0, sp - veor @XMM[0], @XMM[8] - mov r1, sp - vst1.8 {@XMM[0]}, [sp,:128] - mov r2, $key - mov r4, $fp @ preserve fp - mov r5, $magic @ preserve magic - - bl AES_decrypt - - vld1.8 {@XMM[0]}, [sp,:128] - veor @XMM[0], @XMM[0], @XMM[8] - vst1.8 {@XMM[0]}, [$out]! 
-	mov	$fp, r4
-	mov	$magic, r5
-
-	vmov	@XMM[8], @XMM[9]	@ next round tweak
-
-.Lxts_dec_done:
-#ifndef	XTS_CHAIN_TWEAK
-	adds	$len, #0x10
-	beq	.Lxts_dec_ret
-
-	@ calculate one round of extra tweak for the stolen ciphertext
-	vldmia	$magic, {$twmask}
-	vshr.s64 @XMM[6], @XMM[8], #63
-	vand	@XMM[6], @XMM[6], $twmask
-	vadd.u64 @XMM[9], @XMM[8], @XMM[8]
-	vswp	`&Dhi("@XMM[6]")`,`&Dlo("@XMM[6]")`
-	veor	@XMM[9], @XMM[9], @XMM[6]
-
-	@ perform the final decryption with the last tweak value
-	vld1.8	{@XMM[0]}, [$inp]!
-	mov	r0, sp
-	veor	@XMM[0], @XMM[0], @XMM[9]
-	mov	r1, sp
-	vst1.8	{@XMM[0]}, [sp,:128]
-	mov	r2, $key
-	mov	r4, $fp			@ preserve fp
-
-	bl	AES_decrypt
-
-	vld1.8	{@XMM[0]}, [sp,:128]
-	veor	@XMM[0], @XMM[0], @XMM[9]
-	vst1.8	{@XMM[0]}, [$out]
-
-	mov	r6, $out
-.Lxts_dec_steal:
-	ldrb	r1, [$out]
-	ldrb	r0, [$inp], #1
-	strb	r1, [$out, #0x10]
-	strb	r0, [$out], #1
-
-	subs	$len, #1
-	bhi	.Lxts_dec_steal
-
-	vld1.8	{@XMM[0]}, [r6]
-	mov	r0, sp
-	veor	@XMM[0], @XMM[8]
-	mov	r1, sp
-	vst1.8	{@XMM[0]}, [sp,:128]
-	mov	r2, $key
-
-	bl	AES_decrypt
-
-	vld1.8	{@XMM[0]}, [sp,:128]
-	veor	@XMM[0], @XMM[0], @XMM[8]
-	vst1.8	{@XMM[0]}, [r6]
-	mov	$fp, r4
-#endif
-
-.Lxts_dec_ret:
-	bic	r0, $fp, #0xf
-	vmov.i32	q0, #0
-	vmov.i32	q1, #0
-#ifdef	XTS_CHAIN_TWEAK
-	ldr	r1, [$fp, #0x20+VFP_ABI_FRAME]	@ chain tweak
-#endif
-.Lxts_dec_bzero:			@ wipe key schedule [if any]
-	vstmia	sp!, {q0-q1}
-	cmp	sp, r0
-	bne	.Lxts_dec_bzero
-
-	mov	sp, $fp
-#ifdef	XTS_CHAIN_TWEAK
-	vst1.8	{@XMM[8]}, [r1]
-#endif
-	VFP_ABI_POP
-	ldmia	sp!, {r4-r10, pc}	@ return
-
-.size	bsaes_xts_decrypt,.-bsaes_xts_decrypt
-___
-}
-$code.=<<___;
-#endif
-___
-
-$code =~ s/\`([^\`]*)\`/eval($1)/gem;
-
-open SELF,$0;
-while(<SELF>) {
-	next if (/^#!/);
-	last if (!s/^#/@/ and !/^$/);
-	print;
-}
-close SELF;
-
-print $code;
-
-close STDOUT;
diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S
new file mode 100644
index 000000000000..3fecb2124c35
--- /dev/null
+++ b/arch/arm/crypto/chacha20-neon-core.S
@@ -0,0 +1,523 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
+ *
+ * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on:
+ * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSE3 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+
+	.text
+	.fpu		neon
+	.align		5
+
+ENTRY(chacha20_block_xor_neon)
+	// r0: Input state matrix, s
+	// r1: 1 data block output, o
+	// r2: 1 data block input, i
+
+	//
+	// This function encrypts one ChaCha20 block by loading the state matrix
+	// in four NEON registers. It performs the matrix operations on four
+	// words in parallel, but requires shuffling to rearrange the words
+	// after each round.
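+	//
+	// NEON has no 32-bit rotate instruction, so each rotl32(x, n) below
+	// is built from a shift plus a shift-right-insert, e.g. for n == 16:
+	//
+	//	vshl.u32	q3, q4, #16	// q3  = x << 16
+	//	vsri.u32	q3, q4, #16	// q3 |= x >> (32 - 16)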
+ // + + // x0..3 = s0..3 + add ip, r0, #0x20 + vld1.32 {q0-q1}, [r0] + vld1.32 {q2-q3}, [ip] + + vmov q8, q0 + vmov q9, q1 + vmov q10, q2 + vmov q11, q3 + + mov r3, #10 + +.Ldoubleround: + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vadd.i32 q0, q0, q1 + veor q4, q3, q0 + vshl.u32 q3, q4, #16 + vsri.u32 q3, q4, #16 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #12 + vsri.u32 q1, q4, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vadd.i32 q0, q0, q1 + veor q4, q3, q0 + vshl.u32 q3, q4, #8 + vsri.u32 q3, q4, #24 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #7 + vsri.u32 q1, q4, #25 + + // x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + vext.8 q1, q1, q1, #4 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vext.8 q2, q2, q2, #8 + // x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + vext.8 q3, q3, q3, #12 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + vadd.i32 q0, q0, q1 + veor q4, q3, q0 + vshl.u32 q3, q4, #16 + vsri.u32 q3, q4, #16 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #12 + vsri.u32 q1, q4, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + vadd.i32 q0, q0, q1 + veor q4, q3, q0 + vshl.u32 q3, q4, #8 + vsri.u32 q3, q4, #24 + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + vadd.i32 q2, q2, q3 + veor q4, q1, q2 + vshl.u32 q1, q4, #7 + vsri.u32 q1, q4, #25 + + // x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + vext.8 q1, q1, q1, #12 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + vext.8 q2, q2, q2, #8 + // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + vext.8 q3, q3, q3, #4 + + subs r3, r3, #1 + bne .Ldoubleround + + add ip, r2, #0x20 + vld1.8 {q4-q5}, [r2] + vld1.8 {q6-q7}, [ip] + + // o0 = i0 ^ (x0 + s0) + vadd.i32 q0, q0, q8 + veor q0, q0, q4 + + // o1 = i1 ^ (x1 + s1) + vadd.i32 q1, q1, q9 + veor q1, q1, q5 + + // o2 = i2 ^ (x2 + s2) + vadd.i32 q2, q2, q10 + veor q2, q2, q6 + + // o3 = i3 ^ (x3 + s3) + vadd.i32 q3, q3, q11 + veor q3, q3, q7 + + add ip, r1, #0x20 + vst1.8 {q0-q1}, [r1] + vst1.8 {q2-q3}, [ip] + + bx lr +ENDPROC(chacha20_block_xor_neon) + + .align 5 +ENTRY(chacha20_4block_xor_neon) + push {r4-r6, lr} + mov ip, sp // preserve the stack pointer + sub r3, sp, #0x20 // allocate a 32 byte buffer + bic r3, r3, #0x1f // aligned to 32 bytes + mov sp, r3 + + // r0: Input state matrix, s + // r1: 4 data blocks output, o + // r2: 4 data blocks input, i + + // + // This function encrypts four consecutive ChaCha20 blocks by loading + // the state matrix in NEON registers four times. The algorithm performs + // each operation on the corresponding word of each state matrix, hence + // requires no word shuffling. For final XORing step we transpose the + // matrix by interleaving 32- and then 64-bit words, which allows us to + // do XOR in NEON registers. 
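+	//
+	// Roughly: after the rounds, each q register holds one word of all
+	// four blocks, one block per 32-bit lane. For the first four words,
+	// the transpose at the end of the function is
+	//
+	//	vzip.32	q0, q1		// interleave words 0 and 1
+	//	vzip.32	q2, q3		// interleave words 2 and 3
+	//	vswp	d1, d4		// after which q0 = block 0, q2 = block 1,
+	//	vswp	d3, d6		//             q1 = block 2, q3 = block 3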
+ // + + // x0..15[0-3] = s0..3[0..3] + add r3, r0, #0x20 + vld1.32 {q0-q1}, [r0] + vld1.32 {q2-q3}, [r3] + + adr r3, CTRINC + vdup.32 q15, d7[1] + vdup.32 q14, d7[0] + vld1.32 {q11}, [r3, :128] + vdup.32 q13, d6[1] + vdup.32 q12, d6[0] + vadd.i32 q12, q12, q11 // x12 += counter values 0-3 + vdup.32 q11, d5[1] + vdup.32 q10, d5[0] + vdup.32 q9, d4[1] + vdup.32 q8, d4[0] + vdup.32 q7, d3[1] + vdup.32 q6, d3[0] + vdup.32 q5, d2[1] + vdup.32 q4, d2[0] + vdup.32 q3, d1[1] + vdup.32 q2, d1[0] + vdup.32 q1, d0[1] + vdup.32 q0, d0[0] + + mov r3, #10 + +.Ldoubleround4: + // x0 += x4, x12 = rotl32(x12 ^ x0, 16) + // x1 += x5, x13 = rotl32(x13 ^ x1, 16) + // x2 += x6, x14 = rotl32(x14 ^ x2, 16) + // x3 += x7, x15 = rotl32(x15 ^ x3, 16) + vadd.i32 q0, q0, q4 + vadd.i32 q1, q1, q5 + vadd.i32 q2, q2, q6 + vadd.i32 q3, q3, q7 + + veor q12, q12, q0 + veor q13, q13, q1 + veor q14, q14, q2 + veor q15, q15, q3 + + vrev32.16 q12, q12 + vrev32.16 q13, q13 + vrev32.16 q14, q14 + vrev32.16 q15, q15 + + // x8 += x12, x4 = rotl32(x4 ^ x8, 12) + // x9 += x13, x5 = rotl32(x5 ^ x9, 12) + // x10 += x14, x6 = rotl32(x6 ^ x10, 12) + // x11 += x15, x7 = rotl32(x7 ^ x11, 12) + vadd.i32 q8, q8, q12 + vadd.i32 q9, q9, q13 + vadd.i32 q10, q10, q14 + vadd.i32 q11, q11, q15 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q4, q8 + veor q9, q5, q9 + vshl.u32 q4, q8, #12 + vshl.u32 q5, q9, #12 + vsri.u32 q4, q8, #20 + vsri.u32 q5, q9, #20 + + veor q8, q6, q10 + veor q9, q7, q11 + vshl.u32 q6, q8, #12 + vshl.u32 q7, q9, #12 + vsri.u32 q6, q8, #20 + vsri.u32 q7, q9, #20 + + // x0 += x4, x12 = rotl32(x12 ^ x0, 8) + // x1 += x5, x13 = rotl32(x13 ^ x1, 8) + // x2 += x6, x14 = rotl32(x14 ^ x2, 8) + // x3 += x7, x15 = rotl32(x15 ^ x3, 8) + vadd.i32 q0, q0, q4 + vadd.i32 q1, q1, q5 + vadd.i32 q2, q2, q6 + vadd.i32 q3, q3, q7 + + veor q8, q12, q0 + veor q9, q13, q1 + vshl.u32 q12, q8, #8 + vshl.u32 q13, q9, #8 + vsri.u32 q12, q8, #24 + vsri.u32 q13, q9, #24 + + veor q8, q14, q2 + veor q9, q15, q3 + vshl.u32 q14, q8, #8 + vshl.u32 q15, q9, #8 + vsri.u32 q14, q8, #24 + vsri.u32 q15, q9, #24 + + vld1.32 {q8-q9}, [sp, :256] + + // x8 += x12, x4 = rotl32(x4 ^ x8, 7) + // x9 += x13, x5 = rotl32(x5 ^ x9, 7) + // x10 += x14, x6 = rotl32(x6 ^ x10, 7) + // x11 += x15, x7 = rotl32(x7 ^ x11, 7) + vadd.i32 q8, q8, q12 + vadd.i32 q9, q9, q13 + vadd.i32 q10, q10, q14 + vadd.i32 q11, q11, q15 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q4, q8 + veor q9, q5, q9 + vshl.u32 q4, q8, #7 + vshl.u32 q5, q9, #7 + vsri.u32 q4, q8, #25 + vsri.u32 q5, q9, #25 + + veor q8, q6, q10 + veor q9, q7, q11 + vshl.u32 q6, q8, #7 + vshl.u32 q7, q9, #7 + vsri.u32 q6, q8, #25 + vsri.u32 q7, q9, #25 + + vld1.32 {q8-q9}, [sp, :256] + + // x0 += x5, x15 = rotl32(x15 ^ x0, 16) + // x1 += x6, x12 = rotl32(x12 ^ x1, 16) + // x2 += x7, x13 = rotl32(x13 ^ x2, 16) + // x3 += x4, x14 = rotl32(x14 ^ x3, 16) + vadd.i32 q0, q0, q5 + vadd.i32 q1, q1, q6 + vadd.i32 q2, q2, q7 + vadd.i32 q3, q3, q4 + + veor q15, q15, q0 + veor q12, q12, q1 + veor q13, q13, q2 + veor q14, q14, q3 + + vrev32.16 q15, q15 + vrev32.16 q12, q12 + vrev32.16 q13, q13 + vrev32.16 q14, q14 + + // x10 += x15, x5 = rotl32(x5 ^ x10, 12) + // x11 += x12, x6 = rotl32(x6 ^ x11, 12) + // x8 += x13, x7 = rotl32(x7 ^ x8, 12) + // x9 += x14, x4 = rotl32(x4 ^ x9, 12) + vadd.i32 q10, q10, q15 + vadd.i32 q11, q11, q12 + vadd.i32 q8, q8, q13 + vadd.i32 q9, q9, q14 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q7, q8 + veor q9, q4, q9 + vshl.u32 q7, q8, #12 + vshl.u32 q4, q9, #12 + vsri.u32 q7, q8, #20 + vsri.u32 q4, q9, #20 + + veor 
q8, q5, q10 + veor q9, q6, q11 + vshl.u32 q5, q8, #12 + vshl.u32 q6, q9, #12 + vsri.u32 q5, q8, #20 + vsri.u32 q6, q9, #20 + + // x0 += x5, x15 = rotl32(x15 ^ x0, 8) + // x1 += x6, x12 = rotl32(x12 ^ x1, 8) + // x2 += x7, x13 = rotl32(x13 ^ x2, 8) + // x3 += x4, x14 = rotl32(x14 ^ x3, 8) + vadd.i32 q0, q0, q5 + vadd.i32 q1, q1, q6 + vadd.i32 q2, q2, q7 + vadd.i32 q3, q3, q4 + + veor q8, q15, q0 + veor q9, q12, q1 + vshl.u32 q15, q8, #8 + vshl.u32 q12, q9, #8 + vsri.u32 q15, q8, #24 + vsri.u32 q12, q9, #24 + + veor q8, q13, q2 + veor q9, q14, q3 + vshl.u32 q13, q8, #8 + vshl.u32 q14, q9, #8 + vsri.u32 q13, q8, #24 + vsri.u32 q14, q9, #24 + + vld1.32 {q8-q9}, [sp, :256] + + // x10 += x15, x5 = rotl32(x5 ^ x10, 7) + // x11 += x12, x6 = rotl32(x6 ^ x11, 7) + // x8 += x13, x7 = rotl32(x7 ^ x8, 7) + // x9 += x14, x4 = rotl32(x4 ^ x9, 7) + vadd.i32 q10, q10, q15 + vadd.i32 q11, q11, q12 + vadd.i32 q8, q8, q13 + vadd.i32 q9, q9, q14 + + vst1.32 {q8-q9}, [sp, :256] + + veor q8, q7, q8 + veor q9, q4, q9 + vshl.u32 q7, q8, #7 + vshl.u32 q4, q9, #7 + vsri.u32 q7, q8, #25 + vsri.u32 q4, q9, #25 + + veor q8, q5, q10 + veor q9, q6, q11 + vshl.u32 q5, q8, #7 + vshl.u32 q6, q9, #7 + vsri.u32 q5, q8, #25 + vsri.u32 q6, q9, #25 + + subs r3, r3, #1 + beq 0f + + vld1.32 {q8-q9}, [sp, :256] + b .Ldoubleround4 + + // x0[0-3] += s0[0] + // x1[0-3] += s0[1] + // x2[0-3] += s0[2] + // x3[0-3] += s0[3] +0: ldmia r0!, {r3-r6} + vdup.32 q8, r3 + vdup.32 q9, r4 + vadd.i32 q0, q0, q8 + vadd.i32 q1, q1, q9 + vdup.32 q8, r5 + vdup.32 q9, r6 + vadd.i32 q2, q2, q8 + vadd.i32 q3, q3, q9 + + // x4[0-3] += s1[0] + // x5[0-3] += s1[1] + // x6[0-3] += s1[2] + // x7[0-3] += s1[3] + ldmia r0!, {r3-r6} + vdup.32 q8, r3 + vdup.32 q9, r4 + vadd.i32 q4, q4, q8 + vadd.i32 q5, q5, q9 + vdup.32 q8, r5 + vdup.32 q9, r6 + vadd.i32 q6, q6, q8 + vadd.i32 q7, q7, q9 + + // interleave 32-bit words in state n, n+1 + vzip.32 q0, q1 + vzip.32 q2, q3 + vzip.32 q4, q5 + vzip.32 q6, q7 + + // interleave 64-bit words in state n, n+2 + vswp d1, d4 + vswp d3, d6 + vswp d9, d12 + vswp d11, d14 + + // xor with corresponding input, write to output + vld1.8 {q8-q9}, [r2]! + veor q8, q8, q0 + veor q9, q9, q4 + vst1.8 {q8-q9}, [r1]! + + vld1.32 {q8-q9}, [sp, :256] + + // x8[0-3] += s2[0] + // x9[0-3] += s2[1] + // x10[0-3] += s2[2] + // x11[0-3] += s2[3] + ldmia r0!, {r3-r6} + vdup.32 q0, r3 + vdup.32 q4, r4 + vadd.i32 q8, q8, q0 + vadd.i32 q9, q9, q4 + vdup.32 q0, r5 + vdup.32 q4, r6 + vadd.i32 q10, q10, q0 + vadd.i32 q11, q11, q4 + + // x12[0-3] += s3[0] + // x13[0-3] += s3[1] + // x14[0-3] += s3[2] + // x15[0-3] += s3[3] + ldmia r0!, {r3-r6} + vdup.32 q0, r3 + vdup.32 q4, r4 + adr r3, CTRINC + vadd.i32 q12, q12, q0 + vld1.32 {q0}, [r3, :128] + vadd.i32 q13, q13, q4 + vadd.i32 q12, q12, q0 // x12 += counter values 0-3 + + vdup.32 q0, r5 + vdup.32 q4, r6 + vadd.i32 q14, q14, q0 + vadd.i32 q15, q15, q4 + + // interleave 32-bit words in state n, n+1 + vzip.32 q8, q9 + vzip.32 q10, q11 + vzip.32 q12, q13 + vzip.32 q14, q15 + + // interleave 64-bit words in state n, n+2 + vswp d17, d20 + vswp d19, d22 + vswp d25, d28 + vswp d27, d30 + + vmov q4, q1 + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q8 + veor q1, q1, q12 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q2 + veor q1, q1, q6 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q10 + veor q1, q1, q14 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q4 + veor q1, q1, q5 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! 
+ veor q0, q0, q9 + veor q1, q1, q13 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2]! + veor q0, q0, q3 + veor q1, q1, q7 + vst1.8 {q0-q1}, [r1]! + + vld1.8 {q0-q1}, [r2] + veor q0, q0, q11 + veor q1, q1, q15 + vst1.8 {q0-q1}, [r1] + + mov sp, ip + pop {r4-r6, pc} +ENDPROC(chacha20_4block_xor_neon) + + .align 4 +CTRINC: .word 0, 1, 2, 3 diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c new file mode 100644 index 000000000000..59a7be08e80c --- /dev/null +++ b/arch/arm/crypto/chacha20-neon-glue.c @@ -0,0 +1,127 @@ +/* + * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions + * + * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <crypto/algapi.h> +#include <crypto/chacha20.h> +#include <crypto/internal/skcipher.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/simd.h> + +asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); +asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); + +static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes) +{ + u8 buf[CHACHA20_BLOCK_SIZE]; + + while (bytes >= CHACHA20_BLOCK_SIZE * 4) { + chacha20_4block_xor_neon(state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE * 4; + src += CHACHA20_BLOCK_SIZE * 4; + dst += CHACHA20_BLOCK_SIZE * 4; + state[12] += 4; + } + while (bytes >= CHACHA20_BLOCK_SIZE) { + chacha20_block_xor_neon(state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE; + src += CHACHA20_BLOCK_SIZE; + dst += CHACHA20_BLOCK_SIZE; + state[12]++; + } + if (bytes) { + memcpy(buf, src, bytes); + chacha20_block_xor_neon(state, buf, buf); + memcpy(dst, buf, bytes); + } +} + +static int chacha20_neon(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + u32 state[16]; + int err; + + if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd()) + return crypto_chacha20_crypt(req); + + err = skcipher_walk_virt(&walk, req, true); + + crypto_chacha20_init(state, ctx, walk.iv); + + kernel_neon_begin(); + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, + nbytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + kernel_neon_end(); + + return err; +} + +static struct skcipher_alg alg = { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA20_KEY_SIZE, + .max_keysize = CHACHA20_KEY_SIZE, + .ivsize = CHACHA20_IV_SIZE, + .chunksize = CHACHA20_BLOCK_SIZE, + .walksize = 4 * CHACHA20_BLOCK_SIZE, + .setkey = 
crypto_chacha20_setkey, + .encrypt = chacha20_neon, + .decrypt = chacha20_neon, +}; + +static int __init chacha20_simd_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_NEON)) + return -ENODEV; + + return crypto_register_skcipher(&alg); +} + +static void __exit chacha20_simd_mod_fini(void) +{ + crypto_unregister_skcipher(&alg); +} + +module_init(chacha20_simd_mod_init); +module_exit(chacha20_simd_mod_fini); + +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 450a85df041a..bed7feddfeed 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -41,6 +41,10 @@ config CRYPTO_CRC32_ARM64_CE depends on KERNEL_MODE_NEON && CRC32 select CRYPTO_HASH +config CRYPTO_AES_ARM64 + tristate "AES core cipher using scalar instructions" + select CRYPTO_AES + config CRYPTO_AES_ARM64_CE tristate "AES core cipher using ARMv8 Crypto Extensions" depends on ARM64 && KERNEL_MODE_NEON @@ -72,4 +76,17 @@ config CRYPTO_CRC32_ARM64 depends on ARM64 select CRYPTO_HASH +config CRYPTO_CHACHA20_NEON + tristate "NEON accelerated ChaCha20 symmetric cipher" + depends on KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_CHACHA20 + +config CRYPTO_AES_ARM64_BS + tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm" + depends on KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_AES_ARM64_NEON_BLK + select CRYPTO_SIMD + endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index aa8888d7b744..d1ae1b9cbe70 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -41,6 +41,15 @@ sha256-arm64-y := sha256-glue.o sha256-core.o obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o sha512-arm64-y := sha512-glue.o sha512-core.o +obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o +chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o + +obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o +aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o + +obj-$(CONFIG_CRYPTO_AES_ARM64_BS) += aes-neon-bs.o +aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o + AFLAGS_aes-ce.o := -DINTERLEAVE=4 AFLAGS_aes-neon.o := -DINTERLEAVE=4 diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index cc5515dac74a..6a7dbc7c83a6 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -258,7 +258,6 @@ static struct aead_alg ccm_aes_alg = { .cra_priority = 300, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, .cra_module = THIS_MODULE, }, .ivsize = AES_BLOCK_SIZE, diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S new file mode 100644 index 000000000000..f2f9cc519309 --- /dev/null +++ b/arch/arm64/crypto/aes-cipher-core.S @@ -0,0 +1,110 @@ +/* + * Scalar AES core transform + * + * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + + rk .req x0 + out .req x1 + in .req x2 + rounds .req x3 + tt .req x4 + lt .req x2 + + .macro __pair, enc, reg0, reg1, in0, in1e, in1d, shift + ubfx \reg0, \in0, #\shift, #8 + .if \enc + ubfx \reg1, \in1e, #\shift, #8 + .else + ubfx \reg1, \in1d, #\shift, #8 + .endif + ldr \reg0, [tt, \reg0, uxtw #2] + ldr \reg1, [tt, \reg1, uxtw #2] + .endm + + .macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc + ldp \out0, \out1, [rk], #8 + + __pair \enc, w13, w14, \in0, \in1, \in3, 0 + __pair \enc, w15, w16, \in1, \in2, \in0, 8 + __pair \enc, w17, w18, \in2, \in3, \in1, 16 + __pair \enc, \t0, \t1, \in3, \in0, \in2, 24 + + eor \out0, \out0, w13 + eor \out1, \out1, w14 + eor \out0, \out0, w15, ror #24 + eor \out1, \out1, w16, ror #24 + eor \out0, \out0, w17, ror #16 + eor \out1, \out1, w18, ror #16 + eor \out0, \out0, \t0, ror #8 + eor \out1, \out1, \t1, ror #8 + .endm + + .macro fround, out0, out1, out2, out3, in0, in1, in2, in3 + __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1 + __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1 + .endm + + .macro iround, out0, out1, out2, out3, in0, in1, in2, in3 + __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0 + __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0 + .endm + + .macro do_crypt, round, ttab, ltab + ldp w5, w6, [in] + ldp w7, w8, [in, #8] + ldp w9, w10, [rk], #16 + ldp w11, w12, [rk, #-8] + +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) +CPU_BE( rev w7, w7 ) +CPU_BE( rev w8, w8 ) + + eor w5, w5, w9 + eor w6, w6, w10 + eor w7, w7, w11 + eor w8, w8, w12 + + adr_l tt, \ttab + adr_l lt, \ltab + + tbnz rounds, #1, 1f + +0: \round w9, w10, w11, w12, w5, w6, w7, w8 + \round w5, w6, w7, w8, w9, w10, w11, w12 + +1: subs rounds, rounds, #4 + \round w9, w10, w11, w12, w5, w6, w7, w8 + csel tt, tt, lt, hi + \round w5, w6, w7, w8, w9, w10, w11, w12 + b.hi 0b + +CPU_BE( rev w5, w5 ) +CPU_BE( rev w6, w6 ) +CPU_BE( rev w7, w7 ) +CPU_BE( rev w8, w8 ) + + stp w5, w6, [out] + stp w7, w8, [out, #8] + ret + .endm + + .align 5 +ENTRY(__aes_arm64_encrypt) + do_crypt fround, crypto_ft_tab, crypto_fl_tab +ENDPROC(__aes_arm64_encrypt) + + .align 5 +ENTRY(__aes_arm64_decrypt) + do_crypt iround, crypto_it_tab, crypto_il_tab +ENDPROC(__aes_arm64_decrypt) diff --git a/arch/arm64/crypto/aes-cipher-glue.c b/arch/arm64/crypto/aes-cipher-glue.c new file mode 100644 index 000000000000..7288e7cbebff --- /dev/null +++ b/arch/arm64/crypto/aes-cipher-glue.c @@ -0,0 +1,69 @@ +/* + * Scalar AES core transform + * + * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <crypto/aes.h> +#include <linux/crypto.h> +#include <linux/module.h> + +asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); +EXPORT_SYMBOL(__aes_arm64_encrypt); + +asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds); +EXPORT_SYMBOL(__aes_arm64_decrypt); + +static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + int rounds = 6 + ctx->key_length / 4; + + __aes_arm64_encrypt(ctx->key_enc, out, in, rounds); +} + +static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + int rounds = 6 + ctx->key_length / 4; + + __aes_arm64_decrypt(ctx->key_dec, out, in, rounds); +} + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-arm64", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_module = THIS_MODULE, + + .cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE, + .cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE, + .cra_cipher.cia_setkey = crypto_aes_set_key, + .cra_cipher.cia_encrypt = aes_encrypt, + .cra_cipher.cia_decrypt = aes_decrypt +}; + +static int __init aes_init(void) +{ + return crypto_register_alg(&aes_alg); +} + +static void __exit aes_fini(void) +{ + crypto_unregister_alg(&aes_alg); +} + +module_init(aes_init); +module_exit(aes_fini); + +MODULE_DESCRIPTION("Scalar AES cipher for arm64"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("aes"); diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 4e3f8adb1793..055bc3f61138 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -215,14 +215,15 @@ static int ctr_encrypt(struct skcipher_request *req) u8 *tsrc = walk.src.virt.addr; /* - * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need - * to tell aes_ctr_encrypt() to only read half a block. + * Tell aes_ctr_encrypt() to process a tail block. */ - blocks = (nbytes <= 8) ? 
-1 : 1; + blocks = -1; - aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds, + aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds, blocks, walk.iv, first); - memcpy(tdst, tail, nbytes); + if (tdst != tsrc) + memcpy(tdst, tsrc, nbytes); + crypto_xor(tdst, tail, nbytes); err = skcipher_walk_done(&walk, 0); } kernel_neon_end(); @@ -282,7 +283,6 @@ static struct skcipher_alg aes_algs[] = { { .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, .cra_module = THIS_MODULE, }, .min_keysize = AES_MIN_KEY_SIZE, @@ -298,7 +298,6 @@ static struct skcipher_alg aes_algs[] = { { .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, .cra_module = THIS_MODULE, }, .min_keysize = AES_MIN_KEY_SIZE, @@ -315,7 +314,22 @@ static struct skcipher_alg aes_algs[] = { { .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .setkey = skcipher_aes_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, +}, { + .base = { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-" MODE, + .cra_priority = PRIO - 1, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), .cra_module = THIS_MODULE, }, .min_keysize = AES_MIN_KEY_SIZE, @@ -333,7 +347,6 @@ static struct skcipher_alg aes_algs[] = { { .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), - .cra_alignmask = 7, .cra_module = THIS_MODULE, }, .min_keysize = 2 * AES_MIN_KEY_SIZE, @@ -350,8 +363,9 @@ static void aes_exit(void) { int i; - for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++) - simd_skcipher_free(aes_simd_algs[i]); + for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++) + if (aes_simd_algs[i]) + simd_skcipher_free(aes_simd_algs[i]); crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); } @@ -370,6 +384,9 @@ static int __init aes_init(void) return err; for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL)) + continue; + algname = aes_algs[i].base.cra_name + 2; drvname = aes_algs[i].base.cra_driver_name + 2; basename = aes_algs[i].base.cra_driver_name; @@ -392,5 +409,7 @@ unregister_simds: module_cpu_feature_match(AES, aes_init); #else module_init(aes_init); +EXPORT_SYMBOL(neon_aes_ecb_encrypt); +EXPORT_SYMBOL(neon_aes_cbc_encrypt); #endif module_exit(aes_exit); diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 838dad5c209f..92b982a8b112 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -337,7 +337,7 @@ AES_ENTRY(aes_ctr_encrypt) .Lctrcarrydone: subs w4, w4, #1 - bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */ + bmi .Lctrtailblock /* blocks <0 means tail block */ ld1 {v3.16b}, [x1], #16 eor v3.16b, v0.16b, v3.16b st1 {v3.16b}, [x0], #16 @@ -348,10 +348,8 @@ AES_ENTRY(aes_ctr_encrypt) FRAME_POP ret -.Lctrhalfblock: - ld1 {v3.8b}, [x1] - eor v3.8b, v0.8b, v3.8b - st1 {v3.8b}, [x0] +.Lctrtailblock: + st1 {v0.16b}, [x0] FRAME_POP ret diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S index 85f07ead7c5c..f1e3aa2732f9 100644 --- a/arch/arm64/crypto/aes-neon.S +++ b/arch/arm64/crypto/aes-neon.S @@ -1,7 +1,7 @@ /* * 
linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON * - * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> + * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -17,17 +17,25 @@ /* multiply by polynomial 'x' in GF(2^8) */ .macro mul_by_x, out, in, temp, const sshr \temp, \in, #7 - add \out, \in, \in + shl \out, \in, #1 and \temp, \temp, \const eor \out, \out, \temp .endm + /* multiply by polynomial 'x^2' in GF(2^8) */ + .macro mul_by_x2, out, in, temp, const + ushr \temp, \in, #6 + shl \out, \in, #2 + pmul \temp, \temp, \const + eor \out, \out, \temp + .endm + /* preload the entire Sbox */ .macro prepare, sbox, shiftrows, temp adr \temp, \sbox - movi v12.16b, #0x40 + movi v12.16b, #0x1b ldr q13, \shiftrows - movi v14.16b, #0x1b + ldr q14, .Lror32by8 ld1 {v16.16b-v19.16b}, [\temp], #64 ld1 {v20.16b-v23.16b}, [\temp], #64 ld1 {v24.16b-v27.16b}, [\temp], #64 @@ -50,37 +58,31 @@ /* apply SubBytes transformation using the the preloaded Sbox */ .macro sub_bytes, in - sub v9.16b, \in\().16b, v12.16b + sub v9.16b, \in\().16b, v15.16b tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b - sub v10.16b, v9.16b, v12.16b + sub v10.16b, v9.16b, v15.16b tbx \in\().16b, {v20.16b-v23.16b}, v9.16b - sub v11.16b, v10.16b, v12.16b + sub v11.16b, v10.16b, v15.16b tbx \in\().16b, {v24.16b-v27.16b}, v10.16b tbx \in\().16b, {v28.16b-v31.16b}, v11.16b .endm /* apply MixColumns transformation */ - .macro mix_columns, in - mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b - rev32 v8.8h, \in\().8h - eor \in\().16b, v10.16b, \in\().16b - shl v9.4s, v8.4s, #24 - shl v11.4s, \in\().4s, #24 - sri v9.4s, v8.4s, #8 - sri v11.4s, \in\().4s, #8 - eor v9.16b, v9.16b, v8.16b - eor v10.16b, v10.16b, v9.16b - eor \in\().16b, v10.16b, v11.16b - .endm - + .macro mix_columns, in, enc + .if \enc == 0 /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */ - .macro inv_mix_columns, in - mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b - mul_by_x v11.16b, v11.16b, v10.16b, v14.16b - eor \in\().16b, \in\().16b, v11.16b - rev32 v11.8h, v11.8h - eor \in\().16b, \in\().16b, v11.16b - mix_columns \in + mul_by_x2 v8.16b, \in\().16b, v9.16b, v12.16b + eor \in\().16b, \in\().16b, v8.16b + rev32 v8.8h, v8.8h + eor \in\().16b, \in\().16b, v8.16b + .endif + + mul_by_x v9.16b, \in\().16b, v8.16b, v12.16b + rev32 v8.8h, \in\().8h + eor v8.16b, v8.16b, v9.16b + eor \in\().16b, \in\().16b, v8.16b + tbl \in\().16b, {\in\().16b}, v14.16b + eor \in\().16b, \in\().16b, v8.16b .endm .macro do_block, enc, in, rounds, rk, rkp, i @@ -88,16 +90,13 @@ add \rkp, \rk, #16 mov \i, \rounds 1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ + movi v15.16b, #0x40 tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */ sub_bytes \in - ld1 {v15.4s}, [\rkp], #16 subs \i, \i, #1 + ld1 {v15.4s}, [\rkp], #16 beq 2222f - .if \enc == 1 - mix_columns \in - .else - inv_mix_columns \in - .endif + mix_columns \in, \enc b 1111b 2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ .endm @@ -116,139 +115,114 @@ */ .macro sub_bytes_2x, in0, in1 - sub v8.16b, \in0\().16b, v12.16b - sub v9.16b, \in1\().16b, v12.16b + sub v8.16b, \in0\().16b, v15.16b tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b + sub v9.16b, \in1\().16b, v15.16b tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b - sub v10.16b, v8.16b, v12.16b - sub v11.16b, v9.16b, v12.16b + sub v10.16b, v8.16b, v15.16b tbx \in0\().16b, {v20.16b-v23.16b}, 
v8.16b + sub v11.16b, v9.16b, v15.16b tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b - sub v8.16b, v10.16b, v12.16b - sub v9.16b, v11.16b, v12.16b + sub v8.16b, v10.16b, v15.16b tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b + sub v9.16b, v11.16b, v15.16b tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b .endm .macro sub_bytes_4x, in0, in1, in2, in3 - sub v8.16b, \in0\().16b, v12.16b + sub v8.16b, \in0\().16b, v15.16b tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b - sub v9.16b, \in1\().16b, v12.16b + sub v9.16b, \in1\().16b, v15.16b tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b - sub v10.16b, \in2\().16b, v12.16b + sub v10.16b, \in2\().16b, v15.16b tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b - sub v11.16b, \in3\().16b, v12.16b + sub v11.16b, \in3\().16b, v15.16b tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b - sub v8.16b, v8.16b, v12.16b + sub v8.16b, v8.16b, v15.16b tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b - sub v9.16b, v9.16b, v12.16b + sub v9.16b, v9.16b, v15.16b tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b - sub v10.16b, v10.16b, v12.16b + sub v10.16b, v10.16b, v15.16b tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b - sub v11.16b, v11.16b, v12.16b + sub v11.16b, v11.16b, v15.16b tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b - sub v8.16b, v8.16b, v12.16b + sub v8.16b, v8.16b, v15.16b tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b - sub v9.16b, v9.16b, v12.16b + sub v9.16b, v9.16b, v15.16b tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b - sub v10.16b, v10.16b, v12.16b + sub v10.16b, v10.16b, v15.16b tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b - sub v11.16b, v11.16b, v12.16b + sub v11.16b, v11.16b, v15.16b tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b .endm .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const - sshr \tmp0\().16b, \in0\().16b, #7 - add \out0\().16b, \in0\().16b, \in0\().16b - sshr \tmp1\().16b, \in1\().16b, #7 + sshr \tmp0\().16b, \in0\().16b, #7 + shl \out0\().16b, \in0\().16b, #1 + sshr \tmp1\().16b, \in1\().16b, #7 and \tmp0\().16b, \tmp0\().16b, \const\().16b - add \out1\().16b, \in1\().16b, \in1\().16b + shl \out1\().16b, \in1\().16b, #1 and \tmp1\().16b, \tmp1\().16b, \const\().16b eor \out0\().16b, \out0\().16b, \tmp0\().16b eor \out1\().16b, \out1\().16b, \tmp1\().16b .endm - .macro mix_columns_2x, in0, in1 - mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 - rev32 v10.8h, \in0\().8h - rev32 v11.8h, \in1\().8h - eor \in0\().16b, v8.16b, \in0\().16b - eor \in1\().16b, v9.16b, \in1\().16b - shl v12.4s, v10.4s, #24 - shl v13.4s, v11.4s, #24 - eor v8.16b, v8.16b, v10.16b - sri v12.4s, v10.4s, #8 - shl v10.4s, \in0\().4s, #24 - eor v9.16b, v9.16b, v11.16b - sri v13.4s, v11.4s, #8 - shl v11.4s, \in1\().4s, #24 - sri v10.4s, \in0\().4s, #8 - eor \in0\().16b, v8.16b, v12.16b - sri v11.4s, \in1\().4s, #8 - eor \in1\().16b, v9.16b, v13.16b - eor \in0\().16b, v10.16b, \in0\().16b - eor \in1\().16b, v11.16b, \in1\().16b + .macro mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const + ushr \tmp0\().16b, \in0\().16b, #6 + shl \out0\().16b, \in0\().16b, #2 + ushr \tmp1\().16b, \in1\().16b, #6 + pmul \tmp0\().16b, \tmp0\().16b, \const\().16b + shl \out1\().16b, \in1\().16b, #2 + pmul \tmp1\().16b, \tmp1\().16b, \const\().16b + eor \out0\().16b, \out0\().16b, \tmp0\().16b + eor \out1\().16b, \out1\().16b, \tmp1\().16b .endm - .macro 
inv_mix_cols_2x, in0, in1 - mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 - mul_by_x_2x v8, v9, v8, v9, v10, v11, v14 + .macro mix_columns_2x, in0, in1, enc + .if \enc == 0 + /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */ + mul_by_x2_2x v8, v9, \in0, \in1, v10, v11, v12 eor \in0\().16b, \in0\().16b, v8.16b - eor \in1\().16b, \in1\().16b, v9.16b rev32 v8.8h, v8.8h - rev32 v9.8h, v9.8h - eor \in0\().16b, \in0\().16b, v8.16b - eor \in1\().16b, \in1\().16b, v9.16b - mix_columns_2x \in0, \in1 - .endm - - .macro inv_mix_cols_4x, in0, in1, in2, in3 - mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 - mul_by_x_2x v10, v11, \in2, \in3, v12, v13, v14 - mul_by_x_2x v8, v9, v8, v9, v12, v13, v14 - mul_by_x_2x v10, v11, v10, v11, v12, v13, v14 - eor \in0\().16b, \in0\().16b, v8.16b eor \in1\().16b, \in1\().16b, v9.16b - eor \in2\().16b, \in2\().16b, v10.16b - eor \in3\().16b, \in3\().16b, v11.16b - rev32 v8.8h, v8.8h rev32 v9.8h, v9.8h - rev32 v10.8h, v10.8h - rev32 v11.8h, v11.8h eor \in0\().16b, \in0\().16b, v8.16b eor \in1\().16b, \in1\().16b, v9.16b - eor \in2\().16b, \in2\().16b, v10.16b - eor \in3\().16b, \in3\().16b, v11.16b - mix_columns_2x \in0, \in1 - mix_columns_2x \in2, \in3 + .endif + + mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12 + rev32 v10.8h, \in0\().8h + rev32 v11.8h, \in1\().8h + eor v10.16b, v10.16b, v8.16b + eor v11.16b, v11.16b, v9.16b + eor \in0\().16b, \in0\().16b, v10.16b + eor \in1\().16b, \in1\().16b, v11.16b + tbl \in0\().16b, {\in0\().16b}, v14.16b + tbl \in1\().16b, {\in1\().16b}, v14.16b + eor \in0\().16b, \in0\().16b, v10.16b + eor \in1\().16b, \in1\().16b, v11.16b .endm - .macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i + .macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ - sub_bytes_2x \in0, \in1 + movi v15.16b, #0x40 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ - ld1 {v15.4s}, [\rkp], #16 + sub_bytes_2x \in0, \in1 subs \i, \i, #1 + ld1 {v15.4s}, [\rkp], #16 beq 2222f - .if \enc == 1 - mix_columns_2x \in0, \in1 - ldr q13, .LForward_ShiftRows - .else - inv_mix_cols_2x \in0, \in1 - ldr q13, .LReverse_ShiftRows - .endif - movi v12.16b, #0x40 + mix_columns_2x \in0, \in1, \enc b 1111b 2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ @@ -262,23 +236,17 @@ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ - sub_bytes_4x \in0, \in1, \in2, \in3 + movi v15.16b, #0x40 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */ tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ - ld1 {v15.4s}, [\rkp], #16 + sub_bytes_4x \in0, \in1, \in2, \in3 subs \i, \i, #1 + ld1 {v15.4s}, [\rkp], #16 beq 2222f - .if \enc == 1 - mix_columns_2x \in0, \in1 - mix_columns_2x \in2, \in3 - ldr q13, .LForward_ShiftRows - .else - inv_mix_cols_4x \in0, \in1, \in2, \in3 - ldr q13, .LReverse_ShiftRows - .endif - movi v12.16b, #0x40 + mix_columns_2x \in0, \in1, \enc + mix_columns_2x \in2, \in3, \enc b 1111b 2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ @@ -305,19 +273,7 @@ #include "aes-modes.S" .text 
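[Note: for reference, the GF(2^8) doublings that the reworked mix_columns path vectorises (mul_by_x via sshr/and, the new mul_by_x2 via ushr/pmul against 0x1b) look like this in scalar C. A minimal sketch, not part of the patch:

#include <stdint.h>

/* multiply by x: shift left, reduce by the AES polynomial 0x1b on carry */
static uint8_t mul_by_x(uint8_t b)
{
	return (uint8_t)(b << 1) ^ ((b & 0x80) ? 0x1b : 0x00);
}

/* multiply by x^2: the two carry bits select 0x00, 0x1b, 0x36 or 0x2d,
 * which is exactly a carryless multiply (pmul) of (b >> 6) by 0x1b */
static uint8_t mul_by_x2(uint8_t b)
{
	static const uint8_t fold[4] = { 0x00, 0x1b, 0x36, 0x2d };

	return (uint8_t)(b << 2) ^ fold[b >> 6];
}
]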
- .align 4 -.LForward_ShiftRows: -CPU_LE( .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 ) -CPU_LE( .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb ) -CPU_BE( .byte 0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8 ) -CPU_BE( .byte 0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0 ) - -.LReverse_ShiftRows: -CPU_LE( .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb ) -CPU_LE( .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 ) -CPU_BE( .byte 0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8 ) -CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 ) - + .align 6 .LForward_Sbox: .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 @@ -385,3 +341,12 @@ CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 ) .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + +.LForward_ShiftRows: + .octa 0x0b06010c07020d08030e09040f0a0500 + +.LReverse_ShiftRows: + .octa 0x0306090c0f0205080b0e0104070a0d00 + +.Lror32by8: + .octa 0x0c0f0e0d080b0a090407060500030201 diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S new file mode 100644 index 000000000000..ca0472500433 --- /dev/null +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -0,0 +1,972 @@ +/* + * Bit sliced AES using NEON instructions + * + * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * The algorithm implemented here is described in detail by the paper + * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and + * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf) + * + * This implementation is based primarily on the OpenSSL implementation + * for 32-bit ARM written by Andy Polyakov <appro@openssl.org> + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + + rounds .req x11 + bskey .req x12 + + .macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 + eor \b2, \b2, \b1 + eor \b5, \b5, \b6 + eor \b3, \b3, \b0 + eor \b6, \b6, \b2 + eor \b5, \b5, \b0 + eor \b6, \b6, \b3 + eor \b3, \b3, \b7 + eor \b7, \b7, \b5 + eor \b3, \b3, \b4 + eor \b4, \b4, \b5 + eor \b2, \b2, \b7 + eor \b3, \b3, \b1 + eor \b1, \b1, \b5 + .endm + + .macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 + eor \b0, \b0, \b6 + eor \b1, \b1, \b4 + eor \b4, \b4, \b6 + eor \b2, \b2, \b0 + eor \b6, \b6, \b1 + eor \b1, \b1, \b5 + eor \b5, \b5, \b3 + eor \b3, \b3, \b7 + eor \b7, \b7, \b5 + eor \b2, \b2, \b5 + eor \b4, \b4, \b7 + .endm + + .macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5 + eor \b1, \b1, \b7 + eor \b4, \b4, \b7 + eor \b7, \b7, \b5 + eor \b1, \b1, \b3 + eor \b2, \b2, \b5 + eor \b3, \b3, \b7 + eor \b6, \b6, \b1 + eor \b2, \b2, \b0 + eor \b5, \b5, \b3 + eor \b4, \b4, \b6 + eor \b0, \b0, \b6 + eor \b1, \b1, \b4 + .endm + + .macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2 + eor \b1, \b1, \b5 + eor \b2, \b2, \b7 + eor \b3, \b3, \b1 + eor \b4, \b4, \b5 + eor \b7, \b7, \b5 + eor \b3, \b3, \b4 + eor \b5, \b5, \b0 + eor \b3, \b3, \b7 + eor \b6, \b6, \b2 + eor \b2, \b2, \b1 + eor \b6, \b6, \b3 + eor \b3, \b3, \b0 + eor \b5, \b5, \b6 + .endm + + .macro mul_gf4, x0, x1, y0, y1, t0, t1 + eor \t0, \y0, \y1 + and \t0, \t0, \x0 + eor \x0, \x0, \x1 + and \t1, \x1, \y0 + and \x0, \x0, \y1 + eor \x1, \t1, \t0 + eor \x0, \x0, \t1 + .endm + + .macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1 + eor \t0, \y0, 
\y1 + eor \t1, \y2, \y3 + and \t0, \t0, \x0 + and \t1, \t1, \x2 + eor \x0, \x0, \x1 + eor \x2, \x2, \x3 + and \x1, \x1, \y0 + and \x3, \x3, \y2 + and \x0, \x0, \y1 + and \x2, \x2, \y3 + eor \x1, \x1, \x0 + eor \x2, \x2, \x3 + eor \x0, \x0, \t0 + eor \x3, \x3, \t1 + .endm + + .macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, y1, y2, y3, t0, t1, t2, t3 + eor \t0, \x0, \x2 + eor \t1, \x1, \x3 + mul_gf4 \x0, \x1, \y0, \y1, \t2, \t3 + eor \y0, \y0, \y2 + eor \y1, \y1, \y3 + mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2 + eor \x0, \x0, \t0 + eor \x2, \x2, \t0 + eor \x1, \x1, \t1 + eor \x3, \x3, \t1 + eor \t0, \x4, \x6 + eor \t1, \x5, \x7 + mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2 + eor \y0, \y0, \y2 + eor \y1, \y1, \y3 + mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3 + eor \x4, \x4, \t0 + eor \x6, \x6, \t0 + eor \x5, \x5, \t1 + eor \x7, \x7, \t1 + .endm + + .macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, s0, s1, s2, s3 + eor \t3, \x4, \x6 + eor \t0, \x5, \x7 + eor \t1, \x1, \x3 + eor \s1, \x7, \x6 + eor \s0, \x0, \x2 + eor \s3, \t3, \t0 + orr \t2, \t0, \t1 + and \s2, \t3, \s0 + orr \t3, \t3, \s0 + eor \s0, \s0, \t1 + and \t0, \t0, \t1 + eor \t1, \x3, \x2 + and \s3, \s3, \s0 + and \s1, \s1, \t1 + eor \t1, \x4, \x5 + eor \s0, \x1, \x0 + eor \t3, \t3, \s1 + eor \t2, \t2, \s1 + and \s1, \t1, \s0 + orr \t1, \t1, \s0 + eor \t3, \t3, \s3 + eor \t0, \t0, \s1 + eor \t2, \t2, \s2 + eor \t1, \t1, \s3 + eor \t0, \t0, \s2 + and \s0, \x7, \x3 + eor \t1, \t1, \s2 + and \s1, \x6, \x2 + and \s2, \x5, \x1 + orr \s3, \x4, \x0 + eor \t3, \t3, \s0 + eor \t1, \t1, \s2 + eor \s0, \t0, \s3 + eor \t2, \t2, \s1 + and \s2, \t3, \t1 + eor \s1, \t2, \s2 + eor \s3, \s0, \s2 + bsl \s1, \t1, \s0 + not \t0, \s0 + bsl \s0, \s1, \s3 + bsl \t0, \s1, \s3 + bsl \s3, \t3, \t2 + eor \t3, \t3, \t2 + and \s2, \s0, \s3 + eor \t1, \t1, \t0 + eor \s2, \s2, \t3 + mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ + \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 + .endm + + .macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ + t0, t1, t2, t3, s0, s1, s2, s3 + in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \ + \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b + inv_gf256 \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \ + \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \ + \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \ + \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b + out_bs_ch \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \ + \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b + .endm + + .macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ + t0, t1, t2, t3, s0, s1, s2, s3 + inv_in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \ + \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b + inv_gf256 \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \ + \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \ + \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \ + \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b + inv_out_bs_ch \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \ + \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b + .endm + + .macro enc_next_rk + ldp q16, q17, [bskey], #128 + ldp q18, q19, [bskey, #-96] + ldp q20, q21, [bskey, #-64] + ldp q22, q23, [bskey, #-32] + .endm + + .macro dec_next_rk + ldp q16, q17, [bskey, #-128]! 
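[Note: the mul_gf4/mul_gf16_2/inv_gf256 tower above evaluates the AES S-box as a pure AND/XOR circuit over bit-planes, so all 128 bit positions of each vector are processed in parallel and no data-dependent table lookup remains. A plain-C sketch of one such step, translated directly from the mul_gf4 macro (each word carries one bit-plane of 64 parallel elements); illustrative only:

#include <stdint.h>

/* one GF(2^2) multiplication on bit-sliced operands: (x1,x0) *= (y1,y0) */
static void mul_gf4(uint64_t *x0, uint64_t *x1, uint64_t y0, uint64_t y1)
{
	uint64_t t0 = (y0 ^ y1) & *x0;	/* Karatsuba-style cross term */
	uint64_t t1 = *x1 & y0;

	*x0 = (*x0 ^ *x1) & y1;
	*x1 = t1 ^ t0;
	*x0 ^= t1;
}
]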
+ ldp q18, q19, [bskey, #32] + ldp q20, q21, [bskey, #64] + ldp q22, q23, [bskey, #96] + .endm + + .macro add_round_key, x0, x1, x2, x3, x4, x5, x6, x7 + eor \x0\().16b, \x0\().16b, v16.16b + eor \x1\().16b, \x1\().16b, v17.16b + eor \x2\().16b, \x2\().16b, v18.16b + eor \x3\().16b, \x3\().16b, v19.16b + eor \x4\().16b, \x4\().16b, v20.16b + eor \x5\().16b, \x5\().16b, v21.16b + eor \x6\().16b, \x6\().16b, v22.16b + eor \x7\().16b, \x7\().16b, v23.16b + .endm + + .macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask + tbl \x0\().16b, {\x0\().16b}, \mask\().16b + tbl \x1\().16b, {\x1\().16b}, \mask\().16b + tbl \x2\().16b, {\x2\().16b}, \mask\().16b + tbl \x3\().16b, {\x3\().16b}, \mask\().16b + tbl \x4\().16b, {\x4\().16b}, \mask\().16b + tbl \x5\().16b, {\x5\().16b}, \mask\().16b + tbl \x6\().16b, {\x6\().16b}, \mask\().16b + tbl \x7\().16b, {\x7\().16b}, \mask\().16b + .endm + + .macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, t4, t5, t6, t7, inv + ext \t0\().16b, \x0\().16b, \x0\().16b, #12 + ext \t1\().16b, \x1\().16b, \x1\().16b, #12 + eor \x0\().16b, \x0\().16b, \t0\().16b + ext \t2\().16b, \x2\().16b, \x2\().16b, #12 + eor \x1\().16b, \x1\().16b, \t1\().16b + ext \t3\().16b, \x3\().16b, \x3\().16b, #12 + eor \x2\().16b, \x2\().16b, \t2\().16b + ext \t4\().16b, \x4\().16b, \x4\().16b, #12 + eor \x3\().16b, \x3\().16b, \t3\().16b + ext \t5\().16b, \x5\().16b, \x5\().16b, #12 + eor \x4\().16b, \x4\().16b, \t4\().16b + ext \t6\().16b, \x6\().16b, \x6\().16b, #12 + eor \x5\().16b, \x5\().16b, \t5\().16b + ext \t7\().16b, \x7\().16b, \x7\().16b, #12 + eor \x6\().16b, \x6\().16b, \t6\().16b + eor \t1\().16b, \t1\().16b, \x0\().16b + eor \x7\().16b, \x7\().16b, \t7\().16b + ext \x0\().16b, \x0\().16b, \x0\().16b, #8 + eor \t2\().16b, \t2\().16b, \x1\().16b + eor \t0\().16b, \t0\().16b, \x7\().16b + eor \t1\().16b, \t1\().16b, \x7\().16b + ext \x1\().16b, \x1\().16b, \x1\().16b, #8 + eor \t5\().16b, \t5\().16b, \x4\().16b + eor \x0\().16b, \x0\().16b, \t0\().16b + eor \t6\().16b, \t6\().16b, \x5\().16b + eor \x1\().16b, \x1\().16b, \t1\().16b + ext \t0\().16b, \x4\().16b, \x4\().16b, #8 + eor \t4\().16b, \t4\().16b, \x3\().16b + ext \t1\().16b, \x5\().16b, \x5\().16b, #8 + eor \t7\().16b, \t7\().16b, \x6\().16b + ext \x4\().16b, \x3\().16b, \x3\().16b, #8 + eor \t3\().16b, \t3\().16b, \x2\().16b + ext \x5\().16b, \x7\().16b, \x7\().16b, #8 + eor \t4\().16b, \t4\().16b, \x7\().16b + ext \x3\().16b, \x6\().16b, \x6\().16b, #8 + eor \t3\().16b, \t3\().16b, \x7\().16b + ext \x6\().16b, \x2\().16b, \x2\().16b, #8 + eor \x7\().16b, \t1\().16b, \t5\().16b + .ifb \inv + eor \x2\().16b, \t0\().16b, \t4\().16b + eor \x4\().16b, \x4\().16b, \t3\().16b + eor \x5\().16b, \x5\().16b, \t7\().16b + eor \x3\().16b, \x3\().16b, \t6\().16b + eor \x6\().16b, \x6\().16b, \t2\().16b + .else + eor \t3\().16b, \t3\().16b, \x4\().16b + eor \x5\().16b, \x5\().16b, \t7\().16b + eor \x2\().16b, \x3\().16b, \t6\().16b + eor \x3\().16b, \t0\().16b, \t4\().16b + eor \x4\().16b, \x6\().16b, \t2\().16b + mov \x6\().16b, \t3\().16b + .endif + .endm + + .macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ + t0, t1, t2, t3, t4, t5, t6, t7 + ext \t0\().16b, \x0\().16b, \x0\().16b, #8 + ext \t6\().16b, \x6\().16b, \x6\().16b, #8 + ext \t7\().16b, \x7\().16b, \x7\().16b, #8 + eor \t0\().16b, \t0\().16b, \x0\().16b + ext \t1\().16b, \x1\().16b, \x1\().16b, #8 + eor \t6\().16b, \t6\().16b, \x6\().16b + ext \t2\().16b, \x2\().16b, \x2\().16b, #8 + eor \t7\().16b, \t7\().16b, \x7\().16b + ext \t3\().16b, 
\x3\().16b, \x3\().16b, #8 + eor \t1\().16b, \t1\().16b, \x1\().16b + ext \t4\().16b, \x4\().16b, \x4\().16b, #8 + eor \t2\().16b, \t2\().16b, \x2\().16b + ext \t5\().16b, \x5\().16b, \x5\().16b, #8 + eor \t3\().16b, \t3\().16b, \x3\().16b + eor \t4\().16b, \t4\().16b, \x4\().16b + eor \t5\().16b, \t5\().16b, \x5\().16b + eor \x0\().16b, \x0\().16b, \t6\().16b + eor \x1\().16b, \x1\().16b, \t6\().16b + eor \x2\().16b, \x2\().16b, \t0\().16b + eor \x4\().16b, \x4\().16b, \t2\().16b + eor \x3\().16b, \x3\().16b, \t1\().16b + eor \x1\().16b, \x1\().16b, \t7\().16b + eor \x2\().16b, \x2\().16b, \t7\().16b + eor \x4\().16b, \x4\().16b, \t6\().16b + eor \x5\().16b, \x5\().16b, \t3\().16b + eor \x3\().16b, \x3\().16b, \t6\().16b + eor \x6\().16b, \x6\().16b, \t4\().16b + eor \x4\().16b, \x4\().16b, \t7\().16b + eor \x5\().16b, \x5\().16b, \t7\().16b + eor \x7\().16b, \x7\().16b, \t5\().16b + mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ + \t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1 + .endm + + .macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1 + ushr \t0\().2d, \b0\().2d, #\n + ushr \t1\().2d, \b1\().2d, #\n + eor \t0\().16b, \t0\().16b, \a0\().16b + eor \t1\().16b, \t1\().16b, \a1\().16b + and \t0\().16b, \t0\().16b, \mask\().16b + and \t1\().16b, \t1\().16b, \mask\().16b + eor \a0\().16b, \a0\().16b, \t0\().16b + shl \t0\().2d, \t0\().2d, #\n + eor \a1\().16b, \a1\().16b, \t1\().16b + shl \t1\().2d, \t1\().2d, #\n + eor \b0\().16b, \b0\().16b, \t0\().16b + eor \b1\().16b, \b1\().16b, \t1\().16b + .endm + + .macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3 + movi \t0\().16b, #0x55 + movi \t1\().16b, #0x33 + swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3 + swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3 + movi \t0\().16b, #0x0f + swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3 + swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3 + swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3 + swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3 + .endm + + + .align 6 +M0: .octa 0x0004080c0105090d02060a0e03070b0f + +M0SR: .octa 0x0004080c05090d010a0e02060f03070b +SR: .octa 0x0f0e0d0c0a09080b0504070600030201 +SRM0: .octa 0x01060b0c0207080d0304090e00050a0f + +M0ISR: .octa 0x0004080c0d0105090a0e0206070b0f03 +ISR: .octa 0x0f0e0d0c080b0a090504070602010003 +ISRM0: .octa 0x0306090c00070a0d01040b0e0205080f + + /* + * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds) + */ +ENTRY(aesbs_convert_key) + ld1 {v7.4s}, [x1], #16 // load round 0 key + ld1 {v17.4s}, [x1], #16 // load round 1 key + + movi v8.16b, #0x01 // bit masks + movi v9.16b, #0x02 + movi v10.16b, #0x04 + movi v11.16b, #0x08 + movi v12.16b, #0x10 + movi v13.16b, #0x20 + movi v14.16b, #0x40 + movi v15.16b, #0x80 + ldr q16, M0 + + sub x2, x2, #1 + str q7, [x0], #16 // save round 0 key + +.Lkey_loop: + tbl v7.16b ,{v17.16b}, v16.16b + ld1 {v17.4s}, [x1], #16 // load next round key + + cmtst v0.16b, v7.16b, v8.16b + cmtst v1.16b, v7.16b, v9.16b + cmtst v2.16b, v7.16b, v10.16b + cmtst v3.16b, v7.16b, v11.16b + cmtst v4.16b, v7.16b, v12.16b + cmtst v5.16b, v7.16b, v13.16b + cmtst v6.16b, v7.16b, v14.16b + cmtst v7.16b, v7.16b, v15.16b + not v0.16b, v0.16b + not v1.16b, v1.16b + not v5.16b, v5.16b + not v6.16b, v6.16b + + subs x2, x2, #1 + stp q0, q1, [x0], #128 + stp q2, q3, [x0, #-96] + stp q4, q5, [x0, #-64] + stp q6, q7, [x0, #-32] + b.ne .Lkey_loop + + movi v7.16b, #0x63 // compose .L63 + eor v17.16b, v17.16b, v7.16b + str q17, [x0] + ret +ENDPROC(aesbs_convert_key) + + .align 4 +aesbs_encrypt8: + ldr q9, [bskey], #16 // round 0 key 
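[Note: the bitslice macro converts eight AES blocks from byte order into eight bit-planes using three passes of the classic swapmove transposition, with masks 0x55.., 0x33.., 0x0f.. and shifts 1, 2, 4. The scalar primitive behind swapmove_2x, as a sketch:

#include <stdint.h>

/* exchange, between a and (b >> n), the bit groups selected by mask;
 * applied with n = 1, 2, 4 this transposes an 8x8 bit matrix */
static void swapmove(uint64_t *a, uint64_t *b, int n, uint64_t mask)
{
	uint64_t t = ((*b >> n) ^ *a) & mask;

	*a ^= t;
	*b ^= t << n;
}
]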
+ ldr q8, M0SR + ldr q24, SR + + eor v10.16b, v0.16b, v9.16b // xor with round0 key + eor v11.16b, v1.16b, v9.16b + tbl v0.16b, {v10.16b}, v8.16b + eor v12.16b, v2.16b, v9.16b + tbl v1.16b, {v11.16b}, v8.16b + eor v13.16b, v3.16b, v9.16b + tbl v2.16b, {v12.16b}, v8.16b + eor v14.16b, v4.16b, v9.16b + tbl v3.16b, {v13.16b}, v8.16b + eor v15.16b, v5.16b, v9.16b + tbl v4.16b, {v14.16b}, v8.16b + eor v10.16b, v6.16b, v9.16b + tbl v5.16b, {v15.16b}, v8.16b + eor v11.16b, v7.16b, v9.16b + tbl v6.16b, {v10.16b}, v8.16b + tbl v7.16b, {v11.16b}, v8.16b + + bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 + + sub rounds, rounds, #1 + b .Lenc_sbox + +.Lenc_loop: + shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24 +.Lenc_sbox: + sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \ + v13, v14, v15 + subs rounds, rounds, #1 + b.cc .Lenc_done + + enc_next_rk + + mix_cols v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \ + v13, v14, v15 + + add_round_key v0, v1, v2, v3, v4, v5, v6, v7 + + b.ne .Lenc_loop + ldr q24, SRM0 + b .Lenc_loop + +.Lenc_done: + ldr q12, [bskey] // last round key + + bitslice v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11 + + eor v0.16b, v0.16b, v12.16b + eor v1.16b, v1.16b, v12.16b + eor v4.16b, v4.16b, v12.16b + eor v6.16b, v6.16b, v12.16b + eor v3.16b, v3.16b, v12.16b + eor v7.16b, v7.16b, v12.16b + eor v2.16b, v2.16b, v12.16b + eor v5.16b, v5.16b, v12.16b + ret +ENDPROC(aesbs_encrypt8) + + .align 4 +aesbs_decrypt8: + lsl x9, rounds, #7 + add bskey, bskey, x9 + + ldr q9, [bskey, #-112]! // round 0 key + ldr q8, M0ISR + ldr q24, ISR + + eor v10.16b, v0.16b, v9.16b // xor with round0 key + eor v11.16b, v1.16b, v9.16b + tbl v0.16b, {v10.16b}, v8.16b + eor v12.16b, v2.16b, v9.16b + tbl v1.16b, {v11.16b}, v8.16b + eor v13.16b, v3.16b, v9.16b + tbl v2.16b, {v12.16b}, v8.16b + eor v14.16b, v4.16b, v9.16b + tbl v3.16b, {v13.16b}, v8.16b + eor v15.16b, v5.16b, v9.16b + tbl v4.16b, {v14.16b}, v8.16b + eor v10.16b, v6.16b, v9.16b + tbl v5.16b, {v15.16b}, v8.16b + eor v11.16b, v7.16b, v9.16b + tbl v6.16b, {v10.16b}, v8.16b + tbl v7.16b, {v11.16b}, v8.16b + + bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 + + sub rounds, rounds, #1 + b .Ldec_sbox + +.Ldec_loop: + shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24 +.Ldec_sbox: + inv_sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \ + v13, v14, v15 + subs rounds, rounds, #1 + b.cc .Ldec_done + + dec_next_rk + + add_round_key v0, v1, v6, v4, v2, v7, v3, v5 + + inv_mix_cols v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \ + v13, v14, v15 + + b.ne .Ldec_loop + ldr q24, ISRM0 + b .Ldec_loop +.Ldec_done: + ldr q12, [bskey, #-16] // last round key + + bitslice v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11 + + eor v0.16b, v0.16b, v12.16b + eor v1.16b, v1.16b, v12.16b + eor v6.16b, v6.16b, v12.16b + eor v4.16b, v4.16b, v12.16b + eor v2.16b, v2.16b, v12.16b + eor v7.16b, v7.16b, v12.16b + eor v3.16b, v3.16b, v12.16b + eor v5.16b, v5.16b, v12.16b + ret +ENDPROC(aesbs_decrypt8) + + /* + * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks) + * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks) + */ + .macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 + stp x29, x30, [sp, #-16]! 
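[Note: the __ecb_crypt macro that follows always runs the 8-way core, but for a short tail it builds a one-hot mask (1 << blocks) and tbnz-tests successive bits to cut the unrolled load/store sequences off early, so unused lanes are simply never stored back. The equivalent control flow in C; a sketch only, where crypt8 is a hypothetical stand-in for the aesbs_encrypt8/aesbs_decrypt8 calls:

#include <stdint.h>
#include <string.h>

static void ecb_crypt(uint8_t *out, const uint8_t *in, int blocks,
		      void (*crypt8)(uint8_t blk[8][16]))
{
	while (blocks > 0) {
		uint8_t buf[8][16];
		int n = blocks < 8 ? blocks : 8;	/* tbnz bit n of 1 << blocks */

		memset(buf, 0, sizeof(buf));	/* asm leaves stale lanes instead */
		memcpy(buf, in, n * 16);
		crypt8(buf);			/* all eight lanes, always */
		memcpy(out, buf, n * 16);	/* only n results are kept */
		in += n * 16;
		out += n * 16;
		blocks -= n;
	}
}
]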
+ mov x29, sp + +99: mov x5, #1 + lsl x5, x5, x4 + subs w4, w4, #8 + csel x4, x4, xzr, pl + csel x5, x5, xzr, mi + + ld1 {v0.16b}, [x1], #16 + tbnz x5, #1, 0f + ld1 {v1.16b}, [x1], #16 + tbnz x5, #2, 0f + ld1 {v2.16b}, [x1], #16 + tbnz x5, #3, 0f + ld1 {v3.16b}, [x1], #16 + tbnz x5, #4, 0f + ld1 {v4.16b}, [x1], #16 + tbnz x5, #5, 0f + ld1 {v5.16b}, [x1], #16 + tbnz x5, #6, 0f + ld1 {v6.16b}, [x1], #16 + tbnz x5, #7, 0f + ld1 {v7.16b}, [x1], #16 + +0: mov bskey, x2 + mov rounds, x3 + bl \do8 + + st1 {\o0\().16b}, [x0], #16 + tbnz x5, #1, 1f + st1 {\o1\().16b}, [x0], #16 + tbnz x5, #2, 1f + st1 {\o2\().16b}, [x0], #16 + tbnz x5, #3, 1f + st1 {\o3\().16b}, [x0], #16 + tbnz x5, #4, 1f + st1 {\o4\().16b}, [x0], #16 + tbnz x5, #5, 1f + st1 {\o5\().16b}, [x0], #16 + tbnz x5, #6, 1f + st1 {\o6\().16b}, [x0], #16 + tbnz x5, #7, 1f + st1 {\o7\().16b}, [x0], #16 + + cbnz x4, 99b + +1: ldp x29, x30, [sp], #16 + ret + .endm + + .align 4 +ENTRY(aesbs_ecb_encrypt) + __ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5 +ENDPROC(aesbs_ecb_encrypt) + + .align 4 +ENTRY(aesbs_ecb_decrypt) + __ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5 +ENDPROC(aesbs_ecb_decrypt) + + /* + * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + */ + .align 4 +ENTRY(aesbs_cbc_decrypt) + stp x29, x30, [sp, #-16]! + mov x29, sp + +99: mov x6, #1 + lsl x6, x6, x4 + subs w4, w4, #8 + csel x4, x4, xzr, pl + csel x6, x6, xzr, mi + + ld1 {v0.16b}, [x1], #16 + mov v25.16b, v0.16b + tbnz x6, #1, 0f + ld1 {v1.16b}, [x1], #16 + mov v26.16b, v1.16b + tbnz x6, #2, 0f + ld1 {v2.16b}, [x1], #16 + mov v27.16b, v2.16b + tbnz x6, #3, 0f + ld1 {v3.16b}, [x1], #16 + mov v28.16b, v3.16b + tbnz x6, #4, 0f + ld1 {v4.16b}, [x1], #16 + mov v29.16b, v4.16b + tbnz x6, #5, 0f + ld1 {v5.16b}, [x1], #16 + mov v30.16b, v5.16b + tbnz x6, #6, 0f + ld1 {v6.16b}, [x1], #16 + mov v31.16b, v6.16b + tbnz x6, #7, 0f + ld1 {v7.16b}, [x1] + +0: mov bskey, x2 + mov rounds, x3 + bl aesbs_decrypt8 + + ld1 {v24.16b}, [x5] // load IV + + eor v1.16b, v1.16b, v25.16b + eor v6.16b, v6.16b, v26.16b + eor v4.16b, v4.16b, v27.16b + eor v2.16b, v2.16b, v28.16b + eor v7.16b, v7.16b, v29.16b + eor v0.16b, v0.16b, v24.16b + eor v3.16b, v3.16b, v30.16b + eor v5.16b, v5.16b, v31.16b + + st1 {v0.16b}, [x0], #16 + mov v24.16b, v25.16b + tbnz x6, #1, 1f + st1 {v1.16b}, [x0], #16 + mov v24.16b, v26.16b + tbnz x6, #2, 1f + st1 {v6.16b}, [x0], #16 + mov v24.16b, v27.16b + tbnz x6, #3, 1f + st1 {v4.16b}, [x0], #16 + mov v24.16b, v28.16b + tbnz x6, #4, 1f + st1 {v2.16b}, [x0], #16 + mov v24.16b, v29.16b + tbnz x6, #5, 1f + st1 {v7.16b}, [x0], #16 + mov v24.16b, v30.16b + tbnz x6, #6, 1f + st1 {v3.16b}, [x0], #16 + mov v24.16b, v31.16b + tbnz x6, #7, 1f + ld1 {v24.16b}, [x1], #16 + st1 {v5.16b}, [x0], #16 +1: st1 {v24.16b}, [x5] // store IV + + cbnz x4, 99b + + ldp x29, x30, [sp], #16 + ret +ENDPROC(aesbs_cbc_decrypt) + + .macro next_tweak, out, in, const, tmp + sshr \tmp\().2d, \in\().2d, #63 + and \tmp\().16b, \tmp\().16b, \const\().16b + add \out\().2d, \in\().2d, \in\().2d + ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 + eor \out\().16b, \out\().16b, \tmp\().16b + .endm + + .align 4 +.Lxts_mul_x: +CPU_LE( .quad 1, 0x87 ) +CPU_BE( .quad 0x87, 1 ) + + /* + * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + */ +__xts_crypt8: + mov x6, #1 + lsl x6, x6, x4 + subs w4, w4, #8 + csel x4, x4, 
xzr, pl + csel x6, x6, xzr, mi + + ld1 {v0.16b}, [x1], #16 + next_tweak v26, v25, v30, v31 + eor v0.16b, v0.16b, v25.16b + tbnz x6, #1, 0f + + ld1 {v1.16b}, [x1], #16 + next_tweak v27, v26, v30, v31 + eor v1.16b, v1.16b, v26.16b + tbnz x6, #2, 0f + + ld1 {v2.16b}, [x1], #16 + next_tweak v28, v27, v30, v31 + eor v2.16b, v2.16b, v27.16b + tbnz x6, #3, 0f + + ld1 {v3.16b}, [x1], #16 + next_tweak v29, v28, v30, v31 + eor v3.16b, v3.16b, v28.16b + tbnz x6, #4, 0f + + ld1 {v4.16b}, [x1], #16 + str q29, [sp, #16] + eor v4.16b, v4.16b, v29.16b + next_tweak v29, v29, v30, v31 + tbnz x6, #5, 0f + + ld1 {v5.16b}, [x1], #16 + str q29, [sp, #32] + eor v5.16b, v5.16b, v29.16b + next_tweak v29, v29, v30, v31 + tbnz x6, #6, 0f + + ld1 {v6.16b}, [x1], #16 + str q29, [sp, #48] + eor v6.16b, v6.16b, v29.16b + next_tweak v29, v29, v30, v31 + tbnz x6, #7, 0f + + ld1 {v7.16b}, [x1], #16 + str q29, [sp, #64] + eor v7.16b, v7.16b, v29.16b + next_tweak v29, v29, v30, v31 + +0: mov bskey, x2 + mov rounds, x3 + br x7 +ENDPROC(__xts_crypt8) + + .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 + stp x29, x30, [sp, #-80]! + mov x29, sp + + ldr q30, .Lxts_mul_x + ld1 {v25.16b}, [x5] + +99: adr x7, \do8 + bl __xts_crypt8 + + ldp q16, q17, [sp, #16] + ldp q18, q19, [sp, #48] + + eor \o0\().16b, \o0\().16b, v25.16b + eor \o1\().16b, \o1\().16b, v26.16b + eor \o2\().16b, \o2\().16b, v27.16b + eor \o3\().16b, \o3\().16b, v28.16b + + st1 {\o0\().16b}, [x0], #16 + mov v25.16b, v26.16b + tbnz x6, #1, 1f + st1 {\o1\().16b}, [x0], #16 + mov v25.16b, v27.16b + tbnz x6, #2, 1f + st1 {\o2\().16b}, [x0], #16 + mov v25.16b, v28.16b + tbnz x6, #3, 1f + st1 {\o3\().16b}, [x0], #16 + mov v25.16b, v29.16b + tbnz x6, #4, 1f + + eor \o4\().16b, \o4\().16b, v16.16b + eor \o5\().16b, \o5\().16b, v17.16b + eor \o6\().16b, \o6\().16b, v18.16b + eor \o7\().16b, \o7\().16b, v19.16b + + st1 {\o4\().16b}, [x0], #16 + tbnz x6, #5, 1f + st1 {\o5\().16b}, [x0], #16 + tbnz x6, #6, 1f + st1 {\o6\().16b}, [x0], #16 + tbnz x6, #7, 1f + st1 {\o7\().16b}, [x0], #16 + + cbnz x4, 99b + +1: st1 {v25.16b}, [x5] + ldp x29, x30, [sp], #80 + ret + .endm + +ENTRY(aesbs_xts_encrypt) + __xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5 +ENDPROC(aesbs_xts_encrypt) + +ENTRY(aesbs_xts_decrypt) + __xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5 +ENDPROC(aesbs_xts_decrypt) + + .macro next_ctr, v + mov \v\().d[1], x8 + adds x8, x8, #1 + mov \v\().d[0], x7 + adc x7, x7, xzr + rev64 \v\().16b, \v\().16b + .endm + + /* + * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], + * int rounds, int blocks, u8 iv[], u8 final[]) + */ +ENTRY(aesbs_ctr_encrypt) + stp x29, x30, [sp, #-16]! 
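[Note: the next_tweak macro above advances the XTS tweak by one block, i.e. multiplies it by x in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1; .Lxts_mul_x holds the 0x87 reduction constant in the required byte order. A scalar C rendering of the same doubling (sketch, little-endian layout as in the asm):

#include <stdint.h>

/* t[0] holds the low 64 bits of the tweak, t[1] the high 64 bits */
static void next_tweak(uint64_t t[2])
{
	uint64_t carry = t[1] >> 63;		/* bit shifted out of the top */

	t[1] = (t[1] << 1) | (t[0] >> 63);
	t[0] = (t[0] << 1) ^ (carry ? 0x87 : 0);
}
]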
+ mov x29, sp + + cmp x6, #0 + cset x10, ne + add x4, x4, x10 // do one extra block if final + + ldp x7, x8, [x5] + ld1 {v0.16b}, [x5] +CPU_LE( rev x7, x7 ) +CPU_LE( rev x8, x8 ) + adds x8, x8, #1 + adc x7, x7, xzr + +99: mov x9, #1 + lsl x9, x9, x4 + subs w4, w4, #8 + csel x4, x4, xzr, pl + csel x9, x9, xzr, le + + tbnz x9, #1, 0f + next_ctr v1 + tbnz x9, #2, 0f + next_ctr v2 + tbnz x9, #3, 0f + next_ctr v3 + tbnz x9, #4, 0f + next_ctr v4 + tbnz x9, #5, 0f + next_ctr v5 + tbnz x9, #6, 0f + next_ctr v6 + tbnz x9, #7, 0f + next_ctr v7 + +0: mov bskey, x2 + mov rounds, x3 + bl aesbs_encrypt8 + + lsr x9, x9, x10 // disregard the extra block + tbnz x9, #0, 0f + + ld1 {v8.16b}, [x1], #16 + eor v0.16b, v0.16b, v8.16b + st1 {v0.16b}, [x0], #16 + tbnz x9, #1, 1f + + ld1 {v9.16b}, [x1], #16 + eor v1.16b, v1.16b, v9.16b + st1 {v1.16b}, [x0], #16 + tbnz x9, #2, 2f + + ld1 {v10.16b}, [x1], #16 + eor v4.16b, v4.16b, v10.16b + st1 {v4.16b}, [x0], #16 + tbnz x9, #3, 3f + + ld1 {v11.16b}, [x1], #16 + eor v6.16b, v6.16b, v11.16b + st1 {v6.16b}, [x0], #16 + tbnz x9, #4, 4f + + ld1 {v12.16b}, [x1], #16 + eor v3.16b, v3.16b, v12.16b + st1 {v3.16b}, [x0], #16 + tbnz x9, #5, 5f + + ld1 {v13.16b}, [x1], #16 + eor v7.16b, v7.16b, v13.16b + st1 {v7.16b}, [x0], #16 + tbnz x9, #6, 6f + + ld1 {v14.16b}, [x1], #16 + eor v2.16b, v2.16b, v14.16b + st1 {v2.16b}, [x0], #16 + tbnz x9, #7, 7f + + ld1 {v15.16b}, [x1], #16 + eor v5.16b, v5.16b, v15.16b + st1 {v5.16b}, [x0], #16 + +8: next_ctr v0 + cbnz x4, 99b + +0: st1 {v0.16b}, [x5] + ldp x29, x30, [sp], #16 + ret + + /* + * If we are handling the tail of the input (x6 != NULL), return the + * final keystream block back to the caller. + */ +1: cbz x6, 8b + st1 {v1.16b}, [x6] + b 8b +2: cbz x6, 8b + st1 {v4.16b}, [x6] + b 8b +3: cbz x6, 8b + st1 {v6.16b}, [x6] + b 8b +4: cbz x6, 8b + st1 {v3.16b}, [x6] + b 8b +5: cbz x6, 8b + st1 {v7.16b}, [x6] + b 8b +6: cbz x6, 8b + st1 {v2.16b}, [x6] + b 8b +7: cbz x6, 8b + st1 {v5.16b}, [x6] + b 8b +ENDPROC(aesbs_ctr_encrypt) diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c new file mode 100644 index 000000000000..db2501d93550 --- /dev/null +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -0,0 +1,439 @@ +/* + * Bit sliced AES using NEON instructions + * + * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <asm/neon.h> +#include <crypto/aes.h> +#include <crypto/internal/simd.h> +#include <crypto/internal/skcipher.h> +#include <crypto/xts.h> +#include <linux/module.h> + +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +MODULE_ALIAS_CRYPTO("ecb(aes)"); +MODULE_ALIAS_CRYPTO("cbc(aes)"); +MODULE_ALIAS_CRYPTO("ctr(aes)"); +MODULE_ALIAS_CRYPTO("xts(aes)"); + +asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds); + +asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks); +asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks); + +asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); + +asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[], u8 final[]); + +asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); +asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); + +/* borrowed from aes-neon-blk.ko */ +asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], + int rounds, int blocks, int first); +asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], + int rounds, int blocks, u8 iv[], + int first); + +struct aesbs_ctx { + u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32]; + int rounds; +} __aligned(AES_BLOCK_SIZE); + +struct aesbs_cbc_ctx { + struct aesbs_ctx key; + u32 enc[AES_MAX_KEYLENGTH_U32]; +}; + +struct aesbs_xts_ctx { + struct aesbs_ctx key; + u32 twkey[AES_MAX_KEYLENGTH_U32]; +}; + +static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_aes_ctx rk; + int err; + + err = crypto_aes_expand_key(&rk, in_key, key_len); + if (err) + return err; + + ctx->rounds = 6 + key_len / 4; + + kernel_neon_begin(); + aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds); + kernel_neon_end(); + + return 0; +} + +static int __ecb_crypt(struct skcipher_request *req, + void (*fn)(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks)) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + while (walk.nbytes >= AES_BLOCK_SIZE) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + + if (walk.nbytes < walk.total) + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + + fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, + ctx->rounds, blocks); + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int ecb_encrypt(struct skcipher_request *req) +{ + return __ecb_crypt(req, aesbs_ecb_encrypt); +} + +static int ecb_decrypt(struct skcipher_request *req) +{ + return __ecb_crypt(req, aesbs_ecb_decrypt); +} + +static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_aes_ctx rk; + int err; + + err = crypto_aes_expand_key(&rk, in_key, key_len); + if (err) + return err; + + ctx->key.rounds = 6 + key_len / 4; + + memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc)); + + kernel_neon_begin(); + aesbs_convert_key(ctx->key.rk, 
rk.key_enc, ctx->key.rounds); + kernel_neon_end(); + + return 0; +} + +static int cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err, first = 1; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + while (walk.nbytes >= AES_BLOCK_SIZE) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + + /* fall back to the non-bitsliced NEON implementation */ + neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->enc, ctx->key.rounds, blocks, walk.iv, + first); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + first = 0; + } + kernel_neon_end(); + return err; +} + +static int cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + while (walk.nbytes >= AES_BLOCK_SIZE) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + + if (walk.nbytes < walk.total) + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + + aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key.rk, ctx->key.rounds, blocks, + walk.iv); + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int ctr_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + u8 buf[AES_BLOCK_SIZE]; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + while (walk.nbytes > 0) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + u8 *final = (walk.total % AES_BLOCK_SIZE) ? 
buf : NULL; + + if (walk.nbytes < walk.total) { + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + final = NULL; + } + + aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->rk, ctx->rounds, blocks, walk.iv, final); + + if (final) { + u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; + u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + + if (dst != src) + memcpy(dst, src, walk.total % AES_BLOCK_SIZE); + crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE); + + err = skcipher_walk_done(&walk, 0); + break; + } + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_aes_ctx rk; + int err; + + err = xts_verify_key(tfm, in_key, key_len); + if (err) + return err; + + key_len /= 2; + err = crypto_aes_expand_key(&rk, in_key + key_len, key_len); + if (err) + return err; + + memcpy(ctx->twkey, rk.key_enc, sizeof(ctx->twkey)); + + return aesbs_setkey(tfm, in_key, key_len); +} + +static int __xts_crypt(struct skcipher_request *req, + void (*fn)(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[])) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, true); + + kernel_neon_begin(); + + neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, + ctx->key.rounds, 1, 1); + + while (walk.nbytes >= AES_BLOCK_SIZE) { + unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + + if (walk.nbytes < walk.total) + blocks = round_down(blocks, + walk.stride / AES_BLOCK_SIZE); + + fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk, + ctx->key.rounds, blocks, walk.iv); + err = skcipher_walk_done(&walk, + walk.nbytes - blocks * AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int xts_encrypt(struct skcipher_request *req) +{ + return __xts_crypt(req, aesbs_xts_encrypt); +} + +static int xts_decrypt(struct skcipher_request *req) +{ + return __xts_crypt(req, aesbs_xts_decrypt); +} + +static struct skcipher_alg aes_algs[] = { { + .base.cra_name = "__ecb(aes)", + .base.cra_driver_name = "__ecb-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aesbs_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .setkey = aesbs_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, +}, { + .base.cra_name = "__cbc(aes)", + .base.cra_driver_name = "__cbc-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_cbc_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, +}, { + .base.cra_name = "__ctr(aes)", + .base.cra_driver_name = "__ctr-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct aesbs_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = AES_MIN_KEY_SIZE, + 
.max_keysize = AES_MAX_KEY_SIZE, + .chunksize = AES_BLOCK_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, +}, { + .base.cra_name = "ctr(aes)", + .base.cra_driver_name = "ctr-aes-neonbs", + .base.cra_priority = 250 - 1, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct aesbs_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .chunksize = AES_BLOCK_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, +}, { + .base.cra_name = "__xts(aes)", + .base.cra_driver_name = "__xts-aes-neonbs", + .base.cra_priority = 250, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aesbs_xts_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_xts_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, +} }; + +static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)]; + +static void aes_exit(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++) + if (aes_simd_algs[i]) + simd_skcipher_free(aes_simd_algs[i]); + + crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); +} + +static int __init aes_init(void) +{ + struct simd_skcipher_alg *simd; + const char *basename; + const char *algname; + const char *drvname; + int err; + int i; + + if (!(elf_hwcap & HWCAP_ASIMD)) + return -ENODEV; + + err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL)) + continue; + + algname = aes_algs[i].base.cra_name + 2; + drvname = aes_algs[i].base.cra_driver_name + 2; + basename = aes_algs[i].base.cra_driver_name; + simd = simd_skcipher_create_compat(algname, drvname, basename); + err = PTR_ERR(simd); + if (IS_ERR(simd)) + goto unregister_simds; + + aes_simd_algs[i] = simd; + } + return 0; + +unregister_simds: + aes_exit(); + return err; +} + +module_init(aes_init); +module_exit(aes_exit); diff --git a/arch/arm64/crypto/chacha20-neon-core.S b/arch/arm64/crypto/chacha20-neon-core.S new file mode 100644 index 000000000000..13c85e272c2a --- /dev/null +++ b/arch/arm64/crypto/chacha20-neon-core.S @@ -0,0 +1,450 @@ +/* + * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions + * + * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <linux/linkage.h> + + .text + .align 6 + +ENTRY(chacha20_block_xor_neon) + // x0: Input state matrix, s + // x1: 1 data block output, o + // x2: 1 data block input, i + + // + // This function encrypts one ChaCha20 block by loading the state matrix + // in four NEON registers. It performs matrix operation on four words in + // parallel, but requires shuffling to rearrange the words after each + // round. + // + + // x0..3 = s0..3 + adr x3, ROT8 + ld1 {v0.4s-v3.4s}, [x0] + ld1 {v8.4s-v11.4s}, [x0] + ld1 {v12.4s}, [x3] + + mov x3, #10 + +.Ldoubleround: + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + rev32 v3.8h, v3.8h + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #12 + sri v1.4s, v4.4s, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + tbl v3.16b, {v3.16b}, v12.16b + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #7 + sri v1.4s, v4.4s, #25 + + // x1 = shuffle32(x1, MASK(0, 3, 2, 1)) + ext v1.16b, v1.16b, v1.16b, #4 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + ext v2.16b, v2.16b, v2.16b, #8 + // x3 = shuffle32(x3, MASK(2, 1, 0, 3)) + ext v3.16b, v3.16b, v3.16b, #12 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 16) + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + rev32 v3.8h, v3.8h + + // x2 += x3, x1 = rotl32(x1 ^ x2, 12) + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #12 + sri v1.4s, v4.4s, #20 + + // x0 += x1, x3 = rotl32(x3 ^ x0, 8) + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + tbl v3.16b, {v3.16b}, v12.16b + + // x2 += x3, x1 = rotl32(x1 ^ x2, 7) + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #7 + sri v1.4s, v4.4s, #25 + + // x1 = shuffle32(x1, MASK(2, 1, 0, 3)) + ext v1.16b, v1.16b, v1.16b, #12 + // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) + ext v2.16b, v2.16b, v2.16b, #8 + // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) + ext v3.16b, v3.16b, v3.16b, #4 + + subs x3, x3, #1 + b.ne .Ldoubleround + + ld1 {v4.16b-v7.16b}, [x2] + + // o0 = i0 ^ (x0 + s0) + add v0.4s, v0.4s, v8.4s + eor v0.16b, v0.16b, v4.16b + + // o1 = i1 ^ (x1 + s1) + add v1.4s, v1.4s, v9.4s + eor v1.16b, v1.16b, v5.16b + + // o2 = i2 ^ (x2 + s2) + add v2.4s, v2.4s, v10.4s + eor v2.16b, v2.16b, v6.16b + + // o3 = i3 ^ (x3 + s3) + add v3.4s, v3.4s, v11.4s + eor v3.16b, v3.16b, v7.16b + + st1 {v0.16b-v3.16b}, [x1] + + ret +ENDPROC(chacha20_block_xor_neon) + + .align 6 +ENTRY(chacha20_4block_xor_neon) + // x0: Input state matrix, s + // x1: 4 data blocks output, o + // x2: 4 data blocks input, i + + // + // This function encrypts four consecutive ChaCha20 blocks by loading + // the state matrix in NEON registers four times. The algorithm performs + // each operation on the corresponding word of each state matrix, hence + // requires no word shuffling. For final XORing step we transpose the + // matrix by interleaving 32- and then 64-bit words, which allows us to + // do XOR in NEON registers. + // + adr x3, CTRINC // ... 
and ROT8 + ld1 {v30.4s-v31.4s}, [x3] + + // x0..15[0-3] = s0..3[0..3] + mov x4, x0 + ld4r { v0.4s- v3.4s}, [x4], #16 + ld4r { v4.4s- v7.4s}, [x4], #16 + ld4r { v8.4s-v11.4s}, [x4], #16 + ld4r {v12.4s-v15.4s}, [x4] + + // x12 += counter values 0-3 + add v12.4s, v12.4s, v30.4s + + mov x3, #10 + +.Ldoubleround4: + // x0 += x4, x12 = rotl32(x12 ^ x0, 16) + // x1 += x5, x13 = rotl32(x13 ^ x1, 16) + // x2 += x6, x14 = rotl32(x14 ^ x2, 16) + // x3 += x7, x15 = rotl32(x15 ^ x3, 16) + add v0.4s, v0.4s, v4.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + add v3.4s, v3.4s, v7.4s + + eor v12.16b, v12.16b, v0.16b + eor v13.16b, v13.16b, v1.16b + eor v14.16b, v14.16b, v2.16b + eor v15.16b, v15.16b, v3.16b + + rev32 v12.8h, v12.8h + rev32 v13.8h, v13.8h + rev32 v14.8h, v14.8h + rev32 v15.8h, v15.8h + + // x8 += x12, x4 = rotl32(x4 ^ x8, 12) + // x9 += x13, x5 = rotl32(x5 ^ x9, 12) + // x10 += x14, x6 = rotl32(x6 ^ x10, 12) + // x11 += x15, x7 = rotl32(x7 ^ x11, 12) + add v8.4s, v8.4s, v12.4s + add v9.4s, v9.4s, v13.4s + add v10.4s, v10.4s, v14.4s + add v11.4s, v11.4s, v15.4s + + eor v16.16b, v4.16b, v8.16b + eor v17.16b, v5.16b, v9.16b + eor v18.16b, v6.16b, v10.16b + eor v19.16b, v7.16b, v11.16b + + shl v4.4s, v16.4s, #12 + shl v5.4s, v17.4s, #12 + shl v6.4s, v18.4s, #12 + shl v7.4s, v19.4s, #12 + + sri v4.4s, v16.4s, #20 + sri v5.4s, v17.4s, #20 + sri v6.4s, v18.4s, #20 + sri v7.4s, v19.4s, #20 + + // x0 += x4, x12 = rotl32(x12 ^ x0, 8) + // x1 += x5, x13 = rotl32(x13 ^ x1, 8) + // x2 += x6, x14 = rotl32(x14 ^ x2, 8) + // x3 += x7, x15 = rotl32(x15 ^ x3, 8) + add v0.4s, v0.4s, v4.4s + add v1.4s, v1.4s, v5.4s + add v2.4s, v2.4s, v6.4s + add v3.4s, v3.4s, v7.4s + + eor v12.16b, v12.16b, v0.16b + eor v13.16b, v13.16b, v1.16b + eor v14.16b, v14.16b, v2.16b + eor v15.16b, v15.16b, v3.16b + + tbl v12.16b, {v12.16b}, v31.16b + tbl v13.16b, {v13.16b}, v31.16b + tbl v14.16b, {v14.16b}, v31.16b + tbl v15.16b, {v15.16b}, v31.16b + + // x8 += x12, x4 = rotl32(x4 ^ x8, 7) + // x9 += x13, x5 = rotl32(x5 ^ x9, 7) + // x10 += x14, x6 = rotl32(x6 ^ x10, 7) + // x11 += x15, x7 = rotl32(x7 ^ x11, 7) + add v8.4s, v8.4s, v12.4s + add v9.4s, v9.4s, v13.4s + add v10.4s, v10.4s, v14.4s + add v11.4s, v11.4s, v15.4s + + eor v16.16b, v4.16b, v8.16b + eor v17.16b, v5.16b, v9.16b + eor v18.16b, v6.16b, v10.16b + eor v19.16b, v7.16b, v11.16b + + shl v4.4s, v16.4s, #7 + shl v5.4s, v17.4s, #7 + shl v6.4s, v18.4s, #7 + shl v7.4s, v19.4s, #7 + + sri v4.4s, v16.4s, #25 + sri v5.4s, v17.4s, #25 + sri v6.4s, v18.4s, #25 + sri v7.4s, v19.4s, #25 + + // x0 += x5, x15 = rotl32(x15 ^ x0, 16) + // x1 += x6, x12 = rotl32(x12 ^ x1, 16) + // x2 += x7, x13 = rotl32(x13 ^ x2, 16) + // x3 += x4, x14 = rotl32(x14 ^ x3, 16) + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v4.4s + + eor v15.16b, v15.16b, v0.16b + eor v12.16b, v12.16b, v1.16b + eor v13.16b, v13.16b, v2.16b + eor v14.16b, v14.16b, v3.16b + + rev32 v15.8h, v15.8h + rev32 v12.8h, v12.8h + rev32 v13.8h, v13.8h + rev32 v14.8h, v14.8h + + // x10 += x15, x5 = rotl32(x5 ^ x10, 12) + // x11 += x12, x6 = rotl32(x6 ^ x11, 12) + // x8 += x13, x7 = rotl32(x7 ^ x8, 12) + // x9 += x14, x4 = rotl32(x4 ^ x9, 12) + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v12.4s + add v8.4s, v8.4s, v13.4s + add v9.4s, v9.4s, v14.4s + + eor v16.16b, v5.16b, v10.16b + eor v17.16b, v6.16b, v11.16b + eor v18.16b, v7.16b, v8.16b + eor v19.16b, v4.16b, v9.16b + + shl v5.4s, v16.4s, #12 + shl v6.4s, v17.4s, #12 + shl v7.4s, v18.4s, #12 + shl v4.4s, v19.4s, #12 + + 
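	// NB: each shl/sri pair in this function implements a 32-bit
	// rotate-left without a dedicated vector-rotate instruction:
	// shl writes (x << n) and sri (shift right and insert) deposits
	// (x >> (32 - n)) into the vacated low bits, so together the
	// pair computes rotl32(x, n).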
sri v5.4s, v16.4s, #20 + sri v6.4s, v17.4s, #20 + sri v7.4s, v18.4s, #20 + sri v4.4s, v19.4s, #20 + + // x0 += x5, x15 = rotl32(x15 ^ x0, 8) + // x1 += x6, x12 = rotl32(x12 ^ x1, 8) + // x2 += x7, x13 = rotl32(x13 ^ x2, 8) + // x3 += x4, x14 = rotl32(x14 ^ x3, 8) + add v0.4s, v0.4s, v5.4s + add v1.4s, v1.4s, v6.4s + add v2.4s, v2.4s, v7.4s + add v3.4s, v3.4s, v4.4s + + eor v15.16b, v15.16b, v0.16b + eor v12.16b, v12.16b, v1.16b + eor v13.16b, v13.16b, v2.16b + eor v14.16b, v14.16b, v3.16b + + tbl v15.16b, {v15.16b}, v31.16b + tbl v12.16b, {v12.16b}, v31.16b + tbl v13.16b, {v13.16b}, v31.16b + tbl v14.16b, {v14.16b}, v31.16b + + // x10 += x15, x5 = rotl32(x5 ^ x10, 7) + // x11 += x12, x6 = rotl32(x6 ^ x11, 7) + // x8 += x13, x7 = rotl32(x7 ^ x8, 7) + // x9 += x14, x4 = rotl32(x4 ^ x9, 7) + add v10.4s, v10.4s, v15.4s + add v11.4s, v11.4s, v12.4s + add v8.4s, v8.4s, v13.4s + add v9.4s, v9.4s, v14.4s + + eor v16.16b, v5.16b, v10.16b + eor v17.16b, v6.16b, v11.16b + eor v18.16b, v7.16b, v8.16b + eor v19.16b, v4.16b, v9.16b + + shl v5.4s, v16.4s, #7 + shl v6.4s, v17.4s, #7 + shl v7.4s, v18.4s, #7 + shl v4.4s, v19.4s, #7 + + sri v5.4s, v16.4s, #25 + sri v6.4s, v17.4s, #25 + sri v7.4s, v18.4s, #25 + sri v4.4s, v19.4s, #25 + + subs x3, x3, #1 + b.ne .Ldoubleround4 + + ld4r {v16.4s-v19.4s}, [x0], #16 + ld4r {v20.4s-v23.4s}, [x0], #16 + + // x12 += counter values 0-3 + add v12.4s, v12.4s, v30.4s + + // x0[0-3] += s0[0] + // x1[0-3] += s0[1] + // x2[0-3] += s0[2] + // x3[0-3] += s0[3] + add v0.4s, v0.4s, v16.4s + add v1.4s, v1.4s, v17.4s + add v2.4s, v2.4s, v18.4s + add v3.4s, v3.4s, v19.4s + + ld4r {v24.4s-v27.4s}, [x0], #16 + ld4r {v28.4s-v31.4s}, [x0] + + // x4[0-3] += s1[0] + // x5[0-3] += s1[1] + // x6[0-3] += s1[2] + // x7[0-3] += s1[3] + add v4.4s, v4.4s, v20.4s + add v5.4s, v5.4s, v21.4s + add v6.4s, v6.4s, v22.4s + add v7.4s, v7.4s, v23.4s + + // x8[0-3] += s2[0] + // x9[0-3] += s2[1] + // x10[0-3] += s2[2] + // x11[0-3] += s2[3] + add v8.4s, v8.4s, v24.4s + add v9.4s, v9.4s, v25.4s + add v10.4s, v10.4s, v26.4s + add v11.4s, v11.4s, v27.4s + + // x12[0-3] += s3[0] + // x13[0-3] += s3[1] + // x14[0-3] += s3[2] + // x15[0-3] += s3[3] + add v12.4s, v12.4s, v28.4s + add v13.4s, v13.4s, v29.4s + add v14.4s, v14.4s, v30.4s + add v15.4s, v15.4s, v31.4s + + // interleave 32-bit words in state n, n+1 + zip1 v16.4s, v0.4s, v1.4s + zip2 v17.4s, v0.4s, v1.4s + zip1 v18.4s, v2.4s, v3.4s + zip2 v19.4s, v2.4s, v3.4s + zip1 v20.4s, v4.4s, v5.4s + zip2 v21.4s, v4.4s, v5.4s + zip1 v22.4s, v6.4s, v7.4s + zip2 v23.4s, v6.4s, v7.4s + zip1 v24.4s, v8.4s, v9.4s + zip2 v25.4s, v8.4s, v9.4s + zip1 v26.4s, v10.4s, v11.4s + zip2 v27.4s, v10.4s, v11.4s + zip1 v28.4s, v12.4s, v13.4s + zip2 v29.4s, v12.4s, v13.4s + zip1 v30.4s, v14.4s, v15.4s + zip2 v31.4s, v14.4s, v15.4s + + // interleave 64-bit words in state n, n+2 + zip1 v0.2d, v16.2d, v18.2d + zip2 v4.2d, v16.2d, v18.2d + zip1 v8.2d, v17.2d, v19.2d + zip2 v12.2d, v17.2d, v19.2d + ld1 {v16.16b-v19.16b}, [x2], #64 + + zip1 v1.2d, v20.2d, v22.2d + zip2 v5.2d, v20.2d, v22.2d + zip1 v9.2d, v21.2d, v23.2d + zip2 v13.2d, v21.2d, v23.2d + ld1 {v20.16b-v23.16b}, [x2], #64 + + zip1 v2.2d, v24.2d, v26.2d + zip2 v6.2d, v24.2d, v26.2d + zip1 v10.2d, v25.2d, v27.2d + zip2 v14.2d, v25.2d, v27.2d + ld1 {v24.16b-v27.16b}, [x2], #64 + + zip1 v3.2d, v28.2d, v30.2d + zip2 v7.2d, v28.2d, v30.2d + zip1 v11.2d, v29.2d, v31.2d + zip2 v15.2d, v29.2d, v31.2d + ld1 {v28.16b-v31.16b}, [x2] + + // xor with corresponding input, write to output + eor v16.16b, v16.16b, v0.16b + eor v17.16b, 
v17.16b, v1.16b + eor v18.16b, v18.16b, v2.16b + eor v19.16b, v19.16b, v3.16b + eor v20.16b, v20.16b, v4.16b + eor v21.16b, v21.16b, v5.16b + st1 {v16.16b-v19.16b}, [x1], #64 + eor v22.16b, v22.16b, v6.16b + eor v23.16b, v23.16b, v7.16b + eor v24.16b, v24.16b, v8.16b + eor v25.16b, v25.16b, v9.16b + st1 {v20.16b-v23.16b}, [x1], #64 + eor v26.16b, v26.16b, v10.16b + eor v27.16b, v27.16b, v11.16b + eor v28.16b, v28.16b, v12.16b + st1 {v24.16b-v27.16b}, [x1], #64 + eor v29.16b, v29.16b, v13.16b + eor v30.16b, v30.16b, v14.16b + eor v31.16b, v31.16b, v15.16b + st1 {v28.16b-v31.16b}, [x1] + + ret +ENDPROC(chacha20_4block_xor_neon) + +CTRINC: .word 0, 1, 2, 3 +ROT8: .word 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c new file mode 100644 index 000000000000..a7cd575ea223 --- /dev/null +++ b/arch/arm64/crypto/chacha20-neon-glue.c @@ -0,0 +1,126 @@ +/* + * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions + * + * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <crypto/algapi.h> +#include <crypto/chacha20.h> +#include <crypto/internal/skcipher.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/hwcap.h> +#include <asm/neon.h> + +asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src); +asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src); + +static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes) +{ + u8 buf[CHACHA20_BLOCK_SIZE]; + + while (bytes >= CHACHA20_BLOCK_SIZE * 4) { + chacha20_4block_xor_neon(state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE * 4; + src += CHACHA20_BLOCK_SIZE * 4; + dst += CHACHA20_BLOCK_SIZE * 4; + state[12] += 4; + } + while (bytes >= CHACHA20_BLOCK_SIZE) { + chacha20_block_xor_neon(state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE; + src += CHACHA20_BLOCK_SIZE; + dst += CHACHA20_BLOCK_SIZE; + state[12]++; + } + if (bytes) { + memcpy(buf, src, bytes); + chacha20_block_xor_neon(state, buf, buf); + memcpy(dst, buf, bytes); + } +} + +static int chacha20_neon(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + u32 state[16]; + int err; + + if (req->cryptlen <= CHACHA20_BLOCK_SIZE) + return crypto_chacha20_crypt(req); + + err = skcipher_walk_virt(&walk, req, true); + + crypto_chacha20_init(state, ctx, walk.iv); + + kernel_neon_begin(); + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, + nbytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + kernel_neon_end(); + + return err; +} + +static struct skcipher_alg alg = { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-neon", + 
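	/* Priority 300 outranks the generic chacha20 driver (registered
	 * at priority 100 in crypto/chacha20_generic.c, further down in
	 * this merge), so the crypto API prefers the NEON code whenever
	 * this module is loaded. */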
.base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA20_KEY_SIZE, + .max_keysize = CHACHA20_KEY_SIZE, + .ivsize = CHACHA20_IV_SIZE, + .chunksize = CHACHA20_BLOCK_SIZE, + .walksize = 4 * CHACHA20_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = chacha20_neon, + .decrypt = chacha20_neon, +}; + +static int __init chacha20_simd_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_ASIMD)) + return -ENODEV; + + return crypto_register_skcipher(&alg); +} + +static void __exit chacha20_simd_mod_fini(void) +{ + crypto_unregister_skcipher(&alg); +} + +module_init(chacha20_simd_mod_init); +module_exit(chacha20_simd_mod_fini); + +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 383a6f84a060..3c465184ff8a 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -46,28 +46,49 @@ #ifdef __x86_64__ -.data +# constants in mergeable sections, linker can reorder and merge +.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16 .align 16 .Lgf128mul_x_ble_mask: .octa 0x00000000000000010000000000000087 +.section .rodata.cst16.POLY, "aM", @progbits, 16 +.align 16 POLY: .octa 0xC2000000000000000000000000000001 +.section .rodata.cst16.TWOONE, "aM", @progbits, 16 +.align 16 TWOONE: .octa 0x00000001000000000000000000000001 -# order of these constants should not change. -# more specifically, ALL_F should follow SHIFT_MASK, -# and ZERO should follow ALL_F - +.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 +.align 16 SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F +.section .rodata.cst16.MASK1, "aM", @progbits, 16 +.align 16 MASK1: .octa 0x0000000000000000ffffffffffffffff +.section .rodata.cst16.MASK2, "aM", @progbits, 16 +.align 16 MASK2: .octa 0xffffffffffffffff0000000000000000 -SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 -ALL_F: .octa 0xffffffffffffffffffffffffffffffff -ZERO: .octa 0x00000000000000000000000000000000 +.section .rodata.cst16.ONE, "aM", @progbits, 16 +.align 16 ONE: .octa 0x00000000000000000000000000000001 +.section .rodata.cst16.F_MIN_MASK, "aM", @progbits, 16 +.align 16 F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0 +.section .rodata.cst16.dec, "aM", @progbits, 16 +.align 16 dec: .octa 0x1 +.section .rodata.cst16.enc, "aM", @progbits, 16 +.align 16 enc: .octa 0x2 +# order of these constants should not change. +# more specifically, ALL_F should follow SHIFT_MASK, +# and zero should follow ALL_F +.section .rodata, "a", @progbits +.align 16 +SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 +ALL_F: .octa 0xffffffffffffffffffffffffffffffff + .octa 0x00000000000000000000000000000000 + .text diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index 522ab68d1c88..d664382c6e56 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -122,23 +122,39 @@ #include <linux/linkage.h> #include <asm/inst.h> -.data +# constants in mergeable sections, linker can reorder and merge +.section .rodata.cst16.POLY, "aM", @progbits, 16 .align 16 - POLY: .octa 0xC2000000000000000000000000000001 + +.section .rodata.cst16.POLY2, "aM", @progbits, 16 +.align 16 POLY2: .octa 0xC20000000000000000000001C2000000 -TWOONE: .octa 0x00000001000000000000000000000001 -# order of these constants should not change. 
-# more specifically, ALL_F should follow SHIFT_MASK, and ZERO should follow ALL_F +.section .rodata.cst16.TWOONE, "aM", @progbits, 16 +.align 16 +TWOONE: .octa 0x00000001000000000000000000000001 +.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 +.align 16 SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F -SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 -ALL_F: .octa 0xffffffffffffffffffffffffffffffff -ZERO: .octa 0x00000000000000000000000000000000 + +.section .rodata.cst16.ONE, "aM", @progbits, 16 +.align 16 ONE: .octa 0x00000000000000000000000000000001 + +.section .rodata.cst16.ONEf, "aM", @progbits, 16 +.align 16 ONEf: .octa 0x01000000000000000000000000000000 +# order of these constants should not change. +# more specifically, ALL_F should follow SHIFT_MASK, and zero should follow ALL_F +.section .rodata, "a", @progbits +.align 16 +SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 +ALL_F: .octa 0xffffffffffffffffffffffffffffffff + .octa 0x00000000000000000000000000000000 + .text diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 7ff1b0c86a8e..93de8ea51548 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -740,9 +740,11 @@ static int helper_rfc4106_encrypt(struct aead_request *req) *((__be32 *)(iv+12)) = counter; if (sg_is_last(req->src) && - req->src->offset + req->src->length <= PAGE_SIZE && + (!PageHighMem(sg_page(req->src)) || + req->src->offset + req->src->length <= PAGE_SIZE) && sg_is_last(req->dst) && - req->dst->offset + req->dst->length <= PAGE_SIZE) { + (!PageHighMem(sg_page(req->dst)) || + req->dst->offset + req->dst->length <= PAGE_SIZE)) { one_entry_in_sg = 1; scatterwalk_start(&src_sg_walk, req->src); assoc = scatterwalk_map(&src_sg_walk); @@ -822,9 +824,11 @@ static int helper_rfc4106_decrypt(struct aead_request *req) *((__be32 *)(iv+12)) = counter; if (sg_is_last(req->src) && - req->src->offset + req->src->length <= PAGE_SIZE && + (!PageHighMem(sg_page(req->src)) || + req->src->offset + req->src->length <= PAGE_SIZE) && sg_is_last(req->dst) && - req->dst->offset + req->dst->length <= PAGE_SIZE) { + (!PageHighMem(sg_page(req->dst)) || + req->dst->offset + req->dst->length <= PAGE_SIZE)) { one_entry_in_sg = 1; scatterwalk_start(&src_sg_walk, req->src); assoc = scatterwalk_map(&src_sg_walk); diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index aa9e8bd163f6..f7c495e2863c 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -571,7 +571,9 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) vmovdqu y6, 14 * 16(rio); \ vmovdqu y7, 15 * 16(rio); -.data + +/* NB: section is mergeable, all elements must be aligned 16-byte blocks */ +.section .rodata.cst16, "aM", @progbits, 16 .align 16 #define SHUFB_BYTES(idx) \ @@ -711,6 +713,7 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 /* 4-bit mask */ +.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 .align 4 .L0f0f0f0f: .long 0x0f0f0f0f diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 16186c18656d..eee5b3982cfd 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -610,20 +610,25 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) vmovdqu y6, 14 * 32(rio); \ vmovdqu y7, 15 * 32(rio); -.data 
-.align 32 +.section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32 +.align 32 #define SHUFB_BYTES(idx) \ 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) - .Lshufb_16x16b: .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) +.section .rodata.cst32.pack_bswap, "aM", @progbits, 32 +.align 32 .Lpack_bswap: .long 0x00010203, 0x04050607, 0x80808080, 0x80808080 .long 0x00010203, 0x04050607, 0x80808080, 0x80808080 +/* NB: section is mergeable, all elements must be aligned 16-byte blocks */ +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 + /* For CTR-mode IV byteswap */ .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 @@ -750,6 +755,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 +.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 .align 4 /* 4-bit mask */ .L0f0f0f0f: diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index 14fa1966bf01..b4a8806234ea 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -195,19 +195,29 @@ vpshufb rmask, x0, x0; \ vpshufb rmask, x1, x1; -.data - +.section .rodata.cst16.bswap_mask, "aM", @progbits, 16 .align 16 .Lbswap_mask: .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 +.align 16 .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.section .rodata.cst16.bswap_iv_mask, "aM", @progbits, 16 +.align 16 .Lbswap_iv_mask: .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst4.16_mask, "aM", @progbits, 4 +.align 4 .L16_mask: .byte 16, 16, 16, 16 +.section .rodata.cst4.32_mask, "aM", @progbits, 4 +.align 4 .L32_mask: .byte 32, 0, 0, 0 +.section .rodata.cst4.first_mask, "aM", @progbits, 4 +.align 4 .Lfirst_mask: .byte 0x1f, 0, 0, 0 diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index c419389889cd..952d3156a933 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -225,8 +225,7 @@ vpshufb rmask, x2, x2; \ vpshufb rmask, x3, x3; -.data - +.section .rodata.cst16, "aM", @progbits, 16 .align 16 .Lxts_gf128mul_and_shl1_mask: .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 @@ -244,10 +243,19 @@ .byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0 .Lrkr_dec_QBAR_QBAR_QBAR_QBAR: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst4.L16_mask, "aM", @progbits, 4 +.align 4 .L16_mask: .byte 16, 16, 16, 16 + +.section .rodata.cst4.L32_mask, "aM", @progbits, 4 +.align 4 .L32_mask: .byte 32, 0, 0, 0 + +.section .rodata.cst4.first_mask, "aM", @progbits, 4 +.align 4 .Lfirst_mask: .byte 0x1f, 0, 0, 0 diff --git a/arch/x86/crypto/chacha20-avx2-x86_64.S b/arch/x86/crypto/chacha20-avx2-x86_64.S index 16694e625f77..3a2dc3dc6cac 100644 --- a/arch/x86/crypto/chacha20-avx2-x86_64.S +++ b/arch/x86/crypto/chacha20-avx2-x86_64.S @@ -11,13 +11,18 @@ #include <linux/linkage.h> -.data +.section .rodata.cst32.ROT8, "aM", @progbits, 32 .align 32 - ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003 .octa 0x0e0d0c0f0a09080b0605040702010003 + +.section .rodata.cst32.ROT16, "aM", @progbits, 32 +.align 32 ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302 .octa 0x0d0c0f0e09080b0a0504070601000302 + +.section 
.rodata.cst32.CTRINC, "aM", @progbits, 32 +.align 32 CTRINC: .octa 0x00000003000000020000000100000000 .octa 0x00000007000000060000000500000004 diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha20-ssse3-x86_64.S index 3a33124e9112..3f511a7d73b8 100644 --- a/arch/x86/crypto/chacha20-ssse3-x86_64.S +++ b/arch/x86/crypto/chacha20-ssse3-x86_64.S @@ -11,11 +11,14 @@ #include <linux/linkage.h> -.data +.section .rodata.cst16.ROT8, "aM", @progbits, 16 .align 16 - ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003 +.section .rodata.cst16.ROT16, "aM", @progbits, 16 +.align 16 ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302 +.section .rodata.cst16.CTRINC, "aM", @progbits, 16 +.align 16 CTRINC: .octa 0x00000003000000020000000100000000 .text diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index f910d1d449f0..1e6af1b35f7b 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c @@ -11,7 +11,7 @@ #include <crypto/algapi.h> #include <crypto/chacha20.h> -#include <linux/crypto.h> +#include <crypto/internal/skcipher.h> #include <linux/kernel.h> #include <linux/module.h> #include <asm/fpu/api.h> @@ -63,36 +63,37 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, } } -static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int chacha20_simd(struct skcipher_request *req) { - u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1]; - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + u32 *state, state_buf[16 + 2] __aligned(8); + struct skcipher_walk walk; int err; - if (nbytes <= CHACHA20_BLOCK_SIZE || !may_use_simd()) - return crypto_chacha20_crypt(desc, dst, src, nbytes); + BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16); + state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN); - state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN); + if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd()) + return crypto_chacha20_crypt(req); - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE); + err = skcipher_walk_virt(&walk, req, true); - crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv); + crypto_chacha20_init(state, ctx, walk.iv); kernel_fpu_begin(); while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE)); - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % CHACHA20_BLOCK_SIZE); + err = skcipher_walk_done(&walk, + walk.nbytes % CHACHA20_BLOCK_SIZE); } if (walk.nbytes) { chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes); - err = blkcipher_walk_done(desc, &walk, 0); + err = skcipher_walk_done(&walk, 0); } kernel_fpu_end(); @@ -100,27 +101,22 @@ static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, return err; } -static struct crypto_alg alg = { - .cra_name = "chacha20", - .cra_driver_name = "chacha20-simd", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_type = &crypto_blkcipher_type, - .cra_ctxsize = sizeof(struct chacha20_ctx), - .cra_alignmask = sizeof(u32) - 1, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .geniv = "seqiv", - .setkey = 
crypto_chacha20_setkey, - .encrypt = chacha20_simd, - .decrypt = chacha20_simd, - }, - }, +static struct skcipher_alg alg = { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-simd", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_alignmask = sizeof(u32) - 1, + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA20_KEY_SIZE, + .max_keysize = CHACHA20_KEY_SIZE, + .ivsize = CHACHA20_IV_SIZE, + .chunksize = CHACHA20_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = chacha20_simd, + .decrypt = chacha20_simd, }; static int __init chacha20_simd_mod_init(void) @@ -133,12 +129,12 @@ static int __init chacha20_simd_mod_init(void) boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); #endif - return crypto_register_alg(&alg); + return crypto_register_skcipher(&alg); } static void __exit chacha20_simd_mod_fini(void) { - crypto_unregister_alg(&alg); + crypto_unregister_skcipher(&alg); } module_init(chacha20_simd_mod_init); diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index dc05f010ca9b..7a7de27c6f41 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -312,7 +312,7 @@ do_return: ret ENDPROC(crc_pcl) -.section .rodata, "a", %progbits +.section .rodata, "a", @progbits ################################################################ ## jump table Table is 129 entries x 2 bytes each ################################################################ diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S index 35e97569d05f..de04d3e98d8d 100644 --- a/arch/x86/crypto/crct10dif-pcl-asm_64.S +++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S @@ -554,12 +554,11 @@ _only_less_than_2: ENDPROC(crc_t10dif_pcl) -.data - +.section .rodata, "a", @progbits +.align 16 # precomputed constants # these constants are precomputed from the poly: # 0x8bb70000 (0x8bb7 scaled to 32 bits) -.align 16 # Q = 0x18BB70000 # rk1 = 2^(32*3) mod Q << 32 # rk2 = 2^(32*5) mod Q << 32 @@ -613,14 +612,23 @@ rk20: +.section .rodata.cst16.mask1, "aM", @progbits, 16 +.align 16 mask1: .octa 0x80808080808080808080808080808080 + +.section .rodata.cst16.mask2, "aM", @progbits, 16 +.align 16 mask2: .octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF +.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 +.align 16 SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F +.section .rodata.cst32.pshufb_shf_table, "aM", @progbits, 32 +.align 32 pshufb_shf_table: # use these values for shift constants for the pshufb instruction # different alignments result in values as shown: diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S index 038f6ae87c5e..f3e91647ca27 100644 --- a/arch/x86/crypto/des3_ede-asm_64.S +++ b/arch/x86/crypto/des3_ede-asm_64.S @@ -537,7 +537,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) ret; ENDPROC(des3_ede_x86_64_crypt_blk_3way) -.data +.section .rodata, "a", @progbits .align 16 .L_s1: .quad 0x0010100001010400, 0x0000000000000000 diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index eed55c8cca4f..f94375a8dcd1 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -20,8 +20,7 @@ #include <asm/inst.h> #include <asm/frame.h> -.data - +.section .rodata.cst16.bswap_mask, "aM", @progbits, 16 .align 16 .Lbswap_mask: .octa 
0x000102030405060708090a0b0c0d0e0f diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S index eff2f414e22b..3b6e70d085da 100644 --- a/arch/x86/crypto/poly1305-avx2-x86_64.S +++ b/arch/x86/crypto/poly1305-avx2-x86_64.S @@ -11,11 +11,13 @@ #include <linux/linkage.h> -.data +.section .rodata.cst32.ANMASK, "aM", @progbits, 32 .align 32 - ANMASK: .octa 0x0000000003ffffff0000000003ffffff .octa 0x0000000003ffffff0000000003ffffff + +.section .rodata.cst32.ORMASK, "aM", @progbits, 32 +.align 32 ORMASK: .octa 0x00000000010000000000000001000000 .octa 0x00000000010000000000000001000000 diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S index 338c748054ed..c88c670cb5fc 100644 --- a/arch/x86/crypto/poly1305-sse2-x86_64.S +++ b/arch/x86/crypto/poly1305-sse2-x86_64.S @@ -11,10 +11,12 @@ #include <linux/linkage.h> -.data +.section .rodata.cst16.ANMASK, "aM", @progbits, 16 .align 16 - ANMASK: .octa 0x0000000003ffffff0000000003ffffff + +.section .rodata.cst16.ORMASK, "aM", @progbits, 16 +.align 16 ORMASK: .octa 0x00000000010000000000000001000000 .text diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 8be571808342..2925077f8c6a 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -29,11 +29,12 @@ .file "serpent-avx-x86_64-asm_64.S" -.data +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 .align 16 - .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16 +.align 16 .Lxts_gf128mul_and_shl1_mask: .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S index 97c48add33ed..d67888f2a52a 100644 --- a/arch/x86/crypto/serpent-avx2-asm_64.S +++ b/arch/x86/crypto/serpent-avx2-asm_64.S @@ -20,13 +20,18 @@ .file "serpent-avx2-asm_64.S" -.data +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 .align 16 - .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst16.xts_gf128mul_and_shl1_mask_0, "aM", @progbits, 16 +.align 16 .Lxts_gf128mul_and_shl1_mask_0: .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 + +.section .rodata.cst16.xts_gf128mul_and_shl1_mask_1, "aM", @progbits, 16 +.align 16 .Lxts_gf128mul_and_shl1_mask_1: .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S index 96df6a39d7e2..93b945597ecf 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S @@ -281,11 +281,13 @@ ENTRY(sha1_mb_mgr_get_comp_job_avx2) ret ENDPROC(sha1_mb_mgr_get_comp_job_avx2) -.data - +.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 .align 16 clear_low_nibble: .octa 0x000000000000000000000000FFFFFFF0 + +.section .rodata.cst8, "aM", @progbits, 8 +.align 8 one: .quad 1 two: diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S index 63a0d9c8e31f..7a93b1c0d69a 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S +++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S @@ -203,8 +203,7 @@ return_null: ENDPROC(sha1_mb_mgr_submit_avx2) -.data - +.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 .align 16 clear_low_nibble: .octa 
0x000000000000000000000000FFFFFFF0 diff --git a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S index c9dae1cd2919..20f77aa633de 100644 --- a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S +++ b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S @@ -461,21 +461,32 @@ lloop: ENDPROC(sha1_x8_avx2) -.data - +.section .rodata.cst32.K00_19, "aM", @progbits, 32 .align 32 K00_19: .octa 0x5A8279995A8279995A8279995A827999 .octa 0x5A8279995A8279995A8279995A827999 + +.section .rodata.cst32.K20_39, "aM", @progbits, 32 +.align 32 K20_39: .octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 .octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 + +.section .rodata.cst32.K40_59, "aM", @progbits, 32 +.align 32 K40_59: .octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC .octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC + +.section .rodata.cst32.K60_79, "aM", @progbits, 32 +.align 32 K60_79: .octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 .octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 + +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 .octa 0x0c0d0e0f08090a0b0405060700010203 diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S index 874a651b9e7d..ebbdba72ae07 100644 --- a/arch/x86/crypto/sha1_ni_asm.S +++ b/arch/x86/crypto/sha1_ni_asm.S @@ -293,10 +293,12 @@ ENTRY(sha1_ni_transform) ret ENDPROC(sha1_ni_transform) -.data - -.align 64 +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x000102030405060708090a0b0c0d0e0f + +.section .rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16 +.align 16 UPPER_WORD_MASK: .octa 0xFFFFFFFF000000000000000000000000 diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 92b3b5d75ba9..e08888a1a5f2 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -463,7 +463,7 @@ done_hash: ret ENDPROC(sha256_transform_avx) -.data +.section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -483,14 +483,21 @@ K256: .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 +.section .rodata.cst16._SHUF_00BA, "aM", @progbits, 16 +.align 16 # shuffle xBxA -> 00BA _SHUF_00BA: .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100 +.section .rodata.cst16._SHUF_DC00, "aM", @progbits, 16 +.align 16 # shuffle xDxC -> DC00 _SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF + #endif diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 570ec5ec62d7..89c8f09787d2 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -723,7 +723,7 @@ done_hash: ret ENDPROC(sha256_transform_rorx) -.data +.section .rodata.cst512.K256, "aM", @progbits, 512 .align 64 K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -759,14 +759,21 @@ K256: .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203,0x0c0d0e0f08090a0b0405060700010203 # shuffle xBxA -> 00BA +.section .rodata.cst32._SHUF_00BA, "aM", @progbits, 32 +.align 32 _SHUF_00BA: .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100,0xFFFFFFFFFFFFFFFF0b0a090803020100 # shuffle xDxC -> DC00 
+.section .rodata.cst32._SHUF_DC00, "aM", @progbits, 32 +.align 32 _SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF + #endif diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S index a78a0694ddef..8fe6338bcc84 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S @@ -284,11 +284,13 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2) ret ENDPROC(sha256_mb_mgr_get_comp_job_avx2) -.data - +.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 .align 16 clear_low_nibble: .octa 0x000000000000000000000000FFFFFFF0 + +.section .rodata.cst8, "aM", @progbits, 8 +.align 8 one: .quad 1 two: diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S index 7ea670e25acc..b36ae7454084 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S @@ -208,8 +208,7 @@ return_null: ENDPROC(sha256_mb_mgr_submit_avx2) -.data - +.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 .align 16 clear_low_nibble: .octa 0x000000000000000000000000FFFFFFF0 diff --git a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S index aa21aea4c722..1687c80c5995 100644 --- a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S @@ -437,7 +437,8 @@ Lrounds_16_xx: ret ENDPROC(sha256_x8_avx2) -.data + +.section .rodata.K256_8, "a", @progbits .align 64 K256_8: .octa 0x428a2f98428a2f98428a2f98428a2f98 @@ -568,10 +569,14 @@ K256_8: .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 .octa 0xc67178f2c67178f2c67178f2c67178f2 .octa 0xc67178f2c67178f2c67178f2c67178f2 + +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 .octa 0x0c0d0e0f08090a0b0405060700010203 +.section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 .global K256 K256: diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index 2cedc44e8121..39b83c93e7fd 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -474,7 +474,7 @@ done_hash: ret ENDPROC(sha256_transform_ssse3) -.data +.section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -494,13 +494,19 @@ K256: .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 +.section .rodata.cst16._SHUF_00BA, "aM", @progbits, 16 +.align 16 # shuffle xBxA -> 00BA _SHUF_00BA: .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100 +.section .rodata.cst16._SHUF_DC00, "aM", @progbits, 16 +.align 16 # shuffle xDxC -> DC00 _SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S index 748cdf21a938..fb58f58ecfbc 100644 --- a/arch/x86/crypto/sha256_ni_asm.S +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -329,7 +329,7 @@ ENTRY(sha256_ni_transform) ret ENDPROC(sha256_ni_transform) -.data +.section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -349,5 +349,7 @@ K256: .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 
0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 565274d6a641..39235fefe6f7 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -370,14 +370,17 @@ ENDPROC(sha512_transform_avx) ######################################################################## ### Binary Data -.data - +.section .rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16 .align 16 - # Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. XMM_QWORD_BSWAP: .octa 0x08090a0b0c0d0e0f0001020304050607 +# Mergeable 640-byte rodata section. This allows linker to merge the table +# with other, exactly the same 640-byte fragment of another rodata section +# (if such section exists). +.section .rodata.cst640.K512, "aM", @progbits, 640 +.align 64 # K[t] used in SHA512 hashing K512: .quad 0x428a2f98d728ae22,0x7137449123ef65cd diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 1f20b35d8573..7f5f6c6ec72e 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -684,8 +684,11 @@ ENDPROC(sha512_transform_rorx) ######################################################################## ### Binary Data -.data +# Mergeable 640-byte rodata section. This allows linker to merge the table +# with other, exactly the same 640-byte fragment of another rodata section +# (if such section exists). +.section .rodata.cst640.K512, "aM", @progbits, 640 .align 64 # K[t] used in SHA512 hashing K512: @@ -730,14 +733,17 @@ K512: .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 .align 32 - # Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607 .octa 0x18191a1b1c1d1e1f1011121314151617 +.section .rodata.cst32.MASK_YMM_LO, "aM", @progbits, 32 +.align 32 MASK_YMM_LO: .octa 0x00000000000000000000000000000000 .octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF + #endif diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S index 3ddba19a0db6..7c629caebc05 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S @@ -280,12 +280,18 @@ ENTRY(sha512_mb_mgr_get_comp_job_avx2) pop %rbx ret ENDPROC(sha512_mb_mgr_get_comp_job_avx2) -.data -.align 16 +.section .rodata.cst8.one, "aM", @progbits, 8 +.align 8 one: .quad 1 + +.section .rodata.cst8.two, "aM", @progbits, 8 +.align 8 two: .quad 2 + +.section .rodata.cst8.three, "aM", @progbits, 8 +.align 8 three: .quad 3 diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S index 815f07bdd1f8..4ba709ba78e5 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S @@ -209,8 +209,9 @@ return_null: xor job_rax, job_rax jmp return ENDPROC(sha512_mb_mgr_submit_avx2) -.data +/* UNUSED? 
+.section .rodata.cst16, "aM", @progbits, 16 .align 16 H0: .int 0x6a09e667 H1: .int 0xbb67ae85 @@ -220,3 +221,4 @@ H4: .int 0x510e527f H5: .int 0x9b05688c H6: .int 0x1f83d9ab H7: .int 0x5be0cd19 +*/ diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S index 31ab1eff6413..e22e907643a6 100644 --- a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S +++ b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S @@ -361,7 +361,7 @@ Lrounds_16_xx: ret ENDPROC(sha512_x4_avx2) -.data +.section .rodata.K512_4, "a", @progbits .align 64 K512_4: .octa 0x428a2f98d728ae22428a2f98d728ae22,\ @@ -525,5 +525,7 @@ K512_4: .octa 0x6c44198c4a4758176c44198c4a475817,\ 0x6c44198c4a4758176c44198c4a475817 +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607 .octa 0x18191a1b1c1d1e1f1011121314151617 diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index e610e29cbc81..66bbd9058a90 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -369,14 +369,17 @@ ENDPROC(sha512_transform_ssse3) ######################################################################## ### Binary Data -.data - +.section .rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16 .align 16 - # Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. XMM_QWORD_BSWAP: .octa 0x08090a0b0c0d0e0f0001020304050607 +# Mergeable 640-byte rodata section. This allows linker to merge the table +# with other, exactly the same 640-byte fragment of another rodata section +# (if such section exists). +.section .rodata.cst640.K512, "aM", @progbits, 640 +.align 64 # K[t] used in SHA512 hashing K512: .quad 0x428a2f98d728ae22,0x7137449123ef65cd diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index dc66273e610d..b3f49d286348 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -29,11 +29,13 @@ .file "twofish-avx-x86_64-asm_64.S" -.data +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 .align 16 - .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16 +.align 16 .Lxts_gf128mul_and_shl1_mask: .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index d676fc59521a..d880a4897159 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -19,6 +19,7 @@ #include <linux/slab.h> #include <linux/seq_file.h> #include <linux/cryptouser.h> +#include <linux/compiler.h> #include <net/netlink.h> #include <crypto/scatterwalk.h> @@ -394,7 +395,7 @@ static int crypto_ablkcipher_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg) { struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher; @@ -468,7 +469,7 @@ static int crypto_givcipher_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg) { struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher; diff --git a/crypto/acompress.c b/crypto/acompress.c index 
887783d8e9a9..47d11627cd20 100644 --- a/crypto/acompress.c +++ b/crypto/acompress.c @@ -20,6 +20,7 @@ #include <linux/crypto.h> #include <crypto/algapi.h> #include <linux/cryptouser.h> +#include <linux/compiler.h> #include <net/netlink.h> #include <crypto/internal/acompress.h> #include <crypto/internal/scompress.h> @@ -50,7 +51,7 @@ static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg) { diff --git a/crypto/aead.c b/crypto/aead.c index 3f5c5ff004ab..f794b30a9407 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -24,6 +24,7 @@ #include <linux/slab.h> #include <linux/seq_file.h> #include <linux/cryptouser.h> +#include <linux/compiler.h> #include <net/netlink.h> #include "internal.h" @@ -132,7 +133,7 @@ static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg) { struct aead_alg *aead = container_of(alg, struct aead_alg, base); diff --git a/crypto/ahash.c b/crypto/ahash.c index 2ce8bcb9049c..e58c4970c22b 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -23,6 +23,7 @@ #include <linux/slab.h> #include <linux/seq_file.h> #include <linux/cryptouser.h> +#include <linux/compiler.h> #include <net/netlink.h> #include "internal.h" @@ -493,7 +494,7 @@ static int crypto_ahash_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg) { seq_printf(m, "type : ahash\n"); diff --git a/crypto/akcipher.c b/crypto/akcipher.c index def301ed1288..cfbdb06d8ca8 100644 --- a/crypto/akcipher.c +++ b/crypto/akcipher.c @@ -17,6 +17,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/crypto.h> +#include <linux/compiler.h> #include <crypto/algapi.h> #include <linux/cryptouser.h> #include <net/netlink.h> @@ -47,7 +48,7 @@ static int crypto_akcipher_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_akcipher_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_akcipher_show(struct seq_file *m, struct crypto_alg *alg) { diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index d19b09cdf284..54fc90e8339c 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -245,7 +245,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags) struct alg_sock *ask = alg_sk(sk); struct hash_ctx *ctx = ask->private; struct ahash_request *req = &ctx->req; - char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req))]; + char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req)) ? : 1]; struct sock *sk2; struct alg_sock *ask2; struct hash_ctx *ctx2; diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index a832426820e8..6c43a0a17a55 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -1,6 +1,6 @@ /* * Block chaining cipher operations. - * + * * Generic encrypt/decrypt wrapper for ciphers, handles operations across * multiple page boundaries by using temporary blocks. In user context, * the kernel is given a chance to schedule us once per page. 
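/*
 * A note on the recurring __attribute__ ((unused)) -> __maybe_unused
 * conversions in the crypto/*.c hunks here: the macro comes from
 * <linux/compiler.h> and is the kernel's preferred spelling of the same
 * attribute. A minimal sketch of the pattern follows, using a
 * hypothetical crypto_foo_show() that is not taken from this diff:
 */
#include <linux/compiler.h>	/* provides __maybe_unused */
#include <linux/crypto.h>
#include <linux/seq_file.h>

/* The annotation sits on the forward declaration; it keeps the compiler
 * quiet when the only user, a conditionally compiled .show callback,
 * is configured out. */
static void crypto_foo_show(struct seq_file *m, struct crypto_alg *alg)
	__maybe_unused;

static void crypto_foo_show(struct seq_file *m, struct crypto_alg *alg)
{
	seq_printf(m, "type         : foo\n");
}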
@@ -9,7 +9,7 @@ * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) + * Software Foundation; either version 2 of the License, or (at your option) * any later version. * */ @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/cryptouser.h> +#include <linux/compiler.h> #include <net/netlink.h> #include "internal.h" @@ -534,7 +535,7 @@ static int crypto_blkcipher_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg) { seq_printf(m, "type : blkcipher\n"); diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c index 1cab83146e33..8b3c04d625c3 100644 --- a/crypto/chacha20_generic.c +++ b/crypto/chacha20_generic.c @@ -10,10 +10,9 @@ */ #include <crypto/algapi.h> -#include <linux/crypto.h> -#include <linux/kernel.h> -#include <linux/module.h> #include <crypto/chacha20.h> +#include <crypto/internal/skcipher.h> +#include <linux/module.h> static inline u32 le32_to_cpuvp(const void *p) { @@ -63,10 +62,10 @@ void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv) } EXPORT_SYMBOL_GPL(crypto_chacha20_init); -int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key, +int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keysize) { - struct chacha20_ctx *ctx = crypto_tfm_ctx(tfm); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); int i; if (keysize != CHACHA20_KEY_SIZE) @@ -79,66 +78,54 @@ int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key, } EXPORT_SYMBOL_GPL(crypto_chacha20_setkey); -int crypto_chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +int crypto_chacha20_crypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; u32 state[16]; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE); - - crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv); + err = skcipher_walk_virt(&walk, req, true); - while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { - chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, - rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE)); - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % CHACHA20_BLOCK_SIZE); - } + crypto_chacha20_init(state, ctx, walk.iv); - if (walk.nbytes) { + while (walk.nbytes > 0) { chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes); - err = blkcipher_walk_done(desc, &walk, 0); + err = skcipher_walk_done(&walk, 0); } return err; } EXPORT_SYMBOL_GPL(crypto_chacha20_crypt); -static struct crypto_alg alg = { - .cra_name = "chacha20", - .cra_driver_name = "chacha20-generic", - .cra_priority = 100, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_type = &crypto_blkcipher_type, - .cra_ctxsize = sizeof(struct chacha20_ctx), - .cra_alignmask = sizeof(u32) - 1, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .geniv = 
"seqiv", - .setkey = crypto_chacha20_setkey, - .encrypt = crypto_chacha20_crypt, - .decrypt = crypto_chacha20_crypt, - }, - }, +static struct skcipher_alg alg = { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-generic", + .base.cra_priority = 100, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_alignmask = sizeof(u32) - 1, + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA20_KEY_SIZE, + .max_keysize = CHACHA20_KEY_SIZE, + .ivsize = CHACHA20_IV_SIZE, + .chunksize = CHACHA20_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = crypto_chacha20_crypt, + .decrypt = crypto_chacha20_crypt, }; static int __init chacha20_generic_mod_init(void) { - return crypto_register_alg(&alg); + return crypto_register_skcipher(&alg); } static void __exit chacha20_generic_mod_fini(void) { - crypto_unregister_alg(&alg); + crypto_unregister_skcipher(&alg); } module_init(chacha20_generic_mod_init); diff --git a/crypto/cts.c b/crypto/cts.c index 00254d76b21b..a1335d6c35fb 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -49,6 +49,7 @@ #include <linux/scatterlist.h> #include <crypto/scatterwalk.h> #include <linux/slab.h> +#include <linux/compiler.h> struct crypto_cts_ctx { struct crypto_skcipher *child; @@ -103,7 +104,7 @@ static int cts_cbc_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_request *subreq = &rctx->subreq; int bsize = crypto_skcipher_blocksize(tfm); - u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32)))); + u8 d[bsize * 2] __aligned(__alignof__(u32)); struct scatterlist *sg; unsigned int offset; int lastn; @@ -183,7 +184,7 @@ static int cts_cbc_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_request *subreq = &rctx->subreq; int bsize = crypto_skcipher_blocksize(tfm); - u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32)))); + u8 d[bsize * 2] __aligned(__alignof__(u32)); struct scatterlist *sg; unsigned int offset; u8 *space; diff --git a/crypto/kpp.c b/crypto/kpp.c index d36ce05eee43..a90edc27af77 100644 --- a/crypto/kpp.c +++ b/crypto/kpp.c @@ -19,6 +19,7 @@ #include <linux/crypto.h> #include <crypto/algapi.h> #include <linux/cryptouser.h> +#include <linux/compiler.h> #include <net/netlink.h> #include <crypto/kpp.h> #include <crypto/internal/kpp.h> @@ -47,7 +48,7 @@ static int crypto_kpp_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_kpp_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_kpp_show(struct seq_file *m, struct crypto_alg *alg) { diff --git a/crypto/pcbc.c b/crypto/pcbc.c index e4538e07f7ca..11d248673ad4 100644 --- a/crypto/pcbc.c +++ b/crypto/pcbc.c @@ -20,6 +20,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/compiler.h> struct crypto_pcbc_ctx { struct crypto_cipher *child; @@ -146,7 +147,7 @@ static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req, unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; u8 *iv = walk->iv; - u8 tmpbuf[bsize] __attribute__ ((aligned(__alignof__(u32)))); + u8 tmpbuf[bsize] __aligned(__alignof__(u32)); do { memcpy(tmpbuf, src, bsize); diff --git a/crypto/rng.c b/crypto/rng.c index b81cffb13bab..f46dac5288b9 100644 --- a/crypto/rng.c +++ b/crypto/rng.c @@ -23,6 +23,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/cryptouser.h> +#include <linux/compiler.h> 
#include <net/netlink.h> #include "internal.h" @@ -95,7 +96,7 @@ static int crypto_rng_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg) { seq_printf(m, "type : rng\n"); diff --git a/crypto/scompress.c b/crypto/scompress.c index 35e396d154b7..6b048b36312d 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -18,6 +18,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/crypto.h> +#include <linux/compiler.h> #include <linux/vmalloc.h> #include <crypto/algapi.h> #include <linux/cryptouser.h> @@ -57,7 +58,7 @@ static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg) { diff --git a/crypto/shash.c b/crypto/shash.c index a051541a4a17..5e31c8d776df 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -19,6 +19,7 @@ #include <linux/seq_file.h> #include <linux/cryptouser.h> #include <net/netlink.h> +#include <linux/compiler.h> #include "internal.h" @@ -67,7 +68,7 @@ EXPORT_SYMBOL_GPL(crypto_shash_setkey); static inline unsigned int shash_align_buffer_size(unsigned len, unsigned long mask) { - typedef u8 __attribute__ ((aligned)) u8_aligned; + typedef u8 __aligned_largest u8_aligned; return len + (mask & ~(__alignof__(u8_aligned) - 1)); } @@ -80,7 +81,7 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data, unsigned int unaligned_len = alignmask + 1 - ((unsigned long)data & alignmask); u8 ubuf[shash_align_buffer_size(unaligned_len, alignmask)] - __attribute__ ((aligned)); + __aligned_largest; u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1); int err; @@ -116,7 +117,7 @@ static int shash_final_unaligned(struct shash_desc *desc, u8 *out) struct shash_alg *shash = crypto_shash_alg(tfm); unsigned int ds = crypto_shash_digestsize(tfm); u8 ubuf[shash_align_buffer_size(ds, alignmask)] - __attribute__ ((aligned)); + __aligned_largest; u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1); int err; @@ -403,7 +404,7 @@ static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg) #endif static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg) { struct shash_alg *salg = __crypto_shash_alg(alg); diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 0e1e6c35188e..014af741fc6a 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -19,6 +19,7 @@ #include <crypto/scatterwalk.h> #include <linux/bug.h> #include <linux/cryptouser.h> +#include <linux/compiler.h> #include <linux/list.h> #include <linux/module.h> #include <linux/rtnetlink.h> @@ -185,12 +186,12 @@ void skcipher_walk_complete(struct skcipher_walk *walk, int err) data = p->data; if (!data) { data = PTR_ALIGN(&p->buffer[0], walk->alignmask + 1); - data = skcipher_get_spot(data, walk->chunksize); + data = skcipher_get_spot(data, walk->stride); } scatterwalk_copychunks(data, &p->dst, p->len, 1); - if (offset_in_page(p->data) + p->len + walk->chunksize > + if (offset_in_page(p->data) + p->len + walk->stride > PAGE_SIZE) free_page((unsigned long)p->data); @@ -299,7 +300,7 @@ static int skcipher_next_copy(struct skcipher_walk *walk) p->len = walk->nbytes; 
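	/* In these skcipher.c hunks, walk->stride supersedes the old
	 * walk->chunksize field: it is the buffer granularity the walker
	 * hands to the cipher, initialised from crypto_skcipher_walksize().
	 * walksize defaults to the chunksize but may be a larger multiple
	 * of it; the NEON drivers earlier in this merge set it to 4 or 8
	 * blocks so they can process several blocks per call. */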
skcipher_queue_write(walk, p); - if (offset_in_page(walk->page) + walk->nbytes + walk->chunksize > + if (offset_in_page(walk->page) + walk->nbytes + walk->stride > PAGE_SIZE) walk->page = NULL; else @@ -344,7 +345,7 @@ static int skcipher_walk_next(struct skcipher_walk *walk) SKCIPHER_WALK_DIFF); n = walk->total; - bsize = min(walk->chunksize, max(n, walk->blocksize)); + bsize = min(walk->stride, max(n, walk->blocksize)); n = scatterwalk_clamp(&walk->in, n); n = scatterwalk_clamp(&walk->out, n); @@ -393,7 +394,7 @@ static int skcipher_copy_iv(struct skcipher_walk *walk) unsigned a = crypto_tfm_ctx_alignment() - 1; unsigned alignmask = walk->alignmask; unsigned ivsize = walk->ivsize; - unsigned bs = walk->chunksize; + unsigned bs = walk->stride; unsigned aligned_bs; unsigned size; u8 *iv; @@ -463,7 +464,7 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk, SKCIPHER_WALK_SLEEP : 0; walk->blocksize = crypto_skcipher_blocksize(tfm); - walk->chunksize = crypto_skcipher_chunksize(tfm); + walk->stride = crypto_skcipher_walksize(tfm); walk->ivsize = crypto_skcipher_ivsize(tfm); walk->alignmask = crypto_skcipher_alignmask(tfm); @@ -525,7 +526,7 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, walk->flags &= ~SKCIPHER_WALK_SLEEP; walk->blocksize = crypto_aead_blocksize(tfm); - walk->chunksize = crypto_aead_chunksize(tfm); + walk->stride = crypto_aead_chunksize(tfm); walk->ivsize = crypto_aead_ivsize(tfm); walk->alignmask = crypto_aead_alignmask(tfm); @@ -807,7 +808,7 @@ static void crypto_skcipher_free_instance(struct crypto_instance *inst) } static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg) - __attribute__ ((unused)); + __maybe_unused; static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg) { struct skcipher_alg *skcipher = container_of(alg, struct skcipher_alg, @@ -821,6 +822,7 @@ static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg) seq_printf(m, "max keysize : %u\n", skcipher->max_keysize); seq_printf(m, "ivsize : %u\n", skcipher->ivsize); seq_printf(m, "chunksize : %u\n", skcipher->chunksize); + seq_printf(m, "walksize : %u\n", skcipher->walksize); } #ifdef CONFIG_NET @@ -893,11 +895,14 @@ static int skcipher_prepare_alg(struct skcipher_alg *alg) { struct crypto_alg *base = &alg->base; - if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8) + if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8 || + alg->walksize > PAGE_SIZE / 8) return -EINVAL; if (!alg->chunksize) alg->chunksize = base->cra_blocksize; + if (!alg->walksize) + alg->walksize = alg->chunksize; base->cra_type = &crypto_skcipher_type2; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index ae22f05d5936..9a11f3c2bf98 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -22,6 +22,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <crypto/aead.h> #include <crypto/hash.h> #include <crypto/skcipher.h> @@ -1010,6 +1012,8 @@ static inline int tcrypt_test(const char *alg) { int ret; + pr_debug("testing %s\n", alg); + ret = alg_test(alg, alg, 0, 0); /* non-fips algs return -EINVAL in fips mode */ if (fips_enabled && ret == -EINVAL) @@ -2059,6 +2063,8 @@ static int __init tcrypt_mod_init(void) if (err) { printk(KERN_ERR "tcrypt: one or more tests failed!\n"); goto err_free_tv; + } else { + pr_debug("all tests passed\n"); } /* We intentionaly return -EAGAIN to prevent keeping the module, diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 
44e888b0b041..98eb09782db8 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -265,6 +265,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, const int align_offset) { const char *algo = crypto_tfm_alg_driver_name(crypto_ahash_tfm(tfm)); + size_t digest_size = crypto_ahash_digestsize(tfm); unsigned int i, j, k, temp; struct scatterlist sg[8]; char *result; @@ -275,7 +276,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, char *xbuf[XBUFSIZE]; int ret = -ENOMEM; - result = kmalloc(MAX_DIGEST_SIZE, GFP_KERNEL); + result = kmalloc(digest_size, GFP_KERNEL); if (!result) return ret; key = kmalloc(MAX_KEYLEN, GFP_KERNEL); @@ -305,7 +306,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, goto out; j++; - memset(result, 0, MAX_DIGEST_SIZE); + memset(result, 0, digest_size); hash_buff = xbuf[0]; hash_buff += align_offset; @@ -380,7 +381,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, continue; j++; - memset(result, 0, MAX_DIGEST_SIZE); + memset(result, 0, digest_size); temp = 0; sg_init_table(sg, template[i].np); @@ -458,7 +459,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, continue; j++; - memset(result, 0, MAX_DIGEST_SIZE); + memset(result, 0, digest_size); ret = -EINVAL; hash_buff = xbuf[0]; @@ -1463,13 +1464,12 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, int ilen = ctemplate[i].inlen; void *input_vec; - input_vec = kmalloc(ilen, GFP_KERNEL); + input_vec = kmemdup(ctemplate[i].input, ilen, GFP_KERNEL); if (!input_vec) { ret = -ENOMEM; goto out; } - memcpy(input_vec, ctemplate[i].input, ilen); memset(output, 0, dlen); init_completion(&result.completion); sg_init_one(&src, input_vec, ilen); @@ -1525,13 +1525,12 @@ static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, int ilen = dtemplate[i].inlen; void *input_vec; - input_vec = kmalloc(ilen, GFP_KERNEL); + input_vec = kmemdup(dtemplate[i].input, ilen, GFP_KERNEL); if (!input_vec) { ret = -ENOMEM; goto out; } - memcpy(input_vec, dtemplate[i].input, ilen); memset(output, 0, dlen); init_completion(&result.completion); sg_init_one(&src, input_vec, ilen); @@ -2251,30 +2250,23 @@ static int alg_test_null(const struct alg_test_desc *desc, return 0; } +#define __VECS(tv) { .vecs = tv, .count = ARRAY_SIZE(tv) } + /* Please keep this list sorted by algorithm name. 
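The test_acomp() hunks above fold each kmalloc()+memcpy() pair into kmemdup(), which allocates and copies in one call, leaving a single failure path and no window where the buffer exists unfilled. A minimal userspace model of the helper (the real kmemdup() takes a gfp_t argument and lives in mm/util.c):

	#include <stdlib.h>
	#include <string.h>

	/* Userspace stand-in for the kernel's kmemdup() */
	static void *kmemdup_model(const void *src, size_t len)
	{
		void *p = malloc(len);	/* kmalloc(len, gfp) in the kernel */

		if (p)
			memcpy(p, src, len);
		return p;
	}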
*/ static const struct alg_test_desc alg_test_descs[] = { { .alg = "ansi_cprng", .test = alg_test_cprng, .suite = { - .cprng = { - .vecs = ansi_cprng_aes_tv_template, - .count = ANSI_CPRNG_AES_TEST_VECTORS - } + .cprng = __VECS(ansi_cprng_aes_tv_template) } }, { .alg = "authenc(hmac(md5),ecb(cipher_null))", .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = hmac_md5_ecb_cipher_null_enc_tv_template, - .count = HMAC_MD5_ECB_CIPHER_NULL_ENC_TEST_VECTORS - }, - .dec = { - .vecs = hmac_md5_ecb_cipher_null_dec_tv_template, - .count = HMAC_MD5_ECB_CIPHER_NULL_DEC_TEST_VECTORS - } + .enc = __VECS(hmac_md5_ecb_cipher_null_enc_tv_template), + .dec = __VECS(hmac_md5_ecb_cipher_null_dec_tv_template) } } }, { @@ -2282,12 +2274,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha1_aes_cbc_enc_tv_temp, - .count = - HMAC_SHA1_AES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha1_aes_cbc_enc_tv_temp) } } }, { @@ -2295,12 +2282,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha1_des_cbc_enc_tv_temp, - .count = - HMAC_SHA1_DES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha1_des_cbc_enc_tv_temp) } } }, { @@ -2309,12 +2291,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha1_des3_ede_cbc_enc_tv_temp, - .count = - HMAC_SHA1_DES3_EDE_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha1_des3_ede_cbc_enc_tv_temp) } } }, { @@ -2326,18 +2303,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha1_ecb_cipher_null_enc_tv_temp, - .count = - HMAC_SHA1_ECB_CIPHER_NULL_ENC_TEST_VEC - }, - .dec = { - .vecs = - hmac_sha1_ecb_cipher_null_dec_tv_temp, - .count = - HMAC_SHA1_ECB_CIPHER_NULL_DEC_TEST_VEC - } + .enc = __VECS(hmac_sha1_ecb_cipher_null_enc_tv_temp), + .dec = __VECS(hmac_sha1_ecb_cipher_null_dec_tv_temp) } } }, { @@ -2349,12 +2316,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha224_des_cbc_enc_tv_temp, - .count = - HMAC_SHA224_DES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha224_des_cbc_enc_tv_temp) } } }, { @@ -2363,12 +2325,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha224_des3_ede_cbc_enc_tv_temp, - .count = - HMAC_SHA224_DES3_EDE_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha224_des3_ede_cbc_enc_tv_temp) } } }, { @@ -2377,12 +2334,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha256_aes_cbc_enc_tv_temp, - .count = - HMAC_SHA256_AES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha256_aes_cbc_enc_tv_temp) } } }, { @@ -2390,12 +2342,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha256_des_cbc_enc_tv_temp, - .count = - HMAC_SHA256_DES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha256_des_cbc_enc_tv_temp) } } }, { @@ -2404,12 +2351,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha256_des3_ede_cbc_enc_tv_temp, - .count = - HMAC_SHA256_DES3_EDE_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha256_des3_ede_cbc_enc_tv_temp) } } }, { @@ -2425,12 +2367,7 @@ static const struct alg_test_desc 
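Everything that follows in alg_test_descs[] is one mechanical substitution: the open-coded { .vecs, .count } initializers become the __VECS() helper defined above, so each suite's count is derived from the array itself via ARRAY_SIZE() rather than a hand-maintained *_TEST_VECTORS define (which testmgr.h drops at the end of this patch). A small self-contained illustration — the struct and array names here are invented for the demo, and this ARRAY_SIZE() stand-in omits the kernel's array-type check:

	#include <stdio.h>

	#define ARRAY_SIZE(arr)	(sizeof(arr) / sizeof((arr)[0]))
	#define __VECS(tv)	{ .vecs = tv, .count = ARRAY_SIZE(tv) }

	struct demo_testvec { const char *plaintext; };
	struct demo_suite {
		const struct demo_testvec *vecs;
		unsigned int count;
	};

	static const struct demo_testvec demo_tv_template[] = {
		{ .plaintext = "" },
		{ .plaintext = "abc" },
	};

	/* Adding a vector above updates .count automatically; there is
	 * no DEMO_TEST_VECTORS define to forget to bump. */
	static const struct demo_suite suite = __VECS(demo_tv_template);

	int main(void)
	{
		printf("count = %u\n", suite.count);	/* prints 2 */
		return 0;
	}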
alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha384_des_cbc_enc_tv_temp, - .count = - HMAC_SHA384_DES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha384_des_cbc_enc_tv_temp) } } }, { @@ -2439,12 +2376,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha384_des3_ede_cbc_enc_tv_temp, - .count = - HMAC_SHA384_DES3_EDE_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha384_des3_ede_cbc_enc_tv_temp) } } }, { @@ -2461,12 +2393,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha512_aes_cbc_enc_tv_temp, - .count = - HMAC_SHA512_AES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha512_aes_cbc_enc_tv_temp) } } }, { @@ -2474,12 +2401,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha512_des_cbc_enc_tv_temp, - .count = - HMAC_SHA512_DES_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha512_des_cbc_enc_tv_temp) } } }, { @@ -2488,12 +2410,7 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = - hmac_sha512_des3_ede_cbc_enc_tv_temp, - .count = - HMAC_SHA512_DES3_EDE_CBC_ENC_TEST_VEC - } + .enc = __VECS(hmac_sha512_des3_ede_cbc_enc_tv_temp) } } }, { @@ -2510,14 +2427,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_cbc_enc_tv_template, - .count = AES_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_cbc_dec_tv_template, - .count = AES_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(aes_cbc_enc_tv_template), + .dec = __VECS(aes_cbc_dec_tv_template) } } }, { @@ -2525,14 +2436,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = anubis_cbc_enc_tv_template, - .count = ANUBIS_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = anubis_cbc_dec_tv_template, - .count = ANUBIS_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(anubis_cbc_enc_tv_template), + .dec = __VECS(anubis_cbc_dec_tv_template) } } }, { @@ -2540,14 +2445,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = bf_cbc_enc_tv_template, - .count = BF_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = bf_cbc_dec_tv_template, - .count = BF_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(bf_cbc_enc_tv_template), + .dec = __VECS(bf_cbc_dec_tv_template) } } }, { @@ -2555,14 +2454,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = camellia_cbc_enc_tv_template, - .count = CAMELLIA_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = camellia_cbc_dec_tv_template, - .count = CAMELLIA_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(camellia_cbc_enc_tv_template), + .dec = __VECS(camellia_cbc_dec_tv_template) } } }, { @@ -2570,14 +2463,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast5_cbc_enc_tv_template, - .count = CAST5_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast5_cbc_dec_tv_template, - .count = CAST5_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(cast5_cbc_enc_tv_template), + .dec = __VECS(cast5_cbc_dec_tv_template) } } }, { @@ -2585,14 +2472,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = 
cast6_cbc_enc_tv_template, - .count = CAST6_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast6_cbc_dec_tv_template, - .count = CAST6_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(cast6_cbc_enc_tv_template), + .dec = __VECS(cast6_cbc_dec_tv_template) } } }, { @@ -2600,14 +2481,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = des_cbc_enc_tv_template, - .count = DES_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = des_cbc_dec_tv_template, - .count = DES_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(des_cbc_enc_tv_template), + .dec = __VECS(des_cbc_dec_tv_template) } } }, { @@ -2616,14 +2491,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = des3_ede_cbc_enc_tv_template, - .count = DES3_EDE_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = des3_ede_cbc_dec_tv_template, - .count = DES3_EDE_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(des3_ede_cbc_enc_tv_template), + .dec = __VECS(des3_ede_cbc_dec_tv_template) } } }, { @@ -2631,14 +2500,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = serpent_cbc_enc_tv_template, - .count = SERPENT_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = serpent_cbc_dec_tv_template, - .count = SERPENT_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(serpent_cbc_enc_tv_template), + .dec = __VECS(serpent_cbc_dec_tv_template) } } }, { @@ -2646,14 +2509,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tf_cbc_enc_tv_template, - .count = TF_CBC_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tf_cbc_dec_tv_template, - .count = TF_CBC_DEC_TEST_VECTORS - } + .enc = __VECS(tf_cbc_enc_tv_template), + .dec = __VECS(tf_cbc_dec_tv_template) } } }, { @@ -2662,14 +2519,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = aes_ccm_enc_tv_template, - .count = AES_CCM_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ccm_dec_tv_template, - .count = AES_CCM_DEC_TEST_VECTORS - } + .enc = __VECS(aes_ccm_enc_tv_template), + .dec = __VECS(aes_ccm_dec_tv_template) } } }, { @@ -2677,14 +2528,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = chacha20_enc_tv_template, - .count = CHACHA20_ENC_TEST_VECTORS - }, - .dec = { - .vecs = chacha20_enc_tv_template, - .count = CHACHA20_ENC_TEST_VECTORS - }, + .enc = __VECS(chacha20_enc_tv_template), + .dec = __VECS(chacha20_enc_tv_template), } } }, { @@ -2692,20 +2537,14 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .test = alg_test_hash, .suite = { - .hash = { - .vecs = aes_cmac128_tv_template, - .count = CMAC_AES_TEST_VECTORS - } + .hash = __VECS(aes_cmac128_tv_template) } }, { .alg = "cmac(des3_ede)", .fips_allowed = 1, .test = alg_test_hash, .suite = { - .hash = { - .vecs = des3_ede_cmac64_tv_template, - .count = CMAC_DES3_EDE_TEST_VECTORS - } + .hash = __VECS(des3_ede_cmac64_tv_template) } }, { .alg = "compress_null", @@ -2714,30 +2553,21 @@ static const struct alg_test_desc alg_test_descs[] = { .alg = "crc32", .test = alg_test_hash, .suite = { - .hash = { - .vecs = crc32_tv_template, - .count = CRC32_TEST_VECTORS - } + .hash = __VECS(crc32_tv_template) } }, { .alg = "crc32c", .test = alg_test_crc32c, .fips_allowed = 1, .suite = { - .hash = { - .vecs = crc32c_tv_template, - .count = 
CRC32C_TEST_VECTORS - } + .hash = __VECS(crc32c_tv_template) } }, { .alg = "crct10dif", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = crct10dif_tv_template, - .count = CRCT10DIF_TEST_VECTORS - } + .hash = __VECS(crct10dif_tv_template) } }, { .alg = "ctr(aes)", @@ -2745,14 +2575,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_ctr_enc_tv_template, - .count = AES_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ctr_dec_tv_template, - .count = AES_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(aes_ctr_enc_tv_template), + .dec = __VECS(aes_ctr_dec_tv_template) } } }, { @@ -2760,14 +2584,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = bf_ctr_enc_tv_template, - .count = BF_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = bf_ctr_dec_tv_template, - .count = BF_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(bf_ctr_enc_tv_template), + .dec = __VECS(bf_ctr_dec_tv_template) } } }, { @@ -2775,14 +2593,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = camellia_ctr_enc_tv_template, - .count = CAMELLIA_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = camellia_ctr_dec_tv_template, - .count = CAMELLIA_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(camellia_ctr_enc_tv_template), + .dec = __VECS(camellia_ctr_dec_tv_template) } } }, { @@ -2790,14 +2602,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast5_ctr_enc_tv_template, - .count = CAST5_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast5_ctr_dec_tv_template, - .count = CAST5_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(cast5_ctr_enc_tv_template), + .dec = __VECS(cast5_ctr_dec_tv_template) } } }, { @@ -2805,14 +2611,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast6_ctr_enc_tv_template, - .count = CAST6_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast6_ctr_dec_tv_template, - .count = CAST6_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(cast6_ctr_enc_tv_template), + .dec = __VECS(cast6_ctr_dec_tv_template) } } }, { @@ -2820,14 +2620,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = des_ctr_enc_tv_template, - .count = DES_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = des_ctr_dec_tv_template, - .count = DES_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(des_ctr_enc_tv_template), + .dec = __VECS(des_ctr_dec_tv_template) } } }, { @@ -2835,14 +2629,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = des3_ede_ctr_enc_tv_template, - .count = DES3_EDE_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = des3_ede_ctr_dec_tv_template, - .count = DES3_EDE_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(des3_ede_ctr_enc_tv_template), + .dec = __VECS(des3_ede_ctr_dec_tv_template) } } }, { @@ -2850,14 +2638,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = serpent_ctr_enc_tv_template, - .count = SERPENT_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = serpent_ctr_dec_tv_template, - .count = SERPENT_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(serpent_ctr_enc_tv_template), + .dec = __VECS(serpent_ctr_dec_tv_template) } } }, { @@ -2865,14 +2647,8 @@ 
static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tf_ctr_enc_tv_template, - .count = TF_CTR_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tf_ctr_dec_tv_template, - .count = TF_CTR_DEC_TEST_VECTORS - } + .enc = __VECS(tf_ctr_enc_tv_template), + .dec = __VECS(tf_ctr_dec_tv_template) } } }, { @@ -2880,14 +2656,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cts_mode_enc_tv_template, - .count = CTS_MODE_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cts_mode_dec_tv_template, - .count = CTS_MODE_DEC_TEST_VECTORS - } + .enc = __VECS(cts_mode_enc_tv_template), + .dec = __VECS(cts_mode_dec_tv_template) } } }, { @@ -2896,14 +2666,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .comp = { - .comp = { - .vecs = deflate_comp_tv_template, - .count = DEFLATE_COMP_TEST_VECTORS - }, - .decomp = { - .vecs = deflate_decomp_tv_template, - .count = DEFLATE_DECOMP_TEST_VECTORS - } + .comp = __VECS(deflate_comp_tv_template), + .decomp = __VECS(deflate_decomp_tv_template) } } }, { @@ -2911,10 +2675,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_kpp, .fips_allowed = 1, .suite = { - .kpp = { - .vecs = dh_tv_template, - .count = DH_TEST_VECTORS - } + .kpp = __VECS(dh_tv_template) } }, { .alg = "digest_null", @@ -2924,30 +2685,21 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_nopr_ctr_aes128_tv_template, - .count = ARRAY_SIZE(drbg_nopr_ctr_aes128_tv_template) - } + .drbg = __VECS(drbg_nopr_ctr_aes128_tv_template) } }, { .alg = "drbg_nopr_ctr_aes192", .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_nopr_ctr_aes192_tv_template, - .count = ARRAY_SIZE(drbg_nopr_ctr_aes192_tv_template) - } + .drbg = __VECS(drbg_nopr_ctr_aes192_tv_template) } }, { .alg = "drbg_nopr_ctr_aes256", .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_nopr_ctr_aes256_tv_template, - .count = ARRAY_SIZE(drbg_nopr_ctr_aes256_tv_template) - } + .drbg = __VECS(drbg_nopr_ctr_aes256_tv_template) } }, { /* @@ -2962,11 +2714,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_nopr_hmac_sha256_tv_template, - .count = - ARRAY_SIZE(drbg_nopr_hmac_sha256_tv_template) - } + .drbg = __VECS(drbg_nopr_hmac_sha256_tv_template) } }, { /* covered by drbg_nopr_hmac_sha256 test */ @@ -2986,10 +2734,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_nopr_sha256_tv_template, - .count = ARRAY_SIZE(drbg_nopr_sha256_tv_template) - } + .drbg = __VECS(drbg_nopr_sha256_tv_template) } }, { /* covered by drbg_nopr_sha256 test */ @@ -3005,10 +2750,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_pr_ctr_aes128_tv_template, - .count = ARRAY_SIZE(drbg_pr_ctr_aes128_tv_template) - } + .drbg = __VECS(drbg_pr_ctr_aes128_tv_template) } }, { /* covered by drbg_pr_ctr_aes128 test */ @@ -3028,10 +2770,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_pr_hmac_sha256_tv_template, - .count = ARRAY_SIZE(drbg_pr_hmac_sha256_tv_template) - } + .drbg = 
__VECS(drbg_pr_hmac_sha256_tv_template) } }, { /* covered by drbg_pr_hmac_sha256 test */ @@ -3051,10 +2790,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_drbg, .fips_allowed = 1, .suite = { - .drbg = { - .vecs = drbg_pr_sha256_tv_template, - .count = ARRAY_SIZE(drbg_pr_sha256_tv_template) - } + .drbg = __VECS(drbg_pr_sha256_tv_template) } }, { /* covered by drbg_pr_sha256 test */ @@ -3071,14 +2807,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_enc_tv_template, - .count = AES_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_dec_tv_template, - .count = AES_DEC_TEST_VECTORS - } + .enc = __VECS(aes_enc_tv_template), + .dec = __VECS(aes_dec_tv_template) } } }, { @@ -3086,14 +2816,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = anubis_enc_tv_template, - .count = ANUBIS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = anubis_dec_tv_template, - .count = ANUBIS_DEC_TEST_VECTORS - } + .enc = __VECS(anubis_enc_tv_template), + .dec = __VECS(anubis_dec_tv_template) } } }, { @@ -3101,14 +2825,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = arc4_enc_tv_template, - .count = ARC4_ENC_TEST_VECTORS - }, - .dec = { - .vecs = arc4_dec_tv_template, - .count = ARC4_DEC_TEST_VECTORS - } + .enc = __VECS(arc4_enc_tv_template), + .dec = __VECS(arc4_dec_tv_template) } } }, { @@ -3116,14 +2834,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = bf_enc_tv_template, - .count = BF_ENC_TEST_VECTORS - }, - .dec = { - .vecs = bf_dec_tv_template, - .count = BF_DEC_TEST_VECTORS - } + .enc = __VECS(bf_enc_tv_template), + .dec = __VECS(bf_dec_tv_template) } } }, { @@ -3131,14 +2843,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = camellia_enc_tv_template, - .count = CAMELLIA_ENC_TEST_VECTORS - }, - .dec = { - .vecs = camellia_dec_tv_template, - .count = CAMELLIA_DEC_TEST_VECTORS - } + .enc = __VECS(camellia_enc_tv_template), + .dec = __VECS(camellia_dec_tv_template) } } }, { @@ -3146,14 +2852,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast5_enc_tv_template, - .count = CAST5_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast5_dec_tv_template, - .count = CAST5_DEC_TEST_VECTORS - } + .enc = __VECS(cast5_enc_tv_template), + .dec = __VECS(cast5_dec_tv_template) } } }, { @@ -3161,14 +2861,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast6_enc_tv_template, - .count = CAST6_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast6_dec_tv_template, - .count = CAST6_DEC_TEST_VECTORS - } + .enc = __VECS(cast6_enc_tv_template), + .dec = __VECS(cast6_dec_tv_template) } } }, { @@ -3179,14 +2873,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = des_enc_tv_template, - .count = DES_ENC_TEST_VECTORS - }, - .dec = { - .vecs = des_dec_tv_template, - .count = DES_DEC_TEST_VECTORS - } + .enc = __VECS(des_enc_tv_template), + .dec = __VECS(des_dec_tv_template) } } }, { @@ -3195,14 +2883,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - 
.enc = { - .vecs = des3_ede_enc_tv_template, - .count = DES3_EDE_ENC_TEST_VECTORS - }, - .dec = { - .vecs = des3_ede_dec_tv_template, - .count = DES3_EDE_DEC_TEST_VECTORS - } + .enc = __VECS(des3_ede_enc_tv_template), + .dec = __VECS(des3_ede_dec_tv_template) } } }, { @@ -3225,14 +2907,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = khazad_enc_tv_template, - .count = KHAZAD_ENC_TEST_VECTORS - }, - .dec = { - .vecs = khazad_dec_tv_template, - .count = KHAZAD_DEC_TEST_VECTORS - } + .enc = __VECS(khazad_enc_tv_template), + .dec = __VECS(khazad_dec_tv_template) } } }, { @@ -3240,14 +2916,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = seed_enc_tv_template, - .count = SEED_ENC_TEST_VECTORS - }, - .dec = { - .vecs = seed_dec_tv_template, - .count = SEED_DEC_TEST_VECTORS - } + .enc = __VECS(seed_enc_tv_template), + .dec = __VECS(seed_dec_tv_template) } } }, { @@ -3255,14 +2925,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = serpent_enc_tv_template, - .count = SERPENT_ENC_TEST_VECTORS - }, - .dec = { - .vecs = serpent_dec_tv_template, - .count = SERPENT_DEC_TEST_VECTORS - } + .enc = __VECS(serpent_enc_tv_template), + .dec = __VECS(serpent_dec_tv_template) } } }, { @@ -3270,14 +2934,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tea_enc_tv_template, - .count = TEA_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tea_dec_tv_template, - .count = TEA_DEC_TEST_VECTORS - } + .enc = __VECS(tea_enc_tv_template), + .dec = __VECS(tea_dec_tv_template) } } }, { @@ -3285,14 +2943,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tnepres_enc_tv_template, - .count = TNEPRES_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tnepres_dec_tv_template, - .count = TNEPRES_DEC_TEST_VECTORS - } + .enc = __VECS(tnepres_enc_tv_template), + .dec = __VECS(tnepres_dec_tv_template) } } }, { @@ -3300,14 +2952,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tf_enc_tv_template, - .count = TF_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tf_dec_tv_template, - .count = TF_DEC_TEST_VECTORS - } + .enc = __VECS(tf_enc_tv_template), + .dec = __VECS(tf_dec_tv_template) } } }, { @@ -3315,14 +2961,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = xeta_enc_tv_template, - .count = XETA_ENC_TEST_VECTORS - }, - .dec = { - .vecs = xeta_dec_tv_template, - .count = XETA_DEC_TEST_VECTORS - } + .enc = __VECS(xeta_enc_tv_template), + .dec = __VECS(xeta_dec_tv_template) } } }, { @@ -3330,14 +2970,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = xtea_enc_tv_template, - .count = XTEA_ENC_TEST_VECTORS - }, - .dec = { - .vecs = xtea_dec_tv_template, - .count = XTEA_DEC_TEST_VECTORS - } + .enc = __VECS(xtea_enc_tv_template), + .dec = __VECS(xtea_dec_tv_template) } } }, { @@ -3345,10 +2979,7 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_kpp, .fips_allowed = 1, .suite = { - .kpp = { - .vecs = ecdh_tv_template, - .count = ECDH_TEST_VECTORS - } + .kpp = __VECS(ecdh_tv_template) } }, { .alg = 
"gcm(aes)", @@ -3356,14 +2987,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = aes_gcm_enc_tv_template, - .count = AES_GCM_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_gcm_dec_tv_template, - .count = AES_GCM_DEC_TEST_VECTORS - } + .enc = __VECS(aes_gcm_enc_tv_template), + .dec = __VECS(aes_gcm_dec_tv_template) } } }, { @@ -3371,136 +2996,94 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = ghash_tv_template, - .count = GHASH_TEST_VECTORS - } + .hash = __VECS(ghash_tv_template) } }, { .alg = "hmac(crc32)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = bfin_crc_tv_template, - .count = BFIN_CRC_TEST_VECTORS - } + .hash = __VECS(bfin_crc_tv_template) } }, { .alg = "hmac(md5)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = hmac_md5_tv_template, - .count = HMAC_MD5_TEST_VECTORS - } + .hash = __VECS(hmac_md5_tv_template) } }, { .alg = "hmac(rmd128)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = hmac_rmd128_tv_template, - .count = HMAC_RMD128_TEST_VECTORS - } + .hash = __VECS(hmac_rmd128_tv_template) } }, { .alg = "hmac(rmd160)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = hmac_rmd160_tv_template, - .count = HMAC_RMD160_TEST_VECTORS - } + .hash = __VECS(hmac_rmd160_tv_template) } }, { .alg = "hmac(sha1)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha1_tv_template, - .count = HMAC_SHA1_TEST_VECTORS - } + .hash = __VECS(hmac_sha1_tv_template) } }, { .alg = "hmac(sha224)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha224_tv_template, - .count = HMAC_SHA224_TEST_VECTORS - } + .hash = __VECS(hmac_sha224_tv_template) } }, { .alg = "hmac(sha256)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha256_tv_template, - .count = HMAC_SHA256_TEST_VECTORS - } + .hash = __VECS(hmac_sha256_tv_template) } }, { .alg = "hmac(sha3-224)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha3_224_tv_template, - .count = HMAC_SHA3_224_TEST_VECTORS - } + .hash = __VECS(hmac_sha3_224_tv_template) } }, { .alg = "hmac(sha3-256)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha3_256_tv_template, - .count = HMAC_SHA3_256_TEST_VECTORS - } + .hash = __VECS(hmac_sha3_256_tv_template) } }, { .alg = "hmac(sha3-384)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha3_384_tv_template, - .count = HMAC_SHA3_384_TEST_VECTORS - } + .hash = __VECS(hmac_sha3_384_tv_template) } }, { .alg = "hmac(sha3-512)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha3_512_tv_template, - .count = HMAC_SHA3_512_TEST_VECTORS - } + .hash = __VECS(hmac_sha3_512_tv_template) } }, { .alg = "hmac(sha384)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha384_tv_template, - .count = HMAC_SHA384_TEST_VECTORS - } + .hash = __VECS(hmac_sha384_tv_template) } }, { .alg = "hmac(sha512)", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = hmac_sha512_tv_template, - .count = HMAC_SHA512_TEST_VECTORS - } + .hash = __VECS(hmac_sha512_tv_template) } }, { .alg = "jitterentropy_rng", @@ -3512,14 +3095,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_kw_enc_tv_template, - .count = 
ARRAY_SIZE(aes_kw_enc_tv_template) - }, - .dec = { - .vecs = aes_kw_dec_tv_template, - .count = ARRAY_SIZE(aes_kw_dec_tv_template) - } + .enc = __VECS(aes_kw_enc_tv_template), + .dec = __VECS(aes_kw_dec_tv_template) } } }, { @@ -3527,14 +3104,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = aes_lrw_enc_tv_template, - .count = AES_LRW_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_lrw_dec_tv_template, - .count = AES_LRW_DEC_TEST_VECTORS - } + .enc = __VECS(aes_lrw_enc_tv_template), + .dec = __VECS(aes_lrw_dec_tv_template) } } }, { @@ -3542,14 +3113,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = camellia_lrw_enc_tv_template, - .count = CAMELLIA_LRW_ENC_TEST_VECTORS - }, - .dec = { - .vecs = camellia_lrw_dec_tv_template, - .count = CAMELLIA_LRW_DEC_TEST_VECTORS - } + .enc = __VECS(camellia_lrw_enc_tv_template), + .dec = __VECS(camellia_lrw_dec_tv_template) } } }, { @@ -3557,14 +3122,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast6_lrw_enc_tv_template, - .count = CAST6_LRW_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast6_lrw_dec_tv_template, - .count = CAST6_LRW_DEC_TEST_VECTORS - } + .enc = __VECS(cast6_lrw_enc_tv_template), + .dec = __VECS(cast6_lrw_dec_tv_template) } } }, { @@ -3572,14 +3131,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = serpent_lrw_enc_tv_template, - .count = SERPENT_LRW_ENC_TEST_VECTORS - }, - .dec = { - .vecs = serpent_lrw_dec_tv_template, - .count = SERPENT_LRW_DEC_TEST_VECTORS - } + .enc = __VECS(serpent_lrw_enc_tv_template), + .dec = __VECS(serpent_lrw_dec_tv_template) } } }, { @@ -3587,14 +3140,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tf_lrw_enc_tv_template, - .count = TF_LRW_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tf_lrw_dec_tv_template, - .count = TF_LRW_DEC_TEST_VECTORS - } + .enc = __VECS(tf_lrw_enc_tv_template), + .dec = __VECS(tf_lrw_dec_tv_template) } } }, { @@ -3603,14 +3150,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .comp = { - .comp = { - .vecs = lz4_comp_tv_template, - .count = LZ4_COMP_TEST_VECTORS - }, - .decomp = { - .vecs = lz4_decomp_tv_template, - .count = LZ4_DECOMP_TEST_VECTORS - } + .comp = __VECS(lz4_comp_tv_template), + .decomp = __VECS(lz4_decomp_tv_template) } } }, { @@ -3619,14 +3160,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .comp = { - .comp = { - .vecs = lz4hc_comp_tv_template, - .count = LZ4HC_COMP_TEST_VECTORS - }, - .decomp = { - .vecs = lz4hc_decomp_tv_template, - .count = LZ4HC_DECOMP_TEST_VECTORS - } + .comp = __VECS(lz4hc_comp_tv_template), + .decomp = __VECS(lz4hc_decomp_tv_template) } } }, { @@ -3635,42 +3170,27 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .comp = { - .comp = { - .vecs = lzo_comp_tv_template, - .count = LZO_COMP_TEST_VECTORS - }, - .decomp = { - .vecs = lzo_decomp_tv_template, - .count = LZO_DECOMP_TEST_VECTORS - } + .comp = __VECS(lzo_comp_tv_template), + .decomp = __VECS(lzo_decomp_tv_template) } } }, { .alg = "md4", .test = alg_test_hash, .suite = { - .hash = { - .vecs = md4_tv_template, - .count = MD4_TEST_VECTORS - } + .hash = __VECS(md4_tv_template) } 
}, { .alg = "md5", .test = alg_test_hash, .suite = { - .hash = { - .vecs = md5_tv_template, - .count = MD5_TEST_VECTORS - } + .hash = __VECS(md5_tv_template) } }, { .alg = "michael_mic", .test = alg_test_hash, .suite = { - .hash = { - .vecs = michael_mic_tv_template, - .count = MICHAEL_MIC_TEST_VECTORS - } + .hash = __VECS(michael_mic_tv_template) } }, { .alg = "ofb(aes)", @@ -3678,14 +3198,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_ofb_enc_tv_template, - .count = AES_OFB_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ofb_dec_tv_template, - .count = AES_OFB_DEC_TEST_VECTORS - } + .enc = __VECS(aes_ofb_enc_tv_template), + .dec = __VECS(aes_ofb_dec_tv_template) } } }, { @@ -3693,24 +3207,15 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = fcrypt_pcbc_enc_tv_template, - .count = FCRYPT_ENC_TEST_VECTORS - }, - .dec = { - .vecs = fcrypt_pcbc_dec_tv_template, - .count = FCRYPT_DEC_TEST_VECTORS - } + .enc = __VECS(fcrypt_pcbc_enc_tv_template), + .dec = __VECS(fcrypt_pcbc_dec_tv_template) } } }, { .alg = "poly1305", .test = alg_test_hash, .suite = { - .hash = { - .vecs = poly1305_tv_template, - .count = POLY1305_TEST_VECTORS - } + .hash = __VECS(poly1305_tv_template) } }, { .alg = "rfc3686(ctr(aes))", @@ -3718,14 +3223,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_ctr_rfc3686_enc_tv_template, - .count = AES_CTR_3686_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ctr_rfc3686_dec_tv_template, - .count = AES_CTR_3686_DEC_TEST_VECTORS - } + .enc = __VECS(aes_ctr_rfc3686_enc_tv_template), + .dec = __VECS(aes_ctr_rfc3686_dec_tv_template) } } }, { @@ -3734,14 +3233,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = aes_gcm_rfc4106_enc_tv_template, - .count = AES_GCM_4106_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_gcm_rfc4106_dec_tv_template, - .count = AES_GCM_4106_DEC_TEST_VECTORS - } + .enc = __VECS(aes_gcm_rfc4106_enc_tv_template), + .dec = __VECS(aes_gcm_rfc4106_dec_tv_template) } } }, { @@ -3750,14 +3243,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .aead = { - .enc = { - .vecs = aes_ccm_rfc4309_enc_tv_template, - .count = AES_CCM_4309_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_ccm_rfc4309_dec_tv_template, - .count = AES_CCM_4309_DEC_TEST_VECTORS - } + .enc = __VECS(aes_ccm_rfc4309_enc_tv_template), + .dec = __VECS(aes_ccm_rfc4309_dec_tv_template) } } }, { @@ -3765,14 +3252,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = aes_gcm_rfc4543_enc_tv_template, - .count = AES_GCM_4543_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_gcm_rfc4543_dec_tv_template, - .count = AES_GCM_4543_DEC_TEST_VECTORS - }, + .enc = __VECS(aes_gcm_rfc4543_enc_tv_template), + .dec = __VECS(aes_gcm_rfc4543_dec_tv_template), } } }, { @@ -3780,14 +3261,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = rfc7539_enc_tv_template, - .count = RFC7539_ENC_TEST_VECTORS - }, - .dec = { - .vecs = rfc7539_dec_tv_template, - .count = RFC7539_DEC_TEST_VECTORS - }, + .enc = __VECS(rfc7539_enc_tv_template), + .dec = __VECS(rfc7539_dec_tv_template), } } }, { @@ -3795,71 +3270,47 @@ static const struct alg_test_desc alg_test_descs[] = { 
.test = alg_test_aead, .suite = { .aead = { - .enc = { - .vecs = rfc7539esp_enc_tv_template, - .count = RFC7539ESP_ENC_TEST_VECTORS - }, - .dec = { - .vecs = rfc7539esp_dec_tv_template, - .count = RFC7539ESP_DEC_TEST_VECTORS - }, + .enc = __VECS(rfc7539esp_enc_tv_template), + .dec = __VECS(rfc7539esp_dec_tv_template), } } }, { .alg = "rmd128", .test = alg_test_hash, .suite = { - .hash = { - .vecs = rmd128_tv_template, - .count = RMD128_TEST_VECTORS - } + .hash = __VECS(rmd128_tv_template) } }, { .alg = "rmd160", .test = alg_test_hash, .suite = { - .hash = { - .vecs = rmd160_tv_template, - .count = RMD160_TEST_VECTORS - } + .hash = __VECS(rmd160_tv_template) } }, { .alg = "rmd256", .test = alg_test_hash, .suite = { - .hash = { - .vecs = rmd256_tv_template, - .count = RMD256_TEST_VECTORS - } + .hash = __VECS(rmd256_tv_template) } }, { .alg = "rmd320", .test = alg_test_hash, .suite = { - .hash = { - .vecs = rmd320_tv_template, - .count = RMD320_TEST_VECTORS - } + .hash = __VECS(rmd320_tv_template) } }, { .alg = "rsa", .test = alg_test_akcipher, .fips_allowed = 1, .suite = { - .akcipher = { - .vecs = rsa_tv_template, - .count = RSA_TEST_VECTORS - } + .akcipher = __VECS(rsa_tv_template) } }, { .alg = "salsa20", .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = salsa20_stream_enc_tv_template, - .count = SALSA20_STREAM_ENC_TEST_VECTORS - } + .enc = __VECS(salsa20_stream_enc_tv_template) } } }, { @@ -3867,162 +3318,111 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha1_tv_template, - .count = SHA1_TEST_VECTORS - } + .hash = __VECS(sha1_tv_template) } }, { .alg = "sha224", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha224_tv_template, - .count = SHA224_TEST_VECTORS - } + .hash = __VECS(sha224_tv_template) } }, { .alg = "sha256", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha256_tv_template, - .count = SHA256_TEST_VECTORS - } + .hash = __VECS(sha256_tv_template) } }, { .alg = "sha3-224", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha3_224_tv_template, - .count = SHA3_224_TEST_VECTORS - } + .hash = __VECS(sha3_224_tv_template) } }, { .alg = "sha3-256", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha3_256_tv_template, - .count = SHA3_256_TEST_VECTORS - } + .hash = __VECS(sha3_256_tv_template) } }, { .alg = "sha3-384", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha3_384_tv_template, - .count = SHA3_384_TEST_VECTORS - } + .hash = __VECS(sha3_384_tv_template) } }, { .alg = "sha3-512", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha3_512_tv_template, - .count = SHA3_512_TEST_VECTORS - } + .hash = __VECS(sha3_512_tv_template) } }, { .alg = "sha384", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha384_tv_template, - .count = SHA384_TEST_VECTORS - } + .hash = __VECS(sha384_tv_template) } }, { .alg = "sha512", .test = alg_test_hash, .fips_allowed = 1, .suite = { - .hash = { - .vecs = sha512_tv_template, - .count = SHA512_TEST_VECTORS - } + .hash = __VECS(sha512_tv_template) } }, { .alg = "tgr128", .test = alg_test_hash, .suite = { - .hash = { - .vecs = tgr128_tv_template, - .count = TGR128_TEST_VECTORS - } + .hash = __VECS(tgr128_tv_template) } }, { .alg = "tgr160", .test = alg_test_hash, .suite = { - .hash = { - .vecs = tgr160_tv_template, - .count = TGR160_TEST_VECTORS - } + .hash = 
__VECS(tgr160_tv_template) } }, { .alg = "tgr192", .test = alg_test_hash, .suite = { - .hash = { - .vecs = tgr192_tv_template, - .count = TGR192_TEST_VECTORS - } + .hash = __VECS(tgr192_tv_template) } }, { .alg = "vmac(aes)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = aes_vmac128_tv_template, - .count = VMAC_AES_TEST_VECTORS - } + .hash = __VECS(aes_vmac128_tv_template) } }, { .alg = "wp256", .test = alg_test_hash, .suite = { - .hash = { - .vecs = wp256_tv_template, - .count = WP256_TEST_VECTORS - } + .hash = __VECS(wp256_tv_template) } }, { .alg = "wp384", .test = alg_test_hash, .suite = { - .hash = { - .vecs = wp384_tv_template, - .count = WP384_TEST_VECTORS - } + .hash = __VECS(wp384_tv_template) } }, { .alg = "wp512", .test = alg_test_hash, .suite = { - .hash = { - .vecs = wp512_tv_template, - .count = WP512_TEST_VECTORS - } + .hash = __VECS(wp512_tv_template) } }, { .alg = "xcbc(aes)", .test = alg_test_hash, .suite = { - .hash = { - .vecs = aes_xcbc128_tv_template, - .count = XCBC_AES_TEST_VECTORS - } + .hash = __VECS(aes_xcbc128_tv_template) } }, { .alg = "xts(aes)", @@ -4030,14 +3430,8 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .suite = { .cipher = { - .enc = { - .vecs = aes_xts_enc_tv_template, - .count = AES_XTS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = aes_xts_dec_tv_template, - .count = AES_XTS_DEC_TEST_VECTORS - } + .enc = __VECS(aes_xts_enc_tv_template), + .dec = __VECS(aes_xts_dec_tv_template) } } }, { @@ -4045,14 +3439,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = camellia_xts_enc_tv_template, - .count = CAMELLIA_XTS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = camellia_xts_dec_tv_template, - .count = CAMELLIA_XTS_DEC_TEST_VECTORS - } + .enc = __VECS(camellia_xts_enc_tv_template), + .dec = __VECS(camellia_xts_dec_tv_template) } } }, { @@ -4060,14 +3448,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = cast6_xts_enc_tv_template, - .count = CAST6_XTS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = cast6_xts_dec_tv_template, - .count = CAST6_XTS_DEC_TEST_VECTORS - } + .enc = __VECS(cast6_xts_enc_tv_template), + .dec = __VECS(cast6_xts_dec_tv_template) } } }, { @@ -4075,14 +3457,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = serpent_xts_enc_tv_template, - .count = SERPENT_XTS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = serpent_xts_dec_tv_template, - .count = SERPENT_XTS_DEC_TEST_VECTORS - } + .enc = __VECS(serpent_xts_enc_tv_template), + .dec = __VECS(serpent_xts_dec_tv_template) } } }, { @@ -4090,14 +3466,8 @@ static const struct alg_test_desc alg_test_descs[] = { .test = alg_test_skcipher, .suite = { .cipher = { - .enc = { - .vecs = tf_xts_enc_tv_template, - .count = TF_XTS_ENC_TEST_VECTORS - }, - .dec = { - .vecs = tf_xts_dec_tv_template, - .count = TF_XTS_DEC_TEST_VECTORS - } + .enc = __VECS(tf_xts_enc_tv_template), + .dec = __VECS(tf_xts_dec_tv_template) } } } diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 9b656be7f52f..64595f067d72 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -151,11 +151,6 @@ static char zeroed_string[48]; /* * RSA test vectors. Borrowed from openSSL. 
*/ -#ifdef CONFIG_CRYPTO_FIPS -#define RSA_TEST_VECTORS 2 -#else -#define RSA_TEST_VECTORS 5 -#endif static struct akcipher_testvec rsa_tv_template[] = { { #ifndef CONFIG_CRYPTO_FIPS @@ -340,6 +335,7 @@ static struct akcipher_testvec rsa_tv_template[] = { .m_size = 8, .c_size = 256, .public_key_vec = true, +#ifndef CONFIG_CRYPTO_FIPS }, { .key = "\x30\x82\x09\x29" /* sequence of 2345 bytes */ @@ -538,11 +534,10 @@ static struct akcipher_testvec rsa_tv_template[] = { .key_len = 2349, .m_size = 8, .c_size = 512, +#endif } }; -#define DH_TEST_VECTORS 2 - struct kpp_testvec dh_tv_template[] = { { .secret = @@ -760,11 +755,6 @@ struct kpp_testvec dh_tv_template[] = { } }; -#ifdef CONFIG_CRYPTO_FIPS -#define ECDH_TEST_VECTORS 1 -#else -#define ECDH_TEST_VECTORS 2 -#endif struct kpp_testvec ecdh_tv_template[] = { { #ifndef CONFIG_CRYPTO_FIPS @@ -856,8 +846,6 @@ struct kpp_testvec ecdh_tv_template[] = { /* * MD4 test vectors from RFC1320 */ -#define MD4_TEST_VECTORS 7 - static struct hash_testvec md4_tv_template [] = { { .plaintext = "", @@ -899,7 +887,6 @@ static struct hash_testvec md4_tv_template [] = { }, }; -#define SHA3_224_TEST_VECTORS 3 static struct hash_testvec sha3_224_tv_template[] = { { .plaintext = "", @@ -925,7 +912,6 @@ static struct hash_testvec sha3_224_tv_template[] = { }, }; -#define SHA3_256_TEST_VECTORS 3 static struct hash_testvec sha3_256_tv_template[] = { { .plaintext = "", @@ -952,7 +938,6 @@ static struct hash_testvec sha3_256_tv_template[] = { }; -#define SHA3_384_TEST_VECTORS 3 static struct hash_testvec sha3_384_tv_template[] = { { .plaintext = "", @@ -985,7 +970,6 @@ static struct hash_testvec sha3_384_tv_template[] = { }; -#define SHA3_512_TEST_VECTORS 3 static struct hash_testvec sha3_512_tv_template[] = { { .plaintext = "", @@ -1027,8 +1011,6 @@ static struct hash_testvec sha3_512_tv_template[] = { /* * MD5 test vectors from RFC1321 */ -#define MD5_TEST_VECTORS 7 - static struct hash_testvec md5_tv_template[] = { { .digest = "\xd4\x1d\x8c\xd9\x8f\x00\xb2\x04" @@ -1073,8 +1055,6 @@ static struct hash_testvec md5_tv_template[] = { /* * RIPEMD-128 test vectors from ISO/IEC 10118-3:2004(E) */ -#define RMD128_TEST_VECTORS 10 - static struct hash_testvec rmd128_tv_template[] = { { .digest = "\xcd\xf2\x62\x13\xa1\x50\xdc\x3e" @@ -1137,8 +1117,6 @@ static struct hash_testvec rmd128_tv_template[] = { /* * RIPEMD-160 test vectors from ISO/IEC 10118-3:2004(E) */ -#define RMD160_TEST_VECTORS 10 - static struct hash_testvec rmd160_tv_template[] = { { .digest = "\x9c\x11\x85\xa5\xc5\xe9\xfc\x54\x61\x28" @@ -1201,8 +1179,6 @@ static struct hash_testvec rmd160_tv_template[] = { /* * RIPEMD-256 test vectors */ -#define RMD256_TEST_VECTORS 8 - static struct hash_testvec rmd256_tv_template[] = { { .digest = "\x02\xba\x4c\x4e\x5f\x8e\xcd\x18" @@ -1269,8 +1245,6 @@ static struct hash_testvec rmd256_tv_template[] = { /* * RIPEMD-320 test vectors */ -#define RMD320_TEST_VECTORS 8 - static struct hash_testvec rmd320_tv_template[] = { { .digest = "\x22\xd6\x5d\x56\x61\x53\x6c\xdc\x75\xc1" @@ -1334,7 +1308,6 @@ static struct hash_testvec rmd320_tv_template[] = { } }; -#define CRCT10DIF_TEST_VECTORS ARRAY_SIZE(crct10dif_tv_template) static struct hash_testvec crct10dif_tv_template[] = { { .plaintext = "abc", @@ -1385,8 +1358,6 @@ static struct hash_testvec crct10dif_tv_template[] = { * SHA1 test vectors from from FIPS PUB 180-1 * Long vector from CAVS 5.0 */ -#define SHA1_TEST_VECTORS 6 - static struct hash_testvec sha1_tv_template[] = { { .plaintext = "", @@ -1577,8 +1548,6 @@ static 
struct hash_testvec sha1_tv_template[] = { /* * SHA224 test vectors from from FIPS PUB 180-2 */ -#define SHA224_TEST_VECTORS 5 - static struct hash_testvec sha224_tv_template[] = { { .plaintext = "", @@ -1751,8 +1720,6 @@ static struct hash_testvec sha224_tv_template[] = { /* * SHA256 test vectors from from NIST */ -#define SHA256_TEST_VECTORS 5 - static struct hash_testvec sha256_tv_template[] = { { .plaintext = "", @@ -1924,8 +1891,6 @@ static struct hash_testvec sha256_tv_template[] = { /* * SHA384 test vectors from from NIST and kerneli */ -#define SHA384_TEST_VECTORS 6 - static struct hash_testvec sha384_tv_template[] = { { .plaintext = "", @@ -2118,8 +2083,6 @@ static struct hash_testvec sha384_tv_template[] = { /* * SHA512 test vectors from from NIST and kerneli */ -#define SHA512_TEST_VECTORS 6 - static struct hash_testvec sha512_tv_template[] = { { .plaintext = "", @@ -2327,8 +2290,6 @@ static struct hash_testvec sha512_tv_template[] = { * by Vincent Rijmen and Paulo S. L. M. Barreto as part of the NESSIE * submission */ -#define WP512_TEST_VECTORS 8 - static struct hash_testvec wp512_tv_template[] = { { .plaintext = "", @@ -2425,8 +2386,6 @@ static struct hash_testvec wp512_tv_template[] = { }, }; -#define WP384_TEST_VECTORS 8 - static struct hash_testvec wp384_tv_template[] = { { .plaintext = "", @@ -2507,8 +2466,6 @@ static struct hash_testvec wp384_tv_template[] = { }, }; -#define WP256_TEST_VECTORS 8 - static struct hash_testvec wp256_tv_template[] = { { .plaintext = "", @@ -2576,8 +2533,6 @@ static struct hash_testvec wp256_tv_template[] = { /* * TIGER test vectors from Tiger website */ -#define TGR192_TEST_VECTORS 6 - static struct hash_testvec tgr192_tv_template[] = { { .plaintext = "", @@ -2621,8 +2576,6 @@ static struct hash_testvec tgr192_tv_template[] = { }, }; -#define TGR160_TEST_VECTORS 6 - static struct hash_testvec tgr160_tv_template[] = { { .plaintext = "", @@ -2666,8 +2619,6 @@ static struct hash_testvec tgr160_tv_template[] = { }, }; -#define TGR128_TEST_VECTORS 6 - static struct hash_testvec tgr128_tv_template[] = { { .plaintext = "", @@ -2705,8 +2656,6 @@ static struct hash_testvec tgr128_tv_template[] = { }, }; -#define GHASH_TEST_VECTORS 6 - static struct hash_testvec ghash_tv_template[] = { { @@ -2822,8 +2771,6 @@ static struct hash_testvec ghash_tv_template[] = * HMAC-MD5 test vectors from RFC2202 * (These need to be fixed to not use strlen). 
*/ -#define HMAC_MD5_TEST_VECTORS 7 - static struct hash_testvec hmac_md5_tv_template[] = { { @@ -2904,8 +2851,6 @@ static struct hash_testvec hmac_md5_tv_template[] = /* * HMAC-RIPEMD128 test vectors from RFC2286 */ -#define HMAC_RMD128_TEST_VECTORS 7 - static struct hash_testvec hmac_rmd128_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b", @@ -2985,8 +2930,6 @@ static struct hash_testvec hmac_rmd128_tv_template[] = { /* * HMAC-RIPEMD160 test vectors from RFC2286 */ -#define HMAC_RMD160_TEST_VECTORS 7 - static struct hash_testvec hmac_rmd160_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b", @@ -3066,8 +3009,6 @@ static struct hash_testvec hmac_rmd160_tv_template[] = { /* * HMAC-SHA1 test vectors from RFC2202 */ -#define HMAC_SHA1_TEST_VECTORS 7 - static struct hash_testvec hmac_sha1_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b", @@ -3149,8 +3090,6 @@ static struct hash_testvec hmac_sha1_tv_template[] = { /* * SHA224 HMAC test vectors from RFC4231 */ -#define HMAC_SHA224_TEST_VECTORS 4 - static struct hash_testvec hmac_sha224_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -3264,8 +3203,6 @@ static struct hash_testvec hmac_sha224_tv_template[] = { * HMAC-SHA256 test vectors from * draft-ietf-ipsec-ciph-sha-256-01.txt */ -#define HMAC_SHA256_TEST_VECTORS 10 - static struct hash_testvec hmac_sha256_tv_template[] = { { .key = "\x01\x02\x03\x04\x05\x06\x07\x08" @@ -3401,8 +3338,6 @@ static struct hash_testvec hmac_sha256_tv_template[] = { }, }; -#define CMAC_AES_TEST_VECTORS 6 - static struct hash_testvec aes_cmac128_tv_template[] = { { /* From NIST Special Publication 800-38B, AES-128 */ .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6" @@ -3478,8 +3413,6 @@ static struct hash_testvec aes_cmac128_tv_template[] = { } }; -#define CMAC_DES3_EDE_TEST_VECTORS 4 - static struct hash_testvec des3_ede_cmac64_tv_template[] = { /* * From NIST Special Publication 800-38B, Three Key TDEA @@ -3526,8 +3459,6 @@ static struct hash_testvec des3_ede_cmac64_tv_template[] = { } }; -#define XCBC_AES_TEST_VECTORS 6 - static struct hash_testvec aes_xcbc128_tv_template[] = { { .key = "\x00\x01\x02\x03\x04\x05\x06\x07" @@ -3594,7 +3525,6 @@ static struct hash_testvec aes_xcbc128_tv_template[] = { } }; -#define VMAC_AES_TEST_VECTORS 11 static char vmac_string1[128] = {'\x01', '\x01', '\x01', '\x01', '\x02', '\x03', '\x02', '\x02', '\x02', '\x04', '\x01', '\x07', @@ -3701,8 +3631,6 @@ static struct hash_testvec aes_vmac128_tv_template[] = { * SHA384 HMAC test vectors from RFC4231 */ -#define HMAC_SHA384_TEST_VECTORS 4 - static struct hash_testvec hmac_sha384_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -3801,8 +3729,6 @@ static struct hash_testvec hmac_sha384_tv_template[] = { * SHA512 HMAC test vectors from RFC4231 */ -#define HMAC_SHA512_TEST_VECTORS 4 - static struct hash_testvec hmac_sha512_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -3908,8 +3834,6 @@ static struct hash_testvec hmac_sha512_tv_template[] = { }, }; -#define HMAC_SHA3_224_TEST_VECTORS 4 - static struct hash_testvec hmac_sha3_224_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -3999,8 +3923,6 @@ static struct hash_testvec hmac_sha3_224_tv_template[] = { }, }; -#define HMAC_SHA3_256_TEST_VECTORS 4 - static struct hash_testvec hmac_sha3_256_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -4090,8 +4012,6 
@@ static struct hash_testvec hmac_sha3_256_tv_template[] = { }, }; -#define HMAC_SHA3_384_TEST_VECTORS 4 - static struct hash_testvec hmac_sha3_384_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -4189,8 +4109,6 @@ static struct hash_testvec hmac_sha3_384_tv_template[] = { }, }; -#define HMAC_SHA3_512_TEST_VECTORS 4 - static struct hash_testvec hmac_sha3_512_tv_template[] = { { .key = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" @@ -4300,8 +4218,6 @@ static struct hash_testvec hmac_sha3_512_tv_template[] = { * Poly1305 test vectors from RFC7539 A.3. */ -#define POLY1305_TEST_VECTORS 11 - static struct hash_testvec poly1305_tv_template[] = { { /* Test Vector #1 */ .plaintext = "\x00\x00\x00\x00\x00\x00\x00\x00" @@ -4547,19 +4463,6 @@ static struct hash_testvec poly1305_tv_template[] = { /* * DES test vectors. */ -#define DES_ENC_TEST_VECTORS 11 -#define DES_DEC_TEST_VECTORS 5 -#define DES_CBC_ENC_TEST_VECTORS 6 -#define DES_CBC_DEC_TEST_VECTORS 5 -#define DES_CTR_ENC_TEST_VECTORS 2 -#define DES_CTR_DEC_TEST_VECTORS 2 -#define DES3_EDE_ENC_TEST_VECTORS 4 -#define DES3_EDE_DEC_TEST_VECTORS 4 -#define DES3_EDE_CBC_ENC_TEST_VECTORS 2 -#define DES3_EDE_CBC_DEC_TEST_VECTORS 2 -#define DES3_EDE_CTR_ENC_TEST_VECTORS 2 -#define DES3_EDE_CTR_DEC_TEST_VECTORS 2 - static struct cipher_testvec des_enc_tv_template[] = { { /* From Applied Cryptography */ .key = "\x01\x23\x45\x67\x89\xab\xcd\xef", @@ -6620,13 +6523,6 @@ static struct cipher_testvec des3_ede_ctr_dec_tv_template[] = { /* * Blowfish test vectors. */ -#define BF_ENC_TEST_VECTORS 7 -#define BF_DEC_TEST_VECTORS 7 -#define BF_CBC_ENC_TEST_VECTORS 2 -#define BF_CBC_DEC_TEST_VECTORS 2 -#define BF_CTR_ENC_TEST_VECTORS 2 -#define BF_CTR_DEC_TEST_VECTORS 2 - static struct cipher_testvec bf_enc_tv_template[] = { { /* DES test vectors from OpenSSL */ .key = "\x00\x00\x00\x00\x00\x00\x00\x00", @@ -8152,17 +8048,6 @@ static struct cipher_testvec bf_ctr_dec_tv_template[] = { /* * Twofish test vectors. */ -#define TF_ENC_TEST_VECTORS 4 -#define TF_DEC_TEST_VECTORS 4 -#define TF_CBC_ENC_TEST_VECTORS 5 -#define TF_CBC_DEC_TEST_VECTORS 5 -#define TF_CTR_ENC_TEST_VECTORS 2 -#define TF_CTR_DEC_TEST_VECTORS 2 -#define TF_LRW_ENC_TEST_VECTORS 8 -#define TF_LRW_DEC_TEST_VECTORS 8 -#define TF_XTS_ENC_TEST_VECTORS 5 -#define TF_XTS_DEC_TEST_VECTORS 5 - static struct cipher_testvec tf_enc_tv_template[] = { { .key = zeroed_string, @@ -10881,24 +10766,6 @@ static struct cipher_testvec tf_xts_dec_tv_template[] = { * Serpent test vectors. These are backwards because Serpent writes * octet sequences in right-to-left mode. 
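For context on the sweeping macro deletions in this file: every hand-counted *_TEST_VECTORS constant is being retired because the test manager now sizes each template array at the point of use. A minimal sketch of that pattern, using simplified types and an illustrative VECS() helper rather than the exact upstream macro:

	#include <stddef.h>

	#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

	struct hash_testvec {
		const char *plaintext;
		const char *digest;
	};

	struct hash_test_suite {
		const struct hash_testvec *vecs;
		unsigned int count;
	};

	/* The count is derived from the array itself, so adding or dropping
	 * a vector can no longer drift out of sync with a separate #define. */
	#define VECS(tv) { .vecs = (tv), .count = ARRAY_SIZE(tv) }

	static const struct hash_testvec md5_tv[] = {
		{ .plaintext = "",			/* MD5("") */
		  .digest = "\xd4\x1d\x8c\xd9\x8f\x00\xb2\x04"
			    "\xe9\x80\x09\x98\xec\xf8\x42\x7e" },
	};

	static const struct hash_test_suite md5_suite = VECS(md5_tv);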
*/ -#define SERPENT_ENC_TEST_VECTORS 5 -#define SERPENT_DEC_TEST_VECTORS 5 - -#define TNEPRES_ENC_TEST_VECTORS 4 -#define TNEPRES_DEC_TEST_VECTORS 4 - -#define SERPENT_CBC_ENC_TEST_VECTORS 1 -#define SERPENT_CBC_DEC_TEST_VECTORS 1 - -#define SERPENT_CTR_ENC_TEST_VECTORS 2 -#define SERPENT_CTR_DEC_TEST_VECTORS 2 - -#define SERPENT_LRW_ENC_TEST_VECTORS 8 -#define SERPENT_LRW_DEC_TEST_VECTORS 8 - -#define SERPENT_XTS_ENC_TEST_VECTORS 5 -#define SERPENT_XTS_DEC_TEST_VECTORS 5 - static struct cipher_testvec serpent_enc_tv_template[] = { { .input = "\x00\x01\x02\x03\x04\x05\x06\x07" @@ -13637,17 +13504,6 @@ static struct cipher_testvec serpent_xts_dec_tv_template[] = { }; /* Cast6 test vectors from RFC 2612 */ -#define CAST6_ENC_TEST_VECTORS 4 -#define CAST6_DEC_TEST_VECTORS 4 -#define CAST6_CBC_ENC_TEST_VECTORS 1 -#define CAST6_CBC_DEC_TEST_VECTORS 1 -#define CAST6_CTR_ENC_TEST_VECTORS 2 -#define CAST6_CTR_DEC_TEST_VECTORS 2 -#define CAST6_LRW_ENC_TEST_VECTORS 1 -#define CAST6_LRW_DEC_TEST_VECTORS 1 -#define CAST6_XTS_ENC_TEST_VECTORS 1 -#define CAST6_XTS_DEC_TEST_VECTORS 1 - static struct cipher_testvec cast6_enc_tv_template[] = { { .key = "\x23\x42\xbb\x9e\xfa\x38\x54\x2c" @@ -15182,38 +15038,6 @@ static struct cipher_testvec cast6_xts_dec_tv_template[] = { /* * AES test vectors. */ -#define AES_ENC_TEST_VECTORS 4 -#define AES_DEC_TEST_VECTORS 4 -#define AES_CBC_ENC_TEST_VECTORS 5 -#define AES_CBC_DEC_TEST_VECTORS 5 -#define HMAC_MD5_ECB_CIPHER_NULL_ENC_TEST_VECTORS 2 -#define HMAC_MD5_ECB_CIPHER_NULL_DEC_TEST_VECTORS 2 -#define HMAC_SHA1_ECB_CIPHER_NULL_ENC_TEST_VEC 2 -#define HMAC_SHA1_ECB_CIPHER_NULL_DEC_TEST_VEC 2 -#define HMAC_SHA1_AES_CBC_ENC_TEST_VEC 7 -#define HMAC_SHA256_AES_CBC_ENC_TEST_VEC 7 -#define HMAC_SHA512_AES_CBC_ENC_TEST_VEC 7 -#define AES_LRW_ENC_TEST_VECTORS 8 -#define AES_LRW_DEC_TEST_VECTORS 8 -#define AES_XTS_ENC_TEST_VECTORS 5 -#define AES_XTS_DEC_TEST_VECTORS 5 -#define AES_CTR_ENC_TEST_VECTORS 5 -#define AES_CTR_DEC_TEST_VECTORS 5 -#define AES_OFB_ENC_TEST_VECTORS 1 -#define AES_OFB_DEC_TEST_VECTORS 1 -#define AES_CTR_3686_ENC_TEST_VECTORS 7 -#define AES_CTR_3686_DEC_TEST_VECTORS 6 -#define AES_GCM_ENC_TEST_VECTORS 9 -#define AES_GCM_DEC_TEST_VECTORS 8 -#define AES_GCM_4106_ENC_TEST_VECTORS 23 -#define AES_GCM_4106_DEC_TEST_VECTORS 23 -#define AES_GCM_4543_ENC_TEST_VECTORS 1 -#define AES_GCM_4543_DEC_TEST_VECTORS 2 -#define AES_CCM_ENC_TEST_VECTORS 8 -#define AES_CCM_DEC_TEST_VECTORS 7 -#define AES_CCM_4309_ENC_TEST_VECTORS 7 -#define AES_CCM_4309_DEC_TEST_VECTORS 10 - static struct cipher_testvec aes_enc_tv_template[] = { { /* From FIPS-197 */ .key = "\x00\x01\x02\x03\x04\x05\x06\x07" @@ -17069,8 +16893,6 @@ static struct aead_testvec hmac_sha512_aes_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA1_DES_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha1_des_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17130,8 +16952,6 @@ static struct aead_testvec hmac_sha1_des_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA224_DES_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha224_des_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17191,8 +17011,6 @@ static struct aead_testvec hmac_sha224_des_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA256_DES_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha256_des_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17254,8 +17072,6 @@ static struct aead_testvec hmac_sha256_des_cbc_enc_tv_temp[] = { }, }; -#define 
HMAC_SHA384_DES_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha384_des_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17321,8 +17137,6 @@ static struct aead_testvec hmac_sha384_des_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA512_DES_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha512_des_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17392,8 +17206,6 @@ static struct aead_testvec hmac_sha512_des_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA1_DES3_EDE_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha1_des3_ede_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17455,8 +17267,6 @@ static struct aead_testvec hmac_sha1_des3_ede_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA224_DES3_EDE_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha224_des3_ede_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17518,8 +17328,6 @@ static struct aead_testvec hmac_sha224_des3_ede_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA256_DES3_EDE_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha256_des3_ede_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17583,8 +17391,6 @@ static struct aead_testvec hmac_sha256_des3_ede_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA384_DES3_EDE_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha384_des3_ede_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -17652,8 +17458,6 @@ static struct aead_testvec hmac_sha384_des3_ede_cbc_enc_tv_temp[] = { }, }; -#define HMAC_SHA512_DES3_EDE_CBC_ENC_TEST_VEC 1 - static struct aead_testvec hmac_sha512_des3_ede_cbc_enc_tv_temp[] = { { /*Generated with cryptopp*/ #ifdef __LITTLE_ENDIAN @@ -24434,8 +24238,6 @@ static struct aead_testvec aes_ccm_rfc4309_dec_tv_template[] = { /* * ChaCha20-Poly1305 AEAD test vectors from RFC7539 2.8.2./A.5. */ -#define RFC7539_ENC_TEST_VECTORS 2 -#define RFC7539_DEC_TEST_VECTORS 2 static struct aead_testvec rfc7539_enc_tv_template[] = { { .key = "\x80\x81\x82\x83\x84\x85\x86\x87" @@ -24703,8 +24505,6 @@ static struct aead_testvec rfc7539_dec_tv_template[] = { /* * draft-irtf-cfrg-chacha20-poly1305 */ -#define RFC7539ESP_DEC_TEST_VECTORS 1 -#define RFC7539ESP_ENC_TEST_VECTORS 1 static struct aead_testvec rfc7539esp_enc_tv_template[] = { { .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" @@ -24927,8 +24727,6 @@ static struct cipher_testvec aes_kw_dec_tv_template[] = { * http://csrc.nist.gov/groups/STM/cavp/documents/rng/RNGVS.pdf * Only AES-128 is supported at this time. 
*/ -#define ANSI_CPRNG_AES_TEST_VECTORS 6 - static struct cprng_testvec ansi_cprng_aes_tv_template[] = { { .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42" @@ -25846,13 +25644,6 @@ static struct drbg_testvec drbg_nopr_ctr_aes128_tv_template[] = { }; /* Cast5 test vectors from RFC 2144 */ -#define CAST5_ENC_TEST_VECTORS 4 -#define CAST5_DEC_TEST_VECTORS 4 -#define CAST5_CBC_ENC_TEST_VECTORS 1 -#define CAST5_CBC_DEC_TEST_VECTORS 1 -#define CAST5_CTR_ENC_TEST_VECTORS 2 -#define CAST5_CTR_DEC_TEST_VECTORS 2 - static struct cipher_testvec cast5_enc_tv_template[] = { { .key = "\x01\x23\x45\x67\x12\x34\x56\x78" @@ -26756,9 +26547,6 @@ static struct cipher_testvec cast5_ctr_dec_tv_template[] = { /* * ARC4 test vectors from OpenSSL */ -#define ARC4_ENC_TEST_VECTORS 7 -#define ARC4_DEC_TEST_VECTORS 7 - static struct cipher_testvec arc4_enc_tv_template[] = { { .key = "\x01\x23\x45\x67\x89\xab\xcd\xef", @@ -26894,9 +26682,6 @@ static struct cipher_testvec arc4_dec_tv_template[] = { /* * TEA test vectors */ -#define TEA_ENC_TEST_VECTORS 4 -#define TEA_DEC_TEST_VECTORS 4 - static struct cipher_testvec tea_enc_tv_template[] = { { .key = zeroed_string, @@ -26986,9 +26771,6 @@ static struct cipher_testvec tea_dec_tv_template[] = { /* * XTEA test vectors */ -#define XTEA_ENC_TEST_VECTORS 4 -#define XTEA_DEC_TEST_VECTORS 4 - static struct cipher_testvec xtea_enc_tv_template[] = { { .key = zeroed_string, @@ -27078,9 +26860,6 @@ static struct cipher_testvec xtea_dec_tv_template[] = { /* * KHAZAD test vectors. */ -#define KHAZAD_ENC_TEST_VECTORS 5 -#define KHAZAD_DEC_TEST_VECTORS 5 - static struct cipher_testvec khazad_enc_tv_template[] = { { .key = "\x80\x00\x00\x00\x00\x00\x00\x00" @@ -27177,11 +26956,6 @@ static struct cipher_testvec khazad_dec_tv_template[] = { * Anubis test vectors. */ -#define ANUBIS_ENC_TEST_VECTORS 5 -#define ANUBIS_DEC_TEST_VECTORS 5 -#define ANUBIS_CBC_ENC_TEST_VECTORS 2 -#define ANUBIS_CBC_DEC_TEST_VECTORS 2 - static struct cipher_testvec anubis_enc_tv_template[] = { { .key = "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe" @@ -27381,9 +27155,6 @@ static struct cipher_testvec anubis_cbc_dec_tv_template[] = { /* * XETA test vectors */ -#define XETA_ENC_TEST_VECTORS 4 -#define XETA_DEC_TEST_VECTORS 4 - static struct cipher_testvec xeta_enc_tv_template[] = { { .key = zeroed_string, @@ -27473,9 +27244,6 @@ static struct cipher_testvec xeta_dec_tv_template[] = { /* * FCrypt test vectors */ -#define FCRYPT_ENC_TEST_VECTORS ARRAY_SIZE(fcrypt_pcbc_enc_tv_template) -#define FCRYPT_DEC_TEST_VECTORS ARRAY_SIZE(fcrypt_pcbc_dec_tv_template) - static struct cipher_testvec fcrypt_pcbc_enc_tv_template[] = { { /* http://www.openafs.org/pipermail/openafs-devel/2000-December/005320.html */ .key = "\x00\x00\x00\x00\x00\x00\x00\x00", @@ -27601,17 +27369,6 @@ static struct cipher_testvec fcrypt_pcbc_dec_tv_template[] = { /* * CAMELLIA test vectors. 
*/ -#define CAMELLIA_ENC_TEST_VECTORS 4 -#define CAMELLIA_DEC_TEST_VECTORS 4 -#define CAMELLIA_CBC_ENC_TEST_VECTORS 3 -#define CAMELLIA_CBC_DEC_TEST_VECTORS 3 -#define CAMELLIA_CTR_ENC_TEST_VECTORS 2 -#define CAMELLIA_CTR_DEC_TEST_VECTORS 2 -#define CAMELLIA_LRW_ENC_TEST_VECTORS 8 -#define CAMELLIA_LRW_DEC_TEST_VECTORS 8 -#define CAMELLIA_XTS_ENC_TEST_VECTORS 5 -#define CAMELLIA_XTS_DEC_TEST_VECTORS 5 - static struct cipher_testvec camellia_enc_tv_template[] = { { .key = "\x01\x23\x45\x67\x89\xab\xcd\xef" @@ -31331,9 +31088,6 @@ static struct cipher_testvec camellia_xts_dec_tv_template[] = { /* * SEED test vectors */ -#define SEED_ENC_TEST_VECTORS 4 -#define SEED_DEC_TEST_VECTORS 4 - static struct cipher_testvec seed_enc_tv_template[] = { { .key = zeroed_string, @@ -31418,7 +31172,6 @@ static struct cipher_testvec seed_dec_tv_template[] = { } }; -#define SALSA20_STREAM_ENC_TEST_VECTORS 5 static struct cipher_testvec salsa20_stream_enc_tv_template[] = { /* * Testvectors from verified.test-vectors submitted to ECRYPT. @@ -32588,7 +32341,6 @@ static struct cipher_testvec salsa20_stream_enc_tv_template[] = { }, }; -#define CHACHA20_ENC_TEST_VECTORS 4 static struct cipher_testvec chacha20_enc_tv_template[] = { { /* RFC7539 A.2. Test Vector #1 */ .key = "\x00\x00\x00\x00\x00\x00\x00\x00" @@ -33100,8 +32852,6 @@ static struct cipher_testvec chacha20_enc_tv_template[] = { /* * CTS (Cipher Text Stealing) mode tests */ -#define CTS_MODE_ENC_TEST_VECTORS 6 -#define CTS_MODE_DEC_TEST_VECTORS 6 static struct cipher_testvec cts_mode_enc_tv_template[] = { { /* from rfc3962 */ .klen = 16, @@ -33322,9 +33072,6 @@ struct comp_testvec { * Params: winbits=-11, Z_DEFAULT_COMPRESSION, MAX_MEM_LEVEL. */ -#define DEFLATE_COMP_TEST_VECTORS 2 -#define DEFLATE_DECOMP_TEST_VECTORS 2 - static struct comp_testvec deflate_comp_tv_template[] = { { .inlen = 70, @@ -33400,9 +33147,6 @@ static struct comp_testvec deflate_decomp_tv_template[] = { /* * LZO test vectors (null-terminated strings). */ -#define LZO_COMP_TEST_VECTORS 2 -#define LZO_DECOMP_TEST_VECTORS 2 - static struct comp_testvec lzo_comp_tv_template[] = { { .inlen = 70, @@ -33534,8 +33278,6 @@ static struct hash_testvec michael_mic_tv_template[] = { /* * CRC32 test vectors */ -#define CRC32_TEST_VECTORS 14 - static struct hash_testvec crc32_tv_template[] = { { .key = "\x87\xa9\xcb\xed", @@ -33968,8 +33710,6 @@ static struct hash_testvec crc32_tv_template[] = { /* * CRC32C test vectors */ -#define CRC32C_TEST_VECTORS 15 - static struct hash_testvec crc32c_tv_template[] = { { .psize = 0, @@ -34406,8 +34146,6 @@ static struct hash_testvec crc32c_tv_template[] = { /* * Blackfin CRC test vectors */ -#define BFIN_CRC_TEST_VECTORS 6 - static struct hash_testvec bfin_crc_tv_template[] = { { .psize = 0, @@ -34493,9 +34231,6 @@ static struct hash_testvec bfin_crc_tv_template[] = { }; -#define LZ4_COMP_TEST_VECTORS 1 -#define LZ4_DECOMP_TEST_VECTORS 1 - static struct comp_testvec lz4_comp_tv_template[] = { { .inlen = 70, @@ -34526,9 +34261,6 @@ static struct comp_testvec lz4_decomp_tv_template[] = { }, }; -#define LZ4HC_COMP_TEST_VECTORS 1 -#define LZ4HC_DECOMP_TEST_VECTORS 1 - static struct comp_testvec lz4hc_comp_tv_template[] = { { .inlen = 70, diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 6ce5ce8be2f2..68fdc60ddf50 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -1,55 +1,30 @@ /* - Added support for the AMD Geode LX RNG - (c) Copyright 2004-2005 Advanced Micro Devices, Inc.
- - derived from - - Hardware driver for the Intel/AMD/VIA Random Number Generators (RNG) - (c) Copyright 2003 Red Hat Inc <jgarzik@redhat.com> - - derived from - - Hardware driver for the AMD 768 Random Number Generator (RNG) - (c) Copyright 2001 Red Hat Inc <alan@redhat.com> - - derived from - - Hardware driver for Intel i810 Random Number Generator (RNG) - Copyright 2000,2001 Jeff Garzik <jgarzik@pobox.com> - Copyright 2000,2001 Philipp Rumpf <prumpf@mandrakesoft.com> - - Added generic RNG API - Copyright 2006 Michael Buesch <m@bues.ch> - Copyright 2005 (c) MontaVista Software, Inc. - - Please read Documentation/hw_random.txt for details on use. - - ---------------------------------------------------------- - This software may be used and distributed according to the terms - of the GNU General Public License, incorporated herein by reference. - + * hw_random/core.c: HWRNG core API + * + * Copyright 2006 Michael Buesch <m@bues.ch> + * Copyright 2005 (c) MontaVista Software, Inc. + * + * Please read Documentation/hw_random.txt for details on use. + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. */ - +#include <linux/delay.h> #include <linux/device.h> +#include <linux/err.h> +#include <linux/fs.h> #include <linux/hw_random.h> -#include <linux/module.h> #include <linux/kernel.h> -#include <linux/fs.h> -#include <linux/sched.h> -#include <linux/miscdevice.h> #include <linux/kthread.h> -#include <linux/delay.h> -#include <linux/slab.h> +#include <linux/miscdevice.h> +#include <linux/module.h> #include <linux/random.h> -#include <linux/err.h> +#include <linux/sched.h> +#include <linux/slab.h> #include <linux/uaccess.h> - #define RNG_MODULE_NAME "hw_random" -#define PFX RNG_MODULE_NAME ": " -#define RNG_MISCDEV_MINOR 183 /* official */ - static struct hwrng *current_rng; static struct task_struct *hwrng_fill; @@ -298,7 +273,6 @@ out_put: goto out; } - static const struct file_operations rng_chrdev_ops = { .owner = THIS_MODULE, .open = rng_dev_open, @@ -309,14 +283,13 @@ static const struct file_operations rng_chrdev_ops = { static const struct attribute_group *rng_dev_groups[]; static struct miscdevice rng_miscdev = { - .minor = RNG_MISCDEV_MINOR, + .minor = HWRNG_MINOR, .name = RNG_MODULE_NAME, .nodename = "hwrng", .fops = &rng_chrdev_ops, .groups = rng_dev_groups, }; - static ssize_t hwrng_attr_current_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) @@ -447,8 +420,7 @@ int hwrng_register(struct hwrng *rng) int err = -EINVAL; struct hwrng *old_rng, *tmp; - if (rng->name == NULL || - (rng->data_read == NULL && rng->read == NULL)) + if (!rng->name || (!rng->data_read && !rng->read)) goto out; mutex_lock(&rng_mutex); diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c index 3b06c1d6cfb2..31cbdbbaebfc 100644 --- a/drivers/char/hw_random/n2-drv.c +++ b/drivers/char/hw_random/n2-drv.c @@ -21,11 +21,11 @@ #define DRV_MODULE_NAME "n2rng" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "0.2" -#define DRV_MODULE_RELDATE "July 27, 2011" +#define DRV_MODULE_VERSION "0.3" +#define DRV_MODULE_RELDATE "Jan 7, 2017" static char version[] = - DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; + DRV_MODULE_NAME " v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; MODULE_AUTHOR("David S. 
Miller (davem@davemloft.net)"); MODULE_DESCRIPTION("Niagara2 RNG driver"); @@ -302,26 +302,57 @@ static int n2rng_try_read_ctl(struct n2rng *np) return n2rng_hv_err_trans(hv_err); } -#define CONTROL_DEFAULT_BASE \ - ((2 << RNG_CTL_ASEL_SHIFT) | \ - (N2RNG_ACCUM_CYCLES_DEFAULT << RNG_CTL_WAIT_SHIFT) | \ - RNG_CTL_LFSR) - -#define CONTROL_DEFAULT_0 \ - (CONTROL_DEFAULT_BASE | \ - (1 << RNG_CTL_VCO_SHIFT) | \ - RNG_CTL_ES1) -#define CONTROL_DEFAULT_1 \ - (CONTROL_DEFAULT_BASE | \ - (2 << RNG_CTL_VCO_SHIFT) | \ - RNG_CTL_ES2) -#define CONTROL_DEFAULT_2 \ - (CONTROL_DEFAULT_BASE | \ - (3 << RNG_CTL_VCO_SHIFT) | \ - RNG_CTL_ES3) -#define CONTROL_DEFAULT_3 \ - (CONTROL_DEFAULT_BASE | \ - RNG_CTL_ES1 | RNG_CTL_ES2 | RNG_CTL_ES3) +static u64 n2rng_control_default(struct n2rng *np, int ctl) +{ + u64 val = 0; + + if (np->data->chip_version == 1) { + val = ((2 << RNG_v1_CTL_ASEL_SHIFT) | + (N2RNG_ACCUM_CYCLES_DEFAULT << RNG_v1_CTL_WAIT_SHIFT) | + RNG_CTL_LFSR); + + switch (ctl) { + case 0: + val |= (1 << RNG_v1_CTL_VCO_SHIFT) | RNG_CTL_ES1; + break; + case 1: + val |= (2 << RNG_v1_CTL_VCO_SHIFT) | RNG_CTL_ES2; + break; + case 2: + val |= (3 << RNG_v1_CTL_VCO_SHIFT) | RNG_CTL_ES3; + break; + case 3: + val |= RNG_CTL_ES1 | RNG_CTL_ES2 | RNG_CTL_ES3; + break; + default: + break; + } + + } else { + val = ((2 << RNG_v2_CTL_ASEL_SHIFT) | + (N2RNG_ACCUM_CYCLES_DEFAULT << RNG_v2_CTL_WAIT_SHIFT) | + RNG_CTL_LFSR); + + switch (ctl) { + case 0: + val |= (1 << RNG_v2_CTL_VCO_SHIFT) | RNG_CTL_ES1; + break; + case 1: + val |= (2 << RNG_v2_CTL_VCO_SHIFT) | RNG_CTL_ES2; + break; + case 2: + val |= (3 << RNG_v2_CTL_VCO_SHIFT) | RNG_CTL_ES3; + break; + case 3: + val |= RNG_CTL_ES1 | RNG_CTL_ES2 | RNG_CTL_ES3; + break; + default: + break; + } + } + + return val; +} static void n2rng_control_swstate_init(struct n2rng *np) { @@ -336,10 +367,10 @@ static void n2rng_control_swstate_init(struct n2rng *np) for (i = 0; i < np->num_units; i++) { struct n2rng_unit *up = &np->units[i]; - up->control[0] = CONTROL_DEFAULT_0; - up->control[1] = CONTROL_DEFAULT_1; - up->control[2] = CONTROL_DEFAULT_2; - up->control[3] = CONTROL_DEFAULT_3; + up->control[0] = n2rng_control_default(np, 0); + up->control[1] = n2rng_control_default(np, 1); + up->control[2] = n2rng_control_default(np, 2); + up->control[3] = n2rng_control_default(np, 3); } np->hv_state = HV_RNG_STATE_UNCONFIGURED; @@ -399,6 +430,7 @@ static int n2rng_data_read(struct hwrng *rng, u32 *data) } else { int err = n2rng_generic_read_data(ra); if (!err) { + np->flags |= N2RNG_FLAG_BUFFER_VALID; np->buffer = np->test_data >> 32; *data = np->test_data & 0xffffffff; len = 4; @@ -487,9 +519,21 @@ static void n2rng_dump_test_buffer(struct n2rng *np) static int n2rng_check_selftest_buffer(struct n2rng *np, unsigned long unit) { - u64 val = SELFTEST_VAL; + u64 val; int err, matches, limit; + switch (np->data->id) { + case N2_n2_rng: + case N2_vf_rng: + case N2_kt_rng: + case N2_m4_rng: /* yes, m4 uses the old value */ + val = RNG_v1_SELFTEST_VAL; + break; + default: + val = RNG_v2_SELFTEST_VAL; + break; + } + matches = 0; for (limit = 0; limit < SELFTEST_LOOPS_MAX; limit++) { matches += n2rng_test_buffer_find(np, val); @@ -512,14 +556,32 @@ static int n2rng_check_selftest_buffer(struct n2rng *np, unsigned long unit) static int n2rng_control_selftest(struct n2rng *np, unsigned long unit) { int err; + u64 base, base3; + + switch (np->data->id) { + case N2_n2_rng: + case N2_vf_rng: + case N2_kt_rng: + base = RNG_v1_CTL_ASEL_NOOUT << RNG_v1_CTL_ASEL_SHIFT; + base3 = base | RNG_CTL_LFSR | + 
((RNG_v1_SELFTEST_TICKS - 2) << RNG_v1_CTL_WAIT_SHIFT); + break; + case N2_m4_rng: + base = RNG_v2_CTL_ASEL_NOOUT << RNG_v2_CTL_ASEL_SHIFT; + base3 = base | RNG_CTL_LFSR | + ((RNG_v1_SELFTEST_TICKS - 2) << RNG_v2_CTL_WAIT_SHIFT); + break; + default: + base = RNG_v2_CTL_ASEL_NOOUT << RNG_v2_CTL_ASEL_SHIFT; + base3 = base | RNG_CTL_LFSR | + (RNG_v2_SELFTEST_TICKS << RNG_v2_CTL_WAIT_SHIFT); + break; + } - np->test_control[0] = (0x2 << RNG_CTL_ASEL_SHIFT); - np->test_control[1] = (0x2 << RNG_CTL_ASEL_SHIFT); - np->test_control[2] = (0x2 << RNG_CTL_ASEL_SHIFT); - np->test_control[3] = ((0x2 << RNG_CTL_ASEL_SHIFT) | - RNG_CTL_LFSR | - ((SELFTEST_TICKS - 2) << RNG_CTL_WAIT_SHIFT)); - + np->test_control[0] = base; + np->test_control[1] = base; + np->test_control[2] = base; + np->test_control[3] = base3; err = n2rng_entropy_diag_read(np, unit, np->test_control, HV_RNG_STATE_HEALTHCHECK, @@ -557,11 +619,19 @@ static int n2rng_control_configure_units(struct n2rng *np) struct n2rng_unit *up = &np->units[unit]; unsigned long ctl_ra = __pa(&up->control[0]); int esrc; - u64 base; + u64 base, shift; - base = ((np->accum_cycles << RNG_CTL_WAIT_SHIFT) | - (2 << RNG_CTL_ASEL_SHIFT) | - RNG_CTL_LFSR); + if (np->data->chip_version == 1) { + base = ((np->accum_cycles << RNG_v1_CTL_WAIT_SHIFT) | + (RNG_v1_CTL_ASEL_NOOUT << RNG_v1_CTL_ASEL_SHIFT) | + RNG_CTL_LFSR); + shift = RNG_v1_CTL_VCO_SHIFT; + } else { + base = ((np->accum_cycles << RNG_v2_CTL_WAIT_SHIFT) | + (RNG_v2_CTL_ASEL_NOOUT << RNG_v2_CTL_ASEL_SHIFT) | + RNG_CTL_LFSR); + shift = RNG_v2_CTL_VCO_SHIFT; + } /* XXX This isn't the best. We should fetch a bunch * XXX of words using each entropy source combined XXX @@ -570,7 +640,7 @@ static int n2rng_control_configure_units(struct n2rng *np) */ for (esrc = 0; esrc < 3; esrc++) up->control[esrc] = base | - (esrc << RNG_CTL_VCO_SHIFT) | + (esrc << shift) | (RNG_CTL_ES1 << esrc); up->control[3] = base | @@ -589,6 +659,7 @@ static void n2rng_work(struct work_struct *work) { struct n2rng *np = container_of(work, struct n2rng, work.work); int err = 0; + static int retries = 4; if (!(np->flags & N2RNG_FLAG_CONTROL)) { err = n2rng_guest_check(np); @@ -606,7 +677,9 @@ static void n2rng_work(struct work_struct *work) dev_info(&np->op->dev, "RNG ready\n"); } - if (err && !(np->flags & N2RNG_FLAG_SHUTDOWN)) + if (--retries == 0) + dev_err(&np->op->dev, "Self-test retries failed, RNG not ready\n"); + else if (err && !(np->flags & N2RNG_FLAG_SHUTDOWN)) schedule_delayed_work(&np->work, HZ * 2); } @@ -622,24 +695,23 @@ static const struct of_device_id n2rng_match[]; static int n2rng_probe(struct platform_device *op) { const struct of_device_id *match; - int multi_capable; int err = -ENOMEM; struct n2rng *np; match = of_match_device(n2rng_match, &op->dev); if (!match) return -EINVAL; - multi_capable = (match->data != NULL); n2rng_driver_version(); np = devm_kzalloc(&op->dev, sizeof(*np), GFP_KERNEL); if (!np) goto out; np->op = op; + np->data = (struct n2rng_template *)match->data; INIT_DELAYED_WORK(&np->work, n2rng_work); - if (multi_capable) + if (np->data->multi_capable) np->flags |= N2RNG_FLAG_MULTI; err = -ENODEV; @@ -670,8 +742,9 @@ static int n2rng_probe(struct platform_device *op) dev_err(&op->dev, "VF RNG lacks rng-#units property\n"); goto out_hvapi_unregister; } - } else + } else { np->num_units = 1; + } dev_info(&op->dev, "Registered RNG HVAPI major %lu minor %lu\n", np->hvapi_major, np->hvapi_minor); @@ -692,7 +765,7 @@ static int n2rng_probe(struct platform_device *op) "multi-unit-capable" : "single-unit"), 
np->num_units); - np->hwrng.name = "n2rng"; + np->hwrng.name = DRV_MODULE_NAME; np->hwrng.data_read = n2rng_data_read; np->hwrng.priv = (unsigned long) np; @@ -728,30 +801,61 @@ static int n2rng_remove(struct platform_device *op) return 0; } +static struct n2rng_template n2_template = { + .id = N2_n2_rng, + .multi_capable = 0, + .chip_version = 1, +}; + +static struct n2rng_template vf_template = { + .id = N2_vf_rng, + .multi_capable = 1, + .chip_version = 1, +}; + +static struct n2rng_template kt_template = { + .id = N2_kt_rng, + .multi_capable = 1, + .chip_version = 1, +}; + +static struct n2rng_template m4_template = { + .id = N2_m4_rng, + .multi_capable = 1, + .chip_version = 2, +}; + +static struct n2rng_template m7_template = { + .id = N2_m7_rng, + .multi_capable = 1, + .chip_version = 2, +}; + static const struct of_device_id n2rng_match[] = { { .name = "random-number-generator", .compatible = "SUNW,n2-rng", + .data = &n2_template, }, { .name = "random-number-generator", .compatible = "SUNW,vf-rng", - .data = (void *) 1, + .data = &vf_template, }, { .name = "random-number-generator", .compatible = "SUNW,kt-rng", - .data = (void *) 1, + .data = &kt_template, }, { .name = "random-number-generator", .compatible = "ORCL,m4-rng", - .data = (void *) 1, + .data = &m4_template, }, { .name = "random-number-generator", .compatible = "ORCL,m7-rng", - .data = (void *) 1, + .data = &m7_template, }, {}, }; diff --git a/drivers/char/hw_random/n2rng.h b/drivers/char/hw_random/n2rng.h index f244ac89087f..6bad6cc634e8 100644 --- a/drivers/char/hw_random/n2rng.h +++ b/drivers/char/hw_random/n2rng.h @@ -6,18 +6,34 @@ #ifndef _N2RNG_H #define _N2RNG_H -#define RNG_CTL_WAIT 0x0000000001fffe00ULL /* Minimum wait time */ -#define RNG_CTL_WAIT_SHIFT 9 -#define RNG_CTL_BYPASS 0x0000000000000100ULL /* VCO voltage source */ -#define RNG_CTL_VCO 0x00000000000000c0ULL /* VCO rate control */ -#define RNG_CTL_VCO_SHIFT 6 -#define RNG_CTL_ASEL 0x0000000000000030ULL /* Analog MUX select */ -#define RNG_CTL_ASEL_SHIFT 4 +/* ver1 devices - n2-rng, vf-rng, kt-rng */ +#define RNG_v1_CTL_WAIT 0x0000000001fffe00ULL /* Minimum wait time */ +#define RNG_v1_CTL_WAIT_SHIFT 9 +#define RNG_v1_CTL_BYPASS 0x0000000000000100ULL /* VCO voltage source */ +#define RNG_v1_CTL_VCO 0x00000000000000c0ULL /* VCO rate control */ +#define RNG_v1_CTL_VCO_SHIFT 6 +#define RNG_v1_CTL_ASEL 0x0000000000000030ULL /* Analog MUX select */ +#define RNG_v1_CTL_ASEL_SHIFT 4 +#define RNG_v1_CTL_ASEL_NOOUT 2 + +/* these are the same in v2 as in v1 */ #define RNG_CTL_LFSR 0x0000000000000008ULL /* Use LFSR or plain shift */ #define RNG_CTL_ES3 0x0000000000000004ULL /* Enable entropy source 3 */ #define RNG_CTL_ES2 0x0000000000000002ULL /* Enable entropy source 2 */ #define RNG_CTL_ES1 0x0000000000000001ULL /* Enable entropy source 1 */ +/* ver2 devices - m4-rng, m7-rng */ +#define RNG_v2_CTL_WAIT 0x0000000007fff800ULL /* Minimum wait time */ +#define RNG_v2_CTL_WAIT_SHIFT 12 +#define RNG_v2_CTL_BYPASS 0x0000000000000400ULL /* VCO voltage source */ +#define RNG_v2_CTL_VCO 0x0000000000000300ULL /* VCO rate control */ +#define RNG_v2_CTL_VCO_SHIFT 9 +#define RNG_v2_CTL_PERF 0x0000000000000180ULL /* Perf */ +#define RNG_v2_CTL_ASEL 0x0000000000000070ULL /* Analog MUX select */ +#define RNG_v2_CTL_ASEL_SHIFT 4 +#define RNG_v2_CTL_ASEL_NOOUT 7 + + #define HV_FAST_RNG_GET_DIAG_CTL 0x130 #define HV_FAST_RNG_CTL_READ 0x131 #define HV_FAST_RNG_CTL_WRITE 0x132 @@ -60,6 +76,20 @@ extern unsigned long sun4v_rng_data_read_diag_v2(unsigned long data_ra, extern unsigned 
long sun4v_rng_data_read(unsigned long data_ra, unsigned long *tick_delta); +enum n2rng_compat_id { + N2_n2_rng, + N2_vf_rng, + N2_kt_rng, + N2_m4_rng, + N2_m7_rng, +}; + +struct n2rng_template { + enum n2rng_compat_id id; + int multi_capable; + int chip_version; +}; + struct n2rng_unit { u64 control[HV_RNG_NUM_CONTROL]; }; @@ -74,6 +104,7 @@ struct n2rng { #define N2RNG_FLAG_SHUTDOWN 0x00000010 /* Driver unregistering */ #define N2RNG_FLAG_BUFFER_VALID 0x00000020 /* u32 buffer holds valid data */ + struct n2rng_template *data; int num_units; struct n2rng_unit *units; @@ -97,8 +128,10 @@ struct n2rng { u64 scratch_control[HV_RNG_NUM_CONTROL]; -#define SELFTEST_TICKS 38859 -#define SELFTEST_VAL ((u64)0xB8820C7BD387E32C) +#define RNG_v1_SELFTEST_TICKS 38859 +#define RNG_v1_SELFTEST_VAL ((u64)0xB8820C7BD387E32C) +#define RNG_v2_SELFTEST_TICKS 64 +#define RNG_v2_SELFTEST_VAL ((u64)0xffffffffffffffff) #define SELFTEST_POLY ((u64)0x231DCEE91262B8A3) #define SELFTEST_MATCH_GOAL 6 #define SELFTEST_LOOPS_MAX 40000 diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 79564785ae30..74824612d3e9 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -339,7 +339,7 @@ config CRYPTO_DEV_OMAP_DES config CRYPTO_DEV_PICOXCELL tristate "Support for picoXcell IPSEC and Layer2 crypto engines" - depends on ARCH_PICOXCELL && HAVE_CLK + depends on (ARCH_PICOXCELL || COMPILE_TEST) && HAVE_CLK select CRYPTO_AEAD select CRYPTO_AES select CRYPTO_AUTHENC @@ -415,6 +415,18 @@ config CRYPTO_DEV_BFIN_CRC Newer Blackfin processors have CRC hardware. Select this if you want to use the Blackfin CRC module. +config CRYPTO_DEV_ATMEL_AUTHENC + tristate "Support for Atmel IPSEC/SSL hw accelerator" + depends on (ARCH_AT91 && HAS_DMA) || COMPILE_TEST + select CRYPTO_AUTHENC + select CRYPTO_DEV_ATMEL_AES + select CRYPTO_DEV_ATMEL_SHA + help + Some Atmel processors can combine the AES and SHA hw accelerators + to speed up IPSEC/SSL processing. + Select this if you want to use the Atmel modules for + authenc(hmac(shaX),Y(cbc)) algorithms. + config CRYPTO_DEV_ATMEL_AES tristate "Support for Atmel AES hw accelerator" depends on HAS_DMA @@ -553,6 +565,22 @@ config CRYPTO_DEV_ROCKCHIP This driver interfaces with the hardware crypto accelerator, supporting the cbc/ecb chain modes and the aes/des/des3_ede ciphers. +config CRYPTO_DEV_MEDIATEK + tristate "MediaTek's EIP97 Cryptographic Engine driver" + depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST + select CRYPTO_AES + select CRYPTO_AEAD + select CRYPTO_BLKCIPHER + select CRYPTO_CTR + select CRYPTO_SHA1 + select CRYPTO_SHA256 + select CRYPTO_SHA512 + select CRYPTO_HMAC + help + This driver allows you to utilize the EIP97 hardware crypto + accelerator, which can be found on the MT7623, MT2701, MT8521p, etc. + Select this if you want to use it for AES/SHA1/SHA2 algorithms.
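Neither of the new drivers is called by name from client code: both register their transforms with the generic crypto API, so a kernel-side user requests the algorithm template and lets priority-based selection pick a hardware backend. A hedged sketch of that lookup (the function and log message are illustrative, not part of this series):

	#include <crypto/aead.h>
	#include <linux/err.h>
	#include <linux/kernel.h>

	static int probe_authenc_offload(void)
	{
		struct crypto_aead *tfm;

		/* With CRYPTO_DEV_ATMEL_AUTHENC (or CRYPTO_DEV_MEDIATEK) enabled,
		 * the highest-priority implementation of this template can be
		 * one of the hardware drivers added by this series. */
		tfm = crypto_alloc_aead("authenc(hmac(sha1),cbc(aes))", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		pr_info("authenc backed by %s\n",
			crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm)));

		crypto_free_aead(tfm);
		return 0;
	}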
+ source "drivers/crypto/chelsio/Kconfig" source "drivers/crypto/virtio/Kconfig" diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index bc53cb833a06..8891ccc5844c 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/ +obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/ obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o @@ -10,7 +11,9 @@ obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell/ +obj-$(CONFIG_CRYPTO_DEV_MEDIATEK) += mediatek/ obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o +obj-$(CONFIG_CRYPTO_DEV_MXC_SCC) += mxc-scc.o obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o n2_crypto-y := n2_core.o n2_asm.o obj-$(CONFIG_CRYPTO_DEV_NX) += nx/ @@ -21,15 +24,13 @@ obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/ +obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/ +obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ +obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/ obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o -obj-$(CONFIG_CRYPTO_DEV_MXC_SCC) += mxc-scc.o +obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ -obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/ -obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ -obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ -obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/ -obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/ -obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/ obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/ +obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ diff --git a/drivers/crypto/atmel-aes-regs.h b/drivers/crypto/atmel-aes-regs.h index 0ec04407b533..7694679802b3 100644 --- a/drivers/crypto/atmel-aes-regs.h +++ b/drivers/crypto/atmel-aes-regs.h @@ -68,6 +68,22 @@ #define AES_CTRR 0x98 #define AES_GCMHR(x) (0x9c + ((x) * 0x04)) +#define AES_EMR 0xb0 +#define AES_EMR_APEN BIT(0) /* Auto Padding Enable */ +#define AES_EMR_APM BIT(1) /* Auto Padding Mode */ +#define AES_EMR_APM_IPSEC 0x0 +#define AES_EMR_APM_SSL BIT(1) +#define AES_EMR_PLIPEN BIT(4) /* PLIP Enable */ +#define AES_EMR_PLIPD BIT(5) /* PLIP Decipher */ +#define AES_EMR_PADLEN_MASK (0xFu << 8) +#define AES_EMR_PADLEN_OFFSET 8 +#define AES_EMR_PADLEN(padlen) (((padlen) << AES_EMR_PADLEN_OFFSET) &\ + AES_EMR_PADLEN_MASK) +#define AES_EMR_NHEAD_MASK (0xFu << 16) +#define AES_EMR_NHEAD_OFFSET 16 +#define AES_EMR_NHEAD(nhead) (((nhead) << AES_EMR_NHEAD_OFFSET) &\ + AES_EMR_NHEAD_MASK) + #define AES_TWR(x) (0xc0 + ((x) * 0x04)) #define AES_ALPHAR(x) (0xd0 + ((x) * 0x04)) diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 0e3d0d655b96..29e20c37f3a6 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -41,6 +41,7 @@ #include <linux/platform_data/crypto-atmel.h> #include <dt-bindings/dma/at91.h> #include "atmel-aes-regs.h" +#include "atmel-authenc.h" #define ATMEL_AES_PRIORITY 300 @@ -78,6 +79,7 @@ #define AES_FLAGS_INIT BIT(2) #define AES_FLAGS_BUSY BIT(3) #define AES_FLAGS_DUMP_REG BIT(4) +#define AES_FLAGS_OWN_SHA 
BIT(5) #define AES_FLAGS_PERSISTENT (AES_FLAGS_INIT | AES_FLAGS_BUSY) @@ -92,6 +94,7 @@ struct atmel_aes_caps { bool has_ctr32; bool has_gcm; bool has_xts; + bool has_authenc; u32 max_burst_size; }; @@ -144,10 +147,31 @@ struct atmel_aes_xts_ctx { u32 key2[AES_KEYSIZE_256 / sizeof(u32)]; }; +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC +struct atmel_aes_authenc_ctx { + struct atmel_aes_base_ctx base; + struct atmel_sha_authenc_ctx *auth; +}; +#endif + struct atmel_aes_reqctx { unsigned long mode; }; +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC +struct atmel_aes_authenc_reqctx { + struct atmel_aes_reqctx base; + + struct scatterlist src[2]; + struct scatterlist dst[2]; + size_t textlen; + u32 digest[SHA512_DIGEST_SIZE / sizeof(u32)]; + + /* auth_req MUST be placed last. */ + struct ahash_request auth_req; +}; +#endif + struct atmel_aes_dma { struct dma_chan *chan; struct scatterlist *sg; @@ -291,6 +315,9 @@ static const char *atmel_aes_reg_name(u32 offset, char *tmp, size_t sz) snprintf(tmp, sz, "GCMHR[%u]", (offset - AES_GCMHR(0)) >> 2); break; + case AES_EMR: + return "EMR"; + case AES_TWR(0): case AES_TWR(1): case AES_TWR(2): @@ -463,8 +490,16 @@ static inline bool atmel_aes_is_encrypt(const struct atmel_aes_dev *dd) return (dd->flags & AES_FLAGS_ENCRYPT); } +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC +static void atmel_aes_authenc_complete(struct atmel_aes_dev *dd, int err); +#endif + static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err) { +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + atmel_aes_authenc_complete(dd, err); +#endif + clk_disable(dd->iclk); dd->flags &= ~AES_FLAGS_BUSY; @@ -879,6 +914,7 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd, struct crypto_async_request *areq, *backlog; struct atmel_aes_base_ctx *ctx; unsigned long flags; + bool start_async; int err, ret = 0; spin_lock_irqsave(&dd->lock, flags); @@ -904,10 +940,12 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd, dd->areq = areq; dd->ctx = ctx; - dd->is_async = (areq != new_areq); + start_async = (areq != new_areq); + dd->is_async = start_async; + /* WARNING: ctx->start() MAY change dd->is_async. */ err = ctx->start(dd); - return (dd->is_async) ? ret : err; + return (start_async) ?
ret : err; } @@ -1928,6 +1966,384 @@ static struct crypto_alg aes_xts_alg = { } }; +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC +/* authenc aead functions */ + +static int atmel_aes_authenc_start(struct atmel_aes_dev *dd); +static int atmel_aes_authenc_init(struct atmel_aes_dev *dd, int err, + bool is_async); +static int atmel_aes_authenc_transfer(struct atmel_aes_dev *dd, int err, + bool is_async); +static int atmel_aes_authenc_digest(struct atmel_aes_dev *dd); +static int atmel_aes_authenc_final(struct atmel_aes_dev *dd, int err, + bool is_async); + +static void atmel_aes_authenc_complete(struct atmel_aes_dev *dd, int err) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + + if (err && (dd->flags & AES_FLAGS_OWN_SHA)) + atmel_sha_authenc_abort(&rctx->auth_req); + dd->flags &= ~AES_FLAGS_OWN_SHA; +} + +static int atmel_aes_authenc_start(struct atmel_aes_dev *dd) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm); + int err; + + atmel_aes_set_mode(dd, &rctx->base); + + err = atmel_aes_hw_init(dd); + if (err) + return atmel_aes_complete(dd, err); + + return atmel_sha_authenc_schedule(&rctx->auth_req, ctx->auth, + atmel_aes_authenc_init, dd); +} + +static int atmel_aes_authenc_init(struct atmel_aes_dev *dd, int err, + bool is_async) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + + if (is_async) + dd->is_async = true; + if (err) + return atmel_aes_complete(dd, err); + + /* If here, we've got the ownership of the SHA device. */ + dd->flags |= AES_FLAGS_OWN_SHA; + + /* Configure the SHA device. */ + return atmel_sha_authenc_init(&rctx->auth_req, + req->src, req->assoclen, + rctx->textlen, + atmel_aes_authenc_transfer, dd); +} + +static int atmel_aes_authenc_transfer(struct atmel_aes_dev *dd, int err, + bool is_async) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + bool enc = atmel_aes_is_encrypt(dd); + struct scatterlist *src, *dst; + u32 iv[AES_BLOCK_SIZE / sizeof(u32)]; + u32 emr; + + if (is_async) + dd->is_async = true; + if (err) + return atmel_aes_complete(dd, err); + + /* Prepare src and dst scatter-lists to transfer cipher/plain texts. */ + src = scatterwalk_ffwd(rctx->src, req->src, req->assoclen); + dst = src; + + if (req->src != req->dst) + dst = scatterwalk_ffwd(rctx->dst, req->dst, req->assoclen); + + /* Configure the AES device. */ + memcpy(iv, req->iv, sizeof(iv)); + + /* + * Here we always set the 2nd parameter of atmel_aes_write_ctrl() to + * 'true' even if the data transfer is actually performed by the CPU (so + * not by the DMA) because we must force the AES_MR_SMOD bitfield to the + * value AES_MR_SMOD_IDATAR0. Indeed, both AES_MR_SMOD and SHA_MR_SMOD + * must be set to *_MR_SMOD_IDATAR0. + */ + atmel_aes_write_ctrl(dd, true, iv); + emr = AES_EMR_PLIPEN; + if (!enc) + emr |= AES_EMR_PLIPD; + atmel_aes_write(dd, AES_EMR, emr); + + /* Transfer data. 
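The authenc path above is written in continuation-passing style: each stage either calls its successor directly or parks it as a resume callback and returns -EINPROGRESS for the interrupt path to pick up, which is why dd->is_async has to be refreshed at every hop. A stripped-down sketch of the idea, with simplified types rather than the driver's exact signatures:

	#include <errno.h>
	#include <stdbool.h>

	struct dev_state;
	typedef int (*stage_fn)(struct dev_state *dd, int err, bool is_async);

	struct dev_state {
		stage_fn resume;	/* next stage, invoked from the IRQ path */
		bool is_async;		/* true once we left the caller's context */
	};

	static int run_or_park(struct dev_state *dd, stage_fn next, bool hw_busy)
	{
		if (!hw_busy)
			return next(dd, 0, false);	/* synchronous fast path */

		dd->resume = next;	/* the IRQ handler will call dd->resume() */
		return -EINPROGRESS;
	}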
*/ + return atmel_aes_dma_start(dd, src, dst, rctx->textlen, + atmel_aes_authenc_digest); +} + +static int atmel_aes_authenc_digest(struct atmel_aes_dev *dd) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + + /* atmel_sha_authenc_final() releases the SHA device. */ + dd->flags &= ~AES_FLAGS_OWN_SHA; + return atmel_sha_authenc_final(&rctx->auth_req, + rctx->digest, sizeof(rctx->digest), + atmel_aes_authenc_final, dd); +} + +static int atmel_aes_authenc_final(struct atmel_aes_dev *dd, int err, + bool is_async) +{ + struct aead_request *req = aead_request_cast(dd->areq); + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + bool enc = atmel_aes_is_encrypt(dd); + u32 idigest[SHA512_DIGEST_SIZE / sizeof(u32)], *odigest = rctx->digest; + u32 offs, authsize; + + if (is_async) + dd->is_async = true; + if (err) + goto complete; + + offs = req->assoclen + rctx->textlen; + authsize = crypto_aead_authsize(tfm); + if (enc) { + scatterwalk_map_and_copy(odigest, req->dst, offs, authsize, 1); + } else { + scatterwalk_map_and_copy(idigest, req->src, offs, authsize, 0); + if (crypto_memneq(idigest, odigest, authsize)) + err = -EBADMSG; + } + +complete: + return atmel_aes_complete(dd, err); +} + +static int atmel_aes_authenc_setkey(struct crypto_aead *tfm, const u8 *key, + unsigned int keylen) +{ + struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm); + struct crypto_authenc_keys keys; + u32 flags; + int err; + + if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) + goto badkey; + + if (keys.enckeylen > sizeof(ctx->base.key)) + goto badkey; + + /* Save auth key. */ + flags = crypto_aead_get_flags(tfm); + err = atmel_sha_authenc_setkey(ctx->auth, + keys.authkey, keys.authkeylen, + &flags); + crypto_aead_set_flags(tfm, flags & CRYPTO_TFM_RES_MASK); + if (err) { + memzero_explicit(&keys, sizeof(keys)); + return err; + } + + /* Save enc key. 
*/ + ctx->base.keylen = keys.enckeylen; + memcpy(ctx->base.key, keys.enckey, keys.enckeylen); + + memzero_explicit(&keys, sizeof(keys)); + return 0; + +badkey: + crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + memzero_explicit(&keys, sizeof(keys)); + return -EINVAL; +} + +static int atmel_aes_authenc_init_tfm(struct crypto_aead *tfm, + unsigned long auth_mode) +{ + struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm); + unsigned int auth_reqsize = atmel_sha_authenc_get_reqsize(); + + ctx->auth = atmel_sha_authenc_spawn(auth_mode); + if (IS_ERR(ctx->auth)) + return PTR_ERR(ctx->auth); + + crypto_aead_set_reqsize(tfm, (sizeof(struct atmel_aes_authenc_reqctx) + + auth_reqsize)); + ctx->base.start = atmel_aes_authenc_start; + + return 0; +} + +static int atmel_aes_authenc_hmac_sha1_init_tfm(struct crypto_aead *tfm) +{ + return atmel_aes_authenc_init_tfm(tfm, SHA_FLAGS_HMAC_SHA1); +} + +static int atmel_aes_authenc_hmac_sha224_init_tfm(struct crypto_aead *tfm) +{ + return atmel_aes_authenc_init_tfm(tfm, SHA_FLAGS_HMAC_SHA224); +} + +static int atmel_aes_authenc_hmac_sha256_init_tfm(struct crypto_aead *tfm) +{ + return atmel_aes_authenc_init_tfm(tfm, SHA_FLAGS_HMAC_SHA256); +} + +static int atmel_aes_authenc_hmac_sha384_init_tfm(struct crypto_aead *tfm) +{ + return atmel_aes_authenc_init_tfm(tfm, SHA_FLAGS_HMAC_SHA384); +} + +static int atmel_aes_authenc_hmac_sha512_init_tfm(struct crypto_aead *tfm) +{ + return atmel_aes_authenc_init_tfm(tfm, SHA_FLAGS_HMAC_SHA512); +} + +static void atmel_aes_authenc_exit_tfm(struct crypto_aead *tfm) +{ + struct atmel_aes_authenc_ctx *ctx = crypto_aead_ctx(tfm); + + atmel_sha_authenc_free(ctx->auth); +} + +static int atmel_aes_authenc_crypt(struct aead_request *req, + unsigned long mode) +{ + struct atmel_aes_authenc_reqctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct atmel_aes_base_ctx *ctx = crypto_aead_ctx(tfm); + u32 authsize = crypto_aead_authsize(tfm); + bool enc = (mode & AES_FLAGS_ENCRYPT); + struct atmel_aes_dev *dd; + + /* Compute text length. */ + if (!enc && req->cryptlen < authsize) + return -EINVAL; + rctx->textlen = req->cryptlen - (enc ? 0 : authsize); + + /* + * Empty messages are not supported yet: + * the SHA auto-padding can be used only on non-empty messages. + * Hence a special case needs to be implemented for empty messages.
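For reference, the key blob that atmel_aes_authenc_setkey() above unpacks with crypto_authenc_extractkeys() follows the standard authenc layout: an rtattr header carrying the encryption-key length, then the raw authentication key, then the encryption key. A kernel-side sketch of how a caller packs such a blob (the helper name is illustrative):

	#include <crypto/authenc.h>
	#include <linux/rtnetlink.h>
	#include <linux/string.h>

	static unsigned int authenc_pack_key(u8 *buf,
					     const u8 *authkey, unsigned int authkeylen,
					     const u8 *enckey, unsigned int enckeylen)
	{
		struct rtattr *rta = (struct rtattr *)buf;
		struct crypto_authenc_key_param *param;

		/* Header: tells crypto_authenc_extractkeys() how long the
		 * encryption key is; the auth key length is what remains. */
		rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM;
		rta->rta_len = RTA_LENGTH(sizeof(*param));
		param = RTA_DATA(rta);
		param->enckeylen = cpu_to_be32(enckeylen);

		/* Payload: authentication key first, encryption key behind it. */
		memcpy(buf + RTA_SPACE(sizeof(*param)), authkey, authkeylen);
		memcpy(buf + RTA_SPACE(sizeof(*param)) + authkeylen,
		       enckey, enckeylen);

		return RTA_SPACE(sizeof(*param)) + authkeylen + enckeylen;
	}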
+ */ + if (!rctx->textlen && !req->assoclen) + return -EINVAL; + + rctx->base.mode = mode; + ctx->block_size = AES_BLOCK_SIZE; + + dd = atmel_aes_find_dev(ctx); + if (!dd) + return -ENODEV; + + return atmel_aes_handle_queue(dd, &req->base); +} + +static int atmel_aes_authenc_cbc_aes_encrypt(struct aead_request *req) +{ + return atmel_aes_authenc_crypt(req, AES_FLAGS_CBC | AES_FLAGS_ENCRYPT); +} + +static int atmel_aes_authenc_cbc_aes_decrypt(struct aead_request *req) +{ + return atmel_aes_authenc_crypt(req, AES_FLAGS_CBC); +} + +static struct aead_alg aes_authenc_algs[] = { +{ + .setkey = atmel_aes_authenc_setkey, + .encrypt = atmel_aes_authenc_cbc_aes_encrypt, + .decrypt = atmel_aes_authenc_cbc_aes_decrypt, + .init = atmel_aes_authenc_hmac_sha1_init_tfm, + .exit = atmel_aes_authenc_exit_tfm, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = "atmel-authenc-hmac-sha1-cbc-aes", + .cra_priority = ATMEL_AES_PRIORITY, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_authenc_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}, +{ + .setkey = atmel_aes_authenc_setkey, + .encrypt = atmel_aes_authenc_cbc_aes_encrypt, + .decrypt = atmel_aes_authenc_cbc_aes_decrypt, + .init = atmel_aes_authenc_hmac_sha224_init_tfm, + .exit = atmel_aes_authenc_exit_tfm, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + + .base = { + .cra_name = "authenc(hmac(sha224),cbc(aes))", + .cra_driver_name = "atmel-authenc-hmac-sha224-cbc-aes", + .cra_priority = ATMEL_AES_PRIORITY, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_authenc_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}, +{ + .setkey = atmel_aes_authenc_setkey, + .encrypt = atmel_aes_authenc_cbc_aes_encrypt, + .decrypt = atmel_aes_authenc_cbc_aes_decrypt, + .init = atmel_aes_authenc_hmac_sha256_init_tfm, + .exit = atmel_aes_authenc_exit_tfm, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + + .base = { + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = "atmel-authenc-hmac-sha256-cbc-aes", + .cra_priority = ATMEL_AES_PRIORITY, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_authenc_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}, +{ + .setkey = atmel_aes_authenc_setkey, + .encrypt = atmel_aes_authenc_cbc_aes_encrypt, + .decrypt = atmel_aes_authenc_cbc_aes_decrypt, + .init = atmel_aes_authenc_hmac_sha384_init_tfm, + .exit = atmel_aes_authenc_exit_tfm, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, + + .base = { + .cra_name = "authenc(hmac(sha384),cbc(aes))", + .cra_driver_name = "atmel-authenc-hmac-sha384-cbc-aes", + .cra_priority = ATMEL_AES_PRIORITY, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_authenc_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}, +{ + .setkey = atmel_aes_authenc_setkey, + .encrypt = atmel_aes_authenc_cbc_aes_encrypt, + .decrypt = atmel_aes_authenc_cbc_aes_decrypt, + .init = atmel_aes_authenc_hmac_sha512_init_tfm, + .exit = atmel_aes_authenc_exit_tfm, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, + + .base = { + .cra_name = "authenc(hmac(sha512),cbc(aes))", + .cra_driver_name = "atmel-authenc-hmac-sha512-cbc-aes", + .cra_priority = ATMEL_AES_PRIORITY, + 
.cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_authenc_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}, +}; +#endif /* CONFIG_CRYPTO_DEV_ATMEL_AUTHENC */ /* Probe functions */ @@ -2037,6 +2453,12 @@ static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd) { int i; +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + if (dd->caps.has_authenc) + for (i = 0; i < ARRAY_SIZE(aes_authenc_algs); i++) + crypto_unregister_aead(&aes_authenc_algs[i]); +#endif + if (dd->caps.has_xts) crypto_unregister_alg(&aes_xts_alg); @@ -2078,8 +2500,25 @@ static int atmel_aes_register_algs(struct atmel_aes_dev *dd) goto err_aes_xts_alg; } +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + if (dd->caps.has_authenc) { + for (i = 0; i < ARRAY_SIZE(aes_authenc_algs); i++) { + err = crypto_register_aead(&aes_authenc_algs[i]); + if (err) + goto err_aes_authenc_alg; + } + } +#endif + return 0; +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + /* i = ARRAY_SIZE(aes_authenc_algs); */ +err_aes_authenc_alg: + for (j = 0; j < i; j++) + crypto_unregister_aead(&aes_authenc_algs[j]); + crypto_unregister_alg(&aes_xts_alg); +#endif err_aes_xts_alg: crypto_unregister_aead(&aes_gcm_alg); err_aes_gcm_alg: @@ -2100,6 +2539,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) dd->caps.has_ctr32 = 0; dd->caps.has_gcm = 0; dd->caps.has_xts = 0; + dd->caps.has_authenc = 0; dd->caps.max_burst_size = 1; /* keep only major version number */ @@ -2110,6 +2550,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) dd->caps.has_ctr32 = 1; dd->caps.has_gcm = 1; dd->caps.has_xts = 1; + dd->caps.has_authenc = 1; dd->caps.max_burst_size = 4; break; case 0x200: @@ -2268,6 +2709,13 @@ static int atmel_aes_probe(struct platform_device *pdev) atmel_aes_get_cap(aes_dd); +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + if (aes_dd->caps.has_authenc && !atmel_sha_authenc_is_ready()) { + err = -EPROBE_DEFER; + goto iclk_unprepare; + } +#endif + err = atmel_aes_buff_init(aes_dd); if (err) goto err_aes_buff; @@ -2304,7 +2752,8 @@ res_err: tasklet_kill(&aes_dd->done_task); tasklet_kill(&aes_dd->queue_task); aes_dd_err: - dev_err(dev, "initialization failed.\n"); + if (err != -EPROBE_DEFER) + dev_err(dev, "initialization failed.\n"); return err; } diff --git a/drivers/crypto/atmel-authenc.h b/drivers/crypto/atmel-authenc.h new file mode 100644 index 000000000000..2a60d1224143 --- /dev/null +++ b/drivers/crypto/atmel-authenc.h @@ -0,0 +1,64 @@ +/* + * API for Atmel Secure Protocol Layers Improved Performances (SPLIP) + * + * Copyright (C) 2016 Atmel Corporation + * + * Author: Cyrille Pitchen <cyrille.pitchen@atmel.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + * + * This driver is based on drivers/mtd/spi-nor/fsl-quadspi.c from Freescale. 
+ */ + +#ifndef __ATMEL_AUTHENC_H__ +#define __ATMEL_AUTHENC_H__ + +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC + +#include <crypto/authenc.h> +#include <crypto/hash.h> +#include <crypto/sha.h> +#include "atmel-sha-regs.h" + +struct atmel_aes_dev; +typedef int (*atmel_aes_authenc_fn_t)(struct atmel_aes_dev *, int, bool); + +struct atmel_sha_authenc_ctx; + +bool atmel_sha_authenc_is_ready(void); +unsigned int atmel_sha_authenc_get_reqsize(void); + +struct atmel_sha_authenc_ctx *atmel_sha_authenc_spawn(unsigned long mode); +void atmel_sha_authenc_free(struct atmel_sha_authenc_ctx *auth); +int atmel_sha_authenc_setkey(struct atmel_sha_authenc_ctx *auth, + const u8 *key, unsigned int keylen, + u32 *flags); + +int atmel_sha_authenc_schedule(struct ahash_request *req, + struct atmel_sha_authenc_ctx *auth, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *dd); +int atmel_sha_authenc_init(struct ahash_request *req, + struct scatterlist *assoc, unsigned int assoclen, + unsigned int textlen, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *dd); +int atmel_sha_authenc_final(struct ahash_request *req, + u32 *digest, unsigned int digestlen, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *dd); +void atmel_sha_authenc_abort(struct ahash_request *req); + +#endif /* CONFIG_CRYPTO_DEV_ATMEL_AUTHENC */ + +#endif /* __ATMEL_AUTHENC_H__ */ diff --git a/drivers/crypto/atmel-sha-regs.h b/drivers/crypto/atmel-sha-regs.h index e08897109cab..1b0eba4a2706 100644 --- a/drivers/crypto/atmel-sha-regs.h +++ b/drivers/crypto/atmel-sha-regs.h @@ -16,16 +16,33 @@ #define SHA_MR_MODE_MANUAL 0x0 #define SHA_MR_MODE_AUTO 0x1 #define SHA_MR_MODE_PDC 0x2 +#define SHA_MR_MODE_IDATAR0 0x2 #define SHA_MR_PROCDLY (1 << 4) #define SHA_MR_UIHV (1 << 5) #define SHA_MR_UIEHV (1 << 6) +#define SHA_MR_ALGO_MASK GENMASK(10, 8) #define SHA_MR_ALGO_SHA1 (0 << 8) #define SHA_MR_ALGO_SHA256 (1 << 8) #define SHA_MR_ALGO_SHA384 (2 << 8) #define SHA_MR_ALGO_SHA512 (3 << 8) #define SHA_MR_ALGO_SHA224 (4 << 8) +#define SHA_MR_HMAC (1 << 11) #define SHA_MR_DUALBUFF (1 << 16) +#define SHA_FLAGS_ALGO_MASK SHA_MR_ALGO_MASK +#define SHA_FLAGS_SHA1 SHA_MR_ALGO_SHA1 +#define SHA_FLAGS_SHA256 SHA_MR_ALGO_SHA256 +#define SHA_FLAGS_SHA384 SHA_MR_ALGO_SHA384 +#define SHA_FLAGS_SHA512 SHA_MR_ALGO_SHA512 +#define SHA_FLAGS_SHA224 SHA_MR_ALGO_SHA224 +#define SHA_FLAGS_HMAC SHA_MR_HMAC +#define SHA_FLAGS_HMAC_SHA1 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA1) +#define SHA_FLAGS_HMAC_SHA256 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA256) +#define SHA_FLAGS_HMAC_SHA384 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA384) +#define SHA_FLAGS_HMAC_SHA512 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA512) +#define SHA_FLAGS_HMAC_SHA224 (SHA_FLAGS_HMAC | SHA_FLAGS_SHA224) +#define SHA_FLAGS_MODE_MASK (SHA_FLAGS_HMAC | SHA_FLAGS_ALGO_MASK) + #define SHA_IER 0x10 #define SHA_IDR 0x14 #define SHA_IMR 0x18 @@ -40,6 +57,9 @@ #define SHA_ISR_URAT_MR (0x2 << 12) #define SHA_ISR_URAT_WO (0x5 << 12) +#define SHA_MSR 0x20 +#define SHA_BCR 0x30 + #define SHA_HW_VERSION 0xFC #define SHA_TPR 0x108 diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 97e34799e077..22d0c0c118da 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -41,6 +41,7 @@ #include <crypto/internal/hash.h> #include <linux/platform_data/crypto-atmel.h> #include "atmel-sha-regs.h" +#include "atmel-authenc.h" /* SHA flags */ #define SHA_FLAGS_BUSY BIT(0) @@ -50,21 +51,22 @@ #define SHA_FLAGS_INIT BIT(4) #define SHA_FLAGS_CPU BIT(5) #define SHA_FLAGS_DMA_READY BIT(6) +#define SHA_FLAGS_DUMP_REG BIT(7) + +/* bits[11:8] 
are reserved. */ #define SHA_FLAGS_FINUP BIT(16) #define SHA_FLAGS_SG BIT(17) -#define SHA_FLAGS_ALGO_MASK GENMASK(22, 18) -#define SHA_FLAGS_SHA1 BIT(18) -#define SHA_FLAGS_SHA224 BIT(19) -#define SHA_FLAGS_SHA256 BIT(20) -#define SHA_FLAGS_SHA384 BIT(21) -#define SHA_FLAGS_SHA512 BIT(22) #define SHA_FLAGS_ERROR BIT(23) #define SHA_FLAGS_PAD BIT(24) #define SHA_FLAGS_RESTORE BIT(25) +#define SHA_FLAGS_IDATAR0 BIT(26) +#define SHA_FLAGS_WAIT_DATARDY BIT(27) +#define SHA_OP_INIT 0 #define SHA_OP_UPDATE 1 #define SHA_OP_FINAL 2 +#define SHA_OP_DIGEST 3 #define SHA_BUFFER_LEN (PAGE_SIZE / 16) @@ -76,6 +78,7 @@ struct atmel_sha_caps { bool has_sha224; bool has_sha_384_512; bool has_uihv; + bool has_hmac; }; struct atmel_sha_dev; @@ -101,12 +104,16 @@ struct atmel_sha_reqctx { unsigned int total; /* total request */ size_t block_size; + size_t hash_size; u8 buffer[SHA_BUFFER_LEN + SHA512_BLOCK_SIZE] __aligned(sizeof(u32)); }; +typedef int (*atmel_sha_fn_t)(struct atmel_sha_dev *); + struct atmel_sha_ctx { struct atmel_sha_dev *dd; + atmel_sha_fn_t start; unsigned long flags; }; @@ -116,6 +123,9 @@ struct atmel_sha_ctx { struct atmel_sha_dma { struct dma_chan *chan; struct dma_slave_config dma_conf; + struct scatterlist *sg; + int nents; + unsigned int last_sg_length; }; struct atmel_sha_dev { @@ -134,11 +144,17 @@ struct atmel_sha_dev { unsigned long flags; struct crypto_queue queue; struct ahash_request *req; + bool is_async; + bool force_complete; + atmel_sha_fn_t resume; + atmel_sha_fn_t cpu_transfer_complete; struct atmel_sha_dma dma_lch_in; struct atmel_sha_caps caps; + struct scatterlist tmp; + u32 hw_version; }; @@ -152,17 +168,140 @@ static struct atmel_sha_drv atmel_sha = { .lock = __SPIN_LOCK_UNLOCKED(atmel_sha.lock), }; +#ifdef VERBOSE_DEBUG +static const char *atmel_sha_reg_name(u32 offset, char *tmp, size_t sz, bool wr) +{ + switch (offset) { + case SHA_CR: + return "CR"; + + case SHA_MR: + return "MR"; + + case SHA_IER: + return "IER"; + + case SHA_IDR: + return "IDR"; + + case SHA_IMR: + return "IMR"; + + case SHA_ISR: + return "ISR"; + + case SHA_MSR: + return "MSR"; + + case SHA_BCR: + return "BCR"; + + case SHA_REG_DIN(0): + case SHA_REG_DIN(1): + case SHA_REG_DIN(2): + case SHA_REG_DIN(3): + case SHA_REG_DIN(4): + case SHA_REG_DIN(5): + case SHA_REG_DIN(6): + case SHA_REG_DIN(7): + case SHA_REG_DIN(8): + case SHA_REG_DIN(9): + case SHA_REG_DIN(10): + case SHA_REG_DIN(11): + case SHA_REG_DIN(12): + case SHA_REG_DIN(13): + case SHA_REG_DIN(14): + case SHA_REG_DIN(15): + snprintf(tmp, sz, "IDATAR[%u]", (offset - SHA_REG_DIN(0)) >> 2); + break; + + case SHA_REG_DIGEST(0): + case SHA_REG_DIGEST(1): + case SHA_REG_DIGEST(2): + case SHA_REG_DIGEST(3): + case SHA_REG_DIGEST(4): + case SHA_REG_DIGEST(5): + case SHA_REG_DIGEST(6): + case SHA_REG_DIGEST(7): + case SHA_REG_DIGEST(8): + case SHA_REG_DIGEST(9): + case SHA_REG_DIGEST(10): + case SHA_REG_DIGEST(11): + case SHA_REG_DIGEST(12): + case SHA_REG_DIGEST(13): + case SHA_REG_DIGEST(14): + case SHA_REG_DIGEST(15): + if (wr) + snprintf(tmp, sz, "IDATAR[%u]", + 16u + ((offset - SHA_REG_DIGEST(0)) >> 2)); + else + snprintf(tmp, sz, "ODATAR[%u]", + (offset - SHA_REG_DIGEST(0)) >> 2); + break; + + case SHA_HW_VERSION: + return "HWVER"; + + default: + snprintf(tmp, sz, "0x%02x", offset); + break; + } + + return tmp; +} + +#endif /* VERBOSE_DEBUG */ + static inline u32 atmel_sha_read(struct atmel_sha_dev *dd, u32 offset) { - return readl_relaxed(dd->io_base + offset); + u32 value = readl_relaxed(dd->io_base + offset); + +#ifdef 
VERBOSE_DEBUG + if (dd->flags & SHA_FLAGS_DUMP_REG) { + char tmp[16]; + + dev_vdbg(dd->dev, "read 0x%08x from %s\n", value, + atmel_sha_reg_name(offset, tmp, sizeof(tmp), false)); + } +#endif /* VERBOSE_DEBUG */ + + return value; } static inline void atmel_sha_write(struct atmel_sha_dev *dd, u32 offset, u32 value) { +#ifdef VERBOSE_DEBUG + if (dd->flags & SHA_FLAGS_DUMP_REG) { + char tmp[16]; + + dev_vdbg(dd->dev, "write 0x%08x into %s\n", value, + atmel_sha_reg_name(offset, tmp, sizeof(tmp), true)); + } +#endif /* VERBOSE_DEBUG */ + writel_relaxed(value, dd->io_base + offset); } +static inline int atmel_sha_complete(struct atmel_sha_dev *dd, int err) +{ + struct ahash_request *req = dd->req; + + dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU | + SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY | + SHA_FLAGS_DUMP_REG); + + clk_disable(dd->iclk); + + if ((dd->is_async || dd->force_complete) && req->base.complete) + req->base.complete(&req->base, err); + + /* handle new request */ + tasklet_schedule(&dd->queue_task); + + return err; +} + static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx) { size_t count; @@ -241,7 +380,9 @@ static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length) bits[1] = cpu_to_be64(size[0] << 3); bits[0] = cpu_to_be64(size[1] << 3 | size[0] >> 61); - if (ctx->flags & (SHA_FLAGS_SHA384 | SHA_FLAGS_SHA512)) { + switch (ctx->flags & SHA_FLAGS_ALGO_MASK) { + case SHA_FLAGS_SHA384: + case SHA_FLAGS_SHA512: index = ctx->bufcnt & 0x7f; padlen = (index < 112) ? (112 - index) : ((128+112) - index); *(ctx->buffer + ctx->bufcnt) = 0x80; @@ -249,7 +390,9 @@ static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length) memcpy(ctx->buffer + ctx->bufcnt + padlen, bits, 16); ctx->bufcnt += padlen + 16; ctx->flags |= SHA_FLAGS_PAD; - } else { + break; + + default: index = ctx->bufcnt & 0x3f; padlen = (index < 56) ? 
(56 - index) : ((64+56) - index); *(ctx->buffer + ctx->bufcnt) = 0x80; @@ -257,14 +400,12 @@ static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length) memcpy(ctx->buffer + ctx->bufcnt + padlen, &bits[1], 8); ctx->bufcnt += padlen + 8; ctx->flags |= SHA_FLAGS_PAD; + break; } } -static int atmel_sha_init(struct ahash_request *req) +static struct atmel_sha_dev *atmel_sha_find_dev(struct atmel_sha_ctx *tctx) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct atmel_sha_ctx *tctx = crypto_ahash_ctx(tfm); - struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); struct atmel_sha_dev *dd = NULL; struct atmel_sha_dev *tmp; @@ -281,6 +422,16 @@ static int atmel_sha_init(struct ahash_request *req) spin_unlock_bh(&atmel_sha.lock); + return dd; +} + +static int atmel_sha_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_ctx *tctx = crypto_ahash_ctx(tfm); + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + struct atmel_sha_dev *dd = atmel_sha_find_dev(tctx); + ctx->dd = dd; ctx->flags = 0; @@ -397,6 +548,19 @@ static void atmel_sha_write_ctrl(struct atmel_sha_dev *dd, int dma) atmel_sha_write(dd, SHA_MR, valmr); } +static inline int atmel_sha_wait_for_data_ready(struct atmel_sha_dev *dd, + atmel_sha_fn_t resume) +{ + u32 isr = atmel_sha_read(dd, SHA_ISR); + + if (unlikely(isr & SHA_INT_DATARDY)) + return resume(dd); + + dd->resume = resume; + atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY); + return -EINPROGRESS; +} + static int atmel_sha_xmit_cpu(struct atmel_sha_dev *dd, const u8 *buf, size_t length, int final) { @@ -467,6 +631,8 @@ static void atmel_sha_dma_callback(void *data) { struct atmel_sha_dev *dd = data; + dd->is_async = true; + /* dma_lch_in - completed - wait DATRDY */ atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY); } @@ -502,7 +668,7 @@ static int atmel_sha_xmit_dma(struct atmel_sha_dev *dd, dma_addr_t dma_addr1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); } if (!in_desc) - return -EINVAL; + atmel_sha_complete(dd, -EINVAL); in_desc->callback = atmel_sha_dma_callback; in_desc->callback_param = dd; @@ -559,7 +725,7 @@ static int atmel_sha_xmit_dma_map(struct atmel_sha_dev *dd, if (dma_mapping_error(dd->dev, ctx->dma_addr)) { dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen + ctx->block_size); - return -EINVAL; + atmel_sha_complete(dd, -EINVAL); } ctx->flags &= ~SHA_FLAGS_SG; @@ -650,7 +816,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) if (dma_mapping_error(dd->dev, ctx->dma_addr)) { dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen + ctx->block_size); - return -EINVAL; + atmel_sha_complete(dd, -EINVAL); } if (length == 0) { @@ -664,7 +830,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) { dev_err(dd->dev, "dma_map_sg error\n"); - return -EINVAL; + atmel_sha_complete(dd, -EINVAL); } ctx->flags |= SHA_FLAGS_SG; @@ -678,7 +844,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) { dev_err(dd->dev, "dma_map_sg error\n"); - return -EINVAL; + atmel_sha_complete(dd, -EINVAL); } ctx->flags |= SHA_FLAGS_SG; @@ -796,16 +962,28 @@ static void atmel_sha_copy_ready_hash(struct ahash_request *req) if (!req->result) return; - if (ctx->flags & SHA_FLAGS_SHA1) + switch (ctx->flags & SHA_FLAGS_ALGO_MASK) { + default: + case SHA_FLAGS_SHA1: memcpy(req->result, ctx->digest, SHA1_DIGEST_SIZE); - else if (ctx->flags & SHA_FLAGS_SHA224) + 
break; + + case SHA_FLAGS_SHA224: memcpy(req->result, ctx->digest, SHA224_DIGEST_SIZE); - else if (ctx->flags & SHA_FLAGS_SHA256) + break; + + case SHA_FLAGS_SHA256: memcpy(req->result, ctx->digest, SHA256_DIGEST_SIZE); - else if (ctx->flags & SHA_FLAGS_SHA384) + break; + + case SHA_FLAGS_SHA384: memcpy(req->result, ctx->digest, SHA384_DIGEST_SIZE); - else + break; + + case SHA_FLAGS_SHA512: memcpy(req->result, ctx->digest, SHA512_DIGEST_SIZE); + break; + } } static int atmel_sha_finish(struct ahash_request *req) @@ -836,16 +1014,7 @@ static void atmel_sha_finish_req(struct ahash_request *req, int err) } /* atomic operation is not needed here */ - dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU | - SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY); - - clk_disable(dd->iclk); - - if (req->base.complete) - req->base.complete(&req->base, err); - - /* handle new request */ - tasklet_schedule(&dd->queue_task); + (void)atmel_sha_complete(dd, err); } static int atmel_sha_hw_init(struct atmel_sha_dev *dd) @@ -886,8 +1055,9 @@ static int atmel_sha_handle_queue(struct atmel_sha_dev *dd, struct ahash_request *req) { struct crypto_async_request *async_req, *backlog; - struct atmel_sha_reqctx *ctx; + struct atmel_sha_ctx *ctx; unsigned long flags; + bool start_async; int err = 0, ret = 0; spin_lock_irqsave(&dd->lock, flags); @@ -912,9 +1082,25 @@ static int atmel_sha_handle_queue(struct atmel_sha_dev *dd, if (backlog) backlog->complete(backlog, -EINPROGRESS); - req = ahash_request_cast(async_req); - dd->req = req; - ctx = ahash_request_ctx(req); + ctx = crypto_tfm_ctx(async_req->tfm); + + dd->req = ahash_request_cast(async_req); + start_async = (dd->req != req); + dd->is_async = start_async; + dd->force_complete = false; + + /* WARNING: ctx->start() MAY change dd->is_async. */ + err = ctx->start(dd); + return (start_async) ? 
ret : err; +} + +static int atmel_sha_done(struct atmel_sha_dev *dd); + +static int atmel_sha_start(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + int err; dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n", ctx->op, req->nbytes); @@ -924,6 +1110,7 @@ static int atmel_sha_handle_queue(struct atmel_sha_dev *dd, if (err) goto err1; + dd->resume = atmel_sha_done; if (ctx->op == SHA_OP_UPDATE) { err = atmel_sha_update_req(dd); if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) @@ -940,7 +1127,7 @@ err1: dev_dbg(dd->dev, "exit, err: %d\n", err); - return ret; + return err; } static int atmel_sha_enqueue(struct ahash_request *req, unsigned int op) @@ -1036,8 +1223,11 @@ static int atmel_sha_import(struct ahash_request *req, const void *in) static int atmel_sha_cra_init(struct crypto_tfm *tfm) { + struct atmel_sha_ctx *ctx = crypto_tfm_ctx(tfm); + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct atmel_sha_reqctx)); + ctx->start = atmel_sha_start; return 0; } @@ -1176,9 +1366,8 @@ static void atmel_sha_queue_task(unsigned long data) atmel_sha_handle_queue(dd, NULL); } -static void atmel_sha_done_task(unsigned long data) +static int atmel_sha_done(struct atmel_sha_dev *dd) { - struct atmel_sha_dev *dd = (struct atmel_sha_dev *)data; int err = 0; if (SHA_FLAGS_CPU & dd->flags) { @@ -1204,11 +1393,21 @@ static void atmel_sha_done_task(unsigned long data) goto finish; } } - return; + return err; finish: /* finish curent request */ atmel_sha_finish_req(dd->req, err); + + return err; +} + +static void atmel_sha_done_task(unsigned long data) +{ + struct atmel_sha_dev *dd = (struct atmel_sha_dev *)data; + + dd->is_async = true; + (void)dd->resume(dd); } static irqreturn_t atmel_sha_irq(int irq, void *dev_id) @@ -1233,10 +1432,1104 @@ static irqreturn_t atmel_sha_irq(int irq, void *dev_id) return IRQ_NONE; } + +/* DMA transfer functions */ + +static bool atmel_sha_dma_check_aligned(struct atmel_sha_dev *dd, + struct scatterlist *sg, + size_t len) +{ + struct atmel_sha_dma *dma = &dd->dma_lch_in; + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + size_t bs = ctx->block_size; + int nents; + + for (nents = 0; sg; sg = sg_next(sg), ++nents) { + if (!IS_ALIGNED(sg->offset, sizeof(u32))) + return false; + + /* + * This is the last sg, the only one that is allowed to + * have an unaligned length. + */ + if (len <= sg->length) { + dma->nents = nents + 1; + dma->last_sg_length = sg->length; + sg->length = ALIGN(len, sizeof(u32)); + return true; + } + + /* All other sg lengths MUST be aligned to the block size. 
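+ * Only the last sg entry may be shorter: its original length was
+ * saved in dma->last_sg_length above and is restored by
+ * atmel_sha_dma_callback2() once the transfer has completed.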
*/ + if (!IS_ALIGNED(sg->length, bs)) + return false; + + len -= sg->length; + } + + return false; +} + +static void atmel_sha_dma_callback2(void *data) +{ + struct atmel_sha_dev *dd = data; + struct atmel_sha_dma *dma = &dd->dma_lch_in; + struct scatterlist *sg; + int nents; + + dmaengine_terminate_all(dma->chan); + dma_unmap_sg(dd->dev, dma->sg, dma->nents, DMA_TO_DEVICE); + + sg = dma->sg; + for (nents = 0; nents < dma->nents - 1; ++nents) + sg = sg_next(sg); + sg->length = dma->last_sg_length; + + dd->is_async = true; + (void)atmel_sha_wait_for_data_ready(dd, dd->resume); +} + +static int atmel_sha_dma_start(struct atmel_sha_dev *dd, + struct scatterlist *src, + size_t len, + atmel_sha_fn_t resume) +{ + struct atmel_sha_dma *dma = &dd->dma_lch_in; + struct dma_slave_config *config = &dma->dma_conf; + struct dma_chan *chan = dma->chan; + struct dma_async_tx_descriptor *desc; + dma_cookie_t cookie; + unsigned int sg_len; + int err; + + dd->resume = resume; + + /* + * dma->nents has already been initialized by + * atmel_sha_dma_check_aligned(). + */ + dma->sg = src; + sg_len = dma_map_sg(dd->dev, dma->sg, dma->nents, DMA_TO_DEVICE); + if (!sg_len) { + err = -ENOMEM; + goto exit; + } + + config->src_maxburst = 16; + config->dst_maxburst = 16; + err = dmaengine_slave_config(chan, config); + if (err) + goto unmap_sg; + + desc = dmaengine_prep_slave_sg(chan, dma->sg, sg_len, DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!desc) { + err = -ENOMEM; + goto unmap_sg; + } + + desc->callback = atmel_sha_dma_callback2; + desc->callback_param = dd; + cookie = dmaengine_submit(desc); + err = dma_submit_error(cookie); + if (err) + goto unmap_sg; + + dma_async_issue_pending(chan); + + return -EINPROGRESS; + +unmap_sg: + dma_unmap_sg(dd->dev, dma->sg, dma->nents, DMA_TO_DEVICE); +exit: + return atmel_sha_complete(dd, err); +} + + +/* CPU transfer functions */ + +static int atmel_sha_cpu_transfer(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + const u32 *words = (const u32 *)ctx->buffer; + size_t i, num_words; + u32 isr, din, din_inc; + + din_inc = (ctx->flags & SHA_FLAGS_IDATAR0) ? 0 : 1; + for (;;) { + /* Write data into the Input Data Registers. */ + num_words = DIV_ROUND_UP(ctx->bufcnt, sizeof(u32)); + for (i = 0, din = 0; i < num_words; ++i, din += din_inc) + atmel_sha_write(dd, SHA_REG_DIN(din), words[i]); + + ctx->offset += ctx->bufcnt; + ctx->total -= ctx->bufcnt; + + if (!ctx->total) + break; + + /* + * Prepare next block: + * Fill ctx->buffer now with the next data to be written into + * IDATARx: it gives time for the SHA hardware to process + * the current data so the SHA_INT_DATARDY flag might be set + * in SHA_ISR when polling this register at the beginning of + * the next loop. + */ + ctx->bufcnt = min_t(size_t, ctx->block_size, ctx->total); + scatterwalk_map_and_copy(ctx->buffer, ctx->sg, + ctx->offset, ctx->bufcnt, 0); + + /* Wait for hardware to be ready again. */ + isr = atmel_sha_read(dd, SHA_ISR); + if (!(isr & SHA_INT_DATARDY)) { + /* Not ready yet. 
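+ * The hardware is still hashing the previous block: arm the
+ * DATARDY interrupt and resume this transfer from the IRQ path
+ * instead of busy-polling SHA_ISR.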
*/ + dd->resume = atmel_sha_cpu_transfer; + atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY); + return -EINPROGRESS; + } + } + + if (unlikely(!(ctx->flags & SHA_FLAGS_WAIT_DATARDY))) + return dd->cpu_transfer_complete(dd); + + return atmel_sha_wait_for_data_ready(dd, dd->cpu_transfer_complete); +} + +static int atmel_sha_cpu_start(struct atmel_sha_dev *dd, + struct scatterlist *sg, + unsigned int len, + bool idatar0_only, + bool wait_data_ready, + atmel_sha_fn_t resume) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + + if (!len) + return resume(dd); + + ctx->flags &= ~(SHA_FLAGS_IDATAR0 | SHA_FLAGS_WAIT_DATARDY); + + if (idatar0_only) + ctx->flags |= SHA_FLAGS_IDATAR0; + + if (wait_data_ready) + ctx->flags |= SHA_FLAGS_WAIT_DATARDY; + + ctx->sg = sg; + ctx->total = len; + ctx->offset = 0; + + /* Prepare the first block to be written. */ + ctx->bufcnt = min_t(size_t, ctx->block_size, ctx->total); + scatterwalk_map_and_copy(ctx->buffer, ctx->sg, + ctx->offset, ctx->bufcnt, 0); + + dd->cpu_transfer_complete = resume; + return atmel_sha_cpu_transfer(dd); +} + +static int atmel_sha_cpu_hash(struct atmel_sha_dev *dd, + const void *data, unsigned int datalen, + bool auto_padding, + atmel_sha_fn_t resume) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + u32 msglen = (auto_padding) ? datalen : 0; + u32 mr = SHA_MR_MODE_AUTO; + + if (!(IS_ALIGNED(datalen, ctx->block_size) || auto_padding)) + return atmel_sha_complete(dd, -EINVAL); + + mr |= (ctx->flags & SHA_FLAGS_ALGO_MASK); + atmel_sha_write(dd, SHA_MR, mr); + atmel_sha_write(dd, SHA_MSR, msglen); + atmel_sha_write(dd, SHA_BCR, msglen); + atmel_sha_write(dd, SHA_CR, SHA_CR_FIRST); + + sg_init_one(&dd->tmp, data, datalen); + return atmel_sha_cpu_start(dd, &dd->tmp, datalen, false, true, resume); +} + + +/* hmac functions */ + +struct atmel_sha_hmac_key { + bool valid; + unsigned int keylen; + u8 buffer[SHA512_BLOCK_SIZE]; + u8 *keydup; +}; + +static inline void atmel_sha_hmac_key_init(struct atmel_sha_hmac_key *hkey) +{ + memset(hkey, 0, sizeof(*hkey)); +} + +static inline void atmel_sha_hmac_key_release(struct atmel_sha_hmac_key *hkey) +{ + kfree(hkey->keydup); + memset(hkey, 0, sizeof(*hkey)); +} + +static inline int atmel_sha_hmac_key_set(struct atmel_sha_hmac_key *hkey, + const u8 *key, + unsigned int keylen) +{ + atmel_sha_hmac_key_release(hkey); + + if (keylen > sizeof(hkey->buffer)) { + hkey->keydup = kmemdup(key, keylen, GFP_KERNEL); + if (!hkey->keydup) + return -ENOMEM; + + } else { + memcpy(hkey->buffer, key, keylen); + } + + hkey->valid = true; + hkey->keylen = keylen; + return 0; +} + +static inline bool atmel_sha_hmac_key_get(const struct atmel_sha_hmac_key *hkey, + const u8 **key, + unsigned int *keylen) +{ + if (!hkey->valid) + return false; + + *keylen = hkey->keylen; + *key = (hkey->keydup) ? 
hkey->keydup : hkey->buffer; + return true; +} + + +struct atmel_sha_hmac_ctx { + struct atmel_sha_ctx base; + + struct atmel_sha_hmac_key hkey; + u32 ipad[SHA512_BLOCK_SIZE / sizeof(u32)]; + u32 opad[SHA512_BLOCK_SIZE / sizeof(u32)]; + atmel_sha_fn_t resume; +}; + +static int atmel_sha_hmac_setup(struct atmel_sha_dev *dd, + atmel_sha_fn_t resume); +static int atmel_sha_hmac_prehash_key(struct atmel_sha_dev *dd, + const u8 *key, unsigned int keylen); +static int atmel_sha_hmac_prehash_key_done(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_compute_ipad_hash(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_compute_opad_hash(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_setup_done(struct atmel_sha_dev *dd); + +static int atmel_sha_hmac_init_done(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_final(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_final_done(struct atmel_sha_dev *dd); +static int atmel_sha_hmac_digest2(struct atmel_sha_dev *dd); + +static int atmel_sha_hmac_setup(struct atmel_sha_dev *dd, + atmel_sha_fn_t resume) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + unsigned int keylen; + const u8 *key; + size_t bs; + + hmac->resume = resume; + switch (ctx->flags & SHA_FLAGS_ALGO_MASK) { + case SHA_FLAGS_SHA1: + ctx->block_size = SHA1_BLOCK_SIZE; + ctx->hash_size = SHA1_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA224: + ctx->block_size = SHA224_BLOCK_SIZE; + ctx->hash_size = SHA256_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA256: + ctx->block_size = SHA256_BLOCK_SIZE; + ctx->hash_size = SHA256_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA384: + ctx->block_size = SHA384_BLOCK_SIZE; + ctx->hash_size = SHA512_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA512: + ctx->block_size = SHA512_BLOCK_SIZE; + ctx->hash_size = SHA512_DIGEST_SIZE; + break; + + default: + return atmel_sha_complete(dd, -EINVAL); + } + bs = ctx->block_size; + + if (likely(!atmel_sha_hmac_key_get(&hmac->hkey, &key, &keylen))) + return resume(dd); + + /* Compute K' from K. */ + if (unlikely(keylen > bs)) + return atmel_sha_hmac_prehash_key(dd, key, keylen); + + /* Prepare ipad. */ + memcpy((u8 *)hmac->ipad, key, keylen); + memset((u8 *)hmac->ipad + keylen, 0, bs - keylen); + return atmel_sha_hmac_compute_ipad_hash(dd); +} + +static int atmel_sha_hmac_prehash_key(struct atmel_sha_dev *dd, + const u8 *key, unsigned int keylen) +{ + return atmel_sha_cpu_hash(dd, key, keylen, true, + atmel_sha_hmac_prehash_key_done); +} + +static int atmel_sha_hmac_prehash_key_done(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + size_t ds = crypto_ahash_digestsize(tfm); + size_t bs = ctx->block_size; + size_t i, num_words = ds / sizeof(u32); + + /* Prepare ipad. 
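+ * The key is longer than the block size, so K' = H(K) as required
+ * by RFC 2104: read the digest back from the DIGEST registers and
+ * zero-pad it to a full block.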
*/ + for (i = 0; i < num_words; ++i) + hmac->ipad[i] = atmel_sha_read(dd, SHA_REG_DIGEST(i)); + memset((u8 *)hmac->ipad + ds, 0, bs - ds); + return atmel_sha_hmac_compute_ipad_hash(dd); +} + +static int atmel_sha_hmac_compute_ipad_hash(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + size_t bs = ctx->block_size; + size_t i, num_words = bs / sizeof(u32); + + memcpy(hmac->opad, hmac->ipad, bs); + for (i = 0; i < num_words; ++i) { + hmac->ipad[i] ^= 0x36363636; + hmac->opad[i] ^= 0x5c5c5c5c; + } + + return atmel_sha_cpu_hash(dd, hmac->ipad, bs, false, + atmel_sha_hmac_compute_opad_hash); +} + +static int atmel_sha_hmac_compute_opad_hash(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + size_t bs = ctx->block_size; + size_t hs = ctx->hash_size; + size_t i, num_words = hs / sizeof(u32); + + for (i = 0; i < num_words; ++i) + hmac->ipad[i] = atmel_sha_read(dd, SHA_REG_DIGEST(i)); + return atmel_sha_cpu_hash(dd, hmac->opad, bs, false, + atmel_sha_hmac_setup_done); +} + +static int atmel_sha_hmac_setup_done(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + size_t hs = ctx->hash_size; + size_t i, num_words = hs / sizeof(u32); + + for (i = 0; i < num_words; ++i) + hmac->opad[i] = atmel_sha_read(dd, SHA_REG_DIGEST(i)); + atmel_sha_hmac_key_release(&hmac->hkey); + return hmac->resume(dd); +} + +static int atmel_sha_hmac_start(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + int err; + + err = atmel_sha_hw_init(dd); + if (err) + return atmel_sha_complete(dd, err); + + switch (ctx->op) { + case SHA_OP_INIT: + err = atmel_sha_hmac_setup(dd, atmel_sha_hmac_init_done); + break; + + case SHA_OP_UPDATE: + dd->resume = atmel_sha_done; + err = atmel_sha_update_req(dd); + break; + + case SHA_OP_FINAL: + dd->resume = atmel_sha_hmac_final; + err = atmel_sha_final_req(dd); + break; + + case SHA_OP_DIGEST: + err = atmel_sha_hmac_setup(dd, atmel_sha_hmac_digest2); + break; + + default: + return atmel_sha_complete(dd, -EINVAL); + } + + return err; +} + +static int atmel_sha_hmac_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + + if (atmel_sha_hmac_key_set(&hmac->hkey, key, keylen)) { + crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + return 0; +} + +static int atmel_sha_hmac_init(struct ahash_request *req) +{ + int err; + + err = atmel_sha_init(req); + if (err) + return err; + + return atmel_sha_enqueue(req, SHA_OP_INIT); +} + +static int atmel_sha_hmac_init_done(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + size_t bs = ctx->block_size; + size_t hs = ctx->hash_size; + + ctx->bufcnt = 0; + ctx->digcnt[0] = bs; + ctx->digcnt[1] = 0; + ctx->flags |= SHA_FLAGS_RESTORE; + 
memcpy(ctx->digest, hmac->ipad, hs); + return atmel_sha_complete(dd, 0); +} + +static int atmel_sha_hmac_final(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + u32 *digest = (u32 *)ctx->digest; + size_t ds = crypto_ahash_digestsize(tfm); + size_t bs = ctx->block_size; + size_t hs = ctx->hash_size; + size_t i, num_words; + u32 mr; + + /* Save d = SHA((K' + ipad) | msg). */ + num_words = ds / sizeof(u32); + for (i = 0; i < num_words; ++i) + digest[i] = atmel_sha_read(dd, SHA_REG_DIGEST(i)); + + /* Restore context to finish computing SHA((K' + opad) | d). */ + atmel_sha_write(dd, SHA_CR, SHA_CR_WUIHV); + num_words = hs / sizeof(u32); + for (i = 0; i < num_words; ++i) + atmel_sha_write(dd, SHA_REG_DIN(i), hmac->opad[i]); + + mr = SHA_MR_MODE_AUTO | SHA_MR_UIHV; + mr |= (ctx->flags & SHA_FLAGS_ALGO_MASK); + atmel_sha_write(dd, SHA_MR, mr); + atmel_sha_write(dd, SHA_MSR, bs + ds); + atmel_sha_write(dd, SHA_BCR, ds); + atmel_sha_write(dd, SHA_CR, SHA_CR_FIRST); + + sg_init_one(&dd->tmp, digest, ds); + return atmel_sha_cpu_start(dd, &dd->tmp, ds, false, true, + atmel_sha_hmac_final_done); +} + +static int atmel_sha_hmac_final_done(struct atmel_sha_dev *dd) +{ + /* + * req->result might not be sizeof(u32) aligned, so copy the + * digest into ctx->digest[] before memcpy() the data into + * req->result. + */ + atmel_sha_copy_hash(dd->req); + atmel_sha_copy_ready_hash(dd->req); + return atmel_sha_complete(dd, 0); +} + +static int atmel_sha_hmac_digest(struct ahash_request *req) +{ + int err; + + err = atmel_sha_init(req); + if (err) + return err; + + return atmel_sha_enqueue(req, SHA_OP_DIGEST); +} + +static int atmel_sha_hmac_digest2(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + size_t hs = ctx->hash_size; + size_t i, num_words = hs / sizeof(u32); + bool use_dma = false; + u32 mr; + + /* Special case for empty message. */ + if (!req->nbytes) + return atmel_sha_complete(dd, -EINVAL); // TODO: + + /* Check DMA threshold and alignment. */ + if (req->nbytes > ATMEL_SHA_DMA_THRESHOLD && + atmel_sha_dma_check_aligned(dd, req->src, req->nbytes)) + use_dma = true; + + /* Write both initial hash values to compute a HMAC. */ + atmel_sha_write(dd, SHA_CR, SHA_CR_WUIHV); + for (i = 0; i < num_words; ++i) + atmel_sha_write(dd, SHA_REG_DIN(i), hmac->ipad[i]); + + atmel_sha_write(dd, SHA_CR, SHA_CR_WUIEHV); + for (i = 0; i < num_words; ++i) + atmel_sha_write(dd, SHA_REG_DIN(i), hmac->opad[i]); + + /* Write the Mode, Message Size, Bytes Count then Control Registers. */ + mr = (SHA_MR_HMAC | SHA_MR_DUALBUFF); + mr |= ctx->flags & SHA_FLAGS_ALGO_MASK; + if (use_dma) + mr |= SHA_MR_MODE_IDATAR0; + else + mr |= SHA_MR_MODE_AUTO; + atmel_sha_write(dd, SHA_MR, mr); + + atmel_sha_write(dd, SHA_MSR, req->nbytes); + atmel_sha_write(dd, SHA_BCR, req->nbytes); + + atmel_sha_write(dd, SHA_CR, SHA_CR_FIRST); + + /* Process data. 
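+ * Above ATMEL_SHA_DMA_THRESHOLD, and when the scatterlist is
+ * suitably aligned, the data is transferred by DMA; otherwise the
+ * CPU writes it to the Input Data Registers.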
*/ + if (use_dma) + return atmel_sha_dma_start(dd, req->src, req->nbytes, + atmel_sha_hmac_final_done); + + return atmel_sha_cpu_start(dd, req->src, req->nbytes, false, true, + atmel_sha_hmac_final_done); +} + +static int atmel_sha_hmac_cra_init(struct crypto_tfm *tfm) +{ + struct atmel_sha_hmac_ctx *hmac = crypto_tfm_ctx(tfm); + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct atmel_sha_reqctx)); + hmac->base.start = atmel_sha_hmac_start; + atmel_sha_hmac_key_init(&hmac->hkey); + + return 0; +} + +static void atmel_sha_hmac_cra_exit(struct crypto_tfm *tfm) +{ + struct atmel_sha_hmac_ctx *hmac = crypto_tfm_ctx(tfm); + + atmel_sha_hmac_key_release(&hmac->hkey); +} + +static struct ahash_alg sha_hmac_algs[] = { +{ + .init = atmel_sha_hmac_init, + .update = atmel_sha_update, + .final = atmel_sha_final, + .digest = atmel_sha_hmac_digest, + .setkey = atmel_sha_hmac_setkey, + .export = atmel_sha_export, + .import = atmel_sha_import, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct atmel_sha_reqctx), + .base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "atmel-hmac-sha1", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_sha_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = atmel_sha_hmac_cra_init, + .cra_exit = atmel_sha_hmac_cra_exit, + } + } +}, +{ + .init = atmel_sha_hmac_init, + .update = atmel_sha_update, + .final = atmel_sha_final, + .digest = atmel_sha_hmac_digest, + .setkey = atmel_sha_hmac_setkey, + .export = atmel_sha_export, + .import = atmel_sha_import, + .halg = { + .digestsize = SHA224_DIGEST_SIZE, + .statesize = sizeof(struct atmel_sha_reqctx), + .base = { + .cra_name = "hmac(sha224)", + .cra_driver_name = "atmel-hmac-sha224", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_sha_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = atmel_sha_hmac_cra_init, + .cra_exit = atmel_sha_hmac_cra_exit, + } + } +}, +{ + .init = atmel_sha_hmac_init, + .update = atmel_sha_update, + .final = atmel_sha_final, + .digest = atmel_sha_hmac_digest, + .setkey = atmel_sha_hmac_setkey, + .export = atmel_sha_export, + .import = atmel_sha_import, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct atmel_sha_reqctx), + .base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "atmel-hmac-sha256", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_sha_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = atmel_sha_hmac_cra_init, + .cra_exit = atmel_sha_hmac_cra_exit, + } + } +}, +{ + .init = atmel_sha_hmac_init, + .update = atmel_sha_update, + .final = atmel_sha_final, + .digest = atmel_sha_hmac_digest, + .setkey = atmel_sha_hmac_setkey, + .export = atmel_sha_export, + .import = atmel_sha_import, + .halg = { + .digestsize = SHA384_DIGEST_SIZE, + .statesize = sizeof(struct atmel_sha_reqctx), + .base = { + .cra_name = "hmac(sha384)", + .cra_driver_name = "atmel-hmac-sha384", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_sha_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = atmel_sha_hmac_cra_init, + .cra_exit = atmel_sha_hmac_cra_exit, + } + } +}, +{ + .init = atmel_sha_hmac_init, + .update = atmel_sha_update, + 
.final = atmel_sha_final, + .digest = atmel_sha_hmac_digest, + .setkey = atmel_sha_hmac_setkey, + .export = atmel_sha_export, + .import = atmel_sha_import, + .halg = { + .digestsize = SHA512_DIGEST_SIZE, + .statesize = sizeof(struct atmel_sha_reqctx), + .base = { + .cra_name = "hmac(sha512)", + .cra_driver_name = "atmel-hmac-sha512", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_sha_hmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = atmel_sha_hmac_cra_init, + .cra_exit = atmel_sha_hmac_cra_exit, + } + } +}, +}; + +#ifdef CONFIG_CRYPTO_DEV_ATMEL_AUTHENC +/* authenc functions */ + +static int atmel_sha_authenc_init2(struct atmel_sha_dev *dd); +static int atmel_sha_authenc_init_done(struct atmel_sha_dev *dd); +static int atmel_sha_authenc_final_done(struct atmel_sha_dev *dd); + + +struct atmel_sha_authenc_ctx { + struct crypto_ahash *tfm; +}; + +struct atmel_sha_authenc_reqctx { + struct atmel_sha_reqctx base; + + atmel_aes_authenc_fn_t cb; + struct atmel_aes_dev *aes_dev; + + /* _init() parameters. */ + struct scatterlist *assoc; + u32 assoclen; + u32 textlen; + + /* _final() parameters. */ + u32 *digest; + unsigned int digestlen; +}; + +static void atmel_sha_authenc_complete(struct crypto_async_request *areq, + int err) +{ + struct ahash_request *req = areq->data; + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + + authctx->cb(authctx->aes_dev, err, authctx->base.dd->is_async); +} + +static int atmel_sha_authenc_start(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + int err; + + /* + * Force atmel_sha_complete() to call req->base.complete(), ie + * atmel_sha_authenc_complete(), which in turn calls authctx->cb(). 
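+ * The completion must always reach the AES driver, even when the
+ * hash was computed synchronously, because the AES side owns the
+ * combined authenc request.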
+ */ + dd->force_complete = true; + + err = atmel_sha_hw_init(dd); + return authctx->cb(authctx->aes_dev, err, dd->is_async); +} + +bool atmel_sha_authenc_is_ready(void) +{ + struct atmel_sha_ctx dummy; + + dummy.dd = NULL; + return (atmel_sha_find_dev(&dummy) != NULL); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_is_ready); + +unsigned int atmel_sha_authenc_get_reqsize(void) +{ + return sizeof(struct atmel_sha_authenc_reqctx); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_get_reqsize); + +struct atmel_sha_authenc_ctx *atmel_sha_authenc_spawn(unsigned long mode) +{ + struct atmel_sha_authenc_ctx *auth; + struct crypto_ahash *tfm; + struct atmel_sha_ctx *tctx; + const char *name; + int err = -EINVAL; + + switch (mode & SHA_FLAGS_MODE_MASK) { + case SHA_FLAGS_HMAC_SHA1: + name = "atmel-hmac-sha1"; + break; + + case SHA_FLAGS_HMAC_SHA224: + name = "atmel-hmac-sha224"; + break; + + case SHA_FLAGS_HMAC_SHA256: + name = "atmel-hmac-sha256"; + break; + + case SHA_FLAGS_HMAC_SHA384: + name = "atmel-hmac-sha384"; + break; + + case SHA_FLAGS_HMAC_SHA512: + name = "atmel-hmac-sha512"; + break; + + default: + goto error; + } + + tfm = crypto_alloc_ahash(name, + CRYPTO_ALG_TYPE_AHASH, + CRYPTO_ALG_TYPE_AHASH_MASK); + if (IS_ERR(tfm)) { + err = PTR_ERR(tfm); + goto error; + } + tctx = crypto_ahash_ctx(tfm); + tctx->start = atmel_sha_authenc_start; + tctx->flags = mode; + + auth = kzalloc(sizeof(*auth), GFP_KERNEL); + if (!auth) { + err = -ENOMEM; + goto err_free_ahash; + } + auth->tfm = tfm; + + return auth; + +err_free_ahash: + crypto_free_ahash(tfm); +error: + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_spawn); + +void atmel_sha_authenc_free(struct atmel_sha_authenc_ctx *auth) +{ + if (auth) + crypto_free_ahash(auth->tfm); + kfree(auth); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_free); + +int atmel_sha_authenc_setkey(struct atmel_sha_authenc_ctx *auth, + const u8 *key, unsigned int keylen, + u32 *flags) +{ + struct crypto_ahash *tfm = auth->tfm; + int err; + + crypto_ahash_clear_flags(tfm, CRYPTO_TFM_REQ_MASK); + crypto_ahash_set_flags(tfm, *flags & CRYPTO_TFM_REQ_MASK); + err = crypto_ahash_setkey(tfm, key, keylen); + *flags = crypto_ahash_get_flags(tfm); + + return err; +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_setkey); + +int atmel_sha_authenc_schedule(struct ahash_request *req, + struct atmel_sha_authenc_ctx *auth, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *aes_dev) +{ + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + struct atmel_sha_reqctx *ctx = &authctx->base; + struct crypto_ahash *tfm = auth->tfm; + struct atmel_sha_ctx *tctx = crypto_ahash_ctx(tfm); + struct atmel_sha_dev *dd; + + /* Reset request context (MUST be done first). */ + memset(authctx, 0, sizeof(*authctx)); + + /* Get SHA device. */ + dd = atmel_sha_find_dev(tctx); + if (!dd) + return cb(aes_dev, -ENODEV, false); + + /* Init request context. 
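+ * Only the fields needed to schedule the request are set here; the
+ * hash transfer itself is configured later, by
+ * atmel_sha_authenc_init().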
*/ + ctx->dd = dd; + ctx->buflen = SHA_BUFFER_LEN; + authctx->cb = cb; + authctx->aes_dev = aes_dev; + ahash_request_set_tfm(req, tfm); + ahash_request_set_callback(req, 0, atmel_sha_authenc_complete, req); + + return atmel_sha_handle_queue(dd, req); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_schedule); + +int atmel_sha_authenc_init(struct ahash_request *req, + struct scatterlist *assoc, unsigned int assoclen, + unsigned int textlen, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *aes_dev) +{ + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + struct atmel_sha_reqctx *ctx = &authctx->base; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + struct atmel_sha_dev *dd = ctx->dd; + + if (unlikely(!IS_ALIGNED(assoclen, sizeof(u32)))) + return atmel_sha_complete(dd, -EINVAL); + + authctx->cb = cb; + authctx->aes_dev = aes_dev; + authctx->assoc = assoc; + authctx->assoclen = assoclen; + authctx->textlen = textlen; + + ctx->flags = hmac->base.flags; + return atmel_sha_hmac_setup(dd, atmel_sha_authenc_init2); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_init); + +static int atmel_sha_authenc_init2(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + struct atmel_sha_reqctx *ctx = &authctx->base; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); + size_t hs = ctx->hash_size; + size_t i, num_words = hs / sizeof(u32); + u32 mr, msg_size; + + atmel_sha_write(dd, SHA_CR, SHA_CR_WUIHV); + for (i = 0; i < num_words; ++i) + atmel_sha_write(dd, SHA_REG_DIN(i), hmac->ipad[i]); + + atmel_sha_write(dd, SHA_CR, SHA_CR_WUIEHV); + for (i = 0; i < num_words; ++i) + atmel_sha_write(dd, SHA_REG_DIN(i), hmac->opad[i]); + + mr = (SHA_MR_MODE_IDATAR0 | + SHA_MR_HMAC | + SHA_MR_DUALBUFF); + mr |= ctx->flags & SHA_FLAGS_ALGO_MASK; + atmel_sha_write(dd, SHA_MR, mr); + + msg_size = authctx->assoclen + authctx->textlen; + atmel_sha_write(dd, SHA_MSR, msg_size); + atmel_sha_write(dd, SHA_BCR, msg_size); + + atmel_sha_write(dd, SHA_CR, SHA_CR_FIRST); + + /* Process assoc data. 
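+ * The associated data is always fed by the CPU through IDATAR0,
+ * without waiting for DATARDY: the AES driver streams the text part
+ * of the message next.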
*/ + return atmel_sha_cpu_start(dd, authctx->assoc, authctx->assoclen, + true, false, + atmel_sha_authenc_init_done); +} + +static int atmel_sha_authenc_init_done(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + + return authctx->cb(authctx->aes_dev, 0, dd->is_async); +} + +int atmel_sha_authenc_final(struct ahash_request *req, + u32 *digest, unsigned int digestlen, + atmel_aes_authenc_fn_t cb, + struct atmel_aes_dev *aes_dev) +{ + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + struct atmel_sha_reqctx *ctx = &authctx->base; + struct atmel_sha_dev *dd = ctx->dd; + + switch (ctx->flags & SHA_FLAGS_ALGO_MASK) { + case SHA_FLAGS_SHA1: + authctx->digestlen = SHA1_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA224: + authctx->digestlen = SHA224_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA256: + authctx->digestlen = SHA256_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA384: + authctx->digestlen = SHA384_DIGEST_SIZE; + break; + + case SHA_FLAGS_SHA512: + authctx->digestlen = SHA512_DIGEST_SIZE; + break; + + default: + return atmel_sha_complete(dd, -EINVAL); + } + if (authctx->digestlen > digestlen) + authctx->digestlen = digestlen; + + authctx->cb = cb; + authctx->aes_dev = aes_dev; + authctx->digest = digest; + return atmel_sha_wait_for_data_ready(dd, + atmel_sha_authenc_final_done); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_final); + +static int atmel_sha_authenc_final_done(struct atmel_sha_dev *dd) +{ + struct ahash_request *req = dd->req; + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + size_t i, num_words = authctx->digestlen / sizeof(u32); + + for (i = 0; i < num_words; ++i) + authctx->digest[i] = atmel_sha_read(dd, SHA_REG_DIGEST(i)); + + return atmel_sha_complete(dd, 0); +} + +void atmel_sha_authenc_abort(struct ahash_request *req) +{ + struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); + struct atmel_sha_reqctx *ctx = &authctx->base; + struct atmel_sha_dev *dd = ctx->dd; + + /* Prevent atmel_sha_complete() from calling req->base.complete(). 
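+ * With both is_async and force_complete cleared,
+ * atmel_sha_complete() only resets the device flags, disables the
+ * clock and kicks the queue tasklet.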
*/ + dd->is_async = false; + dd->force_complete = false; + (void)atmel_sha_complete(dd, 0); +} +EXPORT_SYMBOL_GPL(atmel_sha_authenc_abort); + +#endif /* CONFIG_CRYPTO_DEV_ATMEL_AUTHENC */ + + static void atmel_sha_unregister_algs(struct atmel_sha_dev *dd) { int i; + if (dd->caps.has_hmac) + for (i = 0; i < ARRAY_SIZE(sha_hmac_algs); i++) + crypto_unregister_ahash(&sha_hmac_algs[i]); + for (i = 0; i < ARRAY_SIZE(sha_1_256_algs); i++) crypto_unregister_ahash(&sha_1_256_algs[i]); @@ -1273,8 +2566,21 @@ static int atmel_sha_register_algs(struct atmel_sha_dev *dd) } } + if (dd->caps.has_hmac) { + for (i = 0; i < ARRAY_SIZE(sha_hmac_algs); i++) { + err = crypto_register_ahash(&sha_hmac_algs[i]); + if (err) + goto err_sha_hmac_algs; + } + } + return 0; + /*i = ARRAY_SIZE(sha_hmac_algs);*/ +err_sha_hmac_algs: + for (j = 0; j < i; j++) + crypto_unregister_ahash(&sha_hmac_algs[j]); + i = ARRAY_SIZE(sha_384_512_algs); err_sha_384_512_algs: for (j = 0; j < i; j++) crypto_unregister_ahash(&sha_384_512_algs[j]); @@ -1344,6 +2650,7 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd) dd->caps.has_sha224 = 0; dd->caps.has_sha_384_512 = 0; dd->caps.has_uihv = 0; + dd->caps.has_hmac = 0; /* keep only major version number */ switch (dd->hw_version & 0xff0) { @@ -1353,6 +2660,7 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd) dd->caps.has_sha224 = 1; dd->caps.has_sha_384_512 = 1; dd->caps.has_uihv = 1; + dd->caps.has_hmac = 1; break; case 0x420: dd->caps.has_dma = 1; diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c index 10db7df366c8..a118b9bed669 100644 --- a/drivers/crypto/bfin_crc.c +++ b/drivers/crypto/bfin_crc.c @@ -203,7 +203,7 @@ static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc) crc->sg_cpu[i].x_count = 1; crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE; dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, " - "cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n", + "cfg:0x%x, x_count:0x%x, x_modify:0x%x\n", i, crc->sg_cpu[i].start_addr, crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count, crc->sg_cpu[i].x_modify); @@ -233,7 +233,7 @@ static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc) crc->sg_cpu[i].x_count = dma_count; crc->sg_cpu[i].x_modify = dma_mod; dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, " - "cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n", + "cfg:0x%x, x_count:0x%x, x_modify:0x%x\n", i, crc->sg_cpu[i].start_addr, crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count, crc->sg_cpu[i].x_modify); @@ -257,7 +257,7 @@ static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc) crc->sg_cpu[i].x_count = 1; crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE; dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, " - "cfg:0x%lx, x_count:0x%lx, x_modify:0x%lx\n", + "cfg:0x%x, x_count:0x%x, x_modify:0x%x\n", i, crc->sg_cpu[i].start_addr, crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count, crc->sg_cpu[i].x_modify); diff --git a/drivers/crypto/bfin_crc.h b/drivers/crypto/bfin_crc.h index 75cef4dc85a1..786ef746d109 100644 --- a/drivers/crypto/bfin_crc.h +++ b/drivers/crypto/bfin_crc.h @@ -55,7 +55,6 @@ struct crc_info { #include <linux/types.h> #include <linux/spinlock.h> -#include <linux/miscdevice.h> struct crc_register { u32 control; diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index b4b78b37f8a6..41bc7f4f58cd 100644..100755 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -171,7 +171,7 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input, } break; - case 
CRYPTO_ALG_TYPE_BLKCIPHER: + case CRYPTO_ALG_TYPE_ABLKCIPHER: ctx_req.req.ablk_req = (struct ablkcipher_request *)req; ctx_req.ctx.ablk_ctx = ablkcipher_request_ctx(ctx_req.req.ablk_req); @@ -542,10 +542,11 @@ static inline void create_wreq(struct chcr_context *ctx, (calc_tx_flits_ofld(skb) * 8), 16))); chcr_req->wreq.cookie = cpu_to_be64((uintptr_t)req); chcr_req->wreq.rx_chid_to_rx_q_id = - FILL_WR_RX_Q_ID(ctx->dev->tx_channel_id, qid, - is_iv ? iv_loc : IV_NOP); + FILL_WR_RX_Q_ID(ctx->dev->rx_channel_id, qid, + is_iv ? iv_loc : IV_NOP, ctx->tx_channel_id); - chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id); + chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id, + qid); chcr_req->ulptx.len = htonl((DIV_ROUND_UP((calc_tx_flits_ofld(skb) * 8), 16) - ((sizeof(chcr_req->wreq)) >> 4))); @@ -606,7 +607,7 @@ static struct sk_buff chcr_req = (struct chcr_wr *)__skb_put(skb, transhdr_len); memset(chcr_req, 0, transhdr_len); chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 1); + FILL_SEC_CPL_OP_IVINSR(ctx->dev->rx_channel_id, 2, 1); chcr_req->sec_cpl.pldlen = htonl(ivsize + req->nbytes); chcr_req->sec_cpl.aadstart_cipherstop_hi = @@ -782,6 +783,7 @@ static int chcr_device_init(struct chcr_context *ctx) spin_lock(&ctx->dev->lock_chcr_dev); ctx->tx_channel_id = rxq_idx; ctx->dev->tx_channel_id = !ctx->dev->tx_channel_id; + ctx->dev->rx_channel_id = 0; spin_unlock(&ctx->dev->lock_chcr_dev); } out: @@ -874,7 +876,7 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req, memset(chcr_req, 0, transhdr_len); chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 0); + FILL_SEC_CPL_OP_IVINSR(ctx->dev->rx_channel_id, 2, 0); chcr_req->sec_cpl.pldlen = htonl(param->bfr_len + param->sg_len); chcr_req->sec_cpl.aadstart_cipherstop_hi = @@ -1425,7 +1427,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, * to the hardware spec */ chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, + FILL_SEC_CPL_OP_IVINSR(ctx->dev->rx_channel_id, 2, (ivsize ? (assoclen + 1) : 0)); chcr_req->sec_cpl.pldlen = htonl(assoclen + ivsize + req->cryptlen); chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( @@ -1601,7 +1603,7 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl, unsigned int ivsize = AES_BLOCK_SIZE; unsigned int cipher_mode = CHCR_SCMD_CIPHER_MODE_AES_CCM; unsigned int mac_mode = CHCR_SCMD_AUTH_MODE_CBCMAC; - unsigned int c_id = chcrctx->dev->tx_channel_id; + unsigned int c_id = chcrctx->dev->rx_channel_id; unsigned int ccm_xtra; unsigned char tag_offset = 0, auth_offset = 0; unsigned char hmac_ctrl = get_hmac(crypto_aead_authsize(tfm)); @@ -1877,7 +1879,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, tag_offset = (op_type == CHCR_ENCRYPT_OP) ? 0 : authsize; chcr_req->sec_cpl.op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR( - ctx->dev->tx_channel_id, 2, (ivsize ? + ctx->dev->rx_channel_id, 2, (ivsize ? 
(req->assoclen + 1) : 0)); chcr_req->sec_cpl.pldlen = htonl(req->assoclen + ivsize + crypt_len); chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( @@ -2187,8 +2189,7 @@ static int chcr_gcm_setkey(struct crypto_aead *aead, const u8 *key, struct chcr_context *ctx = crypto_aead_ctx(aead); struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); struct chcr_gcm_ctx *gctx = GCM_CTX(aeadctx); - struct blkcipher_desc h_desc; - struct scatterlist src[1]; + struct crypto_cipher *cipher; unsigned int ck_size; int ret = 0, key_ctx_size = 0; @@ -2221,27 +2222,26 @@ static int chcr_gcm_setkey(struct crypto_aead *aead, const u8 *key, CHCR_KEYCTX_MAC_KEY_SIZE_128, 0, 0, key_ctx_size >> 4); - /* Calculate the H = CIPH(K, 0 repeated 16 times) using sync aes - * blkcipher It will go on key context + /* Calculate the H = CIPH(K, 0 repeated 16 times). + * It will go in key context */ - h_desc.tfm = crypto_alloc_blkcipher("cbc(aes-generic)", 0, 0); - if (IS_ERR(h_desc.tfm)) { + cipher = crypto_alloc_cipher("aes-generic", 0, 0); + if (IS_ERR(cipher)) { aeadctx->enckey_len = 0; ret = -ENOMEM; goto out; } - h_desc.flags = 0; - ret = crypto_blkcipher_setkey(h_desc.tfm, key, keylen); + + ret = crypto_cipher_setkey(cipher, key, keylen); if (ret) { aeadctx->enckey_len = 0; goto out1; } memset(gctx->ghash_h, 0, AEAD_H_SIZE); - sg_init_one(&src[0], gctx->ghash_h, AEAD_H_SIZE); - ret = crypto_blkcipher_encrypt(&h_desc, &src[0], &src[0], AEAD_H_SIZE); + crypto_cipher_encrypt_one(cipher, gctx->ghash_h, gctx->ghash_h); out1: - crypto_free_blkcipher(h_desc.tfm); + crypto_free_cipher(cipher); out: return ret; } @@ -2456,13 +2456,14 @@ static int chcr_aead_op(struct aead_request *req, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_context *ctx = crypto_aead_ctx(tfm); - struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct uld_ctx *u_ctx; struct sk_buff *skb; - if (ctx && !ctx->dev) { + if (!ctx->dev) { pr_err("chcr : %s : No crypto device.\n", __func__); return -ENXIO; } + u_ctx = ULD_CTX(ctx); if (cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], ctx->tx_channel_id)) { if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) @@ -2492,7 +2493,7 @@ static struct chcr_alg_template driver_algs[] = { .cra_name = "cbc(aes)", .cra_driver_name = "cbc-aes-chcr", .cra_priority = CHCR_CRA_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct chcr_context) @@ -2519,7 +2520,7 @@ static struct chcr_alg_template driver_algs[] = { .cra_name = "xts(aes)", .cra_driver_name = "xts-aes-chcr", .cra_priority = CHCR_CRA_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct chcr_context) + diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h index 3c7c51f7bedf..ba38bae7ce80 100644 --- a/drivers/crypto/chelsio/chcr_algo.h +++ b/drivers/crypto/chelsio/chcr_algo.h @@ -185,20 +185,21 @@ FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_V(1) | \ FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_V((ctx_len))) -#define FILL_WR_RX_Q_ID(cid, qid, wr_iv) \ +#define FILL_WR_RX_Q_ID(cid, qid, wr_iv, fid) \ htonl( \ FW_CRYPTO_LOOKASIDE_WR_RX_CHID_V((cid)) | \ FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_V((qid)) | \ FW_CRYPTO_LOOKASIDE_WR_LCB_V(0) | \ - FW_CRYPTO_LOOKASIDE_WR_IV_V((wr_iv))) + FW_CRYPTO_LOOKASIDE_WR_IV_V((wr_iv)) | \ + FW_CRYPTO_LOOKASIDE_WR_FQIDX_V(fid)) -#define FILL_ULPTX_CMD_DEST(cid) \ +#define 
FILL_ULPTX_CMD_DEST(cid, qid) \ htonl(ULPTX_CMD_V(ULP_TX_PKT) | \ ULP_TXPKT_DEST_V(0) | \ ULP_TXPKT_DATAMODIFY_V(0) | \ ULP_TXPKT_CHANNELID_V((cid)) | \ ULP_TXPKT_RO_V(1) | \ - ULP_TXPKT_FID_V(0)) + ULP_TXPKT_FID_V(qid)) #define KEYCTX_ALIGN_PAD(bs) ({unsigned int _bs = (bs);\ _bs == SHA1_DIGEST_SIZE ? 12 : 0; }) diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c index 1c65f07e1cc9..c28e018e0773 100644 --- a/drivers/crypto/chelsio/chcr_core.c +++ b/drivers/crypto/chelsio/chcr_core.c @@ -61,7 +61,7 @@ int assign_chcr_device(struct chcr_dev **dev) */ mutex_lock(&dev_mutex); /* TODO ? */ list_for_each_entry(u_ctx, &uld_ctx_list, entry) - if (u_ctx && u_ctx->dev) { + if (u_ctx->dev) { *dev = u_ctx->dev; ret = 0; break; @@ -151,18 +151,17 @@ int chcr_uld_rx_handler(void *handle, const __be64 *rsp, { struct uld_ctx *u_ctx = (struct uld_ctx *)handle; struct chcr_dev *dev = u_ctx->dev; - const struct cpl_act_establish *rpl = (struct cpl_act_establish - *)rsp; + const struct cpl_fw6_pld *rpl = (struct cpl_fw6_pld *)rsp; - if (rpl->ot.opcode != CPL_FW6_PLD) { + if (rpl->opcode != CPL_FW6_PLD) { pr_err("Unsupported opcode\n"); return 0; } if (!pgl) - work_handlers[rpl->ot.opcode](dev, (unsigned char *)&rsp[1]); + work_handlers[rpl->opcode](dev, (unsigned char *)&rsp[1]); else - work_handlers[rpl->ot.opcode](dev, pgl->va); + work_handlers[rpl->opcode](dev, pgl->va); return 0; } diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h index c7088a4e0a49..79da22b5cdc9 100644 --- a/drivers/crypto/chelsio/chcr_core.h +++ b/drivers/crypto/chelsio/chcr_core.h @@ -75,6 +75,7 @@ struct chcr_dev { spinlock_t lock_chcr_dev; struct uld_ctx *u_ctx; unsigned char tx_channel_id; + unsigned char rx_channel_id; }; struct uld_ctx { diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index 7ec0a8f12475..81cfd0ba132e 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -48,7 +48,7 @@ * giving the processed data */ -#define CHCR_CRA_PRIORITY 300 +#define CHCR_CRA_PRIORITY 3000 #define CHCR_AES_MAX_KEY_LEN (2 * (AES_MAX_KEY_SIZE)) /* consider xts */ #define CHCR_MAX_CRYPTO_IV_LEN 16 /* AES IV len */ diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c index a2e77b87485b..9b07f3d88feb 100644 --- a/drivers/crypto/img-hash.c +++ b/drivers/crypto/img-hash.c @@ -226,7 +226,7 @@ static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct scatterlist *sg) struct dma_async_tx_descriptor *desc; struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); - ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV); + ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_TO_DEVICE); if (ctx->dma_ct == 0) { dev_err(hdev->dev, "Invalid DMA sg\n"); hdev->err = -EINVAL; @@ -241,7 +241,7 @@ static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct scatterlist *sg) if (!desc) { dev_err(hdev->dev, "Null DMA descriptor\n"); hdev->err = -EINVAL; - dma_unmap_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV); + dma_unmap_sg(hdev->dev, sg, 1, DMA_TO_DEVICE); return -EINVAL; } desc->callback = img_hash_dma_callback; diff --git a/drivers/crypto/mediatek/Makefile b/drivers/crypto/mediatek/Makefile new file mode 100644 index 000000000000..187be79c7f3e --- /dev/null +++ b/drivers/crypto/mediatek/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_CRYPTO_DEV_MEDIATEK) += mtk-crypto.o +mtk-crypto-objs:= mtk-platform.o mtk-aes.o mtk-sha.o diff --git a/drivers/crypto/mediatek/mtk-aes.c 
b/drivers/crypto/mediatek/mtk-aes.c new file mode 100644 index 000000000000..3a47cdb8f0c8 --- /dev/null +++ b/drivers/crypto/mediatek/mtk-aes.c @@ -0,0 +1,1299 @@
+/*
+ * Cryptographic API.
+ *
+ * Driver for EIP97 AES acceleration.
+ *
+ * Copyright (c) 2016 Ryder Lee <ryder.lee@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Some ideas are from the atmel-aes.c driver.
+ */
+
+#include <crypto/aes.h>
+#include "mtk-platform.h"
+
+#define AES_QUEUE_SIZE 512
+#define AES_BUF_ORDER 2
+#define AES_BUF_SIZE ((PAGE_SIZE << AES_BUF_ORDER) \
+ & ~(AES_BLOCK_SIZE - 1))
+
+/* AES command token size */
+#define AES_CT_SIZE_ECB 2
+#define AES_CT_SIZE_CBC 3
+#define AES_CT_SIZE_CTR 3
+#define AES_CT_SIZE_GCM_OUT 5
+#define AES_CT_SIZE_GCM_IN 6
+#define AES_CT_CTRL_HDR cpu_to_le32(0x00220000)
+
+/* AES-CBC/ECB/CTR command token */
+#define AES_CMD0 cpu_to_le32(0x05000000)
+#define AES_CMD1 cpu_to_le32(0x2d060000)
+#define AES_CMD2 cpu_to_le32(0xe4a63806)
+/* AES-GCM command token */
+#define AES_GCM_CMD0 cpu_to_le32(0x0b000000)
+#define AES_GCM_CMD1 cpu_to_le32(0xa0800000)
+#define AES_GCM_CMD2 cpu_to_le32(0x25000010)
+#define AES_GCM_CMD3 cpu_to_le32(0x0f020000)
+#define AES_GCM_CMD4 cpu_to_le32(0x21e60000)
+#define AES_GCM_CMD5 cpu_to_le32(0x40e60000)
+#define AES_GCM_CMD6 cpu_to_le32(0xd0070000)
+
+/* AES transform information word 0 fields */
+#define AES_TFM_BASIC_OUT cpu_to_le32(0x4 << 0)
+#define AES_TFM_BASIC_IN cpu_to_le32(0x5 << 0)
+#define AES_TFM_GCM_OUT cpu_to_le32(0x6 << 0)
+#define AES_TFM_GCM_IN cpu_to_le32(0xf << 0)
+#define AES_TFM_SIZE(x) cpu_to_le32((x) << 8)
+#define AES_TFM_128BITS cpu_to_le32(0xb << 16)
+#define AES_TFM_192BITS cpu_to_le32(0xd << 16)
+#define AES_TFM_256BITS cpu_to_le32(0xf << 16)
+/* AES transform information word 1 fields */
+#define AES_TFM_ECB cpu_to_le32(0x0 << 0)
+#define AES_TFM_CBC cpu_to_le32(0x1 << 0)
+#define AES_TFM_CTR_INIT cpu_to_le32(0x2 << 0) /* init counter to 1 */
+#define AES_TFM_CTR_LOAD cpu_to_le32(0x6 << 0) /* load/reuse counter */
+#define AES_TFM_3IV cpu_to_le32(0x7 << 5) /* using IV 0-2 */
+#define AES_TFM_FULL_IV cpu_to_le32(0xf << 5) /* using IV 0-3 */
+#define AES_TFM_IV_CTR_MODE cpu_to_le32(0x1 << 10)
+#define AES_TFM_ENC_HASH cpu_to_le32(0x1 << 17)
+#define AES_TFM_GHASH_DIG cpu_to_le32(0x2 << 21)
+#define AES_TFM_GHASH cpu_to_le32(0x4 << 23)
+
+/* AES flags */
+#define AES_FLAGS_ECB BIT(0)
+#define AES_FLAGS_CBC BIT(1)
+#define AES_FLAGS_CTR BIT(2)
+#define AES_FLAGS_GCM BIT(3)
+#define AES_FLAGS_ENCRYPT BIT(4)
+#define AES_FLAGS_BUSY BIT(5)
+
+/**
+ * A command token (CT) is a set of hardware instructions that
+ * control the engine's AES processing flow.
+ *
+ * Transform information (TFM) defines the AES state and
+ * contains all keys and initial vectors.
+ *
+ * The engine requires CT and TFM to do:
+ * - Command decoding and control of the engine's data path.
+ * - Coordinating hardware data fetch and store operations.
+ * - Result token construction and output.
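+ * The CT words themselves are opaque engine opcodes (the AES_CMD*
+ * and AES_GCM_CMD* constants above); the driver mostly just ORs
+ * message lengths into them.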
+ *
+ * Memory map of GCM's TFM:
+ * /-----------\
+ * | AES KEY | 128/192/256 bits
+ * |-----------|
+ * | HASH KEY | a string of 128 zero bits encrypted using the block cipher
+ * |-----------|
+ * | IVs | 4 * 4 bytes
+ * \-----------/
+ */
+struct mtk_aes_ct {
+ __le32 cmd[AES_CT_SIZE_GCM_IN];
+};
+
+struct mtk_aes_tfm {
+ __le32 ctrl[2];
+ __le32 state[SIZE_IN_WORDS(AES_KEYSIZE_256 + AES_BLOCK_SIZE * 2)];
+};
+
+struct mtk_aes_reqctx {
+ u64 mode;
+};
+
+struct mtk_aes_base_ctx {
+ struct mtk_cryp *cryp;
+ u32 keylen;
+ mtk_aes_fn start;
+
+ struct mtk_aes_ct ct;
+ dma_addr_t ct_dma;
+ struct mtk_aes_tfm tfm;
+ dma_addr_t tfm_dma;
+
+ __le32 ct_hdr;
+ u32 ct_size;
+};
+
+struct mtk_aes_ctx {
+ struct mtk_aes_base_ctx base;
+};
+
+struct mtk_aes_ctr_ctx {
+ struct mtk_aes_base_ctx base;
+
+ u32 iv[AES_BLOCK_SIZE / sizeof(u32)];
+ size_t offset;
+ struct scatterlist src[2];
+ struct scatterlist dst[2];
+};
+
+struct mtk_aes_gcm_ctx {
+ struct mtk_aes_base_ctx base;
+
+ u32 authsize;
+ size_t textlen;
+
+ struct crypto_skcipher *ctr;
+};
+
+struct mtk_aes_gcm_setkey_result {
+ int err;
+ struct completion completion;
+};
+
+struct mtk_aes_drv {
+ struct list_head dev_list;
+ /* Device list lock */
+ spinlock_t lock;
+};
+
+static struct mtk_aes_drv mtk_aes = {
+ .dev_list = LIST_HEAD_INIT(mtk_aes.dev_list),
+ .lock = __SPIN_LOCK_UNLOCKED(mtk_aes.lock),
+};
+
+static inline u32 mtk_aes_read(struct mtk_cryp *cryp, u32 offset)
+{
+ return readl_relaxed(cryp->base + offset);
+}
+
+static inline void mtk_aes_write(struct mtk_cryp *cryp,
+ u32 offset, u32 value)
+{
+ writel_relaxed(value, cryp->base + offset);
+}
+
+static struct mtk_cryp *mtk_aes_find_dev(struct mtk_aes_base_ctx *ctx)
+{
+ struct mtk_cryp *cryp = NULL;
+ struct mtk_cryp *tmp;
+
+ spin_lock_bh(&mtk_aes.lock);
+ if (!ctx->cryp) {
+ list_for_each_entry(tmp, &mtk_aes.dev_list, aes_list) {
+ cryp = tmp;
+ break;
+ }
+ ctx->cryp = cryp;
+ } else {
+ cryp = ctx->cryp;
+ }
+ spin_unlock_bh(&mtk_aes.lock);
+
+ return cryp;
+}
+
+static inline size_t mtk_aes_padlen(size_t len)
+{
+ len &= AES_BLOCK_SIZE - 1;
+ return len ? AES_BLOCK_SIZE - len : 0;
+}
+
+static bool mtk_aes_check_aligned(struct scatterlist *sg, size_t len,
+ struct mtk_aes_dma *dma)
+{
+ int nents;
+
+ if (!IS_ALIGNED(len, AES_BLOCK_SIZE))
+ return false;
+
+ for (nents = 0; sg; sg = sg_next(sg), ++nents) {
+ if (!IS_ALIGNED(sg->offset, sizeof(u32)))
+ return false;
+
+ if (len <= sg->length) {
+ if (!IS_ALIGNED(len, AES_BLOCK_SIZE))
+ return false;
+
+ dma->nents = nents + 1;
+ dma->remainder = sg->length - len;
+ sg->length = len;
+ return true;
+ }
+
+ if (!IS_ALIGNED(sg->length, AES_BLOCK_SIZE))
+ return false;
+
+ len -= sg->length;
+ }
+
+ return false;
+}
+
+static inline void mtk_aes_set_mode(struct mtk_aes_rec *aes,
+ const struct mtk_aes_reqctx *rctx)
+{
+ /* Clear all but persistent flags and set request flags. */
+ aes->flags = (aes->flags & AES_FLAGS_BUSY) | rctx->mode;
+}
+
+static inline void mtk_aes_restore_sg(const struct mtk_aes_dma *dma)
+{
+ struct scatterlist *sg = dma->sg;
+ int nents = dma->nents;
+
+ if (!dma->remainder)
+ return;
+
+ while (--nents > 0 && sg)
+ sg = sg_next(sg);
+
+ if (!sg)
+ return;
+
+ sg->length += dma->remainder;
+}
+
+/*
+ * Write descriptors for processing. This will configure the engine, load
+ * the transform information and then start the packet processing.
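+ * Each command descriptor points at one source sg entry; the first one
+ * also carries the CT and TFM DMA addresses. Result descriptors cover
+ * the destination sg. Writing the prepared descriptor counts to
+ * CDR_PREP_COUNT and RDR_PREP_COUNT hands them over to the engine.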
+ */ +static int mtk_aes_xmit(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_ring *ring = cryp->ring[aes->id]; + struct mtk_desc *cmd = NULL, *res = NULL; + struct scatterlist *ssg = aes->src.sg, *dsg = aes->dst.sg; + u32 slen = aes->src.sg_len, dlen = aes->dst.sg_len; + int nents; + + /* Write command descriptors */ + for (nents = 0; nents < slen; ++nents, ssg = sg_next(ssg)) { + cmd = ring->cmd_base + ring->cmd_pos; + cmd->hdr = MTK_DESC_BUF_LEN(ssg->length); + cmd->buf = cpu_to_le32(sg_dma_address(ssg)); + + if (nents == 0) { + cmd->hdr |= MTK_DESC_FIRST | + MTK_DESC_CT_LEN(aes->ctx->ct_size); + cmd->ct = cpu_to_le32(aes->ctx->ct_dma); + cmd->ct_hdr = aes->ctx->ct_hdr; + cmd->tfm = cpu_to_le32(aes->ctx->tfm_dma); + } + + if (++ring->cmd_pos == MTK_DESC_NUM) + ring->cmd_pos = 0; + } + cmd->hdr |= MTK_DESC_LAST; + + /* Prepare result descriptors */ + for (nents = 0; nents < dlen; ++nents, dsg = sg_next(dsg)) { + res = ring->res_base + ring->res_pos; + res->hdr = MTK_DESC_BUF_LEN(dsg->length); + res->buf = cpu_to_le32(sg_dma_address(dsg)); + + if (nents == 0) + res->hdr |= MTK_DESC_FIRST; + + if (++ring->res_pos == MTK_DESC_NUM) + ring->res_pos = 0; + } + res->hdr |= MTK_DESC_LAST; + + /* Prepare enough space for authenticated tag */ + if (aes->flags & AES_FLAGS_GCM) + res->hdr += AES_BLOCK_SIZE; + + /* + * Make sure that all changes to the DMA ring are done before we + * start engine. + */ + wmb(); + /* Start DMA transfer */ + mtk_aes_write(cryp, RDR_PREP_COUNT(aes->id), MTK_DESC_CNT(dlen)); + mtk_aes_write(cryp, CDR_PREP_COUNT(aes->id), MTK_DESC_CNT(slen)); + + return -EINPROGRESS; +} + +static void mtk_aes_unmap(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_base_ctx *ctx = aes->ctx; + + dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->ct), + DMA_TO_DEVICE); + dma_unmap_single(cryp->dev, ctx->tfm_dma, sizeof(ctx->tfm), + DMA_TO_DEVICE); + + if (aes->src.sg == aes->dst.sg) { + dma_unmap_sg(cryp->dev, aes->src.sg, aes->src.nents, + DMA_BIDIRECTIONAL); + + if (aes->src.sg != &aes->aligned_sg) + mtk_aes_restore_sg(&aes->src); + } else { + dma_unmap_sg(cryp->dev, aes->dst.sg, aes->dst.nents, + DMA_FROM_DEVICE); + + if (aes->dst.sg != &aes->aligned_sg) + mtk_aes_restore_sg(&aes->dst); + + dma_unmap_sg(cryp->dev, aes->src.sg, aes->src.nents, + DMA_TO_DEVICE); + + if (aes->src.sg != &aes->aligned_sg) + mtk_aes_restore_sg(&aes->src); + } + + if (aes->dst.sg == &aes->aligned_sg) + sg_copy_from_buffer(aes->real_dst, sg_nents(aes->real_dst), + aes->buf, aes->total); +} + +static int mtk_aes_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_base_ctx *ctx = aes->ctx; + + ctx->ct_dma = dma_map_single(cryp->dev, &ctx->ct, sizeof(ctx->ct), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, ctx->ct_dma))) + return -EINVAL; + + ctx->tfm_dma = dma_map_single(cryp->dev, &ctx->tfm, sizeof(ctx->tfm), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, ctx->tfm_dma))) + goto tfm_map_err; + + if (aes->src.sg == aes->dst.sg) { + aes->src.sg_len = dma_map_sg(cryp->dev, aes->src.sg, + aes->src.nents, + DMA_BIDIRECTIONAL); + aes->dst.sg_len = aes->src.sg_len; + if (unlikely(!aes->src.sg_len)) + goto sg_map_err; + } else { + aes->src.sg_len = dma_map_sg(cryp->dev, aes->src.sg, + aes->src.nents, DMA_TO_DEVICE); + if (unlikely(!aes->src.sg_len)) + goto sg_map_err; + + aes->dst.sg_len = dma_map_sg(cryp->dev, aes->dst.sg, + aes->dst.nents, DMA_FROM_DEVICE); + if (unlikely(!aes->dst.sg_len)) { + dma_unmap_sg(cryp->dev, aes->src.sg, 
aes->src.nents, + DMA_TO_DEVICE); + goto sg_map_err; + } + } + + return mtk_aes_xmit(cryp, aes); + +sg_map_err: + dma_unmap_single(cryp->dev, ctx->tfm_dma, sizeof(ctx->tfm), + DMA_TO_DEVICE); +tfm_map_err: + dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->ct), + DMA_TO_DEVICE); + + return -EINVAL; +} + +/* Initialize transform information of CBC/ECB/CTR mode */ +static void mtk_aes_info_init(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, + size_t len) +{ + struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); + struct mtk_aes_base_ctx *ctx = aes->ctx; + + ctx->ct_hdr = AES_CT_CTRL_HDR | cpu_to_le32(len); + ctx->ct.cmd[0] = AES_CMD0 | cpu_to_le32(len); + ctx->ct.cmd[1] = AES_CMD1; + + if (aes->flags & AES_FLAGS_ENCRYPT) + ctx->tfm.ctrl[0] = AES_TFM_BASIC_OUT; + else + ctx->tfm.ctrl[0] = AES_TFM_BASIC_IN; + + if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_128)) + ctx->tfm.ctrl[0] |= AES_TFM_128BITS; + else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_256)) + ctx->tfm.ctrl[0] |= AES_TFM_256BITS; + else + ctx->tfm.ctrl[0] |= AES_TFM_192BITS; + + if (aes->flags & AES_FLAGS_CBC) { + const u32 *iv = (const u32 *)req->info; + u32 *iv_state = ctx->tfm.state + ctx->keylen; + int i; + + ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen + + SIZE_IN_WORDS(AES_BLOCK_SIZE)); + ctx->tfm.ctrl[1] = AES_TFM_CBC | AES_TFM_FULL_IV; + + for (i = 0; i < SIZE_IN_WORDS(AES_BLOCK_SIZE); i++) + iv_state[i] = cpu_to_le32(iv[i]); + + ctx->ct.cmd[2] = AES_CMD2; + ctx->ct_size = AES_CT_SIZE_CBC; + } else if (aes->flags & AES_FLAGS_ECB) { + ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen); + ctx->tfm.ctrl[1] = AES_TFM_ECB; + + ctx->ct_size = AES_CT_SIZE_ECB; + } else if (aes->flags & AES_FLAGS_CTR) { + ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen + + SIZE_IN_WORDS(AES_BLOCK_SIZE)); + ctx->tfm.ctrl[1] = AES_TFM_CTR_LOAD | AES_TFM_FULL_IV; + + ctx->ct.cmd[2] = AES_CMD2; + ctx->ct_size = AES_CT_SIZE_CTR; + } +} + +static int mtk_aes_dma(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, + struct scatterlist *src, struct scatterlist *dst, + size_t len) +{ + size_t padlen = 0; + bool src_aligned, dst_aligned; + + aes->total = len; + aes->src.sg = src; + aes->dst.sg = dst; + aes->real_dst = dst; + + src_aligned = mtk_aes_check_aligned(src, len, &aes->src); + if (src == dst) + dst_aligned = src_aligned; + else + dst_aligned = mtk_aes_check_aligned(dst, len, &aes->dst); + + if (!src_aligned || !dst_aligned) { + padlen = mtk_aes_padlen(len); + + if (len + padlen > AES_BUF_SIZE) + return -ENOMEM; + + if (!src_aligned) { + sg_copy_to_buffer(src, sg_nents(src), aes->buf, len); + aes->src.sg = &aes->aligned_sg; + aes->src.nents = 1; + aes->src.remainder = 0; + } + + if (!dst_aligned) { + aes->dst.sg = &aes->aligned_sg; + aes->dst.nents = 1; + aes->dst.remainder = 0; + } + + sg_init_table(&aes->aligned_sg, 1); + sg_set_buf(&aes->aligned_sg, aes->buf, len + padlen); + } + + mtk_aes_info_init(cryp, aes, len + padlen); + + return mtk_aes_map(cryp, aes); +} + +static int mtk_aes_handle_queue(struct mtk_cryp *cryp, u8 id, + struct crypto_async_request *new_areq) +{ + struct mtk_aes_rec *aes = cryp->aes[id]; + struct crypto_async_request *areq, *backlog; + struct mtk_aes_base_ctx *ctx; + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&aes->lock, flags); + if (new_areq) + ret = crypto_enqueue_request(&aes->queue, new_areq); + if (aes->flags & AES_FLAGS_BUSY) { + spin_unlock_irqrestore(&aes->lock, flags); + return ret; + } + backlog = crypto_get_backlog(&aes->queue); + areq = crypto_dequeue_request(&aes->queue); + if (areq) 
+ aes->flags |= AES_FLAGS_BUSY; + spin_unlock_irqrestore(&aes->lock, flags); + + if (!areq) + return ret; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + ctx = crypto_tfm_ctx(areq->tfm); + + aes->areq = areq; + aes->ctx = ctx; + + return ctx->start(cryp, aes); +} + +static int mtk_aes_complete(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + aes->flags &= ~AES_FLAGS_BUSY; + aes->areq->complete(aes->areq, 0); + + /* Handle new request */ + return mtk_aes_handle_queue(cryp, aes->id, NULL); +} + +static int mtk_aes_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); + struct mtk_aes_reqctx *rctx = ablkcipher_request_ctx(req); + + mtk_aes_set_mode(aes, rctx); + aes->resume = mtk_aes_complete; + + return mtk_aes_dma(cryp, aes, req->src, req->dst, req->nbytes); +} + +static inline struct mtk_aes_ctr_ctx * +mtk_aes_ctr_ctx_cast(struct mtk_aes_base_ctx *ctx) +{ + return container_of(ctx, struct mtk_aes_ctr_ctx, base); +} + +static int mtk_aes_ctr_transfer(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_base_ctx *ctx = aes->ctx; + struct mtk_aes_ctr_ctx *cctx = mtk_aes_ctr_ctx_cast(ctx); + struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); + struct scatterlist *src, *dst; + int i; + u32 start, end, ctr, blocks, *iv_state; + size_t datalen; + bool fragmented = false; + + /* Check for transfer completion. */ + cctx->offset += aes->total; + if (cctx->offset >= req->nbytes) + return mtk_aes_complete(cryp, aes); + + /* Compute data length. */ + datalen = req->nbytes - cctx->offset; + blocks = DIV_ROUND_UP(datalen, AES_BLOCK_SIZE); + ctr = be32_to_cpu(cctx->iv[3]); + + /* Check 32bit counter overflow. */ + start = ctr; + end = start + blocks - 1; + if (end < start) { + ctr |= 0xffffffff; + datalen = AES_BLOCK_SIZE * -start; + fragmented = true; + } + + /* Jump to offset. */ + src = scatterwalk_ffwd(cctx->src, req->src, cctx->offset); + dst = ((req->src == req->dst) ? src : + scatterwalk_ffwd(cctx->dst, req->dst, cctx->offset)); + + /* Write IVs into transform state buffer. */ + iv_state = ctx->tfm.state + ctx->keylen; + for (i = 0; i < SIZE_IN_WORDS(AES_BLOCK_SIZE); i++) + iv_state[i] = cpu_to_le32(cctx->iv[i]); + + if (unlikely(fragmented)) { + /* + * Increment the counter manually to cope with the hardware + * counter overflow. 
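[Editor's note] The overflow clamp in mtk_aes_ctr_transfer() above is subtle: -start in 32-bit unsigned arithmetic is exactly the number of blocks left before the counter wraps. A standalone sketch of the same computation:

#include <stdint.h>
#include <stdio.h>

#define AES_BLOCK_SIZE 16

/* Mirrors the overflow test: if processing `blocks` blocks would wrap
 * the 32-bit big-endian counter, clamp the transfer so it ends exactly
 * at the wrap point and flag the request as fragmented. */
static size_t clamp_to_wrap(uint32_t start, uint32_t blocks, size_t datalen,
			    int *fragmented)
{
	uint32_t end = start + blocks - 1;

	*fragmented = 0;
	if (end < start) {		/* counter would overflow */
		*fragmented = 1;
		/* -start == 2^32 - start in unsigned arithmetic */
		return (size_t)AES_BLOCK_SIZE * -start;
	}
	return datalen;
}

int main(void)
{
	int frag;
	size_t len = clamp_to_wrap(0xfffffffd, 10, 10 * AES_BLOCK_SIZE, &frag);

	/* 3 blocks remain before the wrap: len=48, fragmented=1 */
	printf("len=%zu fragmented=%d\n", len, frag);
	return 0;
}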
+ */ + cctx->iv[3] = cpu_to_be32(ctr); + crypto_inc((u8 *)cctx->iv, AES_BLOCK_SIZE); + } + aes->resume = mtk_aes_ctr_transfer; + + return mtk_aes_dma(cryp, aes, src, dst, datalen); +} + +static int mtk_aes_ctr_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_ctr_ctx *cctx = mtk_aes_ctr_ctx_cast(aes->ctx); + struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq); + struct mtk_aes_reqctx *rctx = ablkcipher_request_ctx(req); + + mtk_aes_set_mode(aes, rctx); + + memcpy(cctx->iv, req->info, AES_BLOCK_SIZE); + cctx->offset = 0; + aes->total = 0; + + return mtk_aes_ctr_transfer(cryp, aes); +} + +/* Check and set the AES key to transform state buffer */ +static int mtk_aes_setkey(struct crypto_ablkcipher *tfm, + const u8 *key, u32 keylen) +{ + struct mtk_aes_base_ctx *ctx = crypto_ablkcipher_ctx(tfm); + const u32 *aes_key = (const u32 *)key; + u32 *key_state = ctx->tfm.state; + int i; + + if (keylen != AES_KEYSIZE_128 && + keylen != AES_KEYSIZE_192 && + keylen != AES_KEYSIZE_256) { + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + ctx->keylen = SIZE_IN_WORDS(keylen); + + for (i = 0; i < ctx->keylen; i++) + key_state[i] = cpu_to_le32(aes_key[i]); + + return 0; +} + +static int mtk_aes_crypt(struct ablkcipher_request *req, u64 mode) +{ + struct mtk_aes_base_ctx *ctx; + struct mtk_aes_reqctx *rctx; + + ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); + rctx = ablkcipher_request_ctx(req); + rctx->mode = mode; + + return mtk_aes_handle_queue(ctx->cryp, !(mode & AES_FLAGS_ENCRYPT), + &req->base); +} + +static int mtk_aes_ecb_encrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_ECB); +} + +static int mtk_aes_ecb_decrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_ECB); +} + +static int mtk_aes_cbc_encrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_CBC); +} + +static int mtk_aes_cbc_decrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_CBC); +} + +static int mtk_aes_ctr_encrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_CTR); +} + +static int mtk_aes_ctr_decrypt(struct ablkcipher_request *req) +{ + return mtk_aes_crypt(req, AES_FLAGS_CTR); +} + +static int mtk_aes_cra_init(struct crypto_tfm *tfm) +{ + struct mtk_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct mtk_cryp *cryp = NULL; + + cryp = mtk_aes_find_dev(&ctx->base); + if (!cryp) { + pr_err("can't find crypto device\n"); + return -ENODEV; + } + + tfm->crt_ablkcipher.reqsize = sizeof(struct mtk_aes_reqctx); + ctx->base.start = mtk_aes_start; + return 0; +} + +static int mtk_aes_ctr_cra_init(struct crypto_tfm *tfm) +{ + struct mtk_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct mtk_cryp *cryp = NULL; + + cryp = mtk_aes_find_dev(&ctx->base); + if (!cryp) { + pr_err("can't find crypto device\n"); + return -ENODEV; + } + + tfm->crt_ablkcipher.reqsize = sizeof(struct mtk_aes_reqctx); + ctx->base.start = mtk_aes_ctr_start; + return 0; +} + +static struct crypto_alg aes_algs[] = { +{ + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_init = mtk_aes_cra_init, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_aes_ctx), + .cra_alignmask = 0xf, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_u.ablkcipher = { + .min_keysize = 
AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = mtk_aes_setkey, + .encrypt = mtk_aes_cbc_encrypt, + .decrypt = mtk_aes_cbc_decrypt, + .ivsize = AES_BLOCK_SIZE, + } +}, +{ + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_init = mtk_aes_cra_init, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_aes_ctx), + .cra_alignmask = 0xf, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = mtk_aes_setkey, + .encrypt = mtk_aes_ecb_encrypt, + .decrypt = mtk_aes_ecb_decrypt, + } +}, +{ + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_init = mtk_aes_ctr_cra_init, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct mtk_aes_ctr_ctx), + .cra_alignmask = 0xf, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = mtk_aes_setkey, + .encrypt = mtk_aes_ctr_encrypt, + .decrypt = mtk_aes_ctr_decrypt, + } +}, +}; + +static inline struct mtk_aes_gcm_ctx * +mtk_aes_gcm_ctx_cast(struct mtk_aes_base_ctx *ctx) +{ + return container_of(ctx, struct mtk_aes_gcm_ctx, base); +} + +/* Initialize transform information of GCM mode */ +static void mtk_aes_gcm_info_init(struct mtk_cryp *cryp, + struct mtk_aes_rec *aes, + size_t len) +{ + struct aead_request *req = aead_request_cast(aes->areq); + struct mtk_aes_base_ctx *ctx = aes->ctx; + struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx); + const u32 *iv = (const u32 *)req->iv; + u32 *iv_state = ctx->tfm.state + ctx->keylen + + SIZE_IN_WORDS(AES_BLOCK_SIZE); + u32 ivsize = crypto_aead_ivsize(crypto_aead_reqtfm(req)); + int i; + + ctx->ct_hdr = AES_CT_CTRL_HDR | len; + + ctx->ct.cmd[0] = AES_GCM_CMD0 | cpu_to_le32(req->assoclen); + ctx->ct.cmd[1] = AES_GCM_CMD1 | cpu_to_le32(req->assoclen); + ctx->ct.cmd[2] = AES_GCM_CMD2; + ctx->ct.cmd[3] = AES_GCM_CMD3 | cpu_to_le32(gctx->textlen); + + if (aes->flags & AES_FLAGS_ENCRYPT) { + ctx->ct.cmd[4] = AES_GCM_CMD4 | cpu_to_le32(gctx->authsize); + ctx->ct_size = AES_CT_SIZE_GCM_OUT; + ctx->tfm.ctrl[0] = AES_TFM_GCM_OUT; + } else { + ctx->ct.cmd[4] = AES_GCM_CMD5 | cpu_to_le32(gctx->authsize); + ctx->ct.cmd[5] = AES_GCM_CMD6 | cpu_to_le32(gctx->authsize); + ctx->ct_size = AES_CT_SIZE_GCM_IN; + ctx->tfm.ctrl[0] = AES_TFM_GCM_IN; + } + + if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_128)) + ctx->tfm.ctrl[0] |= AES_TFM_128BITS; + else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_256)) + ctx->tfm.ctrl[0] |= AES_TFM_256BITS; + else + ctx->tfm.ctrl[0] |= AES_TFM_192BITS; + + ctx->tfm.ctrl[0] |= AES_TFM_GHASH_DIG | AES_TFM_GHASH | + AES_TFM_SIZE(ctx->keylen + SIZE_IN_WORDS( + AES_BLOCK_SIZE + ivsize)); + ctx->tfm.ctrl[1] = AES_TFM_CTR_INIT | AES_TFM_IV_CTR_MODE | + AES_TFM_3IV | AES_TFM_ENC_HASH; + + for (i = 0; i < SIZE_IN_WORDS(ivsize); i++) + iv_state[i] = cpu_to_le32(iv[i]); +} + +static int mtk_aes_gcm_dma(struct mtk_cryp *cryp, struct mtk_aes_rec *aes, + struct scatterlist *src, struct scatterlist *dst, + size_t len) +{ + bool src_aligned, dst_aligned; + + aes->src.sg = src; + aes->dst.sg = dst; + aes->real_dst = dst; + + src_aligned = mtk_aes_check_aligned(src, len, &aes->src); + if (src == dst) + dst_aligned = src_aligned; 
+ else + dst_aligned = mtk_aes_check_aligned(dst, len, &aes->dst); + + if (!src_aligned || !dst_aligned) { + if (aes->total > AES_BUF_SIZE) + return -ENOMEM; + + if (!src_aligned) { + sg_copy_to_buffer(src, sg_nents(src), aes->buf, len); + aes->src.sg = &aes->aligned_sg; + aes->src.nents = 1; + aes->src.remainder = 0; + } + + if (!dst_aligned) { + aes->dst.sg = &aes->aligned_sg; + aes->dst.nents = 1; + aes->dst.remainder = 0; + } + + sg_init_table(&aes->aligned_sg, 1); + sg_set_buf(&aes->aligned_sg, aes->buf, aes->total); + } + + mtk_aes_gcm_info_init(cryp, aes, len); + + return mtk_aes_map(cryp, aes); +} + +/* Todo: GMAC */ +static int mtk_aes_gcm_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes) +{ + struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(aes->ctx); + struct aead_request *req = aead_request_cast(aes->areq); + struct mtk_aes_reqctx *rctx = aead_request_ctx(req); + u32 len = req->assoclen + req->cryptlen; + + mtk_aes_set_mode(aes, rctx); + + if (aes->flags & AES_FLAGS_ENCRYPT) { + u32 tag[4]; + /* Compute total process length. */ + aes->total = len + gctx->authsize; + /* Compute text length. */ + gctx->textlen = req->cryptlen; + /* Hardware will append authenticated tag to output buffer */ + scatterwalk_map_and_copy(tag, req->dst, len, gctx->authsize, 1); + } else { + aes->total = len; + gctx->textlen = req->cryptlen - gctx->authsize; + } + aes->resume = mtk_aes_complete; + + return mtk_aes_gcm_dma(cryp, aes, req->src, req->dst, len); +} + +static int mtk_aes_gcm_crypt(struct aead_request *req, u64 mode) +{ + struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); + struct mtk_aes_reqctx *rctx = aead_request_ctx(req); + + rctx->mode = AES_FLAGS_GCM | mode; + + return mtk_aes_handle_queue(ctx->cryp, !!(mode & AES_FLAGS_ENCRYPT), + &req->base); +} + +static void mtk_gcm_setkey_done(struct crypto_async_request *req, int err) +{ + struct mtk_aes_gcm_setkey_result *result = req->data; + + if (err == -EINPROGRESS) + return; + + result->err = err; + complete(&result->completion); +} + +/* + * Because of the hardware limitation, we need to pre-calculate key(H) + * for the GHASH operation. The result of the encryption operation + * need to be stored in the transform state buffer. 
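[Editor's note] The length bookkeeping in mtk_aes_gcm_start() above is easy to misread, so here is a standalone mirror of it; note how encryption grows the overall buffer length by the tag size while decryption shrinks the text length instead.

#include <stdint.h>
#include <stdio.h>

struct gcm_lens {
	uint32_t total;		/* aes->total: overall buffer length */
	uint32_t textlen;	/* gctx->textlen: plaintext/ciphertext bytes */
};

/* Mirrors mtk_aes_gcm_start(): on encryption the engine appends the
 * tag, so the total grows by authsize; on decryption the tag is part
 * of the input and the text length is cryptlen - authsize. */
static struct gcm_lens gcm_lens(uint32_t assoclen, uint32_t cryptlen,
				uint32_t authsize, int encrypt)
{
	uint32_t len = assoclen + cryptlen;
	struct gcm_lens l;

	if (encrypt) {
		l.total = len + authsize;
		l.textlen = cryptlen;
	} else {
		l.total = len;
		l.textlen = cryptlen - authsize;
	}
	return l;
}

int main(void)
{
	struct gcm_lens e = gcm_lens(20, 64, 16, 1);
	struct gcm_lens d = gcm_lens(20, 64 + 16, 16, 0);

	printf("enc: total=%u textlen=%u\n", e.total, e.textlen); /* 100 64 */
	printf("dec: total=%u textlen=%u\n", d.total, d.textlen); /* 100 64 */
	return 0;
}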
+ */ +static int mtk_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key, + u32 keylen) +{ + struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(aead); + struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx); + struct crypto_skcipher *ctr = gctx->ctr; + struct { + u32 hash[4]; + u8 iv[8]; + + struct mtk_aes_gcm_setkey_result result; + + struct scatterlist sg[1]; + struct skcipher_request req; + } *data; + const u32 *aes_key; + u32 *key_state, *hash_state; + int err, i; + + if (keylen != AES_KEYSIZE_256 && + keylen != AES_KEYSIZE_192 && + keylen != AES_KEYSIZE_128) { + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + key_state = ctx->tfm.state; + aes_key = (u32 *)key; + ctx->keylen = SIZE_IN_WORDS(keylen); + + for (i = 0; i < ctx->keylen; i++) + ctx->tfm.state[i] = cpu_to_le32(aes_key[i]); + + /* Same as crypto_gcm_setkey() from crypto/gcm.c */ + crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctr, crypto_aead_get_flags(aead) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(ctr, key, keylen); + crypto_aead_set_flags(aead, crypto_skcipher_get_flags(ctr) & + CRYPTO_TFM_RES_MASK); + if (err) + return err; + + data = kzalloc(sizeof(*data) + crypto_skcipher_reqsize(ctr), + GFP_KERNEL); + if (!data) + return -ENOMEM; + + init_completion(&data->result.completion); + sg_init_one(data->sg, &data->hash, AES_BLOCK_SIZE); + skcipher_request_set_tfm(&data->req, ctr); + skcipher_request_set_callback(&data->req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + mtk_gcm_setkey_done, &data->result); + skcipher_request_set_crypt(&data->req, data->sg, data->sg, + AES_BLOCK_SIZE, data->iv); + + err = crypto_skcipher_encrypt(&data->req); + if (err == -EINPROGRESS || err == -EBUSY) { + err = wait_for_completion_interruptible( + &data->result.completion); + if (!err) + err = data->result.err; + } + if (err) + goto out; + + hash_state = key_state + ctx->keylen; + + for (i = 0; i < 4; i++) + hash_state[i] = cpu_to_be32(data->hash[i]); +out: + kzfree(data); + return err; +} + +static int mtk_aes_gcm_setauthsize(struct crypto_aead *aead, + u32 authsize) +{ + struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(aead); + struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx); + + /* Same as crypto_gcm_authsize() from crypto/gcm.c */ + switch (authsize) { + case 8: + case 12: + case 16: + break; + default: + return -EINVAL; + } + + gctx->authsize = authsize; + return 0; +} + +static int mtk_aes_gcm_encrypt(struct aead_request *req) +{ + return mtk_aes_gcm_crypt(req, AES_FLAGS_ENCRYPT); +} + +static int mtk_aes_gcm_decrypt(struct aead_request *req) +{ + return mtk_aes_gcm_crypt(req, 0); +} + +static int mtk_aes_gcm_init(struct crypto_aead *aead) +{ + struct mtk_aes_gcm_ctx *ctx = crypto_aead_ctx(aead); + struct mtk_cryp *cryp = NULL; + + cryp = mtk_aes_find_dev(&ctx->base); + if (!cryp) { + pr_err("can't find crypto device\n"); + return -ENODEV; + } + + ctx->ctr = crypto_alloc_skcipher("ctr(aes)", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->ctr)) { + pr_err("Error allocating ctr(aes)\n"); + return PTR_ERR(ctx->ctr); + } + + crypto_aead_set_reqsize(aead, sizeof(struct mtk_aes_reqctx)); + ctx->base.start = mtk_aes_gcm_start; + return 0; +} + +static void mtk_aes_gcm_exit(struct crypto_aead *aead) +{ + struct mtk_aes_gcm_ctx *ctx = crypto_aead_ctx(aead); + + crypto_free_skcipher(ctx->ctr); +} + +static struct aead_alg aes_gcm_alg = { + .setkey = mtk_aes_gcm_setkey, + .setauthsize = mtk_aes_gcm_setauthsize, + .encrypt = mtk_aes_gcm_encrypt, + 
.decrypt = mtk_aes_gcm_decrypt, + .init = mtk_aes_gcm_init, + .exit = mtk_aes_gcm_exit, + .ivsize = 12, + .maxauthsize = AES_BLOCK_SIZE, + + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "gcm-aes-mtk", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct mtk_aes_gcm_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, +}; + +static void mtk_aes_enc_task(unsigned long data) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)data; + struct mtk_aes_rec *aes = cryp->aes[0]; + + mtk_aes_unmap(cryp, aes); + aes->resume(cryp, aes); +} + +static void mtk_aes_dec_task(unsigned long data) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)data; + struct mtk_aes_rec *aes = cryp->aes[1]; + + mtk_aes_unmap(cryp, aes); + aes->resume(cryp, aes); +} + +static irqreturn_t mtk_aes_enc_irq(int irq, void *dev_id) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)dev_id; + struct mtk_aes_rec *aes = cryp->aes[0]; + u32 val = mtk_aes_read(cryp, RDR_STAT(RING0)); + + mtk_aes_write(cryp, RDR_STAT(RING0), val); + + if (likely(AES_FLAGS_BUSY & aes->flags)) { + mtk_aes_write(cryp, RDR_PROC_COUNT(RING0), MTK_CNT_RST); + mtk_aes_write(cryp, RDR_THRESH(RING0), + MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); + + tasklet_schedule(&aes->task); + } else { + dev_warn(cryp->dev, "AES interrupt when no active requests.\n"); + } + return IRQ_HANDLED; +} + +static irqreturn_t mtk_aes_dec_irq(int irq, void *dev_id) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)dev_id; + struct mtk_aes_rec *aes = cryp->aes[1]; + u32 val = mtk_aes_read(cryp, RDR_STAT(RING1)); + + mtk_aes_write(cryp, RDR_STAT(RING1), val); + + if (likely(AES_FLAGS_BUSY & aes->flags)) { + mtk_aes_write(cryp, RDR_PROC_COUNT(RING1), MTK_CNT_RST); + mtk_aes_write(cryp, RDR_THRESH(RING1), + MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); + + tasklet_schedule(&aes->task); + } else { + dev_warn(cryp->dev, "AES interrupt when no active requests.\n"); + } + return IRQ_HANDLED; +} + +/* + * The purpose of creating encryption and decryption records is + * to process outbound/inbound data in parallel, it can improve + * performance in most use cases, such as IPSec VPN, especially + * under heavy network traffic. 
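[Editor's note] A minimal mirror of the routing that makes this parallelism happen: mtk_aes_crypt() maps encryption to record/ring 0 and decryption to record/ring 1 via !(mode & AES_FLAGS_ENCRYPT), while the GCM path above uses the inverted !!(...) mapping.

#include <stdio.h>

#define AES_FLAGS_ENCRYPT (1u << 4)

/* Ring selection as in mtk_aes_crypt(): each direction gets its own
 * record and descriptor ring, so both can run in parallel under load
 * (e.g. IPsec traffic). */
static unsigned int pick_record(unsigned long mode)
{
	return !(mode & AES_FLAGS_ENCRYPT);
}

int main(void)
{
	printf("encrypt -> ring %u\n", pick_record(AES_FLAGS_ENCRYPT)); /* 0 */
	printf("decrypt -> ring %u\n", pick_record(0));			/* 1 */
	return 0;
}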
+ */ +static int mtk_aes_record_init(struct mtk_cryp *cryp) +{ + struct mtk_aes_rec **aes = cryp->aes; + int i, err = -ENOMEM; + + for (i = 0; i < MTK_REC_NUM; i++) { + aes[i] = kzalloc(sizeof(**aes), GFP_KERNEL); + if (!aes[i]) + goto err_cleanup; + + aes[i]->buf = (void *)__get_free_pages(GFP_KERNEL, + AES_BUF_ORDER); + if (!aes[i]->buf) + goto err_cleanup; + + aes[i]->id = i; + + spin_lock_init(&aes[i]->lock); + crypto_init_queue(&aes[i]->queue, AES_QUEUE_SIZE); + } + + tasklet_init(&aes[0]->task, mtk_aes_enc_task, (unsigned long)cryp); + tasklet_init(&aes[1]->task, mtk_aes_dec_task, (unsigned long)cryp); + + return 0; + +err_cleanup: + for (; i--; ) { + free_page((unsigned long)aes[i]->buf); + kfree(aes[i]); + } + + return err; +} + +static void mtk_aes_record_free(struct mtk_cryp *cryp) +{ + int i; + + for (i = 0; i < MTK_REC_NUM; i++) { + tasklet_kill(&cryp->aes[i]->task); + free_page((unsigned long)cryp->aes[i]->buf); + kfree(cryp->aes[i]); + } +} + +static void mtk_aes_unregister_algs(void) +{ + int i; + + crypto_unregister_aead(&aes_gcm_alg); + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) + crypto_unregister_alg(&aes_algs[i]); +} + +static int mtk_aes_register_algs(void) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + err = crypto_register_alg(&aes_algs[i]); + if (err) + goto err_aes_algs; + } + + err = crypto_register_aead(&aes_gcm_alg); + if (err) + goto err_aes_algs; + + return 0; + +err_aes_algs: + for (; i--; ) + crypto_unregister_alg(&aes_algs[i]); + + return err; +} + +int mtk_cipher_alg_register(struct mtk_cryp *cryp) +{ + int ret; + + INIT_LIST_HEAD(&cryp->aes_list); + + /* Initialize two cipher records */ + ret = mtk_aes_record_init(cryp); + if (ret) + goto err_record; + + /* Ring0 is use by encryption record */ + ret = devm_request_irq(cryp->dev, cryp->irq[RING0], mtk_aes_enc_irq, + IRQF_TRIGGER_LOW, "mtk-aes", cryp); + if (ret) { + dev_err(cryp->dev, "unable to request AES encryption irq.\n"); + goto err_res; + } + + /* Ring1 is use by decryption record */ + ret = devm_request_irq(cryp->dev, cryp->irq[RING1], mtk_aes_dec_irq, + IRQF_TRIGGER_LOW, "mtk-aes", cryp); + if (ret) { + dev_err(cryp->dev, "unable to request AES decryption irq.\n"); + goto err_res; + } + + /* Enable ring0 and ring1 interrupt */ + mtk_aes_write(cryp, AIC_ENABLE_SET(RING0), MTK_IRQ_RDR0); + mtk_aes_write(cryp, AIC_ENABLE_SET(RING1), MTK_IRQ_RDR1); + + spin_lock(&mtk_aes.lock); + list_add_tail(&cryp->aes_list, &mtk_aes.dev_list); + spin_unlock(&mtk_aes.lock); + + ret = mtk_aes_register_algs(); + if (ret) + goto err_algs; + + return 0; + +err_algs: + spin_lock(&mtk_aes.lock); + list_del(&cryp->aes_list); + spin_unlock(&mtk_aes.lock); +err_res: + mtk_aes_record_free(cryp); +err_record: + + dev_err(cryp->dev, "mtk-aes initialization failed.\n"); + return ret; +} + +void mtk_cipher_alg_release(struct mtk_cryp *cryp) +{ + spin_lock(&mtk_aes.lock); + list_del(&cryp->aes_list); + spin_unlock(&mtk_aes.lock); + + mtk_aes_unregister_algs(); + mtk_aes_record_free(cryp); +} diff --git a/drivers/crypto/mediatek/mtk-platform.c b/drivers/crypto/mediatek/mtk-platform.c new file mode 100644 index 000000000000..a9c713d4c733 --- /dev/null +++ b/drivers/crypto/mediatek/mtk-platform.c @@ -0,0 +1,604 @@ +/* + * Driver for EIP97 cryptographic accelerator. 
+ * + * Copyright (c) 2016 Ryder Lee <ryder.lee@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/clk.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include "mtk-platform.h" + +#define MTK_BURST_SIZE_MSK GENMASK(7, 4) +#define MTK_BURST_SIZE(x) ((x) << 4) +#define MTK_DESC_SIZE(x) ((x) << 0) +#define MTK_DESC_OFFSET(x) ((x) << 16) +#define MTK_DESC_FETCH_SIZE(x) ((x) << 0) +#define MTK_DESC_FETCH_THRESH(x) ((x) << 16) +#define MTK_DESC_OVL_IRQ_EN BIT(25) +#define MTK_DESC_ATP_PRESENT BIT(30) + +#define MTK_DFSE_IDLE GENMASK(3, 0) +#define MTK_DFSE_THR_CTRL_EN BIT(30) +#define MTK_DFSE_THR_CTRL_RESET BIT(31) +#define MTK_DFSE_RING_ID(x) (((x) >> 12) & GENMASK(3, 0)) +#define MTK_DFSE_MIN_DATA(x) ((x) << 0) +#define MTK_DFSE_MAX_DATA(x) ((x) << 8) +#define MTK_DFE_MIN_CTRL(x) ((x) << 16) +#define MTK_DFE_MAX_CTRL(x) ((x) << 24) + +#define MTK_IN_BUF_MIN_THRESH(x) ((x) << 8) +#define MTK_IN_BUF_MAX_THRESH(x) ((x) << 12) +#define MTK_OUT_BUF_MIN_THRESH(x) ((x) << 0) +#define MTK_OUT_BUF_MAX_THRESH(x) ((x) << 4) +#define MTK_IN_TBUF_SIZE(x) (((x) >> 4) & GENMASK(3, 0)) +#define MTK_IN_DBUF_SIZE(x) (((x) >> 8) & GENMASK(3, 0)) +#define MTK_OUT_DBUF_SIZE(x) (((x) >> 16) & GENMASK(3, 0)) +#define MTK_CMD_FIFO_SIZE(x) (((x) >> 8) & GENMASK(3, 0)) +#define MTK_RES_FIFO_SIZE(x) (((x) >> 12) & GENMASK(3, 0)) + +#define MTK_PE_TK_LOC_AVL BIT(2) +#define MTK_PE_PROC_HELD BIT(14) +#define MTK_PE_TK_TIMEOUT_EN BIT(22) +#define MTK_PE_INPUT_DMA_ERR BIT(0) +#define MTK_PE_OUTPUT_DMA_ERR BIT(1) +#define MTK_PE_PKT_PORC_ERR BIT(2) +#define MTK_PE_PKT_TIMEOUT BIT(3) +#define MTK_PE_FATAL_ERR BIT(14) +#define MTK_PE_INPUT_DMA_ERR_EN BIT(16) +#define MTK_PE_OUTPUT_DMA_ERR_EN BIT(17) +#define MTK_PE_PKT_PORC_ERR_EN BIT(18) +#define MTK_PE_PKT_TIMEOUT_EN BIT(19) +#define MTK_PE_FATAL_ERR_EN BIT(30) +#define MTK_PE_INT_OUT_EN BIT(31) + +#define MTK_HIA_SIGNATURE ((u16)0x35ca) +#define MTK_HIA_DATA_WIDTH(x) (((x) >> 25) & GENMASK(1, 0)) +#define MTK_HIA_DMA_LENGTH(x) (((x) >> 20) & GENMASK(4, 0)) +#define MTK_CDR_STAT_CLR GENMASK(4, 0) +#define MTK_RDR_STAT_CLR GENMASK(7, 0) + +#define MTK_AIC_INT_MSK GENMASK(5, 0) +#define MTK_AIC_VER_MSK (GENMASK(15, 0) | GENMASK(27, 20)) +#define MTK_AIC_VER11 0x011036c9 +#define MTK_AIC_VER12 0x012036c9 +#define MTK_AIC_G_CLR GENMASK(30, 20) + +/** + * EIP97 is an integrated security subsystem to accelerate cryptographic + * functions and protocols to offload the host processor. + * Some important hardware modules are briefly introduced below: + * + * Host Interface Adapter(HIA) - the main interface between the host + * system and the hardware subsystem. It is responsible for attaching + * processing engine to the specific host bus interface and provides a + * standardized software view for off loading tasks to the engine. + * + * Command Descriptor Ring Manager(CDR Manager) - keeps track of how many + * CD the host has prepared in the CDR. It monitors the fill level of its + * CD-FIFO and if there's sufficient space for the next block of descriptors, + * then it fires off a DMA request to fetch a block of CDs. 
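[Editor's note] The HIA_OPTIONS decode used later can be exercised on the host. In this sketch the register value is invented purely for illustration (the real value comes from an MMIO read of HIA_OPTIONS); only the field extraction mirrors the MTK_HIA_* macros above.

#include <stdint.h>
#include <stdio.h>

#define GENMASK32(h, l)		((~0u << (l)) & (~0u >> (31 - (h))))

/* Same field extraction as MTK_HIA_DATA_WIDTH()/MTK_HIA_DMA_LENGTH(). */
#define HIA_DATA_WIDTH(x)	(((x) >> 25) & GENMASK32(1, 0))
#define HIA_DMA_LENGTH(x)	(((x) >> 20) & GENMASK32(4, 0))

int main(void)
{
	/* Invented sample: width field = 2, DMA length field = 15. */
	uint32_t hia_opt = (2u << 25) | (15u << 20);

	printf("data width=%u, dma length=%u\n",
	       (unsigned)HIA_DATA_WIDTH(hia_opt),
	       (unsigned)HIA_DMA_LENGTH(hia_opt));
	return 0;
}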
+ * + * Data fetch engine(DFE) - It is responsible for parsing the CD and + * setting up the required control and packet data DMA transfers from + * system memory to the processing engine. + * + * Result Descriptor Ring Manager(RDR Manager) - same as CDR Manager, + * but target is result descriptors, Moreover, it also handles the RD + * updates under control of the DSE. For each packet data segment + * processed, the DSE triggers the RDR Manager to write the updated RD. + * If triggered to update, the RDR Manager sets up a DMA operation to + * copy the RD from the DSE to the correct location in the RDR. + * + * Data Store Engine(DSE) - It is responsible for parsing the prepared RD + * and setting up the required control and packet data DMA transfers from + * the processing engine to system memory. + * + * Advanced Interrupt Controllers(AICs) - receive interrupt request signals + * from various sources and combine them into one interrupt output. + * The AICs are used by: + * - One for the HIA global and processing engine interrupts. + * - The others for the descriptor ring interrupts. + */ + +/* Cryptographic engine capabilities */ +struct mtk_sys_cap { + /* host interface adapter */ + u32 hia_ver; + u32 hia_opt; + /* packet engine */ + u32 pkt_eng_opt; + /* global hardware */ + u32 hw_opt; +}; + +static void mtk_desc_ring_link(struct mtk_cryp *cryp, u32 mask) +{ + /* Assign rings to DFE/DSE thread and enable it */ + writel(MTK_DFSE_THR_CTRL_EN | mask, cryp->base + DFE_THR_CTRL); + writel(MTK_DFSE_THR_CTRL_EN | mask, cryp->base + DSE_THR_CTRL); +} + +static void mtk_dfe_dse_buf_setup(struct mtk_cryp *cryp, + struct mtk_sys_cap *cap) +{ + u32 width = MTK_HIA_DATA_WIDTH(cap->hia_opt) + 2; + u32 len = MTK_HIA_DMA_LENGTH(cap->hia_opt) - 1; + u32 ipbuf = min((u32)MTK_IN_DBUF_SIZE(cap->hw_opt) + width, len); + u32 opbuf = min((u32)MTK_OUT_DBUF_SIZE(cap->hw_opt) + width, len); + u32 itbuf = min((u32)MTK_IN_TBUF_SIZE(cap->hw_opt) + width, len); + + writel(MTK_DFSE_MIN_DATA(ipbuf - 1) | + MTK_DFSE_MAX_DATA(ipbuf) | + MTK_DFE_MIN_CTRL(itbuf - 1) | + MTK_DFE_MAX_CTRL(itbuf), + cryp->base + DFE_CFG); + + writel(MTK_DFSE_MIN_DATA(opbuf - 1) | + MTK_DFSE_MAX_DATA(opbuf), + cryp->base + DSE_CFG); + + writel(MTK_IN_BUF_MIN_THRESH(ipbuf - 1) | + MTK_IN_BUF_MAX_THRESH(ipbuf), + cryp->base + PE_IN_DBUF_THRESH); + + writel(MTK_IN_BUF_MIN_THRESH(itbuf - 1) | + MTK_IN_BUF_MAX_THRESH(itbuf), + cryp->base + PE_IN_TBUF_THRESH); + + writel(MTK_OUT_BUF_MIN_THRESH(opbuf - 1) | + MTK_OUT_BUF_MAX_THRESH(opbuf), + cryp->base + PE_OUT_DBUF_THRESH); + + writel(0, cryp->base + PE_OUT_TBUF_THRESH); + writel(0, cryp->base + PE_OUT_BUF_CTRL); +} + +static int mtk_dfe_dse_state_check(struct mtk_cryp *cryp) +{ + int ret = -EINVAL; + u32 val; + + /* Check for completion of all DMA transfers */ + val = readl(cryp->base + DFE_THR_STAT); + if (MTK_DFSE_RING_ID(val) == MTK_DFSE_IDLE) { + val = readl(cryp->base + DSE_THR_STAT); + if (MTK_DFSE_RING_ID(val) == MTK_DFSE_IDLE) + ret = 0; + } + + if (!ret) { + /* Take DFE/DSE thread out of reset */ + writel(0, cryp->base + DFE_THR_CTRL); + writel(0, cryp->base + DSE_THR_CTRL); + } else { + return -EBUSY; + } + + return 0; +} + +static int mtk_dfe_dse_reset(struct mtk_cryp *cryp) +{ + int err; + + /* Reset DSE/DFE and correct system priorities for all rings. 
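[Editor's note] To make the threshold arithmetic in mtk_dfe_dse_buf_setup() concrete, here is a host-side walk-through with assumed capability fields; the real ones are read from HIA_OPTIONS and EIP97_OPTIONS.

#include <stdint.h>
#include <stdio.h>

static uint32_t min_u32(uint32_t a, uint32_t b)
{
	return a < b ? a : b;
}

int main(void)
{
	/* Assumed capability fields, log2 units throughout. */
	uint32_t data_width = 2;	/* MTK_HIA_DATA_WIDTH() */
	uint32_t dma_len = 15;		/* MTK_HIA_DMA_LENGTH() */
	uint32_t in_dbuf = 7;		/* MTK_IN_DBUF_SIZE() */

	uint32_t width = data_width + 2;
	uint32_t len = dma_len - 1;
	uint32_t ipbuf = min_u32(in_dbuf + width, len);

	/* DFE_CFG then gets MIN_DATA(ipbuf - 1) | MAX_DATA(ipbuf). */
	printf("ipbuf=%u -> min threshold %u, max threshold %u\n",
	       ipbuf, ipbuf - 1, ipbuf);
	return 0;
}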
*/ + writel(MTK_DFSE_THR_CTRL_RESET, cryp->base + DFE_THR_CTRL); + writel(0, cryp->base + DFE_PRIO_0); + writel(0, cryp->base + DFE_PRIO_1); + writel(0, cryp->base + DFE_PRIO_2); + writel(0, cryp->base + DFE_PRIO_3); + + writel(MTK_DFSE_THR_CTRL_RESET, cryp->base + DSE_THR_CTRL); + writel(0, cryp->base + DSE_PRIO_0); + writel(0, cryp->base + DSE_PRIO_1); + writel(0, cryp->base + DSE_PRIO_2); + writel(0, cryp->base + DSE_PRIO_3); + + err = mtk_dfe_dse_state_check(cryp); + if (err) + return err; + + return 0; +} + +static void mtk_cmd_desc_ring_setup(struct mtk_cryp *cryp, + int i, struct mtk_sys_cap *cap) +{ + /* Full descriptor that fits FIFO minus one */ + u32 count = + ((1 << MTK_CMD_FIFO_SIZE(cap->hia_opt)) / MTK_DESC_SZ) - 1; + + /* Temporarily disable external triggering */ + writel(0, cryp->base + CDR_CFG(i)); + + /* Clear CDR count */ + writel(MTK_CNT_RST, cryp->base + CDR_PREP_COUNT(i)); + writel(MTK_CNT_RST, cryp->base + CDR_PROC_COUNT(i)); + + writel(0, cryp->base + CDR_PREP_PNTR(i)); + writel(0, cryp->base + CDR_PROC_PNTR(i)); + writel(0, cryp->base + CDR_DMA_CFG(i)); + + /* Configure CDR host address space */ + writel(0, cryp->base + CDR_BASE_ADDR_HI(i)); + writel(cryp->ring[i]->cmd_dma, cryp->base + CDR_BASE_ADDR_LO(i)); + + writel(MTK_DESC_RING_SZ, cryp->base + CDR_RING_SIZE(i)); + + /* Clear and disable all CDR interrupts */ + writel(MTK_CDR_STAT_CLR, cryp->base + CDR_STAT(i)); + + /* + * Set command descriptor offset and enable additional + * token present in descriptor. + */ + writel(MTK_DESC_SIZE(MTK_DESC_SZ) | + MTK_DESC_OFFSET(MTK_DESC_OFF) | + MTK_DESC_ATP_PRESENT, + cryp->base + CDR_DESC_SIZE(i)); + + writel(MTK_DESC_FETCH_SIZE(count * MTK_DESC_OFF) | + MTK_DESC_FETCH_THRESH(count * MTK_DESC_SZ), + cryp->base + CDR_CFG(i)); +} + +static void mtk_res_desc_ring_setup(struct mtk_cryp *cryp, + int i, struct mtk_sys_cap *cap) +{ + u32 rndup = 2; + u32 count = ((1 << MTK_RES_FIFO_SIZE(cap->hia_opt)) / rndup) - 1; + + /* Temporarily disable external triggering */ + writel(0, cryp->base + RDR_CFG(i)); + + /* Clear RDR count */ + writel(MTK_CNT_RST, cryp->base + RDR_PREP_COUNT(i)); + writel(MTK_CNT_RST, cryp->base + RDR_PROC_COUNT(i)); + + writel(0, cryp->base + RDR_PREP_PNTR(i)); + writel(0, cryp->base + RDR_PROC_PNTR(i)); + writel(0, cryp->base + RDR_DMA_CFG(i)); + + /* Configure RDR host address space */ + writel(0, cryp->base + RDR_BASE_ADDR_HI(i)); + writel(cryp->ring[i]->res_dma, cryp->base + RDR_BASE_ADDR_LO(i)); + + writel(MTK_DESC_RING_SZ, cryp->base + RDR_RING_SIZE(i)); + writel(MTK_RDR_STAT_CLR, cryp->base + RDR_STAT(i)); + + /* + * RDR manager generates update interrupts on a per-completed-packet, + * and the rd_proc_thresh_irq interrupt is fired when proc_pkt_count + * for the RDR exceeds the number of packets. + */ + writel(MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE, + cryp->base + RDR_THRESH(i)); + + /* + * Configure a threshold and time-out value for the processed + * result descriptors (or complete packets) that are written to + * the RDR. + */ + writel(MTK_DESC_SIZE(MTK_DESC_SZ) | MTK_DESC_OFFSET(MTK_DESC_OFF), + cryp->base + RDR_DESC_SIZE(i)); + + /* + * Configure HIA fetch size and fetch threshold that are used to + * fetch blocks of multiple descriptors. 
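[Editor's note] The fetch-count computation above packs two units into one register: a fetch size in 8-word descriptor strides and a threshold in 6-word payload sizes. A standalone check with an example FIFO-size field (the real value comes from MTK_CMD_FIFO_SIZE(cap->hia_opt)):

#include <stdint.h>
#include <stdio.h>

#define DESC_OFF	8		/* MTK_DESC_OFF: stride in words */
#define DESC_SZ		(DESC_OFF - 2)	/* MTK_DESC_SZ: useful words */

int main(void)
{
	uint32_t fifo_size = 7;	/* example log2 FIFO size in words */
	/* As in mtk_cmd_desc_ring_setup(): as many whole descriptors as
	 * fit in the CD-FIFO, minus one. */
	uint32_t count = ((1u << fifo_size) / DESC_SZ) - 1;

	printf("fetch size %u words, threshold %u words (%u descriptors)\n",
	       count * DESC_OFF, count * DESC_SZ, count);
	return 0;
}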
+ */ + writel(MTK_DESC_FETCH_SIZE(count * MTK_DESC_OFF) | + MTK_DESC_FETCH_THRESH(count * rndup) | + MTK_DESC_OVL_IRQ_EN, + cryp->base + RDR_CFG(i)); +} + +static int mtk_packet_engine_setup(struct mtk_cryp *cryp) +{ + struct mtk_sys_cap cap; + int i, err; + u32 val; + + cap.hia_ver = readl(cryp->base + HIA_VERSION); + cap.hia_opt = readl(cryp->base + HIA_OPTIONS); + cap.hw_opt = readl(cryp->base + EIP97_OPTIONS); + + if (!(((u16)cap.hia_ver) == MTK_HIA_SIGNATURE)) + return -EINVAL; + + /* Configure endianness conversion method for master (DMA) interface */ + writel(0, cryp->base + EIP97_MST_CTRL); + + /* Set HIA burst size */ + val = readl(cryp->base + HIA_MST_CTRL); + val &= ~MTK_BURST_SIZE_MSK; + val |= MTK_BURST_SIZE(5); + writel(val, cryp->base + HIA_MST_CTRL); + + err = mtk_dfe_dse_reset(cryp); + if (err) { + dev_err(cryp->dev, "Failed to reset DFE and DSE.\n"); + return err; + } + + mtk_dfe_dse_buf_setup(cryp, &cap); + + /* Enable the 4 rings for the packet engines. */ + mtk_desc_ring_link(cryp, 0xf); + + for (i = 0; i < RING_MAX; i++) { + mtk_cmd_desc_ring_setup(cryp, i, &cap); + mtk_res_desc_ring_setup(cryp, i, &cap); + } + + writel(MTK_PE_TK_LOC_AVL | MTK_PE_PROC_HELD | MTK_PE_TK_TIMEOUT_EN, + cryp->base + PE_TOKEN_CTRL_STAT); + + /* Clear all pending interrupts */ + writel(MTK_AIC_G_CLR, cryp->base + AIC_G_ACK); + writel(MTK_PE_INPUT_DMA_ERR | MTK_PE_OUTPUT_DMA_ERR | + MTK_PE_PKT_PORC_ERR | MTK_PE_PKT_TIMEOUT | + MTK_PE_FATAL_ERR | MTK_PE_INPUT_DMA_ERR_EN | + MTK_PE_OUTPUT_DMA_ERR_EN | MTK_PE_PKT_PORC_ERR_EN | + MTK_PE_PKT_TIMEOUT_EN | MTK_PE_FATAL_ERR_EN | + MTK_PE_INT_OUT_EN, + cryp->base + PE_INTERRUPT_CTRL_STAT); + + return 0; +} + +static int mtk_aic_cap_check(struct mtk_cryp *cryp, int hw) +{ + u32 val; + + if (hw == RING_MAX) + val = readl(cryp->base + AIC_G_VERSION); + else + val = readl(cryp->base + AIC_VERSION(hw)); + + val &= MTK_AIC_VER_MSK; + if (val != MTK_AIC_VER11 && val != MTK_AIC_VER12) + return -ENXIO; + + if (hw == RING_MAX) + val = readl(cryp->base + AIC_G_OPTIONS); + else + val = readl(cryp->base + AIC_OPTIONS(hw)); + + val &= MTK_AIC_INT_MSK; + if (!val || val > 32) + return -ENXIO; + + return 0; +} + +static int mtk_aic_init(struct mtk_cryp *cryp, int hw) +{ + int err; + + err = mtk_aic_cap_check(cryp, hw); + if (err) + return err; + + /* Disable all interrupts and set initial configuration */ + if (hw == RING_MAX) { + writel(0, cryp->base + AIC_G_ENABLE_CTRL); + writel(0, cryp->base + AIC_G_POL_CTRL); + writel(0, cryp->base + AIC_G_TYPE_CTRL); + writel(0, cryp->base + AIC_G_ENABLE_SET); + } else { + writel(0, cryp->base + AIC_ENABLE_CTRL(hw)); + writel(0, cryp->base + AIC_POL_CTRL(hw)); + writel(0, cryp->base + AIC_TYPE_CTRL(hw)); + writel(0, cryp->base + AIC_ENABLE_SET(hw)); + } + + return 0; +} + +static int mtk_accelerator_init(struct mtk_cryp *cryp) +{ + int i, err; + + /* Initialize advanced interrupt controller(AIC) */ + for (i = 0; i < MTK_IRQ_NUM; i++) { + err = mtk_aic_init(cryp, i); + if (err) { + dev_err(cryp->dev, "Failed to initialize AIC.\n"); + return err; + } + } + + /* Initialize packet engine */ + err = mtk_packet_engine_setup(cryp); + if (err) { + dev_err(cryp->dev, "Failed to configure packet engine.\n"); + return err; + } + + return 0; +} + +static void mtk_desc_dma_free(struct mtk_cryp *cryp) +{ + int i; + + for (i = 0; i < RING_MAX; i++) { + dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, + cryp->ring[i]->res_base, + cryp->ring[i]->res_dma); + dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ, + cryp->ring[i]->cmd_base, + 
cryp->ring[i]->cmd_dma);
+		kfree(cryp->ring[i]);
+	}
+}
+
+static int mtk_desc_ring_alloc(struct mtk_cryp *cryp)
+{
+	struct mtk_ring **ring = cryp->ring;
+	int i, err = -ENOMEM;
+
+	for (i = 0; i < RING_MAX; i++) {
+		ring[i] = kzalloc(sizeof(**ring), GFP_KERNEL);
+		if (!ring[i])
+			goto err_cleanup;
+
+		ring[i]->cmd_base = dma_zalloc_coherent(cryp->dev,
+					MTK_DESC_RING_SZ,
+					&ring[i]->cmd_dma,
+					GFP_KERNEL);
+		if (!ring[i]->cmd_base)
+			goto err_cleanup;
+
+		ring[i]->res_base = dma_zalloc_coherent(cryp->dev,
+					MTK_DESC_RING_SZ,
+					&ring[i]->res_dma,
+					GFP_KERNEL);
+		if (!ring[i]->res_base)
+			goto err_cleanup;
+	}
+	return 0;
+
+err_cleanup:
+	for (; i--; ) {
+		dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ,
+				  ring[i]->res_base, ring[i]->res_dma);
+		dma_free_coherent(cryp->dev, MTK_DESC_RING_SZ,
+				  ring[i]->cmd_base, ring[i]->cmd_dma);
+		kfree(ring[i]);
+	}
+	return err;
+}
+
+static int mtk_crypto_probe(struct platform_device *pdev)
+{
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	struct mtk_cryp *cryp;
+	int i, err;
+
+	cryp = devm_kzalloc(&pdev->dev, sizeof(*cryp), GFP_KERNEL);
+	if (!cryp)
+		return -ENOMEM;
+
+	cryp->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(cryp->base))
+		return PTR_ERR(cryp->base);
+
+	for (i = 0; i < MTK_IRQ_NUM; i++) {
+		cryp->irq[i] = platform_get_irq(pdev, i);
+		if (cryp->irq[i] < 0) {
+			dev_err(&pdev->dev, "no IRQ:%d resource info\n", i);
+			return -ENXIO;
+		}
+	}
+
+	cryp->clk_ethif = devm_clk_get(&pdev->dev, "ethif");
+	cryp->clk_cryp = devm_clk_get(&pdev->dev, "cryp");
+	if (IS_ERR(cryp->clk_ethif) || IS_ERR(cryp->clk_cryp))
+		return -EPROBE_DEFER;
+
+	cryp->dev = &pdev->dev;
+	pm_runtime_enable(cryp->dev);
+	pm_runtime_get_sync(cryp->dev);
+
+	err = clk_prepare_enable(cryp->clk_ethif);
+	if (err)
+		goto err_clk_ethif;
+
+	err = clk_prepare_enable(cryp->clk_cryp);
+	if (err)
+		goto err_clk_cryp;
+
+	/* Allocate four command/result descriptor rings */
+	err = mtk_desc_ring_alloc(cryp);
+	if (err) {
+		dev_err(cryp->dev, "Unable to allocate descriptor rings.\n");
+		goto err_resource;
+	}
+
+	/* Initialize hardware modules */
+	err = mtk_accelerator_init(cryp);
+	if (err) {
+		dev_err(cryp->dev, "Failed to initialize cryptographic engine.\n");
+		goto err_engine;
+	}
+
+	err = mtk_cipher_alg_register(cryp);
+	if (err) {
+		dev_err(cryp->dev, "Unable to register cipher algorithm.\n");
+		goto err_cipher;
+	}
+
+	err = mtk_hash_alg_register(cryp);
+	if (err) {
+		dev_err(cryp->dev, "Unable to register hash algorithm.\n");
+		goto err_hash;
+	}
+
+	platform_set_drvdata(pdev, cryp);
+	return 0;
+
+err_hash:
+	mtk_cipher_alg_release(cryp);
+err_cipher:
+	mtk_dfe_dse_reset(cryp);
+err_engine:
+	mtk_desc_dma_free(cryp);
+err_resource:
+	clk_disable_unprepare(cryp->clk_cryp);
+err_clk_cryp:
+	clk_disable_unprepare(cryp->clk_ethif);
+err_clk_ethif:
+	pm_runtime_put_sync(cryp->dev);
+	pm_runtime_disable(cryp->dev);
+
+	return err;
+}
+
+static int mtk_crypto_remove(struct platform_device *pdev)
+{
+	struct mtk_cryp *cryp = platform_get_drvdata(pdev);
+
+	mtk_hash_alg_release(cryp);
+	mtk_cipher_alg_release(cryp);
+	mtk_desc_dma_free(cryp);
+
+	clk_disable_unprepare(cryp->clk_cryp);
+	clk_disable_unprepare(cryp->clk_ethif);
+
+	pm_runtime_put_sync(cryp->dev);
+	pm_runtime_disable(cryp->dev);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static const struct of_device_id of_crypto_id[] = {
+	{ .compatible = "mediatek,eip97-crypto" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, of_crypto_id);
+
+static struct platform_driver mtk_crypto_driver
= { + .probe = mtk_crypto_probe, + .remove = mtk_crypto_remove, + .driver = { + .name = "mtk-crypto", + .owner = THIS_MODULE, + .of_match_table = of_crypto_id, + }, +}; +module_platform_driver(mtk_crypto_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ryder Lee <ryder.lee@mediatek.com>"); +MODULE_DESCRIPTION("Cryptographic accelerator driver for EIP97"); diff --git a/drivers/crypto/mediatek/mtk-platform.h b/drivers/crypto/mediatek/mtk-platform.h new file mode 100644 index 000000000000..ed6d8717f7f4 --- /dev/null +++ b/drivers/crypto/mediatek/mtk-platform.h @@ -0,0 +1,231 @@ +/* + * Driver for EIP97 cryptographic accelerator. + * + * Copyright (c) 2016 Ryder Lee <ryder.lee@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef __MTK_PLATFORM_H_ +#define __MTK_PLATFORM_H_ + +#include <crypto/algapi.h> +#include <crypto/internal/aead.h> +#include <crypto/internal/hash.h> +#include <crypto/scatterwalk.h> +#include <crypto/skcipher.h> +#include <linux/crypto.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/scatterlist.h> +#include "mtk-regs.h" + +#define MTK_RDR_PROC_THRESH BIT(0) +#define MTK_RDR_PROC_MODE BIT(23) +#define MTK_CNT_RST BIT(31) +#define MTK_IRQ_RDR0 BIT(1) +#define MTK_IRQ_RDR1 BIT(3) +#define MTK_IRQ_RDR2 BIT(5) +#define MTK_IRQ_RDR3 BIT(7) + +#define SIZE_IN_WORDS(x) ((x) >> 2) + +/** + * Ring 0/1 are used by AES encrypt and decrypt. + * Ring 2/3 are used by SHA. + */ +enum { + RING0 = 0, + RING1, + RING2, + RING3, + RING_MAX, +}; + +#define MTK_REC_NUM (RING_MAX / 2) +#define MTK_IRQ_NUM 5 + +/** + * struct mtk_desc - DMA descriptor + * @hdr: the descriptor control header + * @buf: DMA address of input buffer segment + * @ct: DMA address of command token that control operation flow + * @ct_hdr: the command token control header + * @tag: the user-defined field + * @tfm: DMA address of transform state + * @bound: align descriptors offset boundary + * + * Structure passed to the crypto engine to describe where source + * data needs to be fetched and how it needs to be processed. + */ +struct mtk_desc { + __le32 hdr; + __le32 buf; + __le32 ct; + __le32 ct_hdr; + __le32 tag; + __le32 tfm; + __le32 bound[2]; +}; + +#define MTK_DESC_NUM 512 +#define MTK_DESC_OFF SIZE_IN_WORDS(sizeof(struct mtk_desc)) +#define MTK_DESC_SZ (MTK_DESC_OFF - 2) +#define MTK_DESC_RING_SZ ((sizeof(struct mtk_desc) * MTK_DESC_NUM)) +#define MTK_DESC_CNT(x) ((MTK_DESC_OFF * (x)) << 2) +#define MTK_DESC_LAST cpu_to_le32(BIT(22)) +#define MTK_DESC_FIRST cpu_to_le32(BIT(23)) +#define MTK_DESC_BUF_LEN(x) cpu_to_le32(x) +#define MTK_DESC_CT_LEN(x) cpu_to_le32((x) << 24) + +/** + * struct mtk_ring - Descriptor ring + * @cmd_base: pointer to command descriptor ring base + * @cmd_dma: DMA address of command descriptor ring + * @cmd_pos: current position in the command descriptor ring + * @res_base: pointer to result descriptor ring base + * @res_dma: DMA address of result descriptor ring + * @res_pos: current position in the result descriptor ring + * + * A descriptor ring is a circular buffer that is used to manage + * one or more descriptors. There are two type of descriptor rings; + * the command descriptor ring and result descriptor ring. 
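[Editor's note] A host-side mirror of the descriptor geometry, useful for sanity-checking the MTK_DESC_* macros; the struct and macro names here are local stand-ins for the kernel ones.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Mirror of struct mtk_desc: eight 32-bit words, the trailing pair
 * only pads each descriptor to a 32-byte boundary. */
struct desc {
	uint32_t hdr, buf, ct, ct_hdr, tag, tfm, bound[2];
};

#define SIZE_IN_WORDS(x)	((x) >> 2)
#define DESC_NUM		512

int main(void)
{
	size_t off = SIZE_IN_WORDS(sizeof(struct desc));	/* MTK_DESC_OFF */
	size_t sz = off - 2;					/* MTK_DESC_SZ */

	assert(sizeof(struct desc) == 32);
	/* MTK_DESC_CNT(x): the prepared-count registers take bytes. */
	printf("off=%zu words, sz=%zu words, one desc counts as %zu bytes\n",
	       off, sz, (off * 1) << 2);
	printf("ring size: %zu bytes\n", sizeof(struct desc) * DESC_NUM);
	return 0;
}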
+ */
+struct mtk_ring {
+	struct mtk_desc *cmd_base;
+	dma_addr_t cmd_dma;
+	u32 cmd_pos;
+	struct mtk_desc *res_base;
+	dma_addr_t res_dma;
+	u32 res_pos;
+};
+
+/**
+ * struct mtk_aes_dma - Structure that holds sg list info
+ * @sg:		pointer to scatter-gather list
+ * @nents:	number of entries in the sg list
+ * @remainder:	remainder of the sg list
+ * @sg_len:	number of entries in the mapped sg list
+ */
+struct mtk_aes_dma {
+	struct scatterlist *sg;
+	int nents;
+	u32 remainder;
+	u32 sg_len;
+};
+
+struct mtk_aes_base_ctx;
+struct mtk_aes_rec;
+struct mtk_cryp;
+
+typedef int (*mtk_aes_fn)(struct mtk_cryp *cryp, struct mtk_aes_rec *aes);
+
+/**
+ * struct mtk_aes_rec - AES operation record
+ * @queue:	crypto request queue
+ * @areq:	pointer to async request
+ * @task:	tasklet used by the AES interrupt handler
+ * @ctx:	pointer to current context
+ * @src:	the structure that holds source sg list info
+ * @dst:	the structure that holds destination sg list info
+ * @aligned_sg:	scatter list used for alignment
+ * @real_dst:	pointer to the destination sg list
+ * @resume:	pointer to resume function
+ * @total:	request buffer length
+ * @buf:	pointer to page buffer
+ * @id:		record identification
+ * @flags:	flags describing the AES operation state
+ * @lock:	the async queue lock
+ *
+ * Structure used to record AES execution state.
+ */
+struct mtk_aes_rec {
+	struct crypto_queue queue;
+	struct crypto_async_request *areq;
+	struct tasklet_struct task;
+	struct mtk_aes_base_ctx *ctx;
+	struct mtk_aes_dma src;
+	struct mtk_aes_dma dst;
+
+	struct scatterlist aligned_sg;
+	struct scatterlist *real_dst;
+
+	mtk_aes_fn resume;
+
+	size_t total;
+	void *buf;
+
+	u8 id;
+	unsigned long flags;
+	/* queue lock */
+	spinlock_t lock;
+};
+
+/**
+ * struct mtk_sha_rec - SHA operation record
+ * @queue:	crypto request queue
+ * @req:	pointer to ahash request
+ * @task:	tasklet used by the SHA interrupt handler
+ * @id:		record identification
+ * @flags:	flags describing the SHA operation state
+ * @lock:	the async queue lock
+ *
+ * Structure used to record SHA execution state.
+ */
+struct mtk_sha_rec {
+	struct crypto_queue queue;
+	struct ahash_request *req;
+	struct tasklet_struct task;
+
+	u8 id;
+	unsigned long flags;
+	/* queue lock */
+	spinlock_t lock;
+};
+
+/**
+ * struct mtk_cryp - Cryptographic device
+ * @base:	pointer to mapped register I/O base
+ * @dev:	pointer to device
+ * @clk_ethif:	pointer to ethif clock
+ * @clk_cryp:	pointer to crypto clock
+ * @irq:	global system and ring IRQs
+ * @ring:	the command/result descriptor rings
+ * @aes:	AES operation records, one per AES ring
+ * @sha:	SHA operation records, one per SHA ring
+ * @aes_list:	device list of AES
+ * @sha_list:	device list of SHA
+ * @tmp:	pointer to temporary buffer for internal use
+ * @tmp_dma:	DMA address of temporary buffer
+ * @rec:	round-robin toggle that binds a SHA tfm to a record
+ *
+ * Structure storing cryptographic device information.
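[Editor's note] The cmd_pos/res_pos fields are advanced with the wrap-at-MTK_DESC_NUM idiom seen in mtk_aes_xmit(); a minimal standalone version:

#include <stdint.h>
#include <stdio.h>

#define DESC_NUM 512	/* MTK_DESC_NUM */

/* Ring-position advance as done in mtk_aes_xmit(): the step is always
 * one descriptor, so a compare beats a modulo. */
static uint32_t ring_advance(uint32_t pos)
{
	return (++pos == DESC_NUM) ? 0 : pos;
}

int main(void)
{
	printf("510 -> %u\n", ring_advance(510));	/* 511 */
	printf("511 -> %u\n", ring_advance(511));	/* 0 */
	return 0;
}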
+ */ +struct mtk_cryp { + void __iomem *base; + struct device *dev; + struct clk *clk_ethif; + struct clk *clk_cryp; + int irq[MTK_IRQ_NUM]; + + struct mtk_ring *ring[RING_MAX]; + struct mtk_aes_rec *aes[MTK_REC_NUM]; + struct mtk_sha_rec *sha[MTK_REC_NUM]; + + struct list_head aes_list; + struct list_head sha_list; + + void *tmp; + dma_addr_t tmp_dma; + bool rec; +}; + +int mtk_cipher_alg_register(struct mtk_cryp *cryp); +void mtk_cipher_alg_release(struct mtk_cryp *cryp); +int mtk_hash_alg_register(struct mtk_cryp *cryp); +void mtk_hash_alg_release(struct mtk_cryp *cryp); + +#endif /* __MTK_PLATFORM_H_ */ diff --git a/drivers/crypto/mediatek/mtk-regs.h b/drivers/crypto/mediatek/mtk-regs.h new file mode 100644 index 000000000000..94f4eb85be3f --- /dev/null +++ b/drivers/crypto/mediatek/mtk-regs.h @@ -0,0 +1,194 @@ +/* + * Support for MediaTek cryptographic accelerator. + * + * Copyright (c) 2016 MediaTek Inc. + * Author: Ryder Lee <ryder.lee@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + */ + +#ifndef __MTK_REGS_H__ +#define __MTK_REGS_H__ + +/* HIA, Command Descriptor Ring Manager */ +#define CDR_BASE_ADDR_LO(x) (0x0 + ((x) << 12)) +#define CDR_BASE_ADDR_HI(x) (0x4 + ((x) << 12)) +#define CDR_DATA_BASE_ADDR_LO(x) (0x8 + ((x) << 12)) +#define CDR_DATA_BASE_ADDR_HI(x) (0xC + ((x) << 12)) +#define CDR_ACD_BASE_ADDR_LO(x) (0x10 + ((x) << 12)) +#define CDR_ACD_BASE_ADDR_HI(x) (0x14 + ((x) << 12)) +#define CDR_RING_SIZE(x) (0x18 + ((x) << 12)) +#define CDR_DESC_SIZE(x) (0x1C + ((x) << 12)) +#define CDR_CFG(x) (0x20 + ((x) << 12)) +#define CDR_DMA_CFG(x) (0x24 + ((x) << 12)) +#define CDR_THRESH(x) (0x28 + ((x) << 12)) +#define CDR_PREP_COUNT(x) (0x2C + ((x) << 12)) +#define CDR_PROC_COUNT(x) (0x30 + ((x) << 12)) +#define CDR_PREP_PNTR(x) (0x34 + ((x) << 12)) +#define CDR_PROC_PNTR(x) (0x38 + ((x) << 12)) +#define CDR_STAT(x) (0x3C + ((x) << 12)) + +/* HIA, Result Descriptor Ring Manager */ +#define RDR_BASE_ADDR_LO(x) (0x800 + ((x) << 12)) +#define RDR_BASE_ADDR_HI(x) (0x804 + ((x) << 12)) +#define RDR_DATA_BASE_ADDR_LO(x) (0x808 + ((x) << 12)) +#define RDR_DATA_BASE_ADDR_HI(x) (0x80C + ((x) << 12)) +#define RDR_ACD_BASE_ADDR_LO(x) (0x810 + ((x) << 12)) +#define RDR_ACD_BASE_ADDR_HI(x) (0x814 + ((x) << 12)) +#define RDR_RING_SIZE(x) (0x818 + ((x) << 12)) +#define RDR_DESC_SIZE(x) (0x81C + ((x) << 12)) +#define RDR_CFG(x) (0x820 + ((x) << 12)) +#define RDR_DMA_CFG(x) (0x824 + ((x) << 12)) +#define RDR_THRESH(x) (0x828 + ((x) << 12)) +#define RDR_PREP_COUNT(x) (0x82C + ((x) << 12)) +#define RDR_PROC_COUNT(x) (0x830 + ((x) << 12)) +#define RDR_PREP_PNTR(x) (0x834 + ((x) << 12)) +#define RDR_PROC_PNTR(x) (0x838 + ((x) << 12)) +#define RDR_STAT(x) (0x83C + ((x) << 12)) + +/* HIA, Ring AIC */ +#define AIC_POL_CTRL(x) (0xE000 - ((x) << 12)) +#define AIC_TYPE_CTRL(x) (0xE004 - ((x) << 12)) +#define AIC_ENABLE_CTRL(x) (0xE008 - ((x) << 12)) +#define AIC_RAW_STAL(x) (0xE00C - ((x) << 12)) +#define AIC_ENABLE_SET(x) (0xE00C - ((x) << 12)) +#define AIC_ENABLED_STAT(x) (0xE010 - ((x) << 12)) +#define AIC_ACK(x) (0xE010 - ((x) << 12)) +#define AIC_ENABLE_CLR(x) (0xE014 - ((x) << 12)) +#define AIC_OPTIONS(x) (0xE018 - ((x) << 12)) +#define AIC_VERSION(x) (0xE01C - ((x) << 12)) + +/* HIA, Global AIC */ +#define AIC_G_POL_CTRL 0xF800 +#define AIC_G_TYPE_CTRL 0xF804 +#define AIC_G_ENABLE_CTRL 0xF808 +#define AIC_G_RAW_STAT 
0xF80C +#define AIC_G_ENABLE_SET 0xF80C +#define AIC_G_ENABLED_STAT 0xF810 +#define AIC_G_ACK 0xF810 +#define AIC_G_ENABLE_CLR 0xF814 +#define AIC_G_OPTIONS 0xF818 +#define AIC_G_VERSION 0xF81C + +/* HIA, Data Fetch Engine */ +#define DFE_CFG 0xF000 +#define DFE_PRIO_0 0xF010 +#define DFE_PRIO_1 0xF014 +#define DFE_PRIO_2 0xF018 +#define DFE_PRIO_3 0xF01C + +/* HIA, Data Fetch Engine access monitoring for CDR */ +#define DFE_RING_REGION_LO(x) (0xF080 + ((x) << 3)) +#define DFE_RING_REGION_HI(x) (0xF084 + ((x) << 3)) + +/* HIA, Data Fetch Engine thread control and status for thread */ +#define DFE_THR_CTRL 0xF200 +#define DFE_THR_STAT 0xF204 +#define DFE_THR_DESC_CTRL 0xF208 +#define DFE_THR_DESC_DPTR_LO 0xF210 +#define DFE_THR_DESC_DPTR_HI 0xF214 +#define DFE_THR_DESC_ACDPTR_LO 0xF218 +#define DFE_THR_DESC_ACDPTR_HI 0xF21C + +/* HIA, Data Store Engine */ +#define DSE_CFG 0xF400 +#define DSE_PRIO_0 0xF410 +#define DSE_PRIO_1 0xF414 +#define DSE_PRIO_2 0xF418 +#define DSE_PRIO_3 0xF41C + +/* HIA, Data Store Engine access monitoring for RDR */ +#define DSE_RING_REGION_LO(x) (0xF480 + ((x) << 3)) +#define DSE_RING_REGION_HI(x) (0xF484 + ((x) << 3)) + +/* HIA, Data Store Engine thread control and status for thread */ +#define DSE_THR_CTRL 0xF600 +#define DSE_THR_STAT 0xF604 +#define DSE_THR_DESC_CTRL 0xF608 +#define DSE_THR_DESC_DPTR_LO 0xF610 +#define DSE_THR_DESC_DPTR_HI 0xF614 +#define DSE_THR_DESC_S_DPTR_LO 0xF618 +#define DSE_THR_DESC_S_DPTR_HI 0xF61C +#define DSE_THR_ERROR_STAT 0xF620 + +/* HIA Global */ +#define HIA_MST_CTRL 0xFFF4 +#define HIA_OPTIONS 0xFFF8 +#define HIA_VERSION 0xFFFC + +/* Processing Engine Input Side, Processing Engine */ +#define PE_IN_DBUF_THRESH 0x10000 +#define PE_IN_TBUF_THRESH 0x10100 + +/* Packet Engine Configuration / Status Registers */ +#define PE_TOKEN_CTRL_STAT 0x11000 +#define PE_FUNCTION_EN 0x11004 +#define PE_CONTEXT_CTRL 0x11008 +#define PE_INTERRUPT_CTRL_STAT 0x11010 +#define PE_CONTEXT_STAT 0x1100C +#define PE_OUT_TRANS_CTRL_STAT 0x11018 +#define PE_OUT_BUF_CTRL 0x1101C + +/* Packet Engine PRNG Registers */ +#define PE_PRNG_STAT 0x11040 +#define PE_PRNG_CTRL 0x11044 +#define PE_PRNG_SEED_L 0x11048 +#define PE_PRNG_SEED_H 0x1104C +#define PE_PRNG_KEY_0_L 0x11050 +#define PE_PRNG_KEY_0_H 0x11054 +#define PE_PRNG_KEY_1_L 0x11058 +#define PE_PRNG_KEY_1_H 0x1105C +#define PE_PRNG_RES_0 0x11060 +#define PE_PRNG_RES_1 0x11064 +#define PE_PRNG_RES_2 0x11068 +#define PE_PRNG_RES_3 0x1106C +#define PE_PRNG_LFSR_L 0x11070 +#define PE_PRNG_LFSR_H 0x11074 + +/* Packet Engine AIC */ +#define PE_EIP96_AIC_POL_CTRL 0x113C0 +#define PE_EIP96_AIC_TYPE_CTRL 0x113C4 +#define PE_EIP96_AIC_ENABLE_CTRL 0x113C8 +#define PE_EIP96_AIC_RAW_STAT 0x113CC +#define PE_EIP96_AIC_ENABLE_SET 0x113CC +#define PE_EIP96_AIC_ENABLED_STAT 0x113D0 +#define PE_EIP96_AIC_ACK 0x113D0 +#define PE_EIP96_AIC_ENABLE_CLR 0x113D4 +#define PE_EIP96_AIC_OPTIONS 0x113D8 +#define PE_EIP96_AIC_VERSION 0x113DC + +/* Packet Engine Options & Version Registers */ +#define PE_EIP96_OPTIONS 0x113F8 +#define PE_EIP96_VERSION 0x113FC + +/* Processing Engine Output Side */ +#define PE_OUT_DBUF_THRESH 0x11C00 +#define PE_OUT_TBUF_THRESH 0x11D00 + +/* Processing Engine Local AIC */ +#define PE_AIC_POL_CTRL 0x11F00 +#define PE_AIC_TYPE_CTRL 0x11F04 +#define PE_AIC_ENABLE_CTRL 0x11F08 +#define PE_AIC_RAW_STAT 0x11F0C +#define PE_AIC_ENABLE_SET 0x11F0C +#define PE_AIC_ENABLED_STAT 0x11F10 +#define PE_AIC_ENABLE_CLR 0x11F14 +#define PE_AIC_OPTIONS 0x11F18 +#define PE_AIC_VERSION 0x11F1C + +/* Processing Engine 
General Configuration and Version */ +#define PE_IN_FLIGHT 0x11FF0 +#define PE_OPTIONS 0x11FF8 +#define PE_VERSION 0x11FFC + +/* EIP-97 - Global */ +#define EIP97_CLOCK_STATE 0x1FFE4 +#define EIP97_FORCE_CLOCK_ON 0x1FFE8 +#define EIP97_FORCE_CLOCK_OFF 0x1FFEC +#define EIP97_MST_CTRL 0x1FFF4 +#define EIP97_OPTIONS 0x1FFF8 +#define EIP97_VERSION 0x1FFFC +#endif /* __MTK_REGS_H__ */ diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c new file mode 100644 index 000000000000..55e3805fba07 --- /dev/null +++ b/drivers/crypto/mediatek/mtk-sha.c @@ -0,0 +1,1435 @@ +/* + * Cryptographic API. + * + * Driver for EIP97 SHA1/SHA2(HMAC) acceleration. + * + * Copyright (c) 2016 Ryder Lee <ryder.lee@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Some ideas are from atmel-sha.c and omap-sham.c drivers. + */ + +#include <crypto/sha.h> +#include "mtk-platform.h" + +#define SHA_ALIGN_MSK (sizeof(u32) - 1) +#define SHA_QUEUE_SIZE 512 +#define SHA_TMP_BUF_SIZE 512 +#define SHA_BUF_SIZE ((u32)PAGE_SIZE) + +#define SHA_OP_UPDATE 1 +#define SHA_OP_FINAL 2 + +#define SHA_DATA_LEN_MSK cpu_to_le32(GENMASK(16, 0)) + +/* SHA command token */ +#define SHA_CT_SIZE 5 +#define SHA_CT_CTRL_HDR cpu_to_le32(0x02220000) +#define SHA_CMD0 cpu_to_le32(0x03020000) +#define SHA_CMD1 cpu_to_le32(0x21060000) +#define SHA_CMD2 cpu_to_le32(0xe0e63802) + +/* SHA transform information */ +#define SHA_TFM_HASH cpu_to_le32(0x2 << 0) +#define SHA_TFM_INNER_DIG cpu_to_le32(0x1 << 21) +#define SHA_TFM_SIZE(x) cpu_to_le32((x) << 8) +#define SHA_TFM_START cpu_to_le32(0x1 << 4) +#define SHA_TFM_CONTINUE cpu_to_le32(0x1 << 5) +#define SHA_TFM_HASH_STORE cpu_to_le32(0x1 << 19) +#define SHA_TFM_SHA1 cpu_to_le32(0x2 << 23) +#define SHA_TFM_SHA256 cpu_to_le32(0x3 << 23) +#define SHA_TFM_SHA224 cpu_to_le32(0x4 << 23) +#define SHA_TFM_SHA512 cpu_to_le32(0x5 << 23) +#define SHA_TFM_SHA384 cpu_to_le32(0x6 << 23) +#define SHA_TFM_DIGEST(x) cpu_to_le32(((x) & GENMASK(3, 0)) << 24) + +/* SHA flags */ +#define SHA_FLAGS_BUSY BIT(0) +#define SHA_FLAGS_FINAL BIT(1) +#define SHA_FLAGS_FINUP BIT(2) +#define SHA_FLAGS_SG BIT(3) +#define SHA_FLAGS_ALGO_MSK GENMASK(8, 4) +#define SHA_FLAGS_SHA1 BIT(4) +#define SHA_FLAGS_SHA224 BIT(5) +#define SHA_FLAGS_SHA256 BIT(6) +#define SHA_FLAGS_SHA384 BIT(7) +#define SHA_FLAGS_SHA512 BIT(8) +#define SHA_FLAGS_HMAC BIT(9) +#define SHA_FLAGS_PAD BIT(10) + +/** + * mtk_sha_ct is a set of hardware instructions (a command token) + * that controls the engine's SHA processing flow, and it contains + * the first two words of the transform state. + */ +struct mtk_sha_ct { + __le32 ctrl[2]; + __le32 cmd[3]; +}; + +/** + * mtk_sha_tfm defines the SHA transform state and stores the + * result digest produced by the engine. + */ +struct mtk_sha_tfm { + __le32 ctrl[2]; + __le32 digest[SIZE_IN_WORDS(SHA512_DIGEST_SIZE)]; +}; + +/** + * mtk_sha_info consists of the command token and transform state + * of SHA; its role is similar to that of mtk_aes_info. 
+ */ +struct mtk_sha_info { + struct mtk_sha_ct ct; + struct mtk_sha_tfm tfm; +}; + +struct mtk_sha_reqctx { + struct mtk_sha_info info; + unsigned long flags; + unsigned long op; + + u64 digcnt; + bool start; + size_t bufcnt; + dma_addr_t dma_addr; + + __le32 ct_hdr; + u32 ct_size; + dma_addr_t ct_dma; + dma_addr_t tfm_dma; + + /* Walk state */ + struct scatterlist *sg; + u32 offset; /* Offset in current sg */ + u32 total; /* Total request */ + size_t ds; + size_t bs; + + u8 *buffer; +}; + +struct mtk_sha_hmac_ctx { + struct crypto_shash *shash; + u8 ipad[SHA512_BLOCK_SIZE] __aligned(sizeof(u32)); + u8 opad[SHA512_BLOCK_SIZE] __aligned(sizeof(u32)); +}; + +struct mtk_sha_ctx { + struct mtk_cryp *cryp; + unsigned long flags; + u8 id; + u8 buf[SHA_BUF_SIZE] __aligned(sizeof(u32)); + + struct mtk_sha_hmac_ctx base[0]; +}; + +struct mtk_sha_drv { + struct list_head dev_list; + /* Device list lock */ + spinlock_t lock; +}; + +static struct mtk_sha_drv mtk_sha = { + .dev_list = LIST_HEAD_INIT(mtk_sha.dev_list), + .lock = __SPIN_LOCK_UNLOCKED(mtk_sha.lock), +}; + +static int mtk_sha_handle_queue(struct mtk_cryp *cryp, u8 id, + struct ahash_request *req); + +static inline u32 mtk_sha_read(struct mtk_cryp *cryp, u32 offset) +{ + return readl_relaxed(cryp->base + offset); +} + +static inline void mtk_sha_write(struct mtk_cryp *cryp, + u32 offset, u32 value) +{ + writel_relaxed(value, cryp->base + offset); +} + +static struct mtk_cryp *mtk_sha_find_dev(struct mtk_sha_ctx *tctx) +{ + struct mtk_cryp *cryp = NULL; + struct mtk_cryp *tmp; + + spin_lock_bh(&mtk_sha.lock); + if (!tctx->cryp) { + list_for_each_entry(tmp, &mtk_sha.dev_list, sha_list) { + cryp = tmp; + break; + } + tctx->cryp = cryp; + } else { + cryp = tctx->cryp; + } + + /* + * Assign record id to tfm in round-robin fashion, and this + * will help tfm to bind to corresponding descriptor rings. + */ + tctx->id = cryp->rec; + cryp->rec = !cryp->rec; + + spin_unlock_bh(&mtk_sha.lock); + + return cryp; +} + +static int mtk_sha_append_sg(struct mtk_sha_reqctx *ctx) +{ + size_t count; + + while ((ctx->bufcnt < SHA_BUF_SIZE) && ctx->total) { + count = min(ctx->sg->length - ctx->offset, ctx->total); + count = min(count, SHA_BUF_SIZE - ctx->bufcnt); + + if (count <= 0) { + /* + * Check if count <= 0 because the buffer is full or + * because the sg length is 0. In the latter case, + * check if there is another sg in the list; a zero + * length sg doesn't necessarily mean the end of the + * sg list. + */ + if ((ctx->sg->length == 0) && !sg_is_last(ctx->sg)) { + ctx->sg = sg_next(ctx->sg); + continue; + } else { + break; + } + } + + scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg, + ctx->offset, count, 0); + + ctx->bufcnt += count; + ctx->offset += count; + ctx->total -= count; + + if (ctx->offset == ctx->sg->length) { + ctx->sg = sg_next(ctx->sg); + if (ctx->sg) + ctx->offset = 0; + else + ctx->total = 0; + } + } + + return 0; +} + +/* + * The purpose of this padding is to ensure that the padded message is a + * multiple of 512 bits (SHA1/SHA224/SHA256) or 1024 bits (SHA384/SHA512). + * The bit "1" is appended at the end of the message followed by + * "padlen-1" zero bits. Then a 64-bit block (SHA1/SHA224/SHA256) or a + * 128-bit block (SHA384/SHA512) equal to the message length in bits + * is appended. + * + * For SHA1/SHA224/SHA256, padlen is calculated as follows: + * - if message length < 56 bytes then padlen = 56 - message length + * - else padlen = 64 + 56 - message length + * + * For SHA384/SHA512, padlen is calculated as follows: + * - if message length < 112 bytes then padlen = 112 - message length + * - else padlen = 128 + 112 - message length + */
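The padlen arithmetic above can be checked outside the driver. A minimal stand-alone sketch, with a hypothetical helper name and plain C99 types rather than the driver's kernel types:

    #include <stdint.h>
    #include <stdbool.h>

    /* Bytes of 0x80-plus-zeros padding, excluding the trailing length field. */
    static uint32_t sha_padlen(uint64_t msglen, bool wide)
    {
            uint32_t block = wide ? 128 : 64;     /* SHA384/512 vs. the rest */
            uint32_t boundary = wide ? 112 : 56;  /* block minus length field */
            uint32_t index = msglen & (block - 1);

            return (index < boundary) ? (boundary - index)
                                      : (block + boundary - index);
    }

For example, a 3-byte SHA-256 message gets padlen = 53, and 3 + 53 + 8 bytes of length make exactly one 64-byte block, matching mtk_sha_fill_padding() below.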
+static void mtk_sha_fill_padding(struct mtk_sha_reqctx *ctx, u32 len) +{ + u32 index, padlen; + u64 bits[2]; + u64 size = ctx->digcnt; + + size += ctx->bufcnt; + size += len; + + bits[1] = cpu_to_be64(size << 3); + bits[0] = cpu_to_be64(size >> 61); + + if (ctx->flags & (SHA_FLAGS_SHA384 | SHA_FLAGS_SHA512)) { + index = ctx->bufcnt & 0x7f; + padlen = (index < 112) ? (112 - index) : ((128 + 112) - index); + *(ctx->buffer + ctx->bufcnt) = 0x80; + memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen - 1); + memcpy(ctx->buffer + ctx->bufcnt + padlen, bits, 16); + ctx->bufcnt += padlen + 16; + ctx->flags |= SHA_FLAGS_PAD; + } else { + index = ctx->bufcnt & 0x3f; + padlen = (index < 56) ? (56 - index) : ((64 + 56) - index); + *(ctx->buffer + ctx->bufcnt) = 0x80; + memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen - 1); + memcpy(ctx->buffer + ctx->bufcnt + padlen, &bits[1], 8); + ctx->bufcnt += padlen + 8; + ctx->flags |= SHA_FLAGS_PAD; + } +} + +/* Initialize basic transform information of SHA */ +static void mtk_sha_info_init(struct mtk_sha_reqctx *ctx) +{ + struct mtk_sha_ct *ct = &ctx->info.ct; + struct mtk_sha_tfm *tfm = &ctx->info.tfm; + + ctx->ct_hdr = SHA_CT_CTRL_HDR; + ctx->ct_size = SHA_CT_SIZE; + + tfm->ctrl[0] = SHA_TFM_HASH | SHA_TFM_INNER_DIG | + SHA_TFM_SIZE(SIZE_IN_WORDS(ctx->ds)); + + switch (ctx->flags & SHA_FLAGS_ALGO_MSK) { + case SHA_FLAGS_SHA1: + tfm->ctrl[0] |= SHA_TFM_SHA1; + break; + case SHA_FLAGS_SHA224: + tfm->ctrl[0] |= SHA_TFM_SHA224; + break; + case SHA_FLAGS_SHA256: + tfm->ctrl[0] |= SHA_TFM_SHA256; + break; + case SHA_FLAGS_SHA384: + tfm->ctrl[0] |= SHA_TFM_SHA384; + break; + case SHA_FLAGS_SHA512: + tfm->ctrl[0] |= SHA_TFM_SHA512; + break; + + default: + /* Should not happen... */ + return; + } + + tfm->ctrl[1] = SHA_TFM_HASH_STORE; + ct->ctrl[0] = tfm->ctrl[0] | SHA_TFM_CONTINUE | SHA_TFM_START; + ct->ctrl[1] = tfm->ctrl[1]; + + ct->cmd[0] = SHA_CMD0; + ct->cmd[1] = SHA_CMD1; + ct->cmd[2] = SHA_CMD2 | SHA_TFM_DIGEST(SIZE_IN_WORDS(ctx->ds)); +} + +/* + * Update input data length field of transform information and + * map it to DMA region. + */ +static int mtk_sha_info_update(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha, + size_t len) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + struct mtk_sha_info *info = &ctx->info; + struct mtk_sha_ct *ct = &info->ct; + + if (ctx->start) + ctx->start = false; + else + ct->ctrl[0] &= ~SHA_TFM_START; + + ctx->ct_hdr &= ~SHA_DATA_LEN_MSK; + ctx->ct_hdr |= cpu_to_le32(len); + ct->cmd[0] &= ~SHA_DATA_LEN_MSK; + ct->cmd[0] |= cpu_to_le32(len); + + ctx->digcnt += len; + + ctx->ct_dma = dma_map_single(cryp->dev, info, sizeof(*info), + DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(cryp->dev, ctx->ct_dma))) { + dev_err(cryp->dev, "dma %zu bytes error\n", sizeof(*info)); + return -EINVAL; + } + ctx->tfm_dma = ctx->ct_dma + sizeof(*ct); + + return 0; +} + +/* + * Because of a hardware limitation, we must pre-calculate the inner + * and outer digests, which need to be processed by the engine first; + * the result digest is then applied to the input message. This complex + * hashing procedure limits HMAC performance, so we use a software + * (shash) fallback for the outer hash. + */
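The split amounts to the standard HMAC construction, HMAC(K, m) = H((K ^ opad) || H((K ^ ipad) || m)): the engine produces the inner hash (mtk_sha_init() preloads the ipad block into the data buffer), and the outer pass runs synchronously in software. A minimal sketch of such an outer pass with the kernel shash API, assuming <crypto/hash.h> and a hypothetical helper name; the driver's real version is mtk_sha_finish_hmac() just below:

    static int outer_hash_pass(struct crypto_shash *tfm, const u8 *opad,
                               unsigned int bs, u8 *digest, unsigned int ds)
    {
            SHASH_DESC_ON_STACK(shash, tfm);

            shash->tfm = tfm;
            shash->flags = 0;

            /* mac = H(opad || inner digest), overwriting the inner digest */
            return crypto_shash_init(shash) ?:
                   crypto_shash_update(shash, opad, bs) ?:
                   crypto_shash_finup(shash, digest, ds, digest);
    }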
+static int mtk_sha_finish_hmac(struct ahash_request *req) +{ + struct mtk_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm); + struct mtk_sha_hmac_ctx *bctx = tctx->base; + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + SHASH_DESC_ON_STACK(shash, bctx->shash); + + shash->tfm = bctx->shash; + shash->flags = 0; /* not CRYPTO_TFM_REQ_MAY_SLEEP */ + + return crypto_shash_init(shash) ?: + crypto_shash_update(shash, bctx->opad, ctx->bs) ?: + crypto_shash_finup(shash, req->result, ctx->ds, req->result); +} + +/* Initialize request context */ +static int mtk_sha_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct mtk_sha_ctx *tctx = crypto_ahash_ctx(tfm); + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + ctx->flags = 0; + ctx->ds = crypto_ahash_digestsize(tfm); + + switch (ctx->ds) { + case SHA1_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA1; + ctx->bs = SHA1_BLOCK_SIZE; + break; + case SHA224_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA224; + ctx->bs = SHA224_BLOCK_SIZE; + break; + case SHA256_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA256; + ctx->bs = SHA256_BLOCK_SIZE; + break; + case SHA384_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA384; + ctx->bs = SHA384_BLOCK_SIZE; + break; + case SHA512_DIGEST_SIZE: + ctx->flags |= SHA_FLAGS_SHA512; + ctx->bs = SHA512_BLOCK_SIZE; + break; + default: + return -EINVAL; + } + + ctx->bufcnt = 0; + ctx->digcnt = 0; + ctx->buffer = tctx->buf; + ctx->start = true; + + if (tctx->flags & SHA_FLAGS_HMAC) { + struct mtk_sha_hmac_ctx *bctx = tctx->base; + + memcpy(ctx->buffer, bctx->ipad, ctx->bs); + ctx->bufcnt = ctx->bs; + ctx->flags |= SHA_FLAGS_HMAC; + } + + return 0; +} + +static int mtk_sha_xmit(struct mtk_cryp *cryp, struct mtk_sha_rec *sha, + dma_addr_t addr, size_t len) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + struct mtk_ring *ring = cryp->ring[sha->id]; + struct mtk_desc *cmd = ring->cmd_base + ring->cmd_pos; + struct mtk_desc *res = ring->res_base + ring->res_pos; + int err; + + err = mtk_sha_info_update(cryp, sha, len); + if (err) + return err; + + /* Fill in the command/result descriptors */ + res->hdr = MTK_DESC_FIRST | MTK_DESC_LAST | MTK_DESC_BUF_LEN(len); + res->buf = cpu_to_le32(cryp->tmp_dma); + + cmd->hdr = MTK_DESC_FIRST | MTK_DESC_LAST | MTK_DESC_BUF_LEN(len) | + MTK_DESC_CT_LEN(ctx->ct_size); + + cmd->buf = cpu_to_le32(addr); + cmd->ct = cpu_to_le32(ctx->ct_dma); + cmd->ct_hdr = ctx->ct_hdr; + cmd->tfm = cpu_to_le32(ctx->tfm_dma); + + if (++ring->cmd_pos == MTK_DESC_NUM) + ring->cmd_pos = 0; + + ring->res_pos = ring->cmd_pos; + /* + * Make sure that all changes to the DMA ring are done before we + * start the engine. 
+ */ + wmb(); + /* Start DMA transfer */ + mtk_sha_write(cryp, RDR_PREP_COUNT(sha->id), MTK_DESC_CNT(1)); + mtk_sha_write(cryp, CDR_PREP_COUNT(sha->id), MTK_DESC_CNT(1)); + + return -EINPROGRESS; +} + +static int mtk_sha_xmit2(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha, + struct mtk_sha_reqctx *ctx, + size_t len1, size_t len2) +{ + struct mtk_ring *ring = cryp->ring[sha->id]; + struct mtk_desc *cmd = ring->cmd_base + ring->cmd_pos; + struct mtk_desc *res = ring->res_base + ring->res_pos; + int err; + + err = mtk_sha_info_update(cryp, sha, len1 + len2); + if (err) + return err; + + /* Fill in the command/result descriptors */ + res->hdr = MTK_DESC_BUF_LEN(len1) | MTK_DESC_FIRST; + res->buf = cpu_to_le32(cryp->tmp_dma); + + cmd->hdr = MTK_DESC_BUF_LEN(len1) | MTK_DESC_FIRST | + MTK_DESC_CT_LEN(ctx->ct_size); + cmd->buf = cpu_to_le32(sg_dma_address(ctx->sg)); + cmd->ct = cpu_to_le32(ctx->ct_dma); + cmd->ct_hdr = ctx->ct_hdr; + cmd->tfm = cpu_to_le32(ctx->tfm_dma); + + if (++ring->cmd_pos == MTK_DESC_NUM) + ring->cmd_pos = 0; + + ring->res_pos = ring->cmd_pos; + + cmd = ring->cmd_base + ring->cmd_pos; + res = ring->res_base + ring->res_pos; + + res->hdr = MTK_DESC_BUF_LEN(len2) | MTK_DESC_LAST; + res->buf = cpu_to_le32(cryp->tmp_dma); + + cmd->hdr = MTK_DESC_BUF_LEN(len2) | MTK_DESC_LAST; + cmd->buf = cpu_to_le32(ctx->dma_addr); + + if (++ring->cmd_pos == MTK_DESC_NUM) + ring->cmd_pos = 0; + + ring->res_pos = ring->cmd_pos; + + /* + * Make sure that all changes to the DMA ring are done before we + * start the engine. + */ + wmb(); + /* Start DMA transfer */ + mtk_sha_write(cryp, RDR_PREP_COUNT(sha->id), MTK_DESC_CNT(2)); + mtk_sha_write(cryp, CDR_PREP_COUNT(sha->id), MTK_DESC_CNT(2)); + + return -EINPROGRESS; +} + +static int mtk_sha_dma_map(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha, + struct mtk_sha_reqctx *ctx, + size_t count) +{ + ctx->dma_addr = dma_map_single(cryp->dev, ctx->buffer, + SHA_BUF_SIZE, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, ctx->dma_addr))) { + dev_err(cryp->dev, "dma map error\n"); + return -EINVAL; + } + + ctx->flags &= ~SHA_FLAGS_SG; + + return mtk_sha_xmit(cryp, sha, ctx->dma_addr, count); +} + +static int mtk_sha_update_slow(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + size_t count; + u32 final; + + mtk_sha_append_sg(ctx); + + final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total; + + dev_dbg(cryp->dev, "slow: bufcnt: %zu\n", ctx->bufcnt); + + if (final) { + sha->flags |= SHA_FLAGS_FINAL; + mtk_sha_fill_padding(ctx, 0); + } + + if (final || (ctx->bufcnt == SHA_BUF_SIZE && ctx->total)) { + count = ctx->bufcnt; + ctx->bufcnt = 0; + + return mtk_sha_dma_map(cryp, sha, ctx, count); + } + return 0; +} + +static int mtk_sha_update_start(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + u32 len, final, tail; + struct scatterlist *sg; + + if (!ctx->total) + return 0; + + if (ctx->bufcnt || ctx->offset) + return mtk_sha_update_slow(cryp, sha); + + sg = ctx->sg; + + if (!IS_ALIGNED(sg->offset, sizeof(u32))) + return mtk_sha_update_slow(cryp, sha); + + if (!sg_is_last(sg) && !IS_ALIGNED(sg->length, ctx->bs)) + /* size is not ctx->bs aligned */ + return mtk_sha_update_slow(cryp, sha); + + len = min(ctx->total, sg->length); + + if (sg_is_last(sg)) { + if (!(ctx->flags & SHA_FLAGS_FINUP)) { + /* not last sg must be ctx->bs aligned */ + tail = len & (ctx->bs - 1); + len -= tail; + } + } + + ctx->total -= len; + ctx->offset 
= len; /* offset where to start slow */ + + final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total; + + /* Add padding */ + if (final) { + size_t count; + + tail = len & (ctx->bs - 1); + len -= tail; + ctx->total += tail; + ctx->offset = len; /* offset where to start slow */ + + sg = ctx->sg; + mtk_sha_append_sg(ctx); + mtk_sha_fill_padding(ctx, len); + + ctx->dma_addr = dma_map_single(cryp->dev, ctx->buffer, + SHA_BUF_SIZE, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(cryp->dev, ctx->dma_addr))) { + dev_err(cryp->dev, "dma map bytes error\n"); + return -EINVAL; + } + + sha->flags |= SHA_FLAGS_FINAL; + count = ctx->bufcnt; + ctx->bufcnt = 0; + + if (len == 0) { + ctx->flags &= ~SHA_FLAGS_SG; + return mtk_sha_xmit(cryp, sha, ctx->dma_addr, count); + + } else { + ctx->sg = sg; + if (!dma_map_sg(cryp->dev, ctx->sg, 1, DMA_TO_DEVICE)) { + dev_err(cryp->dev, "dma_map_sg error\n"); + return -EINVAL; + } + + ctx->flags |= SHA_FLAGS_SG; + return mtk_sha_xmit2(cryp, sha, ctx, len, count); + } + } + + if (!dma_map_sg(cryp->dev, ctx->sg, 1, DMA_TO_DEVICE)) { + dev_err(cryp->dev, "dma_map_sg error\n"); + return -EINVAL; + } + + ctx->flags |= SHA_FLAGS_SG; + + return mtk_sha_xmit(cryp, sha, sg_dma_address(ctx->sg), len); +} + +static int mtk_sha_final_req(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + size_t count; + + mtk_sha_fill_padding(ctx, 0); + + sha->flags |= SHA_FLAGS_FINAL; + count = ctx->bufcnt; + ctx->bufcnt = 0; + + return mtk_sha_dma_map(cryp, sha, ctx, count); +} + +/* Copy ready hash (+ finalize hmac) */ +static int mtk_sha_finish(struct ahash_request *req) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + u32 *digest = ctx->info.tfm.digest; + u32 *result = (u32 *)req->result; + int i; + + /* Get the hash from the digest buffer */ + for (i = 0; i < SIZE_IN_WORDS(ctx->ds); i++) + result[i] = le32_to_cpu(digest[i]); + + if (ctx->flags & SHA_FLAGS_HMAC) + return mtk_sha_finish_hmac(req); + + return 0; +} + +static void mtk_sha_finish_req(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha, + int err) +{ + if (likely(!err && (SHA_FLAGS_FINAL & sha->flags))) + err = mtk_sha_finish(sha->req); + + sha->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL); + + sha->req->base.complete(&sha->req->base, err); + + /* Handle new request */ + mtk_sha_handle_queue(cryp, sha->id - RING2, NULL); +} + +static int mtk_sha_handle_queue(struct mtk_cryp *cryp, u8 id, + struct ahash_request *req) +{ + struct mtk_sha_rec *sha = cryp->sha[id]; + struct crypto_async_request *async_req, *backlog; + struct mtk_sha_reqctx *ctx; + unsigned long flags; + int err = 0, ret = 0; + + spin_lock_irqsave(&sha->lock, flags); + if (req) + ret = ahash_enqueue_request(&sha->queue, req); + + if (SHA_FLAGS_BUSY & sha->flags) { + spin_unlock_irqrestore(&sha->lock, flags); + return ret; + } + + backlog = crypto_get_backlog(&sha->queue); + async_req = crypto_dequeue_request(&sha->queue); + if (async_req) + sha->flags |= SHA_FLAGS_BUSY; + spin_unlock_irqrestore(&sha->lock, flags); + + if (!async_req) + return ret; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + req = ahash_request_cast(async_req); + ctx = ahash_request_ctx(req); + + sha->req = req; + + mtk_sha_info_init(ctx); + + if (ctx->op == SHA_OP_UPDATE) { + err = mtk_sha_update_start(cryp, sha); + if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) + /* No final() after finup() */ + err = mtk_sha_final_req(cryp, sha); + } else if (ctx->op == SHA_OP_FINAL) { + err = 
mtk_sha_final_req(cryp, sha); + } + + if (unlikely(err != -EINPROGRESS)) + /* Task will not finish it, so do it here */ + mtk_sha_finish_req(cryp, sha, err); + + return ret; +} + +static int mtk_sha_enqueue(struct ahash_request *req, u32 op) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + struct mtk_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm); + + ctx->op = op; + + return mtk_sha_handle_queue(tctx->cryp, tctx->id, req); +} + +static void mtk_sha_unmap(struct mtk_cryp *cryp, struct mtk_sha_rec *sha) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(sha->req); + + dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->info), + DMA_BIDIRECTIONAL); + + if (ctx->flags & SHA_FLAGS_SG) { + dma_unmap_sg(cryp->dev, ctx->sg, 1, DMA_TO_DEVICE); + if (ctx->sg->length == ctx->offset) { + ctx->sg = sg_next(ctx->sg); + if (ctx->sg) + ctx->offset = 0; + } + if (ctx->flags & SHA_FLAGS_PAD) { + dma_unmap_single(cryp->dev, ctx->dma_addr, + SHA_BUF_SIZE, DMA_TO_DEVICE); + } + } else + dma_unmap_single(cryp->dev, ctx->dma_addr, + SHA_BUF_SIZE, DMA_TO_DEVICE); +} + +static void mtk_sha_complete(struct mtk_cryp *cryp, + struct mtk_sha_rec *sha) +{ + int err = 0; + + err = mtk_sha_update_start(cryp, sha); + if (err != -EINPROGRESS) + mtk_sha_finish_req(cryp, sha, err); +} + +static int mtk_sha_update(struct ahash_request *req) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + ctx->total = req->nbytes; + ctx->sg = req->src; + ctx->offset = 0; + + if ((ctx->bufcnt + ctx->total < SHA_BUF_SIZE) && + !(ctx->flags & SHA_FLAGS_FINUP)) + return mtk_sha_append_sg(ctx); + + return mtk_sha_enqueue(req, SHA_OP_UPDATE); +} + +static int mtk_sha_final(struct ahash_request *req) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + ctx->flags |= SHA_FLAGS_FINUP; + + if (ctx->flags & SHA_FLAGS_PAD) + return mtk_sha_finish(req); + + return mtk_sha_enqueue(req, SHA_OP_FINAL); +} + +static int mtk_sha_finup(struct ahash_request *req) +{ + struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + int err1, err2; + + ctx->flags |= SHA_FLAGS_FINUP; + + err1 = mtk_sha_update(req); + if (err1 == -EINPROGRESS || err1 == -EBUSY) + return err1; + /* + * final() has to be always called to cleanup resources + * even if update() failed + */ + err2 = mtk_sha_final(req); + + return err1 ?: err2; +} + +static int mtk_sha_digest(struct ahash_request *req) +{ + return mtk_sha_init(req) ?: mtk_sha_finup(req); +} + +static int mtk_sha_setkey(struct crypto_ahash *tfm, const u8 *key, + u32 keylen) +{ + struct mtk_sha_ctx *tctx = crypto_ahash_ctx(tfm); + struct mtk_sha_hmac_ctx *bctx = tctx->base; + size_t bs = crypto_shash_blocksize(bctx->shash); + size_t ds = crypto_shash_digestsize(bctx->shash); + int err, i; + + SHASH_DESC_ON_STACK(shash, bctx->shash); + + shash->tfm = bctx->shash; + shash->flags = crypto_shash_get_flags(bctx->shash) & + CRYPTO_TFM_REQ_MAY_SLEEP; + + if (keylen > bs) { + err = crypto_shash_digest(shash, key, keylen, bctx->ipad); + if (err) + return err; + keylen = ds; + } else { + memcpy(bctx->ipad, key, keylen); + } + + memset(bctx->ipad + keylen, 0, bs - keylen); + memcpy(bctx->opad, bctx->ipad, bs); + + for (i = 0; i < bs; i++) { + bctx->ipad[i] ^= 0x36; + bctx->opad[i] ^= 0x5c; + } + + return 0; +} + +static int mtk_sha_export(struct ahash_request *req, void *out) +{ + const struct mtk_sha_reqctx *ctx = ahash_request_ctx(req); + + memcpy(out, ctx, sizeof(*ctx)); + return 0; +} + +static int mtk_sha_import(struct ahash_request *req, const void *in) +{ + struct mtk_sha_reqctx *ctx = 
ahash_request_ctx(req); + + memcpy(ctx, in, sizeof(*ctx)); + return 0; +} + +static int mtk_sha_cra_init_alg(struct crypto_tfm *tfm, + const char *alg_base) +{ + struct mtk_sha_ctx *tctx = crypto_tfm_ctx(tfm); + struct mtk_cryp *cryp = NULL; + + cryp = mtk_sha_find_dev(tctx); + if (!cryp) + return -ENODEV; + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct mtk_sha_reqctx)); + + if (alg_base) { + struct mtk_sha_hmac_ctx *bctx = tctx->base; + + tctx->flags |= SHA_FLAGS_HMAC; + bctx->shash = crypto_alloc_shash(alg_base, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(bctx->shash)) { + pr_err("base driver %s could not be loaded.\n", + alg_base); + + return PTR_ERR(bctx->shash); + } + } + return 0; +} + +static int mtk_sha_cra_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, NULL); +} + +static int mtk_sha_cra_sha1_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha1"); +} + +static int mtk_sha_cra_sha224_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha224"); +} + +static int mtk_sha_cra_sha256_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha256"); +} + +static int mtk_sha_cra_sha384_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha384"); +} + +static int mtk_sha_cra_sha512_init(struct crypto_tfm *tfm) +{ + return mtk_sha_cra_init_alg(tfm, "sha512"); +} + +static void mtk_sha_cra_exit(struct crypto_tfm *tfm) +{ + struct mtk_sha_ctx *tctx = crypto_tfm_ctx(tfm); + + if (tctx->flags & SHA_FLAGS_HMAC) { + struct mtk_sha_hmac_ctx *bctx = tctx->base; + + crypto_free_shash(bctx->shash); + } +} + +static struct ahash_alg algs_sha1_sha224_sha256[] = { +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha1", + .cra_driver_name = "mtk-sha1", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha224", + .cra_driver_name = "mtk-sha224", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha256", + .cra_driver_name = "mtk-sha256", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = 
mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "mtk-hmac-sha1", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha1_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha224)", + .cra_driver_name = "mtk-hmac-sha224", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha224_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "mtk-hmac-sha256", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha256_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +}; + +static struct ahash_alg algs_sha384_sha512[] = { +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha384", + .cra_driver_name = "mtk-sha384", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "sha512", + .cra_driver_name = "mtk-sha512", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_init, + .cra_exit = 
mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha384)", + .cra_driver_name = "mtk-hmac-sha384", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha384_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +{ + .init = mtk_sha_init, + .update = mtk_sha_update, + .final = mtk_sha_final, + .finup = mtk_sha_finup, + .digest = mtk_sha_digest, + .export = mtk_sha_export, + .import = mtk_sha_import, + .setkey = mtk_sha_setkey, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = sizeof(struct mtk_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha512)", + .cra_driver_name = "mtk-hmac-sha512", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mtk_sha_ctx) + + sizeof(struct mtk_sha_hmac_ctx), + .cra_alignmask = SHA_ALIGN_MSK, + .cra_module = THIS_MODULE, + .cra_init = mtk_sha_cra_sha512_init, + .cra_exit = mtk_sha_cra_exit, + } +}, +}; + +static void mtk_sha_task0(unsigned long data) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)data; + struct mtk_sha_rec *sha = cryp->sha[0]; + + mtk_sha_unmap(cryp, sha); + mtk_sha_complete(cryp, sha); +} + +static void mtk_sha_task1(unsigned long data) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)data; + struct mtk_sha_rec *sha = cryp->sha[1]; + + mtk_sha_unmap(cryp, sha); + mtk_sha_complete(cryp, sha); +} + +static irqreturn_t mtk_sha_ring2_irq(int irq, void *dev_id) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)dev_id; + struct mtk_sha_rec *sha = cryp->sha[0]; + u32 val = mtk_sha_read(cryp, RDR_STAT(RING2)); + + mtk_sha_write(cryp, RDR_STAT(RING2), val); + + if (likely((SHA_FLAGS_BUSY & sha->flags))) { + mtk_sha_write(cryp, RDR_PROC_COUNT(RING2), MTK_CNT_RST); + mtk_sha_write(cryp, RDR_THRESH(RING2), + MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); + + tasklet_schedule(&sha->task); + } else { + dev_warn(cryp->dev, "SHA interrupt when no active requests.\n"); + } + return IRQ_HANDLED; +} + +static irqreturn_t mtk_sha_ring3_irq(int irq, void *dev_id) +{ + struct mtk_cryp *cryp = (struct mtk_cryp *)dev_id; + struct mtk_sha_rec *sha = cryp->sha[1]; + u32 val = mtk_sha_read(cryp, RDR_STAT(RING3)); + + mtk_sha_write(cryp, RDR_STAT(RING3), val); + + if (likely((SHA_FLAGS_BUSY & sha->flags))) { + mtk_sha_write(cryp, RDR_PROC_COUNT(RING3), MTK_CNT_RST); + mtk_sha_write(cryp, RDR_THRESH(RING3), + MTK_RDR_PROC_THRESH | MTK_RDR_PROC_MODE); + + tasklet_schedule(&sha->task); + } else { + dev_warn(cryp->dev, "SHA interrupt when no active requests.\n"); + } + return IRQ_HANDLED; +}
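The ahash algorithms defined above (registered below by mtk_sha_register_algs()) are reachable from userspace through AF_ALG. A minimal sketch, error handling omitted, assuming a kernel where this driver, or any other "sha256" provider, is available:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/if_alg.h>

    int main(void)
    {
            struct sockaddr_alg sa = {
                    .salg_family = AF_ALG,
                    .salg_type   = "hash",
                    .salg_name   = "sha256", /* "mtk-sha256" when it wins on priority */
            };
            unsigned char digest[32];
            int tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);

            bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));
            int opfd = accept(tfmfd, NULL, 0);

            write(opfd, "abc", 3);              /* hash the message "abc" */
            read(opfd, digest, sizeof(digest)); /* read back the digest */

            for (int i = 0; i < 32; i++)
                    printf("%02x", digest[i]);
            printf("\n");
            close(opfd);
            close(tfmfd);
            return 0;
    }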
+ +/* + * Two SHA records are used to get extra performance; the scheme is + * similar to mtk_aes_record_init(). + */ +static int mtk_sha_record_init(struct mtk_cryp *cryp) +{ + struct mtk_sha_rec **sha = cryp->sha; + int i, err = -ENOMEM; + + for (i = 0; i < MTK_REC_NUM; i++) { + sha[i] = kzalloc(sizeof(**sha), GFP_KERNEL); + if (!sha[i]) + goto err_cleanup; + + sha[i]->id = i + RING2; + + spin_lock_init(&sha[i]->lock); + crypto_init_queue(&sha[i]->queue, SHA_QUEUE_SIZE); + } + + tasklet_init(&sha[0]->task, mtk_sha_task0, (unsigned long)cryp); + tasklet_init(&sha[1]->task, mtk_sha_task1, (unsigned long)cryp); + + cryp->rec = 1; + + return 0; + +err_cleanup: + for (; i--; ) + kfree(sha[i]); + return err; +} + +static void mtk_sha_record_free(struct mtk_cryp *cryp) +{ + int i; + + for (i = 0; i < MTK_REC_NUM; i++) { + tasklet_kill(&cryp->sha[i]->task); + kfree(cryp->sha[i]); + } +} + +static void mtk_sha_unregister_algs(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(algs_sha1_sha224_sha256); i++) + crypto_unregister_ahash(&algs_sha1_sha224_sha256[i]); + + for (i = 0; i < ARRAY_SIZE(algs_sha384_sha512); i++) + crypto_unregister_ahash(&algs_sha384_sha512[i]); +} + +static int mtk_sha_register_algs(void) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(algs_sha1_sha224_sha256); i++) { + err = crypto_register_ahash(&algs_sha1_sha224_sha256[i]); + if (err) + goto err_sha_224_256_algs; + } + + for (i = 0; i < ARRAY_SIZE(algs_sha384_sha512); i++) { + err = crypto_register_ahash(&algs_sha384_sha512[i]); + if (err) + goto err_sha_384_512_algs; + } + + return 0; + +err_sha_384_512_algs: + for (; i--; ) + crypto_unregister_ahash(&algs_sha384_sha512[i]); + i = ARRAY_SIZE(algs_sha1_sha224_sha256); +err_sha_224_256_algs: + for (; i--; ) + crypto_unregister_ahash(&algs_sha1_sha224_sha256[i]); + + return err; +} + +int mtk_hash_alg_register(struct mtk_cryp *cryp) +{ + int err; + + INIT_LIST_HEAD(&cryp->sha_list); + + /* Initialize two hash records */ + err = mtk_sha_record_init(cryp); + if (err) + goto err_record; + + /* Ring2 is used by SHA record0 */ + err = devm_request_irq(cryp->dev, cryp->irq[RING2], + mtk_sha_ring2_irq, IRQF_TRIGGER_LOW, + "mtk-sha", cryp); + if (err) { + dev_err(cryp->dev, "unable to request sha irq0.\n"); + goto err_res; + } + + /* Ring3 is used by SHA record1 */ + err = devm_request_irq(cryp->dev, cryp->irq[RING3], + mtk_sha_ring3_irq, IRQF_TRIGGER_LOW, + "mtk-sha", cryp); + if (err) { + dev_err(cryp->dev, "unable to request sha irq1.\n"); + goto err_res; + } + + /* Enable ring2 and ring3 interrupt for hash */ + mtk_sha_write(cryp, AIC_ENABLE_SET(RING2), MTK_IRQ_RDR2); + mtk_sha_write(cryp, AIC_ENABLE_SET(RING3), MTK_IRQ_RDR3); + + cryp->tmp = dma_alloc_coherent(cryp->dev, SHA_TMP_BUF_SIZE, + &cryp->tmp_dma, GFP_KERNEL); + if (!cryp->tmp) { + dev_err(cryp->dev, "unable to allocate tmp buffer.\n"); + err = -ENOMEM; + goto err_res; + } + + spin_lock(&mtk_sha.lock); + list_add_tail(&cryp->sha_list, &mtk_sha.dev_list); + spin_unlock(&mtk_sha.lock); + + err = mtk_sha_register_algs(); + if (err) + goto err_algs; + + return 0; + +err_algs: + spin_lock(&mtk_sha.lock); + list_del(&cryp->sha_list); + spin_unlock(&mtk_sha.lock); + dma_free_coherent(cryp->dev, SHA_TMP_BUF_SIZE, + cryp->tmp, cryp->tmp_dma); +err_res: + mtk_sha_record_free(cryp); +err_record: + + dev_err(cryp->dev, "mtk-sha initialization failed.\n"); + return err; +} + +void mtk_hash_alg_release(struct mtk_cryp *cryp) +{ + spin_lock(&mtk_sha.lock); + list_del(&cryp->sha_list); + spin_unlock(&mtk_sha.lock); + + mtk_sha_unregister_algs(); + dma_free_coherent(cryp->dev, SHA_TMP_BUF_SIZE, + cryp->tmp, cryp->tmp_dma); +
mtk_sha_record_free(cryp); +} diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index 47576098831f..b6f14844702e 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -1616,32 +1616,17 @@ static const struct of_device_id spacc_of_id_table[] = { MODULE_DEVICE_TABLE(of, spacc_of_id_table); #endif /* CONFIG_OF */ -static bool spacc_is_compatible(struct platform_device *pdev, - const char *spacc_type) -{ - const struct platform_device_id *platid = platform_get_device_id(pdev); - - if (platid && !strcmp(platid->name, spacc_type)) - return true; - -#ifdef CONFIG_OF - if (of_device_is_compatible(pdev->dev.of_node, spacc_type)) - return true; -#endif /* CONFIG_OF */ - - return false; -} - static int spacc_probe(struct platform_device *pdev) { int i, err, ret = -EINVAL; struct resource *mem, *irq; + struct device_node *np = pdev->dev.of_node; struct spacc_engine *engine = devm_kzalloc(&pdev->dev, sizeof(*engine), GFP_KERNEL); if (!engine) return -ENOMEM; - if (spacc_is_compatible(pdev, "picochip,spacc-ipsec")) { + if (of_device_is_compatible(np, "picochip,spacc-ipsec")) { engine->max_ctxs = SPACC_CRYPTO_IPSEC_MAX_CTXS; engine->cipher_pg_sz = SPACC_CRYPTO_IPSEC_CIPHER_PG_SZ; engine->hash_pg_sz = SPACC_CRYPTO_IPSEC_HASH_PG_SZ; @@ -1650,7 +1635,7 @@ static int spacc_probe(struct platform_device *pdev) engine->num_algs = ARRAY_SIZE(ipsec_engine_algs); engine->aeads = ipsec_engine_aeads; engine->num_aeads = ARRAY_SIZE(ipsec_engine_aeads); - } else if (spacc_is_compatible(pdev, "picochip,spacc-l2")) { + } else if (of_device_is_compatible(np, "picochip,spacc-l2")) { engine->max_ctxs = SPACC_CRYPTO_L2_MAX_CTXS; engine->cipher_pg_sz = SPACC_CRYPTO_L2_CIPHER_PG_SZ; engine->hash_pg_sz = SPACC_CRYPTO_L2_HASH_PG_SZ; @@ -1803,12 +1788,6 @@ static int spacc_remove(struct platform_device *pdev) return 0; } -static const struct platform_device_id spacc_id_table[] = { - { "picochip,spacc-ipsec", }, - { "picochip,spacc-l2", }, - { } -}; - static struct platform_driver spacc_driver = { .probe = spacc_probe, .remove = spacc_remove, @@ -1819,7 +1798,6 @@ static struct platform_driver spacc_driver = { #endif /* CONFIG_PM */ .of_match_table = of_match_ptr(spacc_of_id_table), }, - .id_table = spacc_id_table, }; module_platform_driver(spacc_driver); diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c index 640c3fc870fd..f172171668ee 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c @@ -186,7 +186,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c index 949d77b79fbe..24ec908eb26c 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c @@ -170,7 +170,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) accel_pci_dev->sku = hw_data->get_sku(hw_data); /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff 
--git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c index 5b2d78a5b5aa..58a984c9c3ec 100644 --- a/drivers/crypto/qat/qat_c62x/adf_drv.c +++ b/drivers/crypto/qat/qat_c62x/adf_drv.c @@ -186,7 +186,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c index 7540ce13b0d0..b9f3e0e4fde9 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c @@ -170,7 +170,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) accel_pci_dev->sku = hw_data->get_sku(hw_data); /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_common/adf_cfg_common.h b/drivers/crypto/qat/qat_common/adf_cfg_common.h index 8c4f6573ce59..1211261de7c2 100644 --- a/drivers/crypto/qat/qat_common/adf_cfg_common.h +++ b/drivers/crypto/qat/qat_common/adf_cfg_common.h @@ -61,6 +61,7 @@ #define ADF_CFG_AFFINITY_WHATEVER 0xFF #define MAX_DEVICE_NAME_SIZE 32 #define ADF_MAX_DEVICES (32 * 32) +#define ADF_DEVS_ARRAY_SIZE BITS_TO_LONGS(ADF_MAX_DEVICES) enum adf_cfg_val_type { ADF_DEC, diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 980e07475012..5c4c0a253129 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -87,8 +87,8 @@ enum adf_event { struct service_hndl { int (*event_hld)(struct adf_accel_dev *accel_dev, enum adf_event event); - unsigned long init_status; - unsigned long start_status; + unsigned long init_status[ADF_DEVS_ARRAY_SIZE]; + unsigned long start_status[ADF_DEVS_ARRAY_SIZE]; char *name; struct list_head list; }; diff --git a/drivers/crypto/qat/qat_common/adf_dev_mgr.c b/drivers/crypto/qat/qat_common/adf_dev_mgr.c index b3ebb25f9ca7..8afac52677a6 100644 --- a/drivers/crypto/qat/qat_common/adf_dev_mgr.c +++ b/drivers/crypto/qat/qat_common/adf_dev_mgr.c @@ -152,7 +152,7 @@ void adf_devmgr_update_class_index(struct adf_hw_device_data *hw_data) ptr->hw_device->instance_id = i++; if (i == class->instances) - break; + break; } } EXPORT_SYMBOL_GPL(adf_devmgr_update_class_index); diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c index 888c6675e7e5..26556c713049 100644 --- a/drivers/crypto/qat/qat_common/adf_init.c +++ b/drivers/crypto/qat/qat_common/adf_init.c @@ -64,8 +64,8 @@ static void adf_service_add(struct service_hndl *service) int adf_service_register(struct service_hndl *service) { - service->init_status = 0; - service->start_status = 0; + memset(service->init_status, 0, sizeof(service->init_status)); + memset(service->start_status, 0, sizeof(service->start_status)); adf_service_add(service); return 0; } @@ -79,9 +79,13 @@ static void adf_service_remove(struct service_hndl *service) int adf_service_unregister(struct service_hndl *service) { - if (service->init_status || service->start_status) { - pr_err("QAT: Could not remove active 
service\n"); - return -EFAULT; + int i; + + for (i = 0; i < ARRAY_SIZE(service->init_status); i++) { + if (service->init_status[i] || service->start_status[i]) { + pr_err("QAT: Could not remove active service\n"); + return -EFAULT; + } } adf_service_remove(service); return 0; @@ -163,7 +167,7 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) service->name); return -EFAULT; } - set_bit(accel_dev->accel_id, &service->init_status); + set_bit(accel_dev->accel_id, service->init_status); } hw_data->enable_error_correction(accel_dev); @@ -210,7 +214,7 @@ int adf_dev_start(struct adf_accel_dev *accel_dev) service->name); return -EFAULT; } - set_bit(accel_dev->accel_id, &service->start_status); + set_bit(accel_dev->accel_id, service->start_status); } clear_bit(ADF_STATUS_STARTING, &accel_dev->status); @@ -259,14 +263,14 @@ void adf_dev_stop(struct adf_accel_dev *accel_dev) list_for_each(list_itr, &service_table) { service = list_entry(list_itr, struct service_hndl, list); - if (!test_bit(accel_dev->accel_id, &service->start_status)) + if (!test_bit(accel_dev->accel_id, service->start_status)) continue; ret = service->event_hld(accel_dev, ADF_EVENT_STOP); if (!ret) { - clear_bit(accel_dev->accel_id, &service->start_status); + clear_bit(accel_dev->accel_id, service->start_status); } else if (ret == -EAGAIN) { wait = true; - clear_bit(accel_dev->accel_id, &service->start_status); + clear_bit(accel_dev->accel_id, service->start_status); } } @@ -317,14 +321,14 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev) list_for_each(list_itr, &service_table) { service = list_entry(list_itr, struct service_hndl, list); - if (!test_bit(accel_dev->accel_id, &service->init_status)) + if (!test_bit(accel_dev->accel_id, service->init_status)) continue; if (service->event_hld(accel_dev, ADF_EVENT_SHUTDOWN)) dev_err(&GET_DEV(accel_dev), "Failed to shutdown service %s\n", service->name); else - clear_bit(accel_dev->accel_id, &service->init_status); + clear_bit(accel_dev->accel_id, service->init_status); } hw_data->disable_iov(accel_dev); diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index 9320ae1d005b..b36d8653b1ba 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -162,9 +162,9 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev) /** * adf_disable_sriov() - Disable SRIOV for the device - * @pdev: Pointer to pci device. + * @accel_dev: Pointer to accel device. * - * Function disables SRIOV for the pci device. + * Function disables SRIOV for the accel device. * * Return: 0 on success, error code otherwise. 
*/ diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index bf99e11a3403..4a73fc70f7a9 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -148,7 +148,7 @@ static void adf_pf2vf_bh_handler(void *data) INIT_WORK(&stop_data->work, adf_dev_stop_async); queue_work(adf_vf_stop_wq, &stop_data->work); /* To ack, clear the PF2VFINT bit */ - msg &= ~BIT(0); + msg &= ~ADF_PF2VF_INT; ADF_CSR_WR(pmisc_bar_addr, hw_data->get_pf2vf_offset(0), msg); return; } @@ -168,7 +168,7 @@ static void adf_pf2vf_bh_handler(void *data) } /* To ack, clear the PF2VFINT bit */ - msg &= ~BIT(0); + msg &= ~ADF_PF2VF_INT; ADF_CSR_WR(pmisc_bar_addr, hw_data->get_pf2vf_offset(0), msg); /* Re-enable PF2VF interrupts */ diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index 4d2de2838451..2ce01f010c74 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -186,7 +186,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c index 60df98632fa2..26ab17bfc6da 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c @@ -170,7 +170,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) accel_pci_dev->sku = hw_data->get_sku(hw_data); /* Create dev top level debugfs entry */ - snprintf(name, sizeof(name), "%s%s_%02x:%02d.%02d", + snprintf(name, sizeof(name), "%s%s_%02x:%02d.%d", ADF_DEVICE_NAME_PREFIX, hw_data->dev_class->name, pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff --git a/drivers/crypto/virtio/Kconfig b/drivers/crypto/virtio/Kconfig index d80f73366ae2..5db07495ddc5 100644 --- a/drivers/crypto/virtio/Kconfig +++ b/drivers/crypto/virtio/Kconfig @@ -4,6 +4,7 @@ config CRYPTO_DEV_VIRTIO select CRYPTO_AEAD select CRYPTO_AUTHENC select CRYPTO_BLKCIPHER + select CRYPTO_ENGINE default m help This driver provides support for virtio crypto device. 
If you diff --git a/drivers/crypto/virtio/virtio_crypto_algs.c b/drivers/crypto/virtio/virtio_crypto_algs.c index c2374df9abae..49defda4e03d 100644 --- a/drivers/crypto/virtio/virtio_crypto_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_algs.c @@ -288,8 +288,7 @@ static int virtio_crypto_ablkcipher_setkey(struct crypto_ablkcipher *tfm, static int __virtio_crypto_ablkcipher_do_req(struct virtio_crypto_request *vc_req, struct ablkcipher_request *req, - struct data_queue *data_vq, - __u8 op) + struct data_queue *data_vq) { struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); unsigned int ivsize = crypto_ablkcipher_ivsize(tfm); @@ -329,7 +328,7 @@ __virtio_crypto_ablkcipher_do_req(struct virtio_crypto_request *vc_req, vc_req->req_data = req_data; vc_req->type = VIRTIO_CRYPTO_SYM_OP_CIPHER; /* Head of operation */ - if (op) { + if (vc_req->encrypt) { req_data->header.session_id = cpu_to_le64(ctx->enc_sess_info.session_id); req_data->header.opcode = @@ -424,19 +423,15 @@ static int virtio_crypto_ablkcipher_encrypt(struct ablkcipher_request *req) struct virtio_crypto_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(atfm); struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req); struct virtio_crypto *vcrypto = ctx->vcrypto; - int ret; /* Use the first data virtqueue as default */ struct data_queue *data_vq = &vcrypto->data_vq[0]; vc_req->ablkcipher_ctx = ctx; vc_req->ablkcipher_req = req; - ret = __virtio_crypto_ablkcipher_do_req(vc_req, req, data_vq, 1); - if (ret < 0) { - pr_err("virtio_crypto: Encryption failed!\n"); - return ret; - } + vc_req->encrypt = true; + vc_req->dataq = data_vq; - return -EINPROGRESS; + return crypto_transfer_cipher_request_to_engine(data_vq->engine, req); } static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req) @@ -445,20 +440,16 @@ static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req) struct virtio_crypto_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(atfm); struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req); struct virtio_crypto *vcrypto = ctx->vcrypto; - int ret; /* Use the first data virtqueue as default */ struct data_queue *data_vq = &vcrypto->data_vq[0]; vc_req->ablkcipher_ctx = ctx; vc_req->ablkcipher_req = req; - ret = __virtio_crypto_ablkcipher_do_req(vc_req, req, data_vq, 0); - if (ret < 0) { - pr_err("virtio_crypto: Decryption failed!\n"); - return ret; - } + vc_req->encrypt = false; + vc_req->dataq = data_vq; - return -EINPROGRESS; + return crypto_transfer_cipher_request_to_engine(data_vq->engine, req); } static int virtio_crypto_ablkcipher_init(struct crypto_tfm *tfm) @@ -484,10 +475,37 @@ static void virtio_crypto_ablkcipher_exit(struct crypto_tfm *tfm) ctx->vcrypto = NULL; } +int virtio_crypto_ablkcipher_crypt_req( + struct crypto_engine *engine, + struct ablkcipher_request *req) +{ + struct virtio_crypto_request *vc_req = ablkcipher_request_ctx(req); + struct data_queue *data_vq = vc_req->dataq; + int ret; + + ret = __virtio_crypto_ablkcipher_do_req(vc_req, req, data_vq); + if (ret < 0) + return ret; + + virtqueue_kick(data_vq->vq); + + return 0; +} + +void virtio_crypto_ablkcipher_finalize_req( + struct virtio_crypto_request *vc_req, + struct ablkcipher_request *req, + int err) +{ + crypto_finalize_cipher_request(vc_req->dataq->engine, req, err); + + virtcrypto_clear_request(vc_req); +} + static struct crypto_alg virtio_crypto_algs[] = { { .cra_name = "cbc(aes)", .cra_driver_name = "virtio_crypto_aes_cbc", - .cra_priority = 501, + .cra_priority = 150, .cra_flags = 
CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct virtio_crypto_ablkcipher_ctx), diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h index 3d6566b02876..da6d8c0ea407 100644 --- a/drivers/crypto/virtio/virtio_crypto_common.h +++ b/drivers/crypto/virtio/virtio_crypto_common.h @@ -25,6 +25,7 @@ #include <crypto/aead.h> #include <crypto/aes.h> #include <crypto/authenc.h> +#include <crypto/engine.h> /* Internal representation of a data virtqueue */ @@ -37,6 +38,8 @@ struct data_queue { /* Name of the tx queue: dataq.$index */ char name[32]; + + struct crypto_engine *engine; }; struct virtio_crypto { @@ -97,6 +100,9 @@ struct virtio_crypto_request { struct virtio_crypto_op_data_req *req_data; struct scatterlist **sgs; uint8_t *iv; + /* Encryption? */ + bool encrypt; + struct data_queue *dataq; }; int virtcrypto_devmgr_add_dev(struct virtio_crypto *vcrypto_dev); @@ -110,6 +116,16 @@ int virtcrypto_dev_started(struct virtio_crypto *vcrypto_dev); struct virtio_crypto *virtcrypto_get_dev_node(int node); int virtcrypto_dev_start(struct virtio_crypto *vcrypto); void virtcrypto_dev_stop(struct virtio_crypto *vcrypto); +int virtio_crypto_ablkcipher_crypt_req( + struct crypto_engine *engine, + struct ablkcipher_request *req); +void virtio_crypto_ablkcipher_finalize_req( + struct virtio_crypto_request *vc_req, + struct ablkcipher_request *req, + int err); + +void +virtcrypto_clear_request(struct virtio_crypto_request *vc_req); static inline int virtio_crypto_get_current_node(void) { diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index fe70ec823b27..b5b153317376 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -25,7 +25,7 @@ #include "virtio_crypto_common.h" -static void +void virtcrypto_clear_request(struct virtio_crypto_request *vc_req) { if (vc_req) { @@ -66,12 +66,12 @@ static void virtcrypto_dataq_callback(struct virtqueue *vq) break; } ablk_req = vc_req->ablkcipher_req; - virtcrypto_clear_request(vc_req); spin_unlock_irqrestore( &vcrypto->data_vq[qid].lock, flags); /* Finish the encrypt or decrypt process */ - ablk_req->base.complete(&ablk_req->base, error); + virtio_crypto_ablkcipher_finalize_req(vc_req, + ablk_req, error); spin_lock_irqsave( &vcrypto->data_vq[qid].lock, flags); } @@ -87,6 +87,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) int ret = -ENOMEM; int i, total_vqs; const char **names; + struct device *dev = &vi->vdev->dev; /* * We expect 1 data virtqueue, followed by @@ -128,6 +129,15 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) for (i = 0; i < vi->max_data_queues; i++) { spin_lock_init(&vi->data_vq[i].lock); vi->data_vq[i].vq = vqs[i]; + /* Initialize crypto engine */ + vi->data_vq[i].engine = crypto_engine_alloc_init(dev, 1); + if (!vi->data_vq[i].engine) { + ret = -ENOMEM; + goto err_engine; + } + + vi->data_vq[i].engine->cipher_one_request = + virtio_crypto_ablkcipher_crypt_req; } kfree(names); @@ -136,6 +146,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) return 0; +err_engine: err_find: kfree(names); err_names: @@ -269,6 +280,38 @@ static int virtcrypto_update_status(struct virtio_crypto *vcrypto) return 0; } +static int virtcrypto_start_crypto_engines(struct virtio_crypto *vcrypto) +{ + int32_t i; + int ret; + + for (i = 0; i < vcrypto->max_data_queues; i++) { + if (vcrypto->data_vq[i].engine) { + ret = 
crypto_engine_start(vcrypto->data_vq[i].engine); + if (ret) + goto err; + } + } + + return 0; + +err: + while (--i >= 0) + if (vcrypto->data_vq[i].engine) + crypto_engine_exit(vcrypto->data_vq[i].engine); + + return ret; +} + +static void virtcrypto_clear_crypto_engines(struct virtio_crypto *vcrypto) +{ + u32 i; + + for (i = 0; i < vcrypto->max_data_queues; i++) + if (vcrypto->data_vq[i].engine) + crypto_engine_exit(vcrypto->data_vq[i].engine); +} + static void virtcrypto_del_vqs(struct virtio_crypto *vcrypto) { struct virtio_device *vdev = vcrypto->vdev; @@ -355,14 +398,21 @@ static int virtcrypto_probe(struct virtio_device *vdev) dev_err(&vdev->dev, "Failed to initialize vqs.\n"); goto free_dev; } + + err = virtcrypto_start_crypto_engines(vcrypto); + if (err) + goto free_vqs; + virtio_device_ready(vdev); err = virtcrypto_update_status(vcrypto); if (err) - goto free_vqs; + goto free_engines; return 0; +free_engines: + virtcrypto_clear_crypto_engines(vcrypto); free_vqs: vcrypto->vdev->config->reset(vdev); virtcrypto_del_vqs(vcrypto); @@ -398,6 +448,7 @@ static void virtcrypto_remove(struct virtio_device *vdev) virtcrypto_dev_stop(vcrypto); vdev->config->reset(vdev); virtcrypto_free_unused_reqs(vcrypto); + virtcrypto_clear_crypto_engines(vcrypto); virtcrypto_del_vqs(vcrypto); virtcrypto_devmgr_rm_dev(vcrypto); kfree(vcrypto); @@ -420,6 +471,7 @@ static int virtcrypto_freeze(struct virtio_device *vdev) if (virtcrypto_dev_started(vcrypto)) virtcrypto_dev_stop(vcrypto); + virtcrypto_clear_crypto_engines(vcrypto); virtcrypto_del_vqs(vcrypto); return 0; } @@ -433,14 +485,26 @@ static int virtcrypto_restore(struct virtio_device *vdev) if (err) return err; + err = virtcrypto_start_crypto_engines(vcrypto); + if (err) + goto free_vqs; + virtio_device_ready(vdev); + err = virtcrypto_dev_start(vcrypto); if (err) { dev_err(&vdev->dev, "Failed to start virtio crypto device.\n"); - return -EFAULT; + goto free_engines; } return 0; + +free_engines: + virtcrypto_clear_crypto_engines(vcrypto); +free_vqs: + vcrypto->vdev->config->reset(vdev); + virtcrypto_del_vqs(vcrypto); + return err; } #endif diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c index 38ed10d761d0..7cf6d31c1123 100644 --- a/drivers/crypto/vmx/aes_ctr.c +++ b/drivers/crypto/vmx/aes_ctr.c @@ -80,11 +80,13 @@ static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key, int ret; struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); + preempt_disable(); pagefault_disable(); enable_kernel_vsx(); ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); disable_kernel_vsx(); pagefault_enable(); + preempt_enable(); ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); return ret; @@ -99,11 +101,13 @@ static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx, u8 *dst = walk->dst.virt.addr; unsigned int nbytes = walk->nbytes; + preempt_disable(); pagefault_disable(); enable_kernel_vsx(); aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key); disable_kernel_vsx(); pagefault_enable(); + preempt_enable(); crypto_xor(keystream, src, nbytes); memcpy(dst, keystream, nbytes); @@ -132,6 +136,7 @@ static int p8_aes_ctr_crypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); ret = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { + preempt_disable(); pagefault_disable(); enable_kernel_vsx(); aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr, @@ -143,6 +148,7 @@ static int p8_aes_ctr_crypt(struct blkcipher_desc *desc, walk.iv); disable_kernel_vsx(); 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 8d9e4b7a8e84..ccc05f874419 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -3385,6 +3385,14 @@ struct fw_crypto_lookaside_wr {
 #define FW_CRYPTO_LOOKASIDE_WR_IV_G(x) \
 	(((x) >> FW_CRYPTO_LOOKASIDE_WR_IV_S) & FW_CRYPTO_LOOKASIDE_WR_IV_M)
 
+#define FW_CRYPTO_LOOKASIDE_WR_FQIDX_S 15
+#define FW_CRYPTO_LOOKASIDE_WR_FQIDX_M 0xff
+#define FW_CRYPTO_LOOKASIDE_WR_FQIDX_V(x) \
+	((x) << FW_CRYPTO_LOOKASIDE_WR_FQIDX_S)
+#define FW_CRYPTO_LOOKASIDE_WR_FQIDX_G(x) \
+	(((x) >> FW_CRYPTO_LOOKASIDE_WR_FQIDX_S) & \
+	 FW_CRYPTO_LOOKASIDE_WR_FQIDX_M)
+
 #define FW_CRYPTO_LOOKASIDE_WR_TX_CH_S 10
 #define FW_CRYPTO_LOOKASIDE_WR_TX_CH_M 0x3
 #define FW_CRYPTO_LOOKASIDE_WR_TX_CH_V(x) \
diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h
index 20d20f681a72..445fc45f4b5b 100644
--- a/include/crypto/chacha20.h
+++ b/include/crypto/chacha20.h
@@ -5,6 +5,7 @@
 #ifndef _CRYPTO_CHACHA20_H
 #define _CRYPTO_CHACHA20_H
 
+#include <crypto/skcipher.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
 
@@ -18,9 +19,8 @@ struct chacha20_ctx {
 
 void chacha20_block(u32 *state, void *stream);
 void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv);
-int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
+int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			   unsigned int keysize);
-int crypto_chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-			  struct scatterlist *src, unsigned int nbytes);
+int crypto_chacha20_crypt(struct skcipher_request *req);
 
 #endif
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index 216a2b876147..b5727bcd2336 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -329,6 +329,16 @@ static inline unsigned int crypto_ahash_digestsize(struct crypto_ahash *tfm)
 	return crypto_hash_alg_common(tfm)->digestsize;
 }
 
+/**
+ * crypto_ahash_statesize() - obtain size of the ahash state
+ * @tfm: cipher handle
+ *
+ * Return the size of the ahash state. With the crypto_ahash_export()
+ * function, the caller can export the state into a buffer whose size is
+ * defined with this function.
+ *
+ * Return: size of the ahash state
+ */
 static inline unsigned int crypto_ahash_statesize(struct crypto_ahash *tfm)
 {
 	return crypto_hash_alg_common(tfm)->statesize;
@@ -369,11 +379,7 @@ static inline struct crypto_ahash *crypto_ahash_reqtfm(
  * crypto_ahash_reqsize() - obtain size of the request data structure
  * @tfm: cipher handle
  *
- * Return the size of the ahash state size. With the crypto_ahash_export
- * function, the caller can export the state into a buffer whose size is
- * defined with this function.
- *
- * Return: size of the ahash state
+ * Return: size of the request data
  */
 static inline unsigned int crypto_ahash_reqsize(struct crypto_ahash *tfm)
 {
@@ -453,7 +459,7 @@ int crypto_ahash_digest(struct ahash_request *req);
  *
  * This function exports the hash state of the ahash_request handle into the
  * caller-allocated output buffer out which must have sufficient size (e.g. by
- * calling crypto_ahash_reqsize).
+ * calling crypto_ahash_statesize()).
  *
 
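The kernel-doc corrections above make the statesize/reqsize split explicit: crypto_ahash_reqsize() sizes the request's private context, while crypto_ahash_statesize() sizes the buffer handed to crypto_ahash_export(). A minimal usage sketch, assuming only the hash API shown above (the helper itself is invented):

	/*
	 * Size the export buffer with crypto_ahash_statesize(), as the
	 * corrected kernel-doc describes; error handling trimmed.
	 */
	#include <crypto/hash.h>
	#include <linux/slab.h>

	static int example_export_state(struct ahash_request *req, void **state)
	{
		struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);

		/* statesize, not reqsize, is the export buffer size */
		*state = kmalloc(crypto_ahash_statesize(tfm), GFP_KERNEL);
		if (!*state)
			return -ENOMEM;

		return crypto_ahash_export(req, *state);
	}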
 * Return: 0 if the export was successful; < 0 if an error occurred
 */
diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h
index 8735979ed341..e42f7063f245 100644
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
@@ -66,7 +66,7 @@ struct skcipher_walk {
 	int flags;
 
 	unsigned int blocksize;
-	unsigned int chunksize;
+	unsigned int stride;
 	unsigned int alignmask;
 };
diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h
index 750b14f1ada4..562001cb412b 100644
--- a/include/crypto/skcipher.h
+++ b/include/crypto/skcipher.h
@@ -115,6 +115,9 @@ struct crypto_skcipher {
  *	      IV of exactly that size to perform the encrypt or decrypt operation.
  * @chunksize: Equal to the block size except for stream ciphers such as
  *	       CTR where it is set to the underlying block size.
+ * @walksize: Equal to the chunk size except in cases where the algorithm is
+ *	      considerably more efficient if it can operate on multiple chunks
+ *	      in parallel. Should be a multiple of chunksize.
  * @base: Definition of a generic crypto algorithm.
  *
  * All fields except @ivsize are mandatory and must be filled.
@@ -131,6 +134,7 @@ struct skcipher_alg {
 	unsigned int max_keysize;
 	unsigned int ivsize;
 	unsigned int chunksize;
+	unsigned int walksize;
 
 	struct crypto_alg base;
 };
@@ -289,6 +293,19 @@ static inline unsigned int crypto_skcipher_alg_chunksize(
 	return alg->chunksize;
 }
 
+static inline unsigned int crypto_skcipher_alg_walksize(
+	struct skcipher_alg *alg)
+{
+	if ((alg->base.cra_flags & CRYPTO_ALG_TYPE_MASK) ==
+	    CRYPTO_ALG_TYPE_BLKCIPHER)
+		return alg->base.cra_blocksize;
+
+	if (alg->base.cra_ablkcipher.encrypt)
+		return alg->base.cra_blocksize;
+
+	return alg->walksize;
+}
+
 /**
  * crypto_skcipher_chunksize() - obtain chunk size
  * @tfm: cipher handle
@@ -307,6 +324,23 @@ static inline unsigned int crypto_skcipher_chunksize(
 }
 
 /**
+ * crypto_skcipher_walksize() - obtain walk size
+ * @tfm: cipher handle
+ *
+ * In some cases, algorithms can only perform optimally when operating on
+ * multiple blocks in parallel. This is reflected by the walksize, which
+ * must be a multiple of the chunksize (or equal if the concern does not
+ * apply).
+ *
+ * Return: walk size in bytes
+ */
+static inline unsigned int crypto_skcipher_walksize(
+	struct crypto_skcipher *tfm)
+{
+	return crypto_skcipher_alg_walksize(crypto_skcipher_alg(tfm));
+}
+
+/**
  * crypto_skcipher_blocksize() - obtain block size of cipher
  * @tfm: cipher handle
  *
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 0444b1336268..fddd1a5eb322 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -116,6 +116,7 @@
  */
 #define __pure			__attribute__((pure))
 #define __aligned(x)		__attribute__((aligned(x)))
+#define __aligned_largest	__attribute__((aligned))
 #define __printf(a, b)		__attribute__((format(printf, a, b)))
 #define __scanf(a, b)		__attribute__((format(scanf, a, b)))
 #define __attribute_const__	__attribute__((__const__))
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index ed30d5d713e3..5d81f739aa0a 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -31,6 +31,7 @@
 #define SGI_MMTIMER		153
 #define STORE_QUEUE_MINOR	155	/* unused */
 #define I2O_MINOR		166
+#define HWRNG_MINOR		183
 #define MICROCODE_MINOR		184
 #define IRNET_MINOR		187
 #define VFIO_MINOR		196
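For the new walksize attribute, a hypothetical skcipher_alg declaration shows how the three granularities relate for a CTR-mode SIMD cipher; every name and number below is invented, not taken from this merge:

	/*
	 * Illustrative only: a stream-cipher interface (cra_blocksize 1)
	 * whose SIMD code prefers four counter blocks per walk step.
	 */
	#include <crypto/skcipher.h>

	static struct skcipher_alg example_ctr_alg = {
		.min_keysize	= 16,
		.max_keysize	= 32,
		.ivsize		= 16,
		.chunksize	= 16,		/* underlying cipher block size */
		.walksize	= 4 * 16,	/* multiple of chunksize */
		.base		= {
			.cra_name	= "ctr(example)",
			.cra_blocksize	= 1,	/* stream cipher interface */
		},
	};

The walk code may then hand the implementation up to four counter blocks per step, while correctness still only requires chunksize granularity.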
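The __aligned_largest helper added to compiler-gcc.h expands to the bare aligned attribute, which GCC defines as the largest alignment ever used for any data type on the target, rather than a fixed byte count. A small illustration (the struct is made up):

	/* Align to the target's largest useful alignment, whatever it is. */
	struct example_buffer {
		unsigned int	len;
		unsigned char	data[64];
	} __aligned_largest;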