author	Linus Torvalds <torvalds@linux-foundation.org>	2022-05-27 18:06:49 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2022-05-27 18:06:49 -0700
commit	d075c0c1be279c5f4c6688ac0442fff6494e56bc (patch)
tree	c3e3ab6b35139229ad0a5096ccea0c00eb97998b
parent	bf272460d744112bacd4c4d562592decbf0edf64 (diff)
parent	e4e62bbc6aba49a5edb3156ec65f6698ff37d228 (diff)
Merge tag 'v5.19-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu:
 "API:
   - Test in-place en/decryption with two sglists in testmgr
   - Fix process vs softirq race in cryptd

  Algorithms:
   - Add arm64 acceleration for sm4
   - Add s390 acceleration for chacha20

  Drivers:
   - Add polarfire soc hwrng support in mpfs
   - Add support for TI SoC AM62x in sa2ul
   - Add support for ATSHA204 cryptochip in atmel-sha204a
   - Add support for PRNG in caam
   - Restore support for storage encryption in qat
   - Restore support for storage encryption in hisilicon/sec"

* tag 'v5.19-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (116 commits)
  hwrng: omap3-rom - fix using wrong clk_disable() in omap_rom_rng_runtime_resume()
  crypto: hisilicon/sec - delete the flag CRYPTO_ALG_ALLOCATES_MEMORY
  crypto: qat - add support for 401xx devices
  crypto: qat - re-enable registration of algorithms
  crypto: qat - honor CRYPTO_TFM_REQ_MAY_SLEEP flag
  crypto: qat - add param check for DH
  crypto: qat - add param check for RSA
  crypto: qat - remove dma_free_coherent() for DH
  crypto: qat - remove dma_free_coherent() for RSA
  crypto: qat - fix memory leak in RSA
  crypto: qat - add backlog mechanism
  crypto: qat - refactor submission logic
  crypto: qat - use pre-allocated buffers in datapath
  crypto: qat - set to zero DH parameters before free
  crypto: s390 - add crypto library interface for ChaCha20
  crypto: talitos - Uniform coding style with defined variable
  crypto: octeontx2 - simplify the return expression of otx2_cpt_aead_cbc_aes_sha_setkey()
  crypto: cryptd - Protect per-CPU resource by disabling BH.
  crypto: sun8i-ce - do not fallback if cryptlen is less than sg length
  crypto: sun8i-ce - rework debugging
  ...
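Editor's note: the arm64 SM4 entry above is the largest piece of this pull; it replaces the old single-block cipher with full skcipher implementations of the ECB/CBC/CFB/CTR modes (registered further down in this patch). As a hedged sketch only, a kernel-side caller could drive one of the new templates through the generic skcipher API along these lines; the template name "ctr(sm4)" comes from this patch, while the demo function and the key/IV/data values are made up:

#include <crypto/skcipher.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int sm4_ctr_demo(void)
{
	u8 key[16] = { 0 };		/* SM4_KEY_SIZE */
	u8 iv[16] = { 0 };		/* SM4_BLOCK_SIZE */
	u8 buf[64] = { 0 };		/* encrypted in place */
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_skcipher("ctr(sm4)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, sizeof(key));
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, buf, sizeof(buf));
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, sizeof(buf), iv);
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}

With the sm4-ce module loaded, the crypto core should pick the highest-priority "ctr(sm4)" provider automatically.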
-rw-r--r--  Documentation/ABI/testing/debugfs-hisi-hpre | 14
-rw-r--r--  Documentation/ABI/testing/debugfs-hisi-sec | 14
-rw-r--r--  Documentation/ABI/testing/debugfs-hisi-zip | 14
-rw-r--r--  Documentation/ABI/testing/sysfs-driver-ccp | 87
-rw-r--r--  Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml | 1
-rw-r--r--  Documentation/devicetree/bindings/trivial-devices.yaml | 4
-rw-r--r--  arch/arm64/crypto/Kconfig | 16
-rw-r--r--  arch/arm64/crypto/Makefile | 8
-rw-r--r--  arch/arm64/crypto/sm4-ce-cipher-core.S | 36
-rw-r--r--  arch/arm64/crypto/sm4-ce-cipher-glue.c | 82
-rw-r--r--  arch/arm64/crypto/sm4-ce-core.S | 688
-rw-r--r--  arch/arm64/crypto/sm4-ce-glue.c | 386
-rw-r--r--  arch/arm64/crypto/sm4-neon-core.S | 487
-rw-r--r--  arch/arm64/crypto/sm4-neon-glue.c | 442
-rw-r--r--  arch/s390/crypto/chacha-glue.c | 34
-rw-r--r--  arch/x86/crypto/blowfish_glue.c | 8
-rw-r--r--  arch/x86/crypto/camellia_glue.c | 8
-rw-r--r--  arch/x86/crypto/serpent_avx2_glue.c | 8
-rw-r--r--  arch/x86/crypto/twofish_glue.c | 8
-rw-r--r--  arch/x86/crypto/twofish_glue_3way.c | 8
-rw-r--r--  crypto/Kconfig | 18
-rw-r--r--  crypto/Makefile | 6
-rw-r--r--  crypto/cryptd.c | 23
-rw-r--r--  crypto/crypto_engine.c | 1
-rw-r--r--  crypto/ecrdsa.c | 8
-rw-r--r--  crypto/sm3.c (renamed from lib/crypto/sm3.c) | 0
-rw-r--r--  crypto/sm4.c (renamed from lib/crypto/sm4.c) | 10
-rw-r--r--  crypto/testmgr.c | 75
-rw-r--r--  drivers/char/hw_random/Kconfig | 15
-rw-r--r--  drivers/char/hw_random/Makefile | 1
-rw-r--r--  drivers/char/hw_random/cn10k-rng.c | 31
-rw-r--r--  drivers/char/hw_random/mpfs-rng.c | 104
-rw-r--r--  drivers/char/hw_random/omap3-rom-rng.c | 2
-rw-r--r--  drivers/char/hw_random/optee-rng.c | 2
-rw-r--r--  drivers/crypto/Kconfig | 4
-rw-r--r--  drivers/crypto/Makefile | 1
-rw-r--r--  drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c | 22
-rw-r--r--  drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h | 1
-rw-r--r--  drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c | 102
-rw-r--r--  drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c | 54
-rw-r--r--  drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c | 130
-rw-r--r--  drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c | 6
-rw-r--r--  drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h | 19
-rw-r--r--  drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c | 180
-rw-r--r--  drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c | 92
-rw-r--r--  drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c | 385
-rw-r--r--  drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c | 6
-rw-r--r--  drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h | 33
-rw-r--r--  drivers/crypto/atmel-ecc.c | 2
-rw-r--r--  drivers/crypto/atmel-i2c.c | 30
-rw-r--r--  drivers/crypto/atmel-i2c.h | 1
-rw-r--r--  drivers/crypto/atmel-sha204a.c | 11
-rw-r--r--  drivers/crypto/caam/Kconfig | 8
-rw-r--r--  drivers/crypto/caam/Makefile | 1
-rw-r--r--  drivers/crypto/caam/caamprng.c | 235
-rw-r--r--  drivers/crypto/caam/ctrl.c | 18
-rw-r--r--  drivers/crypto/caam/intern.h | 15
-rw-r--r--  drivers/crypto/caam/jr.c | 3
-rw-r--r--  drivers/crypto/cavium/nitrox/nitrox_main.c | 10
-rw-r--r--  drivers/crypto/ccp/psp-dev.c | 49
-rw-r--r--  drivers/crypto/ccp/psp-dev.h | 22
-rw-r--r--  drivers/crypto/ccp/sev-dev.c | 32
-rw-r--r--  drivers/crypto/ccp/sp-pci.c | 62
-rw-r--r--  drivers/crypto/ccree/cc_buffer_mgr.c | 27
-rw-r--r--  drivers/crypto/ccree/cc_driver.c | 24
-rw-r--r--  drivers/crypto/hisilicon/Kconfig | 1
-rw-r--r--  drivers/crypto/hisilicon/hpre/hpre_main.c | 222
-rw-r--r--  drivers/crypto/hisilicon/qm.c | 282
-rw-r--r--  drivers/crypto/hisilicon/sec2/sec_crypto.c | 2
-rw-r--r--  drivers/crypto/hisilicon/sec2/sec_main.c | 108
-rw-r--r--  drivers/crypto/hisilicon/sgl.c | 6
-rw-r--r--  drivers/crypto/hisilicon/zip/zip_crypto.c | 2
-rw-r--r--  drivers/crypto/hisilicon/zip/zip_main.c | 185
-rw-r--r--  drivers/crypto/inside-secure/safexcel.c | 9
-rw-r--r--  drivers/crypto/keembay/keembay-ocs-aes-core.c | 9
-rw-r--r--  drivers/crypto/marvell/cesa/cipher.c | 1
-rw-r--r--  drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c | 7
-rw-r--r--  drivers/crypto/qat/qat_4xxx/adf_drv.c | 8
-rw-r--r--  drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c | 15
-rw-r--r--  drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h | 4
-rw-r--r--  drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c | 15
-rw-r--r--  drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h | 4
-rw-r--r--  drivers/crypto/qat/qat_common/Makefile | 1
-rw-r--r--  drivers/crypto/qat/qat_common/adf_accel_devices.h | 6
-rw-r--r--  drivers/crypto/qat/qat_common/adf_common_drv.h | 18
-rw-r--r--  drivers/crypto/qat/qat_common/adf_gen2_hw_data.c | 13
-rw-r--r--  drivers/crypto/qat/qat_common/adf_gen2_hw_data.h | 6
-rw-r--r--  drivers/crypto/qat/qat_common/adf_gen2_pfvf.c | 78
-rw-r--r--  drivers/crypto/qat/qat_common/adf_gen4_pfvf.c | 61
-rw-r--r--  drivers/crypto/qat/qat_common/adf_isr.c | 21
-rw-r--r--  drivers/crypto/qat/qat_common/adf_pfvf_msg.h | 4
-rw-r--r--  drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c | 6
-rw-r--r--  drivers/crypto/qat/qat_common/adf_sriov.c | 16
-rw-r--r--  drivers/crypto/qat/qat_common/adf_transport.c | 11
-rw-r--r--  drivers/crypto/qat/qat_common/adf_transport.h | 1
-rw-r--r--  drivers/crypto/qat/qat_common/adf_transport_internal.h | 1
-rw-r--r--  drivers/crypto/qat/qat_common/adf_vf_isr.c | 1
-rw-r--r--  drivers/crypto/qat/qat_common/qat_algs.c | 153
-rw-r--r--  drivers/crypto/qat/qat_common/qat_algs_send.c | 86
-rw-r--r--  drivers/crypto/qat/qat_common/qat_algs_send.h | 11
-rw-r--r--  drivers/crypto/qat/qat_common/qat_asym_algs.c | 307
-rw-r--r--  drivers/crypto/qat/qat_common/qat_crypto.c | 10
-rw-r--r--  drivers/crypto/qat/qat_common/qat_crypto.h | 44
-rw-r--r--  drivers/crypto/qat/qat_common/qat_hal.c | 1
-rw-r--r--  drivers/crypto/qat/qat_common/qat_uclo.c | 3
-rw-r--r--  drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 126
-rw-r--r--  drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h | 4
-rw-r--r--  drivers/crypto/sa2ul.c | 1
-rw-r--r--  drivers/crypto/talitos.c | 10
-rw-r--r--  drivers/crypto/ux500/hash/hash_core.c | 4
-rw-r--r--  drivers/crypto/vmx/Makefile | 17
-rw-r--r--  include/crypto/sm4.h | 4
-rw-r--r--  include/linux/hisi_acc_qm.h | 23
-rw-r--r--  lib/crypto/Kconfig | 6
-rw-r--r--  lib/crypto/Makefile | 6
-rw-r--r--  tools/testing/crypto/chacha20-s390/Makefile | 12
-rw-r--r--  tools/testing/crypto/chacha20-s390/run-tests.sh | 34
-rw-r--r--  tools/testing/crypto/chacha20-s390/test-cipher.c | 372
118 files changed, 5534 insertions(+), 1058 deletions(-)
diff --git a/Documentation/ABI/testing/debugfs-hisi-hpre b/Documentation/ABI/testing/debugfs-hisi-hpre
index 396de7bc735d..82abf92df429 100644
--- a/Documentation/ABI/testing/debugfs-hisi-hpre
+++ b/Documentation/ABI/testing/debugfs-hisi-hpre
@@ -104,6 +104,20 @@ Description: Dump the status of the QM.
Four states: initiated, started, stopped and closed.
Available for both PF and VF, and take no other effect on HPRE.
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/diff_regs
+Date: Mar 2022
+Contact: linux-crypto@vger.kernel.org
+Description: QM debug registers (regs) expose hardware register values.
+             This node shows how the QM register values have changed,
+             helping users check changes in the register values.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/diff_regs
+Date: Mar 2022
+Contact: linux-crypto@vger.kernel.org
+Description: HPRE debug registers (regs) expose hardware register values.
+             This node shows how the register values have changed,
+             helping users check changes in the register values.
+
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/send_cnt
Date: Apr 2020
Contact: linux-crypto@vger.kernel.org
diff --git a/Documentation/ABI/testing/debugfs-hisi-sec b/Documentation/ABI/testing/debugfs-hisi-sec
index 2bf84ced484b..93c530d1bf0f 100644
--- a/Documentation/ABI/testing/debugfs-hisi-sec
+++ b/Documentation/ABI/testing/debugfs-hisi-sec
@@ -84,6 +84,20 @@ Description: Dump the status of the QM.
Four states: initiated, started, stopped and closed.
Available for both PF and VF, and take no other effect on SEC.
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/diff_regs
+Date: Mar 2022
+Contact: linux-crypto@vger.kernel.org
+Description: QM debug registers (regs) expose hardware register values.
+             This node shows how the QM register values have changed,
+             helping users check changes in the register values.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/diff_regs
+Date: Mar 2022
+Contact: linux-crypto@vger.kernel.org
+Description: SEC debug registers (regs) expose hardware register values.
+             This node shows how the register values have changed,
+             helping users check changes in the register values.
+
What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/send_cnt
Date: Apr 2020
Contact: linux-crypto@vger.kernel.org
diff --git a/Documentation/ABI/testing/debugfs-hisi-zip b/Documentation/ABI/testing/debugfs-hisi-zip
index bf1258bc6495..fd3f314cf8d1 100644
--- a/Documentation/ABI/testing/debugfs-hisi-zip
+++ b/Documentation/ABI/testing/debugfs-hisi-zip
@@ -97,6 +97,20 @@ Description: Dump the status of the QM.
Four states: initiated, started, stopped and closed.
Available for both PF and VF, and take no other effect on ZIP.
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/diff_regs
+Date: Mar 2022
+Contact: linux-crypto@vger.kernel.org
+Description: QM debug registers (regs) expose hardware register values.
+             This node shows how the QM register values have changed,
+             helping users check changes in the register values.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/diff_regs
+Date: Mar 2022
+Contact: linux-crypto@vger.kernel.org
+Description: ZIP debug registers (regs) expose hardware register values.
+             This node shows how the register values have changed,
+             helping users check changes in the register values.
+
What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/send_cnt
Date: Apr 2020
Contact: linux-crypto@vger.kernel.org
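Editor's note: the diff_regs entries added in the three files above are read-only debugfs nodes. A minimal userspace sketch for dumping one of them could look like this; the <bdf> value "0000:7a:00.0" is a placeholder, and the per-line format is an assumption:

#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/kernel/debug/hisi_hpre/0000:7a:00.0/qm/diff_regs";
	char line[128];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* register name plus changed value */
	fclose(f);
	return 0;
}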
diff --git a/Documentation/ABI/testing/sysfs-driver-ccp b/Documentation/ABI/testing/sysfs-driver-ccp
new file mode 100644
index 000000000000..7aded9b75553
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-ccp
@@ -0,0 +1,87 @@
+What: /sys/bus/pci/devices/<BDF>/fused_part
+Date: June 2022
+KernelVersion: 5.19
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/fused_part file reports
+ whether the CPU or APU has been fused to prevent tampering.
+ 0: Not fused
+ 1: Fused
+
+What: /sys/bus/pci/devices/<BDF>/debug_lock_on
+Date: June 2022
+KernelVersion: 5.19
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/debug_lock_on reports
+ whether the AMD CPU or APU has been unlocked for debugging.
+ Possible values:
+ 0: Not locked
+ 1: Locked
+
+What: /sys/bus/pci/devices/<BDF>/tsme_status
+Date: June 2022
+KernelVersion: 5.19
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/tsme_status file reports
+ the status of transparent secure memory encryption on AMD systems.
+ Possible values:
+ 0: Not active
+ 1: Active
+
+What: /sys/bus/pci/devices/<BDF>/anti_rollback_status
+Date: June 2022
+KernelVersion: 5.19
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/anti_rollback_status file reports
+ whether the PSP is enforcing rollback protection.
+ Possible values:
+ 0: Not enforcing
+ 1: Enforcing
+
+What: /sys/bus/pci/devices/<BDF>/rpmc_production_enabled
+Date: June 2022
+KernelVersion: 5.19
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/rpmc_production_enabled file reports
+ whether Replay Protected Monotonic Counter support has been enabled.
+ Possible values:
+ 0: Not enabled
+ 1: Enabled
+
+What: /sys/bus/pci/devices/<BDF>/rpmc_spirom_available
+Date: June 2022
+KernelVersion: 5.19
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/rpmc_spirom_available file reports
+ whether an SPI device that supports Replay Protected Monotonic
+ Counters is installed on the system.
+ Possible values:
+ 0: Not present
+ 1: Present
+
+What: /sys/bus/pci/devices/<BDF>/hsp_tpm_available
+Date: June 2022
+KernelVersion: 5.19
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/hsp_tpm_available file reports
+ whether the HSP TPM has been activated.
+ Possible values:
+ 0: Not activated or present
+ 1: Activated
+
+What: /sys/bus/pci/devices/<BDF>/rom_armor_enforced
+Date: June 2022
+KernelVersion: 5.19
+Contact: mario.limonciello@amd.com
+Description:
+ The /sys/bus/pci/devices/<BDF>/rom_armor_enforced file reports
+ whether RomArmor SPI protection is enforced.
+ Possible values:
+ 0: Not enforced
+ 1: Enforced
diff --git a/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml b/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml
index a410d2cedde6..02f47c2e7998 100644
--- a/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml
+++ b/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml
@@ -15,6 +15,7 @@ properties:
- ti,j721e-sa2ul
- ti,am654-sa2ul
- ti,am64-sa2ul
+ - ti,am62-sa3ul
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index 05f01f4acca8..6aafa71806a3 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -47,7 +47,9 @@ properties:
- at,24c08
# i2c trusted platform module (TPM)
- atmel,at97sc3204t
- # i2c h/w symmetric crypto module
+ # ATSHA204 - i2c h/w symmetric crypto module
+ - atmel,atsha204
+ # ATSHA204A - i2c h/w symmetric crypto module
- atmel,atsha204a
# i2c h/w elliptic curve crypto module
- atmel,atecc508a
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 2a965aa0188d..ac85682c013c 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -45,13 +45,25 @@ config CRYPTO_SM3_ARM64_CE
tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_HASH
- select CRYPTO_LIB_SM3
+ select CRYPTO_SM3
config CRYPTO_SM4_ARM64_CE
tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
- select CRYPTO_LIB_SM4
+ select CRYPTO_SM4
+
+config CRYPTO_SM4_ARM64_CE_BLK
+ tristate "SM4 in ECB/CBC/CFB/CTR modes using ARMv8 Crypto Extensions"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_SKCIPHER
+ select CRYPTO_SM4
+
+config CRYPTO_SM4_ARM64_NEON_BLK
+ tristate "SM4 in ECB/CBC/CFB/CTR modes using NEON instructions"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_SKCIPHER
+ select CRYPTO_SM4
config CRYPTO_GHASH_ARM64_CE
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 09a805cc32d7..bea8995133b1 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -20,9 +20,15 @@ sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o
sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o
-obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce.o
+obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce-cipher.o
+sm4-ce-cipher-y := sm4-ce-cipher-glue.o sm4-ce-cipher-core.o
+
+obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_BLK) += sm4-ce.o
sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o
+obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o
+sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o
+
obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
diff --git a/arch/arm64/crypto/sm4-ce-cipher-core.S b/arch/arm64/crypto/sm4-ce-cipher-core.S
new file mode 100644
index 000000000000..4ac6cfbc5797
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce-cipher-core.S
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8
+ .set .Lv\b\().4s, \b
+ .endr
+
+ .macro sm4e, rd, rn
+ .inst 0xcec08400 | .L\rd | (.L\rn << 5)
+ .endm
+
+ /*
+ * void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in);
+ */
+ .text
+SYM_FUNC_START(sm4_ce_do_crypt)
+ ld1 {v8.4s}, [x2]
+ ld1 {v0.4s-v3.4s}, [x0], #64
+CPU_LE( rev32 v8.16b, v8.16b )
+ ld1 {v4.4s-v7.4s}, [x0]
+ sm4e v8.4s, v0.4s
+ sm4e v8.4s, v1.4s
+ sm4e v8.4s, v2.4s
+ sm4e v8.4s, v3.4s
+ sm4e v8.4s, v4.4s
+ sm4e v8.4s, v5.4s
+ sm4e v8.4s, v6.4s
+ sm4e v8.4s, v7.4s
+ rev64 v8.4s, v8.4s
+ ext v8.16b, v8.16b, v8.16b, #8
+CPU_LE( rev32 v8.16b, v8.16b )
+ st1 {v8.4s}, [x1]
+ ret
+SYM_FUNC_END(sm4_ce_do_crypt)
diff --git a/arch/arm64/crypto/sm4-ce-cipher-glue.c b/arch/arm64/crypto/sm4-ce-cipher-glue.c
new file mode 100644
index 000000000000..76a34ef4abbb
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce-cipher-glue.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/sm4.h>
+#include <crypto/internal/simd.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/types.h>
+
+MODULE_ALIAS_CRYPTO("sm4");
+MODULE_ALIAS_CRYPTO("sm4-ce");
+MODULE_DESCRIPTION("SM4 symmetric cipher using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in);
+
+static int sm4_ce_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ return sm4_expandkey(ctx, key, key_len);
+}
+
+static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (!crypto_simd_usable()) {
+ sm4_crypt_block(ctx->rkey_enc, out, in);
+ } else {
+ kernel_neon_begin();
+ sm4_ce_do_crypt(ctx->rkey_enc, out, in);
+ kernel_neon_end();
+ }
+}
+
+static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (!crypto_simd_usable()) {
+ sm4_crypt_block(ctx->rkey_dec, out, in);
+ } else {
+ kernel_neon_begin();
+ sm4_ce_do_crypt(ctx->rkey_dec, out, in);
+ kernel_neon_end();
+ }
+}
+
+static struct crypto_alg sm4_ce_alg = {
+ .cra_name = "sm4",
+ .cra_driver_name = "sm4-ce",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_u.cipher = {
+ .cia_min_keysize = SM4_KEY_SIZE,
+ .cia_max_keysize = SM4_KEY_SIZE,
+ .cia_setkey = sm4_ce_setkey,
+ .cia_encrypt = sm4_ce_encrypt,
+ .cia_decrypt = sm4_ce_decrypt
+ }
+};
+
+static int __init sm4_ce_mod_init(void)
+{
+ return crypto_register_alg(&sm4_ce_alg);
+}
+
+static void __exit sm4_ce_mod_fini(void)
+{
+ crypto_unregister_alg(&sm4_ce_alg);
+}
+
+module_cpu_feature_match(SM4, sm4_ce_mod_init);
+module_exit(sm4_ce_mod_fini);
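Editor's note: the file above keeps the old single-block "sm4-ce" cipher alive under the new sm4-ce-cipher module name, falling back to the generic sm4_crypt_block() when SIMD is unusable. A hedged sketch of consuming it from other kernel code through the single-block cipher API follows; since v5.12 that API lives in an internal header and needs a namespace import, and the demo function and values here are made up:

#include <crypto/internal/cipher.h>
#include <linux/err.h>
#include <linux/module.h>

MODULE_IMPORT_NS(CRYPTO_INTERNAL);

static int sm4_block_demo(void)
{
	u8 key[16] = { 0 }, in[16] = { 0 }, out[16];
	struct crypto_cipher *tfm;
	int err;

	tfm = crypto_alloc_cipher("sm4", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_cipher_setkey(tfm, key, sizeof(key));
	if (!err)
		crypto_cipher_encrypt_one(tfm, out, in);	/* one 16-byte block */

	crypto_free_cipher(tfm);
	return err;
}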
diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S
index 4ac6cfbc5797..934e0f093279 100644
--- a/arch/arm64/crypto/sm4-ce-core.S
+++ b/arch/arm64/crypto/sm4-ce-core.S
@@ -1,36 +1,660 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4 Cipher Algorithm for ARMv8 with Crypto Extensions
+ * as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (C) 2022, Alibaba Group.
+ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
#include <linux/linkage.h>
#include <asm/assembler.h>
- .irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8
- .set .Lv\b\().4s, \b
- .endr
-
- .macro sm4e, rd, rn
- .inst 0xcec08400 | .L\rd | (.L\rn << 5)
- .endm
-
- /*
- * void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in);
- */
- .text
-SYM_FUNC_START(sm4_ce_do_crypt)
- ld1 {v8.4s}, [x2]
- ld1 {v0.4s-v3.4s}, [x0], #64
-CPU_LE( rev32 v8.16b, v8.16b )
- ld1 {v4.4s-v7.4s}, [x0]
- sm4e v8.4s, v0.4s
- sm4e v8.4s, v1.4s
- sm4e v8.4s, v2.4s
- sm4e v8.4s, v3.4s
- sm4e v8.4s, v4.4s
- sm4e v8.4s, v5.4s
- sm4e v8.4s, v6.4s
- sm4e v8.4s, v7.4s
- rev64 v8.4s, v8.4s
- ext v8.16b, v8.16b, v8.16b, #8
-CPU_LE( rev32 v8.16b, v8.16b )
- st1 {v8.4s}, [x1]
- ret
-SYM_FUNC_END(sm4_ce_do_crypt)
+.arch armv8-a+crypto
+
+.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 16, 20, 24, 25, 26, 27, 28, 29, 30, 31
+ .set .Lv\b\().4s, \b
+.endr
+
+.macro sm4e, vd, vn
+ .inst 0xcec08400 | (.L\vn << 5) | .L\vd
+.endm
+
+.macro sm4ekey, vd, vn, vm
+ .inst 0xce60c800 | (.L\vm << 16) | (.L\vn << 5) | .L\vd
+.endm
+
+/* Register macros */
+
+#define RTMP0 v16
+#define RTMP1 v17
+#define RTMP2 v18
+#define RTMP3 v19
+
+#define RIV v20
+
+/* Helper macros. */
+
+#define PREPARE \
+ ld1 {v24.16b-v27.16b}, [x0], #64; \
+ ld1 {v28.16b-v31.16b}, [x0];
+
+#define SM4_CRYPT_BLK(b0) \
+ rev32 b0.16b, b0.16b; \
+ sm4e b0.4s, v24.4s; \
+ sm4e b0.4s, v25.4s; \
+ sm4e b0.4s, v26.4s; \
+ sm4e b0.4s, v27.4s; \
+ sm4e b0.4s, v28.4s; \
+ sm4e b0.4s, v29.4s; \
+ sm4e b0.4s, v30.4s; \
+ sm4e b0.4s, v31.4s; \
+ rev64 b0.4s, b0.4s; \
+ ext b0.16b, b0.16b, b0.16b, #8; \
+ rev32 b0.16b, b0.16b;
+
+#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \
+ rev32 b0.16b, b0.16b; \
+ rev32 b1.16b, b1.16b; \
+ rev32 b2.16b, b2.16b; \
+ rev32 b3.16b, b3.16b; \
+ sm4e b0.4s, v24.4s; \
+ sm4e b1.4s, v24.4s; \
+ sm4e b2.4s, v24.4s; \
+ sm4e b3.4s, v24.4s; \
+ sm4e b0.4s, v25.4s; \
+ sm4e b1.4s, v25.4s; \
+ sm4e b2.4s, v25.4s; \
+ sm4e b3.4s, v25.4s; \
+ sm4e b0.4s, v26.4s; \
+ sm4e b1.4s, v26.4s; \
+ sm4e b2.4s, v26.4s; \
+ sm4e b3.4s, v26.4s; \
+ sm4e b0.4s, v27.4s; \
+ sm4e b1.4s, v27.4s; \
+ sm4e b2.4s, v27.4s; \
+ sm4e b3.4s, v27.4s; \
+ sm4e b0.4s, v28.4s; \
+ sm4e b1.4s, v28.4s; \
+ sm4e b2.4s, v28.4s; \
+ sm4e b3.4s, v28.4s; \
+ sm4e b0.4s, v29.4s; \
+ sm4e b1.4s, v29.4s; \
+ sm4e b2.4s, v29.4s; \
+ sm4e b3.4s, v29.4s; \
+ sm4e b0.4s, v30.4s; \
+ sm4e b1.4s, v30.4s; \
+ sm4e b2.4s, v30.4s; \
+ sm4e b3.4s, v30.4s; \
+ sm4e b0.4s, v31.4s; \
+ sm4e b1.4s, v31.4s; \
+ sm4e b2.4s, v31.4s; \
+ sm4e b3.4s, v31.4s; \
+ rev64 b0.4s, b0.4s; \
+ rev64 b1.4s, b1.4s; \
+ rev64 b2.4s, b2.4s; \
+ rev64 b3.4s, b3.4s; \
+ ext b0.16b, b0.16b, b0.16b, #8; \
+ ext b1.16b, b1.16b, b1.16b, #8; \
+ ext b2.16b, b2.16b, b2.16b, #8; \
+ ext b3.16b, b3.16b, b3.16b, #8; \
+ rev32 b0.16b, b0.16b; \
+ rev32 b1.16b, b1.16b; \
+ rev32 b2.16b, b2.16b; \
+ rev32 b3.16b, b3.16b;
+
+#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \
+ rev32 b0.16b, b0.16b; \
+ rev32 b1.16b, b1.16b; \
+ rev32 b2.16b, b2.16b; \
+ rev32 b3.16b, b3.16b; \
+ rev32 b4.16b, b4.16b; \
+ rev32 b5.16b, b5.16b; \
+ rev32 b6.16b, b6.16b; \
+ rev32 b7.16b, b7.16b; \
+ sm4e b0.4s, v24.4s; \
+ sm4e b1.4s, v24.4s; \
+ sm4e b2.4s, v24.4s; \
+ sm4e b3.4s, v24.4s; \
+ sm4e b4.4s, v24.4s; \
+ sm4e b5.4s, v24.4s; \
+ sm4e b6.4s, v24.4s; \
+ sm4e b7.4s, v24.4s; \
+ sm4e b0.4s, v25.4s; \
+ sm4e b1.4s, v25.4s; \
+ sm4e b2.4s, v25.4s; \
+ sm4e b3.4s, v25.4s; \
+ sm4e b4.4s, v25.4s; \
+ sm4e b5.4s, v25.4s; \
+ sm4e b6.4s, v25.4s; \
+ sm4e b7.4s, v25.4s; \
+ sm4e b0.4s, v26.4s; \
+ sm4e b1.4s, v26.4s; \
+ sm4e b2.4s, v26.4s; \
+ sm4e b3.4s, v26.4s; \
+ sm4e b4.4s, v26.4s; \
+ sm4e b5.4s, v26.4s; \
+ sm4e b6.4s, v26.4s; \
+ sm4e b7.4s, v26.4s; \
+ sm4e b0.4s, v27.4s; \
+ sm4e b1.4s, v27.4s; \
+ sm4e b2.4s, v27.4s; \
+ sm4e b3.4s, v27.4s; \
+ sm4e b4.4s, v27.4s; \
+ sm4e b5.4s, v27.4s; \
+ sm4e b6.4s, v27.4s; \
+ sm4e b7.4s, v27.4s; \
+ sm4e b0.4s, v28.4s; \
+ sm4e b1.4s, v28.4s; \
+ sm4e b2.4s, v28.4s; \
+ sm4e b3.4s, v28.4s; \
+ sm4e b4.4s, v28.4s; \
+ sm4e b5.4s, v28.4s; \
+ sm4e b6.4s, v28.4s; \
+ sm4e b7.4s, v28.4s; \
+ sm4e b0.4s, v29.4s; \
+ sm4e b1.4s, v29.4s; \
+ sm4e b2.4s, v29.4s; \
+ sm4e b3.4s, v29.4s; \
+ sm4e b4.4s, v29.4s; \
+ sm4e b5.4s, v29.4s; \
+ sm4e b6.4s, v29.4s; \
+ sm4e b7.4s, v29.4s; \
+ sm4e b0.4s, v30.4s; \
+ sm4e b1.4s, v30.4s; \
+ sm4e b2.4s, v30.4s; \
+ sm4e b3.4s, v30.4s; \
+ sm4e b4.4s, v30.4s; \
+ sm4e b5.4s, v30.4s; \
+ sm4e b6.4s, v30.4s; \
+ sm4e b7.4s, v30.4s; \
+ sm4e b0.4s, v31.4s; \
+ sm4e b1.4s, v31.4s; \
+ sm4e b2.4s, v31.4s; \
+ sm4e b3.4s, v31.4s; \
+ sm4e b4.4s, v31.4s; \
+ sm4e b5.4s, v31.4s; \
+ sm4e b6.4s, v31.4s; \
+ sm4e b7.4s, v31.4s; \
+ rev64 b0.4s, b0.4s; \
+ rev64 b1.4s, b1.4s; \
+ rev64 b2.4s, b2.4s; \
+ rev64 b3.4s, b3.4s; \
+ rev64 b4.4s, b4.4s; \
+ rev64 b5.4s, b5.4s; \
+ rev64 b6.4s, b6.4s; \
+ rev64 b7.4s, b7.4s; \
+ ext b0.16b, b0.16b, b0.16b, #8; \
+ ext b1.16b, b1.16b, b1.16b, #8; \
+ ext b2.16b, b2.16b, b2.16b, #8; \
+ ext b3.16b, b3.16b, b3.16b, #8; \
+ ext b4.16b, b4.16b, b4.16b, #8; \
+ ext b5.16b, b5.16b, b5.16b, #8; \
+ ext b6.16b, b6.16b, b6.16b, #8; \
+ ext b7.16b, b7.16b, b7.16b, #8; \
+ rev32 b0.16b, b0.16b; \
+ rev32 b1.16b, b1.16b; \
+ rev32 b2.16b, b2.16b; \
+ rev32 b3.16b, b3.16b; \
+ rev32 b4.16b, b4.16b; \
+ rev32 b5.16b, b5.16b; \
+ rev32 b6.16b, b6.16b; \
+ rev32 b7.16b, b7.16b;
+
+
+.align 3
+SYM_FUNC_START(sm4_ce_expand_key)
+ /* input:
+ * x0: 128-bit key
+ * x1: rkey_enc
+ * x2: rkey_dec
+ * x3: fk array
+ * x4: ck array
+ */
+ ld1 {v0.16b}, [x0];
+ rev32 v0.16b, v0.16b;
+ ld1 {v1.16b}, [x3];
+ /* load ck */
+ ld1 {v24.16b-v27.16b}, [x4], #64;
+ ld1 {v28.16b-v31.16b}, [x4];
+
+ /* input ^ fk */
+ eor v0.16b, v0.16b, v1.16b;
+
+ sm4ekey v0.4s, v0.4s, v24.4s;
+ sm4ekey v1.4s, v0.4s, v25.4s;
+ sm4ekey v2.4s, v1.4s, v26.4s;
+ sm4ekey v3.4s, v2.4s, v27.4s;
+ sm4ekey v4.4s, v3.4s, v28.4s;
+ sm4ekey v5.4s, v4.4s, v29.4s;
+ sm4ekey v6.4s, v5.4s, v30.4s;
+ sm4ekey v7.4s, v6.4s, v31.4s;
+
+ st1 {v0.16b-v3.16b}, [x1], #64;
+ st1 {v4.16b-v7.16b}, [x1];
+ rev64 v7.4s, v7.4s;
+ rev64 v6.4s, v6.4s;
+ rev64 v5.4s, v5.4s;
+ rev64 v4.4s, v4.4s;
+ rev64 v3.4s, v3.4s;
+ rev64 v2.4s, v2.4s;
+ rev64 v1.4s, v1.4s;
+ rev64 v0.4s, v0.4s;
+ ext v7.16b, v7.16b, v7.16b, #8;
+ ext v6.16b, v6.16b, v6.16b, #8;
+ ext v5.16b, v5.16b, v5.16b, #8;
+ ext v4.16b, v4.16b, v4.16b, #8;
+ ext v3.16b, v3.16b, v3.16b, #8;
+ ext v2.16b, v2.16b, v2.16b, #8;
+ ext v1.16b, v1.16b, v1.16b, #8;
+ ext v0.16b, v0.16b, v0.16b, #8;
+ st1 {v7.16b}, [x2], #16;
+ st1 {v6.16b}, [x2], #16;
+ st1 {v5.16b}, [x2], #16;
+ st1 {v4.16b}, [x2], #16;
+ st1 {v3.16b}, [x2], #16;
+ st1 {v2.16b}, [x2], #16;
+ st1 {v1.16b}, [x2], #16;
+ st1 {v0.16b}, [x2];
+
+ ret;
+SYM_FUNC_END(sm4_ce_expand_key)
+
+.align 3
+SYM_FUNC_START(sm4_ce_crypt_block)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ */
+ PREPARE;
+
+ ld1 {v0.16b}, [x2];
+ SM4_CRYPT_BLK(v0);
+ st1 {v0.16b}, [x1];
+
+ ret;
+SYM_FUNC_END(sm4_ce_crypt_block)
+
+.align 3
+SYM_FUNC_START(sm4_ce_crypt)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * w3: nblocks
+ */
+ PREPARE;
+
+.Lcrypt_loop_blk:
+ sub w3, w3, #8;
+ tbnz w3, #31, .Lcrypt_tail8;
+
+ ld1 {v0.16b-v3.16b}, [x2], #64;
+ ld1 {v4.16b-v7.16b}, [x2], #64;
+
+ SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+
+ st1 {v0.16b-v3.16b}, [x1], #64;
+ st1 {v4.16b-v7.16b}, [x1], #64;
+
+ cbz w3, .Lcrypt_end;
+ b .Lcrypt_loop_blk;
+
+.Lcrypt_tail8:
+ add w3, w3, #8;
+ cmp w3, #4;
+ blt .Lcrypt_tail4;
+
+ sub w3, w3, #4;
+
+ ld1 {v0.16b-v3.16b}, [x2], #64;
+ SM4_CRYPT_BLK4(v0, v1, v2, v3);
+ st1 {v0.16b-v3.16b}, [x1], #64;
+
+ cbz w3, .Lcrypt_end;
+
+.Lcrypt_tail4:
+ sub w3, w3, #1;
+
+ ld1 {v0.16b}, [x2], #16;
+ SM4_CRYPT_BLK(v0);
+ st1 {v0.16b}, [x1], #16;
+
+ cbnz w3, .Lcrypt_tail4;
+
+.Lcrypt_end:
+ ret;
+SYM_FUNC_END(sm4_ce_crypt)
+
+.align 3
+SYM_FUNC_START(sm4_ce_cbc_enc)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * x3: iv (big endian, 128 bit)
+ * w4: nblocks
+ */
+ PREPARE;
+
+ ld1 {RIV.16b}, [x3];
+
+.Lcbc_enc_loop:
+ sub w4, w4, #1;
+
+ ld1 {RTMP0.16b}, [x2], #16;
+ eor RIV.16b, RIV.16b, RTMP0.16b;
+
+ SM4_CRYPT_BLK(RIV);
+
+ st1 {RIV.16b}, [x1], #16;
+
+ cbnz w4, .Lcbc_enc_loop;
+
+ /* store new IV */
+ st1 {RIV.16b}, [x3];
+
+ ret;
+SYM_FUNC_END(sm4_ce_cbc_enc)
+
+.align 3
+SYM_FUNC_START(sm4_ce_cbc_dec)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * x3: iv (big endian, 128 bit)
+ * w4: nblocks
+ */
+ PREPARE;
+
+ ld1 {RIV.16b}, [x3];
+
+.Lcbc_loop_blk:
+ sub w4, w4, #8;
+ tbnz w4, #31, .Lcbc_tail8;
+
+ ld1 {v0.16b-v3.16b}, [x2], #64;
+ ld1 {v4.16b-v7.16b}, [x2];
+
+ SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+
+ sub x2, x2, #64;
+ eor v0.16b, v0.16b, RIV.16b;
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
+ eor v1.16b, v1.16b, RTMP0.16b;
+ eor v2.16b, v2.16b, RTMP1.16b;
+ eor v3.16b, v3.16b, RTMP2.16b;
+ st1 {v0.16b-v3.16b}, [x1], #64;
+
+ eor v4.16b, v4.16b, RTMP3.16b;
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
+ eor v5.16b, v5.16b, RTMP0.16b;
+ eor v6.16b, v6.16b, RTMP1.16b;
+ eor v7.16b, v7.16b, RTMP2.16b;
+
+ mov RIV.16b, RTMP3.16b;
+ st1 {v4.16b-v7.16b}, [x1], #64;
+
+ cbz w4, .Lcbc_end;
+ b .Lcbc_loop_blk;
+
+.Lcbc_tail8:
+ add w4, w4, #8;
+ cmp w4, #4;
+ blt .Lcbc_tail4;
+
+ sub w4, w4, #4;
+
+ ld1 {v0.16b-v3.16b}, [x2];
+
+ SM4_CRYPT_BLK4(v0, v1, v2, v3);
+
+ eor v0.16b, v0.16b, RIV.16b;
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
+ eor v1.16b, v1.16b, RTMP0.16b;
+ eor v2.16b, v2.16b, RTMP1.16b;
+ eor v3.16b, v3.16b, RTMP2.16b;
+
+ mov RIV.16b, RTMP3.16b;
+ st1 {v0.16b-v3.16b}, [x1], #64;
+
+ cbz w4, .Lcbc_end;
+
+.Lcbc_tail4:
+ sub w4, w4, #1;
+
+ ld1 {v0.16b}, [x2];
+
+ SM4_CRYPT_BLK(v0);
+
+ eor v0.16b, v0.16b, RIV.16b;
+ ld1 {RIV.16b}, [x2], #16;
+ st1 {v0.16b}, [x1], #16;
+
+ cbnz w4, .Lcbc_tail4;
+
+.Lcbc_end:
+ /* store new IV */
+ st1 {RIV.16b}, [x3];
+
+ ret;
+SYM_FUNC_END(sm4_ce_cbc_dec)
+
+.align 3
+SYM_FUNC_START(sm4_ce_cfb_enc)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * x3: iv (big endian, 128 bit)
+ * w4: nblocks
+ */
+ PREPARE;
+
+ ld1 {RIV.16b}, [x3];
+
+.Lcfb_enc_loop:
+ sub w4, w4, #1;
+
+ SM4_CRYPT_BLK(RIV);
+
+ ld1 {RTMP0.16b}, [x2], #16;
+ eor RIV.16b, RIV.16b, RTMP0.16b;
+ st1 {RIV.16b}, [x1], #16;
+
+ cbnz w4, .Lcfb_enc_loop;
+
+ /* store new IV */
+ st1 {RIV.16b}, [x3];
+
+ ret;
+SYM_FUNC_END(sm4_ce_cfb_enc)
+
+.align 3
+SYM_FUNC_START(sm4_ce_cfb_dec)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * x3: iv (big endian, 128 bit)
+ * w4: nblocks
+ */
+ PREPARE;
+
+ ld1 {v0.16b}, [x3];
+
+.Lcfb_loop_blk:
+ sub w4, w4, #8;
+ tbnz w4, #31, .Lcfb_tail8;
+
+ ld1 {v1.16b, v2.16b, v3.16b}, [x2], #48;
+ ld1 {v4.16b-v7.16b}, [x2];
+
+ SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+
+ sub x2, x2, #48;
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
+ eor v0.16b, v0.16b, RTMP0.16b;
+ eor v1.16b, v1.16b, RTMP1.16b;
+ eor v2.16b, v2.16b, RTMP2.16b;
+ eor v3.16b, v3.16b, RTMP3.16b;
+ st1 {v0.16b-v3.16b}, [x1], #64;
+
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
+ eor v4.16b, v4.16b, RTMP0.16b;
+ eor v5.16b, v5.16b, RTMP1.16b;
+ eor v6.16b, v6.16b, RTMP2.16b;
+ eor v7.16b, v7.16b, RTMP3.16b;
+ st1 {v4.16b-v7.16b}, [x1], #64;
+
+ mov v0.16b, RTMP3.16b;
+
+ cbz w4, .Lcfb_end;
+ b .Lcfb_loop_blk;
+
+.Lcfb_tail8:
+ add w4, w4, #8;
+ cmp w4, #4;
+ blt .Lcfb_tail4;
+
+ sub w4, w4, #4;
+
+ ld1 {v1.16b, v2.16b, v3.16b}, [x2];
+
+ SM4_CRYPT_BLK4(v0, v1, v2, v3);
+
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
+ eor v0.16b, v0.16b, RTMP0.16b;
+ eor v1.16b, v1.16b, RTMP1.16b;
+ eor v2.16b, v2.16b, RTMP2.16b;
+ eor v3.16b, v3.16b, RTMP3.16b;
+ st1 {v0.16b-v3.16b}, [x1], #64;
+
+ mov v0.16b, RTMP3.16b;
+
+ cbz w4, .Lcfb_end;
+
+.Lcfb_tail4:
+ sub w4, w4, #1;
+
+ SM4_CRYPT_BLK(v0);
+
+ ld1 {RTMP0.16b}, [x2], #16;
+ eor v0.16b, v0.16b, RTMP0.16b;
+ st1 {v0.16b}, [x1], #16;
+
+ mov v0.16b, RTMP0.16b;
+
+ cbnz w4, .Lcfb_tail4;
+
+.Lcfb_end:
+ /* store new IV */
+ st1 {v0.16b}, [x3];
+
+ ret;
+SYM_FUNC_END(sm4_ce_cfb_dec)
+
+.align 3
+SYM_FUNC_START(sm4_ce_ctr_enc)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * x3: ctr (big endian, 128 bit)
+ * w4: nblocks
+ */
+ PREPARE;
+
+ ldp x7, x8, [x3];
+ rev x7, x7;
+ rev x8, x8;
+
+.Lctr_loop_blk:
+ sub w4, w4, #8;
+ tbnz w4, #31, .Lctr_tail8;
+
+#define inc_le128(vctr) \
+ mov vctr.d[1], x8; \
+ mov vctr.d[0], x7; \
+ adds x8, x8, #1; \
+ adc x7, x7, xzr; \
+ rev64 vctr.16b, vctr.16b;
+
+ /* construct CTRs */
+ inc_le128(v0); /* +0 */
+ inc_le128(v1); /* +1 */
+ inc_le128(v2); /* +2 */
+ inc_le128(v3); /* +3 */
+ inc_le128(v4); /* +4 */
+ inc_le128(v5); /* +5 */
+ inc_le128(v6); /* +6 */
+ inc_le128(v7); /* +7 */
+
+ SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
+ eor v0.16b, v0.16b, RTMP0.16b;
+ eor v1.16b, v1.16b, RTMP1.16b;
+ eor v2.16b, v2.16b, RTMP2.16b;
+ eor v3.16b, v3.16b, RTMP3.16b;
+ st1 {v0.16b-v3.16b}, [x1], #64;
+
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
+ eor v4.16b, v4.16b, RTMP0.16b;
+ eor v5.16b, v5.16b, RTMP1.16b;
+ eor v6.16b, v6.16b, RTMP2.16b;
+ eor v7.16b, v7.16b, RTMP3.16b;
+ st1 {v4.16b-v7.16b}, [x1], #64;
+
+ cbz w4, .Lctr_end;
+ b .Lctr_loop_blk;
+
+.Lctr_tail8:
+ add w4, w4, #8;
+ cmp w4, #4;
+ blt .Lctr_tail4;
+
+ sub w4, w4, #4;
+
+ /* construct CTRs */
+ inc_le128(v0); /* +0 */
+ inc_le128(v1); /* +1 */
+ inc_le128(v2); /* +2 */
+ inc_le128(v3); /* +3 */
+
+ SM4_CRYPT_BLK4(v0, v1, v2, v3);
+
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
+ eor v0.16b, v0.16b, RTMP0.16b;
+ eor v1.16b, v1.16b, RTMP1.16b;
+ eor v2.16b, v2.16b, RTMP2.16b;
+ eor v3.16b, v3.16b, RTMP3.16b;
+ st1 {v0.16b-v3.16b}, [x1], #64;
+
+ cbz w4, .Lctr_end;
+
+.Lctr_tail4:
+ sub w4, w4, #1;
+
+ /* construct CTRs */
+ inc_le128(v0);
+
+ SM4_CRYPT_BLK(v0);
+
+ ld1 {RTMP0.16b}, [x2], #16;
+ eor v0.16b, v0.16b, RTMP0.16b;
+ st1 {v0.16b}, [x1], #16;
+
+ cbnz w4, .Lctr_tail4;
+
+.Lctr_end:
+ /* store new CTR */
+ rev x7, x7;
+ rev x8, x8;
+ stp x7, x8, [x3];
+
+ ret;
+SYM_FUNC_END(sm4_ce_ctr_enc)
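Editor's note: the inc_le128 macro above keeps the 128-bit big-endian counter in two general-purpose registers (x7 holds the high half, x8 the low half) and carries from the low half into the high half on overflow. A small standalone C model of the same bookkeeping, with hypothetical names, is:

#include <stdint.h>

struct ctr128 { uint64_t hi, lo; };

/* Return the counter value for this block, then advance it. */
static struct ctr128 ctr_next(struct ctr128 *c)
{
	struct ctr128 cur = *c;

	c->lo++;
	if (c->lo == 0)		/* wrap: carry into the high half */
		c->hi++;
	return cur;
}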
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index 9c93cfc4841b..496d55c0d01a 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -1,82 +1,372 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4 Cipher Algorithm, using ARMv8 Crypto Extensions
+ * as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (C) 2022, Alibaba Group.
+ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/cpufeature.h>
#include <asm/neon.h>
#include <asm/simd.h>
-#include <crypto/sm4.h>
#include <crypto/internal/simd.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <linux/crypto.h>
-#include <linux/types.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/sm4.h>
-MODULE_ALIAS_CRYPTO("sm4");
-MODULE_ALIAS_CRYPTO("sm4-ce");
-MODULE_DESCRIPTION("SM4 symmetric cipher using ARMv8 Crypto Extensions");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
+#define BYTES2BLKS(nbytes) ((nbytes) >> 4)
+
+asmlinkage void sm4_ce_expand_key(const u8 *key, u32 *rkey_enc, u32 *rkey_dec,
+ const u32 *fk, const u32 *ck);
+asmlinkage void sm4_ce_crypt_block(const u32 *rkey, u8 *dst, const u8 *src);
+asmlinkage void sm4_ce_crypt(const u32 *rkey, u8 *dst, const u8 *src,
+ unsigned int nblks);
+asmlinkage void sm4_ce_cbc_enc(const u32 *rkey, u8 *dst, const u8 *src,
+ u8 *iv, unsigned int nblks);
+asmlinkage void sm4_ce_cbc_dec(const u32 *rkey, u8 *dst, const u8 *src,
+ u8 *iv, unsigned int nblks);
+asmlinkage void sm4_ce_cfb_enc(const u32 *rkey, u8 *dst, const u8 *src,
+ u8 *iv, unsigned int nblks);
+asmlinkage void sm4_ce_cfb_dec(const u32 *rkey, u8 *dst, const u8 *src,
+ u8 *iv, unsigned int nblks);
+asmlinkage void sm4_ce_ctr_enc(const u32 *rkey, u8 *dst, const u8 *src,
+ u8 *iv, unsigned int nblks);
+
+static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ if (key_len != SM4_KEY_SIZE)
+ return -EINVAL;
+
+ sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
+ return 0;
+}
+
+static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
+{
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ unsigned int nblks;
+
+ kernel_neon_begin();
+
+ nblks = BYTES2BLKS(nbytes);
+ if (nblks) {
+ sm4_ce_crypt(rkey, dst, src, nblks);
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
+
+ kernel_neon_end();
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+
+static int sm4_ecb_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return sm4_ecb_do_crypt(req, ctx->rkey_enc);
+}
+
+static int sm4_ecb_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return sm4_ecb_do_crypt(req, ctx->rkey_dec);
+}
+
+static int sm4_cbc_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ unsigned int nblks;
+
+ kernel_neon_begin();
+
+ nblks = BYTES2BLKS(nbytes);
+ if (nblks) {
+ sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
+
+ kernel_neon_end();
-asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in);
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
-static int sm4_ce_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int key_len)
+static int sm4_cbc_decrypt(struct skcipher_request *req)
{
- struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ unsigned int nblks;
+
+ kernel_neon_begin();
+
+ nblks = BYTES2BLKS(nbytes);
+ if (nblks) {
+ sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, walk.iv, nblks);
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
+
+ kernel_neon_end();
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
- return sm4_expandkey(ctx, key, key_len);
+ return err;
}
-static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static int sm4_cfb_encrypt(struct skcipher_request *req)
{
- const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ unsigned int nblks;
- if (!crypto_simd_usable()) {
- sm4_crypt_block(ctx->rkey_enc, out, in);
- } else {
kernel_neon_begin();
- sm4_ce_do_crypt(ctx->rkey_enc, out, in);
+
+ nblks = BYTES2BLKS(nbytes);
+ if (nblks) {
+ sm4_ce_cfb_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
+ dst += nblks * SM4_BLOCK_SIZE;
+ src += nblks * SM4_BLOCK_SIZE;
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
+
+ /* tail */
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ u8 keystream[SM4_BLOCK_SIZE];
+
+ sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
+ crypto_xor_cpy(dst, src, keystream, nbytes);
+ nbytes = 0;
+ }
+
kernel_neon_end();
+
+ err = skcipher_walk_done(&walk, nbytes);
}
+
+ return err;
}
-static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static int sm4_cfb_decrypt(struct skcipher_request *req)
{
- const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ unsigned int nblks;
- if (!crypto_simd_usable()) {
- sm4_crypt_block(ctx->rkey_dec, out, in);
- } else {
kernel_neon_begin();
- sm4_ce_do_crypt(ctx->rkey_dec, out, in);
+
+ nblks = BYTES2BLKS(nbytes);
+ if (nblks) {
+ sm4_ce_cfb_dec(ctx->rkey_enc, dst, src, walk.iv, nblks);
+ dst += nblks * SM4_BLOCK_SIZE;
+ src += nblks * SM4_BLOCK_SIZE;
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
+
+ /* tail */
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ u8 keystream[SM4_BLOCK_SIZE];
+
+ sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
+ crypto_xor_cpy(dst, src, keystream, nbytes);
+ nbytes = 0;
+ }
+
kernel_neon_end();
+
+ err = skcipher_walk_done(&walk, nbytes);
}
+
+ return err;
}
-static struct crypto_alg sm4_ce_alg = {
- .cra_name = "sm4",
- .cra_driver_name = "sm4-ce",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = SM4_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct sm4_ctx),
- .cra_module = THIS_MODULE,
- .cra_u.cipher = {
- .cia_min_keysize = SM4_KEY_SIZE,
- .cia_max_keysize = SM4_KEY_SIZE,
- .cia_setkey = sm4_ce_setkey,
- .cia_encrypt = sm4_ce_encrypt,
- .cia_decrypt = sm4_ce_decrypt
+static int sm4_ctr_crypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ unsigned int nblks;
+
+ kernel_neon_begin();
+
+ nblks = BYTES2BLKS(nbytes);
+ if (nblks) {
+ sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
+ dst += nblks * SM4_BLOCK_SIZE;
+ src += nblks * SM4_BLOCK_SIZE;
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
+
+ /* tail */
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ u8 keystream[SM4_BLOCK_SIZE];
+
+ sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
+ crypto_inc(walk.iv, SM4_BLOCK_SIZE);
+ crypto_xor_cpy(dst, src, keystream, nbytes);
+ nbytes = 0;
+ }
+
+ kernel_neon_end();
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+
+static struct skcipher_alg sm4_algs[] = {
+ {
+ .base = {
+ .cra_name = "ecb(sm4)",
+ .cra_driver_name = "ecb-sm4-ce",
+ .cra_priority = 400,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .setkey = sm4_setkey,
+ .encrypt = sm4_ecb_encrypt,
+ .decrypt = sm4_ecb_decrypt,
+ }, {
+ .base = {
+ .cra_name = "cbc(sm4)",
+ .cra_driver_name = "cbc-sm4-ce",
+ .cra_priority = 400,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .setkey = sm4_setkey,
+ .encrypt = sm4_cbc_encrypt,
+ .decrypt = sm4_cbc_decrypt,
+ }, {
+ .base = {
+ .cra_name = "cfb(sm4)",
+ .cra_driver_name = "cfb-sm4-ce",
+ .cra_priority = 400,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .chunksize = SM4_BLOCK_SIZE,
+ .setkey = sm4_setkey,
+ .encrypt = sm4_cfb_encrypt,
+ .decrypt = sm4_cfb_decrypt,
+ }, {
+ .base = {
+ .cra_name = "ctr(sm4)",
+ .cra_driver_name = "ctr-sm4-ce",
+ .cra_priority = 400,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .chunksize = SM4_BLOCK_SIZE,
+ .setkey = sm4_setkey,
+ .encrypt = sm4_ctr_crypt,
+ .decrypt = sm4_ctr_crypt,
}
};
-static int __init sm4_ce_mod_init(void)
+static int __init sm4_init(void)
{
- return crypto_register_alg(&sm4_ce_alg);
+ return crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
}
-static void __exit sm4_ce_mod_fini(void)
+static void __exit sm4_exit(void)
{
- crypto_unregister_alg(&sm4_ce_alg);
+ crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
}
-module_cpu_feature_match(SM4, sm4_ce_mod_init);
-module_exit(sm4_ce_mod_fini);
+module_cpu_feature_match(SM4, sm4_init);
+module_exit(sm4_exit);
+
+MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR using ARMv8 Crypto Extensions");
+MODULE_ALIAS_CRYPTO("sm4-ce");
+MODULE_ALIAS_CRYPTO("sm4");
+MODULE_ALIAS_CRYPTO("ecb(sm4)");
+MODULE_ALIAS_CRYPTO("cbc(sm4)");
+MODULE_ALIAS_CRYPTO("cfb(sm4)");
+MODULE_ALIAS_CRYPTO("ctr(sm4)");
+MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
+MODULE_LICENSE("GPL v2");
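Editor's note: in the CFB and CTR paths above, a trailing partial block is handled by producing one keystream block from the IV or counter with sm4_ce_crypt_block() and XORing it over the leftover bytes (crypto_xor_cpy() in the real code). A standalone C model of that tail step, with hypothetical names, is:

#include <stdint.h>

/* nbytes is 1..15, i.e. strictly less than the 16-byte SM4 block. */
static void xor_tail(uint8_t *dst, const uint8_t *src,
		     const uint8_t keystream[16], unsigned int nbytes)
{
	unsigned int i;

	for (i = 0; i < nbytes; i++)
		dst[i] = src[i] ^ keystream[i];
}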
diff --git a/arch/arm64/crypto/sm4-neon-core.S b/arch/arm64/crypto/sm4-neon-core.S
new file mode 100644
index 000000000000..3d5256b354d2
--- /dev/null
+++ b/arch/arm64/crypto/sm4-neon-core.S
@@ -0,0 +1,487 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4 Cipher Algorithm for ARMv8 NEON
+ * as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (C) 2022, Alibaba Group.
+ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/* Register macros */
+
+#define RTMP0 v8
+#define RTMP1 v9
+#define RTMP2 v10
+#define RTMP3 v11
+
+#define RX0 v12
+#define RX1 v13
+#define RKEY v14
+#define RIV v15
+
+/* Helper macros. */
+
+#define PREPARE \
+ adr_l x5, crypto_sm4_sbox; \
+ ld1 {v16.16b-v19.16b}, [x5], #64; \
+ ld1 {v20.16b-v23.16b}, [x5], #64; \
+ ld1 {v24.16b-v27.16b}, [x5], #64; \
+ ld1 {v28.16b-v31.16b}, [x5];
+
+#define transpose_4x4(s0, s1, s2, s3) \
+ zip1 RTMP0.4s, s0.4s, s1.4s; \
+ zip1 RTMP1.4s, s2.4s, s3.4s; \
+ zip2 RTMP2.4s, s0.4s, s1.4s; \
+ zip2 RTMP3.4s, s2.4s, s3.4s; \
+ zip1 s0.2d, RTMP0.2d, RTMP1.2d; \
+ zip2 s1.2d, RTMP0.2d, RTMP1.2d; \
+ zip1 s2.2d, RTMP2.2d, RTMP3.2d; \
+ zip2 s3.2d, RTMP2.2d, RTMP3.2d;
+
+#define rotate_clockwise_90(s0, s1, s2, s3) \
+ zip1 RTMP0.4s, s1.4s, s0.4s; \
+ zip2 RTMP1.4s, s1.4s, s0.4s; \
+ zip1 RTMP2.4s, s3.4s, s2.4s; \
+ zip2 RTMP3.4s, s3.4s, s2.4s; \
+ zip1 s0.2d, RTMP2.2d, RTMP0.2d; \
+ zip2 s1.2d, RTMP2.2d, RTMP0.2d; \
+ zip1 s2.2d, RTMP3.2d, RTMP1.2d; \
+ zip2 s3.2d, RTMP3.2d, RTMP1.2d;
+
+#define ROUND4(round, s0, s1, s2, s3) \
+ dup RX0.4s, RKEY.s[round]; \
+ /* rk ^ s1 ^ s2 ^ s3 */ \
+ eor RTMP1.16b, s2.16b, s3.16b; \
+ eor RX0.16b, RX0.16b, s1.16b; \
+ eor RX0.16b, RX0.16b, RTMP1.16b; \
+ \
+ /* sbox, non-linear part */ \
+ movi RTMP3.16b, #64; /* sizeof(sbox) / 4 */ \
+ tbl RTMP0.16b, {v16.16b-v19.16b}, RX0.16b; \
+ sub RX0.16b, RX0.16b, RTMP3.16b; \
+ tbx RTMP0.16b, {v20.16b-v23.16b}, RX0.16b; \
+ sub RX0.16b, RX0.16b, RTMP3.16b; \
+ tbx RTMP0.16b, {v24.16b-v27.16b}, RX0.16b; \
+ sub RX0.16b, RX0.16b, RTMP3.16b; \
+ tbx RTMP0.16b, {v28.16b-v31.16b}, RX0.16b; \
+ \
+ /* linear part */ \
+ shl RTMP1.4s, RTMP0.4s, #8; \
+ shl RTMP2.4s, RTMP0.4s, #16; \
+ shl RTMP3.4s, RTMP0.4s, #24; \
+ sri RTMP1.4s, RTMP0.4s, #(32-8); \
+ sri RTMP2.4s, RTMP0.4s, #(32-16); \
+ sri RTMP3.4s, RTMP0.4s, #(32-24); \
+ /* RTMP1 = x ^ rol32(x, 8) ^ rol32(x, 16) */ \
+ eor RTMP1.16b, RTMP1.16b, RTMP0.16b; \
+ eor RTMP1.16b, RTMP1.16b, RTMP2.16b; \
+ /* RTMP3 = x ^ rol32(x, 24) ^ rol32(RTMP1, 2) */ \
+ eor RTMP3.16b, RTMP3.16b, RTMP0.16b; \
+ shl RTMP2.4s, RTMP1.4s, 2; \
+ sri RTMP2.4s, RTMP1.4s, #(32-2); \
+ eor RTMP3.16b, RTMP3.16b, RTMP2.16b; \
+ /* s0 ^= RTMP3 */ \
+ eor s0.16b, s0.16b, RTMP3.16b;
+
+#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \
+ rev32 b0.16b, b0.16b; \
+ rev32 b1.16b, b1.16b; \
+ rev32 b2.16b, b2.16b; \
+ rev32 b3.16b, b3.16b; \
+ \
+ transpose_4x4(b0, b1, b2, b3); \
+ \
+ mov x6, 8; \
+4: \
+ ld1 {RKEY.4s}, [x0], #16; \
+ subs x6, x6, #1; \
+ \
+ ROUND4(0, b0, b1, b2, b3); \
+ ROUND4(1, b1, b2, b3, b0); \
+ ROUND4(2, b2, b3, b0, b1); \
+ ROUND4(3, b3, b0, b1, b2); \
+ \
+ bne 4b; \
+ \
+ rotate_clockwise_90(b0, b1, b2, b3); \
+ rev32 b0.16b, b0.16b; \
+ rev32 b1.16b, b1.16b; \
+ rev32 b2.16b, b2.16b; \
+ rev32 b3.16b, b3.16b; \
+ \
+ /* repoint to rkey */ \
+ sub x0, x0, #128;
+
+#define ROUND8(round, s0, s1, s2, s3, t0, t1, t2, t3) \
+ /* rk ^ s1 ^ s2 ^ s3 */ \
+ dup RX0.4s, RKEY.s[round]; \
+ eor RTMP0.16b, s2.16b, s3.16b; \
+ mov RX1.16b, RX0.16b; \
+ eor RTMP1.16b, t2.16b, t3.16b; \
+ eor RX0.16b, RX0.16b, s1.16b; \
+ eor RX1.16b, RX1.16b, t1.16b; \
+ eor RX0.16b, RX0.16b, RTMP0.16b; \
+ eor RX1.16b, RX1.16b, RTMP1.16b; \
+ \
+ /* sbox, non-linear part */ \
+ movi RTMP3.16b, #64; /* sizeof(sbox) / 4 */ \
+ tbl RTMP0.16b, {v16.16b-v19.16b}, RX0.16b; \
+ tbl RTMP1.16b, {v16.16b-v19.16b}, RX1.16b; \
+ sub RX0.16b, RX0.16b, RTMP3.16b; \
+ sub RX1.16b, RX1.16b, RTMP3.16b; \
+ tbx RTMP0.16b, {v20.16b-v23.16b}, RX0.16b; \
+ tbx RTMP1.16b, {v20.16b-v23.16b}, RX1.16b; \
+ sub RX0.16b, RX0.16b, RTMP3.16b; \
+ sub RX1.16b, RX1.16b, RTMP3.16b; \
+ tbx RTMP0.16b, {v24.16b-v27.16b}, RX0.16b; \
+ tbx RTMP1.16b, {v24.16b-v27.16b}, RX1.16b; \
+ sub RX0.16b, RX0.16b, RTMP3.16b; \
+ sub RX1.16b, RX1.16b, RTMP3.16b; \
+ tbx RTMP0.16b, {v28.16b-v31.16b}, RX0.16b; \
+ tbx RTMP1.16b, {v28.16b-v31.16b}, RX1.16b; \
+ \
+ /* linear part */ \
+ shl RX0.4s, RTMP0.4s, #8; \
+ shl RX1.4s, RTMP1.4s, #8; \
+ shl RTMP2.4s, RTMP0.4s, #16; \
+ shl RTMP3.4s, RTMP1.4s, #16; \
+ sri RX0.4s, RTMP0.4s, #(32 - 8); \
+ sri RX1.4s, RTMP1.4s, #(32 - 8); \
+ sri RTMP2.4s, RTMP0.4s, #(32 - 16); \
+ sri RTMP3.4s, RTMP1.4s, #(32 - 16); \
+ /* RX = x ^ rol32(x, 8) ^ rol32(x, 16) */ \
+ eor RX0.16b, RX0.16b, RTMP0.16b; \
+ eor RX1.16b, RX1.16b, RTMP1.16b; \
+ eor RX0.16b, RX0.16b, RTMP2.16b; \
+ eor RX1.16b, RX1.16b, RTMP3.16b; \
+ /* RTMP0/1 ^= x ^ rol32(x, 24) ^ rol32(RX, 2) */ \
+ shl RTMP2.4s, RTMP0.4s, #24; \
+ shl RTMP3.4s, RTMP1.4s, #24; \
+ sri RTMP2.4s, RTMP0.4s, #(32 - 24); \
+ sri RTMP3.4s, RTMP1.4s, #(32 - 24); \
+ eor RTMP0.16b, RTMP0.16b, RTMP2.16b; \
+ eor RTMP1.16b, RTMP1.16b, RTMP3.16b; \
+ shl RTMP2.4s, RX0.4s, #2; \
+ shl RTMP3.4s, RX1.4s, #2; \
+ sri RTMP2.4s, RX0.4s, #(32 - 2); \
+ sri RTMP3.4s, RX1.4s, #(32 - 2); \
+ eor RTMP0.16b, RTMP0.16b, RTMP2.16b; \
+ eor RTMP1.16b, RTMP1.16b, RTMP3.16b; \
+ /* s0/t0 ^= RTMP0/1 */ \
+ eor s0.16b, s0.16b, RTMP0.16b; \
+ eor t0.16b, t0.16b, RTMP1.16b;
+
+#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \
+ rev32 b0.16b, b0.16b; \
+ rev32 b1.16b, b1.16b; \
+ rev32 b2.16b, b2.16b; \
+ rev32 b3.16b, b3.16b; \
+ rev32 b4.16b, b4.16b; \
+ rev32 b5.16b, b5.16b; \
+ rev32 b6.16b, b6.16b; \
+ rev32 b7.16b, b7.16b; \
+ \
+ transpose_4x4(b0, b1, b2, b3); \
+ transpose_4x4(b4, b5, b6, b7); \
+ \
+ mov x6, 8; \
+8: \
+ ld1 {RKEY.4s}, [x0], #16; \
+ subs x6, x6, #1; \
+ \
+ ROUND8(0, b0, b1, b2, b3, b4, b5, b6, b7); \
+ ROUND8(1, b1, b2, b3, b0, b5, b6, b7, b4); \
+ ROUND8(2, b2, b3, b0, b1, b6, b7, b4, b5); \
+ ROUND8(3, b3, b0, b1, b2, b7, b4, b5, b6); \
+ \
+ bne 8b; \
+ \
+ rotate_clockwise_90(b0, b1, b2, b3); \
+ rotate_clockwise_90(b4, b5, b6, b7); \
+ rev32 b0.16b, b0.16b; \
+ rev32 b1.16b, b1.16b; \
+ rev32 b2.16b, b2.16b; \
+ rev32 b3.16b, b3.16b; \
+ rev32 b4.16b, b4.16b; \
+ rev32 b5.16b, b5.16b; \
+ rev32 b6.16b, b6.16b; \
+ rev32 b7.16b, b7.16b; \
+ \
+ /* repoint to rkey */ \
+ sub x0, x0, #128;
+
+
+.align 3
+SYM_FUNC_START_LOCAL(__sm4_neon_crypt_blk1_4)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * w3: num blocks (1..4)
+ */
+ PREPARE;
+
+ ld1 {v0.16b}, [x2], #16;
+ mov v1.16b, v0.16b;
+ mov v2.16b, v0.16b;
+ mov v3.16b, v0.16b;
+ cmp w3, #2;
+ blt .Lblk4_load_input_done;
+ ld1 {v1.16b}, [x2], #16;
+ beq .Lblk4_load_input_done;
+ ld1 {v2.16b}, [x2], #16;
+ cmp w3, #3;
+ beq .Lblk4_load_input_done;
+ ld1 {v3.16b}, [x2];
+
+.Lblk4_load_input_done:
+ SM4_CRYPT_BLK4(v0, v1, v2, v3);
+
+ st1 {v0.16b}, [x1], #16;
+ cmp w3, #2;
+ blt .Lblk4_store_output_done;
+ st1 {v1.16b}, [x1], #16;
+ beq .Lblk4_store_output_done;
+ st1 {v2.16b}, [x1], #16;
+ cmp w3, #3;
+ beq .Lblk4_store_output_done;
+ st1 {v3.16b}, [x1];
+
+.Lblk4_store_output_done:
+ ret;
+SYM_FUNC_END(__sm4_neon_crypt_blk1_4)
+
+.align 3
+SYM_FUNC_START(sm4_neon_crypt_blk1_8)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * w3: num blocks (1..8)
+ */
+ cmp w3, #5;
+ blt __sm4_neon_crypt_blk1_4;
+
+ PREPARE;
+
+ ld1 {v0.16b-v3.16b}, [x2], #64;
+ ld1 {v4.16b}, [x2], #16;
+ mov v5.16b, v4.16b;
+ mov v6.16b, v4.16b;
+ mov v7.16b, v4.16b;
+ beq .Lblk8_load_input_done;
+ ld1 {v5.16b}, [x2], #16;
+ cmp w3, #7;
+ blt .Lblk8_load_input_done;
+ ld1 {v6.16b}, [x2], #16;
+ beq .Lblk8_load_input_done;
+ ld1 {v7.16b}, [x2];
+
+.Lblk8_load_input_done:
+ SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+
+ cmp w3, #6;
+ st1 {v0.16b-v3.16b}, [x1], #64;
+ st1 {v4.16b}, [x1], #16;
+ blt .Lblk8_store_output_done;
+ st1 {v5.16b}, [x1], #16;
+ beq .Lblk8_store_output_done;
+ st1 {v6.16b}, [x1], #16;
+ cmp w3, #7;
+ beq .Lblk8_store_output_done;
+ st1 {v7.16b}, [x1];
+
+.Lblk8_store_output_done:
+ ret;
+SYM_FUNC_END(sm4_neon_crypt_blk1_8)
+
+.align 3
+SYM_FUNC_START(sm4_neon_crypt_blk8)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * w3: nblocks (multiples of 8)
+ */
+ PREPARE;
+
+.Lcrypt_loop_blk:
+ subs w3, w3, #8;
+ bmi .Lcrypt_end;
+
+ ld1 {v0.16b-v3.16b}, [x2], #64;
+ ld1 {v4.16b-v7.16b}, [x2], #64;
+
+ SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+
+ st1 {v0.16b-v3.16b}, [x1], #64;
+ st1 {v4.16b-v7.16b}, [x1], #64;
+
+ b .Lcrypt_loop_blk;
+
+.Lcrypt_end:
+ ret;
+SYM_FUNC_END(sm4_neon_crypt_blk8)
+
+.align 3
+SYM_FUNC_START(sm4_neon_cbc_dec_blk8)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * x3: iv (big endian, 128 bit)
+ * w4: nblocks (multiples of 8)
+ */
+ PREPARE;
+
+ ld1 {RIV.16b}, [x3];
+
+.Lcbc_loop_blk:
+ subs w4, w4, #8;
+ bmi .Lcbc_end;
+
+ ld1 {v0.16b-v3.16b}, [x2], #64;
+ ld1 {v4.16b-v7.16b}, [x2];
+
+ SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+
+ sub x2, x2, #64;
+ eor v0.16b, v0.16b, RIV.16b;
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
+ eor v1.16b, v1.16b, RTMP0.16b;
+ eor v2.16b, v2.16b, RTMP1.16b;
+ eor v3.16b, v3.16b, RTMP2.16b;
+ st1 {v0.16b-v3.16b}, [x1], #64;
+
+ eor v4.16b, v4.16b, RTMP3.16b;
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
+ eor v5.16b, v5.16b, RTMP0.16b;
+ eor v6.16b, v6.16b, RTMP1.16b;
+ eor v7.16b, v7.16b, RTMP2.16b;
+
+ mov RIV.16b, RTMP3.16b;
+ st1 {v4.16b-v7.16b}, [x1], #64;
+
+ b .Lcbc_loop_blk;
+
+.Lcbc_end:
+ /* store new IV */
+ st1 {RIV.16b}, [x3];
+
+ ret;
+SYM_FUNC_END(sm4_neon_cbc_dec_blk8)
+
+.align 3
+SYM_FUNC_START(sm4_neon_cfb_dec_blk8)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * x3: iv (big endian, 128 bit)
+ * w4: nblocks (multiples of 8)
+ */
+ PREPARE;
+
+ ld1 {v0.16b}, [x3];
+
+.Lcfb_loop_blk:
+ subs w4, w4, #8;
+ bmi .Lcfb_end;
+
+ ld1 {v1.16b, v2.16b, v3.16b}, [x2], #48;
+ ld1 {v4.16b-v7.16b}, [x2];
+
+ SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+
+ sub x2, x2, #48;
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
+ eor v0.16b, v0.16b, RTMP0.16b;
+ eor v1.16b, v1.16b, RTMP1.16b;
+ eor v2.16b, v2.16b, RTMP2.16b;
+ eor v3.16b, v3.16b, RTMP3.16b;
+ st1 {v0.16b-v3.16b}, [x1], #64;
+
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
+ eor v4.16b, v4.16b, RTMP0.16b;
+ eor v5.16b, v5.16b, RTMP1.16b;
+ eor v6.16b, v6.16b, RTMP2.16b;
+ eor v7.16b, v7.16b, RTMP3.16b;
+ st1 {v4.16b-v7.16b}, [x1], #64;
+
+ mov v0.16b, RTMP3.16b;
+
+ b .Lcfb_loop_blk;
+
+.Lcfb_end:
+ /* store new IV */
+ st1 {v0.16b}, [x3];
+
+ ret;
+SYM_FUNC_END(sm4_neon_cfb_dec_blk8)
+
+.align 3
+SYM_FUNC_START(sm4_neon_ctr_enc_blk8)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * x3: ctr (big endian, 128 bit)
+ * w4: nblocks (multiples of 8)
+ */
+ PREPARE;
+
+ ldp x7, x8, [x3];
+ rev x7, x7;
+ rev x8, x8;
+
+.Lctr_loop_blk:
+ subs w4, w4, #8;
+ bmi .Lctr_end;
+
+#define inc_le128(vctr) \
+ mov vctr.d[1], x8; \
+ mov vctr.d[0], x7; \
+ adds x8, x8, #1; \
+ adc x7, x7, xzr; \
+ rev64 vctr.16b, vctr.16b;
+
+ /* construct CTRs */
+ inc_le128(v0); /* +0 */
+ inc_le128(v1); /* +1 */
+ inc_le128(v2); /* +2 */
+ inc_le128(v3); /* +3 */
+ inc_le128(v4); /* +4 */
+ inc_le128(v5); /* +5 */
+ inc_le128(v6); /* +6 */
+ inc_le128(v7); /* +7 */
+
+ SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
+ eor v0.16b, v0.16b, RTMP0.16b;
+ eor v1.16b, v1.16b, RTMP1.16b;
+ eor v2.16b, v2.16b, RTMP2.16b;
+ eor v3.16b, v3.16b, RTMP3.16b;
+ st1 {v0.16b-v3.16b}, [x1], #64;
+
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
+ eor v4.16b, v4.16b, RTMP0.16b;
+ eor v5.16b, v5.16b, RTMP1.16b;
+ eor v6.16b, v6.16b, RTMP2.16b;
+ eor v7.16b, v7.16b, RTMP3.16b;
+ st1 {v4.16b-v7.16b}, [x1], #64;
+
+ b .Lctr_loop_blk;
+
+.Lctr_end:
+ /* store new CTR */
+ rev x7, x7;
+ rev x8, x8;
+ stp x7, x8, [x3];
+
+ ret;
+SYM_FUNC_END(sm4_neon_ctr_enc_blk8)
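
The CTR routine above keeps the 128-bit big-endian counter byte-swapped in
x7 (high half) and x8 (low half), increments it with an adds/adc pair inside
the inc_le128 macro, and byte-reverses the result into each vector register.
A minimal C sketch of the same big-endian increment; the helper name is
illustrative:

    #include <stdint.h>

    /* Hypothetical helper: bump a 128-bit big-endian counter, the C
     * equivalent of inc_le128's adds/adc on the swapped halves. */
    static void ctr128_be_inc(uint8_t ctr[16])
    {
            int i;

            for (i = 15; i >= 0; i--)
                    if (++ctr[i] != 0)      /* stop once no carry out */
                            break;
    }
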
diff --git a/arch/arm64/crypto/sm4-neon-glue.c b/arch/arm64/crypto/sm4-neon-glue.c
new file mode 100644
index 000000000000..03a6a6866a31
--- /dev/null
+++ b/arch/arm64/crypto/sm4-neon-glue.c
@@ -0,0 +1,442 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SM4 Cipher Algorithm, using ARMv8 NEON
+ * as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (C) 2022, Alibaba Group.
+ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/cpufeature.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/sm4.h>
+
+#define BYTES2BLKS(nbytes) ((nbytes) >> 4)
+#define BYTES2BLK8(nbytes) (((nbytes) >> 4) & ~(8 - 1))
+
+asmlinkage void sm4_neon_crypt_blk1_8(const u32 *rkey, u8 *dst, const u8 *src,
+ unsigned int nblks);
+asmlinkage void sm4_neon_crypt_blk8(const u32 *rkey, u8 *dst, const u8 *src,
+ unsigned int nblks);
+asmlinkage void sm4_neon_cbc_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src,
+ u8 *iv, unsigned int nblks);
+asmlinkage void sm4_neon_cfb_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src,
+ u8 *iv, unsigned int nblks);
+asmlinkage void sm4_neon_ctr_enc_blk8(const u32 *rkey, u8 *dst, const u8 *src,
+ u8 *iv, unsigned int nblks);
+
+static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return sm4_expandkey(ctx, key, key_len);
+}
+
+static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
+{
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ unsigned int nblks;
+
+ kernel_neon_begin();
+
+ nblks = BYTES2BLK8(nbytes);
+ if (nblks) {
+ sm4_neon_crypt_blk8(rkey, dst, src, nblks);
+ dst += nblks * SM4_BLOCK_SIZE;
+ src += nblks * SM4_BLOCK_SIZE;
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
+
+ nblks = BYTES2BLKS(nbytes);
+ if (nblks) {
+ sm4_neon_crypt_blk1_8(rkey, dst, src, nblks);
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
+
+ kernel_neon_end();
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+
+static int sm4_ecb_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return sm4_ecb_do_crypt(req, ctx->rkey_enc);
+}
+
+static int sm4_ecb_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return sm4_ecb_do_crypt(req, ctx->rkey_dec);
+}
+
+static int sm4_cbc_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *iv = walk.iv;
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+
+ while (nbytes >= SM4_BLOCK_SIZE) {
+ crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE);
+ sm4_crypt_block(ctx->rkey_enc, dst, dst);
+ iv = dst;
+ src += SM4_BLOCK_SIZE;
+ dst += SM4_BLOCK_SIZE;
+ nbytes -= SM4_BLOCK_SIZE;
+ }
+ if (iv != walk.iv)
+ memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+
+static int sm4_cbc_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ unsigned int nblks;
+
+ kernel_neon_begin();
+
+ nblks = BYTES2BLK8(nbytes);
+ if (nblks) {
+ sm4_neon_cbc_dec_blk8(ctx->rkey_dec, dst, src,
+ walk.iv, nblks);
+ dst += nblks * SM4_BLOCK_SIZE;
+ src += nblks * SM4_BLOCK_SIZE;
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
+
+ nblks = BYTES2BLKS(nbytes);
+ if (nblks) {
+ u8 keystream[SM4_BLOCK_SIZE * 8];
+ u8 iv[SM4_BLOCK_SIZE];
+ int i;
+
+ sm4_neon_crypt_blk1_8(ctx->rkey_dec, keystream,
+ src, nblks);
+
+ src += ((int)nblks - 2) * SM4_BLOCK_SIZE;
+ dst += (nblks - 1) * SM4_BLOCK_SIZE;
+ memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE);
+
+ for (i = nblks - 1; i > 0; i--) {
+ crypto_xor_cpy(dst, src,
+ &keystream[i * SM4_BLOCK_SIZE],
+ SM4_BLOCK_SIZE);
+ src -= SM4_BLOCK_SIZE;
+ dst -= SM4_BLOCK_SIZE;
+ }
+ crypto_xor_cpy(dst, walk.iv,
+ keystream, SM4_BLOCK_SIZE);
+ memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
+
+ kernel_neon_end();
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+
+static int sm4_cfb_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ u8 keystream[SM4_BLOCK_SIZE];
+ const u8 *iv = walk.iv;
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+
+ while (nbytes >= SM4_BLOCK_SIZE) {
+ sm4_crypt_block(ctx->rkey_enc, keystream, iv);
+ crypto_xor_cpy(dst, src, keystream, SM4_BLOCK_SIZE);
+ iv = dst;
+ src += SM4_BLOCK_SIZE;
+ dst += SM4_BLOCK_SIZE;
+ nbytes -= SM4_BLOCK_SIZE;
+ }
+ if (iv != walk.iv)
+ memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
+
+ /* tail */
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
+ crypto_xor_cpy(dst, src, keystream, nbytes);
+ nbytes = 0;
+ }
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+
+static int sm4_cfb_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ unsigned int nblks;
+
+ kernel_neon_begin();
+
+ nblks = BYTES2BLK8(nbytes);
+ if (nblks) {
+ sm4_neon_cfb_dec_blk8(ctx->rkey_enc, dst, src,
+ walk.iv, nblks);
+ dst += nblks * SM4_BLOCK_SIZE;
+ src += nblks * SM4_BLOCK_SIZE;
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
+
+ nblks = BYTES2BLKS(nbytes);
+ if (nblks) {
+ u8 keystream[SM4_BLOCK_SIZE * 8];
+
+ memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
+ if (nblks > 1)
+ memcpy(&keystream[SM4_BLOCK_SIZE], src,
+ (nblks - 1) * SM4_BLOCK_SIZE);
+ memcpy(walk.iv, src + (nblks - 1) * SM4_BLOCK_SIZE,
+ SM4_BLOCK_SIZE);
+
+ sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream,
+ keystream, nblks);
+
+ crypto_xor_cpy(dst, src, keystream,
+ nblks * SM4_BLOCK_SIZE);
+ dst += nblks * SM4_BLOCK_SIZE;
+ src += nblks * SM4_BLOCK_SIZE;
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
+
+ kernel_neon_end();
+
+ /* tail */
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ u8 keystream[SM4_BLOCK_SIZE];
+
+ sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
+ crypto_xor_cpy(dst, src, keystream, nbytes);
+ nbytes = 0;
+ }
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+
+static int sm4_ctr_crypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ unsigned int nblks;
+
+ kernel_neon_begin();
+
+ nblks = BYTES2BLK8(nbytes);
+ if (nblks) {
+ sm4_neon_ctr_enc_blk8(ctx->rkey_enc, dst, src,
+ walk.iv, nblks);
+ dst += nblks * SM4_BLOCK_SIZE;
+ src += nblks * SM4_BLOCK_SIZE;
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
+
+ nblks = BYTES2BLKS(nbytes);
+ if (nblks) {
+ u8 keystream[SM4_BLOCK_SIZE * 8];
+ int i;
+
+ for (i = 0; i < nblks; i++) {
+ memcpy(&keystream[i * SM4_BLOCK_SIZE],
+ walk.iv, SM4_BLOCK_SIZE);
+ crypto_inc(walk.iv, SM4_BLOCK_SIZE);
+ }
+ sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream,
+ keystream, nblks);
+
+ crypto_xor_cpy(dst, src, keystream,
+ nblks * SM4_BLOCK_SIZE);
+ dst += nblks * SM4_BLOCK_SIZE;
+ src += nblks * SM4_BLOCK_SIZE;
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
+
+ kernel_neon_end();
+
+ /* tail */
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ u8 keystream[SM4_BLOCK_SIZE];
+
+ sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
+ crypto_inc(walk.iv, SM4_BLOCK_SIZE);
+ crypto_xor_cpy(dst, src, keystream, nbytes);
+ nbytes = 0;
+ }
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+
+static struct skcipher_alg sm4_algs[] = {
+ {
+ .base = {
+ .cra_name = "ecb(sm4)",
+ .cra_driver_name = "ecb-sm4-neon",
+ .cra_priority = 200,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .setkey = sm4_setkey,
+ .encrypt = sm4_ecb_encrypt,
+ .decrypt = sm4_ecb_decrypt,
+ }, {
+ .base = {
+ .cra_name = "cbc(sm4)",
+ .cra_driver_name = "cbc-sm4-neon",
+ .cra_priority = 200,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .setkey = sm4_setkey,
+ .encrypt = sm4_cbc_encrypt,
+ .decrypt = sm4_cbc_decrypt,
+ }, {
+ .base = {
+ .cra_name = "cfb(sm4)",
+ .cra_driver_name = "cfb-sm4-neon",
+ .cra_priority = 200,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .chunksize = SM4_BLOCK_SIZE,
+ .setkey = sm4_setkey,
+ .encrypt = sm4_cfb_encrypt,
+ .decrypt = sm4_cfb_decrypt,
+ }, {
+ .base = {
+ .cra_name = "ctr(sm4)",
+ .cra_driver_name = "ctr-sm4-neon",
+ .cra_priority = 200,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .chunksize = SM4_BLOCK_SIZE,
+ .setkey = sm4_setkey,
+ .encrypt = sm4_ctr_crypt,
+ .decrypt = sm4_ctr_crypt,
+ }
+};
+
+static int __init sm4_init(void)
+{
+ return crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
+}
+
+static void __exit sm4_exit(void)
+{
+ crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
+}
+
+module_init(sm4_init);
+module_exit(sm4_exit);
+
+MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR using ARMv8 NEON");
+MODULE_ALIAS_CRYPTO("sm4-neon");
+MODULE_ALIAS_CRYPTO("sm4");
+MODULE_ALIAS_CRYPTO("ecb(sm4)");
+MODULE_ALIAS_CRYPTO("cbc(sm4)");
+MODULE_ALIAS_CRYPTO("cfb(sm4)");
+MODULE_ALIAS_CRYPTO("ctr(sm4)");
+MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
+MODULE_LICENSE("GPL v2");
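
Each mode handler in the glue above follows the same split: BYTES2BLK8()
carves off full groups of eight blocks for the wide sm4_neon_*_blk8 asm
routines, and BYTES2BLKS() hands the at most eight remaining whole blocks to
sm4_neon_crypt_blk1_8. A standalone sketch of that bookkeeping, with
process8()/process1to8() as illustrative stand-ins for the asm entry points:

    #define SM4_BLOCK_SIZE 16

    extern void process8(unsigned char *dst, const unsigned char *src,
                         unsigned int nblks);
    extern void process1to8(unsigned char *dst, const unsigned char *src,
                            unsigned int nblks);

    static unsigned int crypt_walk_step(unsigned char *dst,
                                        const unsigned char *src,
                                        unsigned int nbytes)
    {
            unsigned int nblks;

            nblks = (nbytes >> 4) & ~7U;            /* BYTES2BLK8 */
            if (nblks) {
                    process8(dst, src, nblks);
                    dst += nblks * SM4_BLOCK_SIZE;
                    src += nblks * SM4_BLOCK_SIZE;
                    nbytes -= nblks * SM4_BLOCK_SIZE;
            }

            nblks = nbytes >> 4;                    /* BYTES2BLKS: 0..7 left */
            if (nblks) {
                    process1to8(dst, src, nblks);
                    nbytes -= nblks * SM4_BLOCK_SIZE;
            }

            return nbytes;      /* partial tail, returned to the walker */
    }
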
diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
index ccfff73e2c93..2ec51f339cec 100644
--- a/arch/s390/crypto/chacha-glue.c
+++ b/arch/s390/crypto/chacha-glue.c
@@ -62,6 +62,34 @@ static int chacha20_s390(struct skcipher_request *req)
return rc;
}
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+ /* TODO: implement hchacha_block_arch() in assembly */
+ hchacha_block_generic(state, stream, nrounds);
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+	/* The s390 ChaCha20 implementation hard-codes 20 rounds and
+	 * cannot handle inputs of one block or less, but otherwise
+	 * handles data of arbitrary size.
+	 */
+ if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20)
+ chacha_crypt_generic(state, dst, src, bytes, nrounds);
+ else
+ chacha20_crypt_s390(state, dst, src, bytes,
+ &state[4], &state[12]);
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
static struct skcipher_alg chacha_algs[] = {
{
.base.cra_name = "chacha20",
@@ -83,12 +111,14 @@ static struct skcipher_alg chacha_algs[] = {
static int __init chacha_mod_init(void)
{
- return crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
+ return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
+ crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)) : 0;
}
static void __exit chacha_mod_fini(void)
{
- crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
+ if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER))
+ crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
}
module_cpu_feature_match(VXRS, chacha_mod_init);
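
With the three arch hooks above in place, callers reach the s390 code
through the generic ChaCha library interface rather than the skcipher API.
A minimal caller sketch, assuming <crypto/chacha.h>; key and nonce handling
is elided:

    #include <crypto/chacha.h>

    static void chacha20_lib_example(u8 *dst, const u8 *src,
                                     unsigned int len,
                                     const u32 key[CHACHA_KEY_SIZE / 4],
                                     const u8 iv[CHACHA_IV_SIZE])
    {
            u32 state[16];

            chacha_init_arch(state, key, iv);
            /* more than one block and 20 rounds: takes the SIMD path */
            chacha_crypt_arch(state, dst, src, len, 20);
    }
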
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index fda6066437aa..ba06322c1e39 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -303,7 +303,7 @@ static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
-static int __init init(void)
+static int __init blowfish_init(void)
{
int err;
@@ -327,15 +327,15 @@ static int __init init(void)
return err;
}
-static void __exit fini(void)
+static void __exit blowfish_fini(void)
{
crypto_unregister_alg(&bf_cipher_alg);
crypto_unregister_skciphers(bf_skcipher_algs,
ARRAY_SIZE(bf_skcipher_algs));
}
-module_init(init);
-module_exit(fini);
+module_init(blowfish_init);
+module_exit(blowfish_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized");
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index 66c435ba9d3d..d45e9c0c42ac 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -1377,7 +1377,7 @@ static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
-static int __init init(void)
+static int __init camellia_init(void)
{
int err;
@@ -1401,15 +1401,15 @@ static int __init init(void)
return err;
}
-static void __exit fini(void)
+static void __exit camellia_fini(void)
{
crypto_unregister_alg(&camellia_cipher_alg);
crypto_unregister_skciphers(camellia_skcipher_algs,
ARRAY_SIZE(camellia_skcipher_algs));
}
-module_init(init);
-module_exit(fini);
+module_init(camellia_init);
+module_exit(camellia_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized");
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index ccf0b5fa4933..347e97f4b713 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -96,7 +96,7 @@ static struct skcipher_alg serpent_algs[] = {
static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
-static int __init init(void)
+static int __init serpent_avx2_init(void)
{
const char *feature_name;
@@ -115,14 +115,14 @@ static int __init init(void)
serpent_simd_algs);
}
-static void __exit fini(void)
+static void __exit serpent_avx2_fini(void)
{
simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
serpent_simd_algs);
}
-module_init(init);
-module_exit(fini);
+module_init(serpent_avx2_init);
+module_exit(serpent_avx2_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX2 optimized");
diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c
index 77e06c2da83d..f9c4adc27404 100644
--- a/arch/x86/crypto/twofish_glue.c
+++ b/arch/x86/crypto/twofish_glue.c
@@ -81,18 +81,18 @@ static struct crypto_alg alg = {
}
};
-static int __init init(void)
+static int __init twofish_glue_init(void)
{
return crypto_register_alg(&alg);
}
-static void __exit fini(void)
+static void __exit twofish_glue_fini(void)
{
crypto_unregister_alg(&alg);
}
-module_init(init);
-module_exit(fini);
+module_init(twofish_glue_init);
+module_exit(twofish_glue_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION ("Twofish Cipher Algorithm, asm optimized");
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 3507cf2064f1..90454cf18e0d 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -140,7 +140,7 @@ static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
-static int __init init(void)
+static int __init twofish_3way_init(void)
{
if (!force && is_blacklisted_cpu()) {
printk(KERN_INFO
@@ -154,13 +154,13 @@ static int __init init(void)
ARRAY_SIZE(tf_skciphers));
}
-static void __exit fini(void)
+static void __exit twofish_3way_fini(void)
{
crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers));
}
-module_init(init);
-module_exit(fini);
+module_init(twofish_3way_init);
+module_exit(twofish_3way_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 41068811fd0e..19197469cfab 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -274,7 +274,7 @@ config CRYPTO_ECRDSA
config CRYPTO_SM2
tristate "SM2 algorithm"
- select CRYPTO_LIB_SM3
+ select CRYPTO_SM3
select CRYPTO_AKCIPHER
select CRYPTO_MANAGER
select MPILIB
@@ -1010,9 +1010,12 @@ config CRYPTO_SHA3
http://keccak.noekeon.org/
config CRYPTO_SM3
+ tristate
+
+config CRYPTO_SM3_GENERIC
tristate "SM3 digest algorithm"
select CRYPTO_HASH
- select CRYPTO_LIB_SM3
+ select CRYPTO_SM3
help
SM3 secure hash function as defined by OSCCA GM/T 0004-2012 (SM3).
It is part of the Chinese Commercial Cryptography suite.
@@ -1025,7 +1028,7 @@ config CRYPTO_SM3_AVX_X86_64
tristate "SM3 digest algorithm (x86_64/AVX)"
depends on X86 && 64BIT
select CRYPTO_HASH
- select CRYPTO_LIB_SM3
+ select CRYPTO_SM3
help
SM3 secure hash function as defined by OSCCA GM/T 0004-2012 (SM3).
It is part of the Chinese Commercial Cryptography suite. This is
@@ -1572,9 +1575,12 @@ config CRYPTO_SERPENT_AVX2_X86_64
<https://www.cl.cam.ac.uk/~rja14/serpent.html>
config CRYPTO_SM4
+ tristate
+
+config CRYPTO_SM4_GENERIC
tristate "SM4 cipher algorithm"
select CRYPTO_ALGAPI
- select CRYPTO_LIB_SM4
+ select CRYPTO_SM4
help
SM4 cipher algorithms (OSCCA GB/T 32907-2016).
@@ -1603,7 +1609,7 @@ config CRYPTO_SM4_AESNI_AVX_X86_64
select CRYPTO_SKCIPHER
select CRYPTO_SIMD
select CRYPTO_ALGAPI
- select CRYPTO_LIB_SM4
+ select CRYPTO_SM4
help
SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX).
@@ -1624,7 +1630,7 @@ config CRYPTO_SM4_AESNI_AVX2_X86_64
select CRYPTO_SKCIPHER
select CRYPTO_SIMD
select CRYPTO_ALGAPI
- select CRYPTO_LIB_SM4
+ select CRYPTO_SM4
select CRYPTO_SM4_AESNI_AVX_X86_64
help
SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX2).
diff --git a/crypto/Makefile b/crypto/Makefile
index f754c4d17d6b..43bc33e247d1 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -78,7 +78,8 @@ obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o
obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o
obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o
obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o
-obj-$(CONFIG_CRYPTO_SM3) += sm3_generic.o
+obj-$(CONFIG_CRYPTO_SM3) += sm3.o
+obj-$(CONFIG_CRYPTO_SM3_GENERIC) += sm3_generic.o
obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o
obj-$(CONFIG_CRYPTO_WP512) += wp512.o
CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
@@ -134,7 +135,8 @@ obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o
CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
CFLAGS_aes_generic.o := $(call cc-option,-fno-code-hoisting) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83356
-obj-$(CONFIG_CRYPTO_SM4) += sm4_generic.o
+obj-$(CONFIG_CRYPTO_SM4) += sm4.o
+obj-$(CONFIG_CRYPTO_SM4_GENERIC) += sm4_generic.o
obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o
obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o
obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index a1bea0f4baa8..668095eca0fa 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -39,6 +39,10 @@ struct cryptd_cpu_queue {
};
struct cryptd_queue {
+	/*
+	 * Protected by disabling BH to allow enqueueing from softirq and
+	 * dequeuing from kworker (cryptd_queue_worker()).
+	 */
struct cryptd_cpu_queue __percpu *cpu_queue;
};
@@ -125,28 +129,28 @@ static void cryptd_fini_queue(struct cryptd_queue *queue)
static int cryptd_enqueue_request(struct cryptd_queue *queue,
struct crypto_async_request *request)
{
- int cpu, err;
+ int err;
struct cryptd_cpu_queue *cpu_queue;
refcount_t *refcnt;
- cpu = get_cpu();
+ local_bh_disable();
cpu_queue = this_cpu_ptr(queue->cpu_queue);
err = crypto_enqueue_request(&cpu_queue->queue, request);
refcnt = crypto_tfm_ctx(request->tfm);
if (err == -ENOSPC)
- goto out_put_cpu;
+ goto out;
- queue_work_on(cpu, cryptd_wq, &cpu_queue->work);
+ queue_work_on(smp_processor_id(), cryptd_wq, &cpu_queue->work);
if (!refcount_read(refcnt))
- goto out_put_cpu;
+ goto out;
refcount_inc(refcnt);
-out_put_cpu:
- put_cpu();
+out:
+ local_bh_enable();
return err;
}
@@ -162,15 +166,10 @@ static void cryptd_queue_worker(struct work_struct *work)
cpu_queue = container_of(work, struct cryptd_cpu_queue, work);
/*
* Only handle one request at a time to avoid hogging crypto workqueue.
- * preempt_disable/enable is used to prevent being preempted by
- * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent
- * cryptd_enqueue_request() being accessed from software interrupts.
*/
local_bh_disable();
- preempt_disable();
backlog = crypto_get_backlog(&cpu_queue->queue);
req = crypto_dequeue_request(&cpu_queue->queue);
- preempt_enable();
local_bh_enable();
if (!req)
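
The enqueue/dequeue pairing above rests on one rule: both sides touch the
per-CPU queue only with bottom halves disabled, which serializes task
context against softirq users and pins the task to one CPU for the
critical section. A schematic of the pattern; the struct and function
names here are illustrative, not cryptd's:

    #include <linux/bottom_half.h>
    #include <linux/percpu.h>
    #include <linux/smp.h>
    #include <linux/workqueue.h>
    #include <crypto/algapi.h>

    struct toy_cpu_queue {
            struct crypto_queue queue;
            struct work_struct work;
    };

    static int toy_enqueue(struct toy_cpu_queue __percpu *pcpu,
                           struct crypto_async_request *req)
    {
            struct toy_cpu_queue *q;
            int err;

            local_bh_disable();     /* excludes softirq on this CPU */
            q = this_cpu_ptr(pcpu);
            err = crypto_enqueue_request(&q->queue, req);
            queue_work_on(smp_processor_id(), system_wq, &q->work);
            local_bh_enable();

            return err;
    }
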
diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index 6056a990c9f2..bb8e77077f02 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -253,6 +253,7 @@ static void crypto_pump_work(struct kthread_work *work)
* crypto_transfer_request - transfer the new request into the engine queue
* @engine: the hardware engine
* @req: the request that needs to be listed into the engine queue
+ * @need_pump: indicates whether the request pump should be queued to kthread_work
*/
static int crypto_transfer_request(struct crypto_engine *engine,
struct crypto_async_request *req,
diff --git a/crypto/ecrdsa.c b/crypto/ecrdsa.c
index b32ffcaad9ad..f3c6b5e15e75 100644
--- a/crypto/ecrdsa.c
+++ b/crypto/ecrdsa.c
@@ -113,15 +113,15 @@ static int ecrdsa_verify(struct akcipher_request *req)
/* Step 1: verify that 0 < r < q, 0 < s < q */
if (vli_is_zero(r, ndigits) ||
- vli_cmp(r, ctx->curve->n, ndigits) == 1 ||
+ vli_cmp(r, ctx->curve->n, ndigits) >= 0 ||
vli_is_zero(s, ndigits) ||
- vli_cmp(s, ctx->curve->n, ndigits) == 1)
+ vli_cmp(s, ctx->curve->n, ndigits) >= 0)
return -EKEYREJECTED;
/* Step 2: calculate hash (h) of the message (passed as input) */
/* Step 3: calculate e = h \mod q */
vli_from_le64(e, digest, ndigits);
- if (vli_cmp(e, ctx->curve->n, ndigits) == 1)
+ if (vli_cmp(e, ctx->curve->n, ndigits) >= 0)
vli_sub(e, e, ctx->curve->n, ndigits);
if (vli_is_zero(e, ndigits))
e[0] = 1;
@@ -137,7 +137,7 @@ static int ecrdsa_verify(struct akcipher_request *req)
/* Step 6: calculate point C = z_1P + z_2Q, and R = x_c \mod q */
ecc_point_mult_shamir(&cc, z1, &ctx->curve->g, z2, &ctx->pub_key,
ctx->curve);
- if (vli_cmp(cc.x, ctx->curve->n, ndigits) == 1)
+ if (vli_cmp(cc.x, ctx->curve->n, ndigits) >= 0)
vli_sub(cc.x, cc.x, ctx->curve->n, ndigits);
/* Step 7: if R == r signature is valid */
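
The three hunks above fix the same class of bug: vli_cmp() returns -1, 0
or 1, so testing for == 1 rejected values strictly greater than the group
order but let the boundary case equal to it slip through, while the checks
require strictly-less-than. Toy version with plain integers standing in
for the multi-precision values:

    /* required: 0 < v < q */
    static int in_range_old(unsigned long v, unsigned long q)
    {
            return v != 0 && !(v > q);      /* bug: v == q passes */
    }

    static int in_range_new(unsigned long v, unsigned long q)
    {
            return v != 0 && !(v >= q);     /* v == q now rejected */
    }
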
diff --git a/lib/crypto/sm3.c b/crypto/sm3.c
index d473e358a873..d473e358a873 100644
--- a/lib/crypto/sm3.c
+++ b/crypto/sm3.c
diff --git a/lib/crypto/sm4.c b/crypto/sm4.c
index 284e62576d0c..2c44193bc27e 100644
--- a/lib/crypto/sm4.c
+++ b/crypto/sm4.c
@@ -11,7 +11,7 @@
#include <asm/unaligned.h>
#include <crypto/sm4.h>
-static const u32 fk[4] = {
+static const u32 ____cacheline_aligned fk[4] = {
0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
};
@@ -61,6 +61,14 @@ static const u8 ____cacheline_aligned sbox[256] = {
0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
};
+extern const u32 crypto_sm4_fk[4] __alias(fk);
+extern const u32 crypto_sm4_ck[32] __alias(ck);
+extern const u8 crypto_sm4_sbox[256] __alias(sbox);
+
+EXPORT_SYMBOL(crypto_sm4_fk);
+EXPORT_SYMBOL(crypto_sm4_ck);
+EXPORT_SYMBOL(crypto_sm4_sbox);
+
static inline u32 sm4_t_non_lin_sub(u32 x)
{
u32 out;
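
Moving the implementation into crypto/ while still letting arch code use
the constant tables works by symbol aliasing: __alias(), the kernel wrapper
around the GCC alias attribute, gives the static tables a second,
exportable name without duplicating the data. Toy equivalent of the
construct; identifiers here are illustrative:

    static const unsigned int fk_tbl[4] = { 1, 2, 3, 4 };

    /* "crypto_fk" is the same object under a second linker-visible name */
    extern const unsigned int crypto_fk[4]
            __attribute__((alias("fk_tbl")));
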
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 4948201065cc..5801a8f9f713 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -232,6 +232,20 @@ enum finalization_type {
FINALIZATION_TYPE_DIGEST, /* use digest() */
};
+/*
+ * Whether the crypto operation will occur in-place, and if so whether the
+ * source and destination scatterlist pointers will coincide (req->src ==
+ * req->dst), or whether they'll merely point to two separate scatterlists
+ * (req->src != req->dst) that reference the same underlying memory.
+ *
+ * This is only relevant for algorithm types that support in-place operation.
+ */
+enum inplace_mode {
+ OUT_OF_PLACE,
+ INPLACE_ONE_SGLIST,
+ INPLACE_TWO_SGLISTS,
+};
+
#define TEST_SG_TOTAL 10000
/**
@@ -265,7 +279,7 @@ struct test_sg_division {
* crypto test vector can be tested.
*
* @name: name of this config, logged for debugging purposes if a test fails
- * @inplace: operate on the data in-place, if applicable for the algorithm type?
+ * @inplace_mode: whether and how to operate on the data in-place, if applicable
* @req_flags: extra request_flags, e.g. CRYPTO_TFM_REQ_MAY_SLEEP
* @src_divs: description of how to arrange the source scatterlist
* @dst_divs: description of how to arrange the dst scatterlist, if applicable
@@ -282,7 +296,7 @@ struct test_sg_division {
*/
struct testvec_config {
const char *name;
- bool inplace;
+ enum inplace_mode inplace_mode;
u32 req_flags;
struct test_sg_division src_divs[XBUFSIZE];
struct test_sg_division dst_divs[XBUFSIZE];
@@ -307,11 +321,16 @@ struct testvec_config {
/* Configs for skciphers and aeads */
static const struct testvec_config default_cipher_testvec_configs[] = {
{
- .name = "in-place",
- .inplace = true,
+ .name = "in-place (one sglist)",
+ .inplace_mode = INPLACE_ONE_SGLIST,
+ .src_divs = { { .proportion_of_total = 10000 } },
+ }, {
+ .name = "in-place (two sglists)",
+ .inplace_mode = INPLACE_TWO_SGLISTS,
.src_divs = { { .proportion_of_total = 10000 } },
}, {
.name = "out-of-place",
+ .inplace_mode = OUT_OF_PLACE,
.src_divs = { { .proportion_of_total = 10000 } },
}, {
.name = "unaligned buffer, offset=1",
@@ -349,7 +368,7 @@ static const struct testvec_config default_cipher_testvec_configs[] = {
.key_offset = 3,
}, {
.name = "misaligned splits crossing pages, inplace",
- .inplace = true,
+ .inplace_mode = INPLACE_ONE_SGLIST,
.src_divs = {
{
.proportion_of_total = 7500,
@@ -749,18 +768,39 @@ static int build_cipher_test_sglists(struct cipher_test_sglists *tsgls,
iov_iter_kvec(&input, WRITE, inputs, nr_inputs, src_total_len);
err = build_test_sglist(&tsgls->src, cfg->src_divs, alignmask,
- cfg->inplace ?
+ cfg->inplace_mode != OUT_OF_PLACE ?
max(dst_total_len, src_total_len) :
src_total_len,
&input, NULL);
if (err)
return err;
- if (cfg->inplace) {
+ /*
+ * In-place crypto operations can use the same scatterlist for both the
+ * source and destination (req->src == req->dst), or can use separate
+ * scatterlists (req->src != req->dst) which point to the same
+ * underlying memory. Make sure to test both cases.
+ */
+ if (cfg->inplace_mode == INPLACE_ONE_SGLIST) {
tsgls->dst.sgl_ptr = tsgls->src.sgl;
tsgls->dst.nents = tsgls->src.nents;
return 0;
}
+ if (cfg->inplace_mode == INPLACE_TWO_SGLISTS) {
+ /*
+ * For now we keep it simple and only test the case where the
+ * two scatterlists have identical entries, rather than
+ * different entries that split up the same memory differently.
+ */
+ memcpy(tsgls->dst.sgl, tsgls->src.sgl,
+ tsgls->src.nents * sizeof(tsgls->src.sgl[0]));
+ memcpy(tsgls->dst.sgl_saved, tsgls->src.sgl,
+ tsgls->src.nents * sizeof(tsgls->src.sgl[0]));
+ tsgls->dst.sgl_ptr = tsgls->dst.sgl;
+ tsgls->dst.nents = tsgls->src.nents;
+ return 0;
+ }
+ /* Out of place */
return build_test_sglist(&tsgls->dst,
cfg->dst_divs[0].proportion_of_total ?
cfg->dst_divs : cfg->src_divs,
@@ -995,9 +1035,19 @@ static void generate_random_testvec_config(struct testvec_config *cfg,
p += scnprintf(p, end - p, "random:");
- if (prandom_u32() % 2 == 0) {
- cfg->inplace = true;
- p += scnprintf(p, end - p, " inplace");
+ switch (prandom_u32() % 4) {
+ case 0:
+ case 1:
+ cfg->inplace_mode = OUT_OF_PLACE;
+ break;
+ case 2:
+ cfg->inplace_mode = INPLACE_ONE_SGLIST;
+ p += scnprintf(p, end - p, " inplace_one_sglist");
+ break;
+ default:
+ cfg->inplace_mode = INPLACE_TWO_SGLISTS;
+ p += scnprintf(p, end - p, " inplace_two_sglists");
+ break;
}
if (prandom_u32() % 2 == 0) {
@@ -1034,7 +1084,7 @@ static void generate_random_testvec_config(struct testvec_config *cfg,
cfg->req_flags);
p += scnprintf(p, end - p, "]");
- if (!cfg->inplace && prandom_u32() % 2 == 0) {
+ if (cfg->inplace_mode == OUT_OF_PLACE && prandom_u32() % 2 == 0) {
p += scnprintf(p, end - p, " dst_divs=[");
p = generate_random_sgl_divisions(cfg->dst_divs,
ARRAY_SIZE(cfg->dst_divs),
@@ -2085,7 +2135,8 @@ static int test_aead_vec_cfg(int enc, const struct aead_testvec *vec,
/* Check for the correct output (ciphertext or plaintext) */
err = verify_correct_output(&tsgls->dst, enc ? vec->ctext : vec->ptext,
enc ? vec->clen : vec->plen,
- vec->alen, enc || !cfg->inplace);
+ vec->alen,
+ enc || cfg->inplace_mode == OUT_OF_PLACE);
if (err == -EOVERFLOW) {
pr_err("alg: aead: %s %s overran dst buffer on test vector %s, cfg=\"%s\"\n",
driver, op, vec_name, cfg->name);
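
The new INPLACE_TWO_SGLISTS config targets drivers that special-case
req->src == req->dst: here the source and destination are distinct
scatterlists that nonetheless describe the same bytes. A minimal sketch of
constructing such a pair over one linear buffer, assuming
<linux/scatterlist.h>:

    #include <linux/scatterlist.h>

    static void init_inplace_two_sglists(struct scatterlist *src,
                                         struct scatterlist *dst,
                                         void *buf, unsigned int len)
    {
            sg_init_one(src, buf, len);
            sg_init_one(dst, buf, len);
            /* hand src and dst to the request separately: the operation
             * is in-place even though req->src != req->dst */
    }
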
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index a087156a5818..b3f2d55dc551 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -385,6 +385,19 @@ config HW_RANDOM_PIC32
If unsure, say Y.
+config HW_RANDOM_POLARFIRE_SOC
+ tristate "Microchip PolarFire SoC Random Number Generator support"
+ depends on HW_RANDOM && POLARFIRE_SOC_SYS_CTRL
+ help
+ This driver provides kernel-side support for the Random Number
+ Generator hardware found on PolarFire SoC (MPFS).
+
+ To compile this driver as a module, choose M here. The
+	  module will be called mpfs_rng.
+
+ If unsure, say N.
+
config HW_RANDOM_MESON
tristate "Amlogic Meson Random Number Generator support"
depends on HW_RANDOM
@@ -527,7 +540,7 @@ config HW_RANDOM_ARM_SMCCC_TRNG
config HW_RANDOM_CN10K
tristate "Marvell CN10K Random Number Generator support"
- depends on HW_RANDOM && PCI && ARM64
+ depends on HW_RANDOM && PCI && (ARM64 || (64BIT && COMPILE_TEST))
default HW_RANDOM
help
This driver provides support for the True Random Number
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index 584d47ba32f7..3e948cf04476 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -46,3 +46,4 @@ obj-$(CONFIG_HW_RANDOM_CCTRNG) += cctrng.o
obj-$(CONFIG_HW_RANDOM_XIPHERA) += xiphera-trng.o
obj-$(CONFIG_HW_RANDOM_ARM_SMCCC_TRNG) += arm_smccc_trng.o
obj-$(CONFIG_HW_RANDOM_CN10K) += cn10k-rng.o
+obj-$(CONFIG_HW_RANDOM_POLARFIRE_SOC) += mpfs-rng.o
diff --git a/drivers/char/hw_random/cn10k-rng.c b/drivers/char/hw_random/cn10k-rng.c
index 35001c63648b..a01e9307737c 100644
--- a/drivers/char/hw_random/cn10k-rng.c
+++ b/drivers/char/hw_random/cn10k-rng.c
@@ -31,26 +31,23 @@ struct cn10k_rng {
#define PLAT_OCTEONTX_RESET_RNG_EBG_HEALTH_STATE 0xc2000b0f
-static int reset_rng_health_state(struct cn10k_rng *rng)
+static unsigned long reset_rng_health_state(struct cn10k_rng *rng)
{
struct arm_smccc_res res;
/* Send SMC service call to reset EBG health state */
arm_smccc_smc(PLAT_OCTEONTX_RESET_RNG_EBG_HEALTH_STATE, 0, 0, 0, 0, 0, 0, 0, &res);
- if (res.a0 != 0UL)
- return -EIO;
-
- return 0;
+ return res.a0;
}
static int check_rng_health(struct cn10k_rng *rng)
{
u64 status;
- int err;
+ unsigned long err;
/* Skip checking health */
if (!rng->reg_base)
- return 0;
+ return -ENODEV;
status = readq(rng->reg_base + RNM_PF_EBG_HEALTH);
if (status & BIT_ULL(20)) {
@@ -58,7 +55,9 @@ static int check_rng_health(struct cn10k_rng *rng)
if (err) {
dev_err(&rng->pdev->dev, "HWRNG: Health test failed (status=%llx)\n",
status);
- dev_err(&rng->pdev->dev, "HWRNG: error during reset\n");
+ dev_err(&rng->pdev->dev, "HWRNG: error during reset (error=%lx)\n",
+ err);
+ return -EIO;
}
}
return 0;
@@ -90,6 +89,7 @@ static int cn10k_rng_read(struct hwrng *hwrng, void *data,
{
struct cn10k_rng *rng = (struct cn10k_rng *)hwrng->priv;
unsigned int size;
+ u8 *pos = data;
int err = 0;
u64 value;
@@ -102,17 +102,20 @@ static int cn10k_rng_read(struct hwrng *hwrng, void *data,
while (size >= 8) {
cn10k_read_trng(rng, &value);
- *((u64 *)data) = (u64)value;
+ *((u64 *)pos) = value;
size -= 8;
- data += 8;
+ pos += 8;
}
- while (size > 0) {
+ if (size > 0) {
cn10k_read_trng(rng, &value);
- *((u8 *)data) = (u8)value;
- size--;
- data++;
+ while (size > 0) {
+ *pos = (u8)value;
+ value >>= 8;
+ size--;
+ pos++;
+ }
}
return max - size;
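
The reworked tail above also changes how randomness is spent: instead of
issuing one 64-bit hardware read per leftover byte and keeping only the
lowest byte each time, a single read now feeds all remaining bytes. The
final loop in isolation, with read_trng64() as a stand-in for
cn10k_read_trng():

    #include <stdint.h>
    #include <stddef.h>

    extern uint64_t read_trng64(void);

    static void fill_tail(uint8_t *pos, size_t size)    /* size < 8 */
    {
            if (size > 0) {
                    uint64_t value = read_trng64();

                    while (size > 0) {
                            *pos++ = (uint8_t)value;
                            value >>= 8;    /* next fresh byte */
                            size--;
                    }
            }
    }
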
diff --git a/drivers/char/hw_random/mpfs-rng.c b/drivers/char/hw_random/mpfs-rng.c
new file mode 100644
index 000000000000..5813da617a48
--- /dev/null
+++ b/drivers/char/hw_random/mpfs-rng.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Microchip PolarFire SoC (MPFS) hardware random driver
+ *
+ * Copyright (c) 2020-2022 Microchip Corporation. All rights reserved.
+ *
+ * Author: Conor Dooley <conor.dooley@microchip.com>
+ */
+
+#include <linux/module.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+#include <soc/microchip/mpfs.h>
+
+#define CMD_OPCODE 0x21
+#define CMD_DATA_SIZE 0U
+#define CMD_DATA NULL
+#define MBOX_OFFSET 0U
+#define RESP_OFFSET 0U
+#define RNG_RESP_BYTES 32U
+
+struct mpfs_rng {
+ struct mpfs_sys_controller *sys_controller;
+ struct hwrng rng;
+};
+
+static int mpfs_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
+{
+ struct mpfs_rng *rng_priv = container_of(rng, struct mpfs_rng, rng);
+ u32 response_msg[RNG_RESP_BYTES / sizeof(u32)];
+ unsigned int count = 0, copy_size_bytes;
+ int ret;
+
+ struct mpfs_mss_response response = {
+ .resp_status = 0U,
+ .resp_msg = (u32 *)response_msg,
+ .resp_size = RNG_RESP_BYTES
+ };
+ struct mpfs_mss_msg msg = {
+ .cmd_opcode = CMD_OPCODE,
+ .cmd_data_size = CMD_DATA_SIZE,
+ .response = &response,
+ .cmd_data = CMD_DATA,
+ .mbox_offset = MBOX_OFFSET,
+ .resp_offset = RESP_OFFSET
+ };
+
+ while (count < max) {
+ ret = mpfs_blocking_transaction(rng_priv->sys_controller, &msg);
+ if (ret)
+ return ret;
+
+ copy_size_bytes = max - count > RNG_RESP_BYTES ? RNG_RESP_BYTES : max - count;
+ memcpy(buf + count, response_msg, copy_size_bytes);
+
+ count += copy_size_bytes;
+ if (!wait)
+ break;
+ }
+
+ return count;
+}
+
+static int mpfs_rng_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct mpfs_rng *rng_priv;
+ int ret;
+
+ rng_priv = devm_kzalloc(dev, sizeof(*rng_priv), GFP_KERNEL);
+ if (!rng_priv)
+ return -ENOMEM;
+
+ rng_priv->sys_controller = mpfs_sys_controller_get(&pdev->dev);
+ if (IS_ERR(rng_priv->sys_controller))
+ return dev_err_probe(dev, PTR_ERR(rng_priv->sys_controller),
+ "Failed to register system controller hwrng sub device\n");
+
+ rng_priv->rng.read = mpfs_rng_read;
+ rng_priv->rng.name = pdev->name;
+ rng_priv->rng.quality = 1024;
+
+ platform_set_drvdata(pdev, rng_priv);
+
+ ret = devm_hwrng_register(&pdev->dev, &rng_priv->rng);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "Failed to register MPFS hwrng\n");
+
+ dev_info(&pdev->dev, "Registered MPFS hwrng\n");
+
+ return 0;
+}
+
+static struct platform_driver mpfs_rng_driver = {
+ .driver = {
+ .name = "mpfs-rng",
+ },
+ .probe = mpfs_rng_probe,
+};
+module_platform_driver(mpfs_rng_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Conor Dooley <conor.dooley@microchip.com>");
+MODULE_DESCRIPTION("PolarFire SoC (MPFS) hardware random driver");
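
mpfs_rng_read() above gathers data 32 bytes per mailbox transaction and
returns early after a single chunk when the caller cannot wait. The same
chunking reduced to its arithmetic; get_chunk() is a stand-in for
mpfs_blocking_transaction():

    #include <stddef.h>
    #include <string.h>

    #define CHUNK 32U

    extern int get_chunk(unsigned char rsp[CHUNK]);

    static int read_chunked(unsigned char *buf, size_t max, int wait)
    {
            unsigned char rsp[CHUNK];
            size_t count = 0, n;

            while (count < max) {
                    if (get_chunk(rsp))
                            return -1;      /* transaction failed */
                    n = max - count > CHUNK ? CHUNK : max - count;
                    memcpy(buf + count, rsp, n);
                    count += n;
                    if (!wait)
                            break;          /* one pass only */
            }
            return (int)count;
    }
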
diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c
index e0d77fa048fb..f06e4f95114f 100644
--- a/drivers/char/hw_random/omap3-rom-rng.c
+++ b/drivers/char/hw_random/omap3-rom-rng.c
@@ -92,7 +92,7 @@ static int __maybe_unused omap_rom_rng_runtime_resume(struct device *dev)
r = ddata->rom_rng_call(0, 0, RNG_GEN_PRNG_HW_INIT);
if (r != 0) {
- clk_disable(ddata->clk);
+ clk_disable_unprepare(ddata->clk);
dev_err(dev, "HW init failed: %d\n", r);
return -EIO;
diff --git a/drivers/char/hw_random/optee-rng.c b/drivers/char/hw_random/optee-rng.c
index a948c0727b2b..96b5d546d136 100644
--- a/drivers/char/hw_random/optee-rng.c
+++ b/drivers/char/hw_random/optee-rng.c
@@ -115,7 +115,7 @@ static size_t get_optee_rng_data(struct optee_rng_private *pvt_data,
static int optee_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
{
struct optee_rng_private *pvt_data = to_optee_rng_private(rng);
- size_t read = 0, rng_size = 0;
+ size_t read = 0, rng_size;
int timeout = 1;
u8 *data = buf;
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 7b2d138bc83e..ee99c02c84e8 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -216,9 +216,9 @@ config CRYPTO_AES_S390
config CRYPTO_CHACHA_S390
tristate "ChaCha20 stream cipher"
depends on S390
- select CRYPTO_ALGAPI
select CRYPTO_SKCIPHER
- select CRYPTO_CHACHA20
+ select CRYPTO_LIB_CHACHA_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
help
This is the s390 SIMD implementation of the ChaCha20 stream
cipher (RFC 7539).
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 0a4fff23d272..f81703a86b98 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -3,6 +3,7 @@ obj-$(CONFIG_CRYPTO_DEV_ALLWINNER) += allwinner/
obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o
obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o
obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
+# __init ordering requires atmel-i2c being before atmel-ecc and atmel-sha204a.
obj-$(CONFIG_CRYPTO_DEV_ATMEL_I2C) += atmel-i2c.o
obj-$(CONFIG_CRYPTO_DEV_ATMEL_ECC) += atmel-ecc.o
obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA204A) += atmel-sha204a.o
diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c
index dec79fa3ebaf..10fe9f73a5fb 100644
--- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c
+++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c
@@ -20,7 +20,6 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq)
unsigned int ivsize = crypto_skcipher_ivsize(tfm);
struct sun4i_cipher_req_ctx *ctx = skcipher_request_ctx(areq);
u32 mode = ctx->mode;
- void *backup_iv = NULL;
/* when activating SS, the default FIFO space is SS_RX_DEFAULT(32) */
u32 rx_cnt = SS_RX_DEFAULT;
u32 tx_cnt = 0;
@@ -48,10 +47,8 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq)
}
if (areq->iv && ivsize > 0 && mode & SS_DECRYPTION) {
- backup_iv = kzalloc(ivsize, GFP_KERNEL);
- if (!backup_iv)
- return -ENOMEM;
- scatterwalk_map_and_copy(backup_iv, areq->src, areq->cryptlen - ivsize, ivsize, 0);
+ scatterwalk_map_and_copy(ctx->backup_iv, areq->src,
+ areq->cryptlen - ivsize, ivsize, 0);
}
if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN4I_SS_DEBUG)) {
@@ -134,8 +131,8 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq)
if (areq->iv) {
if (mode & SS_DECRYPTION) {
- memcpy(areq->iv, backup_iv, ivsize);
- kfree_sensitive(backup_iv);
+ memcpy(areq->iv, ctx->backup_iv, ivsize);
+ memzero_explicit(ctx->backup_iv, ivsize);
} else {
scatterwalk_map_and_copy(areq->iv, areq->dst, areq->cryptlen - ivsize,
ivsize, 0);
@@ -199,7 +196,6 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq)
unsigned int ileft = areq->cryptlen;
unsigned int oleft = areq->cryptlen;
unsigned int todo;
- void *backup_iv = NULL;
struct sg_mapping_iter mi, mo;
unsigned long pi = 0, po = 0; /* progress for in and out */
bool miter_err;
@@ -244,10 +240,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq)
return sun4i_ss_cipher_poll_fallback(areq);
if (areq->iv && ivsize > 0 && mode & SS_DECRYPTION) {
- backup_iv = kzalloc(ivsize, GFP_KERNEL);
- if (!backup_iv)
- return -ENOMEM;
- scatterwalk_map_and_copy(backup_iv, areq->src, areq->cryptlen - ivsize, ivsize, 0);
+ scatterwalk_map_and_copy(ctx->backup_iv, areq->src,
+ areq->cryptlen - ivsize, ivsize, 0);
}
if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN4I_SS_DEBUG)) {
@@ -384,8 +378,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq)
}
if (areq->iv) {
if (mode & SS_DECRYPTION) {
- memcpy(areq->iv, backup_iv, ivsize);
- kfree_sensitive(backup_iv);
+ memcpy(areq->iv, ctx->backup_iv, ivsize);
+ memzero_explicit(ctx->backup_iv, ivsize);
} else {
scatterwalk_map_and_copy(areq->iv, areq->dst, areq->cryptlen - ivsize,
ivsize, 0);
diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h
index 0fee6f4e2d90..ba59c7a48825 100644
--- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h
+++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h
@@ -183,6 +183,7 @@ struct sun4i_tfm_ctx {
struct sun4i_cipher_req_ctx {
u32 mode;
+ u8 backup_iv[AES_BLOCK_SIZE];
struct skcipher_request fallback_req; // keep at the end
};
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
index 35e3cadccac2..74b4e910a38d 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
@@ -25,26 +25,62 @@ static int sun8i_ce_cipher_need_fallback(struct skcipher_request *areq)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
struct scatterlist *sg;
+ struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+ struct sun8i_ce_alg_template *algt;
+ unsigned int todo, len;
+
+ algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher);
+
+ if (sg_nents_for_len(areq->src, areq->cryptlen) > MAX_SG ||
+ sg_nents_for_len(areq->dst, areq->cryptlen) > MAX_SG) {
+ algt->stat_fb_maxsg++;
+ return true;
+ }
- if (sg_nents(areq->src) > MAX_SG || sg_nents(areq->dst) > MAX_SG)
+ if (areq->cryptlen < crypto_skcipher_ivsize(tfm)) {
+ algt->stat_fb_leniv++;
return true;
+ }
- if (areq->cryptlen < crypto_skcipher_ivsize(tfm))
+ if (areq->cryptlen == 0) {
+ algt->stat_fb_len0++;
return true;
+ }
- if (areq->cryptlen == 0 || areq->cryptlen % 16)
+ if (areq->cryptlen % 16) {
+ algt->stat_fb_mod16++;
return true;
+ }
+ len = areq->cryptlen;
sg = areq->src;
while (sg) {
- if (sg->length % 4 || !IS_ALIGNED(sg->offset, sizeof(u32)))
+ if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
+ algt->stat_fb_srcali++;
+ return true;
+ }
+ todo = min(len, sg->length);
+ if (todo % 4) {
+ algt->stat_fb_srclen++;
return true;
+ }
+ len -= todo;
sg = sg_next(sg);
}
+
+ len = areq->cryptlen;
sg = areq->dst;
while (sg) {
- if (sg->length % 4 || !IS_ALIGNED(sg->offset, sizeof(u32)))
+ if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
+ algt->stat_fb_dstali++;
+ return true;
+ }
+ todo = min(len, sg->length);
+ if (todo % 4) {
+ algt->stat_fb_dstlen++;
return true;
+ }
+ len -= todo;
sg = sg_next(sg);
}
return false;
@@ -94,6 +130,8 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req
int nr_sgs = 0;
int nr_sgd = 0;
int err = 0;
+ int ns = sg_nents_for_len(areq->src, areq->cryptlen);
+ int nd = sg_nents_for_len(areq->dst, areq->cryptlen);
algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher);
@@ -152,23 +190,13 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req
ivsize = crypto_skcipher_ivsize(tfm);
if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) {
rctx->ivlen = ivsize;
- rctx->bounce_iv = kzalloc(ivsize, GFP_KERNEL | GFP_DMA);
- if (!rctx->bounce_iv) {
- err = -ENOMEM;
- goto theend_key;
- }
if (rctx->op_dir & CE_DECRYPTION) {
- rctx->backup_iv = kzalloc(ivsize, GFP_KERNEL);
- if (!rctx->backup_iv) {
- err = -ENOMEM;
- goto theend_key;
- }
offset = areq->cryptlen - ivsize;
- scatterwalk_map_and_copy(rctx->backup_iv, areq->src,
+ scatterwalk_map_and_copy(chan->backup_iv, areq->src,
offset, ivsize, 0);
}
- memcpy(rctx->bounce_iv, areq->iv, ivsize);
- rctx->addr_iv = dma_map_single(ce->dev, rctx->bounce_iv, rctx->ivlen,
+ memcpy(chan->bounce_iv, areq->iv, ivsize);
+ rctx->addr_iv = dma_map_single(ce->dev, chan->bounce_iv, rctx->ivlen,
DMA_TO_DEVICE);
if (dma_mapping_error(ce->dev, rctx->addr_iv)) {
dev_err(ce->dev, "Cannot DMA MAP IV\n");
@@ -179,8 +207,7 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req
}
if (areq->src == areq->dst) {
- nr_sgs = dma_map_sg(ce->dev, areq->src, sg_nents(areq->src),
- DMA_BIDIRECTIONAL);
+ nr_sgs = dma_map_sg(ce->dev, areq->src, ns, DMA_BIDIRECTIONAL);
if (nr_sgs <= 0 || nr_sgs > MAX_SG) {
dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs);
err = -EINVAL;
@@ -188,15 +215,13 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req
}
nr_sgd = nr_sgs;
} else {
- nr_sgs = dma_map_sg(ce->dev, areq->src, sg_nents(areq->src),
- DMA_TO_DEVICE);
+ nr_sgs = dma_map_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE);
if (nr_sgs <= 0 || nr_sgs > MAX_SG) {
dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs);
err = -EINVAL;
goto theend_iv;
}
- nr_sgd = dma_map_sg(ce->dev, areq->dst, sg_nents(areq->dst),
- DMA_FROM_DEVICE);
+ nr_sgd = dma_map_sg(ce->dev, areq->dst, nd, DMA_FROM_DEVICE);
if (nr_sgd <= 0 || nr_sgd > MAX_SG) {
dev_err(ce->dev, "Invalid sg number %d\n", nr_sgd);
err = -EINVAL;
@@ -241,14 +266,11 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req
theend_sgs:
if (areq->src == areq->dst) {
- dma_unmap_sg(ce->dev, areq->src, sg_nents(areq->src),
- DMA_BIDIRECTIONAL);
+ dma_unmap_sg(ce->dev, areq->src, ns, DMA_BIDIRECTIONAL);
} else {
if (nr_sgs > 0)
- dma_unmap_sg(ce->dev, areq->src, sg_nents(areq->src),
- DMA_TO_DEVICE);
- dma_unmap_sg(ce->dev, areq->dst, sg_nents(areq->dst),
- DMA_FROM_DEVICE);
+ dma_unmap_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE);
+ dma_unmap_sg(ce->dev, areq->dst, nd, DMA_FROM_DEVICE);
}
theend_iv:
@@ -257,16 +279,15 @@ theend_iv:
dma_unmap_single(ce->dev, rctx->addr_iv, rctx->ivlen, DMA_TO_DEVICE);
offset = areq->cryptlen - ivsize;
if (rctx->op_dir & CE_DECRYPTION) {
- memcpy(areq->iv, rctx->backup_iv, ivsize);
- kfree_sensitive(rctx->backup_iv);
+ memcpy(areq->iv, chan->backup_iv, ivsize);
+ memzero_explicit(chan->backup_iv, ivsize);
} else {
scatterwalk_map_and_copy(areq->iv, areq->dst, offset,
ivsize, 0);
}
- kfree(rctx->bounce_iv);
+ memzero_explicit(chan->bounce_iv, ivsize);
}
-theend_key:
dma_unmap_single(ce->dev, rctx->addr_key, op->keylen, DMA_TO_DEVICE);
theend:
@@ -322,13 +343,13 @@ static int sun8i_ce_cipher_unprepare(struct crypto_engine *engine, void *async_r
dma_unmap_single(ce->dev, rctx->addr_iv, rctx->ivlen, DMA_TO_DEVICE);
offset = areq->cryptlen - ivsize;
if (rctx->op_dir & CE_DECRYPTION) {
- memcpy(areq->iv, rctx->backup_iv, ivsize);
- kfree_sensitive(rctx->backup_iv);
+ memcpy(areq->iv, chan->backup_iv, ivsize);
+ memzero_explicit(chan->backup_iv, ivsize);
} else {
scatterwalk_map_and_copy(areq->iv, areq->dst, offset,
ivsize, 0);
}
- kfree(rctx->bounce_iv);
+ memzero_explicit(chan->bounce_iv, ivsize);
}
dma_unmap_single(ce->dev, rctx->addr_key, op->keylen, DMA_TO_DEVICE);
@@ -398,10 +419,9 @@ int sun8i_ce_cipher_init(struct crypto_tfm *tfm)
sktfm->reqsize = sizeof(struct sun8i_cipher_req_ctx) +
crypto_skcipher_reqsize(op->fallback_tfm);
-
- dev_info(op->ce->dev, "Fallback for %s is %s\n",
- crypto_tfm_alg_driver_name(&sktfm->base),
- crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)));
+ memcpy(algt->fbname,
+ crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)),
+ CRYPTO_MAX_ALG_NAME);
op->enginectx.op.do_one_request = sun8i_ce_cipher_run;
op->enginectx.op.prepare_request = sun8i_ce_cipher_prepare;
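
Several hunks above replace sg_nents() with sg_nents_for_len(): the former
counts every entry in the chain, while the latter counts only the entries
needed to cover the given length and returns -EINVAL if the chain is too
short. Trailing unused entries therefore no longer force a fallback or get
mapped. Sketch of the resulting check, assuming <linux/scatterlist.h>:

    #include <linux/scatterlist.h>

    static bool needs_fallback(struct scatterlist *sg, unsigned int len,
                               int max_sg)
    {
            int ns = sg_nents_for_len(sg, len);   /* entries for len bytes */

            return ns < 0 || ns > max_sg;
    }
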
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
index d8623c7e0d1d..9f6594699835 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
@@ -283,7 +283,7 @@ static struct sun8i_ce_alg_template ce_algs[] = {
.cra_priority = 400,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
- CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+ CRYPTO_ALG_ASYNC |
CRYPTO_ALG_NEED_FALLBACK,
.cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
.cra_module = THIS_MODULE,
@@ -310,7 +310,7 @@ static struct sun8i_ce_alg_template ce_algs[] = {
.cra_priority = 400,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
- CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+ CRYPTO_ALG_ASYNC |
CRYPTO_ALG_NEED_FALLBACK,
.cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
.cra_module = THIS_MODULE,
@@ -336,7 +336,7 @@ static struct sun8i_ce_alg_template ce_algs[] = {
.cra_priority = 400,
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
- CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+ CRYPTO_ALG_ASYNC |
CRYPTO_ALG_NEED_FALLBACK,
.cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
.cra_module = THIS_MODULE,
@@ -363,7 +363,7 @@ static struct sun8i_ce_alg_template ce_algs[] = {
.cra_priority = 400,
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
- CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+ CRYPTO_ALG_ASYNC |
CRYPTO_ALG_NEED_FALLBACK,
.cra_ctxsize = sizeof(struct sun8i_cipher_tfm_ctx),
.cra_module = THIS_MODULE,
@@ -595,19 +595,47 @@ static int sun8i_ce_debugfs_show(struct seq_file *seq, void *v)
continue;
switch (ce_algs[i].type) {
case CRYPTO_ALG_TYPE_SKCIPHER:
- seq_printf(seq, "%s %s %lu %lu\n",
+ seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n",
ce_algs[i].alg.skcipher.base.cra_driver_name,
ce_algs[i].alg.skcipher.base.cra_name,
ce_algs[i].stat_req, ce_algs[i].stat_fb);
+ seq_printf(seq, "\tLast fallback is: %s\n",
+ ce_algs[i].fbname);
+ seq_printf(seq, "\tFallback due to 0 length: %lu\n",
+ ce_algs[i].stat_fb_len0);
+ seq_printf(seq, "\tFallback due to length !mod16: %lu\n",
+ ce_algs[i].stat_fb_mod16);
+ seq_printf(seq, "\tFallback due to length < IV: %lu\n",
+ ce_algs[i].stat_fb_leniv);
+ seq_printf(seq, "\tFallback due to source alignment: %lu\n",
+ ce_algs[i].stat_fb_srcali);
+ seq_printf(seq, "\tFallback due to dest alignment: %lu\n",
+ ce_algs[i].stat_fb_dstali);
+ seq_printf(seq, "\tFallback due to source length: %lu\n",
+ ce_algs[i].stat_fb_srclen);
+ seq_printf(seq, "\tFallback due to dest length: %lu\n",
+ ce_algs[i].stat_fb_dstlen);
+ seq_printf(seq, "\tFallback due to SG numbers: %lu\n",
+ ce_algs[i].stat_fb_maxsg);
break;
case CRYPTO_ALG_TYPE_AHASH:
- seq_printf(seq, "%s %s %lu %lu\n",
+ seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n",
ce_algs[i].alg.hash.halg.base.cra_driver_name,
ce_algs[i].alg.hash.halg.base.cra_name,
ce_algs[i].stat_req, ce_algs[i].stat_fb);
+ seq_printf(seq, "\tLast fallback is: %s\n",
+ ce_algs[i].fbname);
+ seq_printf(seq, "\tFallback due to 0 length: %lu\n",
+ ce_algs[i].stat_fb_len0);
+ seq_printf(seq, "\tFallback due to length: %lu\n",
+ ce_algs[i].stat_fb_srclen);
+ seq_printf(seq, "\tFallback due to alignment: %lu\n",
+ ce_algs[i].stat_fb_srcali);
+ seq_printf(seq, "\tFallback due to SG numbers: %lu\n",
+ ce_algs[i].stat_fb_maxsg);
break;
case CRYPTO_ALG_TYPE_RNG:
- seq_printf(seq, "%s %s %lu %lu\n",
+ seq_printf(seq, "%s %s reqs=%lu bytes=%lu\n",
ce_algs[i].alg.rng.base.cra_driver_name,
ce_algs[i].alg.rng.base.cra_name,
ce_algs[i].stat_req, ce_algs[i].stat_bytes);
@@ -673,6 +701,18 @@ static int sun8i_ce_allocate_chanlist(struct sun8i_ce_dev *ce)
err = -ENOMEM;
goto error_engine;
}
+ ce->chanlist[i].bounce_iv = devm_kmalloc(ce->dev, AES_BLOCK_SIZE,
+ GFP_KERNEL | GFP_DMA);
+ if (!ce->chanlist[i].bounce_iv) {
+ err = -ENOMEM;
+ goto error_engine;
+ }
+ ce->chanlist[i].backup_iv = devm_kmalloc(ce->dev, AES_BLOCK_SIZE,
+ GFP_KERNEL);
+ if (!ce->chanlist[i].backup_iv) {
+ err = -ENOMEM;
+ goto error_engine;
+ }
}
return 0;
error_engine:
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
index 859b7522faaa..8b5b9b9d04c3 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
@@ -50,9 +50,9 @@ int sun8i_ce_hash_crainit(struct crypto_tfm *tfm)
sizeof(struct sun8i_ce_hash_reqctx) +
crypto_ahash_reqsize(op->fallback_tfm));
- dev_info(op->ce->dev, "Fallback for %s is %s\n",
- crypto_tfm_alg_driver_name(tfm),
- crypto_tfm_alg_driver_name(&op->fallback_tfm->base));
+ memcpy(algt->fbname, crypto_tfm_alg_driver_name(&op->fallback_tfm->base),
+ CRYPTO_MAX_ALG_NAME);
+
err = pm_runtime_get_sync(op->ce->dev);
if (err < 0)
goto error_pm;
@@ -199,17 +199,32 @@ static int sun8i_ce_hash_digest_fb(struct ahash_request *areq)
static bool sun8i_ce_hash_need_fallback(struct ahash_request *areq)
{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+ struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
+ struct sun8i_ce_alg_template *algt;
struct scatterlist *sg;
- if (areq->nbytes == 0)
+ algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash);
+
+ if (areq->nbytes == 0) {
+ algt->stat_fb_len0++;
return true;
+ }
/* we need to reserve one SG entry for the padding */
- if (sg_nents(areq->src) > MAX_SG - 1)
+ if (sg_nents_for_len(areq->src, areq->nbytes) > MAX_SG - 1) {
+ algt->stat_fb_maxsg++;
return true;
+ }
sg = areq->src;
while (sg) {
- if (sg->length % 4 || !IS_ALIGNED(sg->offset, sizeof(u32)))
+ if (sg->length % 4) {
+ algt->stat_fb_srclen++;
return true;
+ }
+ if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
+ algt->stat_fb_srcali++;
+ return true;
+ }
sg = sg_next(sg);
}
return false;
@@ -229,7 +244,7 @@ int sun8i_ce_hash_digest(struct ahash_request *areq)
if (sun8i_ce_hash_need_fallback(areq))
return sun8i_ce_hash_digest_fb(areq);
- nr_sgs = sg_nents(areq->src);
+ nr_sgs = sg_nents_for_len(areq->src, areq->nbytes);
if (nr_sgs > MAX_SG - 1)
return sun8i_ce_hash_digest_fb(areq);
@@ -248,6 +263,64 @@ int sun8i_ce_hash_digest(struct ahash_request *areq)
return crypto_transfer_hash_request_to_engine(engine, areq);
}
+static u64 hash_pad(__le32 *buf, unsigned int bufsize, u64 padi, u64 byte_count, bool le, int bs)
+{
+ u64 fill, min_fill, j, k;
+ __be64 *bebits;
+ __le64 *lebits;
+
+ j = padi;
+ buf[j++] = cpu_to_le32(0x80);
+
+ if (bs == 64) {
+ fill = 64 - (byte_count % 64);
+ min_fill = 2 * sizeof(u32) + sizeof(u32);
+ } else {
+ fill = 128 - (byte_count % 128);
+ min_fill = 4 * sizeof(u32) + sizeof(u32);
+ }
+
+ if (fill < min_fill)
+ fill += bs;
+
+ k = j;
+ j += (fill - min_fill) / sizeof(u32);
+ if (j * 4 > bufsize) {
+ pr_err("%s OVERFLOW %llu\n", __func__, j);
+ return 0;
+ }
+ for (; k < j; k++)
+ buf[k] = 0;
+
+ if (le) {
+ /* MD5 */
+ lebits = (__le64 *)&buf[j];
+ *lebits = cpu_to_le64(byte_count << 3);
+ j += 2;
+ } else {
+ if (bs == 64) {
+ /* sha1 sha224 sha256 */
+ bebits = (__be64 *)&buf[j];
+ *bebits = cpu_to_be64(byte_count << 3);
+ j += 2;
+ } else {
+			/* sha384 sha512 */
+ bebits = (__be64 *)&buf[j];
+ *bebits = cpu_to_be64(byte_count >> 61);
+ j += 2;
+ bebits = (__be64 *)&buf[j];
+ *bebits = cpu_to_be64(byte_count << 3);
+ j += 2;
+ }
+ }
+ if (j * 4 > bufsize) {
+ pr_err("%s OVERFLOW %llu\n", __func__, j);
+ return 0;
+ }
+
+ return j;
+}
+
int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
{
struct ahash_request *areq = container_of(breq, struct ahash_request, base);
@@ -266,14 +339,11 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
__le32 *bf;
void *buf = NULL;
int j, i, todo;
- int nbw = 0;
- u64 fill, min_fill;
- __be64 *bebits;
- __le64 *lebits;
void *result = NULL;
u64 bs;
int digestsize;
dma_addr_t addr_res, addr_pad;
+ int ns = sg_nents_for_len(areq->src, areq->nbytes);
algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash);
ce = algt->ce;
@@ -318,7 +388,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
cet->t_sym_ctl = 0;
cet->t_asym_ctl = 0;
- nr_sgs = dma_map_sg(ce->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE);
+ nr_sgs = dma_map_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE);
if (nr_sgs <= 0 || nr_sgs > MAX_SG) {
dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs);
err = -EINVAL;
@@ -348,44 +418,25 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
byte_count = areq->nbytes;
j = 0;
- bf[j++] = cpu_to_le32(0x80);
-
- if (bs == 64) {
- fill = 64 - (byte_count % 64);
- min_fill = 2 * sizeof(u32) + (nbw ? 0 : sizeof(u32));
- } else {
- fill = 128 - (byte_count % 128);
- min_fill = 4 * sizeof(u32) + (nbw ? 0 : sizeof(u32));
- }
-
- if (fill < min_fill)
- fill += bs;
-
- j += (fill - min_fill) / sizeof(u32);
switch (algt->ce_algo_id) {
case CE_ID_HASH_MD5:
- lebits = (__le64 *)&bf[j];
- *lebits = cpu_to_le64(byte_count << 3);
- j += 2;
+ j = hash_pad(bf, 2 * bs, j, byte_count, true, bs);
break;
case CE_ID_HASH_SHA1:
case CE_ID_HASH_SHA224:
case CE_ID_HASH_SHA256:
- bebits = (__be64 *)&bf[j];
- *bebits = cpu_to_be64(byte_count << 3);
- j += 2;
+ j = hash_pad(bf, 2 * bs, j, byte_count, false, bs);
break;
case CE_ID_HASH_SHA384:
case CE_ID_HASH_SHA512:
- bebits = (__be64 *)&bf[j];
- *bebits = cpu_to_be64(byte_count >> 61);
- j += 2;
- bebits = (__be64 *)&bf[j];
- *bebits = cpu_to_be64(byte_count << 3);
- j += 2;
+ j = hash_pad(bf, 2 * bs, j, byte_count, false, bs);
break;
}
+ if (!j) {
+ err = -EINVAL;
+ goto theend;
+ }
addr_pad = dma_map_single(ce->dev, buf, j * 4, DMA_TO_DEVICE);
cet->t_src[i].addr = cpu_to_le32(addr_pad);
@@ -406,8 +457,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(areq->base.tfm));
dma_unmap_single(ce->dev, addr_pad, j * 4, DMA_TO_DEVICE);
- dma_unmap_sg(ce->dev, areq->src, sg_nents(areq->src),
- DMA_TO_DEVICE);
+ dma_unmap_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE);
dma_unmap_single(ce->dev, addr_res, digestsize, DMA_FROM_DEVICE);
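The hash_pad() helper added above implements the usual Merkle-Damgard padding: one 0x80 byte, a zero fill, then the message length in bits (a 64-bit length for 64-byte-block algorithms, 128-bit for SHA-384/512). A minimal userspace sketch of the same length arithmetic, assuming SHA-256 (bs = 64) and byte_count = 4, shows the pad completing the block exactly:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t byte_count = 4, bs = 64;
		uint64_t fill = bs - (byte_count % bs);	/* 60 bytes of padding needed */
		uint64_t min_fill = 2 * 4 + 4;		/* two length words + the 0x80 word */

		if (fill < min_fill)	/* too close to the block end: */
			fill += bs;	/* padding spills into one more block */

		/* one 0x80 word, then the zero words, then two length words */
		uint64_t words = 1 + (fill - min_fill) / 4 + 2;
		printf("pad = %llu words (%llu bytes)\n",
		       (unsigned long long)words, (unsigned long long)(words * 4));
		/* prints "pad = 15 words (60 bytes)": 4 data + 60 pad = one 64-byte block */
		return 0;
	}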
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c
index b3a9bbfb8831..b3cc43ea6c8a 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c
@@ -108,11 +108,9 @@ int sun8i_ce_prng_generate(struct crypto_rng *tfm, const u8 *src,
goto err_dst;
}
- err = pm_runtime_get_sync(ce->dev);
- if (err < 0) {
- pm_runtime_put_noidle(ce->dev);
+ err = pm_runtime_resume_and_get(ce->dev);
+ if (err < 0)
goto err_pm;
- }
mutex_lock(&ce->rnglock);
chan = &ce->chanlist[flow];
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
index 624a5926f21f..8177aaba4434 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
@@ -186,6 +186,8 @@ struct ce_task {
* @status: set to 1 by interrupt if task is done
* @t_phy: Physical address of task
* @tl: pointer to the current ce_task for this flow
+ * @backup_iv: buffer which contains the next IV to store
+ * @bounce_iv: buffer which contains the IV
* @stat_req: number of request done by this flow
*/
struct sun8i_ce_flow {
@@ -195,6 +197,8 @@ struct sun8i_ce_flow {
dma_addr_t t_phy;
int timeout;
struct ce_task *tl;
+ void *backup_iv;
+ void *bounce_iv;
#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
unsigned long stat_req;
#endif
@@ -241,8 +245,6 @@ struct sun8i_ce_dev {
* struct sun8i_cipher_req_ctx - context for a skcipher request
* @op_dir: direction (encrypt vs decrypt) for this request
* @flow: the flow to use for this request
- * @backup_iv: buffer which contain the next IV to store
- * @bounce_iv: buffer which contain the IV
* @ivlen: size of bounce_iv
* @nr_sgs: The number of source SG (as given by dma_map_sg())
* @nr_sgd: The number of destination SG (as given by dma_map_sg())
@@ -253,8 +255,6 @@ struct sun8i_ce_dev {
struct sun8i_cipher_req_ctx {
u32 op_dir;
int flow;
- void *backup_iv;
- void *bounce_iv;
unsigned int ivlen;
int nr_sgs;
int nr_sgd;
@@ -333,11 +333,18 @@ struct sun8i_ce_alg_template {
struct ahash_alg hash;
struct rng_alg rng;
} alg;
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
unsigned long stat_req;
unsigned long stat_fb;
unsigned long stat_bytes;
-#endif
+ unsigned long stat_fb_maxsg;
+ unsigned long stat_fb_leniv;
+ unsigned long stat_fb_len0;
+ unsigned long stat_fb_mod16;
+ unsigned long stat_fb_srcali;
+ unsigned long stat_fb_srclen;
+ unsigned long stat_fb_dstali;
+ unsigned long stat_fb_dstlen;
+ char fbname[CRYPTO_MAX_ALG_NAME];
};
int sun8i_ce_enqueue(struct crypto_async_request *areq, u32 type);
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
index 554e400d41ca..5bb950182026 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
@@ -22,34 +22,53 @@
static bool sun8i_ss_need_fallback(struct skcipher_request *areq)
{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+ struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+ struct sun8i_ss_alg_template *algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher);
struct scatterlist *in_sg = areq->src;
struct scatterlist *out_sg = areq->dst;
struct scatterlist *sg;
+ unsigned int todo, len;
- if (areq->cryptlen == 0 || areq->cryptlen % 16)
+ if (areq->cryptlen == 0 || areq->cryptlen % 16) {
+ algt->stat_fb_len++;
return true;
+ }
- if (sg_nents(areq->src) > 8 || sg_nents(areq->dst) > 8)
+ if (sg_nents_for_len(areq->src, areq->cryptlen) > 8 ||
+ sg_nents_for_len(areq->dst, areq->cryptlen) > 8) {
+ algt->stat_fb_sgnum++;
return true;
+ }
+ len = areq->cryptlen;
sg = areq->src;
while (sg) {
- if ((sg->length % 16) != 0)
- return true;
- if ((sg_dma_len(sg) % 16) != 0)
+ todo = min(len, sg->length);
+ if ((todo % 16) != 0) {
+ algt->stat_fb_sglen++;
return true;
- if (!IS_ALIGNED(sg->offset, 16))
+ }
+ if (!IS_ALIGNED(sg->offset, 16)) {
+ algt->stat_fb_align++;
return true;
+ }
+ len -= todo;
sg = sg_next(sg);
}
+ len = areq->cryptlen;
sg = areq->dst;
while (sg) {
- if ((sg->length % 16) != 0)
- return true;
- if ((sg_dma_len(sg) % 16) != 0)
+ todo = min(len, sg->length);
+ if ((todo % 16) != 0) {
+ algt->stat_fb_sglen++;
return true;
- if (!IS_ALIGNED(sg->offset, 16))
+ }
+ if (!IS_ALIGNED(sg->offset, 16)) {
+ algt->stat_fb_align++;
return true;
+ }
+ len -= todo;
sg = sg_next(sg);
}
@@ -93,6 +112,68 @@ static int sun8i_ss_cipher_fallback(struct skcipher_request *areq)
return err;
}
+static int sun8i_ss_setup_ivs(struct skcipher_request *areq)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+ struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+ struct sun8i_ss_dev *ss = op->ss;
+ struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+ struct scatterlist *sg = areq->src;
+ unsigned int todo, offset;
+ unsigned int len = areq->cryptlen;
+ unsigned int ivsize = crypto_skcipher_ivsize(tfm);
+ struct sun8i_ss_flow *sf = &ss->flows[rctx->flow];
+ int i = 0;
+ u32 a;
+ int err;
+
+ rctx->ivlen = ivsize;
+ if (rctx->op_dir & SS_DECRYPTION) {
+ offset = areq->cryptlen - ivsize;
+ scatterwalk_map_and_copy(sf->biv, areq->src, offset,
+ ivsize, 0);
+ }
+
+ /* we need to copy all IVs from the source in case DMA is bi-directional */
+ while (sg && len) {
+ if (sg_dma_len(sg) == 0) {
+ sg = sg_next(sg);
+ continue;
+ }
+ if (i == 0)
+ memcpy(sf->iv[0], areq->iv, ivsize);
+ a = dma_map_single(ss->dev, sf->iv[i], ivsize, DMA_TO_DEVICE);
+ if (dma_mapping_error(ss->dev, a)) {
+ memzero_explicit(sf->iv[i], ivsize);
+ dev_err(ss->dev, "Cannot DMA MAP IV\n");
+ err = -EFAULT;
+ goto dma_iv_error;
+ }
+ rctx->p_iv[i] = a;
+ /* all the other IVs need to be set up only in the decrypt path */
+ if (rctx->op_dir & SS_ENCRYPTION) {
+ rctx->niv = 1;
+ return 0;
+ }
+ todo = min(len, sg_dma_len(sg));
+ len -= todo;
+ i++;
+ if (i < MAX_SG) {
+ offset = sg->length - ivsize;
+ scatterwalk_map_and_copy(sf->iv[i], sg, offset, ivsize, 0);
+ }
+ rctx->niv = i;
+ sg = sg_next(sg);
+ }
+
+ return 0;
+dma_iv_error:
+ i--;
+ while (i >= 0) {
+ dma_unmap_single(ss->dev, rctx->p_iv[i], ivsize, DMA_TO_DEVICE);
+ memzero_explicit(sf->iv[i], ivsize);
+ i--;
+ }
+ return err;
+}
+
static int sun8i_ss_cipher(struct skcipher_request *areq)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
@@ -101,12 +182,14 @@ static int sun8i_ss_cipher(struct skcipher_request *areq)
struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
struct sun8i_ss_alg_template *algt;
+ struct sun8i_ss_flow *sf = &ss->flows[rctx->flow];
struct scatterlist *sg;
unsigned int todo, len, offset, ivsize;
- void *backup_iv = NULL;
int nr_sgs = 0;
int nr_sgd = 0;
int err = 0;
+ int nsgs = sg_nents_for_len(areq->src, areq->cryptlen);
+ int nsgd = sg_nents_for_len(areq->dst, areq->cryptlen);
int i;
algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher);
@@ -134,34 +217,12 @@ static int sun8i_ss_cipher(struct skcipher_request *areq)
ivsize = crypto_skcipher_ivsize(tfm);
if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) {
- rctx->ivlen = ivsize;
- rctx->biv = kzalloc(ivsize, GFP_KERNEL | GFP_DMA);
- if (!rctx->biv) {
- err = -ENOMEM;
+ err = sun8i_ss_setup_ivs(areq);
+ if (err)
goto theend_key;
- }
- if (rctx->op_dir & SS_DECRYPTION) {
- backup_iv = kzalloc(ivsize, GFP_KERNEL);
- if (!backup_iv) {
- err = -ENOMEM;
- goto theend_key;
- }
- offset = areq->cryptlen - ivsize;
- scatterwalk_map_and_copy(backup_iv, areq->src, offset,
- ivsize, 0);
- }
- memcpy(rctx->biv, areq->iv, ivsize);
- rctx->p_iv = dma_map_single(ss->dev, rctx->biv, rctx->ivlen,
- DMA_TO_DEVICE);
- if (dma_mapping_error(ss->dev, rctx->p_iv)) {
- dev_err(ss->dev, "Cannot DMA MAP IV\n");
- err = -ENOMEM;
- goto theend_iv;
- }
}
if (areq->src == areq->dst) {
- nr_sgs = dma_map_sg(ss->dev, areq->src, sg_nents(areq->src),
- DMA_BIDIRECTIONAL);
+ nr_sgs = dma_map_sg(ss->dev, areq->src, nsgs, DMA_BIDIRECTIONAL);
if (nr_sgs <= 0 || nr_sgs > 8) {
dev_err(ss->dev, "Invalid sg number %d\n", nr_sgs);
err = -EINVAL;
@@ -169,15 +230,13 @@ static int sun8i_ss_cipher(struct skcipher_request *areq)
}
nr_sgd = nr_sgs;
} else {
- nr_sgs = dma_map_sg(ss->dev, areq->src, sg_nents(areq->src),
- DMA_TO_DEVICE);
+ nr_sgs = dma_map_sg(ss->dev, areq->src, nsgs, DMA_TO_DEVICE);
if (nr_sgs <= 0 || nr_sgs > 8) {
dev_err(ss->dev, "Invalid sg number %d\n", nr_sgs);
err = -EINVAL;
goto theend_iv;
}
- nr_sgd = dma_map_sg(ss->dev, areq->dst, sg_nents(areq->dst),
- DMA_FROM_DEVICE);
+ nr_sgd = dma_map_sg(ss->dev, areq->dst, nsgd, DMA_FROM_DEVICE);
if (nr_sgd <= 0 || nr_sgd > 8) {
dev_err(ss->dev, "Invalid sg number %d\n", nr_sgd);
err = -EINVAL;
@@ -233,31 +292,26 @@ sgd_next:
theend_sgs:
if (areq->src == areq->dst) {
- dma_unmap_sg(ss->dev, areq->src, sg_nents(areq->src),
- DMA_BIDIRECTIONAL);
+ dma_unmap_sg(ss->dev, areq->src, nsgs, DMA_BIDIRECTIONAL);
} else {
- dma_unmap_sg(ss->dev, areq->src, sg_nents(areq->src),
- DMA_TO_DEVICE);
- dma_unmap_sg(ss->dev, areq->dst, sg_nents(areq->dst),
- DMA_FROM_DEVICE);
+ dma_unmap_sg(ss->dev, areq->src, nsgs, DMA_TO_DEVICE);
+ dma_unmap_sg(ss->dev, areq->dst, nsgd, DMA_FROM_DEVICE);
}
theend_iv:
- if (rctx->p_iv)
- dma_unmap_single(ss->dev, rctx->p_iv, rctx->ivlen,
- DMA_TO_DEVICE);
-
if (areq->iv && ivsize > 0) {
- if (rctx->biv) {
- offset = areq->cryptlen - ivsize;
- if (rctx->op_dir & SS_DECRYPTION) {
- memcpy(areq->iv, backup_iv, ivsize);
- kfree_sensitive(backup_iv);
- } else {
- scatterwalk_map_and_copy(areq->iv, areq->dst, offset,
- ivsize, 0);
- }
- kfree(rctx->biv);
+ for (i = 0; i < rctx->niv; i++) {
+ dma_unmap_single(ss->dev, rctx->p_iv[i], ivsize, DMA_TO_DEVICE);
+ memzero_explicit(sf->iv[i], ivsize);
+ }
+
+ offset = areq->cryptlen - ivsize;
+ if (rctx->op_dir & SS_DECRYPTION) {
+ memcpy(areq->iv, sf->biv, ivsize);
+ memzero_explicit(sf->biv, ivsize);
+ } else {
+ scatterwalk_map_and_copy(areq->iv, areq->dst, offset,
+ ivsize, 0);
}
}
@@ -349,9 +403,9 @@ int sun8i_ss_cipher_init(struct crypto_tfm *tfm)
crypto_skcipher_reqsize(op->fallback_tfm);
- dev_info(op->ss->dev, "Fallback for %s is %s\n",
- crypto_tfm_alg_driver_name(&sktfm->base),
- crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)));
+ memcpy(algt->fbname,
+ crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)),
+ CRYPTO_MAX_ALG_NAME);
op->enginectx.op.do_one_request = sun8i_ss_handle_cipher_request;
op->enginectx.op.prepare_request = NULL;
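Background for sun8i_ss_setup_ivs() above: with CBC, chunk n (n > 0) must be processed with the last ciphertext block of chunk n - 1 as its IV. When decrypting in place, src == dst is mapped DMA_BIDIRECTIONAL, so the engine may have overwritten that ciphertext block before it is needed; hence one pre-filled IV buffer per source chunk, populated before any DMA starts. A minimal sketch of the save, assuming 16-byte blocks (sg walks the source list, i counts chunks):

	/* stash the ciphertext tail of this source chunk; during decryption
	 * it becomes the IV of the next chunk, fed via rctx->p_iv[i + 1] */
	offset = sg->length - ivsize;
	scatterwalk_map_and_copy(sf->iv[i + 1], sg, offset, ivsize, 0);

Encryption does not need the copies: sun8i_ss_run_task() (next file) can point SS_IV_ADR_REG at the tail of the previous destination chunk, which the engine has already written by then.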
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
index 319fe3279a71..98593a0cff69 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
@@ -66,6 +66,7 @@ int sun8i_ss_run_task(struct sun8i_ss_dev *ss, struct sun8i_cipher_req_ctx *rctx
const char *name)
{
int flow = rctx->flow;
+ unsigned int ivlen = rctx->ivlen;
u32 v = SS_START;
int i;
@@ -104,15 +105,14 @@ int sun8i_ss_run_task(struct sun8i_ss_dev *ss, struct sun8i_cipher_req_ctx *rctx
mutex_lock(&ss->mlock);
writel(rctx->p_key, ss->base + SS_KEY_ADR_REG);
- if (i == 0) {
- if (rctx->p_iv)
- writel(rctx->p_iv, ss->base + SS_IV_ADR_REG);
- } else {
- if (rctx->biv) {
- if (rctx->op_dir == SS_ENCRYPTION)
- writel(rctx->t_dst[i - 1].addr + rctx->t_dst[i - 1].len * 4 - rctx->ivlen, ss->base + SS_IV_ADR_REG);
+ if (ivlen) {
+ if (rctx->op_dir == SS_ENCRYPTION) {
+ if (i == 0)
+ writel(rctx->p_iv[0], ss->base + SS_IV_ADR_REG);
else
- writel(rctx->t_src[i - 1].addr + rctx->t_src[i - 1].len * 4 - rctx->ivlen, ss->base + SS_IV_ADR_REG);
+ writel(rctx->t_dst[i - 1].addr + rctx->t_dst[i - 1].len * 4 - ivlen, ss->base + SS_IV_ADR_REG);
+ } else {
+ writel(rctx->p_iv[i], ss->base + SS_IV_ADR_REG);
}
}
@@ -409,6 +409,37 @@ static struct sun8i_ss_alg_template ss_algs[] = {
}
}
},
+{ .type = CRYPTO_ALG_TYPE_AHASH,
+ .ss_algo_id = SS_ID_HASH_SHA1,
+ .alg.hash = {
+ .init = sun8i_ss_hash_init,
+ .update = sun8i_ss_hash_update,
+ .final = sun8i_ss_hash_final,
+ .finup = sun8i_ss_hash_finup,
+ .digest = sun8i_ss_hash_digest,
+ .export = sun8i_ss_hash_export,
+ .import = sun8i_ss_hash_import,
+ .setkey = sun8i_ss_hmac_setkey,
+ .halg = {
+ .digestsize = SHA1_DIGEST_SIZE,
+ .statesize = sizeof(struct sha1_state),
+ .base = {
+ .cra_name = "hmac(sha1)",
+ .cra_driver_name = "hmac-sha1-sun8i-ss",
+ .cra_priority = 300,
+ .cra_alignmask = 3,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH |
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sun8i_ss_hash_tfm_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_init = sun8i_ss_hash_crainit,
+ .cra_exit = sun8i_ss_hash_craexit,
+ }
+ }
+ }
+},
#endif
};
@@ -430,6 +461,17 @@ static int sun8i_ss_debugfs_show(struct seq_file *seq, void *v)
ss_algs[i].alg.skcipher.base.cra_driver_name,
ss_algs[i].alg.skcipher.base.cra_name,
ss_algs[i].stat_req, ss_algs[i].stat_fb);
+
+ seq_printf(seq, "\tLast fallback is: %s\n",
+ ss_algs[i].fbname);
+ seq_printf(seq, "\tFallback due to length: %lu\n",
+ ss_algs[i].stat_fb_len);
+ seq_printf(seq, "\tFallback due to SG length: %lu\n",
+ ss_algs[i].stat_fb_sglen);
+ seq_printf(seq, "\tFallback due to alignment: %lu\n",
+ ss_algs[i].stat_fb_align);
+ seq_printf(seq, "\tFallback due to SG numbers: %lu\n",
+ ss_algs[i].stat_fb_sgnum);
break;
case CRYPTO_ALG_TYPE_RNG:
seq_printf(seq, "%s %s reqs=%lu tsize=%lu\n",
@@ -442,6 +484,16 @@ static int sun8i_ss_debugfs_show(struct seq_file *seq, void *v)
ss_algs[i].alg.hash.halg.base.cra_driver_name,
ss_algs[i].alg.hash.halg.base.cra_name,
ss_algs[i].stat_req, ss_algs[i].stat_fb);
+ seq_printf(seq, "\tLast fallback is: %s\n",
+ ss_algs[i].fbname);
+ seq_printf(seq, "\tFallback due to length: %lu\n",
+ ss_algs[i].stat_fb_len);
+ seq_printf(seq, "\tFallback due to SG length: %lu\n",
+ ss_algs[i].stat_fb_sglen);
+ seq_printf(seq, "\tFallback due to alignment: %lu\n",
+ ss_algs[i].stat_fb_align);
+ seq_printf(seq, "\tFallback due to SG numbers: %lu\n",
+ ss_algs[i].stat_fb_sgnum);
break;
}
}
@@ -464,7 +516,7 @@ static void sun8i_ss_free_flows(struct sun8i_ss_dev *ss, int i)
*/
static int allocate_flows(struct sun8i_ss_dev *ss)
{
- int i, err;
+ int i, j, err;
ss->flows = devm_kcalloc(ss->dev, MAXFLOW, sizeof(struct sun8i_ss_flow),
GFP_KERNEL);
@@ -474,6 +526,28 @@ static int allocate_flows(struct sun8i_ss_dev *ss)
for (i = 0; i < MAXFLOW; i++) {
init_completion(&ss->flows[i].complete);
+ ss->flows[i].biv = devm_kmalloc(ss->dev, AES_BLOCK_SIZE,
+ GFP_KERNEL | GFP_DMA);
+ if (!ss->flows[i].biv)
+ goto error_engine;
+
+ for (j = 0; j < MAX_SG; j++) {
+ ss->flows[i].iv[j] = devm_kmalloc(ss->dev, AES_BLOCK_SIZE,
+ GFP_KERNEL | GFP_DMA);
+ if (!ss->flows[i].iv[j])
+ goto error_engine;
+ }
+
+ /* the padding can be up to two blocks */
+ ss->flows[i].pad = devm_kmalloc(ss->dev, MAX_PAD_SIZE,
+ GFP_KERNEL | GFP_DMA);
+ if (!ss->flows[i].pad)
+ goto error_engine;
+ ss->flows[i].result = devm_kmalloc(ss->dev, SHA256_DIGEST_SIZE,
+ GFP_KERNEL | GFP_DMA);
+ if (!ss->flows[i].result)
+ goto error_engine;
+
ss->flows[i].engine = crypto_engine_alloc_init(ss->dev, true);
if (!ss->flows[i].engine) {
dev_err(ss->dev, "Cannot allocate engine\n");
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
index 1a71ed49d233..ac417a6b39e5 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
@@ -14,11 +14,99 @@
#include <linux/pm_runtime.h>
#include <linux/scatterlist.h>
#include <crypto/internal/hash.h>
+#include <crypto/hmac.h>
+#include <crypto/scatterwalk.h>
#include <crypto/sha1.h>
#include <crypto/sha2.h>
#include <crypto/md5.h>
#include "sun8i-ss.h"
+static int sun8i_ss_hashkey(struct sun8i_ss_hash_tfm_ctx *tfmctx, const u8 *key,
+ unsigned int keylen)
+{
+ struct crypto_shash *xtfm;
+ struct shash_desc *sdesc;
+ size_t len;
+ int ret = 0;
+
+ xtfm = crypto_alloc_shash("sha1", 0, CRYPTO_ALG_NEED_FALLBACK);
+ if (IS_ERR(xtfm))
+ return PTR_ERR(xtfm);
+
+ len = sizeof(*sdesc) + crypto_shash_descsize(xtfm);
+ sdesc = kmalloc(len, GFP_KERNEL);
+ if (!sdesc) {
+ ret = -ENOMEM;
+ goto err_hashkey_sdesc;
+ }
+ sdesc->tfm = xtfm;
+
+ ret = crypto_shash_init(sdesc);
+ if (ret) {
+ dev_err(tfmctx->ss->dev, "shash init error ret=%d\n", ret);
+ goto err_hashkey;
+ }
+ ret = crypto_shash_finup(sdesc, key, keylen, tfmctx->key);
+ if (ret)
+ dev_err(tfmctx->ss->dev, "shash finup error\n");
+err_hashkey:
+ kfree(sdesc);
+err_hashkey_sdesc:
+ crypto_free_shash(xtfm);
+ return ret;
+}
+
+int sun8i_ss_hmac_setkey(struct crypto_ahash *ahash, const u8 *key,
+ unsigned int keylen)
+{
+ struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(ahash);
+ struct ahash_alg *alg = __crypto_ahash_alg(ahash->base.__crt_alg);
+ struct sun8i_ss_alg_template *algt;
+ int digestsize, i;
+ int bs = crypto_ahash_blocksize(ahash);
+ int ret;
+
+ algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash);
+ digestsize = algt->alg.hash.halg.digestsize;
+
+ if (keylen > bs) {
+ ret = sun8i_ss_hashkey(tfmctx, key, keylen);
+ if (ret)
+ return ret;
+ tfmctx->keylen = digestsize;
+ } else {
+ tfmctx->keylen = keylen;
+ memcpy(tfmctx->key, key, keylen);
+ }
+
+ tfmctx->ipad = kzalloc(bs, GFP_KERNEL | GFP_DMA);
+ if (!tfmctx->ipad)
+ return -ENOMEM;
+ tfmctx->opad = kzalloc(bs, GFP_KERNEL | GFP_DMA);
+ if (!tfmctx->opad) {
+ ret = -ENOMEM;
+ goto err_opad;
+ }
+
+ memset(tfmctx->key + tfmctx->keylen, 0, bs - tfmctx->keylen);
+ memcpy(tfmctx->ipad, tfmctx->key, tfmctx->keylen);
+ memcpy(tfmctx->opad, tfmctx->key, tfmctx->keylen);
+ for (i = 0; i < bs; i++) {
+ tfmctx->ipad[i] ^= HMAC_IPAD_VALUE;
+ tfmctx->opad[i] ^= HMAC_OPAD_VALUE;
+ }
+
+ ret = crypto_ahash_setkey(tfmctx->fallback_tfm, key, keylen);
+ if (!ret)
+ return 0;
+
+ memzero_explicit(tfmctx->key, keylen);
+ kfree_sensitive(tfmctx->opad);
+err_opad:
+ kfree_sensitive(tfmctx->ipad);
+ return ret;
+}
+
int sun8i_ss_hash_crainit(struct crypto_tfm *tfm)
{
struct sun8i_ss_hash_tfm_ctx *op = crypto_tfm_ctx(tfm);
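sun8i_ss_hmac_setkey() above is the standard RFC 2104 key schedule; HMAC_IPAD_VALUE and HMAC_OPAD_VALUE come from <crypto/hmac.h> (0x36 and 0x5c). A condensed sketch of what the function computes:

	/* K' = H(K) if the key is longer than one block, else K zero-padded */
	memset(tfmctx->key + tfmctx->keylen, 0, bs - tfmctx->keylen);
	for (i = 0; i < bs; i++) {
		tfmctx->ipad[i] = tfmctx->key[i] ^ HMAC_IPAD_VALUE;	/* 0x36 */
		tfmctx->opad[i] = tfmctx->key[i] ^ HMAC_OPAD_VALUE;	/* 0x5c */
	}
	/* HMAC(K, m) = H((K' ^ opad) || H((K' ^ ipad) || m)) */

The two xor'ed blocks are allocated GFP_DMA because each is later handed to the engine as the first source descriptor of its pass.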
@@ -50,9 +138,8 @@ int sun8i_ss_hash_crainit(struct crypto_tfm *tfm)
sizeof(struct sun8i_ss_hash_reqctx) +
crypto_ahash_reqsize(op->fallback_tfm));
- dev_info(op->ss->dev, "Fallback for %s is %s\n",
- crypto_tfm_alg_driver_name(tfm),
- crypto_tfm_alg_driver_name(&op->fallback_tfm->base));
+ memcpy(algt->fbname, crypto_tfm_alg_driver_name(&op->fallback_tfm->base), CRYPTO_MAX_ALG_NAME);
+
err = pm_runtime_get_sync(op->ss->dev);
if (err < 0)
goto error_pm;
@@ -67,6 +154,9 @@ void sun8i_ss_hash_craexit(struct crypto_tfm *tfm)
{
struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_tfm_ctx(tfm);
+ kfree_sensitive(tfmctx->ipad);
+ kfree_sensitive(tfmctx->opad);
+
crypto_free_ahash(tfmctx->fallback_tfm);
pm_runtime_put_sync_suspend(tfmctx->ss->dev);
}
@@ -258,23 +348,48 @@ static int sun8i_ss_run_hash_task(struct sun8i_ss_dev *ss,
static bool sun8i_ss_hash_need_fallback(struct ahash_request *areq)
{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+ struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
+ struct sun8i_ss_alg_template *algt;
struct scatterlist *sg;
- if (areq->nbytes == 0)
+ algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash);
+
+ if (areq->nbytes == 0) {
+ algt->stat_fb_len++;
+ return true;
+ }
+
+ if (areq->nbytes >= MAX_PAD_SIZE - 64) {
+ algt->stat_fb_len++;
return true;
+ }
+
/* we need to reserve one SG for the padding one */
- if (sg_nents(areq->src) > MAX_SG - 1)
+ if (sg_nents(areq->src) > MAX_SG - 1) {
+ algt->stat_fb_sgnum++;
return true;
+ }
+
sg = areq->src;
while (sg) {
/* SS can operate hash only on full block size
* since SS support only MD5,sha1,sha224 and sha256, blocksize
* is always 64
- * TODO: handle request if last SG is not len%64
- * but this will need to copy data on a new SG of size=64
*/
- if (sg->length % 64 || !IS_ALIGNED(sg->offset, sizeof(u32)))
+ /* Only the last block can be bounced to the pad buffer */
+ if (sg->length % 64 && sg_next(sg)) {
+ algt->stat_fb_sglen++;
+ return true;
+ }
+ if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
+ algt->stat_fb_align++;
+ return true;
+ }
+ if (sg->length % 4) {
+ algt->stat_fb_sglen++;
return true;
+ }
sg = sg_next(sg);
}
return false;
@@ -288,21 +403,11 @@ int sun8i_ss_hash_digest(struct ahash_request *areq)
struct sun8i_ss_alg_template *algt;
struct sun8i_ss_dev *ss;
struct crypto_engine *engine;
- struct scatterlist *sg;
- int nr_sgs, e, i;
+ int e;
if (sun8i_ss_hash_need_fallback(areq))
return sun8i_ss_hash_digest_fb(areq);
- nr_sgs = sg_nents(areq->src);
- if (nr_sgs > MAX_SG - 1)
- return sun8i_ss_hash_digest_fb(areq);
-
- for_each_sg(areq->src, sg, nr_sgs, i) {
- if (sg->length % 4 || !IS_ALIGNED(sg->offset, sizeof(u32)))
- return sun8i_ss_hash_digest_fb(areq);
- }
-
algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash);
ss = algt->ss;
@@ -313,6 +418,64 @@ int sun8i_ss_hash_digest(struct ahash_request *areq)
return crypto_transfer_hash_request_to_engine(engine, areq);
}
+static u64 hash_pad(__le32 *buf, unsigned int bufsize, u64 padi, u64 byte_count, bool le, int bs)
+{
+ u64 fill, min_fill, j, k;
+ __be64 *bebits;
+ __le64 *lebits;
+
+ j = padi;
+ buf[j++] = cpu_to_le32(0x80);
+
+ if (bs == 64) {
+ fill = 64 - (byte_count % 64);
+ min_fill = 2 * sizeof(u32) + sizeof(u32);
+ } else {
+ fill = 128 - (byte_count % 128);
+ min_fill = 4 * sizeof(u32) + sizeof(u32);
+ }
+
+ if (fill < min_fill)
+ fill += bs;
+
+ k = j;
+ j += (fill - min_fill) / sizeof(u32);
+ if (j * 4 > bufsize) {
+ pr_err("%s OVERFLOW %llu\n", __func__, j);
+ return 0;
+ }
+ for (; k < j; k++)
+ buf[k] = 0;
+
+ if (le) {
+ /* MD5 */
+ lebits = (__le64 *)&buf[j];
+ *lebits = cpu_to_le64(byte_count << 3);
+ j += 2;
+ } else {
+ if (bs == 64) {
+ /* sha1 sha224 sha256 */
+ bebits = (__be64 *)&buf[j];
+ *bebits = cpu_to_be64(byte_count << 3);
+ j += 2;
+ } else {
+ /* sha384 sha512 */
+ bebits = (__be64 *)&buf[j];
+ *bebits = cpu_to_be64(byte_count >> 61);
+ j += 2;
+ bebits = (__be64 *)&buf[j];
+ *bebits = cpu_to_be64(byte_count << 3);
+ j += 2;
+ }
+ }
+ if (j * 4 > bufsize) {
+ pr_err("%s OVERFLOW %llu\n", __func__, j);
+ return 0;
+ }
+
+ return j;
+}
+
/* sun8i_ss_hash_run - run an ahash request
* Send the data of the request to the SS along with an extra SG with padding
*/
@@ -320,20 +483,26 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
{
struct ahash_request *areq = container_of(breq, struct ahash_request, base);
struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+ struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
struct sun8i_ss_hash_reqctx *rctx = ahash_request_ctx(areq);
struct sun8i_ss_alg_template *algt;
struct sun8i_ss_dev *ss;
struct scatterlist *sg;
+ int bs = crypto_ahash_blocksize(tfm);
int nr_sgs, err, digestsize;
unsigned int len;
- u64 fill, min_fill, byte_count;
+ u64 byte_count;
void *pad, *result;
- int j, i, todo;
- __be64 *bebits;
- __le64 *lebits;
- dma_addr_t addr_res, addr_pad;
+ int j, i, k, todo;
+ dma_addr_t addr_res, addr_pad, addr_xpad;
__le32 *bf;
+ /* HMAC step:
+ * 0: normal hashing
+ * 1: IPAD
+ * 2: OPAD
+ */
+ int hmac = 0;
algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash);
ss = algt->ss;
@@ -342,18 +511,10 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
if (digestsize == SHA224_DIGEST_SIZE)
digestsize = SHA256_DIGEST_SIZE;
- /* the padding could be up to two block. */
- pad = kzalloc(algt->alg.hash.halg.base.cra_blocksize * 2, GFP_KERNEL | GFP_DMA);
- if (!pad)
- return -ENOMEM;
+ result = ss->flows[rctx->flow].result;
+ pad = ss->flows[rctx->flow].pad;
bf = (__le32 *)pad;
- result = kzalloc(digestsize, GFP_KERNEL | GFP_DMA);
- if (!result) {
- kfree(pad);
- return -ENOMEM;
- }
-
for (i = 0; i < MAX_SG; i++) {
rctx->t_dst[i].addr = 0;
rctx->t_dst[i].len = 0;
@@ -376,17 +537,33 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
if (dma_mapping_error(ss->dev, addr_res)) {
dev_err(ss->dev, "DMA map dest\n");
err = -EINVAL;
- goto theend;
+ goto err_dma_result;
}
+ j = 0;
len = areq->nbytes;
- for_each_sg(areq->src, sg, nr_sgs, i) {
- rctx->t_src[i].addr = sg_dma_address(sg);
+ sg = areq->src;
+ i = 0;
+ while (len > 0 && sg) {
+ if (sg_dma_len(sg) == 0) {
+ sg = sg_next(sg);
+ continue;
+ }
todo = min(len, sg_dma_len(sg));
- rctx->t_src[i].len = todo / 4;
- len -= todo;
- rctx->t_dst[i].addr = addr_res;
- rctx->t_dst[i].len = digestsize / 4;
+ /* only the last SG may have a length that is not a multiple of 64 */
+ if (todo % 64 == 0) {
+ rctx->t_src[i].addr = sg_dma_address(sg);
+ rctx->t_src[i].len = todo / 4;
+ rctx->t_dst[i].addr = addr_res;
+ rctx->t_dst[i].len = digestsize / 4;
+ len -= todo;
+ } else {
+ scatterwalk_map_and_copy(bf, sg, 0, todo, 0);
+ j += todo / 4;
+ len -= todo;
+ }
+ sg = sg_next(sg);
+ i++;
}
if (len > 0) {
dev_err(ss->dev, "remaining len %d\n", len);
@@ -394,55 +571,135 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
goto theend;
}
+ if (j > 0)
+ i--;
+
+retry:
byte_count = areq->nbytes;
- j = 0;
- bf[j++] = cpu_to_le32(0x80);
+ if (tfmctx->keylen && hmac == 0) {
+ hmac = 1;
+ /* shift all SG one slot up, to free slot 0 for IPAD */
+ for (k = 6; k >= 0; k--) {
+ rctx->t_src[k + 1].addr = rctx->t_src[k].addr;
+ rctx->t_src[k + 1].len = rctx->t_src[k].len;
+ rctx->t_dst[k + 1].addr = rctx->t_dst[k].addr;
+ rctx->t_dst[k + 1].len = rctx->t_dst[k].len;
+ }
+ addr_xpad = dma_map_single(ss->dev, tfmctx->ipad, bs, DMA_TO_DEVICE);
+ if (dma_mapping_error(ss->dev, addr_xpad)) {
+ dev_err(ss->dev, "Fail to create DMA mapping of ipad\n");
+ goto err_dma_xpad;
+ }
+ rctx->t_src[0].addr = addr_xpad;
+ rctx->t_src[0].len = bs / 4;
+ rctx->t_dst[0].addr = addr_res;
+ rctx->t_dst[0].len = digestsize / 4;
+ i++;
+ byte_count = areq->nbytes + bs;
+ }
+ if (tfmctx->keylen && hmac == 2) {
+ for (i = 0; i < MAX_SG; i++) {
+ rctx->t_src[i].addr = 0;
+ rctx->t_src[i].len = 0;
+ rctx->t_dst[i].addr = 0;
+ rctx->t_dst[i].len = 0;
+ }
- fill = 64 - (byte_count % 64);
- min_fill = 3 * sizeof(u32);
+ addr_res = dma_map_single(ss->dev, result, digestsize, DMA_FROM_DEVICE);
+ if (dma_mapping_error(ss->dev, addr_res)) {
+ dev_err(ss->dev, "Fail to create DMA mapping of result\n");
+ err = -EINVAL;
+ goto err_dma_result;
+ }
+ addr_xpad = dma_map_single(ss->dev, tfmctx->opad, bs, DMA_TO_DEVICE);
+ if (dma_mapping_error(ss->dev, addr_xpad)) {
+ dev_err(ss->dev, "Fail to create DMA mapping of opad\n");
+ goto err_dma_xpad;
+ }
+ rctx->t_src[0].addr = addr_xpad;
+ rctx->t_src[0].len = bs / 4;
- if (fill < min_fill)
- fill += 64;
+ memcpy(bf, result, digestsize);
+ j = digestsize / 4;
+ i = 1;
+ byte_count = digestsize + bs;
- j += (fill - min_fill) / sizeof(u32);
+ rctx->t_dst[0].addr = addr_res;
+ rctx->t_dst[0].len = digestsize / 4;
+ }
switch (algt->ss_algo_id) {
case SS_ID_HASH_MD5:
- lebits = (__le64 *)&bf[j];
- *lebits = cpu_to_le64(byte_count << 3);
- j += 2;
+ j = hash_pad(bf, 4096, j, byte_count, true, bs);
break;
case SS_ID_HASH_SHA1:
case SS_ID_HASH_SHA224:
case SS_ID_HASH_SHA256:
- bebits = (__be64 *)&bf[j];
- *bebits = cpu_to_be64(byte_count << 3);
- j += 2;
+ j = hash_pad(bf, 4096, j, byte_count, false, bs);
break;
}
+ if (!j) {
+ err = -EINVAL;
+ goto theend;
+ }
addr_pad = dma_map_single(ss->dev, pad, j * 4, DMA_TO_DEVICE);
- rctx->t_src[i].addr = addr_pad;
- rctx->t_src[i].len = j;
- rctx->t_dst[i].addr = addr_res;
- rctx->t_dst[i].len = digestsize / 4;
if (dma_mapping_error(ss->dev, addr_pad)) {
dev_err(ss->dev, "DMA error on padding SG\n");
err = -EINVAL;
- goto theend;
+ goto err_dma_pad;
}
+ rctx->t_src[i].addr = addr_pad;
+ rctx->t_src[i].len = j;
+ rctx->t_dst[i].addr = addr_res;
+ rctx->t_dst[i].len = digestsize / 4;
err = sun8i_ss_run_hash_task(ss, rctx, crypto_tfm_alg_name(areq->base.tfm));
+ /*
+ * Summary of the DMA map/unmap flow; hmac starts at 0
+ * (and becomes 1 for an HMAC request):
+ * hmac == 0
+ * MAP src
+ * MAP res
+ *
+ * retry:
+ * if HMAC request then hmac = 1
+ * MAP xpad (ipad)
+ * if hmac == 2
+ * MAP res
+ * MAP xpad (opad)
+ * MAP pad
+ * run the task
+ * UNMAP pad
+ * if hmac
+ * UNMAP xpad
+ * UNMAP res
+ * if hmac < 2
+ * UNMAP src
+ *
+ * if hmac == 1 then hmac = 2, goto retry
+ */
+
dma_unmap_single(ss->dev, addr_pad, j * 4, DMA_TO_DEVICE);
- dma_unmap_sg(ss->dev, areq->src, sg_nents(areq->src),
- DMA_TO_DEVICE);
+
+err_dma_pad:
+ if (hmac > 0)
+ dma_unmap_single(ss->dev, addr_xpad, bs, DMA_TO_DEVICE);
+err_dma_xpad:
dma_unmap_single(ss->dev, addr_res, digestsize, DMA_FROM_DEVICE);
+err_dma_result:
+ if (hmac < 2)
+ dma_unmap_sg(ss->dev, areq->src, sg_nents(areq->src),
+ DMA_TO_DEVICE);
+ if (hmac == 1 && !err) {
+ hmac = 2;
+ goto retry;
+ }
- memcpy(areq->result, result, algt->alg.hash.halg.digestsize);
+ if (!err)
+ memcpy(areq->result, result, algt->alg.hash.halg.digestsize);
theend:
- kfree(pad);
- kfree(result);
local_bh_disable();
crypto_finalize_hash_request(engine, breq, err);
local_bh_enable();
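The retry loop above drives HMAC as two engine passes: pass one (hmac == 1) prepends the ipad block to the message and produces the inner digest; pass two (hmac == 2) hashes the opad block followed by that digest, with byte_count adjusted each time so hash_pad() encodes the correct total length. A hedged cross-check sketch using the generic software implementation (kernel shash API; allocation checks and error handling trimmed), which should yield the same result:

	#include <crypto/hash.h>

	struct crypto_shash *tfm = crypto_alloc_shash("hmac(sha1)", 0, 0);
	SHASH_DESC_ON_STACK(desc, tfm);
	u8 out[20];	/* SHA1_DIGEST_SIZE */

	crypto_shash_setkey(tfm, key, keylen);
	desc->tfm = tfm;
	crypto_shash_digest(desc, msg, msglen, out);	/* 20-byte digest */
	crypto_free_shash(tfm);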
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c
index 246a6782674c..dd677e9ed06f 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c
@@ -112,11 +112,9 @@ int sun8i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src,
goto err_iv;
}
- err = pm_runtime_get_sync(ss->dev);
- if (err < 0) {
- pm_runtime_put_noidle(ss->dev);
+ err = pm_runtime_resume_and_get(ss->dev);
+ if (err < 0)
goto err_pm;
- }
err = 0;
mutex_lock(&ss->mlock);
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
index 28188685b910..df6f08f6092f 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
@@ -82,6 +82,8 @@
#define PRNG_DATA_SIZE (160 / 8)
#define PRNG_SEED_SIZE DIV_ROUND_UP(175, 8)
+#define MAX_PAD_SIZE 4096
+
/*
* struct ss_clock - Describe clocks used by sun8i-ss
* @name: Name of clock needed by this variant
@@ -121,11 +123,19 @@ struct sginfo {
* @complete: completion for the current task on this flow
* @status: set to 1 by interrupt if task is done
* @stat_req: number of request done by this flow
+ * @iv: list of IVs to use for each step
+ * @biv: buffer which contains the backed-up IV
+ * @pad: padding buffer for hash operations
+ * @result: buffer for storing the result of hash operations
*/
struct sun8i_ss_flow {
struct crypto_engine *engine;
struct completion complete;
int status;
+ u8 *iv[MAX_SG];
+ u8 *biv;
+ void *pad;
+ void *result;
#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
unsigned long stat_req;
#endif
@@ -164,28 +174,28 @@ struct sun8i_ss_dev {
* @t_src: list of mapped SGs with their size
* @t_dst: list of mapped SGs with their size
* @p_key: DMA address of the key
- * @p_iv: DMA address of the IV
+ * @p_iv: DMA address of the IVs
+ * @niv: Number of IVs DMA mapped
* @method: current algorithm for this request
* @op_mode: op_mode for this request
* @op_dir: direction (encrypt vs decrypt) for this request
* @flow: the flow to use for this request
- * @ivlen: size of biv
+ * @ivlen: size of IVs
* @keylen: keylen for this request
- * @biv: buffer which contain the IV
* @fallback_req: request struct for invoking the fallback skcipher TFM
*/
struct sun8i_cipher_req_ctx {
struct sginfo t_src[MAX_SG];
struct sginfo t_dst[MAX_SG];
u32 p_key;
- u32 p_iv;
+ u32 p_iv[MAX_SG];
+ int niv;
u32 method;
u32 op_mode;
u32 op_dir;
int flow;
unsigned int ivlen;
unsigned int keylen;
- void *biv;
struct skcipher_request fallback_req; // keep at the end
};
@@ -229,6 +239,10 @@ struct sun8i_ss_hash_tfm_ctx {
struct crypto_engine_ctx enginectx;
struct crypto_ahash *fallback_tfm;
struct sun8i_ss_dev *ss;
+ u8 *ipad;
+ u8 *opad;
+ u8 key[SHA256_BLOCK_SIZE];
+ int keylen;
};
/*
@@ -269,11 +283,14 @@ struct sun8i_ss_alg_template {
struct rng_alg rng;
struct ahash_alg hash;
} alg;
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
unsigned long stat_req;
unsigned long stat_fb;
unsigned long stat_bytes;
-#endif
+ unsigned long stat_fb_len;
+ unsigned long stat_fb_sglen;
+ unsigned long stat_fb_align;
+ unsigned long stat_fb_sgnum;
+ char fbname[CRYPTO_MAX_ALG_NAME];
};
int sun8i_ss_enqueue(struct crypto_async_request *areq, u32 type);
@@ -306,3 +323,5 @@ int sun8i_ss_hash_update(struct ahash_request *areq);
int sun8i_ss_hash_finup(struct ahash_request *areq);
int sun8i_ss_hash_digest(struct ahash_request *areq);
int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq);
+int sun8i_ss_hmac_setkey(struct crypto_ahash *ahash, const u8 *key,
+ unsigned int keylen);
diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
index 333fbefbbccb..59a57279e77b 100644
--- a/drivers/crypto/atmel-ecc.c
+++ b/drivers/crypto/atmel-ecc.c
@@ -398,7 +398,7 @@ static int __init atmel_ecc_init(void)
static void __exit atmel_ecc_exit(void)
{
- flush_scheduled_work();
+ atmel_i2c_flush_queue();
i2c_del_driver(&atmel_ecc_driver);
}
diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c
index 6fd3e969211d..81ce09bedda8 100644
--- a/drivers/crypto/atmel-i2c.c
+++ b/drivers/crypto/atmel-i2c.c
@@ -263,6 +263,8 @@ static void atmel_i2c_work_handler(struct work_struct *work)
work_data->cbk(work_data, work_data->areq, status);
}
+static struct workqueue_struct *atmel_wq;
+
void atmel_i2c_enqueue(struct atmel_i2c_work_data *work_data,
void (*cbk)(struct atmel_i2c_work_data *work_data,
void *areq, int status),
@@ -272,10 +274,16 @@ void atmel_i2c_enqueue(struct atmel_i2c_work_data *work_data,
work_data->areq = areq;
INIT_WORK(&work_data->work, atmel_i2c_work_handler);
- schedule_work(&work_data->work);
+ queue_work(atmel_wq, &work_data->work);
}
EXPORT_SYMBOL(atmel_i2c_enqueue);
+void atmel_i2c_flush_queue(void)
+{
+ flush_workqueue(atmel_wq);
+}
+EXPORT_SYMBOL(atmel_i2c_flush_queue);
+
static inline size_t atmel_i2c_wake_token_sz(u32 bus_clk_rate)
{
u32 no_of_bits = DIV_ROUND_UP(TWLO_USEC * bus_clk_rate, USEC_PER_SEC);
@@ -364,14 +372,24 @@ int atmel_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id)
i2c_set_clientdata(client, i2c_priv);
- ret = device_sanity_check(client);
- if (ret)
- return ret;
-
- return 0;
+ return device_sanity_check(client);
}
EXPORT_SYMBOL(atmel_i2c_probe);
+static int __init atmel_i2c_init(void)
+{
+ atmel_wq = alloc_workqueue("atmel_wq", 0, 0);
+ return atmel_wq ? 0 : -ENOMEM;
+}
+
+static void __exit atmel_i2c_exit(void)
+{
+ destroy_workqueue(atmel_wq);
+}
+
+module_init(atmel_i2c_init);
+module_exit(atmel_i2c_exit);
+
MODULE_AUTHOR("Tudor Ambarus <tudor.ambarus@microchip.com>");
MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver");
MODULE_LICENSE("GPL v2");
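Design note on the dedicated workqueue: flush_scheduled_work() drains the shared system workqueue, so it waits on (and can deadlock against) work the driver never queued; a private queue lets the exit paths flush exactly their own items. The lifecycle in miniature (a sketch; "example_wq" is a placeholder name):

	static struct workqueue_struct *wq;

	static int __init example_init(void)
	{
		wq = alloc_workqueue("example_wq", 0, 0);
		return wq ? 0 : -ENOMEM;
	}

	static void __exit example_exit(void)
	{
		flush_workqueue(wq);	/* wait for our work items only */
		destroy_workqueue(wq);
	}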
diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h
index 63b97b104f16..48929efe2a5b 100644
--- a/drivers/crypto/atmel-i2c.h
+++ b/drivers/crypto/atmel-i2c.h
@@ -173,6 +173,7 @@ void atmel_i2c_enqueue(struct atmel_i2c_work_data *work_data,
void (*cbk)(struct atmel_i2c_work_data *work_data,
void *areq, int status),
void *areq);
+void atmel_i2c_flush_queue(void);
int atmel_i2c_send_receive(struct i2c_client *client, struct atmel_i2c_cmd *cmd);
diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c
index c96c14e7dab1..e4087bdd2475 100644
--- a/drivers/crypto/atmel-sha204a.c
+++ b/drivers/crypto/atmel-sha204a.c
@@ -121,23 +121,24 @@ static int atmel_sha204a_remove(struct i2c_client *client)
struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
if (atomic_read(&i2c_priv->tfm_count)) {
- dev_err(&client->dev, "Device is busy\n");
- return -EBUSY;
+ dev_emerg(&client->dev, "Device is busy, will remove it anyway\n");
+ return 0;
}
- if (i2c_priv->hwrng.priv)
- kfree((void *)i2c_priv->hwrng.priv);
+ kfree((void *)i2c_priv->hwrng.priv);
return 0;
}
static const struct of_device_id atmel_sha204a_dt_ids[] = {
+ { .compatible = "atmel,atsha204", },
{ .compatible = "atmel,atsha204a", },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, atmel_sha204a_dt_ids);
static const struct i2c_device_id atmel_sha204a_id[] = {
+ { "atsha204", 0 },
{ "atsha204a", 0 },
{ /* sentinel */ }
};
@@ -159,7 +160,7 @@ static int __init atmel_sha204a_init(void)
static void __exit atmel_sha204a_exit(void)
{
- flush_scheduled_work();
+ atmel_i2c_flush_queue();
i2c_del_driver(&atmel_sha204a_driver);
}
diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig
index ea9f8b1ae981..ec6a9e6ad4d2 100644
--- a/drivers/crypto/caam/Kconfig
+++ b/drivers/crypto/caam/Kconfig
@@ -151,6 +151,14 @@ config CRYPTO_DEV_FSL_CAAM_RNG_API
Selecting this will register the SEC4 hardware rng to
the hw_random API for supplying the kernel entropy pool.
+config CRYPTO_DEV_FSL_CAAM_PRNG_API
+ bool "Register Pseudo random number generation implementation with Crypto API"
+ default y
+ select CRYPTO_RNG
+ help
+ Selecting this will register the SEC hardware prng with
+ the Crypto API.
+
config CRYPTO_DEV_FSL_CAAM_BLOB_GEN
bool
diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile
index 25f7ae5a4642..acf1b197eb84 100644
--- a/drivers/crypto/caam/Makefile
+++ b/drivers/crypto/caam/Makefile
@@ -20,6 +20,7 @@ caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o
caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI) += caamalg_qi.o
caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o
caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o
+caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PRNG_API) += caamprng.o
caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caampkc.o pkc_desc.o
caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_BLOB_GEN) += blob_gen.o
diff --git a/drivers/crypto/caam/caamprng.c b/drivers/crypto/caam/caamprng.c
new file mode 100644
index 000000000000..4839e66300a2
--- /dev/null
+++ b/drivers/crypto/caam/caamprng.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Driver to expose SEC4 PRNG via crypto RNG API
+ *
+ * Copyright 2022 NXP
+ *
+ */
+
+#include <linux/completion.h>
+#include <crypto/internal/rng.h>
+#include "compat.h"
+#include "regs.h"
+#include "intern.h"
+#include "desc_constr.h"
+#include "jr.h"
+#include "error.h"
+
+/*
+ * Length of used descriptors, see caam_init_desc()
+ */
+#define CAAM_PRNG_MAX_DESC_LEN (CAAM_CMD_SZ + \
+ CAAM_CMD_SZ + \
+ CAAM_CMD_SZ + CAAM_PTR_SZ_MAX)
+
+/* prng per-device context */
+struct caam_prng_ctx {
+ int err;
+ struct completion done;
+};
+
+struct caam_prng_alg {
+ struct rng_alg rng;
+ bool registered;
+};
+
+static void caam_prng_done(struct device *jrdev, u32 *desc, u32 err,
+ void *context)
+{
+ struct caam_prng_ctx *jctx = context;
+
+ jctx->err = err ? caam_jr_strstatus(jrdev, err) : 0;
+
+ complete(&jctx->done);
+}
+
+static u32 *caam_init_reseed_desc(u32 *desc)
+{
+ init_job_desc(desc, 0); /* + 1 cmd_sz */
+ /* Generate random bytes: + 1 cmd_sz */
+ append_operation(desc, OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG |
+ OP_ALG_AS_FINALIZE);
+
+ print_hex_dump_debug("prng reseed desc@: ", DUMP_PREFIX_ADDRESS,
+ 16, 4, desc, desc_bytes(desc), 1);
+
+ return desc;
+}
+
+static u32 *caam_init_prng_desc(u32 *desc, dma_addr_t dst_dma, u32 len)
+{
+ init_job_desc(desc, 0); /* + 1 cmd_sz */
+ /* Generate random bytes: + 1 cmd_sz */
+ append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG);
+ /* Store bytes: + 1 cmd_sz + caam_ptr_sz */
+ append_fifo_store(desc, dst_dma,
+ len, FIFOST_TYPE_RNGSTORE);
+
+ print_hex_dump_debug("prng job desc@: ", DUMP_PREFIX_ADDRESS,
+ 16, 4, desc, desc_bytes(desc), 1);
+
+ return desc;
+}
+
+static int caam_prng_generate(struct crypto_rng *tfm,
+ const u8 *src, unsigned int slen,
+ u8 *dst, unsigned int dlen)
+{
+ struct caam_prng_ctx ctx;
+ struct device *jrdev;
+ dma_addr_t dst_dma;
+ u32 *desc;
+ u8 *buf;
+ int ret;
+
+ buf = kzalloc(dlen, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ jrdev = caam_jr_alloc();
+ ret = PTR_ERR_OR_ZERO(jrdev);
+ if (ret) {
+ pr_err("Job Ring Device allocation failed\n");
+ kfree(buf);
+ return ret;
+ }
+
+ desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL | GFP_DMA);
+ if (!desc) {
+ ret = -ENOMEM;
+ goto out1;
+ }
+
+ dst_dma = dma_map_single(jrdev, buf, dlen, DMA_FROM_DEVICE);
+ if (dma_mapping_error(jrdev, dst_dma)) {
+ dev_err(jrdev, "Failed to map destination buffer memory\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ init_completion(&ctx.done);
+ ret = caam_jr_enqueue(jrdev,
+ caam_init_prng_desc(desc, dst_dma, dlen),
+ caam_prng_done, &ctx);
+
+ if (ret == -EINPROGRESS) {
+ wait_for_completion(&ctx.done);
+ ret = ctx.err;
+ }
+
+ dma_unmap_single(jrdev, dst_dma, dlen, DMA_FROM_DEVICE);
+
+ if (!ret)
+ memcpy(dst, buf, dlen);
+out:
+ kfree(desc);
+out1:
+ caam_jr_free(jrdev);
+ kfree(buf);
+ return ret;
+}
+
+static void caam_prng_exit(struct crypto_tfm *tfm) {}
+
+static int caam_prng_init(struct crypto_tfm *tfm)
+{
+ return 0;
+}
+
+static int caam_prng_seed(struct crypto_rng *tfm,
+ const u8 *seed, unsigned int slen)
+{
+ struct caam_prng_ctx ctx;
+ struct device *jrdev;
+ u32 *desc;
+ int ret;
+
+ if (slen) {
+ pr_err("Seed length should be zero\n");
+ return -EINVAL;
+ }
+
+ jrdev = caam_jr_alloc();
+ ret = PTR_ERR_OR_ZERO(jrdev);
+ if (ret) {
+ pr_err("Job Ring Device allocation failed\n");
+ return ret;
+ }
+
+ desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL | GFP_DMA);
+ if (!desc) {
+ caam_jr_free(jrdev);
+ return -ENOMEM;
+ }
+
+ init_completion(&ctx.done);
+ ret = caam_jr_enqueue(jrdev,
+ caam_init_reseed_desc(desc),
+ caam_prng_done, &ctx);
+
+ if (ret == -EINPROGRESS) {
+ wait_for_completion(&ctx.done);
+ ret = ctx.err;
+ }
+
+ kfree(desc);
+ caam_jr_free(jrdev);
+ return ret;
+}
+
+static struct caam_prng_alg caam_prng_alg = {
+ .rng = {
+ .generate = caam_prng_generate,
+ .seed = caam_prng_seed,
+ .seedsize = 0,
+ .base = {
+ .cra_name = "stdrng",
+ .cra_driver_name = "prng-caam",
+ .cra_priority = 500,
+ .cra_ctxsize = sizeof(struct caam_prng_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_init = caam_prng_init,
+ .cra_exit = caam_prng_exit,
+ },
+ }
+};
+
+void caam_prng_unregister(void *data)
+{
+ if (caam_prng_alg.registered)
+ crypto_unregister_rng(&caam_prng_alg.rng);
+}
+
+int caam_prng_register(struct device *ctrldev)
+{
+ struct caam_drv_private *priv = dev_get_drvdata(ctrldev);
+ u32 rng_inst;
+ int ret = 0;
+
+ /* Check for available RNG blocks before registration */
+ if (priv->era < 10)
+ rng_inst = (rd_reg32(&priv->jr[0]->perfmon.cha_num_ls) &
+ CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT;
+ else
+ rng_inst = rd_reg32(&priv->jr[0]->vreg.rng) & CHA_VER_NUM_MASK;
+
+ if (!rng_inst) {
+ dev_dbg(ctrldev, "RNG block is not available... skipping registering algorithm\n");
+ return ret;
+ }
+
+ ret = crypto_register_rng(&caam_prng_alg.rng);
+ if (ret) {
+ dev_err(ctrldev,
+ "couldn't register rng crypto alg: %d\n",
+ ret);
+ return ret;
+ }
+
+ caam_prng_alg.registered = true;
+
+ dev_info(ctrldev,
+ "rng crypto API alg registered %s\n", caam_prng_alg.rng.base.cra_driver_name);
+
+ return 0;
+}
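Once caam_prng_register() succeeds, the algorithm is reachable under "stdrng" through the generic crypto RNG API. A minimal consumer sketch (example_get_random() is a hypothetical helper, not part of the patch):

	#include <crypto/rng.h>

	static int example_get_random(u8 *buf, unsigned int len)
	{
		struct crypto_rng *rng;
		int err;

		rng = crypto_alloc_rng("stdrng", 0, 0);
		if (IS_ERR(rng))
			return PTR_ERR(rng);

		/* seedsize is 0, so an empty reset reaches caam_prng_seed()
		 * and triggers a TRNG reseed of the DRBG */
		err = crypto_rng_reset(rng, NULL, 0);
		if (!err)
			err = crypto_rng_get_bytes(rng, buf, len);

		crypto_free_rng(rng);
		return err;
	}

Whether "stdrng" resolves to prng-caam depends on the priorities of the other registered stdrng implementations (this one registers at 500).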
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 38c4d88a9d03..32253a064d0f 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -609,6 +609,13 @@ static bool check_version(struct fsl_mc_version *mc_version, u32 major,
}
#endif
+static bool needs_entropy_delay_adjustment(void)
+{
+ if (of_machine_is_compatible("fsl,imx6sx"))
+ return true;
+ return false;
+}
+
/* Probe routine for CAAM top (controller) level */
static int caam_probe(struct platform_device *pdev)
{
@@ -868,6 +875,8 @@ static int caam_probe(struct platform_device *pdev)
* Also, if a handle was instantiated, do not change
* the TRNG parameters.
*/
+ if (needs_entropy_delay_adjustment())
+ ent_delay = 12000;
if (!(ctrlpriv->rng4_sh_init || inst_handles)) {
dev_info(dev,
"Entropy delay = %u\n",
@@ -884,6 +893,15 @@ static int caam_probe(struct platform_device *pdev)
*/
ret = instantiate_rng(dev, inst_handles,
gen_sk);
+ /*
+ * Entropy delay is determined via TRNG characterization.
+ * TRNG characterization is run across different voltages
+ * and temperatures.
+ * If the worst-case value for ent_delay is identified,
+ * the loop can be skipped for that platform.
+ */
+ if (needs_entropy_delay_adjustment())
+ break;
if (ret == -EAGAIN)
/*
* if here, the loop will rerun,
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
index e92210e2ab76..572cf66c887a 100644
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -186,6 +186,21 @@ static inline void caam_rng_exit(struct device *dev) {}
#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API */
+#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_PRNG_API
+
+int caam_prng_register(struct device *dev);
+void caam_prng_unregister(void *data);
+
+#else
+
+static inline int caam_prng_register(struct device *dev)
+{
+ return 0;
+}
+
+static inline void caam_prng_unregister(void *data) {}
+#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_PRNG_API */
+
#ifdef CONFIG_CAAM_QI
int caam_qi_algapi_init(struct device *dev);
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 7f2b1101f567..724fdec18bf9 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -39,6 +39,7 @@ static void register_algs(struct caam_drv_private_jr *jrpriv,
caam_algapi_hash_init(dev);
caam_pkc_init(dev);
jrpriv->hwrng = !caam_rng_init(dev);
+ caam_prng_register(dev);
caam_qi_algapi_init(dev);
algs_unlock:
@@ -53,7 +54,7 @@ static void unregister_algs(void)
goto algs_unlock;
caam_qi_algapi_exit();
-
+ caam_prng_unregister(NULL);
caam_pkc_exit();
caam_algapi_hash_exit();
caam_algapi_exit();
diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c
index 6c61817996a3..432a61aca0c5 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_main.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_main.c
@@ -269,15 +269,17 @@ static void nitrox_remove_from_devlist(struct nitrox_device *ndev)
struct nitrox_device *nitrox_get_first_device(void)
{
- struct nitrox_device *ndev;
+ struct nitrox_device *ndev = NULL, *iter;
mutex_lock(&devlist_lock);
- list_for_each_entry(ndev, &ndevlist, list) {
- if (nitrox_ready(ndev))
+ list_for_each_entry(iter, &ndevlist, list) {
+ if (nitrox_ready(iter)) {
+ ndev = iter;
break;
+ }
}
mutex_unlock(&devlist_lock);
- if (&ndev->list == &ndevlist)
+ if (!ndev)
return NULL;
refcount_inc(&ndev->refcnt);
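The nitrox change is an instance of a common list_for_each_entry() pitfall: when the loop terminates without a break, the cursor holds a container_of() computed from the list head rather than a valid element, so using it afterwards (as the old `&ndev->list == &ndevlist` test did) is fragile. The safe shape, sketched generically:

	struct nitrox_device *found = NULL, *iter;

	list_for_each_entry(iter, &ndevlist, list) {
		if (nitrox_ready(iter)) {
			found = iter;	/* publish only a matched element */
			break;
		}
	}
	/* 'iter' must not be dereferenced here; test 'found' instead */
	if (!found)
		return NULL;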
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index ae7b44599914..c9c741ac8442 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -70,17 +70,23 @@ static unsigned int psp_get_capability(struct psp_device *psp)
*/
if (val == 0xffffffff) {
dev_notice(psp->dev, "psp: unable to access the device: you might be running a broken BIOS.\n");
- return 0;
+ return -ENODEV;
}
+ psp->capability = val;
+
+ /* Detect if TSME and SME are both enabled */
+ if (psp->capability & PSP_CAPABILITY_PSP_SECURITY_REPORTING &&
+ psp->capability & (PSP_SECURITY_TSME_STATUS << PSP_CAPABILITY_PSP_SECURITY_OFFSET) &&
+ cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
+ dev_notice(psp->dev, "psp: Both TSME and SME are active, SME is unnecessary when TSME is active.\n");
- return val;
+ return 0;
}
-static int psp_check_sev_support(struct psp_device *psp,
- unsigned int capability)
+static int psp_check_sev_support(struct psp_device *psp)
{
/* Check if device supports SEV feature */
- if (!(capability & 1)) {
+ if (!(psp->capability & PSP_CAPABILITY_SEV)) {
dev_dbg(psp->dev, "psp does not support SEV\n");
return -ENODEV;
}
@@ -88,11 +94,10 @@ static int psp_check_sev_support(struct psp_device *psp,
return 0;
}
-static int psp_check_tee_support(struct psp_device *psp,
- unsigned int capability)
+static int psp_check_tee_support(struct psp_device *psp)
{
/* Check if device supports TEE feature */
- if (!(capability & 2)) {
+ if (!(psp->capability & PSP_CAPABILITY_TEE)) {
dev_dbg(psp->dev, "psp does not support TEE\n");
return -ENODEV;
}
@@ -100,30 +105,17 @@ static int psp_check_tee_support(struct psp_device *psp,
return 0;
}
-static int psp_check_support(struct psp_device *psp,
- unsigned int capability)
-{
- int sev_support = psp_check_sev_support(psp, capability);
- int tee_support = psp_check_tee_support(psp, capability);
-
- /* Return error if device neither supports SEV nor TEE */
- if (sev_support && tee_support)
- return -ENODEV;
-
- return 0;
-}
-
-static int psp_init(struct psp_device *psp, unsigned int capability)
+static int psp_init(struct psp_device *psp)
{
int ret;
- if (!psp_check_sev_support(psp, capability)) {
+ if (!psp_check_sev_support(psp)) {
ret = sev_dev_init(psp);
if (ret)
return ret;
}
- if (!psp_check_tee_support(psp, capability)) {
+ if (!psp_check_tee_support(psp)) {
ret = tee_dev_init(psp);
if (ret)
return ret;
@@ -136,7 +128,6 @@ int psp_dev_init(struct sp_device *sp)
{
struct device *dev = sp->dev;
struct psp_device *psp;
- unsigned int capability;
int ret;
ret = -ENOMEM;
@@ -155,11 +146,7 @@ int psp_dev_init(struct sp_device *sp)
psp->io_regs = sp->io_map;
- capability = psp_get_capability(psp);
- if (!capability)
- goto e_disable;
-
- ret = psp_check_support(psp, capability);
+ ret = psp_get_capability(psp);
if (ret)
goto e_disable;
@@ -174,7 +161,7 @@ int psp_dev_init(struct sp_device *sp)
goto e_err;
}
- ret = psp_init(psp, capability);
+ ret = psp_init(psp);
if (ret)
goto e_irq;
diff --git a/drivers/crypto/ccp/psp-dev.h b/drivers/crypto/ccp/psp-dev.h
index ef38e4135d81..d528eb04c3ef 100644
--- a/drivers/crypto/ccp/psp-dev.h
+++ b/drivers/crypto/ccp/psp-dev.h
@@ -45,6 +45,8 @@ struct psp_device {
void *sev_data;
void *tee_data;
+
+ unsigned int capability;
};
void psp_set_sev_irq_handler(struct psp_device *psp, psp_irq_handler_t handler,
@@ -57,4 +59,24 @@ void psp_clear_tee_irq_handler(struct psp_device *psp);
struct psp_device *psp_get_master_device(void);
+#define PSP_CAPABILITY_SEV BIT(0)
+#define PSP_CAPABILITY_TEE BIT(1)
+#define PSP_CAPABILITY_PSP_SECURITY_REPORTING BIT(7)
+
+#define PSP_CAPABILITY_PSP_SECURITY_OFFSET 8
+/*
+ * The PSP doesn't directly store these bits in the capability register
+ * but instead copies them from the results of query command.
+ *
+ * The offsets from the query command are below, and shifted when used.
+ */
+#define PSP_SECURITY_FUSED_PART BIT(0)
+#define PSP_SECURITY_DEBUG_LOCK_ON BIT(2)
+#define PSP_SECURITY_TSME_STATUS BIT(5)
+#define PSP_SECURITY_ANTI_ROLLBACK_STATUS BIT(7)
+#define PSP_SECURITY_RPMC_PRODUCTION_ENABLED BIT(8)
+#define PSP_SECURITY_RPMC_SPIROM_AVAILABLE BIT(9)
+#define PSP_SECURITY_HSP_TPM_AVAILABLE BIT(10)
+#define PSP_SECURITY_ROM_ARMOR_ENFORCED BIT(11)
+
#endif /* __PSP_DEV_H */
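The security attributes sit in the upper bits of the cached capability word, shifted up by PSP_CAPABILITY_PSP_SECURITY_OFFSET, and are only meaningful when the reporting bit is set. A small decoding sketch (psp_tsme_enabled() is a hypothetical helper, not part of the patch):

	static bool psp_tsme_enabled(struct psp_device *psp)
	{
		/* attribute bits are populated only when reporting is supported */
		if (!(psp->capability & PSP_CAPABILITY_PSP_SECURITY_REPORTING))
			return false;

		return psp->capability &
		       (PSP_SECURITY_TSME_STATUS << PSP_CAPABILITY_PSP_SECURITY_OFFSET);
	}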
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 6ab93dfd478a..799b476fc3e8 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -23,6 +23,7 @@
#include <linux/gfp.h>
#include <linux/cpufeature.h>
#include <linux/fs.h>
+#include <linux/fs_struct.h>
#include <asm/smp.h>
@@ -170,6 +171,31 @@ static void *sev_fw_alloc(unsigned long len)
return page_address(page);
}
+static struct file *open_file_as_root(const char *filename, int flags, umode_t mode)
+{
+ struct file *fp;
+ struct path root;
+ struct cred *cred;
+ const struct cred *old_cred;
+
+ task_lock(&init_task);
+ get_fs_root(init_task.fs, &root);
+ task_unlock(&init_task);
+
+ cred = prepare_creds();
+ if (!cred)
+ return ERR_PTR(-ENOMEM);
+ cred->fsuid = GLOBAL_ROOT_UID;
+ old_cred = override_creds(cred);
+
+ fp = file_open_root(&root, filename, flags, mode);
+ path_put(&root);
+
+ revert_creds(old_cred);
+
+ return fp;
+}
+
static int sev_read_init_ex_file(void)
{
struct sev_device *sev = psp_master->sev_data;
@@ -181,7 +207,7 @@ static int sev_read_init_ex_file(void)
if (!sev_init_ex_buffer)
return -EOPNOTSUPP;
- fp = filp_open(init_ex_path, O_RDONLY, 0);
+ fp = open_file_as_root(init_ex_path, O_RDONLY, 0);
if (IS_ERR(fp)) {
int ret = PTR_ERR(fp);
@@ -217,7 +243,7 @@ static void sev_write_init_ex_file(void)
if (!sev_init_ex_buffer)
return;
- fp = filp_open(init_ex_path, O_CREAT | O_WRONLY, 0600);
+ fp = open_file_as_root(init_ex_path, O_CREAT | O_WRONLY, 0600);
if (IS_ERR(fp)) {
dev_err(sev->dev,
"SEV: could not open file for write, error %ld\n",
@@ -435,7 +461,7 @@ static int __sev_platform_init_locked(int *error)
* initialization function should succeed by replacing the state
* with a reset state.
*/
- dev_dbg(sev->dev, "SEV: retrying INIT command");
+ dev_err(sev->dev, "SEV: retrying INIT command because of SECURE_DATA_INVALID error. Retrying once to reset PSP SEV state.");
rc = init_function(&psp_ret);
}
if (error)
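Rationale for open_file_as_root(): filp_open() resolves init_ex_path relative to the calling task's root and fs credentials, and SEV commands may be issued from a chrooted or otherwise confined process, so the helper resolves the path against init's fs root with GLOBAL_ROOT_UID. A hedged usage sketch (init_ex_path is the existing module parameter):

	struct file *fp;

	/* resolved against init's root, not the caller's namespace view */
	fp = open_file_as_root(init_ex_path, O_RDONLY, 0);
	if (IS_ERR(fp))
		return PTR_ERR(fp);
	/* ... kernel_read() the NV data into sev_init_ex_buffer ... */
	filp_close(fp, NULL);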
diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
index 88c672ad27e4..b5970ae54d0e 100644
--- a/drivers/crypto/ccp/sp-pci.c
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -32,6 +32,67 @@ struct sp_pci {
};
static struct sp_device *sp_dev_master;
+#define attribute_show(name, def) \
+static ssize_t name##_show(struct device *d, struct device_attribute *attr, \
+ char *buf) \
+{ \
+ struct sp_device *sp = dev_get_drvdata(d); \
+ struct psp_device *psp = sp->psp_data; \
+ int bit = PSP_SECURITY_##def << PSP_CAPABILITY_PSP_SECURITY_OFFSET; \
+ return sysfs_emit(buf, "%d\n", (psp->capability & bit) > 0); \
+}
+
+attribute_show(fused_part, FUSED_PART)
+static DEVICE_ATTR_RO(fused_part);
+attribute_show(debug_lock_on, DEBUG_LOCK_ON)
+static DEVICE_ATTR_RO(debug_lock_on);
+attribute_show(tsme_status, TSME_STATUS)
+static DEVICE_ATTR_RO(tsme_status);
+attribute_show(anti_rollback_status, ANTI_ROLLBACK_STATUS)
+static DEVICE_ATTR_RO(anti_rollback_status);
+attribute_show(rpmc_production_enabled, RPMC_PRODUCTION_ENABLED)
+static DEVICE_ATTR_RO(rpmc_production_enabled);
+attribute_show(rpmc_spirom_available, RPMC_SPIROM_AVAILABLE)
+static DEVICE_ATTR_RO(rpmc_spirom_available);
+attribute_show(hsp_tpm_available, HSP_TPM_AVAILABLE)
+static DEVICE_ATTR_RO(hsp_tpm_available);
+attribute_show(rom_armor_enforced, ROM_ARMOR_ENFORCED)
+static DEVICE_ATTR_RO(rom_armor_enforced);
+
+static struct attribute *psp_attrs[] = {
+ &dev_attr_fused_part.attr,
+ &dev_attr_debug_lock_on.attr,
+ &dev_attr_tsme_status.attr,
+ &dev_attr_anti_rollback_status.attr,
+ &dev_attr_rpmc_production_enabled.attr,
+ &dev_attr_rpmc_spirom_available.attr,
+ &dev_attr_hsp_tpm_available.attr,
+ &dev_attr_rom_armor_enforced.attr,
+ NULL
+};
+
+static umode_t psp_security_is_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct sp_device *sp = dev_get_drvdata(dev);
+ struct psp_device *psp = sp->psp_data;
+
+ if (psp && (psp->capability & PSP_CAPABILITY_PSP_SECURITY_REPORTING))
+ return 0444;
+
+ return 0;
+}
+
+static struct attribute_group psp_attr_group = {
+ .attrs = psp_attrs,
+ .is_visible = psp_security_is_visible,
+};
+
+static const struct attribute_group *psp_groups[] = {
+ &psp_attr_group,
+ NULL,
+};
+
static int sp_get_msix_irqs(struct sp_device *sp)
{
struct sp_pci *sp_pci = sp->dev_specific;
@@ -391,6 +452,7 @@ static struct pci_driver sp_pci_driver = {
.remove = sp_pci_remove,
.shutdown = sp_pci_shutdown,
.driver.pm = &sp_pci_pm_ops,
+ .dev_groups = psp_groups,
};
int sp_pci_init(void)
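
Note on the sp-pci.c change above: the PSP security bits are published as read-only sysfs attributes through the driver's .dev_groups, with an .is_visible hook that hides the whole group unless the PSP advertises PSP_CAPABILITY_PSP_SECURITY_REPORTING. A self-contained sketch of the same pattern, with hypothetical names and a placeholder capability check:

	#include <linux/device.h>
	#include <linux/sysfs.h>

	static ssize_t feature_show(struct device *d,
				    struct device_attribute *attr, char *buf)
	{
		/* report one capability bit; 1 is a placeholder value */
		return sysfs_emit(buf, "%d\n", 1);
	}
	static DEVICE_ATTR_RO(feature);

	static struct attribute *example_attrs[] = {
		&dev_attr_feature.attr,
		NULL
	};

	/* Show the files read-only if the capability exists, else hide them. */
	static umode_t example_is_visible(struct kobject *kobj,
					  struct attribute *attr, int idx)
	{
		return 0444;	/* return 0 here to hide the attribute */
	}

	static const struct attribute_group example_group = {
		.attrs = example_attrs,
		.is_visible = example_is_visible,
	};

	static const struct attribute_group *example_groups[] = {
		&example_group,
		NULL
	};
	/* hook up in the driver struct with: .dev_groups = example_groups */
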
diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c
index 11e0278c8631..6140e4927322 100644
--- a/drivers/crypto/ccree/cc_buffer_mgr.c
+++ b/drivers/crypto/ccree/cc_buffer_mgr.c
@@ -356,12 +356,14 @@ void cc_unmap_cipher_request(struct device *dev, void *ctx,
req_ctx->mlli_params.mlli_dma_addr);
}
- dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_BIDIRECTIONAL);
- dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src));
-
if (src != dst) {
- dma_unmap_sg(dev, dst, req_ctx->out_nents, DMA_BIDIRECTIONAL);
+ dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_TO_DEVICE);
+ dma_unmap_sg(dev, dst, req_ctx->out_nents, DMA_FROM_DEVICE);
dev_dbg(dev, "Unmapped req->dst=%pK\n", sg_virt(dst));
+ dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src));
+ } else {
+ dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_BIDIRECTIONAL);
+ dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src));
}
}
@@ -377,6 +379,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx,
u32 dummy = 0;
int rc = 0;
u32 mapped_nents = 0;
+ int src_direction = (src != dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL);
req_ctx->dma_buf_type = CC_DMA_BUF_DLLI;
mlli_params->curr_pool = NULL;
@@ -399,7 +402,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx,
}
/* Map the src SGL */
- rc = cc_map_sg(dev, src, nbytes, DMA_BIDIRECTIONAL, &req_ctx->in_nents,
+ rc = cc_map_sg(dev, src, nbytes, src_direction, &req_ctx->in_nents,
LLI_MAX_NUM_OF_DATA_ENTRIES, &dummy, &mapped_nents);
if (rc)
goto cipher_exit;
@@ -416,7 +419,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx,
}
} else {
/* Map the dst sg */
- rc = cc_map_sg(dev, dst, nbytes, DMA_BIDIRECTIONAL,
+ rc = cc_map_sg(dev, dst, nbytes, DMA_FROM_DEVICE,
&req_ctx->out_nents, LLI_MAX_NUM_OF_DATA_ENTRIES,
&dummy, &mapped_nents);
if (rc)
@@ -456,6 +459,7 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req)
struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
unsigned int hw_iv_size = areq_ctx->hw_iv_size;
struct cc_drvdata *drvdata = dev_get_drvdata(dev);
+ int src_direction = (req->src != req->dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL);
if (areq_ctx->mac_buf_dma_addr) {
dma_unmap_single(dev, areq_ctx->mac_buf_dma_addr,
@@ -514,13 +518,11 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req)
sg_virt(req->src), areq_ctx->src.nents, areq_ctx->assoc.nents,
areq_ctx->assoclen, req->cryptlen);
- dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents,
- DMA_BIDIRECTIONAL);
+ dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents, src_direction);
if (req->src != req->dst) {
dev_dbg(dev, "Unmapping dst sgl: req->dst=%pK\n",
sg_virt(req->dst));
- dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents,
- DMA_BIDIRECTIONAL);
+ dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents, DMA_FROM_DEVICE);
}
if (drvdata->coherent &&
areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT &&
@@ -843,7 +845,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata,
else
size_for_map -= authsize;
- rc = cc_map_sg(dev, req->dst, size_for_map, DMA_BIDIRECTIONAL,
+ rc = cc_map_sg(dev, req->dst, size_for_map, DMA_FROM_DEVICE,
&areq_ctx->dst.mapped_nents,
LLI_MAX_NUM_OF_DATA_ENTRIES, &dst_last_bytes,
&dst_mapped_nents);
@@ -1056,7 +1058,8 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req)
size_to_map += authsize;
}
- rc = cc_map_sg(dev, req->src, size_to_map, DMA_BIDIRECTIONAL,
+ rc = cc_map_sg(dev, req->src, size_to_map,
+ (req->src != req->dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL),
&areq_ctx->src.mapped_nents,
(LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES +
LLI_MAX_NUM_OF_DATA_ENTRIES),
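
Note on the cc_buffer_mgr.c change above: the driver previously mapped both scatterlists DMA_BIDIRECTIONAL; it now maps src as DMA_TO_DEVICE and dst as DMA_FROM_DEVICE for out-of-place requests, keeping DMA_BIDIRECTIONAL only for the in-place case (src == dst) where one buffer is both read and overwritten. A sketch of the direction selection, as an assumed standalone helper:

	#include <linux/dma-direction.h>
	#include <linux/scatterlist.h>

	/* Sketch: pick DMA directions for a crypto request. */
	static void pick_dma_dirs(struct scatterlist *src,
				  struct scatterlist *dst,
				  enum dma_data_direction *src_dir,
				  enum dma_data_direction *dst_dir)
	{
		if (src == dst) {
			/* in-place: the same buffer is read and overwritten */
			*src_dir = DMA_BIDIRECTIONAL;
			*dst_dir = DMA_BIDIRECTIONAL;
		} else {
			/* out-of-place: src is only read, dst only written */
			*src_dir = DMA_TO_DEVICE;
			*dst_dir = DMA_FROM_DEVICE;
		}
	}
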
diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c
index 790fa9058a36..7d1bee86d581 100644
--- a/drivers/crypto/ccree/cc_driver.c
+++ b/drivers/crypto/ccree/cc_driver.c
@@ -529,24 +529,26 @@ static int init_cc_resources(struct platform_device *plat_dev)
goto post_req_mgr_err;
}
- /* Allocate crypto algs */
- rc = cc_cipher_alloc(new_drvdata);
+ /* hash must be allocated first: send_request_init() uses it and
+ * the AEAD code depends on it
+ */
+ rc = cc_hash_alloc(new_drvdata);
if (rc) {
- dev_err(dev, "cc_cipher_alloc failed\n");
+ dev_err(dev, "cc_hash_alloc failed\n");
goto post_buf_mgr_err;
}
- /* hash must be allocated before aead since hash exports APIs */
- rc = cc_hash_alloc(new_drvdata);
+ /* Allocate crypto algs */
+ rc = cc_cipher_alloc(new_drvdata);
if (rc) {
- dev_err(dev, "cc_hash_alloc failed\n");
- goto post_cipher_err;
+ dev_err(dev, "cc_cipher_alloc failed\n");
+ goto post_hash_err;
}
rc = cc_aead_alloc(new_drvdata);
if (rc) {
dev_err(dev, "cc_aead_alloc failed\n");
- goto post_hash_err;
+ goto post_cipher_err;
}
/* If we got here and FIPS mode is enabled
@@ -558,10 +560,10 @@ static int init_cc_resources(struct platform_device *plat_dev)
pm_runtime_put(dev);
return 0;
-post_hash_err:
- cc_hash_free(new_drvdata);
post_cipher_err:
cc_cipher_free(new_drvdata);
+post_hash_err:
+ cc_hash_free(new_drvdata);
post_buf_mgr_err:
cc_buffer_mgr_fini(new_drvdata);
post_req_mgr_err:
@@ -593,8 +595,8 @@ static void cleanup_cc_resources(struct platform_device *plat_dev)
(struct cc_drvdata *)platform_get_drvdata(plat_dev);
cc_aead_free(drvdata);
- cc_hash_free(drvdata);
cc_cipher_free(drvdata);
+ cc_hash_free(drvdata);
cc_buffer_mgr_fini(drvdata);
cc_req_mgr_fini(drvdata);
cc_fips_fini(drvdata);
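
Note on the cc_driver.c change above: allocation order now matches the stated dependency (hash first, then cipher, then AEAD), and the error labels unwind in exactly the reverse order of allocation. A generic sketch of that goto-unwind idiom, with hypothetical types and helpers:

	struct example_drvdata;
	int example_hash_alloc(struct example_drvdata *d);
	void example_hash_free(struct example_drvdata *d);
	int example_cipher_alloc(struct example_drvdata *d);
	void example_cipher_free(struct example_drvdata *d);
	int example_aead_alloc(struct example_drvdata *d);

	static int example_init(struct example_drvdata *d)
	{
		int rc;

		rc = example_hash_alloc(d);	/* first: the others use it */
		if (rc)
			return rc;

		rc = example_cipher_alloc(d);
		if (rc)
			goto err_hash;

		rc = example_aead_alloc(d);	/* last: depends on hash */
		if (rc)
			goto err_cipher;

		return 0;

	err_cipher:
		example_cipher_free(d);
	err_hash:
		example_hash_free(d);
		return rc;
	}
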
diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig
index e572f9982d4e..27e1fa912063 100644
--- a/drivers/crypto/hisilicon/Kconfig
+++ b/drivers/crypto/hisilicon/Kconfig
@@ -26,6 +26,7 @@ config CRYPTO_DEV_HISI_SEC2
select CRYPTO_SHA1
select CRYPTO_SHA256
select CRYPTO_SHA512
+ select CRYPTO_SM4
depends on PCI && PCI_MSI
depends on UACCE || UACCE=n
depends on ARM64 || (COMPILE_TEST && 64BIT)
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 36ab30e9e654..9d529df0eab9 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -36,6 +36,12 @@
#define HPRE_DATA_WUSER_CFG 0x301040
#define HPRE_INT_MASK 0x301400
#define HPRE_INT_STATUS 0x301800
+#define HPRE_HAC_INT_MSK 0x301400
+#define HPRE_HAC_RAS_CE_ENB 0x301410
+#define HPRE_HAC_RAS_NFE_ENB 0x301414
+#define HPRE_HAC_RAS_FE_ENB 0x301418
+#define HPRE_HAC_INT_SET 0x301500
+#define HPRE_RNG_TIMEOUT_NUM 0x301A34
#define HPRE_CORE_INT_ENABLE 0
#define HPRE_CORE_INT_DISABLE GENMASK(21, 0)
#define HPRE_RDCHN_INI_ST 0x301a00
@@ -107,6 +113,15 @@
#define HPRE_SQE_MASK_OFFSET 8
#define HPRE_SQE_MASK_LEN 24
+#define HPRE_DFX_BASE 0x301000
+#define HPRE_DFX_COMMON1 0x301400
+#define HPRE_DFX_COMMON2 0x301A00
+#define HPRE_DFX_CORE 0x302000
+#define HPRE_DFX_BASE_LEN 0x55
+#define HPRE_DFX_COMMON1_LEN 0x41
+#define HPRE_DFX_COMMON2_LEN 0xE
+#define HPRE_DFX_CORE_LEN 0x43
+
static const char hpre_name[] = "hisi_hpre";
static struct dentry *hpre_debugfs_root;
static const struct pci_device_id hpre_dev_ids[] = {
@@ -192,28 +207,32 @@ static const u64 hpre_cluster_offsets[] = {
};
static const struct debugfs_reg32 hpre_cluster_dfx_regs[] = {
- {"CORES_EN_STATUS ", HPRE_CORE_EN_OFFSET},
- {"CORES_INI_CFG ", HPRE_CORE_INI_CFG_OFFSET},
- {"CORES_INI_STATUS ", HPRE_CORE_INI_STATUS_OFFSET},
- {"CORES_HTBT_WARN ", HPRE_CORE_HTBT_WARN_OFFSET},
- {"CORES_IS_SCHD ", HPRE_CORE_IS_SCHD_OFFSET},
+ {"CORES_EN_STATUS ", HPRE_CORE_EN_OFFSET},
+ {"CORES_INI_CFG ", HPRE_CORE_INI_CFG_OFFSET},
+ {"CORES_INI_STATUS ", HPRE_CORE_INI_STATUS_OFFSET},
+ {"CORES_HTBT_WARN ", HPRE_CORE_HTBT_WARN_OFFSET},
+ {"CORES_IS_SCHD ", HPRE_CORE_IS_SCHD_OFFSET},
};
static const struct debugfs_reg32 hpre_com_dfx_regs[] = {
- {"READ_CLR_EN ", HPRE_CTRL_CNT_CLR_CE},
- {"AXQOS ", HPRE_VFG_AXQOS},
- {"AWUSR_CFG ", HPRE_AWUSR_FP_CFG},
- {"QM_ARUSR_MCFG1 ", QM_ARUSER_M_CFG_1},
- {"QM_AWUSR_MCFG1 ", QM_AWUSER_M_CFG_1},
- {"BD_ENDIAN ", HPRE_BD_ENDIAN},
- {"ECC_CHECK_CTRL ", HPRE_ECC_BYPASS},
- {"RAS_INT_WIDTH ", HPRE_RAS_WIDTH_CFG},
- {"POISON_BYPASS ", HPRE_POISON_BYPASS},
- {"BD_ARUSER ", HPRE_BD_ARUSR_CFG},
- {"BD_AWUSER ", HPRE_BD_AWUSR_CFG},
- {"DATA_ARUSER ", HPRE_DATA_RUSER_CFG},
- {"DATA_AWUSER ", HPRE_DATA_WUSER_CFG},
- {"INT_STATUS ", HPRE_INT_STATUS},
+ {"READ_CLR_EN ", HPRE_CTRL_CNT_CLR_CE},
+ {"AXQOS ", HPRE_VFG_AXQOS},
+ {"AWUSR_CFG ", HPRE_AWUSR_FP_CFG},
+ {"BD_ENDIAN ", HPRE_BD_ENDIAN},
+ {"ECC_CHECK_CTRL ", HPRE_ECC_BYPASS},
+ {"RAS_INT_WIDTH ", HPRE_RAS_WIDTH_CFG},
+ {"POISON_BYPASS ", HPRE_POISON_BYPASS},
+ {"BD_ARUSER ", HPRE_BD_ARUSR_CFG},
+ {"BD_AWUSER ", HPRE_BD_AWUSR_CFG},
+ {"DATA_ARUSER ", HPRE_DATA_RUSER_CFG},
+ {"DATA_AWUSER ", HPRE_DATA_WUSER_CFG},
+ {"INT_STATUS ", HPRE_INT_STATUS},
+ {"INT_MASK ", HPRE_HAC_INT_MSK},
+ {"RAS_CE_ENB ", HPRE_HAC_RAS_CE_ENB},
+ {"RAS_NFE_ENB ", HPRE_HAC_RAS_NFE_ENB},
+ {"RAS_FE_ENB ", HPRE_HAC_RAS_FE_ENB},
+ {"INT_SET ", HPRE_HAC_INT_SET},
+ {"RNG_TIMEOUT_NUM ", HPRE_RNG_TIMEOUT_NUM},
};
static const char *hpre_dfx_files[HPRE_DFX_FILE_NUM] = {
@@ -226,6 +245,53 @@ static const char *hpre_dfx_files[HPRE_DFX_FILE_NUM] = {
"invalid_req_cnt"
};
+/* define the HPRE's dfx regs region and region length */
+static struct dfx_diff_registers hpre_diff_regs[] = {
+ {
+ .reg_offset = HPRE_DFX_BASE,
+ .reg_len = HPRE_DFX_BASE_LEN,
+ }, {
+ .reg_offset = HPRE_DFX_COMMON1,
+ .reg_len = HPRE_DFX_COMMON1_LEN,
+ }, {
+ .reg_offset = HPRE_DFX_COMMON2,
+ .reg_len = HPRE_DFX_COMMON2_LEN,
+ }, {
+ .reg_offset = HPRE_DFX_CORE,
+ .reg_len = HPRE_DFX_CORE_LEN,
+ },
+};
+
+static int hpre_diff_regs_show(struct seq_file *s, void *unused)
+{
+ struct hisi_qm *qm = s->private;
+
+ hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.acc_diff_regs,
+ ARRAY_SIZE(hpre_diff_regs));
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(hpre_diff_regs);
+
+static int hpre_com_regs_show(struct seq_file *s, void *unused)
+{
+ hisi_qm_regs_dump(s, s->private);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(hpre_com_regs);
+
+static int hpre_cluster_regs_show(struct seq_file *s, void *unused)
+{
+ hisi_qm_regs_dump(s, s->private);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(hpre_cluster_regs);
+
static const struct kernel_param_ops hpre_uacce_mode_ops = {
.set = uacce_mode_set,
.get = param_get_int,
@@ -779,24 +845,6 @@ static int hpre_debugfs_atomic64_set(void *data, u64 val)
DEFINE_DEBUGFS_ATTRIBUTE(hpre_atomic64_ops, hpre_debugfs_atomic64_get,
hpre_debugfs_atomic64_set, "%llu\n");
-static int hpre_com_regs_show(struct seq_file *s, void *unused)
-{
- hisi_qm_regs_dump(s, s->private);
-
- return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(hpre_com_regs);
-
-static int hpre_cluster_regs_show(struct seq_file *s, void *unused)
-{
- hisi_qm_regs_dump(s, s->private);
-
- return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(hpre_cluster_regs);
-
static int hpre_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir,
enum hpre_ctrl_dbgfs_file type, int indx)
{
@@ -895,6 +943,7 @@ static int hpre_ctrl_debug_init(struct hisi_qm *qm)
static void hpre_dfx_debug_init(struct hisi_qm *qm)
{
+ struct dfx_diff_registers *hpre_regs = qm->debug.acc_diff_regs;
struct hpre *hpre = container_of(qm, struct hpre, qm);
struct hpre_dfx *dfx = hpre->debug.dfx;
struct dentry *parent;
@@ -906,6 +955,10 @@ static void hpre_dfx_debug_init(struct hisi_qm *qm)
debugfs_create_file(hpre_dfx_files[i], 0644, parent, &dfx[i],
&hpre_atomic64_ops);
}
+
+ if (qm->fun_type == QM_HW_PF && hpre_regs)
+ debugfs_create_file("diff_regs", 0444, parent,
+ qm, &hpre_diff_regs_fops);
}
static int hpre_debugfs_init(struct hisi_qm *qm)
@@ -918,6 +971,13 @@ static int hpre_debugfs_init(struct hisi_qm *qm)
qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET;
qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN;
+ ret = hisi_qm_diff_regs_init(qm, hpre_diff_regs,
+ ARRAY_SIZE(hpre_diff_regs));
+ if (ret) {
+ dev_warn(dev, "Failed to init HPRE diff regs!\n");
+ goto debugfs_remove;
+ }
+
hisi_qm_debug_init(qm);
if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_HPRE_PF) {
@@ -931,12 +991,16 @@ static int hpre_debugfs_init(struct hisi_qm *qm)
return 0;
failed_to_create:
+ hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
+debugfs_remove:
debugfs_remove_recursive(qm->debug.debug_root);
return ret;
}
static void hpre_debugfs_exit(struct hisi_qm *qm)
{
+ hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
+
debugfs_remove_recursive(qm->debug.debug_root);
}
@@ -969,6 +1033,82 @@ static int hpre_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
return hisi_qm_init(qm);
}
+static int hpre_show_last_regs_init(struct hisi_qm *qm)
+{
+ int cluster_dfx_regs_num = ARRAY_SIZE(hpre_cluster_dfx_regs);
+ int com_dfx_regs_num = ARRAY_SIZE(hpre_com_dfx_regs);
+ u8 clusters_num = hpre_cluster_num(qm);
+ struct qm_debug *debug = &qm->debug;
+ void __iomem *io_base;
+ int i, j, idx;
+
+ debug->last_words = kcalloc(cluster_dfx_regs_num * clusters_num +
+ com_dfx_regs_num, sizeof(unsigned int), GFP_KERNEL);
+ if (!debug->last_words)
+ return -ENOMEM;
+
+ for (i = 0; i < com_dfx_regs_num; i++)
+ debug->last_words[i] = readl_relaxed(qm->io_base +
+ hpre_com_dfx_regs[i].offset);
+
+ for (i = 0; i < clusters_num; i++) {
+ io_base = qm->io_base + hpre_cluster_offsets[i];
+ for (j = 0; j < cluster_dfx_regs_num; j++) {
+ idx = com_dfx_regs_num + i * cluster_dfx_regs_num + j;
+ debug->last_words[idx] = readl_relaxed(
+ io_base + hpre_cluster_dfx_regs[j].offset);
+ }
+ }
+
+ return 0;
+}
+
+static void hpre_show_last_regs_uninit(struct hisi_qm *qm)
+{
+ struct qm_debug *debug = &qm->debug;
+
+ if (qm->fun_type == QM_HW_VF || !debug->last_words)
+ return;
+
+ kfree(debug->last_words);
+ debug->last_words = NULL;
+}
+
+static void hpre_show_last_dfx_regs(struct hisi_qm *qm)
+{
+ int cluster_dfx_regs_num = ARRAY_SIZE(hpre_cluster_dfx_regs);
+ int com_dfx_regs_num = ARRAY_SIZE(hpre_com_dfx_regs);
+ u8 clusters_num = hpre_cluster_num(qm);
+ struct qm_debug *debug = &qm->debug;
+ struct pci_dev *pdev = qm->pdev;
+ void __iomem *io_base;
+ int i, j, idx;
+ u32 val;
+
+ if (qm->fun_type == QM_HW_VF || !debug->last_words)
+ return;
+
+ /* dump the last values of the debugging registers during controller reset */
+ for (i = 0; i < com_dfx_regs_num; i++) {
+ val = readl_relaxed(qm->io_base + hpre_com_dfx_regs[i].offset);
+ if (debug->last_words[i] != val)
+ pci_info(pdev, "Common_core:%s \t= 0x%08x => 0x%08x\n",
+ hpre_com_dfx_regs[i].name, debug->last_words[i], val);
+ }
+
+ for (i = 0; i < clusters_num; i++) {
+ io_base = qm->io_base + hpre_cluster_offsets[i];
+ for (j = 0; j < cluster_dfx_regs_num; j++) {
+ val = readl_relaxed(io_base +
+ hpre_cluster_dfx_regs[j].offset);
+ idx = com_dfx_regs_num + i * cluster_dfx_regs_num + j;
+ if (debug->last_words[idx] != val)
+ pci_info(pdev, "cluster-%d:%s \t= 0x%08x => 0x%08x\n",
+ i, hpre_cluster_dfx_regs[j].name, debug->last_words[idx], val);
+ }
+ }
+}
+
static void hpre_log_hw_error(struct hisi_qm *qm, u32 err_sts)
{
const struct hpre_hw_error *err = hpre_hw_errors;
@@ -1027,6 +1167,7 @@ static const struct hisi_qm_err_ini hpre_err_ini = {
.open_axi_master_ooo = hpre_open_axi_master_ooo,
.open_sva_prefetch = hpre_open_sva_prefetch,
.close_sva_prefetch = hpre_close_sva_prefetch,
+ .show_last_dfx_regs = hpre_show_last_dfx_regs,
.err_info_init = hpre_err_info_init,
};
@@ -1044,8 +1185,11 @@ static int hpre_pf_probe_init(struct hpre *hpre)
qm->err_ini = &hpre_err_ini;
qm->err_ini->err_info_init(qm);
hisi_qm_dev_err_init(qm);
+ ret = hpre_show_last_regs_init(qm);
+ if (ret)
+ pci_err(qm->pdev, "Failed to init last word regs!\n");
- return 0;
+ return ret;
}
static int hpre_probe_init(struct hpre *hpre)
@@ -1131,6 +1275,7 @@ err_with_qm_start:
hisi_qm_stop(qm, QM_NORMAL);
err_with_err_init:
+ hpre_show_last_regs_uninit(qm);
hisi_qm_dev_err_uninit(qm);
err_with_qm_init:
@@ -1161,6 +1306,7 @@ static void hpre_remove(struct pci_dev *pdev)
if (qm->fun_type == QM_HW_PF) {
hpre_cnt_regs_clear(qm);
qm->debug.curr_qm_qp_num = 0;
+ hpre_show_last_regs_uninit(qm);
hisi_qm_dev_err_uninit(qm);
}
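
Note on the hpre_main.c additions above: the "last words" support snapshots the common and per-cluster DFX registers at probe time and, on controller reset, logs only the registers whose values changed; entries are indexed as com_dfx_regs_num + i * cluster_dfx_regs_num + j. A condensed sketch of the snapshot-and-diff idiom, assuming a single flat register array:

	#include <linux/debugfs.h>
	#include <linux/io.h>
	#include <linux/printk.h>
	#include <linux/slab.h>

	static u32 *regs_snapshot(void __iomem *base,
				  const struct debugfs_reg32 *regs, int n)
	{
		u32 *last;
		int i;

		last = kcalloc(n, sizeof(*last), GFP_KERNEL);
		if (!last)
			return NULL;

		for (i = 0; i < n; i++)
			last[i] = readl_relaxed(base + regs[i].offset);

		return last;
	}

	static void regs_show_changed(void __iomem *base,
				      const struct debugfs_reg32 *regs,
				      const u32 *last, int n)
	{
		int i;

		for (i = 0; i < n; i++) {
			u32 val = readl_relaxed(base + regs[i].offset);

			if (val != last[i])
				pr_info("%s = 0x%08x => 0x%08x\n",
					regs[i].name, last[i], val);
		}
	}
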
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 009132333d2b..b4ca2eb034d7 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -253,7 +253,15 @@
#define QM_QOS_MAX_CIR_U 6
#define QM_QOS_MAX_CIR_S 11
#define QM_QOS_VAL_MAX_LEN 32
-
+#define QM_DFX_BASE 0x0100000
+#define QM_DFX_STATE1 0x0104000
+#define QM_DFX_STATE2 0x01040C8
+#define QM_DFX_COMMON 0x0000
+#define QM_DFX_BASE_LEN 0x5A
+#define QM_DFX_STATE1_LEN 0x2E
+#define QM_DFX_STATE2_LEN 0x11
+#define QM_DFX_COMMON_LEN 0xC3
+#define QM_DFX_REGS_LEN 4UL
#define QM_AUTOSUSPEND_DELAY 3000
#define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \
@@ -467,6 +475,23 @@ static const struct hisi_qm_hw_error qm_hw_error[] = {
{ /* sentinel */ }
};
+/* define the QM's dfx regs region and region length */
+static struct dfx_diff_registers qm_diff_regs[] = {
+ {
+ .reg_offset = QM_DFX_BASE,
+ .reg_len = QM_DFX_BASE_LEN,
+ }, {
+ .reg_offset = QM_DFX_STATE1,
+ .reg_len = QM_DFX_STATE1_LEN,
+ }, {
+ .reg_offset = QM_DFX_STATE2,
+ .reg_len = QM_DFX_STATE2_LEN,
+ }, {
+ .reg_offset = QM_DFX_COMMON,
+ .reg_len = QM_DFX_COMMON_LEN,
+ },
+};
+
static const char * const qm_db_timeout[] = {
"sq", "cq", "eq", "aeq",
};
@@ -687,13 +712,13 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src)
if (!IS_ENABLED(CONFIG_ARM64)) {
memcpy_toio(fun_base, src, 16);
- wmb();
+ dma_wmb();
return;
}
asm volatile("ldp %0, %1, %3\n"
"stp %0, %1, %2\n"
- "dsb sy\n"
+ "dmb oshst\n"
: "=&r" (tmp0),
"=&r" (tmp1),
"+Q" (*((char __iomem *)fun_base))
@@ -982,7 +1007,7 @@ static void qm_set_qp_disable(struct hisi_qp *qp, int offset)
*addr = 1;
/* make sure setup is completed */
- mb();
+ smp_wmb();
}
static void qm_disable_qp(struct hisi_qm *qm, u32 qp_id)
@@ -1625,6 +1650,156 @@ static int qm_regs_show(struct seq_file *s, void *unused)
DEFINE_SHOW_ATTRIBUTE(qm_regs);
+static struct dfx_diff_registers *dfx_regs_init(struct hisi_qm *qm,
+ const struct dfx_diff_registers *cregs, int reg_len)
+{
+ struct dfx_diff_registers *diff_regs;
+ u32 j, base_offset;
+ int i;
+
+ diff_regs = kcalloc(reg_len, sizeof(*diff_regs), GFP_KERNEL);
+ if (!diff_regs)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < reg_len; i++) {
+ if (!cregs[i].reg_len)
+ continue;
+
+ diff_regs[i].reg_offset = cregs[i].reg_offset;
+ diff_regs[i].reg_len = cregs[i].reg_len;
+ diff_regs[i].regs = kcalloc(QM_DFX_REGS_LEN, cregs[i].reg_len,
+ GFP_KERNEL);
+ if (!diff_regs[i].regs)
+ goto alloc_error;
+
+ for (j = 0; j < diff_regs[i].reg_len; j++) {
+ base_offset = diff_regs[i].reg_offset +
+ j * QM_DFX_REGS_LEN;
+ diff_regs[i].regs[j] = readl(qm->io_base + base_offset);
+ }
+ }
+
+ return diff_regs;
+
+alloc_error:
+ while (i > 0) {
+ i--;
+ kfree(diff_regs[i].regs);
+ }
+ kfree(diff_regs);
+ return ERR_PTR(-ENOMEM);
+}
+
+static void dfx_regs_uninit(struct hisi_qm *qm,
+ struct dfx_diff_registers *dregs, int reg_len)
+{
+ int i;
+
+ /* Set the regs pointers to NULL to prevent a double free */
+ for (i = 0; i < reg_len; i++) {
+ kfree(dregs[i].regs);
+ dregs[i].regs = NULL;
+ }
+ kfree(dregs);
+}
+
+/**
+ * hisi_qm_diff_regs_init() - Allocate the diff registers and snapshot their initial values.
+ * @qm: device qm handle.
+ * @dregs: diff registers handle.
+ * @reg_len: diff registers region length.
+ */
+int hisi_qm_diff_regs_init(struct hisi_qm *qm,
+ struct dfx_diff_registers *dregs, int reg_len)
+{
+ if (!qm || !dregs || reg_len <= 0)
+ return -EINVAL;
+
+ if (qm->fun_type != QM_HW_PF)
+ return 0;
+
+ qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs,
+ ARRAY_SIZE(qm_diff_regs));
+ if (IS_ERR(qm->debug.qm_diff_regs))
+ return PTR_ERR(qm->debug.qm_diff_regs);
+
+ qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len);
+ if (IS_ERR(qm->debug.acc_diff_regs)) {
+ dfx_regs_uninit(qm, qm->debug.qm_diff_regs,
+ ARRAY_SIZE(qm_diff_regs));
+ return PTR_ERR(qm->debug.acc_diff_regs);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_init);
+
+/**
+ * hisi_qm_diff_regs_uninit() - Free memory for registers.
+ * @qm: device qm handle.
+ * @reg_len: diff registers region length.
+ */
+void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len)
+{
+ if (!qm || reg_len <= 0 || qm->fun_type != QM_HW_PF)
+ return;
+
+ dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len);
+ dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
+}
+EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_uninit);
+
+/**
+ * hisi_qm_acc_diff_regs_dump() - Dump the changed register values.
+ * @qm: device qm handle.
+ * @s: Debugfs file handle.
+ * @dregs: diff registers handle.
+ * @regs_len: diff registers region length.
+ */
+void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s,
+ struct dfx_diff_registers *dregs, int regs_len)
+{
+ u32 j, val, base_offset;
+ int i, ret;
+
+ if (!qm || !s || !dregs || regs_len <= 0)
+ return;
+
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return;
+
+ down_read(&qm->qps_lock);
+ for (i = 0; i < regs_len; i++) {
+ if (!dregs[i].reg_len)
+ continue;
+
+ for (j = 0; j < dregs[i].reg_len; j++) {
+ base_offset = dregs[i].reg_offset + j * QM_DFX_REGS_LEN;
+ val = readl(qm->io_base + base_offset);
+ if (val != dregs[i].regs[j])
+ seq_printf(s, "0x%08x = 0x%08x ---> 0x%08x\n",
+ base_offset, dregs[i].regs[j], val);
+ }
+ }
+ up_read(&qm->qps_lock);
+
+ hisi_qm_put_dfx_access(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_acc_diff_regs_dump);
+
+static int qm_diff_regs_show(struct seq_file *s, void *unused)
+{
+ struct hisi_qm *qm = s->private;
+
+ hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.qm_diff_regs,
+ ARRAY_SIZE(qm_diff_regs));
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(qm_diff_regs);
+
static ssize_t qm_cmd_read(struct file *filp, char __user *buffer,
size_t count, loff_t *pos)
{
@@ -2660,7 +2835,7 @@ static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type)
* return created qp, -EBUSY if all qps in qm allocated, -ENOMEM if allocating
* qp memory fails.
*/
-struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
+static struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
{
struct hisi_qp *qp;
int ret;
@@ -2678,7 +2853,6 @@ struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
return qp;
}
-EXPORT_SYMBOL_GPL(hisi_qm_create_qp);
/**
* hisi_qm_release_qp() - Release a qp back to its qm.
@@ -2686,7 +2860,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_create_qp);
*
* This function releases the resource of a qp.
*/
-void hisi_qm_release_qp(struct hisi_qp *qp)
+static void hisi_qm_release_qp(struct hisi_qp *qp)
{
struct hisi_qm *qm = qp->qm;
@@ -2704,7 +2878,6 @@ void hisi_qm_release_qp(struct hisi_qp *qp)
qm_pm_put_sync(qm);
}
-EXPORT_SYMBOL_GPL(hisi_qm_release_qp);
static int qm_sq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid)
{
@@ -3053,9 +3226,17 @@ static void qm_qp_event_notifier(struct hisi_qp *qp)
wake_up_interruptible(&qp->uacce_q->wait);
}
+ /* This function returns the number of free qps in the qm. */
static int hisi_qm_get_available_instances(struct uacce_device *uacce)
{
- return hisi_qm_get_free_qp_num(uacce->priv);
+ struct hisi_qm *qm = uacce->priv;
+ int ret;
+
+ down_read(&qm->qps_lock);
+ ret = qm->qp_num - qm->qp_in_used;
+ up_read(&qm->qps_lock);
+
+ return ret;
}
static void hisi_qm_set_hw_reset(struct hisi_qm *qm, int offset)
@@ -3367,24 +3548,6 @@ void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list)
}
EXPORT_SYMBOL_GPL(hisi_qm_wait_task_finish);
-/**
- * hisi_qm_get_free_qp_num() - Get free number of qp in qm.
- * @qm: The qm which want to get free qp.
- *
- * This function return free number of qp in qm.
- */
-int hisi_qm_get_free_qp_num(struct hisi_qm *qm)
-{
- int ret;
-
- down_read(&qm->qps_lock);
- ret = qm->qp_num - qm->qp_in_used;
- up_read(&qm->qps_lock);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(hisi_qm_get_free_qp_num);
-
static void hisi_qp_memory_uninit(struct hisi_qm *qm, int num)
{
struct device *dev = &qm->pdev->dev;
@@ -3498,6 +3661,17 @@ static void hisi_qm_set_state(struct hisi_qm *qm, u8 state)
writel(state, qm->io_base + QM_VF_STATE);
}
+static void qm_last_regs_uninit(struct hisi_qm *qm)
+{
+ struct qm_debug *debug = &qm->debug;
+
+ if (qm->fun_type == QM_HW_VF || !debug->qm_last_words)
+ return;
+
+ kfree(debug->qm_last_words);
+ debug->qm_last_words = NULL;
+}
+
/**
* hisi_qm_uninit() - Uninitialize qm.
* @qm: The qm needed uninit.
@@ -3509,6 +3683,8 @@ void hisi_qm_uninit(struct hisi_qm *qm)
struct pci_dev *pdev = qm->pdev;
struct device *dev = &pdev->dev;
+ qm_last_regs_uninit(qm);
+
qm_cmd_uninit(qm);
kfree(qm->factor);
down_write(&qm->qps_lock);
@@ -3550,7 +3726,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_uninit);
*
* qm hw v1 does not support this interface.
*/
-int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number)
+static int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number)
{
if (!base || !number)
return -EINVAL;
@@ -3562,7 +3738,6 @@ int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number)
return qm->ops->get_vft(qm, base, number);
}
-EXPORT_SYMBOL_GPL(hisi_qm_get_vft);
/**
* hisi_qm_set_vft() - Set vft to a qm.
@@ -4484,6 +4659,7 @@ static void hisi_qm_set_algqos_init(struct hisi_qm *qm)
*/
void hisi_qm_debug_init(struct hisi_qm *qm)
{
+ struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs;
struct qm_dfx *dfx = &qm->debug.dfx;
struct dentry *qm_d;
void *data;
@@ -4499,6 +4675,10 @@ void hisi_qm_debug_init(struct hisi_qm *qm)
qm_create_debugfs_file(qm, qm->debug.qm_d, i);
}
+ if (qm_regs)
+ debugfs_create_file("diff_regs", 0444, qm->debug.qm_d,
+ qm, &qm_diff_regs_fops);
+
debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops);
debugfs_create_file("cmd", 0600, qm->debug.qm_d, qm, &qm_cmd_fops);
@@ -5181,6 +5361,24 @@ static int qm_controller_reset_done(struct hisi_qm *qm)
return 0;
}
+static void qm_show_last_dfx_regs(struct hisi_qm *qm)
+{
+ struct qm_debug *debug = &qm->debug;
+ struct pci_dev *pdev = qm->pdev;
+ u32 val;
+ int i;
+
+ if (qm->fun_type == QM_HW_VF || !debug->qm_last_words)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(qm_dfx_regs); i++) {
+ val = readl_relaxed(qm->io_base + qm_dfx_regs[i].offset);
+ if (debug->qm_last_words[i] != val)
+ pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n",
+ qm_dfx_regs[i].name, debug->qm_last_words[i], val);
+ }
+}
+
static int qm_controller_reset(struct hisi_qm *qm)
{
struct pci_dev *pdev = qm->pdev;
@@ -5196,6 +5394,10 @@ static int qm_controller_reset(struct hisi_qm *qm)
return ret;
}
+ qm_show_last_dfx_regs(qm);
+ if (qm->err_ini->show_last_dfx_regs)
+ qm->err_ini->show_last_dfx_regs(qm);
+
ret = qm_soft_reset(qm);
if (ret) {
pci_err(pdev, "Controller reset failed (%d)\n", ret);
@@ -5906,6 +6108,26 @@ err_alloc_qdma:
return ret;
}
+static void qm_last_regs_init(struct hisi_qm *qm)
+{
+ int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs);
+ struct qm_debug *debug = &qm->debug;
+ int i;
+
+ if (qm->fun_type == QM_HW_VF)
+ return;
+
+ debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int),
+ GFP_KERNEL);
+ if (!debug->qm_last_words)
+ return;
+
+ for (i = 0; i < dfx_regs_num; i++) {
+ debug->qm_last_words[i] = readl_relaxed(qm->io_base +
+ qm_dfx_regs[i].offset);
+ }
+}
+
/**
* hisi_qm_init() - Initialize configures about qm.
* @qm: The qm needing init.
@@ -5958,6 +6180,8 @@ int hisi_qm_init(struct hisi_qm *qm)
qm_cmd_init(qm);
atomic_set(&qm->status.flags, QM_INIT);
+ qm_last_regs_init(qm);
+
return 0;
err_alloc_uacce:
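
Note on the qm.c barrier changes above: wmb()/mb() are relaxed to the weakest primitives that still provide the required ordering — dma_wmb() (and "dmb oshst" in the inline asm) orders stores as observed by the device, while smp_wmb() orders the flag store only against other CPUs. The canonical dma_wmb() use, sketched with a hypothetical descriptor layout:

	#include <linux/io.h>
	#include <linux/types.h>

	struct ring_desc {
		u32 addr;
		u32 len;
		u32 flags;	/* bit 0: owned by device */
	};

	static void ring_submit(struct ring_desc *d, void __iomem *doorbell,
				u32 addr, u32 len)
	{
		d->addr = addr;
		d->len = len;
		dma_wmb();		/* body visible to device before OWN bit */
		d->flags = 1;		/* hand the descriptor to the device */
		writel(1, doorbell);	/* writel orders prior writes vs. MMIO */
	}
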
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index a91635c348b5..6eebe739893c 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -2113,7 +2113,6 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req)
.cra_driver_name = "hisi_sec_"sec_cra_name,\
.cra_priority = SEC_PRIORITY,\
.cra_flags = CRYPTO_ALG_ASYNC |\
- CRYPTO_ALG_ALLOCATES_MEMORY |\
CRYPTO_ALG_NEED_FALLBACK,\
.cra_blocksize = blk_size,\
.cra_ctxsize = sizeof(struct sec_ctx),\
@@ -2366,7 +2365,6 @@ static int sec_aead_decrypt(struct aead_request *a_req)
.cra_driver_name = "hisi_sec_"sec_cra_name,\
.cra_priority = SEC_PRIORITY,\
.cra_flags = CRYPTO_ALG_ASYNC |\
- CRYPTO_ALG_ALLOCATES_MEMORY |\
CRYPTO_ALG_NEED_FALLBACK,\
.cra_blocksize = blk_size,\
.cra_ctxsize = sizeof(struct sec_ctx),\
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 92fae706bdb2..4d85d2cbf376 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -110,6 +110,15 @@
#define SEC_SQE_MASK_LEN 48
#define SEC_SHAPER_TYPE_RATE 400
+#define SEC_DFX_BASE 0x301000
+#define SEC_DFX_CORE 0x302100
+#define SEC_DFX_COMMON1 0x301600
+#define SEC_DFX_COMMON2 0x301C00
+#define SEC_DFX_BASE_LEN 0x9D
+#define SEC_DFX_CORE_LEN 0x32B
+#define SEC_DFX_COMMON1_LEN 0x45
+#define SEC_DFX_COMMON2_LEN 0xBA
+
struct sec_hw_error {
u32 int_msk;
const char *msg;
@@ -226,6 +235,34 @@ static const struct debugfs_reg32 sec_dfx_regs[] = {
{"SEC_BD_SAA8 ", 0x301C40},
};
+/* define the SEC's dfx regs region and region length */
+static struct dfx_diff_registers sec_diff_regs[] = {
+ {
+ .reg_offset = SEC_DFX_BASE,
+ .reg_len = SEC_DFX_BASE_LEN,
+ }, {
+ .reg_offset = SEC_DFX_COMMON1,
+ .reg_len = SEC_DFX_COMMON1_LEN,
+ }, {
+ .reg_offset = SEC_DFX_COMMON2,
+ .reg_len = SEC_DFX_COMMON2_LEN,
+ }, {
+ .reg_offset = SEC_DFX_CORE,
+ .reg_len = SEC_DFX_CORE_LEN,
+ },
+};
+
+static int sec_diff_regs_show(struct seq_file *s, void *unused)
+{
+ struct hisi_qm *qm = s->private;
+
+ hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.acc_diff_regs,
+ ARRAY_SIZE(sec_diff_regs));
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(sec_diff_regs);
+
static int sec_pf_q_num_set(const char *val, const struct kernel_param *kp)
{
return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_SEC_PF);
@@ -729,6 +766,7 @@ DEFINE_SHOW_ATTRIBUTE(sec_regs);
static int sec_core_debug_init(struct hisi_qm *qm)
{
+ struct dfx_diff_registers *sec_regs = qm->debug.acc_diff_regs;
struct sec_dev *sec = container_of(qm, struct sec_dev, qm);
struct device *dev = &qm->pdev->dev;
struct sec_dfx *dfx = &sec->debug.dfx;
@@ -749,6 +787,9 @@ static int sec_core_debug_init(struct hisi_qm *qm)
if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_SEC_PF)
debugfs_create_file("regs", 0444, tmp_d, regset, &sec_regs_fops);
+ if (qm->fun_type == QM_HW_PF && sec_regs)
+ debugfs_create_file("diff_regs", 0444, tmp_d,
+ qm, &sec_diff_regs_fops);
for (i = 0; i < ARRAY_SIZE(sec_dfx_labels); i++) {
atomic64_t *data = (atomic64_t *)((uintptr_t)dfx +
@@ -790,6 +831,14 @@ static int sec_debugfs_init(struct hisi_qm *qm)
sec_debugfs_root);
qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET;
qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN;
+
+ ret = hisi_qm_diff_regs_init(qm, sec_diff_regs,
+ ARRAY_SIZE(sec_diff_regs));
+ if (ret) {
+ dev_warn(dev, "Failed to init SEC diff regs!\n");
+ goto debugfs_remove;
+ }
+
hisi_qm_debug_init(qm);
ret = sec_debug_init(qm);
@@ -799,15 +848,66 @@ static int sec_debugfs_init(struct hisi_qm *qm)
return 0;
failed_to_create:
+ hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
+debugfs_remove:
debugfs_remove_recursive(sec_debugfs_root);
return ret;
}
static void sec_debugfs_exit(struct hisi_qm *qm)
{
+ hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
+
debugfs_remove_recursive(qm->debug.debug_root);
}
+static int sec_show_last_regs_init(struct hisi_qm *qm)
+{
+ struct qm_debug *debug = &qm->debug;
+ int i;
+
+ debug->last_words = kcalloc(ARRAY_SIZE(sec_dfx_regs),
+ sizeof(unsigned int), GFP_KERNEL);
+ if (!debug->last_words)
+ return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(sec_dfx_regs); i++)
+ debug->last_words[i] = readl_relaxed(qm->io_base +
+ sec_dfx_regs[i].offset);
+
+ return 0;
+}
+
+static void sec_show_last_regs_uninit(struct hisi_qm *qm)
+{
+ struct qm_debug *debug = &qm->debug;
+
+ if (qm->fun_type == QM_HW_VF || !debug->last_words)
+ return;
+
+ kfree(debug->last_words);
+ debug->last_words = NULL;
+}
+
+static void sec_show_last_dfx_regs(struct hisi_qm *qm)
+{
+ struct qm_debug *debug = &qm->debug;
+ struct pci_dev *pdev = qm->pdev;
+ u32 val;
+ int i;
+
+ if (qm->fun_type == QM_HW_VF || !debug->last_words)
+ return;
+
+ /* dump the last values of the debugging registers during controller reset */
+ for (i = 0; i < ARRAY_SIZE(sec_dfx_regs); i++) {
+ val = readl_relaxed(qm->io_base + sec_dfx_regs[i].offset);
+ if (val != debug->last_words[i])
+ pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n",
+ sec_dfx_regs[i].name, debug->last_words[i], val);
+ }
+}
+
static void sec_log_hw_error(struct hisi_qm *qm, u32 err_sts)
{
const struct sec_hw_error *errs = sec_hw_errors;
@@ -874,6 +974,7 @@ static const struct hisi_qm_err_ini sec_err_ini = {
.open_axi_master_ooo = sec_open_axi_master_ooo,
.open_sva_prefetch = sec_open_sva_prefetch,
.close_sva_prefetch = sec_close_sva_prefetch,
+ .show_last_dfx_regs = sec_show_last_dfx_regs,
.err_info_init = sec_err_info_init,
};
@@ -892,8 +993,11 @@ static int sec_pf_probe_init(struct sec_dev *sec)
sec_open_sva_prefetch(qm);
hisi_qm_dev_err_init(qm);
sec_debug_regs_clear(qm);
+ ret = sec_show_last_regs_init(qm);
+ if (ret)
+ pci_err(qm->pdev, "Failed to init last word regs!\n");
- return 0;
+ return ret;
}
static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
@@ -1067,6 +1171,7 @@ err_qm_stop:
sec_debugfs_exit(qm);
hisi_qm_stop(qm, QM_NORMAL);
err_probe_uninit:
+ sec_show_last_regs_uninit(qm);
sec_probe_uninit(qm);
err_qm_uninit:
sec_qm_uninit(qm);
@@ -1091,6 +1196,7 @@ static void sec_remove(struct pci_dev *pdev)
if (qm->fun_type == QM_HW_PF)
sec_debug_regs_clear(qm);
+ sec_show_last_regs_uninit(qm);
sec_probe_uninit(qm);
diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c
index f7efc02b065f..2b6f2281cfd6 100644
--- a/drivers/crypto/hisilicon/sgl.c
+++ b/drivers/crypto/hisilicon/sgl.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 HiSilicon Limited. */
+#include <linux/align.h>
#include <linux/dma-mapping.h>
#include <linux/hisi_acc_qm.h>
#include <linux/module.h>
@@ -64,8 +65,9 @@ struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev,
if (!dev || !count || !sge_nr || sge_nr > HISI_ACC_SGL_SGE_NR_MAX)
return ERR_PTR(-EINVAL);
- sgl_size = sizeof(struct acc_hw_sge) * sge_nr +
- sizeof(struct hisi_acc_hw_sgl);
+ sgl_size = ALIGN(sizeof(struct acc_hw_sge) * sge_nr +
+ sizeof(struct hisi_acc_hw_sgl),
+ HISI_ACC_SGL_ALIGN_SIZE);
/*
* the pool may allocate a block of memory of size PAGE_SIZE * 2^(MAX_ORDER - 1),
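
Note on the sgl.c change above: rounding sgl_size up with ALIGN() makes every hardware SGL carved from one large DMA allocation start on an aligned boundary. ALIGN(x, a) rounds x up to the next multiple of a power-of-two a; a small illustration with an assumed 64-byte alignment:

	#include <linux/align.h>
	#include <linux/types.h>

	/* ALIGN(100, 64) == 128, ALIGN(128, 64) == 128 */
	static size_t sgl_entry_size(size_t raw_size)
	{
		return ALIGN(raw_size, 64);	/* 64 is an assumed alignment */
	}
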
diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c
index 9520a4113c81..67869513e48c 100644
--- a/drivers/crypto/hisilicon/zip/zip_crypto.c
+++ b/drivers/crypto/hisilicon/zip/zip_crypto.c
@@ -521,7 +521,7 @@ static int hisi_zip_start_qp(struct hisi_qp *qp, struct hisi_zip_qp_ctx *ctx,
static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *ctx)
{
hisi_qm_stop_qp(ctx->qp);
- hisi_qm_release_qp(ctx->qp);
+ hisi_qm_free_qps(&ctx->qp, 1);
}
static const struct hisi_zip_sqe_ops hisi_zip_ops_v1 = {
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index 4534e1e107d1..9c925e9c0a2d 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -49,14 +49,18 @@
#define HZIP_QM_IDEL_STATUS 0x3040e4
-#define HZIP_CORE_DEBUG_COMP_0 0x302000
-#define HZIP_CORE_DEBUG_COMP_1 0x303000
-#define HZIP_CORE_DEBUG_DECOMP_0 0x304000
-#define HZIP_CORE_DEBUG_DECOMP_1 0x305000
-#define HZIP_CORE_DEBUG_DECOMP_2 0x306000
-#define HZIP_CORE_DEBUG_DECOMP_3 0x307000
-#define HZIP_CORE_DEBUG_DECOMP_4 0x308000
-#define HZIP_CORE_DEBUG_DECOMP_5 0x309000
+#define HZIP_CORE_DFX_BASE 0x301000
+#define HZIP_CLOCK_GATED_CONTL 0x301004
+#define HZIP_CORE_DFX_COMP_0 0x302000
+#define HZIP_CORE_DFX_COMP_1 0x303000
+#define HZIP_CORE_DFX_DECOMP_0 0x304000
+#define HZIP_CORE_DFX_DECOMP_1 0x305000
+#define HZIP_CORE_DFX_DECOMP_2 0x306000
+#define HZIP_CORE_DFX_DECOMP_3 0x307000
+#define HZIP_CORE_DFX_DECOMP_4 0x308000
+#define HZIP_CORE_DFX_DECOMP_5 0x309000
+#define HZIP_CORE_REGS_BASE_LEN 0xB0
+#define HZIP_CORE_REGS_DFX_LEN 0x28
#define HZIP_CORE_INT_SOURCE 0x3010A0
#define HZIP_CORE_INT_MASK_REG 0x3010A4
@@ -230,6 +234,64 @@ static const struct debugfs_reg32 hzip_dfx_regs[] = {
{"HZIP_DECOMP_LZ77_CURR_ST ", 0x9cull},
};
+static const struct debugfs_reg32 hzip_com_dfx_regs[] = {
+ {"HZIP_CLOCK_GATE_CTRL ", 0x301004},
+ {"HZIP_CORE_INT_RAS_CE_ENB ", 0x301160},
+ {"HZIP_CORE_INT_RAS_NFE_ENB ", 0x301164},
+ {"HZIP_CORE_INT_RAS_FE_ENB ", 0x301168},
+ {"HZIP_UNCOM_ERR_RAS_CTRL ", 0x30116C},
+};
+
+static const struct debugfs_reg32 hzip_dump_dfx_regs[] = {
+ {"HZIP_GET_BD_NUM ", 0x00ull},
+ {"HZIP_GET_RIGHT_BD ", 0x04ull},
+ {"HZIP_GET_ERROR_BD ", 0x08ull},
+ {"HZIP_DONE_BD_NUM ", 0x0cull},
+ {"HZIP_MAX_DELAY ", 0x20ull},
+};
+
+/* define the ZIP's dfx regs region and region length */
+static struct dfx_diff_registers hzip_diff_regs[] = {
+ {
+ .reg_offset = HZIP_CORE_DFX_BASE,
+ .reg_len = HZIP_CORE_REGS_BASE_LEN,
+ }, {
+ .reg_offset = HZIP_CORE_DFX_COMP_0,
+ .reg_len = HZIP_CORE_REGS_DFX_LEN,
+ }, {
+ .reg_offset = HZIP_CORE_DFX_COMP_1,
+ .reg_len = HZIP_CORE_REGS_DFX_LEN,
+ }, {
+ .reg_offset = HZIP_CORE_DFX_DECOMP_0,
+ .reg_len = HZIP_CORE_REGS_DFX_LEN,
+ }, {
+ .reg_offset = HZIP_CORE_DFX_DECOMP_1,
+ .reg_len = HZIP_CORE_REGS_DFX_LEN,
+ }, {
+ .reg_offset = HZIP_CORE_DFX_DECOMP_2,
+ .reg_len = HZIP_CORE_REGS_DFX_LEN,
+ }, {
+ .reg_offset = HZIP_CORE_DFX_DECOMP_3,
+ .reg_len = HZIP_CORE_REGS_DFX_LEN,
+ }, {
+ .reg_offset = HZIP_CORE_DFX_DECOMP_4,
+ .reg_len = HZIP_CORE_REGS_DFX_LEN,
+ }, {
+ .reg_offset = HZIP_CORE_DFX_DECOMP_5,
+ .reg_len = HZIP_CORE_REGS_DFX_LEN,
+ },
+};
+
+static int hzip_diff_regs_show(struct seq_file *s, void *unused)
+{
+ struct hisi_qm *qm = s->private;
+
+ hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.acc_diff_regs,
+ ARRAY_SIZE(hzip_diff_regs));
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(hzip_diff_regs);
static const struct kernel_param_ops zip_uacce_mode_ops = {
.set = uacce_mode_set,
.get = param_get_int,
@@ -621,6 +683,7 @@ static int hisi_zip_core_debug_init(struct hisi_qm *qm)
static void hisi_zip_dfx_debug_init(struct hisi_qm *qm)
{
+ struct dfx_diff_registers *hzip_regs = qm->debug.acc_diff_regs;
struct hisi_zip *zip = container_of(qm, struct hisi_zip, qm);
struct hisi_zip_dfx *dfx = &zip->dfx;
struct dentry *tmp_dir;
@@ -634,6 +697,10 @@ static void hisi_zip_dfx_debug_init(struct hisi_qm *qm)
0644, tmp_dir, data,
&zip_atomic64_ops);
}
+
+ if (qm->fun_type == QM_HW_PF && hzip_regs)
+ debugfs_create_file("diff_regs", 0444, tmp_dir,
+ qm, &hzip_diff_regs_fops);
}
static int hisi_zip_ctrl_debug_init(struct hisi_qm *qm)
@@ -666,6 +733,13 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm)
qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET;
qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN;
qm->debug.debug_root = dev_d;
+ ret = hisi_qm_diff_regs_init(qm, hzip_diff_regs,
+ ARRAY_SIZE(hzip_diff_regs));
+ if (ret) {
+ dev_warn(dev, "Failed to init ZIP diff regs!\n");
+ goto debugfs_remove;
+ }
+
hisi_qm_debug_init(qm);
if (qm->fun_type == QM_HW_PF) {
@@ -679,6 +753,8 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm)
return 0;
failed_to_create:
+ hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
+debugfs_remove:
debugfs_remove_recursive(hzip_debugfs_root);
return ret;
}
@@ -703,6 +779,8 @@ static void hisi_zip_debug_regs_clear(struct hisi_qm *qm)
static void hisi_zip_debugfs_exit(struct hisi_qm *qm)
{
+ hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
+
debugfs_remove_recursive(qm->debug.debug_root);
if (qm->fun_type == QM_HW_PF) {
@@ -711,6 +789,87 @@ static void hisi_zip_debugfs_exit(struct hisi_qm *qm)
}
}
+static int hisi_zip_show_last_regs_init(struct hisi_qm *qm)
+{
+ int core_dfx_regs_num = ARRAY_SIZE(hzip_dump_dfx_regs);
+ int com_dfx_regs_num = ARRAY_SIZE(hzip_com_dfx_regs);
+ struct qm_debug *debug = &qm->debug;
+ void __iomem *io_base;
+ int i, j, idx;
+
+ debug->last_words = kcalloc(core_dfx_regs_num * HZIP_CORE_NUM +
+ com_dfx_regs_num, sizeof(unsigned int), GFP_KERNEL);
+ if (!debug->last_words)
+ return -ENOMEM;
+
+ for (i = 0; i < com_dfx_regs_num; i++) {
+ io_base = qm->io_base + hzip_com_dfx_regs[i].offset;
+ debug->last_words[i] = readl_relaxed(io_base);
+ }
+
+ for (i = 0; i < HZIP_CORE_NUM; i++) {
+ io_base = qm->io_base + core_offsets[i];
+ for (j = 0; j < core_dfx_regs_num; j++) {
+ idx = com_dfx_regs_num + i * core_dfx_regs_num + j;
+ debug->last_words[idx] = readl_relaxed(
+ io_base + hzip_dump_dfx_regs[j].offset);
+ }
+ }
+
+ return 0;
+}
+
+static void hisi_zip_show_last_regs_uninit(struct hisi_qm *qm)
+{
+ struct qm_debug *debug = &qm->debug;
+
+ if (qm->fun_type == QM_HW_VF || !debug->last_words)
+ return;
+
+ kfree(debug->last_words);
+ debug->last_words = NULL;
+}
+
+static void hisi_zip_show_last_dfx_regs(struct hisi_qm *qm)
+{
+ int core_dfx_regs_num = ARRAY_SIZE(hzip_dump_dfx_regs);
+ int com_dfx_regs_num = ARRAY_SIZE(hzip_com_dfx_regs);
+ struct qm_debug *debug = &qm->debug;
+ char buf[HZIP_BUF_SIZE];
+ void __iomem *base;
+ int i, j, idx;
+ u32 val;
+
+ if (qm->fun_type == QM_HW_VF || !debug->last_words)
+ return;
+
+ for (i = 0; i < com_dfx_regs_num; i++) {
+ val = readl_relaxed(qm->io_base + hzip_com_dfx_regs[i].offset);
+ if (debug->last_words[i] != val)
+ pci_info(qm->pdev, "com_dfx: %s \t= 0x%08x => 0x%08x\n",
+ hzip_com_dfx_regs[i].name, debug->last_words[i], val);
+ }
+
+ for (i = 0; i < HZIP_CORE_NUM; i++) {
+ if (i < HZIP_COMP_CORE_NUM)
+ scnprintf(buf, sizeof(buf), "Comp_core-%d", i);
+ else
+ scnprintf(buf, sizeof(buf), "Decomp_core-%d",
+ i - HZIP_COMP_CORE_NUM);
+ base = qm->io_base + core_offsets[i];
+
+ pci_info(qm->pdev, "==>%s:\n", buf);
+ /* dump the last values of the dfx regs during controller reset */
+ for (j = 0; j < core_dfx_regs_num; j++) {
+ idx = com_dfx_regs_num + i * core_dfx_regs_num + j;
+ val = readl_relaxed(base + hzip_dump_dfx_regs[j].offset);
+ if (debug->last_words[idx] != val)
+ pci_info(qm->pdev, "%s \t= 0x%08x => 0x%08x\n",
+ hzip_dump_dfx_regs[j].name, debug->last_words[idx], val);
+ }
+ }
+}
+
static void hisi_zip_log_hw_error(struct hisi_qm *qm, u32 err_sts)
{
const struct hisi_zip_hw_error *err = zip_hw_error;
@@ -798,6 +957,7 @@ static const struct hisi_qm_err_ini hisi_zip_err_ini = {
.close_axi_master_ooo = hisi_zip_close_axi_master_ooo,
.open_sva_prefetch = hisi_zip_open_sva_prefetch,
.close_sva_prefetch = hisi_zip_close_sva_prefetch,
+ .show_last_dfx_regs = hisi_zip_show_last_dfx_regs,
.err_info_init = hisi_zip_err_info_init,
};
@@ -805,6 +965,7 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)
{
struct hisi_qm *qm = &hisi_zip->qm;
struct hisi_zip_ctrl *ctrl;
+ int ret;
ctrl = devm_kzalloc(&qm->pdev->dev, sizeof(*ctrl), GFP_KERNEL);
if (!ctrl)
@@ -820,7 +981,11 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)
hisi_qm_dev_err_init(qm);
hisi_zip_debug_regs_clear(qm);
- return 0;
+ ret = hisi_zip_show_last_regs_init(qm);
+ if (ret)
+ pci_err(qm->pdev, "Failed to init last word regs!\n");
+
+ return ret;
}
static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
@@ -964,6 +1129,7 @@ err_qm_stop:
hisi_qm_stop(qm, QM_NORMAL);
err_dev_err_uninit:
+ hisi_zip_show_last_regs_uninit(qm);
hisi_qm_dev_err_uninit(qm);
err_qm_uninit:
@@ -985,6 +1151,7 @@ static void hisi_zip_remove(struct pci_dev *pdev)
hisi_zip_debugfs_exit(qm);
hisi_qm_stop(qm, QM_NORMAL);
+ hisi_zip_show_last_regs_uninit(qm);
hisi_qm_dev_err_uninit(qm);
hisi_zip_qm_uninit(qm);
}
diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
index 9ff885d50edf..9b1a158aec29 100644
--- a/drivers/crypto/inside-secure/safexcel.c
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -1997,3 +1997,12 @@ MODULE_AUTHOR("Igal Liberman <igall@marvell.com>");
MODULE_DESCRIPTION("Support for SafeXcel cryptographic engines: EIP97 & EIP197");
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(CRYPTO_INTERNAL);
+
+MODULE_FIRMWARE("ifpp.bin");
+MODULE_FIRMWARE("ipue.bin");
+MODULE_FIRMWARE("inside-secure/eip197b/ifpp.bin");
+MODULE_FIRMWARE("inside-secure/eip197b/ipue.bin");
+MODULE_FIRMWARE("inside-secure/eip197d/ifpp.bin");
+MODULE_FIRMWARE("inside-secure/eip197d/ipue.bin");
+MODULE_FIRMWARE("inside-secure/eip197_minifw/ifpp.bin");
+MODULE_FIRMWARE("inside-secure/eip197_minifw/ipue.bin");
diff --git a/drivers/crypto/keembay/keembay-ocs-aes-core.c b/drivers/crypto/keembay/keembay-ocs-aes-core.c
index e2a39fdaf623..9953f5590ac4 100644
--- a/drivers/crypto/keembay/keembay-ocs-aes-core.c
+++ b/drivers/crypto/keembay/keembay-ocs-aes-core.c
@@ -1598,7 +1598,6 @@ static int kmb_ocs_aes_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct ocs_aes_dev *aes_dev;
- struct resource *aes_mem;
int rc;
aes_dev = devm_kzalloc(dev, sizeof(*aes_dev), GFP_KERNEL);
@@ -1616,13 +1615,7 @@ static int kmb_ocs_aes_probe(struct platform_device *pdev)
}
/* Get base register address. */
- aes_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!aes_mem) {
- dev_err(dev, "Could not retrieve io mem resource\n");
- return -ENODEV;
- }
-
- aes_dev->base_reg = devm_ioremap_resource(&pdev->dev, aes_mem);
+ aes_dev->base_reg = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(aes_dev->base_reg))
return PTR_ERR(aes_dev->base_reg);
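
Note on the keembay change above: the platform_get_resource()/devm_ioremap_resource() pair collapses into devm_platform_ioremap_resource(), which performs the same lookup, error reporting, and devres-managed mapping. The resulting probe shape, sketched with a hypothetical driver:

	#include <linux/err.h>
	#include <linux/platform_device.h>

	static int example_probe(struct platform_device *pdev)
	{
		void __iomem *base;

		base = devm_platform_ioremap_resource(pdev, 0);
		if (IS_ERR(base))
			return PTR_ERR(base);	/* helper already logged it */

		/* ... use base ... */
		return 0;
	}
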
diff --git a/drivers/crypto/marvell/cesa/cipher.c b/drivers/crypto/marvell/cesa/cipher.c
index b739d3b873dc..c6f2fa753b7c 100644
--- a/drivers/crypto/marvell/cesa/cipher.c
+++ b/drivers/crypto/marvell/cesa/cipher.c
@@ -624,7 +624,6 @@ struct skcipher_alg mv_cesa_ecb_des3_ede_alg = {
.decrypt = mv_cesa_ecb_des3_ede_decrypt,
.min_keysize = DES3_EDE_KEY_SIZE,
.max_keysize = DES3_EDE_KEY_SIZE,
- .ivsize = DES3_EDE_BLOCK_SIZE,
.base = {
.cra_name = "ecb(des3_ede)",
.cra_driver_name = "mv-ecb-des3-ede",
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
index f8f8542ce3e4..67530e90bbfe 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
@@ -896,7 +896,6 @@ static int otx2_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher,
struct crypto_authenc_key_param *param;
int enckeylen = 0, authkeylen = 0;
struct rtattr *rta = (void *)key;
- int status;
if (!RTA_OK(rta, keylen))
return -EINVAL;
@@ -938,11 +937,7 @@ static int otx2_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher,
ctx->enc_key_len = enckeylen;
ctx->auth_key_len = authkeylen;
- status = aead_hmac_init(cipher);
- if (status)
- return status;
-
- return 0;
+ return aead_hmac_init(cipher);
}
static int otx2_cpt_aead_ecb_null_sha_setkey(struct crypto_aead *cipher,
diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c
index fa4c350c1bf9..181fa1c8b3c7 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_drv.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c
@@ -14,6 +14,7 @@
static const struct pci_device_id adf_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, ADF_4XXX_PCI_DEVICE_ID), },
+ { PCI_VDEVICE(INTEL, ADF_401XX_PCI_DEVICE_ID), },
{ }
};
MODULE_DEVICE_TABLE(pci, adf_pci_tbl);
@@ -75,13 +76,6 @@ static int adf_crypto_dev_config(struct adf_accel_dev *accel_dev)
if (ret)
goto err;
- /* Temporarily set the number of crypto instances to zero to avoid
- * registering the crypto algorithms.
- * This will be removed when the algorithms will support the
- * CRYPTO_TFM_REQ_MAY_BACKLOG flag
- */
- instances = 0;
-
for (i = 0; i < instances; i++) {
val = i;
bank = i * 2;
diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c
index b941fe3713ff..50d5afa26a9b 100644
--- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c
+++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c
@@ -78,19 +78,6 @@ static const u32 *adf_get_arbiter_mapping(void)
return thrd_to_arb_map;
}
-static void adf_enable_ints(struct adf_accel_dev *accel_dev)
-{
- void __iomem *addr;
-
- addr = (&GET_BARS(accel_dev)[ADF_C3XXX_PMISC_BAR])->virt_addr;
-
- /* Enable bundle and misc interrupts */
- ADF_CSR_WR(addr, ADF_C3XXX_SMIAPF0_MASK_OFFSET,
- ADF_C3XXX_SMIA0_MASK);
- ADF_CSR_WR(addr, ADF_C3XXX_SMIAPF1_MASK_OFFSET,
- ADF_C3XXX_SMIA1_MASK);
-}
-
static void configure_iov_threads(struct adf_accel_dev *accel_dev, bool enable)
{
adf_gen2_cfg_iov_thds(accel_dev, enable,
@@ -133,7 +120,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data)
hw_data->init_arb = adf_init_arb;
hw_data->exit_arb = adf_exit_arb;
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
- hw_data->enable_ints = adf_enable_ints;
+ hw_data->enable_ints = adf_gen2_enable_ints;
hw_data->reset_device = adf_reset_flr;
hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer;
hw_data->disable_iov = adf_disable_sriov;
diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h
index 1b86f828725c..336a06f11dbd 100644
--- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h
+++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h
@@ -13,10 +13,6 @@
#define ADF_C3XXX_ACCELERATORS_MASK 0x7
#define ADF_C3XXX_ACCELENGINES_MASK 0x3F
#define ADF_C3XXX_ETR_MAX_BANKS 16
-#define ADF_C3XXX_SMIAPF0_MASK_OFFSET (0x3A000 + 0x28)
-#define ADF_C3XXX_SMIAPF1_MASK_OFFSET (0x3A000 + 0x30)
-#define ADF_C3XXX_SMIA0_MASK 0xFFFF
-#define ADF_C3XXX_SMIA1_MASK 0x1
#define ADF_C3XXX_SOFTSTRAP_CSR_OFFSET 0x2EC
/* AE to function mapping */
diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c
index b1eac2f81faa..c00386fe6587 100644
--- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c
+++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c
@@ -80,19 +80,6 @@ static const u32 *adf_get_arbiter_mapping(void)
return thrd_to_arb_map;
}
-static void adf_enable_ints(struct adf_accel_dev *accel_dev)
-{
- void __iomem *addr;
-
- addr = (&GET_BARS(accel_dev)[ADF_C62X_PMISC_BAR])->virt_addr;
-
- /* Enable bundle and misc interrupts */
- ADF_CSR_WR(addr, ADF_C62X_SMIAPF0_MASK_OFFSET,
- ADF_C62X_SMIA0_MASK);
- ADF_CSR_WR(addr, ADF_C62X_SMIAPF1_MASK_OFFSET,
- ADF_C62X_SMIA1_MASK);
-}
-
static void configure_iov_threads(struct adf_accel_dev *accel_dev, bool enable)
{
adf_gen2_cfg_iov_thds(accel_dev, enable,
@@ -135,7 +122,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data)
hw_data->init_arb = adf_init_arb;
hw_data->exit_arb = adf_exit_arb;
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
- hw_data->enable_ints = adf_enable_ints;
+ hw_data->enable_ints = adf_gen2_enable_ints;
hw_data->reset_device = adf_reset_flr;
hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer;
hw_data->disable_iov = adf_disable_sriov;
diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h
index 68c3436bd3aa..008c0a3a9769 100644
--- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h
+++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h
@@ -13,10 +13,6 @@
#define ADF_C62X_ACCELERATORS_MASK 0x1F
#define ADF_C62X_ACCELENGINES_MASK 0x3FF
#define ADF_C62X_ETR_MAX_BANKS 16
-#define ADF_C62X_SMIAPF0_MASK_OFFSET (0x3A000 + 0x28)
-#define ADF_C62X_SMIAPF1_MASK_OFFSET (0x3A000 + 0x30)
-#define ADF_C62X_SMIA0_MASK 0xFFFF
-#define ADF_C62X_SMIA1_MASK 0x1
#define ADF_C62X_SOFTSTRAP_CSR_OFFSET 0x2EC
/* AE to function mapping */
diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile
index f25a6c8edfc7..04f058acc4d3 100644
--- a/drivers/crypto/qat/qat_common/Makefile
+++ b/drivers/crypto/qat/qat_common/Makefile
@@ -16,6 +16,7 @@ intel_qat-objs := adf_cfg.o \
qat_crypto.o \
qat_algs.o \
qat_asym_algs.o \
+ qat_algs_send.o \
qat_uclo.o \
qat_hal.o
diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h
index a03c6cf72331..ede6458c9dbf 100644
--- a/drivers/crypto/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h
@@ -19,6 +19,8 @@
#define ADF_4XXX_DEVICE_NAME "4xxx"
#define ADF_4XXX_PCI_DEVICE_ID 0x4940
#define ADF_4XXXIOV_PCI_DEVICE_ID 0x4941
+#define ADF_401XX_PCI_DEVICE_ID 0x4942
+#define ADF_401XXIOV_PCI_DEVICE_ID 0x4943
#define ADF_DEVICE_FUSECTL_OFFSET 0x40
#define ADF_DEVICE_LEGFUSE_OFFSET 0x4C
#define ADF_DEVICE_FUSECTL_MASK 0x80000000
@@ -152,9 +154,9 @@ struct adf_pfvf_ops {
int (*enable_comms)(struct adf_accel_dev *accel_dev);
u32 (*get_pf2vf_offset)(u32 i);
u32 (*get_vf2pf_offset)(u32 i);
- u32 (*get_vf2pf_sources)(void __iomem *pmisc_addr);
void (*enable_vf2pf_interrupts)(void __iomem *pmisc_addr, u32 vf_mask);
- void (*disable_vf2pf_interrupts)(void __iomem *pmisc_addr, u32 vf_mask);
+ void (*disable_all_vf2pf_interrupts)(void __iomem *pmisc_addr);
+ u32 (*disable_pending_vf2pf_interrupts)(void __iomem *pmisc_addr);
int (*send_msg)(struct adf_accel_dev *accel_dev, struct pfvf_message msg,
u32 pfvf_offset, struct mutex *csr_lock);
struct pfvf_message (*recv_msg)(struct adf_accel_dev *accel_dev,
diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
index e8c9b77c0d66..0464fa257929 100644
--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
@@ -195,10 +195,8 @@ bool adf_misc_wq_queue_work(struct work_struct *work);
#if defined(CONFIG_PCI_IOV)
int adf_sriov_configure(struct pci_dev *pdev, int numvfs);
void adf_disable_sriov(struct adf_accel_dev *accel_dev);
-void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
- u32 vf_mask);
-void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev,
- u32 vf_mask);
+void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask);
+void adf_disable_all_vf2pf_interrupts(struct adf_accel_dev *accel_dev);
bool adf_recv_and_handle_pf2vf_msg(struct adf_accel_dev *accel_dev);
bool adf_recv_and_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u32 vf_nr);
int adf_pf2vf_handle_pf_restarting(struct adf_accel_dev *accel_dev);
@@ -217,14 +215,6 @@ static inline void adf_disable_sriov(struct adf_accel_dev *accel_dev)
{
}
-static inline void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
-{
-}
-
-static inline void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev)
-{
-}
-
static inline int adf_init_pf_wq(void)
{
return 0;
@@ -243,10 +233,6 @@ static inline void adf_exit_vf_wq(void)
{
}
-static inline void adf_flush_vf_wq(struct adf_accel_dev *accel_dev)
-{
-}
-
#endif
static inline void __iomem *adf_get_pmisc_base(struct adf_accel_dev *accel_dev)
diff --git a/drivers/crypto/qat/qat_common/adf_gen2_hw_data.c b/drivers/crypto/qat/qat_common/adf_gen2_hw_data.c
index 57035b7dd4b2..d1884547b5a1 100644
--- a/drivers/crypto/qat/qat_common/adf_gen2_hw_data.c
+++ b/drivers/crypto/qat/qat_common/adf_gen2_hw_data.c
@@ -98,6 +98,19 @@ void adf_gen2_get_arb_info(struct arb_info *arb_info)
}
EXPORT_SYMBOL_GPL(adf_gen2_get_arb_info);
+void adf_gen2_enable_ints(struct adf_accel_dev *accel_dev)
+{
+ void __iomem *addr = adf_get_pmisc_base(accel_dev);
+ u32 val;
+
+ val = accel_dev->pf.vf_info ? 0 : BIT_ULL(GET_MAX_BANKS(accel_dev)) - 1;
+
+ /* Enable bundle and misc interrupts */
+ ADF_CSR_WR(addr, ADF_GEN2_SMIAPF0_MASK_OFFSET, val);
+ ADF_CSR_WR(addr, ADF_GEN2_SMIAPF1_MASK_OFFSET, ADF_GEN2_SMIA1_MASK);
+}
+EXPORT_SYMBOL_GPL(adf_gen2_enable_ints);
+
static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size)
{
return BUILD_RING_BASE_ADDR(addr, size);
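
Note on adf_gen2_enable_ints() above: the bundle-interrupt mask is computed as BIT_ULL(n) - 1, a value with the lowest n bits set (e.g. BIT_ULL(16) - 1 == 0xFFFF for banks 0..15), and is forced to zero when VFs are in use so the PF takes no bundle interrupts. A one-function illustration, assuming n <= 32:

	#include <linux/bits.h>
	#include <linux/types.h>

	/* Lowest-n-bits mask: BIT_ULL(16) - 1 == 0xFFFF (banks 0..15). */
	static u32 bank_irq_mask(unsigned int nbanks, bool sriov_active)
	{
		return sriov_active ? 0 : BIT_ULL(nbanks) - 1;
	}
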
diff --git a/drivers/crypto/qat/qat_common/adf_gen2_hw_data.h b/drivers/crypto/qat/qat_common/adf_gen2_hw_data.h
index f2e0451b11c0..e4bc07529be4 100644
--- a/drivers/crypto/qat/qat_common/adf_gen2_hw_data.h
+++ b/drivers/crypto/qat/qat_common/adf_gen2_hw_data.h
@@ -145,6 +145,11 @@ do { \
#define ADF_GEN2_CERRSSMSH(i) ((i) * 0x4000 + 0x10)
#define ADF_GEN2_ERRSSMSH_EN BIT(3)
+/* Interrupts */
+#define ADF_GEN2_SMIAPF0_MASK_OFFSET (0x3A000 + 0x28)
+#define ADF_GEN2_SMIAPF1_MASK_OFFSET (0x3A000 + 0x30)
+#define ADF_GEN2_SMIA1_MASK 0x1
+
u32 adf_gen2_get_num_accels(struct adf_hw_device_data *self);
u32 adf_gen2_get_num_aes(struct adf_hw_device_data *self);
void adf_gen2_enable_error_correction(struct adf_accel_dev *accel_dev);
@@ -153,6 +158,7 @@ void adf_gen2_cfg_iov_thds(struct adf_accel_dev *accel_dev, bool enable,
void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops);
void adf_gen2_get_admin_info(struct admin_info *admin_csrs_info);
void adf_gen2_get_arb_info(struct arb_info *arb_info);
+void adf_gen2_enable_ints(struct adf_accel_dev *accel_dev);
u32 adf_gen2_get_accel_cap(struct adf_accel_dev *accel_dev);
void adf_gen2_set_ssm_wdtimer(struct adf_accel_dev *accel_dev);
diff --git a/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c
index 1a9072aac2ca..70ef11963938 100644
--- a/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c
+++ b/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c
@@ -13,8 +13,9 @@
#include "adf_pfvf_utils.h"
/* VF2PF interrupts */
+#define ADF_GEN2_VF_MSK 0xFFFF
#define ADF_GEN2_ERR_REG_VF2PF(vf_src) (((vf_src) & 0x01FFFE00) >> 9)
-#define ADF_GEN2_ERR_MSK_VF2PF(vf_mask) (((vf_mask) & 0xFFFF) << 9)
+#define ADF_GEN2_ERR_MSK_VF2PF(vf_mask) (((vf_mask) & ADF_GEN2_VF_MSK) << 9)
#define ADF_GEN2_PF_PF2VF_OFFSET(i) (0x3A000 + 0x280 + ((i) * 0x04))
#define ADF_GEN2_VF_PF2VF_OFFSET 0x200
@@ -50,43 +51,60 @@ static u32 adf_gen2_vf_get_pfvf_offset(u32 i)
return ADF_GEN2_VF_PF2VF_OFFSET;
}
-static u32 adf_gen2_get_vf2pf_sources(void __iomem *pmisc_addr)
-{
- u32 errsou3, errmsk3, vf_int_mask;
-
- /* Get the interrupt sources triggered by VFs */
- errsou3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRSOU3);
- vf_int_mask = ADF_GEN2_ERR_REG_VF2PF(errsou3);
-
- /* To avoid adding duplicate entries to work queue, clear
- * vf_int_mask_sets bits that are already masked in ERRMSK register.
- */
- errmsk3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3);
- vf_int_mask &= ~ADF_GEN2_ERR_REG_VF2PF(errmsk3);
-
- return vf_int_mask;
-}
-
-static void adf_gen2_enable_vf2pf_interrupts(void __iomem *pmisc_addr,
- u32 vf_mask)
+static void adf_gen2_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask)
{
/* Enable VF2PF Messaging Ints - VFs 0 through 15 per vf_mask[15:0] */
- if (vf_mask & 0xFFFF) {
+ if (vf_mask & ADF_GEN2_VF_MSK) {
u32 val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3)
& ~ADF_GEN2_ERR_MSK_VF2PF(vf_mask);
ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, val);
}
}
-static void adf_gen2_disable_vf2pf_interrupts(void __iomem *pmisc_addr,
- u32 vf_mask)
+static void adf_gen2_disable_all_vf2pf_interrupts(void __iomem *pmisc_addr)
{
/* Disable VF2PF interrupts for VFs 0 through 15 per vf_mask[15:0] */
- if (vf_mask & 0xFFFF) {
- u32 val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3)
- | ADF_GEN2_ERR_MSK_VF2PF(vf_mask);
- ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, val);
- }
+ u32 val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3)
+ | ADF_GEN2_ERR_MSK_VF2PF(ADF_GEN2_VF_MSK);
+ ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, val);
+}
+
+static u32 adf_gen2_disable_pending_vf2pf_interrupts(void __iomem *pmisc_addr)
+{
+ u32 sources, disabled, pending;
+ u32 errsou3, errmsk3;
+
+ /* Get the interrupt sources triggered by VFs */
+ errsou3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRSOU3);
+ sources = ADF_GEN2_ERR_REG_VF2PF(errsou3);
+
+ if (!sources)
+ return 0;
+
+ /* Get the already disabled interrupts */
+ errmsk3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3);
+ disabled = ADF_GEN2_ERR_REG_VF2PF(errmsk3);
+
+ pending = sources & ~disabled;
+ if (!pending)
+ return 0;
+
+ /* Due to HW limitations, when disabling the interrupts, we can't
+ * just disable the requested sources, as this would lead to missed
+ * interrupts if ERRSOU3 changes just before writing to ERRMSK3.
+ * To work around it, disable all and re-enable only the sources that
+ * were not asserted and were not already disabled. Re-enabling will
+ * trigger a new interrupt for the sources that have changed in the
+ * meantime, if any.
+ */
+ errmsk3 |= ADF_GEN2_ERR_MSK_VF2PF(ADF_GEN2_VF_MSK);
+ ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3);
+
+ errmsk3 &= ADF_GEN2_ERR_MSK_VF2PF(sources | disabled);
+ ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3);
+
+ /* Return the sources of the (new) interrupt(s) */
+ return pending;
}
static u32 gen2_csr_get_int_bit(enum gen2_csr_pos offset)
@@ -362,9 +380,9 @@ void adf_gen2_init_pf_pfvf_ops(struct adf_pfvf_ops *pfvf_ops)
pfvf_ops->enable_comms = adf_enable_pf2vf_comms;
pfvf_ops->get_pf2vf_offset = adf_gen2_pf_get_pfvf_offset;
pfvf_ops->get_vf2pf_offset = adf_gen2_pf_get_pfvf_offset;
- pfvf_ops->get_vf2pf_sources = adf_gen2_get_vf2pf_sources;
pfvf_ops->enable_vf2pf_interrupts = adf_gen2_enable_vf2pf_interrupts;
- pfvf_ops->disable_vf2pf_interrupts = adf_gen2_disable_vf2pf_interrupts;
+ pfvf_ops->disable_all_vf2pf_interrupts = adf_gen2_disable_all_vf2pf_interrupts;
+ pfvf_ops->disable_pending_vf2pf_interrupts = adf_gen2_disable_pending_vf2pf_interrupts;
pfvf_ops->send_msg = adf_gen2_pf2vf_send;
pfvf_ops->recv_msg = adf_gen2_vf2pf_recv;
}
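
Stripped of the QAT register names, the two disable-pending helpers above implement one reusable pattern: mask everything, then selectively unmask, so a source that asserts between the SOU read and the MSK write re-fires instead of being lost. A hedged sketch, with read_csr()/write_csr() as hypothetical stand-ins for ADF_CSR_RD/ADF_CSR_WR and a mask register where 1 means disabled:

	static u32 disable_pending_sources(void __iomem *base, u32 sou_off,
					   u32 msk_off, u32 all_mask)
	{
		u32 sources = read_csr(base, sou_off);	/* currently asserted */
		u32 disabled = read_csr(base, msk_off);	/* already masked */
		u32 pending = sources & ~disabled;

		if (!pending)
			return 0;

		/* Mask all first: a selective write could race with a source
		 * asserting after the SOU read.
		 */
		write_csr(base, msk_off, all_mask);

		/* Keep asserted and already-disabled bits masked; anything
		 * that fired in the window re-triggers once unmasked.
		 */
		write_csr(base, msk_off, disabled | sources);

		return pending;
	}
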
diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c
index d80d493a7756..8e8efe93f3ee 100644
--- a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c
+++ b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c
@@ -15,6 +15,7 @@
/* VF2PF interrupt source registers */
#define ADF_4XXX_VM2PF_SOU 0x41A180
#define ADF_4XXX_VM2PF_MSK 0x41A1C0
+#define ADF_GEN4_VF_MSK 0xFFFF
#define ADF_PFVF_GEN4_MSGTYPE_SHIFT 2
#define ADF_PFVF_GEN4_MSGTYPE_MASK 0x3F
@@ -36,32 +37,48 @@ static u32 adf_gen4_pf_get_vf2pf_offset(u32 i)
return ADF_4XXX_VM2PF_OFFSET(i);
}
-static u32 adf_gen4_get_vf2pf_sources(void __iomem *pmisc_addr)
+static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask)
{
- u32 sou, mask;
+ u32 val;
- sou = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU);
- mask = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK);
-
- return sou & ~mask;
+ val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) & ~vf_mask;
+ ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val);
}
-static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr,
- u32 vf_mask)
+static void adf_gen4_disable_all_vf2pf_interrupts(void __iomem *pmisc_addr)
{
- unsigned int val;
-
- val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) & ~vf_mask;
- ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val);
+ ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, ADF_GEN4_VF_MSK);
}
-static void adf_gen4_disable_vf2pf_interrupts(void __iomem *pmisc_addr,
- u32 vf_mask)
+static u32 adf_gen4_disable_pending_vf2pf_interrupts(void __iomem *pmisc_addr)
{
- unsigned int val;
+ u32 sources, disabled, pending;
+
+ /* Get the interrupt sources triggered by VFs */
+ sources = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU);
+ if (!sources)
+ return 0;
+
+ /* Get the already disabled interrupts */
+ disabled = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK);
+
+ pending = sources & ~disabled;
+ if (!pending)
+ return 0;
+
+ /* Due to HW limitations, when disabling the interrupts, we can't
+ * just disable the requested sources, as this would lead to missed
+ * interrupts if VM2PF_SOU changes just before writing to VM2PF_MSK.
+ * To work around it, disable all and re-enable only the sources that
+ * were not asserted and were not already disabled. Re-enabling will
+ * trigger a new interrupt for the sources that have changed in the
+ * meantime, if any.
+ */
+ ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, ADF_GEN4_VF_MSK);
+ ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, disabled | sources);
- val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) | vf_mask;
- ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val);
+ /* Return the sources of the (new) interrupt(s) */
+ return pending;
}
static int adf_gen4_pfvf_send(struct adf_accel_dev *accel_dev,
@@ -96,10 +113,16 @@ static struct pfvf_message adf_gen4_pfvf_recv(struct adf_accel_dev *accel_dev,
u32 pfvf_offset, u8 compat_ver)
{
void __iomem *pmisc_addr = adf_get_pmisc_base(accel_dev);
+ struct pfvf_message msg = { 0 };
u32 csr_val;
/* Read message from the CSR */
csr_val = ADF_CSR_RD(pmisc_addr, pfvf_offset);
+ if (!(csr_val & ADF_PFVF_INT)) {
+ dev_info(&GET_DEV(accel_dev),
+ "Spurious PFVF interrupt, msg 0x%.8x. Ignored\n", csr_val);
+ return msg;
+ }
/* We can now acknowledge the message reception by clearing the
* interrupt bit
@@ -115,9 +138,9 @@ void adf_gen4_init_pf_pfvf_ops(struct adf_pfvf_ops *pfvf_ops)
pfvf_ops->enable_comms = adf_enable_pf2vf_comms;
pfvf_ops->get_pf2vf_offset = adf_gen4_pf_get_pf2vf_offset;
pfvf_ops->get_vf2pf_offset = adf_gen4_pf_get_vf2pf_offset;
- pfvf_ops->get_vf2pf_sources = adf_gen4_get_vf2pf_sources;
pfvf_ops->enable_vf2pf_interrupts = adf_gen4_enable_vf2pf_interrupts;
- pfvf_ops->disable_vf2pf_interrupts = adf_gen4_disable_vf2pf_interrupts;
+ pfvf_ops->disable_all_vf2pf_interrupts = adf_gen4_disable_all_vf2pf_interrupts;
+ pfvf_ops->disable_pending_vf2pf_interrupts = adf_gen4_disable_pending_vf2pf_interrupts;
pfvf_ops->send_msg = adf_gen4_pfvf_send;
pfvf_ops->recv_msg = adf_gen4_pfvf_recv;
}
diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c
index a35149f8bf1e..ad9e135b8560 100644
--- a/drivers/crypto/qat/qat_common/adf_isr.c
+++ b/drivers/crypto/qat/qat_common/adf_isr.c
@@ -66,42 +66,39 @@ void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags);
}
-void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
+void adf_disable_all_vf2pf_interrupts(struct adf_accel_dev *accel_dev)
{
void __iomem *pmisc_addr = adf_get_pmisc_base(accel_dev);
unsigned long flags;
spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags);
- GET_PFVF_OPS(accel_dev)->disable_vf2pf_interrupts(pmisc_addr, vf_mask);
+ GET_PFVF_OPS(accel_dev)->disable_all_vf2pf_interrupts(pmisc_addr);
spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags);
}
-static void adf_disable_vf2pf_interrupts_irq(struct adf_accel_dev *accel_dev,
- u32 vf_mask)
+static u32 adf_disable_pending_vf2pf_interrupts(struct adf_accel_dev *accel_dev)
{
void __iomem *pmisc_addr = adf_get_pmisc_base(accel_dev);
+ u32 pending;
spin_lock(&accel_dev->pf.vf2pf_ints_lock);
- GET_PFVF_OPS(accel_dev)->disable_vf2pf_interrupts(pmisc_addr, vf_mask);
+ pending = GET_PFVF_OPS(accel_dev)->disable_pending_vf2pf_interrupts(pmisc_addr);
spin_unlock(&accel_dev->pf.vf2pf_ints_lock);
+
+ return pending;
}
static bool adf_handle_vf2pf_int(struct adf_accel_dev *accel_dev)
{
- void __iomem *pmisc_addr = adf_get_pmisc_base(accel_dev);
bool irq_handled = false;
unsigned long vf_mask;
- /* Get the interrupt sources triggered by VFs */
- vf_mask = GET_PFVF_OPS(accel_dev)->get_vf2pf_sources(pmisc_addr);
-
+ /* Get the interrupt sources triggered by VFs, except for those already disabled */
+ vf_mask = adf_disable_pending_vf2pf_interrupts(accel_dev);
if (vf_mask) {
struct adf_accel_vf_info *vf_info;
int i;
- /* Disable VF2PF interrupts for VFs with pending ints */
- adf_disable_vf2pf_interrupts_irq(accel_dev, vf_mask);
-
/*
* Handle VF2PF interrupt unless the VF is malicious and
* is attempting to flood the host OS with VF2PF interrupts.
diff --git a/drivers/crypto/qat/qat_common/adf_pfvf_msg.h b/drivers/crypto/qat/qat_common/adf_pfvf_msg.h
index 9c37a2661392..204a42438992 100644
--- a/drivers/crypto/qat/qat_common/adf_pfvf_msg.h
+++ b/drivers/crypto/qat/qat_common/adf_pfvf_msg.h
@@ -8,8 +8,8 @@
/*
* PF<->VF Gen2 Messaging format
*
- * The PF has an array of 32-bit PF2VF registers, one for each VF. The
- * PF can access all these registers; each VF can access only the one
+ * The PF has an array of 32-bit PF2VF registers, one for each VF. The
+ * PF can access all these registers while each VF can access only the one
* register associated with that particular VF.
*
* The register functionally is split into two parts:
diff --git a/drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c b/drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c
index 588352de1ef0..388e58bcbcaf 100644
--- a/drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c
+++ b/drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c
@@ -154,7 +154,7 @@ static struct pfvf_message handle_blkmsg_req(struct adf_accel_vf_info *vf_info,
if (FIELD_GET(ADF_VF2PF_BLOCK_CRC_REQ_MASK, req.data)) {
dev_dbg(&GET_DEV(vf_info->accel_dev),
"BlockMsg of type %d for CRC over %d bytes received from VF%d\n",
- blk_type, blk_byte, vf_info->vf_nr);
+ blk_type, blk_byte + 1, vf_info->vf_nr);
if (!adf_pf2vf_blkmsg_get_data(vf_info, blk_type, blk_byte,
byte_max, &resp_data,
@@ -242,7 +242,9 @@ static int adf_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u8 vf_nr,
"VersionRequest received from VF%d (vers %d) to PF (vers %d)\n",
vf_nr, vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
- if (vf_compat_ver <= ADF_PFVF_COMPAT_THIS_VERSION)
+ if (vf_compat_ver == 0)
+ compat = ADF_PF2VF_VF_INCOMPATIBLE;
+ else if (vf_compat_ver <= ADF_PFVF_COMPAT_THIS_VERSION)
compat = ADF_PF2VF_VF_COMPATIBLE;
else
compat = ADF_PF2VF_VF_COMPAT_UNKNOWN;
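
The effect of the added zero check reads most easily as a decision table; the sketch below restates the branch with the reasoning as comments (constants as used in the hunk):

	if (vf_compat_ver == 0)				/* 0 is never a valid version */
		compat = ADF_PF2VF_VF_INCOMPATIBLE;
	else if (vf_compat_ver <= ADF_PFVF_COMPAT_THIS_VERSION)
		compat = ADF_PF2VF_VF_COMPATIBLE;	/* VF same age or older than PF */
	else
		compat = ADF_PF2VF_VF_COMPAT_UNKNOWN;	/* VF newer; let the VF decide */
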
diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c
index b960bca1f9d2..f38b2ffde146 100644
--- a/drivers/crypto/qat/qat_common/adf_sriov.c
+++ b/drivers/crypto/qat/qat_common/adf_sriov.c
@@ -3,7 +3,6 @@
#include <linux/workqueue.h>
#include <linux/pci.h>
#include <linux/device.h>
-#include <linux/iommu.h>
#include "adf_common_drv.h"
#include "adf_cfg.h"
#include "adf_pfvf_pf_msg.h"
@@ -74,8 +73,7 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev)
hw_data->configure_iov_threads(accel_dev, true);
/* Enable VF to PF interrupts for all VFs */
- if (hw_data->pfvf_ops.get_pf2vf_offset)
- adf_enable_vf2pf_interrupts(accel_dev, BIT_ULL(totalvfs) - 1);
+ adf_enable_vf2pf_interrupts(accel_dev, BIT_ULL(totalvfs) - 1);
/*
* Due to the hardware design, when SR-IOV and the ring arbiter
@@ -104,22 +102,18 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev)
if (!accel_dev->pf.vf_info)
return;
- if (hw_data->pfvf_ops.get_pf2vf_offset)
- adf_pf2vf_notify_restarting(accel_dev);
-
+ adf_pf2vf_notify_restarting(accel_dev);
pci_disable_sriov(accel_to_pci_dev(accel_dev));
/* Disable VF to PF interrupts */
- if (hw_data->pfvf_ops.get_pf2vf_offset)
- adf_disable_vf2pf_interrupts(accel_dev, GENMASK(31, 0));
+ adf_disable_all_vf2pf_interrupts(accel_dev);
/* Clear Valid bits in AE Thread to PCIe Function Mapping */
if (hw_data->configure_iov_threads)
hw_data->configure_iov_threads(accel_dev, false);
- for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) {
+ for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++)
mutex_destroy(&vf->pf2vf_lock);
- }
kfree(accel_dev->pf.vf_info);
accel_dev->pf.vf_info = NULL;
@@ -176,7 +170,7 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs)
return -EFAULT;
}
- if (!iommu_present(&pci_bus_type))
+ if (!device_iommu_mapped(&pdev->dev))
dev_warn(&pdev->dev, "IOMMU should be enabled for SR-IOV to work correctly\n");
if (accel_dev->pf.vf_info) {
diff --git a/drivers/crypto/qat/qat_common/adf_transport.c b/drivers/crypto/qat/qat_common/adf_transport.c
index 8ba28409fb74..630d0483c4e0 100644
--- a/drivers/crypto/qat/qat_common/adf_transport.c
+++ b/drivers/crypto/qat/qat_common/adf_transport.c
@@ -8,6 +8,9 @@
#include "adf_cfg.h"
#include "adf_common_drv.h"
+#define ADF_MAX_RING_THRESHOLD 80
+#define ADF_PERCENT(tot, percent) (((tot) * (percent)) / 100)
+
static inline u32 adf_modulo(u32 data, u32 shift)
{
u32 div = data >> shift;
@@ -77,6 +80,11 @@ static void adf_disable_ring_irq(struct adf_etr_bank_data *bank, u32 ring)
bank->irq_mask);
}
+bool adf_ring_nearly_full(struct adf_etr_ring_data *ring)
+{
+ return atomic_read(ring->inflights) > ring->threshold;
+}
+
int adf_send_message(struct adf_etr_ring_data *ring, u32 *msg)
{
struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(ring->bank->accel_dev);
@@ -217,6 +225,7 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section,
struct adf_etr_bank_data *bank;
struct adf_etr_ring_data *ring;
char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES];
+ int max_inflights;
u32 ring_num;
int ret;
@@ -263,6 +272,8 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section,
ring->ring_size = adf_verify_ring_size(msg_size, num_msgs);
ring->head = 0;
ring->tail = 0;
+ max_inflights = ADF_MAX_INFLIGHTS(ring->ring_size, ring->msg_size);
+ ring->threshold = ADF_PERCENT(max_inflights, ADF_MAX_RING_THRESHOLD);
atomic_set(ring->inflights, 0);
ret = adf_init_ring(ring);
if (ret)
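
With the two macros above, each ring gets a backpressure watermark at 80% of its capacity, and adf_ring_nearly_full() compares the inflight counter against it. Worked numbers (assumed, not from a real configuration): a ring holding 256 messages gets threshold = (256 * 80) / 100 = 204, so the send path starts backlogging at the 205th inflight request.
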
diff --git a/drivers/crypto/qat/qat_common/adf_transport.h b/drivers/crypto/qat/qat_common/adf_transport.h
index 2c95f1697c76..e6ef6f9b7691 100644
--- a/drivers/crypto/qat/qat_common/adf_transport.h
+++ b/drivers/crypto/qat/qat_common/adf_transport.h
@@ -14,6 +14,7 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section,
const char *ring_name, adf_callback_fn callback,
int poll_mode, struct adf_etr_ring_data **ring_ptr);
+bool adf_ring_nearly_full(struct adf_etr_ring_data *ring);
int adf_send_message(struct adf_etr_ring_data *ring, u32 *msg);
void adf_remove_ring(struct adf_etr_ring_data *ring);
#endif
diff --git a/drivers/crypto/qat/qat_common/adf_transport_internal.h b/drivers/crypto/qat/qat_common/adf_transport_internal.h
index 501bcf0f1809..8b2c92ba7ca1 100644
--- a/drivers/crypto/qat/qat_common/adf_transport_internal.h
+++ b/drivers/crypto/qat/qat_common/adf_transport_internal.h
@@ -22,6 +22,7 @@ struct adf_etr_ring_data {
spinlock_t lock; /* protects ring data struct */
u16 head;
u16 tail;
+ u32 threshold;
u8 ring_number;
u8 ring_size;
u8 msg_size;
diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c
index 86c3bd0c9c2b..8c95fcd8e64b 100644
--- a/drivers/crypto/qat/qat_common/adf_vf_isr.c
+++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c
@@ -70,6 +70,7 @@ static void adf_dev_stop_async(struct work_struct *work)
container_of(work, struct adf_vf_stop_data, work);
struct adf_accel_dev *accel_dev = stop_data->accel_dev;
+ adf_dev_restarting_notify(accel_dev);
adf_dev_stop(accel_dev);
adf_dev_shutdown(accel_dev);
diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index f998ed58457c..148edbe379e3 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -17,7 +17,7 @@
#include <crypto/xts.h>
#include <linux/dma-mapping.h>
#include "adf_accel_devices.h"
-#include "adf_transport.h"
+#include "qat_algs_send.h"
#include "adf_common_drv.h"
#include "qat_crypto.h"
#include "icp_qat_hw.h"
@@ -46,19 +46,6 @@
static DEFINE_MUTEX(algs_lock);
static unsigned int active_devs;
-struct qat_alg_buf {
- u32 len;
- u32 resrvd;
- u64 addr;
-} __packed;
-
-struct qat_alg_buf_list {
- u64 resrvd;
- u32 num_bufs;
- u32 num_mapped_bufs;
- struct qat_alg_buf bufers[];
-} __packed __aligned(64);
-
/* Common content descriptor */
struct qat_alg_cd {
union {
@@ -693,7 +680,10 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst,
bl->bufers[i].len, DMA_BIDIRECTIONAL);
dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE);
- kfree(bl);
+
+ if (!qat_req->buf.sgl_src_valid)
+ kfree(bl);
+
if (blp != blpout) {
/* If out of place operation dma unmap only data */
int bufless = blout->num_bufs - blout->num_mapped_bufs;
@@ -704,14 +694,17 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst,
DMA_BIDIRECTIONAL);
}
dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE);
- kfree(blout);
+
+ if (!qat_req->buf.sgl_dst_valid)
+ kfree(blout);
}
}
static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
struct scatterlist *sgl,
struct scatterlist *sglout,
- struct qat_crypto_request *qat_req)
+ struct qat_crypto_request *qat_req,
+ gfp_t flags)
{
struct device *dev = &GET_DEV(inst->accel_dev);
int i, sg_nctr = 0;
@@ -721,15 +714,24 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
dma_addr_t blp = DMA_MAPPING_ERROR;
dma_addr_t bloutp = DMA_MAPPING_ERROR;
struct scatterlist *sg;
- size_t sz_out, sz = struct_size(bufl, bufers, n + 1);
+ size_t sz_out, sz = struct_size(bufl, bufers, n);
+ int node = dev_to_node(&GET_DEV(inst->accel_dev));
if (unlikely(!n))
return -EINVAL;
- bufl = kzalloc_node(sz, GFP_ATOMIC,
- dev_to_node(&GET_DEV(inst->accel_dev)));
- if (unlikely(!bufl))
- return -ENOMEM;
+ qat_req->buf.sgl_src_valid = false;
+ qat_req->buf.sgl_dst_valid = false;
+
+ if (n > QAT_MAX_BUFF_DESC) {
+ bufl = kzalloc_node(sz, flags, node);
+ if (unlikely(!bufl))
+ return -ENOMEM;
+ } else {
+ bufl = &qat_req->buf.sgl_src.sgl_hdr;
+ memset(bufl, 0, sizeof(struct qat_alg_buf_list));
+ qat_req->buf.sgl_src_valid = true;
+ }
for_each_sg(sgl, sg, n, i)
bufl->bufers[i].addr = DMA_MAPPING_ERROR;
@@ -760,12 +762,18 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
struct qat_alg_buf *bufers;
n = sg_nents(sglout);
- sz_out = struct_size(buflout, bufers, n + 1);
+ sz_out = struct_size(buflout, bufers, n);
sg_nctr = 0;
- buflout = kzalloc_node(sz_out, GFP_ATOMIC,
- dev_to_node(&GET_DEV(inst->accel_dev)));
- if (unlikely(!buflout))
- goto err_in;
+
+ if (n > QAT_MAX_BUFF_DESC) {
+ buflout = kzalloc_node(sz_out, flags, node);
+ if (unlikely(!buflout))
+ goto err_in;
+ } else {
+ buflout = &qat_req->buf.sgl_dst.sgl_hdr;
+ memset(buflout, 0, sizeof(struct qat_alg_buf_list));
+ qat_req->buf.sgl_dst_valid = true;
+ }
bufers = buflout->bufers;
for_each_sg(sglout, sg, n, i)
@@ -810,7 +818,9 @@ err_out:
dma_unmap_single(dev, buflout->bufers[i].addr,
buflout->bufers[i].len,
DMA_BIDIRECTIONAL);
- kfree(buflout);
+
+ if (!qat_req->buf.sgl_dst_valid)
+ kfree(buflout);
err_in:
if (!dma_mapping_error(dev, blp))
@@ -823,7 +833,8 @@ err_in:
bufl->bufers[i].len,
DMA_BIDIRECTIONAL);
- kfree(bufl);
+ if (!qat_req->buf.sgl_src_valid)
+ kfree(bufl);
dev_err(dev, "Failed to map buf for dma\n");
return -ENOMEM;
@@ -925,8 +936,25 @@ void qat_alg_callback(void *resp)
struct icp_qat_fw_la_resp *qat_resp = resp;
struct qat_crypto_request *qat_req =
(void *)(__force long)qat_resp->opaque_data;
+ struct qat_instance_backlog *backlog = qat_req->alg_req.backlog;
qat_req->cb(qat_resp, qat_req);
+
+ qat_alg_send_backlog(backlog);
+}
+
+static int qat_alg_send_sym_message(struct qat_crypto_request *qat_req,
+ struct qat_crypto_instance *inst,
+ struct crypto_async_request *base)
+{
+ struct qat_alg_req *alg_req = &qat_req->alg_req;
+
+ alg_req->fw_req = (u32 *)&qat_req->req;
+ alg_req->tx_ring = inst->sym_tx;
+ alg_req->base = base;
+ alg_req->backlog = &inst->backlog;
+
+ return qat_alg_send_message(alg_req);
}
static int qat_alg_aead_dec(struct aead_request *areq)
@@ -939,14 +967,15 @@ static int qat_alg_aead_dec(struct aead_request *areq)
struct icp_qat_fw_la_auth_req_params *auth_param;
struct icp_qat_fw_la_bulk_req *msg;
int digst_size = crypto_aead_authsize(aead_tfm);
- int ret, ctr = 0;
+ gfp_t f = qat_algs_alloc_flags(&areq->base);
+ int ret;
u32 cipher_len;
cipher_len = areq->cryptlen - digst_size;
if (cipher_len % AES_BLOCK_SIZE != 0)
return -EINVAL;
- ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req);
+ ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f);
if (unlikely(ret))
return ret;
@@ -965,15 +994,12 @@ static int qat_alg_aead_dec(struct aead_request *areq)
auth_param = (void *)((u8 *)cipher_param + sizeof(*cipher_param));
auth_param->auth_off = 0;
auth_param->auth_len = areq->assoclen + cipher_param->cipher_length;
- do {
- ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg);
- } while (ret == -EAGAIN && ctr++ < 10);
- if (ret == -EAGAIN) {
+ ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base);
+ if (ret == -ENOSPC)
qat_alg_free_bufl(ctx->inst, qat_req);
- return -EBUSY;
- }
- return -EINPROGRESS;
+
+ return ret;
}
static int qat_alg_aead_enc(struct aead_request *areq)
@@ -984,14 +1010,15 @@ static int qat_alg_aead_enc(struct aead_request *areq)
struct qat_crypto_request *qat_req = aead_request_ctx(areq);
struct icp_qat_fw_la_cipher_req_params *cipher_param;
struct icp_qat_fw_la_auth_req_params *auth_param;
+ gfp_t f = qat_algs_alloc_flags(&areq->base);
struct icp_qat_fw_la_bulk_req *msg;
u8 *iv = areq->iv;
- int ret, ctr = 0;
+ int ret;
if (areq->cryptlen % AES_BLOCK_SIZE != 0)
return -EINVAL;
- ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req);
+ ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f);
if (unlikely(ret))
return ret;
@@ -1013,15 +1040,11 @@ static int qat_alg_aead_enc(struct aead_request *areq)
auth_param->auth_off = 0;
auth_param->auth_len = areq->assoclen + areq->cryptlen;
- do {
- ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg);
- } while (ret == -EAGAIN && ctr++ < 10);
-
- if (ret == -EAGAIN) {
+ ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base);
+ if (ret == -ENOSPC)
qat_alg_free_bufl(ctx->inst, qat_req);
- return -EBUSY;
- }
- return -EINPROGRESS;
+
+ return ret;
}
static int qat_alg_skcipher_rekey(struct qat_alg_skcipher_ctx *ctx,
@@ -1173,13 +1196,14 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req)
struct qat_alg_skcipher_ctx *ctx = crypto_tfm_ctx(tfm);
struct qat_crypto_request *qat_req = skcipher_request_ctx(req);
struct icp_qat_fw_la_cipher_req_params *cipher_param;
+ gfp_t f = qat_algs_alloc_flags(&req->base);
struct icp_qat_fw_la_bulk_req *msg;
- int ret, ctr = 0;
+ int ret;
if (req->cryptlen == 0)
return 0;
- ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req);
+ ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f);
if (unlikely(ret))
return ret;
@@ -1198,15 +1222,11 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req)
qat_alg_set_req_iv(qat_req);
- do {
- ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg);
- } while (ret == -EAGAIN && ctr++ < 10);
-
- if (ret == -EAGAIN) {
+ ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base);
+ if (ret == -ENOSPC)
qat_alg_free_bufl(ctx->inst, qat_req);
- return -EBUSY;
- }
- return -EINPROGRESS;
+
+ return ret;
}
static int qat_alg_skcipher_blk_encrypt(struct skcipher_request *req)
@@ -1242,13 +1262,14 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req)
struct qat_alg_skcipher_ctx *ctx = crypto_tfm_ctx(tfm);
struct qat_crypto_request *qat_req = skcipher_request_ctx(req);
struct icp_qat_fw_la_cipher_req_params *cipher_param;
+ gfp_t f = qat_algs_alloc_flags(&req->base);
struct icp_qat_fw_la_bulk_req *msg;
- int ret, ctr = 0;
+ int ret;
if (req->cryptlen == 0)
return 0;
- ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req);
+ ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f);
if (unlikely(ret))
return ret;
@@ -1268,15 +1289,11 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req)
qat_alg_set_req_iv(qat_req);
qat_alg_update_iv(qat_req);
- do {
- ret = adf_send_message(ctx->inst->sym_tx, (u32 *)msg);
- } while (ret == -EAGAIN && ctr++ < 10);
-
- if (ret == -EAGAIN) {
+ ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base);
+ if (ret == -ENOSPC)
qat_alg_free_bufl(ctx->inst, qat_req);
- return -EBUSY;
- }
- return -EINPROGRESS;
+
+ return ret;
}
static int qat_alg_skcipher_blk_decrypt(struct skcipher_request *req)
diff --git a/drivers/crypto/qat/qat_common/qat_algs_send.c b/drivers/crypto/qat/qat_common/qat_algs_send.c
new file mode 100644
index 000000000000..ff5b4347f783
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/qat_algs_send.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+/* Copyright(c) 2022 Intel Corporation */
+#include "adf_transport.h"
+#include "qat_algs_send.h"
+#include "qat_crypto.h"
+
+#define ADF_MAX_RETRIES 20
+
+static int qat_alg_send_message_retry(struct qat_alg_req *req)
+{
+ int ret = 0, ctr = 0;
+
+ do {
+ ret = adf_send_message(req->tx_ring, req->fw_req);
+ } while (ret == -EAGAIN && ctr++ < ADF_MAX_RETRIES);
+
+ if (ret == -EAGAIN)
+ return -ENOSPC;
+
+ return -EINPROGRESS;
+}
+
+void qat_alg_send_backlog(struct qat_instance_backlog *backlog)
+{
+ struct qat_alg_req *req, *tmp;
+
+ spin_lock_bh(&backlog->lock);
+ list_for_each_entry_safe(req, tmp, &backlog->list, list) {
+ if (adf_send_message(req->tx_ring, req->fw_req)) {
+ /* The HW ring is full. Do nothing.
+ * qat_alg_send_backlog() will be invoked again by
+ * another callback.
+ */
+ break;
+ }
+ list_del(&req->list);
+ req->base->complete(req->base, -EINPROGRESS);
+ }
+ spin_unlock_bh(&backlog->lock);
+}
+
+static void qat_alg_backlog_req(struct qat_alg_req *req,
+ struct qat_instance_backlog *backlog)
+{
+ INIT_LIST_HEAD(&req->list);
+
+ spin_lock_bh(&backlog->lock);
+ list_add_tail(&req->list, &backlog->list);
+ spin_unlock_bh(&backlog->lock);
+}
+
+static int qat_alg_send_message_maybacklog(struct qat_alg_req *req)
+{
+ struct qat_instance_backlog *backlog = req->backlog;
+ struct adf_etr_ring_data *tx_ring = req->tx_ring;
+ u32 *fw_req = req->fw_req;
+
+ /* If any request is already backlogged, then add to backlog list */
+ if (!list_empty(&backlog->list))
+ goto enqueue;
+
+ /* If ring is nearly full, then add to backlog list */
+ if (adf_ring_nearly_full(tx_ring))
+ goto enqueue;
+
+ /* If adding request to HW ring fails, then add to backlog list */
+ if (adf_send_message(tx_ring, fw_req))
+ goto enqueue;
+
+ return -EINPROGRESS;
+
+enqueue:
+ qat_alg_backlog_req(req, backlog);
+
+ return -EBUSY;
+}
+
+int qat_alg_send_message(struct qat_alg_req *req)
+{
+ u32 flags = req->base->flags;
+
+ if (flags & CRYPTO_TFM_REQ_MAY_BACKLOG)
+ return qat_alg_send_message_maybacklog(req);
+ else
+ return qat_alg_send_message_retry(req);
+}
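
For callers, the new send helpers keep the crypto API contract: -EINPROGRESS means the request reached the HW ring, -EBUSY means it was backlogged (CRYPTO_TFM_REQ_MAY_BACKLOG set) and will be completed later, and -ENOSPC means it was dropped after the retries and the caller must unwind. A minimal calling sketch, with a hypothetical cleanup helper standing in for e.g. qat_alg_free_bufl():

	ret = qat_alg_send_message(alg_req);
	if (ret == -ENOSPC)
		cleanup_request(qat_req);	/* hypothetical unwind helper */

	return ret;	/* propagated as-is to the crypto API */
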
diff --git a/drivers/crypto/qat/qat_common/qat_algs_send.h b/drivers/crypto/qat/qat_common/qat_algs_send.h
new file mode 100644
index 000000000000..5ce9f4f69d8f
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/qat_algs_send.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
+/* Copyright(c) 2022 Intel Corporation */
+#ifndef QAT_ALGS_SEND_H
+#define QAT_ALGS_SEND_H
+
+#include "qat_crypto.h"
+
+int qat_alg_send_message(struct qat_alg_req *req);
+void qat_alg_send_backlog(struct qat_instance_backlog *backlog);
+
+#endif
diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c
index b0b78445418b..16d97db9ea15 100644
--- a/drivers/crypto/qat/qat_common/qat_asym_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c
@@ -12,6 +12,7 @@
#include <crypto/scatterwalk.h>
#include "icp_qat_fw_pke.h"
#include "adf_accel_devices.h"
+#include "qat_algs_send.h"
#include "adf_transport.h"
#include "adf_common_drv.h"
#include "qat_crypto.h"
@@ -135,8 +136,23 @@ struct qat_asym_request {
} areq;
int err;
void (*cb)(struct icp_qat_fw_pke_resp *resp);
+ struct qat_alg_req alg_req;
} __aligned(64);
+static int qat_alg_send_asym_message(struct qat_asym_request *qat_req,
+ struct qat_crypto_instance *inst,
+ struct crypto_async_request *base)
+{
+ struct qat_alg_req *alg_req = &qat_req->alg_req;
+
+ alg_req->fw_req = (u32 *)&qat_req->req;
+ alg_req->tx_ring = inst->pke_tx;
+ alg_req->base = base;
+ alg_req->backlog = &inst->backlog;
+
+ return qat_alg_send_message(alg_req);
+}
+
static void qat_dh_cb(struct icp_qat_fw_pke_resp *resp)
{
struct qat_asym_request *req = (void *)(__force long)resp->opaque;
@@ -148,26 +164,21 @@ static void qat_dh_cb(struct icp_qat_fw_pke_resp *resp)
err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL;
if (areq->src) {
- if (req->src_align)
- dma_free_coherent(dev, req->ctx.dh->p_size,
- req->src_align, req->in.dh.in.b);
- else
- dma_unmap_single(dev, req->in.dh.in.b,
- req->ctx.dh->p_size, DMA_TO_DEVICE);
+ dma_unmap_single(dev, req->in.dh.in.b, req->ctx.dh->p_size,
+ DMA_TO_DEVICE);
+ kfree_sensitive(req->src_align);
}
areq->dst_len = req->ctx.dh->p_size;
if (req->dst_align) {
scatterwalk_map_and_copy(req->dst_align, areq->dst, 0,
areq->dst_len, 1);
-
- dma_free_coherent(dev, req->ctx.dh->p_size, req->dst_align,
- req->out.dh.r);
- } else {
- dma_unmap_single(dev, req->out.dh.r, req->ctx.dh->p_size,
- DMA_FROM_DEVICE);
+ kfree_sensitive(req->dst_align);
}
+ dma_unmap_single(dev, req->out.dh.r, req->ctx.dh->p_size,
+ DMA_FROM_DEVICE);
+
dma_unmap_single(dev, req->phy_in, sizeof(struct qat_dh_input_params),
DMA_TO_DEVICE);
dma_unmap_single(dev, req->phy_out,
@@ -213,8 +224,10 @@ static int qat_dh_compute_value(struct kpp_request *req)
struct qat_asym_request *qat_req =
PTR_ALIGN(kpp_request_ctx(req), 64);
struct icp_qat_fw_pke_request *msg = &qat_req->req;
- int ret, ctr = 0;
+ gfp_t flags = qat_algs_alloc_flags(&req->base);
int n_input_params = 0;
+ u8 *vaddr;
+ int ret;
if (unlikely(!ctx->xa))
return -EINVAL;
@@ -223,6 +236,10 @@ static int qat_dh_compute_value(struct kpp_request *req)
req->dst_len = ctx->p_size;
return -EOVERFLOW;
}
+
+ if (req->src_len > ctx->p_size)
+ return -EINVAL;
+
memset(msg, '\0', sizeof(*msg));
ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr,
ICP_QAT_FW_COMN_REQ_FLAG_SET);
@@ -271,27 +288,24 @@ static int qat_dh_compute_value(struct kpp_request *req)
*/
if (sg_is_last(req->src) && req->src_len == ctx->p_size) {
qat_req->src_align = NULL;
- qat_req->in.dh.in.b = dma_map_single(dev,
- sg_virt(req->src),
- req->src_len,
- DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(dev,
- qat_req->in.dh.in.b)))
- return ret;
-
+ vaddr = sg_virt(req->src);
} else {
int shift = ctx->p_size - req->src_len;
- qat_req->src_align = dma_alloc_coherent(dev,
- ctx->p_size,
- &qat_req->in.dh.in.b,
- GFP_KERNEL);
+ qat_req->src_align = kzalloc(ctx->p_size, flags);
if (unlikely(!qat_req->src_align))
return ret;
scatterwalk_map_and_copy(qat_req->src_align + shift,
req->src, 0, req->src_len, 0);
+
+ vaddr = qat_req->src_align;
}
+
+ qat_req->in.dh.in.b = dma_map_single(dev, vaddr, ctx->p_size,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->in.dh.in.b)))
+ goto unmap_src;
}
/*
* dst can be of any size in valid range, but HW expects it to be the
@@ -302,20 +316,18 @@ static int qat_dh_compute_value(struct kpp_request *req)
*/
if (sg_is_last(req->dst) && req->dst_len == ctx->p_size) {
qat_req->dst_align = NULL;
- qat_req->out.dh.r = dma_map_single(dev, sg_virt(req->dst),
- req->dst_len,
- DMA_FROM_DEVICE);
-
- if (unlikely(dma_mapping_error(dev, qat_req->out.dh.r)))
- goto unmap_src;
-
+ vaddr = sg_virt(req->dst);
} else {
- qat_req->dst_align = dma_alloc_coherent(dev, ctx->p_size,
- &qat_req->out.dh.r,
- GFP_KERNEL);
+ qat_req->dst_align = kzalloc(ctx->p_size, flags);
if (unlikely(!qat_req->dst_align))
goto unmap_src;
+
+ vaddr = qat_req->dst_align;
}
+ qat_req->out.dh.r = dma_map_single(dev, vaddr, ctx->p_size,
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->out.dh.r)))
+ goto unmap_dst;
qat_req->in.dh.in_tab[n_input_params] = 0;
qat_req->out.dh.out_tab[1] = 0;
@@ -338,13 +350,13 @@ static int qat_dh_compute_value(struct kpp_request *req)
msg->input_param_count = n_input_params;
msg->output_param_count = 1;
- do {
- ret = adf_send_message(ctx->inst->pke_tx, (u32 *)msg);
- } while (ret == -EBUSY && ctr++ < 100);
+ ret = qat_alg_send_asym_message(qat_req, inst, &req->base);
+ if (ret == -ENOSPC)
+ goto unmap_all;
- if (!ret)
- return -EINPROGRESS;
+ return ret;
+unmap_all:
if (!dma_mapping_error(dev, qat_req->phy_out))
dma_unmap_single(dev, qat_req->phy_out,
sizeof(struct qat_dh_output_params),
@@ -355,23 +367,17 @@ unmap_in_params:
sizeof(struct qat_dh_input_params),
DMA_TO_DEVICE);
unmap_dst:
- if (qat_req->dst_align)
- dma_free_coherent(dev, ctx->p_size, qat_req->dst_align,
- qat_req->out.dh.r);
- else
- if (!dma_mapping_error(dev, qat_req->out.dh.r))
- dma_unmap_single(dev, qat_req->out.dh.r, ctx->p_size,
- DMA_FROM_DEVICE);
+ if (!dma_mapping_error(dev, qat_req->out.dh.r))
+ dma_unmap_single(dev, qat_req->out.dh.r, ctx->p_size,
+ DMA_FROM_DEVICE);
+ kfree_sensitive(qat_req->dst_align);
unmap_src:
if (req->src) {
- if (qat_req->src_align)
- dma_free_coherent(dev, ctx->p_size, qat_req->src_align,
- qat_req->in.dh.in.b);
- else
- if (!dma_mapping_error(dev, qat_req->in.dh.in.b))
- dma_unmap_single(dev, qat_req->in.dh.in.b,
- ctx->p_size,
- DMA_TO_DEVICE);
+ if (!dma_mapping_error(dev, qat_req->in.dh.in.b))
+ dma_unmap_single(dev, qat_req->in.dh.in.b,
+ ctx->p_size,
+ DMA_TO_DEVICE);
+ kfree_sensitive(qat_req->src_align);
}
return ret;
}
@@ -420,14 +426,17 @@ static int qat_dh_set_params(struct qat_dh_ctx *ctx, struct dh *params)
static void qat_dh_clear_ctx(struct device *dev, struct qat_dh_ctx *ctx)
{
if (ctx->g) {
+ memset(ctx->g, 0, ctx->p_size);
dma_free_coherent(dev, ctx->p_size, ctx->g, ctx->dma_g);
ctx->g = NULL;
}
if (ctx->xa) {
+ memset(ctx->xa, 0, ctx->p_size);
dma_free_coherent(dev, ctx->p_size, ctx->xa, ctx->dma_xa);
ctx->xa = NULL;
}
if (ctx->p) {
+ memset(ctx->p, 0, ctx->p_size);
dma_free_coherent(dev, ctx->p_size, ctx->p, ctx->dma_p);
ctx->p = NULL;
}
@@ -510,25 +519,22 @@ static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp)
err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL;
- if (req->src_align)
- dma_free_coherent(dev, req->ctx.rsa->key_sz, req->src_align,
- req->in.rsa.enc.m);
- else
- dma_unmap_single(dev, req->in.rsa.enc.m, req->ctx.rsa->key_sz,
- DMA_TO_DEVICE);
+ kfree_sensitive(req->src_align);
+
+ dma_unmap_single(dev, req->in.rsa.enc.m, req->ctx.rsa->key_sz,
+ DMA_TO_DEVICE);
areq->dst_len = req->ctx.rsa->key_sz;
if (req->dst_align) {
scatterwalk_map_and_copy(req->dst_align, areq->dst, 0,
areq->dst_len, 1);
- dma_free_coherent(dev, req->ctx.rsa->key_sz, req->dst_align,
- req->out.rsa.enc.c);
- } else {
- dma_unmap_single(dev, req->out.rsa.enc.c, req->ctx.rsa->key_sz,
- DMA_FROM_DEVICE);
+ kfree_sensitive(req->dst_align);
}
+ dma_unmap_single(dev, req->out.rsa.enc.c, req->ctx.rsa->key_sz,
+ DMA_FROM_DEVICE);
+
dma_unmap_single(dev, req->phy_in, sizeof(struct qat_rsa_input_params),
DMA_TO_DEVICE);
dma_unmap_single(dev, req->phy_out,
@@ -542,8 +548,11 @@ void qat_alg_asym_callback(void *_resp)
{
struct icp_qat_fw_pke_resp *resp = _resp;
struct qat_asym_request *areq = (void *)(__force long)resp->opaque;
+ struct qat_instance_backlog *backlog = areq->alg_req.backlog;
areq->cb(resp);
+
+ qat_alg_send_backlog(backlog);
}
#define PKE_RSA_EP_512 0x1c161b21
@@ -642,7 +651,9 @@ static int qat_rsa_enc(struct akcipher_request *req)
struct qat_asym_request *qat_req =
PTR_ALIGN(akcipher_request_ctx(req), 64);
struct icp_qat_fw_pke_request *msg = &qat_req->req;
- int ret, ctr = 0;
+ gfp_t flags = qat_algs_alloc_flags(&req->base);
+ u8 *vaddr;
+ int ret;
if (unlikely(!ctx->n || !ctx->e))
return -EINVAL;
@@ -651,6 +662,10 @@ static int qat_rsa_enc(struct akcipher_request *req)
req->dst_len = ctx->key_sz;
return -EOVERFLOW;
}
+
+ if (req->src_len > ctx->key_sz)
+ return -EINVAL;
+
memset(msg, '\0', sizeof(*msg));
ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr,
ICP_QAT_FW_COMN_REQ_FLAG_SET);
@@ -679,40 +694,39 @@ static int qat_rsa_enc(struct akcipher_request *req)
*/
if (sg_is_last(req->src) && req->src_len == ctx->key_sz) {
qat_req->src_align = NULL;
- qat_req->in.rsa.enc.m = dma_map_single(dev, sg_virt(req->src),
- req->src_len, DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.enc.m)))
- return ret;
-
+ vaddr = sg_virt(req->src);
} else {
int shift = ctx->key_sz - req->src_len;
- qat_req->src_align = dma_alloc_coherent(dev, ctx->key_sz,
- &qat_req->in.rsa.enc.m,
- GFP_KERNEL);
+ qat_req->src_align = kzalloc(ctx->key_sz, flags);
if (unlikely(!qat_req->src_align))
return ret;
scatterwalk_map_and_copy(qat_req->src_align + shift, req->src,
0, req->src_len, 0);
+ vaddr = qat_req->src_align;
}
- if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) {
- qat_req->dst_align = NULL;
- qat_req->out.rsa.enc.c = dma_map_single(dev, sg_virt(req->dst),
- req->dst_len,
- DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.enc.c)))
- goto unmap_src;
+ qat_req->in.rsa.enc.m = dma_map_single(dev, vaddr, ctx->key_sz,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.enc.m)))
+ goto unmap_src;
+ if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) {
+ qat_req->dst_align = NULL;
+ vaddr = sg_virt(req->dst);
} else {
- qat_req->dst_align = dma_alloc_coherent(dev, ctx->key_sz,
- &qat_req->out.rsa.enc.c,
- GFP_KERNEL);
+ qat_req->dst_align = kzalloc(ctx->key_sz, flags);
if (unlikely(!qat_req->dst_align))
goto unmap_src;
-
+ vaddr = qat_req->dst_align;
}
+
+ qat_req->out.rsa.enc.c = dma_map_single(dev, vaddr, ctx->key_sz,
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.enc.c)))
+ goto unmap_dst;
+
qat_req->in.rsa.in_tab[3] = 0;
qat_req->out.rsa.out_tab[1] = 0;
qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.enc.m,
@@ -732,13 +746,14 @@ static int qat_rsa_enc(struct akcipher_request *req)
msg->pke_mid.opaque = (u64)(__force long)qat_req;
msg->input_param_count = 3;
msg->output_param_count = 1;
- do {
- ret = adf_send_message(ctx->inst->pke_tx, (u32 *)msg);
- } while (ret == -EBUSY && ctr++ < 100);
- if (!ret)
- return -EINPROGRESS;
+ ret = qat_alg_send_asym_message(qat_req, inst, &req->base);
+ if (ret == -ENOSPC)
+ goto unmap_all;
+ return ret;
+
+unmap_all:
if (!dma_mapping_error(dev, qat_req->phy_out))
dma_unmap_single(dev, qat_req->phy_out,
sizeof(struct qat_rsa_output_params),
@@ -749,21 +764,15 @@ unmap_in_params:
sizeof(struct qat_rsa_input_params),
DMA_TO_DEVICE);
unmap_dst:
- if (qat_req->dst_align)
- dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align,
- qat_req->out.rsa.enc.c);
- else
- if (!dma_mapping_error(dev, qat_req->out.rsa.enc.c))
- dma_unmap_single(dev, qat_req->out.rsa.enc.c,
- ctx->key_sz, DMA_FROM_DEVICE);
+ if (!dma_mapping_error(dev, qat_req->out.rsa.enc.c))
+ dma_unmap_single(dev, qat_req->out.rsa.enc.c,
+ ctx->key_sz, DMA_FROM_DEVICE);
+ kfree_sensitive(qat_req->dst_align);
unmap_src:
- if (qat_req->src_align)
- dma_free_coherent(dev, ctx->key_sz, qat_req->src_align,
- qat_req->in.rsa.enc.m);
- else
- if (!dma_mapping_error(dev, qat_req->in.rsa.enc.m))
- dma_unmap_single(dev, qat_req->in.rsa.enc.m,
- ctx->key_sz, DMA_TO_DEVICE);
+ if (!dma_mapping_error(dev, qat_req->in.rsa.enc.m))
+ dma_unmap_single(dev, qat_req->in.rsa.enc.m, ctx->key_sz,
+ DMA_TO_DEVICE);
+ kfree_sensitive(qat_req->src_align);
return ret;
}
@@ -776,7 +785,9 @@ static int qat_rsa_dec(struct akcipher_request *req)
struct qat_asym_request *qat_req =
PTR_ALIGN(akcipher_request_ctx(req), 64);
struct icp_qat_fw_pke_request *msg = &qat_req->req;
- int ret, ctr = 0;
+ gfp_t flags = qat_algs_alloc_flags(&req->base);
+ u8 *vaddr;
+ int ret;
if (unlikely(!ctx->n || !ctx->d))
return -EINVAL;
@@ -785,6 +796,10 @@ static int qat_rsa_dec(struct akcipher_request *req)
req->dst_len = ctx->key_sz;
return -EOVERFLOW;
}
+
+ if (req->src_len > ctx->key_sz)
+ return -EINVAL;
+
memset(msg, '\0', sizeof(*msg));
ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr,
ICP_QAT_FW_COMN_REQ_FLAG_SET);
@@ -823,40 +838,37 @@ static int qat_rsa_dec(struct akcipher_request *req)
*/
if (sg_is_last(req->src) && req->src_len == ctx->key_sz) {
qat_req->src_align = NULL;
- qat_req->in.rsa.dec.c = dma_map_single(dev, sg_virt(req->src),
- req->dst_len, DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.dec.c)))
- return ret;
-
+ vaddr = sg_virt(req->src);
} else {
int shift = ctx->key_sz - req->src_len;
- qat_req->src_align = dma_alloc_coherent(dev, ctx->key_sz,
- &qat_req->in.rsa.dec.c,
- GFP_KERNEL);
+ qat_req->src_align = kzalloc(ctx->key_sz, flags);
if (unlikely(!qat_req->src_align))
return ret;
scatterwalk_map_and_copy(qat_req->src_align + shift, req->src,
0, req->src_len, 0);
+ vaddr = qat_req->src_align;
}
- if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) {
- qat_req->dst_align = NULL;
- qat_req->out.rsa.dec.m = dma_map_single(dev, sg_virt(req->dst),
- req->dst_len,
- DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.dec.m)))
- goto unmap_src;
+ qat_req->in.rsa.dec.c = dma_map_single(dev, vaddr, ctx->key_sz,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.dec.c)))
+ goto unmap_src;
+ if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) {
+ qat_req->dst_align = NULL;
+ vaddr = sg_virt(req->dst);
} else {
- qat_req->dst_align = dma_alloc_coherent(dev, ctx->key_sz,
- &qat_req->out.rsa.dec.m,
- GFP_KERNEL);
+ qat_req->dst_align = kzalloc(ctx->key_sz, flags);
if (unlikely(!qat_req->dst_align))
goto unmap_src;
-
+ vaddr = qat_req->dst_align;
}
+ qat_req->out.rsa.dec.m = dma_map_single(dev, vaddr, ctx->key_sz,
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.dec.m)))
+ goto unmap_dst;
if (ctx->crt_mode)
qat_req->in.rsa.in_tab[6] = 0;
@@ -884,13 +896,14 @@ static int qat_rsa_dec(struct akcipher_request *req)
msg->input_param_count = 3;
msg->output_param_count = 1;
- do {
- ret = adf_send_message(ctx->inst->pke_tx, (u32 *)msg);
- } while (ret == -EBUSY && ctr++ < 100);
- if (!ret)
- return -EINPROGRESS;
+ ret = qat_alg_send_asym_message(qat_req, inst, &req->base);
+ if (ret == -ENOSPC)
+ goto unmap_all;
+
+ return ret;
+unmap_all:
if (!dma_mapping_error(dev, qat_req->phy_out))
dma_unmap_single(dev, qat_req->phy_out,
sizeof(struct qat_rsa_output_params),
@@ -901,21 +914,15 @@ unmap_in_params:
sizeof(struct qat_rsa_input_params),
DMA_TO_DEVICE);
unmap_dst:
- if (qat_req->dst_align)
- dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align,
- qat_req->out.rsa.dec.m);
- else
- if (!dma_mapping_error(dev, qat_req->out.rsa.dec.m))
- dma_unmap_single(dev, qat_req->out.rsa.dec.m,
- ctx->key_sz, DMA_FROM_DEVICE);
+ if (!dma_mapping_error(dev, qat_req->out.rsa.dec.m))
+ dma_unmap_single(dev, qat_req->out.rsa.dec.m,
+ ctx->key_sz, DMA_FROM_DEVICE);
+ kfree_sensitive(qat_req->dst_align);
unmap_src:
- if (qat_req->src_align)
- dma_free_coherent(dev, ctx->key_sz, qat_req->src_align,
- qat_req->in.rsa.dec.c);
- else
- if (!dma_mapping_error(dev, qat_req->in.rsa.dec.c))
- dma_unmap_single(dev, qat_req->in.rsa.dec.c,
- ctx->key_sz, DMA_TO_DEVICE);
+ if (!dma_mapping_error(dev, qat_req->in.rsa.dec.c))
+ dma_unmap_single(dev, qat_req->in.rsa.dec.c, ctx->key_sz,
+ DMA_TO_DEVICE);
+ kfree_sensitive(qat_req->src_align);
return ret;
}
@@ -1233,18 +1240,8 @@ static void qat_rsa_exit_tfm(struct crypto_akcipher *tfm)
struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
struct device *dev = &GET_DEV(ctx->inst->accel_dev);
- if (ctx->n)
- dma_free_coherent(dev, ctx->key_sz, ctx->n, ctx->dma_n);
- if (ctx->e)
- dma_free_coherent(dev, ctx->key_sz, ctx->e, ctx->dma_e);
- if (ctx->d) {
- memset(ctx->d, '\0', ctx->key_sz);
- dma_free_coherent(dev, ctx->key_sz, ctx->d, ctx->dma_d);
- }
+ qat_rsa_clear_ctx(dev, ctx);
qat_crypto_put_instance(ctx->inst);
- ctx->n = NULL;
- ctx->e = NULL;
- ctx->d = NULL;
}
static struct akcipher_alg rsa = {
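
The recurring pattern in the DH/RSA rework above is the move from coherent DMA buffers to streaming mappings over kzalloc()'d memory, freed with kfree_sensitive() so key material is zeroed before release. A generic sketch of that lifecycle, assuming dev, len and flags come from the surrounding request:

	u8 *vaddr = kzalloc(len, flags);
	dma_addr_t handle;

	if (!vaddr)
		return -ENOMEM;

	handle = dma_map_single(dev, vaddr, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, handle)) {
		kfree_sensitive(vaddr);		/* zeroes, then frees */
		return -ENOMEM;
	}

	/* ... submit to HW, wait for the callback ... */

	dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
	kfree_sensitive(vaddr);
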
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c
index 67c9588e89df..9341d892533a 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.c
+++ b/drivers/crypto/qat/qat_common/qat_crypto.c
@@ -161,13 +161,6 @@ int qat_crypto_dev_config(struct adf_accel_dev *accel_dev)
if (ret)
goto err;
- /* Temporarily set the number of crypto instances to zero to avoid
- * registering the crypto algorithms.
- * This will be removed when the algorithms will support the
- * CRYPTO_TFM_REQ_MAY_BACKLOG flag
- */
- instances = 0;
-
for (i = 0; i < instances; i++) {
val = i;
snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i);
@@ -353,6 +346,9 @@ static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev)
&inst->pke_rx);
if (ret)
goto err;
+
+ INIT_LIST_HEAD(&inst->backlog.list);
+ spin_lock_init(&inst->backlog.lock);
}
return 0;
err:
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h
index b6a4c95ae003..df3c738ce323 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.h
+++ b/drivers/crypto/qat/qat_common/qat_crypto.h
@@ -9,6 +9,19 @@
#include "adf_accel_devices.h"
#include "icp_qat_fw_la.h"
+struct qat_instance_backlog {
+ struct list_head list;
+ spinlock_t lock; /* protects backlog list */
+};
+
+struct qat_alg_req {
+ u32 *fw_req;
+ struct adf_etr_ring_data *tx_ring;
+ struct crypto_async_request *base;
+ struct list_head list;
+ struct qat_instance_backlog *backlog;
+};
+
struct qat_crypto_instance {
struct adf_etr_ring_data *sym_tx;
struct adf_etr_ring_data *sym_rx;
@@ -19,8 +32,29 @@ struct qat_crypto_instance {
unsigned long state;
int id;
atomic_t refctr;
+ struct qat_instance_backlog backlog;
};
+#define QAT_MAX_BUFF_DESC 4
+
+struct qat_alg_buf {
+ u32 len;
+ u32 resrvd;
+ u64 addr;
+} __packed;
+
+struct qat_alg_buf_list {
+ u64 resrvd;
+ u32 num_bufs;
+ u32 num_mapped_bufs;
+ struct qat_alg_buf bufers[];
+} __packed;
+
+struct qat_alg_fixed_buf_list {
+ struct qat_alg_buf_list sgl_hdr;
+ struct qat_alg_buf descriptors[QAT_MAX_BUFF_DESC];
+} __packed __aligned(64);
+
struct qat_crypto_request_buffs {
struct qat_alg_buf_list *bl;
dma_addr_t blp;
@@ -28,6 +62,10 @@ struct qat_crypto_request_buffs {
dma_addr_t bloutp;
size_t sz;
size_t sz_out;
+ bool sgl_src_valid;
+ bool sgl_dst_valid;
+ struct qat_alg_fixed_buf_list sgl_src;
+ struct qat_alg_fixed_buf_list sgl_dst;
};
struct qat_crypto_request;
@@ -53,6 +91,7 @@ struct qat_crypto_request {
u8 iv[AES_BLOCK_SIZE];
};
bool encryption;
+ struct qat_alg_req alg_req;
};
static inline bool adf_hw_dev_has_crypto(struct adf_accel_dev *accel_dev)
@@ -70,4 +109,9 @@ static inline bool adf_hw_dev_has_crypto(struct adf_accel_dev *accel_dev)
return true;
}
+static inline gfp_t qat_algs_alloc_flags(struct crypto_async_request *req)
+{
+ return req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
+}
+
#endif
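
qat_algs_alloc_flags() is what lets the datapath drop the unconditional GFP_ATOMIC: allocations may sleep whenever the submitter set CRYPTO_TFM_REQ_MAY_SLEEP. Combined with QAT_MAX_BUFF_DESC, small scatterlists use the descriptor lists embedded in the request and skip allocation entirely. A short sketch of the selection, following the shapes used in qat_alg_sgl_to_bufl():

	gfp_t flags = qat_algs_alloc_flags(&req->base);	/* GFP_KERNEL or GFP_ATOMIC */

	if (n > QAT_MAX_BUFF_DESC) {
		bufl = kzalloc_node(sz, flags, node);	/* large list: heap */
		if (unlikely(!bufl))
			return -ENOMEM;
	} else {
		bufl = &qat_req->buf.sgl_src.sgl_hdr;	/* small list: embedded, cannot fail */
		memset(bufl, 0, sizeof(struct qat_alg_buf_list));
		qat_req->buf.sgl_src_valid = true;	/* tells the free path not to kfree */
	}
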
diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c
index 4bfd8f3566f7..7bba35280dac 100644
--- a/drivers/crypto/qat/qat_common/qat_hal.c
+++ b/drivers/crypto/qat/qat_common/qat_hal.c
@@ -695,6 +695,7 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle,
handle->pci_dev = pci_info->pci_dev;
switch (handle->pci_dev->device) {
case ADF_4XXX_PCI_DEVICE_ID:
+ case ADF_401XX_PCI_DEVICE_ID:
handle->chip_info->mmp_sram_size = 0;
handle->chip_info->nn = false;
handle->chip_info->lm2lm3 = true;
diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c
index 6356402a2c9e..0fe5a474aa45 100644
--- a/drivers/crypto/qat/qat_common/qat_uclo.c
+++ b/drivers/crypto/qat/qat_common/qat_uclo.c
@@ -519,7 +519,7 @@ qat_uclo_map_chunk(char *buf, struct icp_qat_uof_filehdr *file_hdr,
return NULL;
}
-static unsigned int
+static int
qat_uclo_check_image_compat(struct icp_qat_uof_encap_obj *encap_uof_obj,
struct icp_qat_uof_image *image)
{
@@ -731,6 +731,7 @@ qat_uclo_get_dev_type(struct icp_qat_fw_loader_handle *handle)
case PCI_DEVICE_ID_INTEL_QAT_C3XXX:
return ICP_QAT_AC_C3XXX_DEV_TYPE;
case ADF_4XXX_PCI_DEVICE_ID:
+ case ADF_401XX_PCI_DEVICE_ID:
return ICP_QAT_AC_4XXX_A_DEV_TYPE;
default:
pr_err("QAT: unsupported device 0x%x\n",
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
index 09599fe4d2f3..cb3bdd3618fb 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
@@ -7,6 +7,8 @@
#include "adf_dh895xcc_hw_data.h"
#include "icp_qat_hw.h"
+#define ADF_DH895XCC_VF_MSK 0xFFFFFFFF
+
/* Worker thread to service arbiter mappings */
static const u32 thrd_to_arb_map[ADF_DH895XCC_MAX_ACCELENGINES] = {
0x12222AAA, 0x11666666, 0x12222AAA, 0x11666666,
@@ -58,17 +60,24 @@ static u32 get_accel_cap(struct adf_accel_dev *accel_dev)
capabilities = ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC |
ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC |
- ICP_ACCEL_CAPABILITIES_AUTHENTICATION;
+ ICP_ACCEL_CAPABILITIES_AUTHENTICATION |
+ ICP_ACCEL_CAPABILITIES_CIPHER |
+ ICP_ACCEL_CAPABILITIES_COMPRESSION;
/* Read accelerator capabilities mask */
pci_read_config_dword(pdev, ADF_DEVICE_LEGFUSE_OFFSET, &legfuses);
- if (legfuses & ICP_ACCEL_MASK_CIPHER_SLICE)
+ /* A set bit in legfuses means the feature is OFF in this SKU */
+ if (legfuses & ICP_ACCEL_MASK_CIPHER_SLICE) {
capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC;
+ capabilities &= ~ICP_ACCEL_CAPABILITIES_CIPHER;
+ }
if (legfuses & ICP_ACCEL_MASK_PKE_SLICE)
capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC;
- if (legfuses & ICP_ACCEL_MASK_AUTH_SLICE)
+ if (legfuses & ICP_ACCEL_MASK_AUTH_SLICE) {
capabilities &= ~ICP_ACCEL_CAPABILITIES_AUTHENTICATION;
+ capabilities &= ~ICP_ACCEL_CAPABILITIES_CIPHER;
+ }
if (legfuses & ICP_ACCEL_MASK_COMPRESS_SLICE)
capabilities &= ~ICP_ACCEL_CAPABILITIES_COMPRESSION;
@@ -100,43 +109,6 @@ static const u32 *adf_get_arbiter_mapping(void)
return thrd_to_arb_map;
}
-static void adf_enable_ints(struct adf_accel_dev *accel_dev)
-{
- void __iomem *addr;
-
- addr = (&GET_BARS(accel_dev)[ADF_DH895XCC_PMISC_BAR])->virt_addr;
-
- /* Enable bundle and misc interrupts */
- ADF_CSR_WR(addr, ADF_DH895XCC_SMIAPF0_MASK_OFFSET,
- accel_dev->pf.vf_info ? 0 :
- BIT_ULL(GET_MAX_BANKS(accel_dev)) - 1);
- ADF_CSR_WR(addr, ADF_DH895XCC_SMIAPF1_MASK_OFFSET,
- ADF_DH895XCC_SMIA1_MASK);
-}
-
-static u32 get_vf2pf_sources(void __iomem *pmisc_bar)
-{
- u32 errsou3, errmsk3, errsou5, errmsk5, vf_int_mask;
-
- /* Get the interrupt sources triggered by VFs */
- errsou3 = ADF_CSR_RD(pmisc_bar, ADF_GEN2_ERRSOU3);
- vf_int_mask = ADF_DH895XCC_ERR_REG_VF2PF_L(errsou3);
-
- /* To avoid adding duplicate entries to work queue, clear
- * vf_int_mask_sets bits that are already masked in ERRMSK register.
- */
- errmsk3 = ADF_CSR_RD(pmisc_bar, ADF_GEN2_ERRMSK3);
- vf_int_mask &= ~ADF_DH895XCC_ERR_REG_VF2PF_L(errmsk3);
-
- /* Do the same for ERRSOU5 */
- errsou5 = ADF_CSR_RD(pmisc_bar, ADF_GEN2_ERRSOU5);
- errmsk5 = ADF_CSR_RD(pmisc_bar, ADF_GEN2_ERRMSK5);
- vf_int_mask |= ADF_DH895XCC_ERR_REG_VF2PF_U(errsou5);
- vf_int_mask &= ~ADF_DH895XCC_ERR_REG_VF2PF_U(errmsk5);
-
- return vf_int_mask;
-}
-
static void enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask)
{
/* Enable VF2PF Messaging Ints - VFs 0 through 15 per vf_mask[15:0] */
@@ -150,27 +122,71 @@ static void enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask)
if (vf_mask >> 16) {
u32 val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK5)
& ~ADF_DH895XCC_ERR_MSK_VF2PF_U(vf_mask);
-
ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK5, val);
}
}
-static void disable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask)
+static void disable_all_vf2pf_interrupts(void __iomem *pmisc_addr)
{
+ u32 val;
+
/* Disable VF2PF interrupts for VFs 0 through 15 per vf_mask[15:0] */
- if (vf_mask & 0xFFFF) {
- u32 val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3)
- | ADF_DH895XCC_ERR_MSK_VF2PF_L(vf_mask);
- ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, val);
- }
+ val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3)
+ | ADF_DH895XCC_ERR_MSK_VF2PF_L(ADF_DH895XCC_VF_MSK);
+ ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, val);
/* Disable VF2PF interrupts for VFs 16 through 31 per vf_mask[31:16] */
- if (vf_mask >> 16) {
- u32 val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK5)
- | ADF_DH895XCC_ERR_MSK_VF2PF_U(vf_mask);
+ val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK5)
+ | ADF_DH895XCC_ERR_MSK_VF2PF_U(ADF_DH895XCC_VF_MSK);
+ ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK5, val);
+}
- ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK5, val);
- }
+static u32 disable_pending_vf2pf_interrupts(void __iomem *pmisc_addr)
+{
+ u32 sources, pending, disabled;
+ u32 errsou3, errmsk3;
+ u32 errsou5, errmsk5;
+
+ /* Get the interrupt sources triggered by VFs */
+ errsou3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRSOU3);
+ errsou5 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRSOU5);
+ sources = ADF_DH895XCC_ERR_REG_VF2PF_L(errsou3)
+ | ADF_DH895XCC_ERR_REG_VF2PF_U(errsou5);
+
+ if (!sources)
+ return 0;
+
+ /* Get the already disabled interrupts */
+ errmsk3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3);
+ errmsk5 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK5);
+ disabled = ADF_DH895XCC_ERR_REG_VF2PF_L(errmsk3)
+ | ADF_DH895XCC_ERR_REG_VF2PF_U(errmsk5);
+
+ pending = sources & ~disabled;
+ if (!pending)
+ return 0;
+
+ /* Due to HW limitations, when disabling the interrupts, we can't
+ * just disable the requested sources, as this would lead to missed
+ * interrupts if the sources change just before writing to ERRMSK3 and
+ * ERRMSK5.
+ * To work around it, disable all and re-enable only the sources that
+ * were not asserted and were not already disabled. Re-enabling will
+ * trigger a new interrupt for the sources that have changed in the
+ * meantime, if any.
+ */
+ errmsk3 |= ADF_DH895XCC_ERR_MSK_VF2PF_L(ADF_DH895XCC_VF_MSK);
+ errmsk5 |= ADF_DH895XCC_ERR_MSK_VF2PF_U(ADF_DH895XCC_VF_MSK);
+ ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3);
+ ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK5, errmsk5);
+
+ errmsk3 &= ADF_DH895XCC_ERR_MSK_VF2PF_L(sources | disabled);
+ errmsk5 &= ADF_DH895XCC_ERR_MSK_VF2PF_U(sources | disabled);
+ ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3);
+ ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK5, errmsk5);
+
+ /* Return the sources of the (new) interrupt(s) */
+ return pending;
}
static void configure_iov_threads(struct adf_accel_dev *accel_dev, bool enable)
@@ -215,14 +231,14 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
hw_data->init_arb = adf_init_arb;
hw_data->exit_arb = adf_exit_arb;
hw_data->get_arb_mapping = adf_get_arbiter_mapping;
- hw_data->enable_ints = adf_enable_ints;
+ hw_data->enable_ints = adf_gen2_enable_ints;
hw_data->reset_device = adf_reset_sbr;
hw_data->disable_iov = adf_disable_sriov;
adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops);
- hw_data->pfvf_ops.get_vf2pf_sources = get_vf2pf_sources;
hw_data->pfvf_ops.enable_vf2pf_interrupts = enable_vf2pf_interrupts;
- hw_data->pfvf_ops.disable_vf2pf_interrupts = disable_vf2pf_interrupts;
+ hw_data->pfvf_ops.disable_all_vf2pf_interrupts = disable_all_vf2pf_interrupts;
+ hw_data->pfvf_ops.disable_pending_vf2pf_interrupts = disable_pending_vf2pf_interrupts;
adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
}
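
The workaround spelled out in the comment inside disable_pending_vf2pf_interrupts() above reduces to a small pattern. Below is a minimal sketch of it, modeling a single ERRSOU/ERRMSK pair for VFs 0 through 15 only; read_csr()/write_csr() are hypothetical stand-ins for ADF_CSR_RD()/ADF_CSR_WR() and are not part of the driver.

    #include <stdint.h>

    #define VF_MSK 0xFFFFu

    enum { ERRSOU, ERRMSK };		/* hypothetical register ids */

    extern uint32_t read_csr(int reg);
    extern void write_csr(int reg, uint32_t val);

    uint32_t disable_pending(void)
    {
    	uint32_t sources  = read_csr(ERRSOU) & VF_MSK;	/* VFs raising an IRQ */
    	uint32_t disabled = read_csr(ERRMSK) & VF_MSK;	/* VFs already masked */
    	uint32_t pending  = sources & ~disabled;

    	if (!pending)
    		return 0;

    	/*
    	 * Mask every VF first: masking only 'pending' could miss a source
    	 * that asserts between the ERRSOU read and the ERRMSK write.
    	 */
    	write_csr(ERRMSK, VF_MSK);

    	/*
    	 * Then leave masked only the pending and already-disabled sources,
    	 * re-enabling the rest. A source that fired inside the window
    	 * re-triggers as soon as its mask bit is cleared again.
    	 */
    	write_csr(ERRMSK, sources | disabled);

    	return pending;
    }

The key point is that masking everything before selectively unmasking turns a potentially lost interrupt into a re-triggered one.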
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
index aa17272a1507..7b674bbe4192 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
@@ -19,10 +19,6 @@
#define ADF_DH895XCC_ACCELERATORS_MASK 0x3F
#define ADF_DH895XCC_ACCELENGINES_MASK 0xFFF
#define ADF_DH895XCC_ETR_MAX_BANKS 32
-#define ADF_DH895XCC_SMIAPF0_MASK_OFFSET (0x3A000 + 0x28)
-#define ADF_DH895XCC_SMIAPF1_MASK_OFFSET (0x3A000 + 0x30)
-#define ADF_DH895XCC_SMIA0_MASK 0xFFFFFFFF
-#define ADF_DH895XCC_SMIA1_MASK 0x1
/* Masks for VF2PF interrupts */
#define ADF_DH895XCC_ERR_REG_VF2PF_L(vf_src) (((vf_src) & 0x01FFFE00) >> 9)
diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c
index 51b58e57153f..6957a125b447 100644
--- a/drivers/crypto/sa2ul.c
+++ b/drivers/crypto/sa2ul.c
@@ -2379,6 +2379,7 @@ static const struct of_device_id of_match[] = {
{ .compatible = "ti,j721e-sa2ul", .data = &am654_match_data, },
{ .compatible = "ti,am654-sa2ul", .data = &am654_match_data, },
{ .compatible = "ti,am64-sa2ul", .data = &am64_match_data, },
+ { .compatible = "ti,am62-sa3ul", .data = &am64_match_data, },
{},
};
MODULE_DEVICE_TABLE(of, of_match);
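
For context on the new "ti,am62-sa3ul" entry (which reuses am64_match_data): a platform driver conventionally recovers the per-SoC .data pointer from such a table at probe time via of_device_get_match_data(). A minimal sketch follows; the sa_match_data type name is assumed from context rather than copied from the driver.

    #include <linux/errno.h>
    #include <linux/of_device.h>
    #include <linux/platform_device.h>

    struct sa_match_data;	/* per-SoC configuration, defined by the driver */

    static int sa_ul_probe_sketch(struct platform_device *pdev)
    {
    	const struct sa_match_data *match_data;

    	/* returns the .data pointer of the matched of_device_id entry */
    	match_data = of_device_get_match_data(&pdev->dev);
    	if (!match_data)
    		return -ENODEV;

    	/* ... configure supported engines/algorithms from match_data ... */
    	return 0;
    }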
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 25c9f825b8b5..c9ad6c213090 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1709,7 +1709,7 @@ static void common_nonsnoop_hash_unmap(struct device *dev,
struct talitos_desc *desc2 = (struct talitos_desc *)
(edesc->buf + edesc->dma_len);
- unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE);
+ unmap_single_talitos_ptr(dev, &desc->ptr[5], DMA_FROM_DEVICE);
if (desc->next_desc &&
desc->ptr[5].ptr != desc2->ptr[5].ptr)
unmap_single_talitos_ptr(dev, &desc2->ptr[5], DMA_FROM_DEVICE);
@@ -1721,8 +1721,8 @@ static void common_nonsnoop_hash_unmap(struct device *dev,
talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0);
/* When using hashctx-in, must unmap it. */
- if (from_talitos_ptr_len(&edesc->desc.ptr[1], is_sec1))
- unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1],
+ if (from_talitos_ptr_len(&desc->ptr[1], is_sec1))
+ unmap_single_talitos_ptr(dev, &desc->ptr[1],
DMA_TO_DEVICE);
else if (desc->next_desc)
unmap_single_talitos_ptr(dev, &desc2->ptr[1],
@@ -1736,8 +1736,8 @@ static void common_nonsnoop_hash_unmap(struct device *dev,
dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len,
DMA_BIDIRECTIONAL);
- if (edesc->desc.next_desc)
- dma_unmap_single(dev, be32_to_cpu(edesc->desc.next_desc),
+ if (desc->next_desc)
+ dma_unmap_single(dev, be32_to_cpu(desc->next_desc),
TALITOS_DESC_SIZE, DMA_BIDIRECTIONAL);
}
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index 5157c118d642..265ef3e96fdd 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -877,9 +877,7 @@ static int hash_dma_final(struct ahash_request *req)
__func__);
goto out;
}
- }
-
- if (!req_ctx->updated) {
+ } else {
ret = hash_setconfiguration(device_data, &ctx->config);
if (ret) {
dev_err(device_data->dev,
diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile
index 709670d2b553..2560cfea1dec 100644
--- a/drivers/crypto/vmx/Makefile
+++ b/drivers/crypto/vmx/Makefile
@@ -2,21 +2,10 @@
obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o
vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o
-ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-override flavour := linux-ppc64le
-else
-override flavour := linux-ppc64
-endif
-
-quiet_cmd_perl = PERL $@
- cmd_perl = $(PERL) $(<) $(flavour) > $(@)
+quiet_cmd_perl = PERL $@
+ cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@
targets += aesp8-ppc.S ghashp8-ppc.S
-$(obj)/aesp8-ppc.S: $(src)/aesp8-ppc.pl FORCE
- $(call if_changed,perl)
-
-$(obj)/ghashp8-ppc.S: $(src)/ghashp8-ppc.pl FORCE
+$(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
$(call if_changed,perl)
-
-clean-files := aesp8-ppc.S ghashp8-ppc.S
diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h
index 709f286e7b25..9656a9a40326 100644
--- a/include/crypto/sm4.h
+++ b/include/crypto/sm4.h
@@ -21,6 +21,10 @@ struct sm4_ctx {
u32 rkey_dec[SM4_RKEY_WORDS];
};
+extern const u32 crypto_sm4_fk[];
+extern const u32 crypto_sm4_ck[];
+extern const u8 crypto_sm4_sbox[];
+
/**
* sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016
* @ctx: The location where the computed key will be stored.
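
With the S-box and FK/CK constant tables now exported above for the arm64 implementations, software users still go through sm4_expandkey() as documented below. A hedged sketch of single-block use, assuming sm4_crypt_block() is the companion one-block helper declared elsewhere in this header:

    #include <crypto/sm4.h>

    static int sm4_one_block(const u8 key[SM4_KEY_SIZE],
    			 const u8 in[SM4_BLOCK_SIZE], u8 out[SM4_BLOCK_SIZE])
    {
    	struct sm4_ctx ctx;
    	int ret;

    	ret = sm4_expandkey(&ctx, key, SM4_KEY_SIZE);
    	if (ret)
    		return ret;	/* invalid key length */

    	/* encrypt one 16-byte block with the expanded encryption round keys */
    	sm4_crypt_block(ctx.rkey_enc, out, in);
    	return 0;
    }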
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index 177f7b7cd414..6cabafffd0dd 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -168,6 +168,12 @@ enum qm_vf_state {
QM_NOT_READY,
};
+struct dfx_diff_registers {
+ u32 *regs;
+ u32 reg_offset;
+ u32 reg_len;
+};
+
struct qm_dfx {
atomic64_t err_irq_cnt;
atomic64_t aeq_irq_cnt;
@@ -190,6 +196,11 @@ struct qm_debug {
struct dentry *debug_root;
struct dentry *qm_d;
struct debugfs_file files[DEBUG_FILE_NUM];
+ unsigned int *qm_last_words;
+	/* ACC engines recording last regs */
+ unsigned int *last_words;
+ struct dfx_diff_registers *qm_diff_regs;
+ struct dfx_diff_registers *acc_diff_regs;
};
struct qm_shaper_factor {
@@ -243,6 +254,7 @@ struct hisi_qm_err_ini {
void (*open_sva_prefetch)(struct hisi_qm *qm);
void (*close_sva_prefetch)(struct hisi_qm *qm);
void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts);
+ void (*show_last_dfx_regs)(struct hisi_qm *qm);
void (*err_info_init)(struct hisi_qm *qm);
};
@@ -433,21 +445,22 @@ int hisi_qm_init(struct hisi_qm *qm);
void hisi_qm_uninit(struct hisi_qm *qm);
int hisi_qm_start(struct hisi_qm *qm);
int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r);
-struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type);
int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg);
int hisi_qm_stop_qp(struct hisi_qp *qp);
-void hisi_qm_release_qp(struct hisi_qp *qp);
int hisi_qp_send(struct hisi_qp *qp, const void *msg);
-int hisi_qm_get_free_qp_num(struct hisi_qm *qm);
-int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number);
void hisi_qm_debug_init(struct hisi_qm *qm);
-enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev);
void hisi_qm_debug_regs_clear(struct hisi_qm *qm);
int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs);
int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen);
int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs);
void hisi_qm_dev_err_init(struct hisi_qm *qm);
void hisi_qm_dev_err_uninit(struct hisi_qm *qm);
+int hisi_qm_diff_regs_init(struct hisi_qm *qm,
+ struct dfx_diff_registers *dregs, int reg_len);
+void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len);
+void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s,
+ struct dfx_diff_registers *dregs, int regs_len);
+
pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev,
pci_channel_state_t state);
pci_ers_result_t hisi_qm_dev_slot_reset(struct pci_dev *pdev);
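
A hedged sketch of how an ACC driver might wire up the diff-regs hooks declared above; the register offsets and table contents here are invented purely for illustration, and the exact init/dump contract lives in the qm core.

    /* registers whose values should be diffed across runs (made-up offsets) */
    static u32 example_reg_offsets[] = { 0x100, 0x104, 0x108 };

    static struct dfx_diff_registers example_diff_regs[] = {
    	{
    		.regs = example_reg_offsets,
    		.reg_offset = 0,
    		.reg_len = ARRAY_SIZE(example_reg_offsets),
    	},
    };

    /* debugfs seq_file show callback dumping the recorded register diffs */
    static int example_diff_regs_show(struct seq_file *s, void *unused)
    {
    	struct hisi_qm *qm = s->private;

    	hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.acc_diff_regs,
    				   ARRAY_SIZE(example_diff_regs));
    	return 0;
    }

    static int example_probe_init(struct hisi_qm *qm)
    {
    	return hisi_qm_diff_regs_init(qm, example_diff_regs,
    				      ARRAY_SIZE(example_diff_regs));
    }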
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index a80b8c4dc2cf..a143ddf9279a 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -124,10 +124,4 @@ config CRYPTO_LIB_CHACHA20POLY1305
config CRYPTO_LIB_SHA256
tristate
-config CRYPTO_LIB_SM3
- tristate
-
-config CRYPTO_LIB_SM4
- tristate
-
endmenu
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 6c872d05d1e6..26be2bbe09c5 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -37,12 +37,6 @@ libpoly1305-y += poly1305.o
obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
libsha256-y := sha256.o
-obj-$(CONFIG_CRYPTO_LIB_SM3) += libsm3.o
-libsm3-y := sm3.o
-
-obj-$(CONFIG_CRYPTO_LIB_SM4) += libsm4.o
-libsm4-y := sm4.o
-
ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y)
libblake2s-y += blake2s-selftest.o
libchacha20poly1305-y += chacha20poly1305-selftest.o
diff --git a/tools/testing/crypto/chacha20-s390/Makefile b/tools/testing/crypto/chacha20-s390/Makefile
new file mode 100644
index 000000000000..db81cd2fb9c5
--- /dev/null
+++ b/tools/testing/crypto/chacha20-s390/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2022 Red Hat, Inc.
+# Author: Vladis Dronov <vdronoff@gmail.com>
+
+obj-m += test_cipher.o
+test_cipher-y := test-cipher.o
+
+all:
+ make -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) modules
+clean:
+ make -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) clean
diff --git a/tools/testing/crypto/chacha20-s390/run-tests.sh b/tools/testing/crypto/chacha20-s390/run-tests.sh
new file mode 100644
index 000000000000..43108794b996
--- /dev/null
+++ b/tools/testing/crypto/chacha20-s390/run-tests.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2022 Red Hat, Inc.
+# Author: Vladis Dronov <vdronoff@gmail.com>
+#
+# This script runs (via insmod) the test-cipher.ko module, which invokes the
+# generic and s390-native ChaCha20 encryption algorithms with different sizes
+# of data. Check 'dmesg' for results.
+#
+# The insmod error is expected:
+# insmod: ERROR: could not insert module test_cipher.ko: Operation not permitted
+
+lsmod | grep chacha | cut -f1 -d' ' | xargs rmmod
+modprobe chacha_generic
+modprobe chacha_s390
+
+# run encryption for different data size, including whole block(s) +/- 1
+insmod test_cipher.ko size=63
+insmod test_cipher.ko size=64
+insmod test_cipher.ko size=65
+insmod test_cipher.ko size=127
+insmod test_cipher.ko size=128
+insmod test_cipher.ko size=129
+insmod test_cipher.ko size=511
+insmod test_cipher.ko size=512
+insmod test_cipher.ko size=513
+insmod test_cipher.ko size=4096
+insmod test_cipher.ko size=65611
+insmod test_cipher.ko size=6291456
+insmod test_cipher.ko size=62914560
+
+# print test logs
+dmesg | tail -170
diff --git a/tools/testing/crypto/chacha20-s390/test-cipher.c b/tools/testing/crypto/chacha20-s390/test-cipher.c
new file mode 100644
index 000000000000..34e8b855266f
--- /dev/null
+++ b/tools/testing/crypto/chacha20-s390/test-cipher.c
@@ -0,0 +1,372 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ * Author: Vladis Dronov <vdronoff@gmail.com>
+ */
+
+#include <asm/elf.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <crypto/skcipher.h>
+#include <crypto/akcipher.h>
+#include <crypto/acompress.h>
+#include <crypto/rng.h>
+#include <crypto/drbg.h>
+#include <crypto/kpp.h>
+#include <crypto/internal/simd.h>
+#include <crypto/chacha.h>
+#include <crypto/aead.h>
+#include <crypto/hash.h>
+#include <linux/crypto.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/fips.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/scatterlist.h>
+#include <linux/time.h>
+#include <linux/vmalloc.h>
+#include <linux/zlib.h>
+#include <linux/once.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+static unsigned int data_size __read_mostly = 256;
+static unsigned int debug __read_mostly;
+
+/* tie all skcipher structures together */
+struct skcipher_def {
+ struct scatterlist sginp, sgout;
+ struct crypto_skcipher *tfm;
+ struct skcipher_request *req;
+ struct crypto_wait wait;
+};
+
+/* Perform cipher operations with the chacha lib */
+static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain)
+{
+ u32 chacha_state[CHACHA_STATE_WORDS];
+ u8 iv[16], key[32];
+ u64 start, end;
+
+ memset(key, 'X', sizeof(key));
+ memset(iv, 'I', sizeof(iv));
+
+ if (debug) {
+ print_hex_dump(KERN_INFO, "key: ", DUMP_PREFIX_OFFSET,
+ 16, 1, key, 32, 1);
+
+ print_hex_dump(KERN_INFO, "iv: ", DUMP_PREFIX_OFFSET,
+ 16, 1, iv, 16, 1);
+ }
+
+ /* Encrypt */
+	chacha_init_arch(chacha_state, (u32 *)key, iv);
+
+ start = ktime_get_ns();
+ chacha_crypt_arch(chacha_state, cipher, plain, data_size, 20);
+ end = ktime_get_ns();
+
+ if (debug)
+ print_hex_dump(KERN_INFO, "encr:", DUMP_PREFIX_OFFSET,
+ 16, 1, cipher,
+ (data_size > 64 ? 64 : data_size), 1);
+
+	pr_info("lib encryption took: %lld nsec\n", end - start);
+
+ /* Decrypt */
+ chacha_init_arch(chacha_state, (u32 *)key, iv);
+
+ start = ktime_get_ns();
+ chacha_crypt_arch(chacha_state, revert, cipher, data_size, 20);
+ end = ktime_get_ns();
+
+ if (debug)
+ print_hex_dump(KERN_INFO, "decr:", DUMP_PREFIX_OFFSET,
+ 16, 1, revert,
+ (data_size > 64 ? 64 : data_size), 1);
+
+	pr_info("lib decryption took: %lld nsec\n", end - start);
+
+ return 0;
+}
+
+/* Perform cipher operations with skcipher */
+static unsigned int test_skcipher_encdec(struct skcipher_def *sk,
+ int enc)
+{
+ int rc;
+
+ if (enc) {
+ rc = crypto_wait_req(crypto_skcipher_encrypt(sk->req),
+ &sk->wait);
+ if (rc)
+			pr_info("skcipher encrypt returned with result %d\n",
+				rc);
+	} else {
+ rc = crypto_wait_req(crypto_skcipher_decrypt(sk->req),
+ &sk->wait);
+ if (rc)
+			pr_info("skcipher decrypt returned with result %d\n",
+				rc);
+ }
+
+ return rc;
+}
+
+/* Initialize and trigger cipher operations */
+static int test_skcipher(char *name, u8 *revert, u8 *cipher, u8 *plain)
+{
+ struct skcipher_def sk;
+ struct crypto_skcipher *skcipher = NULL;
+ struct skcipher_request *req = NULL;
+ u8 iv[16], key[32];
+ u64 start, end;
+ int ret = -EFAULT;
+
+ skcipher = crypto_alloc_skcipher(name, 0, 0);
+ if (IS_ERR(skcipher)) {
+ pr_info("could not allocate skcipher %s handle\n", name);
+ return PTR_ERR(skcipher);
+ }
+
+ req = skcipher_request_alloc(skcipher, GFP_KERNEL);
+ if (!req) {
+ pr_info("could not allocate skcipher request\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done,
+ &sk.wait);
+
+ memset(key, 'X', sizeof(key));
+ memset(iv, 'I', sizeof(iv));
+
+ if (crypto_skcipher_setkey(skcipher, key, 32)) {
+ pr_info("key could not be set\n");
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ if (debug) {
+ print_hex_dump(KERN_INFO, "key: ", DUMP_PREFIX_OFFSET,
+ 16, 1, key, 32, 1);
+
+ print_hex_dump(KERN_INFO, "iv: ", DUMP_PREFIX_OFFSET,
+ 16, 1, iv, 16, 1);
+ }
+
+ sk.tfm = skcipher;
+ sk.req = req;
+
+ /* Encrypt in one pass */
+ sg_init_one(&sk.sginp, plain, data_size);
+ sg_init_one(&sk.sgout, cipher, data_size);
+ skcipher_request_set_crypt(req, &sk.sginp, &sk.sgout,
+ data_size, iv);
+ crypto_init_wait(&sk.wait);
+
+ /* Encrypt data */
+ start = ktime_get_ns();
+ ret = test_skcipher_encdec(&sk, 1);
+ end = ktime_get_ns();
+
+ if (ret)
+ goto out;
+
+ pr_info("%s tfm encryption successful, took %lld nsec\n", name, end - start);
+
+ if (debug)
+ print_hex_dump(KERN_INFO, "encr:", DUMP_PREFIX_OFFSET,
+ 16, 1, cipher,
+ (data_size > 64 ? 64 : data_size), 1);
+
+ /* Prepare for decryption */
+ memset(iv, 'I', sizeof(iv));
+
+ sg_init_one(&sk.sginp, cipher, data_size);
+ sg_init_one(&sk.sgout, revert, data_size);
+ skcipher_request_set_crypt(req, &sk.sginp, &sk.sgout,
+ data_size, iv);
+ crypto_init_wait(&sk.wait);
+
+ /* Decrypt data */
+ start = ktime_get_ns();
+ ret = test_skcipher_encdec(&sk, 0);
+ end = ktime_get_ns();
+
+ if (ret)
+ goto out;
+
+ pr_info("%s tfm decryption successful, took %lld nsec\n", name, end - start);
+
+ if (debug)
+ print_hex_dump(KERN_INFO, "decr:", DUMP_PREFIX_OFFSET,
+ 16, 1, revert,
+ (data_size > 64 ? 64 : data_size), 1);
+
+ /* Dump some internal skcipher data */
+ if (debug)
+ pr_info("skcipher %s: cryptlen %d blksize %d stride %d "
+ "ivsize %d alignmask 0x%x\n",
+ name, sk.req->cryptlen,
+ crypto_skcipher_blocksize(sk.tfm),
+ crypto_skcipher_alg(sk.tfm)->walksize,
+ crypto_skcipher_ivsize(sk.tfm),
+ crypto_skcipher_alignmask(sk.tfm));
+
+out:
+	crypto_free_skcipher(skcipher);	/* never NULL here; the IS_ERR() path returned early */
+	skcipher_request_free(req);	/* NULL-safe */
+ return ret;
+}
+
+static int __init chacha_s390_test_init(void)
+{
+ u8 *plain = NULL, *revert = NULL;
+ u8 *cipher_generic = NULL, *cipher_s390 = NULL;
+ int ret = -1;
+
+	pr_info("s390 ChaCha20 test module: size=%u debug=%u\n",
+		data_size, debug);
+
+ /* Allocate and fill buffers */
+ plain = vmalloc(data_size);
+ if (!plain) {
+ pr_info("could not allocate plain buffer\n");
+ ret = -2;
+ goto out;
+ }
+ memset(plain, 'a', data_size);
+ get_random_bytes(plain, (data_size > 256 ? 256 : data_size));
+
+ cipher_generic = vmalloc(data_size);
+ if (!cipher_generic) {
+ pr_info("could not allocate cipher_generic buffer\n");
+ ret = -2;
+ goto out;
+ }
+ memset(cipher_generic, 0, data_size);
+
+ cipher_s390 = vmalloc(data_size);
+ if (!cipher_s390) {
+ pr_info("could not allocate cipher_s390 buffer\n");
+ ret = -2;
+ goto out;
+ }
+ memset(cipher_s390, 0, data_size);
+
+ revert = vmalloc(data_size);
+ if (!revert) {
+ pr_info("could not allocate revert buffer\n");
+ ret = -2;
+ goto out;
+ }
+ memset(revert, 0, data_size);
+
+ if (debug)
+ print_hex_dump(KERN_INFO, "src: ", DUMP_PREFIX_OFFSET,
+ 16, 1, plain,
+ (data_size > 64 ? 64 : data_size), 1);
+
+ /* Use chacha20 generic */
+ ret = test_skcipher("chacha20-generic", revert, cipher_generic, plain);
+ if (ret)
+ goto out;
+
+ if (memcmp(plain, revert, data_size)) {
+ pr_info("generic en/decryption check FAILED\n");
+ ret = -2;
+ goto out;
+	} else
+ pr_info("generic en/decryption check OK\n");
+
+ memset(revert, 0, data_size);
+
+ /* Use chacha20 s390 */
+ ret = test_skcipher("chacha20-s390", revert, cipher_s390, plain);
+ if (ret)
+ goto out;
+
+ if (memcmp(plain, revert, data_size)) {
+ pr_info("s390 en/decryption check FAILED\n");
+ ret = -2;
+ goto out;
+	} else
+ pr_info("s390 en/decryption check OK\n");
+
+ if (memcmp(cipher_generic, cipher_s390, data_size)) {
+ pr_info("s390 vs generic check FAILED\n");
+ ret = -2;
+ goto out;
+	} else
+ pr_info("s390 vs generic check OK\n");
+
+ memset(cipher_s390, 0, data_size);
+ memset(revert, 0, data_size);
+
+ /* Use chacha20 lib */
+ test_lib_chacha(revert, cipher_s390, plain);
+
+ if (memcmp(plain, revert, data_size)) {
+ pr_info("lib en/decryption check FAILED\n");
+ ret = -2;
+ goto out;
+	} else
+ pr_info("lib en/decryption check OK\n");
+
+ if (memcmp(cipher_generic, cipher_s390, data_size)) {
+ pr_info("lib vs generic check FAILED\n");
+ ret = -2;
+ goto out;
+	} else
+ pr_info("lib vs generic check OK\n");
+
+ pr_info("--- chacha20 s390 test end ---\n");
+
+out:
+	/* vfree() is a no-op on NULL, so the buffers can be freed unconditionally */
+	vfree(plain);
+	vfree(cipher_generic);
+	vfree(cipher_s390);
+	vfree(revert);
+
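+	/* Fail init on purpose so the module never stays loaded (run-tests.sh expects this insmod error) */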
+ return -1;
+}
+
+static void __exit chacha_s390_test_exit(void)
+{
+ pr_info("s390 ChaCha20 test module exit\n");
+}
+
+module_param_named(size, data_size, uint, 0660);
+module_param(debug, uint, 0660);
+MODULE_PARM_DESC(size, "Size of the plaintext in bytes");
+MODULE_PARM_DESC(debug, "Debug level (0=off,1=on)");
+
+module_init(chacha_s390_test_init);
+module_exit(chacha_s390_test_exit);
+
+MODULE_DESCRIPTION("s390 ChaCha20 self-test");
+MODULE_AUTHOR("Vladis Dronov <vdronoff@gmail.com>");
+MODULE_LICENSE("GPL v2");