whiterose

linux unikernel
Log | Files | Refs | README | LICENSE | git clone https://git.ne02ptzero.me/git/whiterose

commit fa3294c58c58c4fa87ee0356b6cb1901db00533e
parent a215ce8f0e00c2d707080236f1aafec337371043
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Sat,  2 Mar 2019 08:32:02 -0800

Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull more crypto fixes from Herbert Xu:
 "This fixes a couple of issues in arm64/chacha that was introduced in
  5.0"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  crypto: arm64/chacha - fix hchacha_block_neon() for big endian
  crypto: arm64/chacha - fix chacha_4block_xor_neon() for big endian

Diffstat:
March/arm64/crypto/chacha-neon-core.S | 20++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S @@ -158,8 +158,8 @@ ENTRY(hchacha_block_neon) mov w3, w2 bl chacha_permute - st1 {v0.16b}, [x1], #16 - st1 {v3.16b}, [x1] + st1 {v0.4s}, [x1], #16 + st1 {v3.4s}, [x1] ldp x29, x30, [sp], #16 ret @@ -532,6 +532,10 @@ ENTRY(chacha_4block_xor_neon) add v3.4s, v3.4s, v19.4s add a2, a2, w8 add a3, a3, w9 +CPU_BE( rev a0, a0 ) +CPU_BE( rev a1, a1 ) +CPU_BE( rev a2, a2 ) +CPU_BE( rev a3, a3 ) ld4r {v24.4s-v27.4s}, [x0], #16 ld4r {v28.4s-v31.4s}, [x0] @@ -552,6 +556,10 @@ ENTRY(chacha_4block_xor_neon) add v7.4s, v7.4s, v23.4s add a6, a6, w8 add a7, a7, w9 +CPU_BE( rev a4, a4 ) +CPU_BE( rev a5, a5 ) +CPU_BE( rev a6, a6 ) +CPU_BE( rev a7, a7 ) // x8[0-3] += s2[0] // x9[0-3] += s2[1] @@ -569,6 +577,10 @@ ENTRY(chacha_4block_xor_neon) add v11.4s, v11.4s, v27.4s add a10, a10, w8 add a11, a11, w9 +CPU_BE( rev a8, a8 ) +CPU_BE( rev a9, a9 ) +CPU_BE( rev a10, a10 ) +CPU_BE( rev a11, a11 ) // x12[0-3] += s3[0] // x13[0-3] += s3[1] @@ -586,6 +598,10 @@ ENTRY(chacha_4block_xor_neon) add v15.4s, v15.4s, v31.4s add a14, a14, w8 add a15, a15, w9 +CPU_BE( rev a12, a12 ) +CPU_BE( rev a13, a13 ) +CPU_BE( rev a14, a14 ) +CPU_BE( rev a15, a15 ) // interleave 32-bit words in state n, n+1 ldp w6, w7, [x2], #64