diff options
Diffstat (limited to 'arch/arm64/crypto/aes-modes.S')
-rw-r--r-- | arch/arm64/crypto/aes-modes.S | 88 |
1 file changed, 42 insertions(+), 46 deletions(-)
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index c53dbeae79f2..838dad5c209f 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S | |||
@@ -193,15 +193,16 @@ AES_ENTRY(aes_cbc_encrypt) | |||
193 | cbz w6, .Lcbcencloop | 193 | cbz w6, .Lcbcencloop |
194 | 194 | ||
195 | ld1 {v0.16b}, [x5] /* get iv */ | 195 | ld1 {v0.16b}, [x5] /* get iv */ |
196 | enc_prepare w3, x2, x5 | 196 | enc_prepare w3, x2, x6 |
197 | 197 | ||
198 | .Lcbcencloop: | 198 | .Lcbcencloop: |
199 | ld1 {v1.16b}, [x1], #16 /* get next pt block */ | 199 | ld1 {v1.16b}, [x1], #16 /* get next pt block */ |
200 | eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */ | 200 | eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */ |
201 | encrypt_block v0, w3, x2, x5, w6 | 201 | encrypt_block v0, w3, x2, x6, w7 |
202 | st1 {v0.16b}, [x0], #16 | 202 | st1 {v0.16b}, [x0], #16 |
203 | subs w4, w4, #1 | 203 | subs w4, w4, #1 |
204 | bne .Lcbcencloop | 204 | bne .Lcbcencloop |
205 | st1 {v0.16b}, [x5] /* return iv */ | ||
205 | ret | 206 | ret |
206 | AES_ENDPROC(aes_cbc_encrypt) | 207 | AES_ENDPROC(aes_cbc_encrypt) |
207 | 208 | ||
@@ -211,7 +212,7 @@ AES_ENTRY(aes_cbc_decrypt) | |||
211 | cbz w6, .LcbcdecloopNx | 212 | cbz w6, .LcbcdecloopNx |
212 | 213 | ||
213 | ld1 {v7.16b}, [x5] /* get iv */ | 214 | ld1 {v7.16b}, [x5] /* get iv */ |
214 | dec_prepare w3, x2, x5 | 215 | dec_prepare w3, x2, x6 |
215 | 216 | ||
216 | .LcbcdecloopNx: | 217 | .LcbcdecloopNx: |
217 | #if INTERLEAVE >= 2 | 218 | #if INTERLEAVE >= 2 |
@@ -248,7 +249,7 @@ AES_ENTRY(aes_cbc_decrypt) | |||
248 | .Lcbcdecloop: | 249 | .Lcbcdecloop: |
249 | ld1 {v1.16b}, [x1], #16 /* get next ct block */ | 250 | ld1 {v1.16b}, [x1], #16 /* get next ct block */ |
250 | mov v0.16b, v1.16b /* ...and copy to v0 */ | 251 | mov v0.16b, v1.16b /* ...and copy to v0 */ |
251 | decrypt_block v0, w3, x2, x5, w6 | 252 | decrypt_block v0, w3, x2, x6, w7 |
252 | eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */ | 253 | eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */ |
253 | mov v7.16b, v1.16b /* ct is next iv */ | 254 | mov v7.16b, v1.16b /* ct is next iv */ |
254 | st1 {v0.16b}, [x0], #16 | 255 | st1 {v0.16b}, [x0], #16 |
@@ -256,6 +257,7 @@ AES_ENTRY(aes_cbc_decrypt) | |||
256 | bne .Lcbcdecloop | 257 | bne .Lcbcdecloop |
257 | .Lcbcdecout: | 258 | .Lcbcdecout: |
258 | FRAME_POP | 259 | FRAME_POP |
260 | st1 {v7.16b}, [x5] /* return iv */ | ||
259 | ret | 261 | ret |
260 | AES_ENDPROC(aes_cbc_decrypt) | 262 | AES_ENDPROC(aes_cbc_decrypt) |
261 | 263 | ||
@@ -267,24 +269,15 @@ AES_ENDPROC(aes_cbc_decrypt) | |||
267 | 269 | ||
268 | AES_ENTRY(aes_ctr_encrypt) | 270 | AES_ENTRY(aes_ctr_encrypt) |
269 | FRAME_PUSH | 271 | FRAME_PUSH |
270 | cbnz w6, .Lctrfirst /* 1st time around? */ | 272 | cbz w6, .Lctrnotfirst /* 1st time around? */ |
271 | umov x5, v4.d[1] /* keep swabbed ctr in reg */ | ||
272 | rev x5, x5 | ||
273 | #if INTERLEAVE >= 2 | ||
274 | cmn w5, w4 /* 32 bit overflow? */ | ||
275 | bcs .Lctrinc | ||
276 | add x5, x5, #1 /* increment BE ctr */ | ||
277 | b .LctrincNx | ||
278 | #else | ||
279 | b .Lctrinc | ||
280 | #endif | ||
281 | .Lctrfirst: | ||
282 | enc_prepare w3, x2, x6 | 273 | enc_prepare w3, x2, x6 |
283 | ld1 {v4.16b}, [x5] | 274 | ld1 {v4.16b}, [x5] |
284 | umov x5, v4.d[1] /* keep swabbed ctr in reg */ | 275 | |
285 | rev x5, x5 | 276 | .Lctrnotfirst: |
277 | umov x8, v4.d[1] /* keep swabbed ctr in reg */ | ||
278 | rev x8, x8 | ||
286 | #if INTERLEAVE >= 2 | 279 | #if INTERLEAVE >= 2 |
287 | cmn w5, w4 /* 32 bit overflow? */ | 280 | cmn w8, w4 /* 32 bit overflow? */ |
288 | bcs .Lctrloop | 281 | bcs .Lctrloop |
289 | .LctrloopNx: | 282 | .LctrloopNx: |
290 | subs w4, w4, #INTERLEAVE | 283 | subs w4, w4, #INTERLEAVE |
@@ -292,11 +285,11 @@ AES_ENTRY(aes_ctr_encrypt) | |||
292 | #if INTERLEAVE == 2 | 285 | #if INTERLEAVE == 2 |
293 | mov v0.8b, v4.8b | 286 | mov v0.8b, v4.8b |
294 | mov v1.8b, v4.8b | 287 | mov v1.8b, v4.8b |
295 | rev x7, x5 | 288 | rev x7, x8 |
296 | add x5, x5, #1 | 289 | add x8, x8, #1 |
297 | ins v0.d[1], x7 | 290 | ins v0.d[1], x7 |
298 | rev x7, x5 | 291 | rev x7, x8 |
299 | add x5, x5, #1 | 292 | add x8, x8, #1 |
300 | ins v1.d[1], x7 | 293 | ins v1.d[1], x7 |
301 | ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */ | 294 | ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */ |
302 | do_encrypt_block2x | 295 | do_encrypt_block2x |
@@ -305,7 +298,7 @@ AES_ENTRY(aes_ctr_encrypt) | |||
305 | st1 {v0.16b-v1.16b}, [x0], #32 | 298 | st1 {v0.16b-v1.16b}, [x0], #32 |
306 | #else | 299 | #else |
307 | ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ | 300 | ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ |
308 | dup v7.4s, w5 | 301 | dup v7.4s, w8 |
309 | mov v0.16b, v4.16b | 302 | mov v0.16b, v4.16b |
310 | add v7.4s, v7.4s, v8.4s | 303 | add v7.4s, v7.4s, v8.4s |
311 | mov v1.16b, v4.16b | 304 | mov v1.16b, v4.16b |
@@ -323,18 +316,12 @@ AES_ENTRY(aes_ctr_encrypt) | |||
323 | eor v2.16b, v7.16b, v2.16b | 316 | eor v2.16b, v7.16b, v2.16b |
324 | eor v3.16b, v5.16b, v3.16b | 317 | eor v3.16b, v5.16b, v3.16b |
325 | st1 {v0.16b-v3.16b}, [x0], #64 | 318 | st1 {v0.16b-v3.16b}, [x0], #64 |
326 | add x5, x5, #INTERLEAVE | 319 | add x8, x8, #INTERLEAVE |
327 | #endif | 320 | #endif |
328 | cbz w4, .LctroutNx | 321 | rev x7, x8 |
329 | .LctrincNx: | ||
330 | rev x7, x5 | ||
331 | ins v4.d[1], x7 | 322 | ins v4.d[1], x7 |
323 | cbz w4, .Lctrout | ||
332 | b .LctrloopNx | 324 | b .LctrloopNx |
333 | .LctroutNx: | ||
334 | sub x5, x5, #1 | ||
335 | rev x7, x5 | ||
336 | ins v4.d[1], x7 | ||
337 | b .Lctrout | ||
338 | .Lctr1x: | 325 | .Lctr1x: |
339 | adds w4, w4, #INTERLEAVE | 326 | adds w4, w4, #INTERLEAVE |
340 | beq .Lctrout | 327 | beq .Lctrout |
@@ -342,30 +329,39 @@ AES_ENTRY(aes_ctr_encrypt) | |||
342 | .Lctrloop: | 329 | .Lctrloop: |
343 | mov v0.16b, v4.16b | 330 | mov v0.16b, v4.16b |
344 | encrypt_block v0, w3, x2, x6, w7 | 331 | encrypt_block v0, w3, x2, x6, w7 |
332 | |||
333 | adds x8, x8, #1 /* increment BE ctr */ | ||
334 | rev x7, x8 | ||
335 | ins v4.d[1], x7 | ||
336 | bcs .Lctrcarry /* overflow? */ | ||
337 | |||
338 | .Lctrcarrydone: | ||
345 | subs w4, w4, #1 | 339 | subs w4, w4, #1 |
346 | bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */ | 340 | bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */ |
347 | ld1 {v3.16b}, [x1], #16 | 341 | ld1 {v3.16b}, [x1], #16 |
348 | eor v3.16b, v0.16b, v3.16b | 342 | eor v3.16b, v0.16b, v3.16b |
349 | st1 {v3.16b}, [x0], #16 | 343 | st1 {v3.16b}, [x0], #16 |
350 | beq .Lctrout | 344 | bne .Lctrloop |
351 | .Lctrinc: | 345 | |
352 | adds x5, x5, #1 /* increment BE ctr */ | 346 | .Lctrout: |
353 | rev x7, x5 | 347 | st1 {v4.16b}, [x5] /* return next CTR value */ |
354 | ins v4.d[1], x7 | 348 | FRAME_POP |
355 | bcc .Lctrloop /* no overflow? */ | 349 | ret |
356 | umov x7, v4.d[0] /* load upper word of ctr */ | 350 | |
357 | rev x7, x7 /* ... to handle the carry */ | ||
358 | add x7, x7, #1 | ||
359 | rev x7, x7 | ||
360 | ins v4.d[0], x7 | ||
361 | b .Lctrloop | ||
362 | .Lctrhalfblock: | 351 | .Lctrhalfblock: |
363 | ld1 {v3.8b}, [x1] | 352 | ld1 {v3.8b}, [x1] |
364 | eor v3.8b, v0.8b, v3.8b | 353 | eor v3.8b, v0.8b, v3.8b |
365 | st1 {v3.8b}, [x0] | 354 | st1 {v3.8b}, [x0] |
366 | .Lctrout: | ||
367 | FRAME_POP | 355 | FRAME_POP |
368 | ret | 356 | ret |
357 | |||
358 | .Lctrcarry: | ||
359 | umov x7, v4.d[0] /* load upper word of ctr */ | ||
360 | rev x7, x7 /* ... to handle the carry */ | ||
361 | add x7, x7, #1 | ||
362 | rev x7, x7 | ||
363 | ins v4.d[0], x7 | ||
364 | b .Lctrcarrydone | ||
369 | AES_ENDPROC(aes_ctr_encrypt) | 365 | AES_ENDPROC(aes_ctr_encrypt) |
370 | .ltorg | 366 | .ltorg |
371 | 367 | ||