
deps: update openssl asm and asm_obsolete files

Regenerate the asm files with the Makefile, using CC=gcc and ASM=gcc,
where gcc is version 4.8.4. The asm files in the asm_obsolete dir, which
support older compilers and assemblers, are regenerated without the CC
and ASM environment variables.

Fixes: https://github.com/nodejs/node/issues/6458
PR-URL: https://github.com/nodejs/node/pull/6550
Reviewed-By: Myles Borins <myles.borins@gmail.com>
Reviewed-By: Fedor Indutny <fedor.indutny@gmail.com>
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Shigeki Ohtsu committed 9 years ago
commit 70ae03135c
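
A minimal sketch of the regeneration flow described in the commit message, assuming the Makefiles under deps/openssl/asm and deps/openssl/asm_obsolete drive the perlasm generators in this tree; the exact targets and the way CC/ASM are consumed are assumptions, not taken from this page:

    # Optimized asm: generate with gcc 4.8.4 acting as both compiler and
    # assembler (CC/ASM mirror the env vars named in the commit message).
    cd deps/openssl/asm
    make clean
    CC=gcc ASM=gcc make

    # asm_obsolete: regenerate without CC/ASM so the perlasm scripts emit
    # output that older compilers and assemblers can still build.
    cd ../asm_obsolete
    make clean
    make

This matches what the diffs below show: the asm/ output gains AVX and MULX/ADX code paths, while the asm_obsolete/ files pick up only small, conservative changes.
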
78 changed files (changed lines per file in parentheses):

   1. deps/openssl/asm/x64-elf-gas/aes/aes-x86_64.s (70)
   2. deps/openssl/asm/x64-elf-gas/aes/aesni-mb-x86_64.s (929)
   3. deps/openssl/asm/x64-elf-gas/aes/aesni-sha1-x86_64.s (1335)
   4. deps/openssl/asm/x64-elf-gas/aes/aesni-sha256-x86_64.s (4297)
   5. deps/openssl/asm/x64-elf-gas/aes/aesni-x86_64.s (82)
   6. deps/openssl/asm/x64-elf-gas/aes/bsaes-x86_64.s (158)
   7. deps/openssl/asm/x64-elf-gas/aes/vpaes-x86_64.s (20)
   8. deps/openssl/asm/x64-elf-gas/bn/rsaz-avx2.s (1701)
   9. deps/openssl/asm/x64-elf-gas/bn/rsaz-x86_64.s (654)
  10. deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s (386)
  11. deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s (1340)
  12. deps/openssl/asm/x64-elf-gas/camellia/cmll-x86_64.s (2)
  13. deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s (1511)
  14. deps/openssl/asm/x64-elf-gas/md5/md5-x86_64.s (34)
  15. deps/openssl/asm/x64-elf-gas/modes/aesni-gcm-x86_64.s (752)
  16. deps/openssl/asm/x64-elf-gas/modes/ghash-x86_64.s (523)
  17. deps/openssl/asm/x64-elf-gas/sha/sha1-mb-x86_64.s (4299)
  18. deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s (2813)
  19. deps/openssl/asm/x64-elf-gas/sha/sha256-mb-x86_64.s (4660)
  20. deps/openssl/asm/x64-elf-gas/sha/sha256-x86_64.s (2309)
  21. deps/openssl/asm/x64-elf-gas/sha/sha512-x86_64.s (3582)
  22. deps/openssl/asm/x64-elf-gas/x86_64cpuid.s (48)
  23. deps/openssl/asm/x64-macosx-gas/aes/aes-x86_64.s (70)
  24. deps/openssl/asm/x64-macosx-gas/aes/aesni-mb-x86_64.s (929)
  25. deps/openssl/asm/x64-macosx-gas/aes/aesni-sha1-x86_64.s (1335)
  26. deps/openssl/asm/x64-macosx-gas/aes/aesni-sha256-x86_64.s (4296)
  27. deps/openssl/asm/x64-macosx-gas/aes/aesni-x86_64.s (82)
  28. deps/openssl/asm/x64-macosx-gas/aes/bsaes-x86_64.s (158)
  29. deps/openssl/asm/x64-macosx-gas/aes/vpaes-x86_64.s (20)
  30. deps/openssl/asm/x64-macosx-gas/bn/rsaz-avx2.s (1696)
  31. deps/openssl/asm/x64-macosx-gas/bn/rsaz-x86_64.s (654)
  32. deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s (386)
  33. deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s (1340)
  34. deps/openssl/asm/x64-macosx-gas/camellia/cmll-x86_64.s (2)
  35. deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s (1511)
  36. deps/openssl/asm/x64-macosx-gas/md5/md5-x86_64.s (34)
  37. deps/openssl/asm/x64-macosx-gas/modes/aesni-gcm-x86_64.s (749)
  38. deps/openssl/asm/x64-macosx-gas/modes/ghash-x86_64.s (523)
  39. deps/openssl/asm/x64-macosx-gas/sha/sha1-mb-x86_64.s (4299)
  40. deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s (2813)
  41. deps/openssl/asm/x64-macosx-gas/sha/sha256-mb-x86_64.s (4660)
  42. deps/openssl/asm/x64-macosx-gas/sha/sha256-x86_64.s (2309)
  43. deps/openssl/asm/x64-macosx-gas/sha/sha512-x86_64.s (3581)
  44. deps/openssl/asm/x64-macosx-gas/x86_64cpuid.s (48)
  45. deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm (42)
  46. deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm (59)
  47. deps/openssl/asm/x86-elf-gas/bn/x86-mont.s (78)
  48. deps/openssl/asm/x86-elf-gas/sha/sha1-586.s (1175)
  49. deps/openssl/asm/x86-elf-gas/sha/sha256-586.s (2248)
  50. deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s (84)
  51. deps/openssl/asm/x86-macosx-gas/sha/sha1-586.s (1173)
  52. deps/openssl/asm/x86-macosx-gas/sha/sha256-586.s (2248)
  53. deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm (78)
  54. deps/openssl/asm/x86-win32-masm/sha/sha1-586.asm (1174)
  55. deps/openssl/asm/x86-win32-masm/sha/sha256-586.asm (2248)
  56. deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-sha1-x86_64.s (8)
  57. deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-x86_64.s (30)
  58. deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s (31)
  59. deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s (41)
  60. deps/openssl/asm_obsolete/x64-elf-gas/md5/md5-x86_64.s (34)
  61. deps/openssl/asm_obsolete/x64-elf-gas/modes/ghash-x86_64.s (48)
  62. deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-mb-x86_64.s (12)
  63. deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s (8)
  64. deps/openssl/asm_obsolete/x64-elf-gas/sha/sha256-mb-x86_64.s (16)
  65. deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-sha1-x86_64.s (8)
  66. deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-x86_64.s (30)
  67. deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s (31)
  68. deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s (41)
  69. deps/openssl/asm_obsolete/x64-macosx-gas/md5/md5-x86_64.s (34)
  70. deps/openssl/asm_obsolete/x64-macosx-gas/modes/ghash-x86_64.s (48)
  71. deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-mb-x86_64.s (12)
  72. deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s (8)
  73. deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha256-mb-x86_64.s (16)
  74. deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm (31)
  75. deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm (41)
  76. deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s (78)
  77. deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s (84)
  78. deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm (78)

deps/openssl/asm/x64-elf-gas/aes/aes-x86_64.s (70)

@ -81,8 +81,8 @@ _x86_64_AES_encrypt:
movl 0(%r14,%rdi,8),%edi
movl 0(%r14,%rbp,8),%ebp
andl $65280,%edi
andl $65280,%ebp
andl $0x0000ff00,%edi
andl $0x0000ff00,%ebp
xorl %edi,%r10d
xorl %ebp,%r11d
@ -94,8 +94,8 @@ _x86_64_AES_encrypt:
movl 0(%r14,%rsi,8),%esi
movl 0(%r14,%rdi,8),%edi
andl $65280,%esi
andl $65280,%edi
andl $0x0000ff00,%esi
andl $0x0000ff00,%edi
shrl $16,%ebx
xorl %esi,%r12d
xorl %edi,%r8d
@ -108,9 +108,9 @@ _x86_64_AES_encrypt:
movl 0(%r14,%rdi,8),%edi
movl 0(%r14,%rbp,8),%ebp
andl $16711680,%esi
andl $16711680,%edi
andl $16711680,%ebp
andl $0x00ff0000,%esi
andl $0x00ff0000,%edi
andl $0x00ff0000,%ebp
xorl %esi,%r10d
xorl %edi,%r11d
@ -123,9 +123,9 @@ _x86_64_AES_encrypt:
movl 2(%r14,%rdi,8),%edi
movl 2(%r14,%rbp,8),%ebp
andl $16711680,%esi
andl $4278190080,%edi
andl $4278190080,%ebp
andl $0x00ff0000,%esi
andl $0xff000000,%edi
andl $0xff000000,%ebp
xorl %esi,%r8d
xorl %edi,%r10d
@ -138,8 +138,8 @@ _x86_64_AES_encrypt:
movl 2(%r14,%rdi,8),%edi
movl 16+0(%r15),%eax
andl $4278190080,%esi
andl $4278190080,%edi
andl $0xff000000,%esi
andl $0xff000000,%edi
xorl %esi,%r12d
xorl %edi,%r8d
@ -241,8 +241,8 @@ _x86_64_AES_encrypt_compact:
xorl %r8d,%edx
cmpq 16(%rsp),%r15
je .Lenc_compact_done
movl $2155905152,%r10d
movl $2155905152,%r11d
movl $0x80808080,%r10d
movl $0x80808080,%r11d
andl %eax,%r10d
andl %ebx,%r11d
movl %r10d,%esi
@ -253,10 +253,10 @@ _x86_64_AES_encrypt_compact:
leal (%rbx,%rbx,1),%r9d
subl %r10d,%esi
subl %r11d,%edi
andl $4278124286,%r8d
andl $4278124286,%r9d
andl $454761243,%esi
andl $454761243,%edi
andl $0xfefefefe,%r8d
andl $0xfefefefe,%r9d
andl $0x1b1b1b1b,%esi
andl $0x1b1b1b1b,%edi
movl %eax,%r10d
movl %ebx,%r11d
xorl %esi,%r8d
@ -264,9 +264,9 @@ _x86_64_AES_encrypt_compact:
xorl %r8d,%eax
xorl %r9d,%ebx
movl $2155905152,%r12d
movl $0x80808080,%r12d
roll $24,%eax
movl $2155905152,%ebp
movl $0x80808080,%ebp
roll $24,%ebx
andl %ecx,%r12d
andl %edx,%ebp
@ -289,10 +289,10 @@ _x86_64_AES_encrypt_compact:
xorl %r10d,%eax
xorl %r11d,%ebx
andl $4278124286,%r8d
andl $4278124286,%r9d
andl $454761243,%esi
andl $454761243,%edi
andl $0xfefefefe,%r8d
andl $0xfefefefe,%r9d
andl $0x1b1b1b1b,%esi
andl $0x1b1b1b1b,%edi
movl %ecx,%r12d
movl %edx,%ebp
xorl %esi,%r8d
@ -345,7 +345,7 @@ AES_encrypt:
andq $-64,%rsp
subq %rsp,%rcx
negq %rcx
andq $960,%rcx
andq $0x3c0,%rcx
subq %rcx,%rsp
subq $32,%rsp
@ -370,7 +370,7 @@ AES_encrypt:
leaq .LAES_Te+2048(%rip),%r14
leaq 768(%rsp),%rbp
subq %r14,%rbp
andq $768,%rbp
andq $0x300,%rbp
leaq (%r14,%rbp,1),%r14
call _x86_64_AES_encrypt_compact
@ -792,7 +792,7 @@ AES_decrypt:
andq $-64,%rsp
subq %rsp,%rcx
negq %rcx
andq $960,%rcx
andq $0x3c0,%rcx
subq %rcx,%rsp
subq $32,%rsp
@ -817,7 +817,7 @@ AES_decrypt:
leaq .LAES_Td+2048(%rip),%r14
leaq 768(%rsp),%rbp
subq %r14,%rbp
andq $768,%rbp
andq $0x300,%rbp
leaq (%r14,%rbp,1),%r14
shrq $3,%rbp
addq %rbp,%r14
@ -1333,9 +1333,9 @@ AES_cbc_encrypt:
movq %r14,%r10
leaq 2304(%r14),%r11
movq %r15,%r12
andq $4095,%r10
andq $4095,%r11
andq $4095,%r12
andq $0xFFF,%r10
andq $0xFFF,%r11
andq $0xFFF,%r12
cmpq %r11,%r12
jb .Lcbc_te_break_out
@ -1344,7 +1344,7 @@ AES_cbc_encrypt:
jmp .Lcbc_te_ok
.Lcbc_te_break_out:
subq %r10,%r12
andq $4095,%r12
andq $0xFFF,%r12
addq $320,%r12
subq %r12,%r15
.align 4
@ -1370,7 +1370,7 @@ AES_cbc_encrypt:
movq %r15,%r10
subq %r14,%r10
andq $4095,%r10
andq $0xfff,%r10
cmpq $2304,%r10
jb .Lcbc_do_ecopy
cmpq $4096-248,%r10
@ -1557,7 +1557,7 @@ AES_cbc_encrypt:
leaq -88-63(%rcx),%r10
subq %rbp,%r10
negq %r10
andq $960,%r10
andq $0x3c0,%r10
subq %r10,%rbp
xchgq %rsp,%rbp
@ -1586,7 +1586,7 @@ AES_cbc_encrypt:
leaq 2048(%r14),%r14
leaq 768-8(%rsp),%rax
subq %r14,%rax
andq $768,%rax
andq $0x300,%rax
leaq (%r14,%rax,1),%r14
cmpq $0,%rbx

deps/openssl/asm/x64-elf-gas/aes/aesni-mb-x86_64.s (929)

@ -6,6 +6,14 @@
.type aesni_multi_cbc_encrypt,@function
.align 32
aesni_multi_cbc_encrypt:
cmpl $2,%edx
jb .Lenc_non_avx
movl OPENSSL_ia32cap_P+4(%rip),%ecx
testl $268435456,%ecx
jnz _avx_cbc_enc_shortcut
jmp .Lenc_non_avx
.align 16
.Lenc_non_avx:
movq %rsp,%rax
pushq %rbx
pushq %rbp
@ -262,6 +270,14 @@ aesni_multi_cbc_encrypt:
.type aesni_multi_cbc_decrypt,@function
.align 32
aesni_multi_cbc_decrypt:
cmpl $2,%edx
jb .Ldec_non_avx
movl OPENSSL_ia32cap_P+4(%rip),%ecx
testl $268435456,%ecx
jnz _avx_cbc_dec_shortcut
jmp .Ldec_non_avx
.align 16
.Ldec_non_avx:
movq %rsp,%rax
pushq %rbx
pushq %rbp
@ -504,3 +520,916 @@ aesni_multi_cbc_decrypt:
.Ldec4x_epilogue:
.byte 0xf3,0xc3
.size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt
.type aesni_multi_cbc_encrypt_avx,@function
.align 32
aesni_multi_cbc_encrypt_avx:
_avx_cbc_enc_shortcut:
movq %rsp,%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $192,%rsp
andq $-128,%rsp
movq %rax,16(%rsp)
.Lenc8x_body:
vzeroupper
vmovdqu (%rsi),%xmm15
leaq 120(%rsi),%rsi
leaq 160(%rdi),%rdi
shrl $1,%edx
.Lenc8x_loop_grande:
xorl %edx,%edx
movl -144(%rdi),%ecx
movq -160(%rdi),%r8
cmpl %edx,%ecx
movq -152(%rdi),%rbx
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -136(%rdi),%xmm2
movl %ecx,32(%rsp)
cmovleq %rsp,%r8
subq %r8,%rbx
movq %rbx,64(%rsp)
movl -104(%rdi),%ecx
movq -120(%rdi),%r9
cmpl %edx,%ecx
movq -112(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -96(%rdi),%xmm3
movl %ecx,36(%rsp)
cmovleq %rsp,%r9
subq %r9,%rbp
movq %rbp,72(%rsp)
movl -64(%rdi),%ecx
movq -80(%rdi),%r10
cmpl %edx,%ecx
movq -72(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -56(%rdi),%xmm4
movl %ecx,40(%rsp)
cmovleq %rsp,%r10
subq %r10,%rbp
movq %rbp,80(%rsp)
movl -24(%rdi),%ecx
movq -40(%rdi),%r11
cmpl %edx,%ecx
movq -32(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -16(%rdi),%xmm5
movl %ecx,44(%rsp)
cmovleq %rsp,%r11
subq %r11,%rbp
movq %rbp,88(%rsp)
movl 16(%rdi),%ecx
movq 0(%rdi),%r12
cmpl %edx,%ecx
movq 8(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 24(%rdi),%xmm6
movl %ecx,48(%rsp)
cmovleq %rsp,%r12
subq %r12,%rbp
movq %rbp,96(%rsp)
movl 56(%rdi),%ecx
movq 40(%rdi),%r13
cmpl %edx,%ecx
movq 48(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 64(%rdi),%xmm7
movl %ecx,52(%rsp)
cmovleq %rsp,%r13
subq %r13,%rbp
movq %rbp,104(%rsp)
movl 96(%rdi),%ecx
movq 80(%rdi),%r14
cmpl %edx,%ecx
movq 88(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 104(%rdi),%xmm8
movl %ecx,56(%rsp)
cmovleq %rsp,%r14
subq %r14,%rbp
movq %rbp,112(%rsp)
movl 136(%rdi),%ecx
movq 120(%rdi),%r15
cmpl %edx,%ecx
movq 128(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 144(%rdi),%xmm9
movl %ecx,60(%rsp)
cmovleq %rsp,%r15
subq %r15,%rbp
movq %rbp,120(%rsp)
testl %edx,%edx
jz .Lenc8x_done
vmovups 16-120(%rsi),%xmm1
vmovups 32-120(%rsi),%xmm0
movl 240-120(%rsi),%eax
vpxor (%r8),%xmm15,%xmm10
leaq 128(%rsp),%rbp
vpxor (%r9),%xmm15,%xmm11
vpxor (%r10),%xmm15,%xmm12
vpxor (%r11),%xmm15,%xmm13
vpxor %xmm10,%xmm2,%xmm2
vpxor (%r12),%xmm15,%xmm10
vpxor %xmm11,%xmm3,%xmm3
vpxor (%r13),%xmm15,%xmm11
vpxor %xmm12,%xmm4,%xmm4
vpxor (%r14),%xmm15,%xmm12
vpxor %xmm13,%xmm5,%xmm5
vpxor (%r15),%xmm15,%xmm13
vpxor %xmm10,%xmm6,%xmm6
movl $1,%ecx
vpxor %xmm11,%xmm7,%xmm7
vpxor %xmm12,%xmm8,%xmm8
vpxor %xmm13,%xmm9,%xmm9
jmp .Loop_enc8x
.align 32
.Loop_enc8x:
vaesenc %xmm1,%xmm2,%xmm2
cmpl 32+0(%rsp),%ecx
vaesenc %xmm1,%xmm3,%xmm3
prefetcht0 31(%r8)
vaesenc %xmm1,%xmm4,%xmm4
vaesenc %xmm1,%xmm5,%xmm5
leaq (%r8,%rbx,1),%rbx
cmovgeq %rsp,%r8
vaesenc %xmm1,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesenc %xmm1,%xmm7,%xmm7
subq %r8,%rbx
vaesenc %xmm1,%xmm8,%xmm8
vpxor 16(%r8),%xmm15,%xmm10
movq %rbx,64+0(%rsp)
vaesenc %xmm1,%xmm9,%xmm9
vmovups -72(%rsi),%xmm1
leaq 16(%r8,%rbx,1),%r8
vmovdqu %xmm10,0(%rbp)
vaesenc %xmm0,%xmm2,%xmm2
cmpl 32+4(%rsp),%ecx
movq 64+8(%rsp),%rbx
vaesenc %xmm0,%xmm3,%xmm3
prefetcht0 31(%r9)
vaesenc %xmm0,%xmm4,%xmm4
vaesenc %xmm0,%xmm5,%xmm5
leaq (%r9,%rbx,1),%rbx
cmovgeq %rsp,%r9
vaesenc %xmm0,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesenc %xmm0,%xmm7,%xmm7
subq %r9,%rbx
vaesenc %xmm0,%xmm8,%xmm8
vpxor 16(%r9),%xmm15,%xmm11
movq %rbx,64+8(%rsp)
vaesenc %xmm0,%xmm9,%xmm9
vmovups -56(%rsi),%xmm0
leaq 16(%r9,%rbx,1),%r9
vmovdqu %xmm11,16(%rbp)
vaesenc %xmm1,%xmm2,%xmm2
cmpl 32+8(%rsp),%ecx
movq 64+16(%rsp),%rbx
vaesenc %xmm1,%xmm3,%xmm3
prefetcht0 31(%r10)
vaesenc %xmm1,%xmm4,%xmm4
prefetcht0 15(%r8)
vaesenc %xmm1,%xmm5,%xmm5
leaq (%r10,%rbx,1),%rbx
cmovgeq %rsp,%r10
vaesenc %xmm1,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesenc %xmm1,%xmm7,%xmm7
subq %r10,%rbx
vaesenc %xmm1,%xmm8,%xmm8
vpxor 16(%r10),%xmm15,%xmm12
movq %rbx,64+16(%rsp)
vaesenc %xmm1,%xmm9,%xmm9
vmovups -40(%rsi),%xmm1
leaq 16(%r10,%rbx,1),%r10
vmovdqu %xmm12,32(%rbp)
vaesenc %xmm0,%xmm2,%xmm2
cmpl 32+12(%rsp),%ecx
movq 64+24(%rsp),%rbx
vaesenc %xmm0,%xmm3,%xmm3
prefetcht0 31(%r11)
vaesenc %xmm0,%xmm4,%xmm4
prefetcht0 15(%r9)
vaesenc %xmm0,%xmm5,%xmm5
leaq (%r11,%rbx,1),%rbx
cmovgeq %rsp,%r11
vaesenc %xmm0,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesenc %xmm0,%xmm7,%xmm7
subq %r11,%rbx
vaesenc %xmm0,%xmm8,%xmm8
vpxor 16(%r11),%xmm15,%xmm13
movq %rbx,64+24(%rsp)
vaesenc %xmm0,%xmm9,%xmm9
vmovups -24(%rsi),%xmm0
leaq 16(%r11,%rbx,1),%r11
vmovdqu %xmm13,48(%rbp)
vaesenc %xmm1,%xmm2,%xmm2
cmpl 32+16(%rsp),%ecx
movq 64+32(%rsp),%rbx
vaesenc %xmm1,%xmm3,%xmm3
prefetcht0 31(%r12)
vaesenc %xmm1,%xmm4,%xmm4
prefetcht0 15(%r10)
vaesenc %xmm1,%xmm5,%xmm5
leaq (%r12,%rbx,1),%rbx
cmovgeq %rsp,%r12
vaesenc %xmm1,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesenc %xmm1,%xmm7,%xmm7
subq %r12,%rbx
vaesenc %xmm1,%xmm8,%xmm8
vpxor 16(%r12),%xmm15,%xmm10
movq %rbx,64+32(%rsp)
vaesenc %xmm1,%xmm9,%xmm9
vmovups -8(%rsi),%xmm1
leaq 16(%r12,%rbx,1),%r12
vaesenc %xmm0,%xmm2,%xmm2
cmpl 32+20(%rsp),%ecx
movq 64+40(%rsp),%rbx
vaesenc %xmm0,%xmm3,%xmm3
prefetcht0 31(%r13)
vaesenc %xmm0,%xmm4,%xmm4
prefetcht0 15(%r11)
vaesenc %xmm0,%xmm5,%xmm5
leaq (%rbx,%r13,1),%rbx
cmovgeq %rsp,%r13
vaesenc %xmm0,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesenc %xmm0,%xmm7,%xmm7
subq %r13,%rbx
vaesenc %xmm0,%xmm8,%xmm8
vpxor 16(%r13),%xmm15,%xmm11
movq %rbx,64+40(%rsp)
vaesenc %xmm0,%xmm9,%xmm9
vmovups 8(%rsi),%xmm0
leaq 16(%r13,%rbx,1),%r13
vaesenc %xmm1,%xmm2,%xmm2
cmpl 32+24(%rsp),%ecx
movq 64+48(%rsp),%rbx
vaesenc %xmm1,%xmm3,%xmm3
prefetcht0 31(%r14)
vaesenc %xmm1,%xmm4,%xmm4
prefetcht0 15(%r12)
vaesenc %xmm1,%xmm5,%xmm5
leaq (%r14,%rbx,1),%rbx
cmovgeq %rsp,%r14
vaesenc %xmm1,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesenc %xmm1,%xmm7,%xmm7
subq %r14,%rbx
vaesenc %xmm1,%xmm8,%xmm8
vpxor 16(%r14),%xmm15,%xmm12
movq %rbx,64+48(%rsp)
vaesenc %xmm1,%xmm9,%xmm9
vmovups 24(%rsi),%xmm1
leaq 16(%r14,%rbx,1),%r14
vaesenc %xmm0,%xmm2,%xmm2
cmpl 32+28(%rsp),%ecx
movq 64+56(%rsp),%rbx
vaesenc %xmm0,%xmm3,%xmm3
prefetcht0 31(%r15)
vaesenc %xmm0,%xmm4,%xmm4
prefetcht0 15(%r13)
vaesenc %xmm0,%xmm5,%xmm5
leaq (%r15,%rbx,1),%rbx
cmovgeq %rsp,%r15
vaesenc %xmm0,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesenc %xmm0,%xmm7,%xmm7
subq %r15,%rbx
vaesenc %xmm0,%xmm8,%xmm8
vpxor 16(%r15),%xmm15,%xmm13
movq %rbx,64+56(%rsp)
vaesenc %xmm0,%xmm9,%xmm9
vmovups 40(%rsi),%xmm0
leaq 16(%r15,%rbx,1),%r15
vmovdqu 32(%rsp),%xmm14
prefetcht0 15(%r14)
prefetcht0 15(%r15)
cmpl $11,%eax
jb .Lenc8x_tail
vaesenc %xmm1,%xmm2,%xmm2
vaesenc %xmm1,%xmm3,%xmm3
vaesenc %xmm1,%xmm4,%xmm4
vaesenc %xmm1,%xmm5,%xmm5
vaesenc %xmm1,%xmm6,%xmm6
vaesenc %xmm1,%xmm7,%xmm7
vaesenc %xmm1,%xmm8,%xmm8
vaesenc %xmm1,%xmm9,%xmm9
vmovups 176-120(%rsi),%xmm1
vaesenc %xmm0,%xmm2,%xmm2
vaesenc %xmm0,%xmm3,%xmm3
vaesenc %xmm0,%xmm4,%xmm4
vaesenc %xmm0,%xmm5,%xmm5
vaesenc %xmm0,%xmm6,%xmm6
vaesenc %xmm0,%xmm7,%xmm7
vaesenc %xmm0,%xmm8,%xmm8
vaesenc %xmm0,%xmm9,%xmm9
vmovups 192-120(%rsi),%xmm0
je .Lenc8x_tail
vaesenc %xmm1,%xmm2,%xmm2
vaesenc %xmm1,%xmm3,%xmm3
vaesenc %xmm1,%xmm4,%xmm4
vaesenc %xmm1,%xmm5,%xmm5
vaesenc %xmm1,%xmm6,%xmm6
vaesenc %xmm1,%xmm7,%xmm7
vaesenc %xmm1,%xmm8,%xmm8
vaesenc %xmm1,%xmm9,%xmm9
vmovups 208-120(%rsi),%xmm1
vaesenc %xmm0,%xmm2,%xmm2
vaesenc %xmm0,%xmm3,%xmm3
vaesenc %xmm0,%xmm4,%xmm4
vaesenc %xmm0,%xmm5,%xmm5
vaesenc %xmm0,%xmm6,%xmm6
vaesenc %xmm0,%xmm7,%xmm7
vaesenc %xmm0,%xmm8,%xmm8
vaesenc %xmm0,%xmm9,%xmm9
vmovups 224-120(%rsi),%xmm0
.Lenc8x_tail:
vaesenc %xmm1,%xmm2,%xmm2
vpxor %xmm15,%xmm15,%xmm15
vaesenc %xmm1,%xmm3,%xmm3
vaesenc %xmm1,%xmm4,%xmm4
vpcmpgtd %xmm15,%xmm14,%xmm15
vaesenc %xmm1,%xmm5,%xmm5
vaesenc %xmm1,%xmm6,%xmm6
vpaddd %xmm14,%xmm15,%xmm15
vmovdqu 48(%rsp),%xmm14
vaesenc %xmm1,%xmm7,%xmm7
movq 64(%rsp),%rbx
vaesenc %xmm1,%xmm8,%xmm8
vaesenc %xmm1,%xmm9,%xmm9
vmovups 16-120(%rsi),%xmm1
vaesenclast %xmm0,%xmm2,%xmm2
vmovdqa %xmm15,32(%rsp)
vpxor %xmm15,%xmm15,%xmm15
vaesenclast %xmm0,%xmm3,%xmm3
vaesenclast %xmm0,%xmm4,%xmm4
vpcmpgtd %xmm15,%xmm14,%xmm15
vaesenclast %xmm0,%xmm5,%xmm5
vaesenclast %xmm0,%xmm6,%xmm6
vpaddd %xmm15,%xmm14,%xmm14
vmovdqu -120(%rsi),%xmm15
vaesenclast %xmm0,%xmm7,%xmm7
vaesenclast %xmm0,%xmm8,%xmm8
vmovdqa %xmm14,48(%rsp)
vaesenclast %xmm0,%xmm9,%xmm9
vmovups 32-120(%rsi),%xmm0
vmovups %xmm2,-16(%r8)
subq %rbx,%r8
vpxor 0(%rbp),%xmm2,%xmm2
vmovups %xmm3,-16(%r9)
subq 72(%rsp),%r9
vpxor 16(%rbp),%xmm3,%xmm3
vmovups %xmm4,-16(%r10)
subq 80(%rsp),%r10
vpxor 32(%rbp),%xmm4,%xmm4
vmovups %xmm5,-16(%r11)
subq 88(%rsp),%r11
vpxor 48(%rbp),%xmm5,%xmm5
vmovups %xmm6,-16(%r12)
subq 96(%rsp),%r12
vpxor %xmm10,%xmm6,%xmm6
vmovups %xmm7,-16(%r13)
subq 104(%rsp),%r13
vpxor %xmm11,%xmm7,%xmm7
vmovups %xmm8,-16(%r14)
subq 112(%rsp),%r14
vpxor %xmm12,%xmm8,%xmm8
vmovups %xmm9,-16(%r15)
subq 120(%rsp),%r15
vpxor %xmm13,%xmm9,%xmm9
decl %edx
jnz .Loop_enc8x
movq 16(%rsp),%rax
.Lenc8x_done:
vzeroupper
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
.Lenc8x_epilogue:
.byte 0xf3,0xc3
.size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx
.type aesni_multi_cbc_decrypt_avx,@function
.align 32
aesni_multi_cbc_decrypt_avx:
_avx_cbc_dec_shortcut:
movq %rsp,%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $256,%rsp
andq $-256,%rsp
subq $192,%rsp
movq %rax,16(%rsp)
.Ldec8x_body:
vzeroupper
vmovdqu (%rsi),%xmm15
leaq 120(%rsi),%rsi
leaq 160(%rdi),%rdi
shrl $1,%edx
.Ldec8x_loop_grande:
xorl %edx,%edx
movl -144(%rdi),%ecx
movq -160(%rdi),%r8
cmpl %edx,%ecx
movq -152(%rdi),%rbx
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -136(%rdi),%xmm2
movl %ecx,32(%rsp)
cmovleq %rsp,%r8
subq %r8,%rbx
movq %rbx,64(%rsp)
vmovdqu %xmm2,192(%rsp)
movl -104(%rdi),%ecx
movq -120(%rdi),%r9
cmpl %edx,%ecx
movq -112(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -96(%rdi),%xmm3
movl %ecx,36(%rsp)
cmovleq %rsp,%r9
subq %r9,%rbp
movq %rbp,72(%rsp)
vmovdqu %xmm3,208(%rsp)
movl -64(%rdi),%ecx
movq -80(%rdi),%r10
cmpl %edx,%ecx
movq -72(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -56(%rdi),%xmm4
movl %ecx,40(%rsp)
cmovleq %rsp,%r10
subq %r10,%rbp
movq %rbp,80(%rsp)
vmovdqu %xmm4,224(%rsp)
movl -24(%rdi),%ecx
movq -40(%rdi),%r11
cmpl %edx,%ecx
movq -32(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -16(%rdi),%xmm5
movl %ecx,44(%rsp)
cmovleq %rsp,%r11
subq %r11,%rbp
movq %rbp,88(%rsp)
vmovdqu %xmm5,240(%rsp)
movl 16(%rdi),%ecx
movq 0(%rdi),%r12
cmpl %edx,%ecx
movq 8(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 24(%rdi),%xmm6
movl %ecx,48(%rsp)
cmovleq %rsp,%r12
subq %r12,%rbp
movq %rbp,96(%rsp)
vmovdqu %xmm6,256(%rsp)
movl 56(%rdi),%ecx
movq 40(%rdi),%r13
cmpl %edx,%ecx
movq 48(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 64(%rdi),%xmm7
movl %ecx,52(%rsp)
cmovleq %rsp,%r13
subq %r13,%rbp
movq %rbp,104(%rsp)
vmovdqu %xmm7,272(%rsp)
movl 96(%rdi),%ecx
movq 80(%rdi),%r14
cmpl %edx,%ecx
movq 88(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 104(%rdi),%xmm8
movl %ecx,56(%rsp)
cmovleq %rsp,%r14
subq %r14,%rbp
movq %rbp,112(%rsp)
vmovdqu %xmm8,288(%rsp)
movl 136(%rdi),%ecx
movq 120(%rdi),%r15
cmpl %edx,%ecx
movq 128(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 144(%rdi),%xmm9
movl %ecx,60(%rsp)
cmovleq %rsp,%r15
subq %r15,%rbp
movq %rbp,120(%rsp)
vmovdqu %xmm9,304(%rsp)
testl %edx,%edx
jz .Ldec8x_done
vmovups 16-120(%rsi),%xmm1
vmovups 32-120(%rsi),%xmm0
movl 240-120(%rsi),%eax
leaq 192+128(%rsp),%rbp
vmovdqu (%r8),%xmm2
vmovdqu (%r9),%xmm3
vmovdqu (%r10),%xmm4
vmovdqu (%r11),%xmm5
vmovdqu (%r12),%xmm6
vmovdqu (%r13),%xmm7
vmovdqu (%r14),%xmm8
vmovdqu (%r15),%xmm9
vmovdqu %xmm2,0(%rbp)
vpxor %xmm15,%xmm2,%xmm2
vmovdqu %xmm3,16(%rbp)
vpxor %xmm15,%xmm3,%xmm3
vmovdqu %xmm4,32(%rbp)
vpxor %xmm15,%xmm4,%xmm4
vmovdqu %xmm5,48(%rbp)
vpxor %xmm15,%xmm5,%xmm5
vmovdqu %xmm6,64(%rbp)
vpxor %xmm15,%xmm6,%xmm6
vmovdqu %xmm7,80(%rbp)
vpxor %xmm15,%xmm7,%xmm7
vmovdqu %xmm8,96(%rbp)
vpxor %xmm15,%xmm8,%xmm8
vmovdqu %xmm9,112(%rbp)
vpxor %xmm15,%xmm9,%xmm9
xorq $0x80,%rbp
movl $1,%ecx
jmp .Loop_dec8x
.align 32
.Loop_dec8x:
vaesdec %xmm1,%xmm2,%xmm2
cmpl 32+0(%rsp),%ecx
vaesdec %xmm1,%xmm3,%xmm3
prefetcht0 31(%r8)
vaesdec %xmm1,%xmm4,%xmm4
vaesdec %xmm1,%xmm5,%xmm5
leaq (%r8,%rbx,1),%rbx
cmovgeq %rsp,%r8
vaesdec %xmm1,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesdec %xmm1,%xmm7,%xmm7
subq %r8,%rbx
vaesdec %xmm1,%xmm8,%xmm8
vmovdqu 16(%r8),%xmm10
movq %rbx,64+0(%rsp)
vaesdec %xmm1,%xmm9,%xmm9
vmovups -72(%rsi),%xmm1
leaq 16(%r8,%rbx,1),%r8
vmovdqu %xmm10,128(%rsp)
vaesdec %xmm0,%xmm2,%xmm2
cmpl 32+4(%rsp),%ecx
movq 64+8(%rsp),%rbx
vaesdec %xmm0,%xmm3,%xmm3
prefetcht0 31(%r9)
vaesdec %xmm0,%xmm4,%xmm4
vaesdec %xmm0,%xmm5,%xmm5
leaq (%r9,%rbx,1),%rbx
cmovgeq %rsp,%r9
vaesdec %xmm0,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesdec %xmm0,%xmm7,%xmm7
subq %r9,%rbx
vaesdec %xmm0,%xmm8,%xmm8
vmovdqu 16(%r9),%xmm11
movq %rbx,64+8(%rsp)
vaesdec %xmm0,%xmm9,%xmm9
vmovups -56(%rsi),%xmm0
leaq 16(%r9,%rbx,1),%r9
vmovdqu %xmm11,144(%rsp)
vaesdec %xmm1,%xmm2,%xmm2
cmpl 32+8(%rsp),%ecx
movq 64+16(%rsp),%rbx
vaesdec %xmm1,%xmm3,%xmm3
prefetcht0 31(%r10)
vaesdec %xmm1,%xmm4,%xmm4
prefetcht0 15(%r8)
vaesdec %xmm1,%xmm5,%xmm5
leaq (%r10,%rbx,1),%rbx
cmovgeq %rsp,%r10
vaesdec %xmm1,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesdec %xmm1,%xmm7,%xmm7
subq %r10,%rbx
vaesdec %xmm1,%xmm8,%xmm8
vmovdqu 16(%r10),%xmm12
movq %rbx,64+16(%rsp)
vaesdec %xmm1,%xmm9,%xmm9
vmovups -40(%rsi),%xmm1
leaq 16(%r10,%rbx,1),%r10
vmovdqu %xmm12,160(%rsp)
vaesdec %xmm0,%xmm2,%xmm2
cmpl 32+12(%rsp),%ecx
movq 64+24(%rsp),%rbx
vaesdec %xmm0,%xmm3,%xmm3
prefetcht0 31(%r11)
vaesdec %xmm0,%xmm4,%xmm4
prefetcht0 15(%r9)
vaesdec %xmm0,%xmm5,%xmm5
leaq (%r11,%rbx,1),%rbx
cmovgeq %rsp,%r11
vaesdec %xmm0,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesdec %xmm0,%xmm7,%xmm7
subq %r11,%rbx
vaesdec %xmm0,%xmm8,%xmm8
vmovdqu 16(%r11),%xmm13
movq %rbx,64+24(%rsp)
vaesdec %xmm0,%xmm9,%xmm9
vmovups -24(%rsi),%xmm0
leaq 16(%r11,%rbx,1),%r11
vmovdqu %xmm13,176(%rsp)
vaesdec %xmm1,%xmm2,%xmm2
cmpl 32+16(%rsp),%ecx
movq 64+32(%rsp),%rbx
vaesdec %xmm1,%xmm3,%xmm3
prefetcht0 31(%r12)
vaesdec %xmm1,%xmm4,%xmm4
prefetcht0 15(%r10)
vaesdec %xmm1,%xmm5,%xmm5
leaq (%r12,%rbx,1),%rbx
cmovgeq %rsp,%r12
vaesdec %xmm1,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesdec %xmm1,%xmm7,%xmm7
subq %r12,%rbx
vaesdec %xmm1,%xmm8,%xmm8
vmovdqu 16(%r12),%xmm10
movq %rbx,64+32(%rsp)
vaesdec %xmm1,%xmm9,%xmm9
vmovups -8(%rsi),%xmm1
leaq 16(%r12,%rbx,1),%r12
vaesdec %xmm0,%xmm2,%xmm2
cmpl 32+20(%rsp),%ecx
movq 64+40(%rsp),%rbx
vaesdec %xmm0,%xmm3,%xmm3
prefetcht0 31(%r13)
vaesdec %xmm0,%xmm4,%xmm4
prefetcht0 15(%r11)
vaesdec %xmm0,%xmm5,%xmm5
leaq (%rbx,%r13,1),%rbx
cmovgeq %rsp,%r13
vaesdec %xmm0,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesdec %xmm0,%xmm7,%xmm7
subq %r13,%rbx
vaesdec %xmm0,%xmm8,%xmm8
vmovdqu 16(%r13),%xmm11
movq %rbx,64+40(%rsp)
vaesdec %xmm0,%xmm9,%xmm9
vmovups 8(%rsi),%xmm0
leaq 16(%r13,%rbx,1),%r13
vaesdec %xmm1,%xmm2,%xmm2
cmpl 32+24(%rsp),%ecx
movq 64+48(%rsp),%rbx
vaesdec %xmm1,%xmm3,%xmm3
prefetcht0 31(%r14)
vaesdec %xmm1,%xmm4,%xmm4
prefetcht0 15(%r12)
vaesdec %xmm1,%xmm5,%xmm5
leaq (%r14,%rbx,1),%rbx
cmovgeq %rsp,%r14
vaesdec %xmm1,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesdec %xmm1,%xmm7,%xmm7
subq %r14,%rbx
vaesdec %xmm1,%xmm8,%xmm8
vmovdqu 16(%r14),%xmm12
movq %rbx,64+48(%rsp)
vaesdec %xmm1,%xmm9,%xmm9
vmovups 24(%rsi),%xmm1
leaq 16(%r14,%rbx,1),%r14
vaesdec %xmm0,%xmm2,%xmm2
cmpl 32+28(%rsp),%ecx
movq 64+56(%rsp),%rbx
vaesdec %xmm0,%xmm3,%xmm3
prefetcht0 31(%r15)
vaesdec %xmm0,%xmm4,%xmm4
prefetcht0 15(%r13)
vaesdec %xmm0,%xmm5,%xmm5
leaq (%r15,%rbx,1),%rbx
cmovgeq %rsp,%r15
vaesdec %xmm0,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesdec %xmm0,%xmm7,%xmm7
subq %r15,%rbx
vaesdec %xmm0,%xmm8,%xmm8
vmovdqu 16(%r15),%xmm13
movq %rbx,64+56(%rsp)
vaesdec %xmm0,%xmm9,%xmm9
vmovups 40(%rsi),%xmm0
leaq 16(%r15,%rbx,1),%r15
vmovdqu 32(%rsp),%xmm14
prefetcht0 15(%r14)
prefetcht0 15(%r15)
cmpl $11,%eax
jb .Ldec8x_tail
vaesdec %xmm1,%xmm2,%xmm2
vaesdec %xmm1,%xmm3,%xmm3
vaesdec %xmm1,%xmm4,%xmm4
vaesdec %xmm1,%xmm5,%xmm5
vaesdec %xmm1,%xmm6,%xmm6
vaesdec %xmm1,%xmm7,%xmm7
vaesdec %xmm1,%xmm8,%xmm8
vaesdec %xmm1,%xmm9,%xmm9
vmovups 176-120(%rsi),%xmm1
vaesdec %xmm0,%xmm2,%xmm2
vaesdec %xmm0,%xmm3,%xmm3
vaesdec %xmm0,%xmm4,%xmm4
vaesdec %xmm0,%xmm5,%xmm5
vaesdec %xmm0,%xmm6,%xmm6
vaesdec %xmm0,%xmm7,%xmm7
vaesdec %xmm0,%xmm8,%xmm8
vaesdec %xmm0,%xmm9,%xmm9
vmovups 192-120(%rsi),%xmm0
je .Ldec8x_tail
vaesdec %xmm1,%xmm2,%xmm2
vaesdec %xmm1,%xmm3,%xmm3
vaesdec %xmm1,%xmm4,%xmm4
vaesdec %xmm1,%xmm5,%xmm5
vaesdec %xmm1,%xmm6,%xmm6
vaesdec %xmm1,%xmm7,%xmm7
vaesdec %xmm1,%xmm8,%xmm8
vaesdec %xmm1,%xmm9,%xmm9
vmovups 208-120(%rsi),%xmm1
vaesdec %xmm0,%xmm2,%xmm2
vaesdec %xmm0,%xmm3,%xmm3
vaesdec %xmm0,%xmm4,%xmm4
vaesdec %xmm0,%xmm5,%xmm5
vaesdec %xmm0,%xmm6,%xmm6
vaesdec %xmm0,%xmm7,%xmm7
vaesdec %xmm0,%xmm8,%xmm8
vaesdec %xmm0,%xmm9,%xmm9
vmovups 224-120(%rsi),%xmm0
.Ldec8x_tail:
vaesdec %xmm1,%xmm2,%xmm2
vpxor %xmm15,%xmm15,%xmm15
vaesdec %xmm1,%xmm3,%xmm3
vaesdec %xmm1,%xmm4,%xmm4
vpcmpgtd %xmm15,%xmm14,%xmm15
vaesdec %xmm1,%xmm5,%xmm5
vaesdec %xmm1,%xmm6,%xmm6
vpaddd %xmm14,%xmm15,%xmm15
vmovdqu 48(%rsp),%xmm14
vaesdec %xmm1,%xmm7,%xmm7
movq 64(%rsp),%rbx
vaesdec %xmm1,%xmm8,%xmm8
vaesdec %xmm1,%xmm9,%xmm9
vmovups 16-120(%rsi),%xmm1
vaesdeclast %xmm0,%xmm2,%xmm2
vmovdqa %xmm15,32(%rsp)
vpxor %xmm15,%xmm15,%xmm15
vaesdeclast %xmm0,%xmm3,%xmm3
vpxor 0(%rbp),%xmm2,%xmm2
vaesdeclast %xmm0,%xmm4,%xmm4
vpxor 16(%rbp),%xmm3,%xmm3
vpcmpgtd %xmm15,%xmm14,%xmm15
vaesdeclast %xmm0,%xmm5,%xmm5
vpxor 32(%rbp),%xmm4,%xmm4
vaesdeclast %xmm0,%xmm6,%xmm6
vpxor 48(%rbp),%xmm5,%xmm5
vpaddd %xmm15,%xmm14,%xmm14
vmovdqu -120(%rsi),%xmm15
vaesdeclast %xmm0,%xmm7,%xmm7
vpxor 64(%rbp),%xmm6,%xmm6
vaesdeclast %xmm0,%xmm8,%xmm8
vpxor 80(%rbp),%xmm7,%xmm7
vmovdqa %xmm14,48(%rsp)
vaesdeclast %xmm0,%xmm9,%xmm9
vpxor 96(%rbp),%xmm8,%xmm8
vmovups 32-120(%rsi),%xmm0
vmovups %xmm2,-16(%r8)
subq %rbx,%r8
vmovdqu 128+0(%rsp),%xmm2
vpxor 112(%rbp),%xmm9,%xmm9
vmovups %xmm3,-16(%r9)
subq 72(%rsp),%r9
vmovdqu %xmm2,0(%rbp)
vpxor %xmm15,%xmm2,%xmm2
vmovdqu 128+16(%rsp),%xmm3
vmovups %xmm4,-16(%r10)
subq 80(%rsp),%r10
vmovdqu %xmm3,16(%rbp)
vpxor %xmm15,%xmm3,%xmm3
vmovdqu 128+32(%rsp),%xmm4
vmovups %xmm5,-16(%r11)
subq 88(%rsp),%r11
vmovdqu %xmm4,32(%rbp)
vpxor %xmm15,%xmm4,%xmm4
vmovdqu 128+48(%rsp),%xmm5
vmovups %xmm6,-16(%r12)
subq 96(%rsp),%r12
vmovdqu %xmm5,48(%rbp)
vpxor %xmm15,%xmm5,%xmm5
vmovdqu %xmm10,64(%rbp)
vpxor %xmm10,%xmm15,%xmm6
vmovups %xmm7,-16(%r13)
subq 104(%rsp),%r13
vmovdqu %xmm11,80(%rbp)
vpxor %xmm11,%xmm15,%xmm7
vmovups %xmm8,-16(%r14)
subq 112(%rsp),%r14
vmovdqu %xmm12,96(%rbp)
vpxor %xmm12,%xmm15,%xmm8
vmovups %xmm9,-16(%r15)
subq 120(%rsp),%r15
vmovdqu %xmm13,112(%rbp)
vpxor %xmm13,%xmm15,%xmm9
xorq $128,%rbp
decl %edx
jnz .Loop_dec8x
movq 16(%rsp),%rax
.Ldec8x_done:
vzeroupper
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
.Ldec8x_epilogue:
.byte 0xf3,0xc3
.size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx

deps/openssl/asm/x64-elf-gas/aes/aesni-sha1-x86_64.s (1335)

File diff suppressed because it is too large

deps/openssl/asm/x64-elf-gas/aes/aesni-sha256-x86_64.s (4297)

File diff suppressed because it is too large

deps/openssl/asm/x64-elf-gas/aes/aesni-x86_64.s (82)

@ -503,7 +503,7 @@ aesni_ecb_encrypt:
testl %r8d,%r8d
jz .Lecb_decrypt
cmpq $128,%rdx
cmpq $0x80,%rdx
jb .Lecb_enc_tail
movdqu (%rdi),%xmm2
@ -515,7 +515,7 @@ aesni_ecb_encrypt:
movdqu 96(%rdi),%xmm8
movdqu 112(%rdi),%xmm9
leaq 128(%rdi),%rdi
subq $128,%rdx
subq $0x80,%rdx
jmp .Lecb_enc_loop8_enter
.align 16
.Lecb_enc_loop8:
@ -543,7 +543,7 @@ aesni_ecb_encrypt:
call _aesni_encrypt8
subq $128,%rdx
subq $0x80,%rdx
jnc .Lecb_enc_loop8
movups %xmm2,(%rsi)
@ -557,22 +557,22 @@ aesni_ecb_encrypt:
movups %xmm8,96(%rsi)
movups %xmm9,112(%rsi)
leaq 128(%rsi),%rsi
addq $128,%rdx
addq $0x80,%rdx
jz .Lecb_ret
.Lecb_enc_tail:
movups (%rdi),%xmm2
cmpq $32,%rdx
cmpq $0x20,%rdx
jb .Lecb_enc_one
movups 16(%rdi),%xmm3
je .Lecb_enc_two
movups 32(%rdi),%xmm4
cmpq $64,%rdx
cmpq $0x40,%rdx
jb .Lecb_enc_three
movups 48(%rdi),%xmm5
je .Lecb_enc_four
movups 64(%rdi),%xmm6
cmpq $96,%rdx
cmpq $0x60,%rdx
jb .Lecb_enc_five
movups 80(%rdi),%xmm7
je .Lecb_enc_six
@ -646,7 +646,7 @@ aesni_ecb_encrypt:
.align 16
.Lecb_decrypt:
cmpq $128,%rdx
cmpq $0x80,%rdx
jb .Lecb_dec_tail
movdqu (%rdi),%xmm2
@ -658,7 +658,7 @@ aesni_ecb_encrypt:
movdqu 96(%rdi),%xmm8
movdqu 112(%rdi),%xmm9
leaq 128(%rdi),%rdi
subq $128,%rdx
subq $0x80,%rdx
jmp .Lecb_dec_loop8_enter
.align 16
.Lecb_dec_loop8:
@ -687,7 +687,7 @@ aesni_ecb_encrypt:
call _aesni_decrypt8
movups (%r11),%xmm0
subq $128,%rdx
subq $0x80,%rdx
jnc .Lecb_dec_loop8
movups %xmm2,(%rsi)
@ -709,22 +709,22 @@ aesni_ecb_encrypt:
movups %xmm9,112(%rsi)
pxor %xmm9,%xmm9
leaq 128(%rsi),%rsi
addq $128,%rdx
addq $0x80,%rdx
jz .Lecb_ret
.Lecb_dec_tail:
movups (%rdi),%xmm2
cmpq $32,%rdx
cmpq $0x20,%rdx
jb .Lecb_dec_one
movups 16(%rdi),%xmm3
je .Lecb_dec_two
movups 32(%rdi),%xmm4
cmpq $64,%rdx
cmpq $0x40,%rdx
jb .Lecb_dec_three
movups 48(%rdi),%xmm5
je .Lecb_dec_four
movups 64(%rdi),%xmm6
cmpq $96,%rdx
cmpq $0x60,%rdx
jb .Lecb_dec_five
movups 80(%rdi),%xmm7
je .Lecb_dec_six
@ -1598,7 +1598,7 @@ aesni_xts_encrypt:
movdqa .Lxts_magic(%rip),%xmm8
movdqa %xmm2,%xmm15
pshufd $95,%xmm2,%xmm9
pshufd $0x5f,%xmm2,%xmm9
pxor %xmm0,%xmm1
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
@ -1697,7 +1697,7 @@ aesni_xts_encrypt:
.byte 102,15,56,220,248
movups 64(%r11),%xmm0
movdqa %xmm8,80(%rsp)
pshufd $95,%xmm15,%xmm9
pshufd $0x5f,%xmm15,%xmm9
jmp .Lxts_enc_loop6
.align 32
.Lxts_enc_loop6:
@ -1836,13 +1836,13 @@ aesni_xts_encrypt:
jz .Lxts_enc_done
pxor %xmm0,%xmm11
cmpq $32,%rdx
cmpq $0x20,%rdx
jb .Lxts_enc_one
pxor %xmm0,%xmm12
je .Lxts_enc_two
pxor %xmm0,%xmm13
cmpq $64,%rdx
cmpq $0x40,%rdx
jb .Lxts_enc_three
pxor %xmm0,%xmm14
je .Lxts_enc_four
@ -2069,7 +2069,7 @@ aesni_xts_decrypt:
movdqa .Lxts_magic(%rip),%xmm8
movdqa %xmm2,%xmm15
pshufd $95,%xmm2,%xmm9
pshufd $0x5f,%xmm2,%xmm9
pxor %xmm0,%xmm1
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
@ -2168,7 +2168,7 @@ aesni_xts_decrypt:
.byte 102,15,56,222,248
movups 64(%r11),%xmm0
movdqa %xmm8,80(%rsp)
pshufd $95,%xmm15,%xmm9
pshufd $0x5f,%xmm15,%xmm9
jmp .Lxts_dec_loop6
.align 32
.Lxts_dec_loop6:
@ -2308,13 +2308,13 @@ aesni_xts_decrypt:
jz .Lxts_dec_done
pxor %xmm0,%xmm12
cmpq $32,%rdx
cmpq $0x20,%rdx
jb .Lxts_dec_one
pxor %xmm0,%xmm13
je .Lxts_dec_two
pxor %xmm0,%xmm14
cmpq $64,%rdx
cmpq $0x40,%rdx
jb .Lxts_dec_three
je .Lxts_dec_four
@ -2345,7 +2345,7 @@ aesni_xts_decrypt:
pcmpgtd %xmm15,%xmm14
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
pshufd $19,%xmm14,%xmm11
pshufd $0x13,%xmm14,%xmm11
andq $15,%r9
jz .Lxts_dec_ret
@ -2634,7 +2634,7 @@ aesni_cbc_encrypt:
leaq -8(%rax),%rbp
movups (%r8),%xmm10
movl %r10d,%eax
cmpq $80,%rdx
cmpq $0x50,%rdx
jbe .Lcbc_dec_tail
movups (%rcx),%xmm0
@ -2650,14 +2650,14 @@ aesni_cbc_encrypt:
movdqu 80(%rdi),%xmm7
movdqa %xmm6,%xmm15
movl OPENSSL_ia32cap_P+4(%rip),%r9d
cmpq $112,%rdx
cmpq $0x70,%rdx
jbe .Lcbc_dec_six_or_seven
andl $71303168,%r9d
subq $80,%rdx
subq $0x50,%rdx
cmpl $4194304,%r9d
je .Lcbc_dec_loop6_enter
subq $32,%rdx
subq $0x20,%rdx
leaq 112(%rcx),%rcx
jmp .Lcbc_dec_loop8_enter
.align 16
@ -2672,7 +2672,7 @@ aesni_cbc_encrypt:
movups 16-112(%rcx),%xmm1
pxor %xmm0,%xmm4
xorq %r11,%r11
cmpq $112,%rdx
cmpq $0x70,%rdx
pxor %xmm0,%xmm5
pxor %xmm0,%xmm6
pxor %xmm0,%xmm7
@ -2857,21 +2857,21 @@ aesni_cbc_encrypt:
movups %xmm8,96(%rsi)
leaq 112(%rsi),%rsi
subq $128,%rdx
subq $0x80,%rdx
ja .Lcbc_dec_loop8
movaps %xmm9,%xmm2
leaq -112(%rcx),%rcx
addq $112,%rdx
addq $0x70,%rdx
jle .Lcbc_dec_clear_tail_collected
movups %xmm9,(%rsi)
leaq 16(%rsi),%rsi
cmpq $80,%rdx
cmpq $0x50,%rdx
jbe .Lcbc_dec_tail
movaps %xmm11,%xmm2
.Lcbc_dec_six_or_seven:
cmpq $96,%rdx
cmpq $0x60,%rdx
ja .Lcbc_dec_seven
movaps %xmm7,%xmm8
@ -2964,33 +2964,33 @@ aesni_cbc_encrypt:
movl %r10d,%eax
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
subq $96,%rdx
subq $0x60,%rdx
ja .Lcbc_dec_loop6
movdqa %xmm7,%xmm2
addq $80,%rdx
addq $0x50,%rdx
jle .Lcbc_dec_clear_tail_collected
movups %xmm7,(%rsi)
leaq 16(%rsi),%rsi
.Lcbc_dec_tail:
movups (%rdi),%xmm2
subq $16,%rdx
subq $0x10,%rdx
jbe .Lcbc_dec_one
movups 16(%rdi),%xmm3
movaps %xmm2,%xmm11
subq $16,%rdx
subq $0x10,%rdx
jbe .Lcbc_dec_two
movups 32(%rdi),%xmm4
movaps %xmm3,%xmm12
subq $16,%rdx
subq $0x10,%rdx
jbe .Lcbc_dec_three
movups 48(%rdi),%xmm5
movaps %xmm4,%xmm13
subq $16,%rdx
subq $0x10,%rdx
jbe .Lcbc_dec_four
movups 64(%rdi),%xmm6
@ -3015,7 +3015,7 @@ aesni_cbc_encrypt:
movdqa %xmm6,%xmm2
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
subq $16,%rdx
subq $0x10,%rdx
jmp .Lcbc_dec_tail_collected
.align 16
@ -3332,7 +3332,7 @@ __aesni_set_encrypt_key:
pslldq $4,%xmm0
pxor %xmm3,%xmm0
pshufd $255,%xmm0,%xmm3
pshufd $0xff,%xmm0,%xmm3
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
@ -3419,7 +3419,7 @@ __aesni_set_encrypt_key:
decl %r10d
jz .Ldone_key256
pshufd $255,%xmm0,%xmm2
pshufd $0xff,%xmm0,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,221,211

deps/openssl/asm/x64-elf-gas/aes/bsaes-x86_64.s (158)

@ -324,45 +324,45 @@ _bsaes_encrypt8_bitslice:
pxor %xmm2,%xmm5
decl %r10d
jl .Lenc_done
pshufd $147,%xmm15,%xmm7
pshufd $147,%xmm0,%xmm8
pshufd $0x93,%xmm15,%xmm7
pshufd $0x93,%xmm0,%xmm8
pxor %xmm7,%xmm15
pshufd $147,%xmm3,%xmm9
pshufd $0x93,%xmm3,%xmm9
pxor %xmm8,%xmm0
pshufd $147,%xmm5,%xmm10
pshufd $0x93,%xmm5,%xmm10
pxor %xmm9,%xmm3
pshufd $147,%xmm2,%xmm11
pshufd $0x93,%xmm2,%xmm11
pxor %xmm10,%xmm5
pshufd $147,%xmm6,%xmm12
pshufd $0x93,%xmm6,%xmm12
pxor %xmm11,%xmm2
pshufd $147,%xmm1,%xmm13
pshufd $0x93,%xmm1,%xmm13
pxor %xmm12,%xmm6
pshufd $147,%xmm4,%xmm14
pshufd $0x93,%xmm4,%xmm14
pxor %xmm13,%xmm1
pxor %xmm14,%xmm4
pxor %xmm15,%xmm8
pxor %xmm4,%xmm7
pxor %xmm4,%xmm8
pshufd $78,%xmm15,%xmm15
pshufd $0x4E,%xmm15,%xmm15
pxor %xmm0,%xmm9
pshufd $78,%xmm0,%xmm0
pshufd $0x4E,%xmm0,%xmm0
pxor %xmm2,%xmm12
pxor %xmm7,%xmm15
pxor %xmm6,%xmm13
pxor %xmm8,%xmm0
pxor %xmm5,%xmm11
pshufd $78,%xmm2,%xmm7
pshufd $0x4E,%xmm2,%xmm7
pxor %xmm1,%xmm14
pshufd $78,%xmm6,%xmm8
pshufd $0x4E,%xmm6,%xmm8
pxor %xmm3,%xmm10
pshufd $78,%xmm5,%xmm2
pshufd $0x4E,%xmm5,%xmm2
pxor %xmm4,%xmm10
pshufd $78,%xmm4,%xmm6
pshufd $0x4E,%xmm4,%xmm6
pxor %xmm4,%xmm11
pshufd $78,%xmm1,%xmm5
pshufd $0x4E,%xmm1,%xmm5
pxor %xmm11,%xmm7
pshufd $78,%xmm3,%xmm1
pshufd $0x4E,%xmm3,%xmm1
pxor %xmm12,%xmm8
pxor %xmm10,%xmm2
pxor %xmm14,%xmm6
@ -796,24 +796,24 @@ _bsaes_decrypt8:
decl %r10d
jl .Ldec_done
pshufd $78,%xmm15,%xmm7
pshufd $78,%xmm2,%xmm13
pshufd $0x4E,%xmm15,%xmm7
pshufd $0x4E,%xmm2,%xmm13
pxor %xmm15,%xmm7
pshufd $78,%xmm4,%xmm14
pshufd $0x4E,%xmm4,%xmm14
pxor %xmm2,%xmm13
pshufd $78,%xmm0,%xmm8
pshufd $0x4E,%xmm0,%xmm8
pxor %xmm4,%xmm14
pshufd $78,%xmm5,%xmm9
pshufd $0x4E,%xmm5,%xmm9
pxor %xmm0,%xmm8
pshufd $78,%xmm3,%xmm10
pshufd $0x4E,%xmm3,%xmm10
pxor %xmm5,%xmm9
pxor %xmm13,%xmm15
pxor %xmm13,%xmm0
pshufd $78,%xmm1,%xmm11
pshufd $0x4E,%xmm1,%xmm11
pxor %xmm3,%xmm10
pxor %xmm7,%xmm5
pxor %xmm8,%xmm3
pshufd $78,%xmm6,%xmm12
pshufd $0x4E,%xmm6,%xmm12
pxor %xmm1,%xmm11
pxor %xmm14,%xmm0
pxor %xmm9,%xmm1
@ -827,45 +827,45 @@ _bsaes_decrypt8:
pxor %xmm14,%xmm1
pxor %xmm14,%xmm6
pxor %xmm12,%xmm4
pshufd $147,%xmm15,%xmm7
pshufd $147,%xmm0,%xmm8
pshufd $0x93,%xmm15,%xmm7
pshufd $0x93,%xmm0,%xmm8
pxor %xmm7,%xmm15
pshufd $147,%xmm5,%xmm9
pshufd $0x93,%xmm5,%xmm9
pxor %xmm8,%xmm0
pshufd $147,%xmm3,%xmm10
pshufd $0x93,%xmm3,%xmm10
pxor %xmm9,%xmm5
pshufd $147,%xmm1,%xmm11
pshufd $0x93,%xmm1,%xmm11
pxor %xmm10,%xmm3
pshufd $147,%xmm6,%xmm12
pshufd $0x93,%xmm6,%xmm12
pxor %xmm11,%xmm1
pshufd $147,%xmm2,%xmm13
pshufd $0x93,%xmm2,%xmm13
pxor %xmm12,%xmm6
pshufd $147,%xmm4,%xmm14
pshufd $0x93,%xmm4,%xmm14
pxor %xmm13,%xmm2
pxor %xmm14,%xmm4
pxor %xmm15,%xmm8
pxor %xmm4,%xmm7
pxor %xmm4,%xmm8
pshufd $78,%xmm15,%xmm15
pshufd $0x4E,%xmm15,%xmm15
pxor %xmm0,%xmm9
pshufd $78,%xmm0,%xmm0
pshufd $0x4E,%xmm0,%xmm0
pxor %xmm1,%xmm12
pxor %xmm7,%xmm15
pxor %xmm6,%xmm13
pxor %xmm8,%xmm0
pxor %xmm3,%xmm11
pshufd $78,%xmm1,%xmm7
pshufd $0x4E,%xmm1,%xmm7
pxor %xmm2,%xmm14
pshufd $78,%xmm6,%xmm8
pshufd $0x4E,%xmm6,%xmm8
pxor %xmm5,%xmm10
pshufd $78,%xmm3,%xmm1
pshufd $0x4E,%xmm3,%xmm1
pxor %xmm4,%xmm10
pshufd $78,%xmm4,%xmm6
pshufd $0x4E,%xmm4,%xmm6
pxor %xmm4,%xmm11
pshufd $78,%xmm2,%xmm3
pshufd $0x4E,%xmm2,%xmm3
pxor %xmm11,%xmm7
pshufd $78,%xmm5,%xmm2
pshufd $0x4E,%xmm5,%xmm2
pxor %xmm12,%xmm8
pxor %xmm1,%xmm10
pxor %xmm14,%xmm6
@ -1552,20 +1552,20 @@ bsaes_xts_encrypt:
movdqa %xmm7,(%rax)
andq $-16,%r14
subq $128,%rsp
subq $0x80,%rsp
movdqa 32(%rbp),%xmm6
pxor %xmm14,%xmm14
movdqa .Lxts_magic(%rip),%xmm12
pcmpgtd %xmm6,%xmm14
subq $128,%r14
subq $0x80,%r14
jc .Lxts_enc_short
jmp .Lxts_enc_loop
.align 16
.Lxts_enc_loop:
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm15
movdqa %xmm6,0(%rsp)
@ -1573,7 +1573,7 @@ bsaes_xts_encrypt:
pand %xmm12,%xmm13
pcmpgtd %xmm6,%xmm14
pxor %xmm13,%xmm6
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm0
movdqa %xmm6,16(%rsp)
@ -1582,7 +1582,7 @@ bsaes_xts_encrypt:
pcmpgtd %xmm6,%xmm14
pxor %xmm13,%xmm6
movdqu 0(%r12),%xmm7
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm1
movdqa %xmm6,32(%rsp)
@ -1592,7 +1592,7 @@ bsaes_xts_encrypt:
pxor %xmm13,%xmm6
movdqu 16(%r12),%xmm8
pxor %xmm7,%xmm15
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm2
movdqa %xmm6,48(%rsp)
@ -1602,7 +1602,7 @@ bsaes_xts_encrypt:
pxor %xmm13,%xmm6
movdqu 32(%r12),%xmm9
pxor %xmm8,%xmm0
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm3
movdqa %xmm6,64(%rsp)
@ -1612,7 +1612,7 @@ bsaes_xts_encrypt:
pxor %xmm13,%xmm6
movdqu 48(%r12),%xmm10
pxor %xmm9,%xmm1
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm4
movdqa %xmm6,80(%rsp)
@ -1622,7 +1622,7 @@ bsaes_xts_encrypt:
pxor %xmm13,%xmm6
movdqu 64(%r12),%xmm11
pxor %xmm10,%xmm2
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm5
movdqa %xmm6,96(%rsp)
@ -1666,20 +1666,20 @@ bsaes_xts_encrypt:
pxor %xmm14,%xmm14
movdqa .Lxts_magic(%rip),%xmm12
pcmpgtd %xmm6,%xmm14
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
paddq %xmm6,%xmm6
pand %xmm12,%xmm13
pcmpgtd %xmm6,%xmm14
pxor %xmm13,%xmm6
subq $128,%r14
subq $0x80,%r14
jnc .Lxts_enc_loop
.Lxts_enc_short:
addq $128,%r14
addq $0x80,%r14
jz .Lxts_enc_done
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm15
movdqa %xmm6,0(%rsp)
@ -1687,7 +1687,7 @@ bsaes_xts_encrypt:
pand %xmm12,%xmm13
pcmpgtd %xmm6,%xmm14
pxor %xmm13,%xmm6
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm0
movdqa %xmm6,16(%rsp)
@ -1698,7 +1698,7 @@ bsaes_xts_encrypt:
movdqu 0(%r12),%xmm7
cmpq $16,%r14
je .Lxts_enc_1
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm1
movdqa %xmm6,32(%rsp)
@ -1710,7 +1710,7 @@ bsaes_xts_encrypt:
cmpq $32,%r14
je .Lxts_enc_2
pxor %xmm7,%xmm15
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm2
movdqa %xmm6,48(%rsp)
@ -1722,7 +1722,7 @@ bsaes_xts_encrypt:
cmpq $48,%r14
je .Lxts_enc_3
pxor %xmm8,%xmm0
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm3
movdqa %xmm6,64(%rsp)
@ -1734,7 +1734,7 @@ bsaes_xts_encrypt:
cmpq $64,%r14
je .Lxts_enc_4
pxor %xmm9,%xmm1
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm4
movdqa %xmm6,80(%rsp)
@ -1746,7 +1746,7 @@ bsaes_xts_encrypt:
cmpq $80,%r14
je .Lxts_enc_5
pxor %xmm10,%xmm2
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm5
movdqa %xmm6,96(%rsp)
@ -2011,20 +2011,20 @@ bsaes_xts_decrypt:
shlq $4,%rax
subq %rax,%r14
subq $128,%rsp
subq $0x80,%rsp
movdqa 32(%rbp),%xmm6
pxor %xmm14,%xmm14
movdqa .Lxts_magic(%rip),%xmm12
pcmpgtd %xmm6,%xmm14
subq $128,%r14
subq $0x80,%r14
jc .Lxts_dec_short
jmp .Lxts_dec_loop
.align 16
.Lxts_dec_loop:
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm15
movdqa %xmm6,0(%rsp)
@ -2032,7 +2032,7 @@ bsaes_xts_decrypt:
pand %xmm12,%xmm13
pcmpgtd %xmm6,%xmm14
pxor %xmm13,%xmm6
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm0
movdqa %xmm6,16(%rsp)
@ -2041,7 +2041,7 @@ bsaes_xts_decrypt:
pcmpgtd %xmm6,%xmm14
pxor %xmm13,%xmm6
movdqu 0(%r12),%xmm7
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm1
movdqa %xmm6,32(%rsp)
@ -2051,7 +2051,7 @@ bsaes_xts_decrypt:
pxor %xmm13,%xmm6
movdqu 16(%r12),%xmm8
pxor %xmm7,%xmm15
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm2
movdqa %xmm6,48(%rsp)
@ -2061,7 +2061,7 @@ bsaes_xts_decrypt:
pxor %xmm13,%xmm6
movdqu 32(%r12),%xmm9
pxor %xmm8,%xmm0
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm3
movdqa %xmm6,64(%rsp)
@ -2071,7 +2071,7 @@ bsaes_xts_decrypt:
pxor %xmm13,%xmm6
movdqu 48(%r12),%xmm10
pxor %xmm9,%xmm1
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm4
movdqa %xmm6,80(%rsp)
@ -2081,7 +2081,7 @@ bsaes_xts_decrypt:
pxor %xmm13,%xmm6
movdqu 64(%r12),%xmm11
pxor %xmm10,%xmm2
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm5
movdqa %xmm6,96(%rsp)
@ -2125,20 +2125,20 @@ bsaes_xts_decrypt:
pxor %xmm14,%xmm14
movdqa .Lxts_magic(%rip),%xmm12
pcmpgtd %xmm6,%xmm14
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
paddq %xmm6,%xmm6
pand %xmm12,%xmm13
pcmpgtd %xmm6,%xmm14
pxor %xmm13,%xmm6
subq $128,%r14
subq $0x80,%r14
jnc .Lxts_dec_loop
.Lxts_dec_short:
addq $128,%r14
addq $0x80,%r14
jz .Lxts_dec_done
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm15
movdqa %xmm6,0(%rsp)
@ -2146,7 +2146,7 @@ bsaes_xts_decrypt:
pand %xmm12,%xmm13
pcmpgtd %xmm6,%xmm14
pxor %xmm13,%xmm6
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm0
movdqa %xmm6,16(%rsp)
@ -2157,7 +2157,7 @@ bsaes_xts_decrypt:
movdqu 0(%r12),%xmm7
cmpq $16,%r14
je .Lxts_dec_1
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm1
movdqa %xmm6,32(%rsp)
@ -2169,7 +2169,7 @@ bsaes_xts_decrypt:
cmpq $32,%r14
je .Lxts_dec_2
pxor %xmm7,%xmm15
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm2
movdqa %xmm6,48(%rsp)
@ -2181,7 +2181,7 @@ bsaes_xts_decrypt:
cmpq $48,%r14
je .Lxts_dec_3
pxor %xmm8,%xmm0
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm3
movdqa %xmm6,64(%rsp)
@ -2193,7 +2193,7 @@ bsaes_xts_decrypt:
cmpq $64,%r14
je .Lxts_dec_4
pxor %xmm9,%xmm1
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm4
movdqa %xmm6,80(%rsp)
@ -2205,7 +2205,7 @@ bsaes_xts_decrypt:
cmpq $80,%r14
je .Lxts_dec_5
pxor %xmm10,%xmm2
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm5
movdqa %xmm6,96(%rsp)
@ -2382,7 +2382,7 @@ bsaes_xts_decrypt:
pxor %xmm14,%xmm14
movdqa .Lxts_magic(%rip),%xmm12
pcmpgtd %xmm6,%xmm14
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
movdqa %xmm6,%xmm5
paddq %xmm6,%xmm6
pand %xmm12,%xmm13

deps/openssl/asm/x64-elf-gas/aes/vpaes-x86_64.s (20)

@ -60,7 +60,7 @@ _vpaes_encrypt_core:
addq $16,%r11
pxor %xmm0,%xmm3
.byte 102,15,56,0,193
andq $48,%r11
andq $0x30,%r11
subq $1,%rax
pxor %xmm3,%xmm0
@ -120,10 +120,10 @@ _vpaes_decrypt_core:
pand %xmm9,%xmm0
.byte 102,15,56,0,208
movdqa .Lk_dipt+16(%rip),%xmm0
xorq $48,%r11
xorq $0x30,%r11
leaq .Lk_dsbd(%rip),%r10
.byte 102,15,56,0,193
andq $48,%r11
andq $0x30,%r11
pxor %xmm5,%xmm2
movdqa .Lk_mc_forward+48(%rip),%xmm5
pxor %xmm2,%xmm0
@ -242,7 +242,7 @@ _vpaes_schedule_core:
movdqa (%r8,%r10,1),%xmm1
.byte 102,15,56,0,217
movdqu %xmm3,(%rdx)
xorq $48,%r8
xorq $0x30,%r8
.Lschedule_go:
cmpl $192,%esi
@ -332,7 +332,7 @@ _vpaes_schedule_core:
call _vpaes_schedule_mangle
pshufd $255,%xmm0,%xmm0
pshufd $0xFF,%xmm0,%xmm0
movdqa %xmm7,%xmm5
movdqa %xmm6,%xmm7
call _vpaes_schedule_low_round
@ -399,8 +399,8 @@ _vpaes_schedule_core:
.type _vpaes_schedule_192_smear,@function
.align 16
_vpaes_schedule_192_smear:
pshufd $128,%xmm6,%xmm1
pshufd $254,%xmm7,%xmm0
pshufd $0x80,%xmm6,%xmm1
pshufd $0xFE,%xmm7,%xmm0
pxor %xmm1,%xmm6
pxor %xmm1,%xmm1
pxor %xmm0,%xmm6
@ -437,7 +437,7 @@ _vpaes_schedule_round:
pxor %xmm1,%xmm7
pshufd $255,%xmm0,%xmm0
pshufd $0xFF,%xmm0,%xmm0
.byte 102,15,58,15,192,1
@ -596,7 +596,7 @@ _vpaes_schedule_mangle:
movdqa (%r8,%r10,1),%xmm1
.byte 102,15,56,0,217
addq $-16,%r8
andq $48,%r8
andq $0x30,%r8
movdqu %xmm3,(%rdx)
.byte 0xf3,0xc3
.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
@ -614,7 +614,7 @@ vpaes_set_encrypt_key:
movl %eax,240(%rdx)
movl $0,%ecx
movl $48,%r8d
movl $0x30,%r8d
call _vpaes_schedule_core
xorl %eax,%eax
.byte 0xf3,0xc3

deps/openssl/asm/x64-elf-gas/bn/rsaz-avx2.s (1701)

File diff suppressed because it is too large

deps/openssl/asm/x64-elf-gas/bn/rsaz-x86_64.s (654)

@ -19,6 +19,10 @@ rsaz_512_sqr:
movq (%rsi),%rdx
movq 8(%rsi),%rax
movq %rcx,128(%rsp)
movl $0x80100,%r11d
andl OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je .Loop_sqrx
jmp .Loop_sqr
.align 32
@ -382,6 +386,276 @@ rsaz_512_sqr:
decl %r8d
jnz .Loop_sqr
jmp .Lsqr_tail
.align 32
.Loop_sqrx:
movl %r8d,128+8(%rsp)
.byte 102,72,15,110,199
.byte 102,72,15,110,205
mulxq %rax,%r8,%r9
mulxq 16(%rsi),%rcx,%r10
xorq %rbp,%rbp
mulxq 24(%rsi),%rax,%r11
adcxq %rcx,%r9
mulxq 32(%rsi),%rcx,%r12
adcxq %rax,%r10
mulxq 40(%rsi),%rax,%r13
adcxq %rcx,%r11
.byte 0xc4,0x62,0xf3,0xf6,0xb6,0x30,0x00,0x00,0x00
adcxq %rax,%r12
adcxq %rcx,%r13
.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
adcxq %rax,%r14
adcxq %rbp,%r15
movq %r9,%rcx
shldq $1,%r8,%r9
shlq $1,%r8
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rdx,%r8
movq 8(%rsi),%rdx
adcxq %rbp,%r9
movq %rax,(%rsp)
movq %r8,8(%rsp)
mulxq 16(%rsi),%rax,%rbx
adoxq %rax,%r10
adcxq %rbx,%r11
.byte 0xc4,0x62,0xc3,0xf6,0x86,0x18,0x00,0x00,0x00
adoxq %rdi,%r11
adcxq %r8,%r12
mulxq 32(%rsi),%rax,%rbx
adoxq %rax,%r12
adcxq %rbx,%r13
mulxq 40(%rsi),%rdi,%r8
adoxq %rdi,%r13
adcxq %r8,%r14
.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
adoxq %rax,%r14
adcxq %rbx,%r15
.byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00
adoxq %rdi,%r15
adcxq %rbp,%r8
adoxq %rbp,%r8
movq %r11,%rbx
shldq $1,%r10,%r11
shldq $1,%rcx,%r10
xorl %ebp,%ebp
mulxq %rdx,%rax,%rcx
movq 16(%rsi),%rdx
adcxq %rax,%r9
adcxq %rcx,%r10
adcxq %rbp,%r11
movq %r9,16(%rsp)
.byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00
.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00
adoxq %rdi,%r12
adcxq %r9,%r13
mulxq 32(%rsi),%rax,%rcx
adoxq %rax,%r13
adcxq %rcx,%r14
mulxq 40(%rsi),%rdi,%r9
adoxq %rdi,%r14
adcxq %r9,%r15
.byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00
adoxq %rax,%r15
adcxq %rcx,%r8
.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x38,0x00,0x00,0x00
adoxq %rdi,%r8
adcxq %rbp,%r9
adoxq %rbp,%r9
movq %r13,%rcx
shldq $1,%r12,%r13
shldq $1,%rbx,%r12
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rax,%r11
adcxq %rdx,%r12
movq 24(%rsi),%rdx
adcxq %rbp,%r13
movq %r11,32(%rsp)
.byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00
.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00
adoxq %rax,%r14
adcxq %rbx,%r15
mulxq 40(%rsi),%rdi,%r10
adoxq %rdi,%r15
adcxq %r10,%r8
mulxq 48(%rsi),%rax,%rbx
adoxq %rax,%r8
adcxq %rbx,%r9
mulxq 56(%rsi),%rdi,%r10
adoxq %rdi,%r9
adcxq %rbp,%r10
adoxq %rbp,%r10
.byte 0x66
movq %r15,%rbx
shldq $1,%r14,%r15
shldq $1,%rcx,%r14
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rax,%r13
adcxq %rdx,%r14
movq 32(%rsi),%rdx
adcxq %rbp,%r15
movq %r13,48(%rsp)
movq %r14,56(%rsp)
.byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00
adoxq %rdi,%r8
adcxq %r11,%r9
mulxq 48(%rsi),%rax,%rcx
adoxq %rax,%r9
adcxq %rcx,%r10
mulxq 56(%rsi),%rdi,%r11
adoxq %rdi,%r10
adcxq %rbp,%r11
adoxq %rbp,%r11
movq %r9,%rcx
shldq $1,%r8,%r9
shldq $1,%rbx,%r8
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rax,%r15
adcxq %rdx,%r8
movq 40(%rsi),%rdx
adcxq %rbp,%r9
movq %r15,64(%rsp)
movq %r8,72(%rsp)
.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
adoxq %rax,%r10
adcxq %rbx,%r11
.byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00
adoxq %rdi,%r11
adcxq %rbp,%r12
adoxq %rbp,%r12
movq %r11,%rbx
shldq $1,%r10,%r11
shldq $1,%rcx,%r10
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rax,%r9
adcxq %rdx,%r10
movq 48(%rsi),%rdx
adcxq %rbp,%r11
movq %r9,80(%rsp)
movq %r10,88(%rsp)
.byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00
adoxq %rax,%r12
adoxq %rbp,%r13
xorq %r14,%r14
shldq $1,%r13,%r14
shldq $1,%r12,%r13
shldq $1,%rbx,%r12
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rax,%r11
adcxq %rdx,%r12
movq 56(%rsi),%rdx
adcxq %rbp,%r13
.byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00
.byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00
mulxq %rdx,%rax,%rdx
adoxq %rax,%r13
adoxq %rbp,%rdx
.byte 0x66
addq %rdx,%r14
movq %r13,112(%rsp)
movq %r14,120(%rsp)
.byte 102,72,15,126,199
.byte 102,72,15,126,205
movq 128(%rsp),%rdx
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
call __rsaz_512_reducex
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
adcq 88(%rsp),%r11
adcq 96(%rsp),%r12
adcq 104(%rsp),%r13
adcq 112(%rsp),%r14
adcq 120(%rsp),%r15
sbbq %rcx,%rcx
call __rsaz_512_subtract
movq %r8,%rdx
movq %r9,%rax
movl 128+8(%rsp),%r8d
movq %rdi,%rsi
decl %r8d
jnz .Loop_sqrx
.Lsqr_tail:
leaq 128+24+48(%rsp),%rax
movq -48(%rax),%r15
@ -410,6 +684,10 @@ rsaz_512_mul:
.byte 102,72,15,110,199
.byte 102,72,15,110,201
movq %r8,128(%rsp)
movl $0x80100,%r11d
andl OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je .Lmulx
movq (%rdx),%rbx
movq %rdx,%rbp
call __rsaz_512_mul
@ -427,6 +705,29 @@ rsaz_512_mul:
movq 56(%rsp),%r15
call __rsaz_512_reduce
jmp .Lmul_tail
.align 32
.Lmulx:
movq %rdx,%rbp
movq (%rdx),%rdx
call __rsaz_512_mulx
.byte 102,72,15,126,199
.byte 102,72,15,126,205
movq 128(%rsp),%rdx
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
call __rsaz_512_reducex
.Lmul_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
@ -517,6 +818,10 @@ rsaz_512_mul_gather4:
por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
movl $0x80100,%r11d
andl OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je .Lmulx_gather
.byte 102,76,15,126,195
movq %r8,128(%rsp)
@ -697,6 +1002,142 @@ rsaz_512_mul_gather4:
movq 56(%rsp),%r15
call __rsaz_512_reduce
jmp .Lmul_gather_tail
.align 32
.Lmulx_gather:
.byte 102,76,15,126,194
movq %r8,128(%rsp)
movq %rdi,128+8(%rsp)
movq %rcx,128+16(%rsp)
mulxq (%rsi),%rbx,%r8
movq %rbx,(%rsp)
xorl %edi,%edi
mulxq 8(%rsi),%rax,%r9
mulxq 16(%rsi),%rbx,%r10
adcxq %rax,%r8
mulxq 24(%rsi),%rax,%r11
adcxq %rbx,%r9
mulxq 32(%rsi),%rbx,%r12
adcxq %rax,%r10
mulxq 40(%rsi),%rax,%r13
adcxq %rbx,%r11
mulxq 48(%rsi),%rbx,%r14
adcxq %rax,%r12
mulxq 56(%rsi),%rax,%r15
adcxq %rbx,%r13
adcxq %rax,%r14
.byte 0x67
movq %r8,%rbx
adcxq %rdi,%r15
movq $-7,%rcx
jmp .Loop_mulx_gather
.align 32
.Loop_mulx_gather:
movdqa 0(%rbp),%xmm8
movdqa 16(%rbp),%xmm9
movdqa 32(%rbp),%xmm10
movdqa 48(%rbp),%xmm11
pand %xmm0,%xmm8
movdqa 64(%rbp),%xmm12
pand %xmm1,%xmm9
movdqa 80(%rbp),%xmm13
pand %xmm2,%xmm10
movdqa 96(%rbp),%xmm14
pand %xmm3,%xmm11
movdqa 112(%rbp),%xmm15
leaq 128(%rbp),%rbp
pand %xmm4,%xmm12
pand %xmm5,%xmm13
pand %xmm6,%xmm14
pand %xmm7,%xmm15
por %xmm10,%xmm8
por %xmm11,%xmm9
por %xmm12,%xmm8
por %xmm13,%xmm9
por %xmm14,%xmm8
por %xmm15,%xmm9
por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
.byte 102,76,15,126,194
.byte 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00
adcxq %rax,%rbx
adoxq %r9,%r8
mulxq 8(%rsi),%rax,%r9
adcxq %rax,%r8
adoxq %r10,%r9
mulxq 16(%rsi),%rax,%r10
adcxq %rax,%r9
adoxq %r11,%r10
.byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00
adcxq %rax,%r10
adoxq %r12,%r11
mulxq 32(%rsi),%rax,%r12
adcxq %rax,%r11
adoxq %r13,%r12
mulxq 40(%rsi),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13
.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
adcxq %rax,%r13
.byte 0x67
adoxq %r15,%r14
mulxq 56(%rsi),%rax,%r15
movq %rbx,64(%rsp,%rcx,8)
adcxq %rax,%r14
adoxq %rdi,%r15
movq %r8,%rbx
adcxq %rdi,%r15
incq %rcx
jnz .Loop_mulx_gather
movq %r8,64(%rsp)
movq %r9,64+8(%rsp)
movq %r10,64+16(%rsp)
movq %r11,64+24(%rsp)
movq %r12,64+32(%rsp)
movq %r13,64+40(%rsp)
movq %r14,64+48(%rsp)
movq %r15,64+56(%rsp)
movq 128(%rsp),%rdx
movq 128+8(%rsp),%rdi
movq 128+16(%rsp),%rbp
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
call __rsaz_512_reducex
.Lmul_gather_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
@ -741,6 +1182,10 @@ rsaz_512_mul_scatter4:
movq %rcx,128(%rsp)
movq %rdi,%rbp
movl $0x80100,%r11d
andl OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je .Lmulx_scatter
movq (%rdi),%rbx
call __rsaz_512_mul
@ -757,6 +1202,29 @@ rsaz_512_mul_scatter4:
movq 56(%rsp),%r15
call __rsaz_512_reduce
jmp .Lmul_scatter_tail
.align 32
.Lmulx_scatter:
movq (%rdi),%rdx
call __rsaz_512_mulx
.byte 102,72,15,126,199
.byte 102,72,15,126,205
movq 128(%rsp),%rdx
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
call __rsaz_512_reducex
.Lmul_scatter_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
@ -803,6 +1271,7 @@ rsaz_512_mul_by_one:
subq $128+24,%rsp
.Lmul_by_one_body:
movl OPENSSL_ia32cap_P+8(%rip),%eax
movq %rdx,%rbp
movq %rcx,128(%rsp)
@ -823,7 +1292,16 @@ rsaz_512_mul_by_one:
movdqa %xmm0,64(%rsp)
movdqa %xmm0,80(%rsp)
movdqa %xmm0,96(%rsp)
andl $0x80100,%eax
cmpl $0x80100,%eax
je .Lby_one_callx
call __rsaz_512_reduce
jmp .Lby_one_tail
.align 32
.Lby_one_callx:
movq 128(%rsp),%rdx
call __rsaz_512_reducex
.Lby_one_tail:
movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
@ -927,6 +1405,62 @@ __rsaz_512_reduce:
.byte 0xf3,0xc3
.size __rsaz_512_reduce,.-__rsaz_512_reduce
.type __rsaz_512_reducex,@function
.align 32
__rsaz_512_reducex:
imulq %r8,%rdx
xorq %rsi,%rsi
movl $8,%ecx
jmp .Lreduction_loopx
.align 32
.Lreduction_loopx:
movq %r8,%rbx
mulxq 0(%rbp),%rax,%r8
adcxq %rbx,%rax
adoxq %r9,%r8
mulxq 8(%rbp),%rax,%r9
adcxq %rax,%r8
adoxq %r10,%r9
mulxq 16(%rbp),%rbx,%r10
adcxq %rbx,%r9
adoxq %r11,%r10
mulxq 24(%rbp),%rbx,%r11
adcxq %rbx,%r10
adoxq %r12,%r11
.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
movq %rdx,%rax
movq %r8,%rdx
adcxq %rbx,%r11
adoxq %r13,%r12
mulxq 128+8(%rsp),%rbx,%rdx
movq %rax,%rdx
mulxq 40(%rbp),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13
.byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00
adcxq %rax,%r13
adoxq %r15,%r14
mulxq 56(%rbp),%rax,%r15
movq %rbx,%rdx
adcxq %rax,%r14
adoxq %rsi,%r15
adcxq %rsi,%r15
decl %ecx
jne .Lreduction_loopx
.byte 0xf3,0xc3
.size __rsaz_512_reducex,.-__rsaz_512_reducex
.type __rsaz_512_subtract,@function
.align 32
__rsaz_512_subtract:
@@ -1126,6 +1660,126 @@ __rsaz_512_mul:
.byte 0xf3,0xc3
.size __rsaz_512_mul,.-__rsaz_512_mul
.type __rsaz_512_mulx,@function
.align 32
__rsaz_512_mulx:
mulxq (%rsi),%rbx,%r8
movq $-6,%rcx
mulxq 8(%rsi),%rax,%r9
movq %rbx,8(%rsp)
mulxq 16(%rsi),%rbx,%r10
adcq %rax,%r8
mulxq 24(%rsi),%rax,%r11
adcq %rbx,%r9
mulxq 32(%rsi),%rbx,%r12
adcq %rax,%r10
mulxq 40(%rsi),%rax,%r13
adcq %rbx,%r11
mulxq 48(%rsi),%rbx,%r14
adcq %rax,%r12
mulxq 56(%rsi),%rax,%r15
movq 8(%rbp),%rdx
adcq %rbx,%r13
adcq %rax,%r14
adcq $0,%r15
xorq %rdi,%rdi
jmp .Loop_mulx
.align 32
.Loop_mulx:
movq %r8,%rbx
mulxq (%rsi),%rax,%r8
adcxq %rax,%rbx
adoxq %r9,%r8
mulxq 8(%rsi),%rax,%r9
adcxq %rax,%r8
adoxq %r10,%r9
mulxq 16(%rsi),%rax,%r10
adcxq %rax,%r9
adoxq %r11,%r10
mulxq 24(%rsi),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11
.byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00
adcxq %rax,%r11
adoxq %r13,%r12
mulxq 40(%rsi),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13
mulxq 48(%rsi),%rax,%r14
adcxq %rax,%r13
adoxq %r15,%r14
mulxq 56(%rsi),%rax,%r15
movq 64(%rbp,%rcx,8),%rdx
movq %rbx,8+64-8(%rsp,%rcx,8)
adcxq %rax,%r14
adoxq %rdi,%r15
adcxq %rdi,%r15
incq %rcx
jnz .Loop_mulx
movq %r8,%rbx
mulxq (%rsi),%rax,%r8
adcxq %rax,%rbx
adoxq %r9,%r8
.byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00
adcxq %rax,%r8
adoxq %r10,%r9
.byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00
adcxq %rax,%r9
adoxq %r11,%r10
mulxq 24(%rsi),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11
mulxq 32(%rsi),%rax,%r12
adcxq %rax,%r11
adoxq %r13,%r12
mulxq 40(%rsi),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13
.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
adcxq %rax,%r13
adoxq %r15,%r14
.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
adcxq %rax,%r14
adoxq %rdi,%r15
adcxq %rdi,%r15
movq %rbx,8+64-8(%rsp)
movq %r8,8+64(%rsp)
movq %r9,8+64+8(%rsp)
movq %r10,8+64+16(%rsp)
movq %r11,8+64+24(%rsp)
movq %r12,8+64+32(%rsp)
movq %r13,8+64+40(%rsp)
movq %r14,8+64+48(%rsp)
movq %r15,8+64+56(%rsp)
.byte 0xf3,0xc3
.size __rsaz_512_mulx,.-__rsaz_512_mulx
.globl rsaz_512_scatter4
.type rsaz_512_scatter4,@function
.align 16

386
deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s

@@ -10,6 +10,7 @@ bn_mul_mont:
jnz .Lmul_enter
cmpl $8,%r9d
jb .Lmul_enter
movl OPENSSL_ia32cap_P+8(%rip),%r11d
cmpq %rsi,%rdx
jne .Lmul4x_enter
testl $7,%r9d
@@ -34,6 +35,20 @@ bn_mul_mont:
movq %r11,8(%rsp,%r9,8)
.Lmul_body:
subq %rsp,%r11
andq $-4096,%r11
.Lmul_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x66,0x2e
jnc .Lmul_page_walk
movq %rdx,%r12
movq (%r8),%r8
movq (%r12),%rbx
@@ -215,6 +230,9 @@ bn_mul_mont:
.align 16
bn_mul4x_mont:
.Lmul4x_enter:
andl $0x80100,%r11d
cmpl $0x80100,%r11d
je .Lmulx4x_enter
pushq %rbx
pushq %rbp
pushq %r12
@@ -231,6 +249,14 @@ bn_mul4x_mont:
movq %r11,8(%rsp,%r9,8)
.Lmul4x_body:
subq %rsp,%r11
andq $-4096,%r11
.Lmul4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lmul4x_page_walk
movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12
movq (%r8),%r8
@@ -611,6 +637,7 @@ bn_mul4x_mont:
.size bn_mul4x_mont,.-bn_mul4x_mont
.type bn_sqr8x_mont,@function
.align 32
bn_sqr8x_mont:
@@ -653,6 +680,15 @@ bn_sqr8x_mont:
subq %r11,%rsp
.Lsqr8x_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-4096,%r11
.Lsqr8x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lsqr8x_page_walk
movq %r9,%r10
negq %r9
@@ -664,6 +700,25 @@ bn_sqr8x_mont:
pxor %xmm0,%xmm0
.byte 102,72,15,110,207
.byte 102,73,15,110,218
movl OPENSSL_ia32cap_P+8(%rip),%eax
andl $0x80100,%eax
cmpl $0x80100,%eax
jne .Lsqr8x_nox
call bn_sqrx8x_internal
leaq (%r8,%rcx,1),%rbx
movq %rcx,%r9
movq %rcx,%rdx
.byte 102,72,15,126,207
sarq $3+2,%rcx
jmp .Lsqr8x_sub
.align 32
.Lsqr8x_nox:
call bn_sqr8x_internal
@@ -742,5 +797,336 @@ bn_sqr8x_mont:
.Lsqr8x_epilogue:
.byte 0xf3,0xc3
.size bn_sqr8x_mont,.-bn_sqr8x_mont
.type bn_mulx4x_mont,@function
.align 32
bn_mulx4x_mont:
.Lmulx4x_enter:
movq %rsp,%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
shll $3,%r9d
.byte 0x67
xorq %r10,%r10
subq %r9,%r10
movq (%r8),%r8
leaq -72(%rsp,%r10,1),%rsp
andq $-128,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-4096,%r11
.Lmulx4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x66,0x2e
jnc .Lmulx4x_page_walk
leaq (%rdx,%r9,1),%r10
movq %r9,0(%rsp)
shrq $5,%r9
movq %r10,16(%rsp)
subq $1,%r9
movq %r8,24(%rsp)
movq %rdi,32(%rsp)
movq %rax,40(%rsp)
movq %r9,48(%rsp)
jmp .Lmulx4x_body
.align 32
.Lmulx4x_body:
leaq 8(%rdx),%rdi
movq (%rdx),%rdx
leaq 64+32(%rsp),%rbx
movq %rdx,%r9
mulxq 0(%rsi),%r8,%rax
mulxq 8(%rsi),%r11,%r14
addq %rax,%r11
movq %rdi,8(%rsp)
mulxq 16(%rsi),%r12,%r13
adcq %r14,%r12
adcq $0,%r13
movq %r8,%rdi
imulq 24(%rsp),%r8
xorq %rbp,%rbp
mulxq 24(%rsi),%rax,%r14
movq %r8,%rdx
leaq 32(%rsi),%rsi
adcxq %rax,%r13
adcxq %rbp,%r14
mulxq 0(%rcx),%rax,%r10
adcxq %rax,%rdi
adoxq %r11,%r10
mulxq 8(%rcx),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11
.byte 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00
movq 48(%rsp),%rdi
movq %r10,-32(%rbx)
adcxq %rax,%r11
adoxq %r13,%r12
mulxq 24(%rcx),%rax,%r15
movq %r9,%rdx
movq %r11,-24(%rbx)
adcxq %rax,%r12
adoxq %rbp,%r15
leaq 32(%rcx),%rcx
movq %r12,-16(%rbx)
jmp .Lmulx4x_1st
.align 32
.Lmulx4x_1st:
adcxq %rbp,%r15
mulxq 0(%rsi),%r10,%rax
adcxq %r14,%r10
mulxq 8(%rsi),%r11,%r14
adcxq %rax,%r11
mulxq 16(%rsi),%r12,%rax
adcxq %r14,%r12
mulxq 24(%rsi),%r13,%r14
.byte 0x67,0x67
movq %r8,%rdx
adcxq %rax,%r13
adcxq %rbp,%r14
leaq 32(%rsi),%rsi
leaq 32(%rbx),%rbx
adoxq %r15,%r10
mulxq 0(%rcx),%rax,%r15
adcxq %rax,%r10
adoxq %r15,%r11
mulxq 8(%rcx),%rax,%r15
adcxq %rax,%r11
adoxq %r15,%r12
mulxq 16(%rcx),%rax,%r15
movq %r10,-40(%rbx)
adcxq %rax,%r12
movq %r11,-32(%rbx)
adoxq %r15,%r13
mulxq 24(%rcx),%rax,%r15
movq %r9,%rdx
movq %r12,-24(%rbx)
adcxq %rax,%r13
adoxq %rbp,%r15
leaq 32(%rcx),%rcx
movq %r13,-16(%rbx)
decq %rdi
jnz .Lmulx4x_1st
movq 0(%rsp),%rax
movq 8(%rsp),%rdi
adcq %rbp,%r15
addq %r15,%r14
sbbq %r15,%r15
movq %r14,-8(%rbx)
jmp .Lmulx4x_outer
.align 32
.Lmulx4x_outer:
movq (%rdi),%rdx
leaq 8(%rdi),%rdi
subq %rax,%rsi
movq %r15,(%rbx)
leaq 64+32(%rsp),%rbx
subq %rax,%rcx
mulxq 0(%rsi),%r8,%r11
xorl %ebp,%ebp
movq %rdx,%r9
mulxq 8(%rsi),%r14,%r12
adoxq -32(%rbx),%r8
adcxq %r14,%r11
mulxq 16(%rsi),%r15,%r13
adoxq -24(%rbx),%r11
adcxq %r15,%r12
adoxq %rbp,%r12
adcxq %rbp,%r13
movq %rdi,8(%rsp)
.byte 0x67
movq %r8,%r15
imulq 24(%rsp),%r8
xorl %ebp,%ebp
mulxq 24(%rsi),%rax,%r14
movq %r8,%rdx
adoxq -16(%rbx),%r12
adcxq %rax,%r13
adoxq -8(%rbx),%r13
adcxq %rbp,%r14
leaq 32(%rsi),%rsi
adoxq %rbp,%r14
mulxq 0(%rcx),%rax,%r10
adcxq %rax,%r15
adoxq %r11,%r10
mulxq 8(%rcx),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11
mulxq 16(%rcx),%rax,%r12
movq %r10,-32(%rbx)
adcxq %rax,%r11
adoxq %r13,%r12
mulxq 24(%rcx),%rax,%r15
movq %r9,%rdx
movq %r11,-24(%rbx)
leaq 32(%rcx),%rcx
adcxq %rax,%r12
adoxq %rbp,%r15
movq 48(%rsp),%rdi
movq %r12,-16(%rbx)
jmp .Lmulx4x_inner
.align 32
.Lmulx4x_inner:
mulxq 0(%rsi),%r10,%rax
adcxq %rbp,%r15
adoxq %r14,%r10
mulxq 8(%rsi),%r11,%r14
adcxq 0(%rbx),%r10
adoxq %rax,%r11
mulxq 16(%rsi),%r12,%rax
adcxq 8(%rbx),%r11
adoxq %r14,%r12
mulxq 24(%rsi),%r13,%r14
movq %r8,%rdx
adcxq 16(%rbx),%r12
adoxq %rax,%r13
adcxq 24(%rbx),%r13
adoxq %rbp,%r14
leaq 32(%rsi),%rsi
leaq 32(%rbx),%rbx
adcxq %rbp,%r14
adoxq %r15,%r10
mulxq 0(%rcx),%rax,%r15
adcxq %rax,%r10
adoxq %r15,%r11
mulxq 8(%rcx),%rax,%r15
adcxq %rax,%r11
adoxq %r15,%r12
mulxq 16(%rcx),%rax,%r15
movq %r10,-40(%rbx)
adcxq %rax,%r12
adoxq %r15,%r13
mulxq 24(%rcx),%rax,%r15
movq %r9,%rdx
movq %r11,-32(%rbx)
movq %r12,-24(%rbx)
adcxq %rax,%r13
adoxq %rbp,%r15
leaq 32(%rcx),%rcx
movq %r13,-16(%rbx)
decq %rdi
jnz .Lmulx4x_inner
movq 0(%rsp),%rax
movq 8(%rsp),%rdi
adcq %rbp,%r15
subq 0(%rbx),%rbp
adcq %r15,%r14
sbbq %r15,%r15
movq %r14,-8(%rbx)
cmpq 16(%rsp),%rdi
jne .Lmulx4x_outer
leaq 64(%rsp),%rbx
subq %rax,%rcx
negq %r15
movq %rax,%rdx
shrq $3+2,%rax
movq 32(%rsp),%rdi
jmp .Lmulx4x_sub
.align 32
.Lmulx4x_sub:
movq 0(%rbx),%r11
movq 8(%rbx),%r12
movq 16(%rbx),%r13
movq 24(%rbx),%r14
leaq 32(%rbx),%rbx
sbbq 0(%rcx),%r11
sbbq 8(%rcx),%r12
sbbq 16(%rcx),%r13
sbbq 24(%rcx),%r14
leaq 32(%rcx),%rcx
movq %r11,0(%rdi)
movq %r12,8(%rdi)
movq %r13,16(%rdi)
movq %r14,24(%rdi)
leaq 32(%rdi),%rdi
decq %rax
jnz .Lmulx4x_sub
sbbq $0,%r15
leaq 64(%rsp),%rbx
subq %rdx,%rdi
.byte 102,73,15,110,207
pxor %xmm0,%xmm0
pshufd $0,%xmm1,%xmm1
movq 40(%rsp),%rsi
jmp .Lmulx4x_cond_copy
.align 32
.Lmulx4x_cond_copy:
movdqa 0(%rbx),%xmm2
movdqa 16(%rbx),%xmm3
leaq 32(%rbx),%rbx
movdqu 0(%rdi),%xmm4
movdqu 16(%rdi),%xmm5
leaq 32(%rdi),%rdi
movdqa %xmm0,-32(%rbx)
movdqa %xmm0,-16(%rbx)
pcmpeqd %xmm1,%xmm0
pand %xmm1,%xmm2
pand %xmm1,%xmm3
pand %xmm0,%xmm4
pand %xmm0,%xmm5
pxor %xmm0,%xmm0
por %xmm2,%xmm4
por %xmm3,%xmm5
movdqu %xmm4,-32(%rdi)
movdqu %xmm5,-16(%rdi)
subq $32,%rdx
jnz .Lmulx4x_cond_copy
movq %rdx,(%rbx)
movq $1,%rax
movq -48(%rsi),%r15
movq -40(%rsi),%r14
movq -32(%rsi),%r13
movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
.Lmulx4x_epilogue:
.byte 0xf3,0xc3
.size bn_mulx4x_mont,.-bn_mulx4x_mont
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16

1340
deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s

File diff suppressed because it is too large

2
deps/openssl/asm/x64-elf-gas/camellia/cmll-x86_64.s

@@ -1624,7 +1624,7 @@ Camellia_cbc_encrypt:
leaq -64-63(%rcx),%r10
subq %rsp,%r10
negq %r10
andq $960,%r10
andq $0x3C0,%r10
subq %r10,%rsp

1511
deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s

File diff suppressed because it is too large

34
deps/openssl/asm/x64-elf-gas/md5/md5-x86_64.s

@@ -493,14 +493,14 @@ md5_block_asm_data_order:
movl %ecx,%r11d
addl %ecx,%ebx
movl 0(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
xorl %edx,%r11d
leal -198630844(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 28(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
@@ -509,7 +509,7 @@ md5_block_asm_data_order:
xorl %ebx,%r11d
addl %r11d,%edx
movl 56(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
@@ -518,7 +518,7 @@ md5_block_asm_data_order:
xorl %eax,%r11d
addl %r11d,%ecx
movl 20(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
@@ -527,7 +527,7 @@ md5_block_asm_data_order:
xorl %edx,%r11d
addl %r11d,%ebx
movl 48(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
@@ -536,7 +536,7 @@ md5_block_asm_data_order:
xorl %ecx,%r11d
addl %r11d,%eax
movl 12(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
@@ -545,7 +545,7 @@ md5_block_asm_data_order:
xorl %ebx,%r11d
addl %r11d,%edx
movl 40(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
@@ -554,7 +554,7 @@ md5_block_asm_data_order:
xorl %eax,%r11d
addl %r11d,%ecx
movl 4(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
@@ -563,7 +563,7 @@ md5_block_asm_data_order:
xorl %edx,%r11d
addl %r11d,%ebx
movl 32(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
@@ -572,7 +572,7 @@ md5_block_asm_data_order:
xorl %ecx,%r11d
addl %r11d,%eax
movl 60(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
@@ -581,7 +581,7 @@ md5_block_asm_data_order:
xorl %ebx,%r11d
addl %r11d,%edx
movl 24(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
@@ -590,7 +590,7 @@ md5_block_asm_data_order:
xorl %eax,%r11d
addl %r11d,%ecx
movl 52(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
@@ -599,7 +599,7 @@ md5_block_asm_data_order:
xorl %edx,%r11d
addl %r11d,%ebx
movl 16(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
@@ -608,7 +608,7 @@ md5_block_asm_data_order:
xorl %ecx,%r11d
addl %r11d,%eax
movl 44(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
@@ -617,7 +617,7 @@ md5_block_asm_data_order:
xorl %ebx,%r11d
addl %r11d,%edx
movl 8(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
@@ -626,7 +626,7 @@ md5_block_asm_data_order:
xorl %eax,%r11d
addl %r11d,%ecx
movl 36(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
@@ -635,7 +635,7 @@ md5_block_asm_data_order:
xorl %edx,%r11d
addl %r11d,%ebx
movl 0(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx

752
deps/openssl/asm/x64-elf-gas/modes/aesni-gcm-x86_64.s

@@ -1,15 +1,753 @@
.text
.globl aesni_gcm_encrypt
.type aesni_gcm_encrypt,@function
aesni_gcm_encrypt:
xorl %eax,%eax
.byte 0xf3,0xc3
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
.type _aesni_ctr32_ghash_6x,@function
.align 32
_aesni_ctr32_ghash_6x:
vmovdqu 32(%r11),%xmm2
subq $6,%rdx
vpxor %xmm4,%xmm4,%xmm4
vmovdqu 0-128(%rcx),%xmm15
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpaddb %xmm2,%xmm11,%xmm12
vpaddb %xmm2,%xmm12,%xmm13
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm15,%xmm1,%xmm9
vmovdqu %xmm4,16+8(%rsp)
jmp .Loop6x
.align 32
.Loop6x:
addl $100663296,%ebx
jc .Lhandle_ctr32
vmovdqu 0-32(%r9),%xmm3
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm15,%xmm10,%xmm10
vpxor %xmm15,%xmm11,%xmm11
.Lresume_ctr32:
vmovdqu %xmm1,(%r8)
vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
vpxor %xmm15,%xmm12,%xmm12
vmovups 16-128(%rcx),%xmm2
vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
xorq %r12,%r12
cmpq %r14,%r15
vaesenc %xmm2,%xmm9,%xmm9
vmovdqu 48+8(%rsp),%xmm0
vpxor %xmm15,%xmm13,%xmm13
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
vaesenc %xmm2,%xmm10,%xmm10
vpxor %xmm15,%xmm14,%xmm14
setnc %r12b
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vaesenc %xmm2,%xmm11,%xmm11
vmovdqu 16-32(%r9),%xmm3
negq %r12
vaesenc %xmm2,%xmm12,%xmm12
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
vpxor %xmm4,%xmm8,%xmm8
vaesenc %xmm2,%xmm13,%xmm13
vpxor %xmm5,%xmm1,%xmm4
andq $0x60,%r12
vmovups 32-128(%rcx),%xmm15
vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
vaesenc %xmm2,%xmm14,%xmm14
vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
leaq (%r14,%r12,1),%r14
vaesenc %xmm15,%xmm9,%xmm9
vpxor 16+8(%rsp),%xmm8,%xmm8
vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
vmovdqu 64+8(%rsp),%xmm0
vaesenc %xmm15,%xmm10,%xmm10
movbeq 88(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 80(%r14),%r12
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,32+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,40+8(%rsp)
vmovdqu 48-32(%r9),%xmm5
vaesenc %xmm15,%xmm14,%xmm14
vmovups 48-128(%rcx),%xmm15
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm3,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3
vaesenc %xmm15,%xmm11,%xmm11
vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5
vmovdqu 80+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vpxor %xmm1,%xmm4,%xmm4
vmovdqu 64-32(%r9),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vmovups 64-128(%rcx),%xmm15
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3
vaesenc %xmm15,%xmm10,%xmm10
movbeq 72(%r14),%r13
vpxor %xmm5,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
movbeq 64(%r14),%r12
vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1
vmovdqu 96+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,48+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,56+8(%rsp)
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 96-32(%r9),%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vmovups 80-128(%rcx),%xmm15
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5
vaesenc %xmm15,%xmm10,%xmm10
movbeq 56(%r14),%r13
vpxor %xmm1,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1
vpxor 112+8(%rsp),%xmm8,%xmm8
vaesenc %xmm15,%xmm11,%xmm11
movbeq 48(%r14),%r12
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,64+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,72+8(%rsp)
vpxor %xmm3,%xmm4,%xmm4
vmovdqu 112-32(%r9),%xmm3
vaesenc %xmm15,%xmm14,%xmm14
vmovups 96-128(%rcx),%xmm15
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1
vaesenc %xmm15,%xmm10,%xmm10
movbeq 40(%r14),%r13
vpxor %xmm2,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
movbeq 32(%r14),%r12
vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,80+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,88+8(%rsp)
vpxor %xmm5,%xmm6,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor %xmm1,%xmm6,%xmm6
vmovups 112-128(%rcx),%xmm15
vpslldq $8,%xmm6,%xmm5
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 16(%r11),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm8,%xmm7,%xmm7
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm5,%xmm4,%xmm4
movbeq 24(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 16(%r14),%r12
vpalignr $8,%xmm4,%xmm4,%xmm0
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
movq %r13,96+8(%rsp)
vaesenc %xmm15,%xmm12,%xmm12
movq %r12,104+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
vmovups 128-128(%rcx),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vmovups 144-128(%rcx),%xmm15
vaesenc %xmm1,%xmm10,%xmm10
vpsrldq $8,%xmm6,%xmm6
vaesenc %xmm1,%xmm11,%xmm11
vpxor %xmm6,%xmm7,%xmm7
vaesenc %xmm1,%xmm12,%xmm12
vpxor %xmm0,%xmm4,%xmm4
movbeq 8(%r14),%r13
vaesenc %xmm1,%xmm13,%xmm13
movbeq 0(%r14),%r12
vaesenc %xmm1,%xmm14,%xmm14
vmovups 160-128(%rcx),%xmm1
cmpl $11,%ebp
jb .Lenc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 176-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 192-128(%rcx),%xmm1
je .Lenc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 208-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 224-128(%rcx),%xmm1
jmp .Lenc_tail
.align 32
.Lhandle_ctr32:
vmovdqu (%r11),%xmm0
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vmovdqu 0-32(%r9),%xmm3
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm15,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm15,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpshufb %xmm0,%xmm14,%xmm14
vpshufb %xmm0,%xmm1,%xmm1
jmp .Lresume_ctr32
.align 32
.Lenc_tail:
vaesenc %xmm15,%xmm9,%xmm9
vmovdqu %xmm7,16+8(%rsp)
vpalignr $8,%xmm4,%xmm4,%xmm8
vaesenc %xmm15,%xmm10,%xmm10
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
vpxor 0(%rdi),%xmm1,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
vpxor 16(%rdi),%xmm1,%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vpxor 32(%rdi),%xmm1,%xmm5
vaesenc %xmm15,%xmm13,%xmm13
vpxor 48(%rdi),%xmm1,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor 64(%rdi),%xmm1,%xmm7
vpxor 80(%rdi),%xmm1,%xmm3
vmovdqu (%r8),%xmm1
vaesenclast %xmm2,%xmm9,%xmm9
vmovdqu 32(%r11),%xmm2
vaesenclast %xmm0,%xmm10,%xmm10
vpaddb %xmm2,%xmm1,%xmm0
movq %r13,112+8(%rsp)
leaq 96(%rdi),%rdi
vaesenclast %xmm5,%xmm11,%xmm11
vpaddb %xmm2,%xmm0,%xmm5
movq %r12,120+8(%rsp)
leaq 96(%rsi),%rsi
vmovdqu 0-128(%rcx),%xmm15
vaesenclast %xmm6,%xmm12,%xmm12
vpaddb %xmm2,%xmm5,%xmm6
vaesenclast %xmm7,%xmm13,%xmm13
vpaddb %xmm2,%xmm6,%xmm7
vaesenclast %xmm3,%xmm14,%xmm14
vpaddb %xmm2,%xmm7,%xmm3
addq $0x60,%r10
subq $0x6,%rdx
jc .L6x_done
vmovups %xmm9,-96(%rsi)
vpxor %xmm15,%xmm1,%xmm9
vmovups %xmm10,-80(%rsi)
vmovdqa %xmm0,%xmm10
vmovups %xmm11,-64(%rsi)
vmovdqa %xmm5,%xmm11
vmovups %xmm12,-48(%rsi)
vmovdqa %xmm6,%xmm12
vmovups %xmm13,-32(%rsi)
vmovdqa %xmm7,%xmm13
vmovups %xmm14,-16(%rsi)
vmovdqa %xmm3,%xmm14
vmovdqu 32+8(%rsp),%xmm7
jmp .Loop6x
.L6x_done:
vpxor 16+8(%rsp),%xmm8,%xmm8
vpxor %xmm4,%xmm8,%xmm8
.byte 0xf3,0xc3
.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
.globl aesni_gcm_decrypt
.type aesni_gcm_decrypt,@function
.align 32
aesni_gcm_decrypt:
xorl %eax,%eax
xorq %r10,%r10
cmpq $0x60,%rdx
jb .Lgcm_dec_abort
leaq (%rsp),%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq .Lbswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
vmovdqu (%r9),%xmm8
andq $-128,%rsp
vmovdqu (%r11),%xmm0
leaq 128(%rcx),%rcx
leaq 32+32(%r9),%r9
movl 240-128(%rcx),%ebp
vpshufb %xmm0,%xmm8,%xmm8
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc .Ldec_no_key_aliasing
cmpq $768,%r15
jnc .Ldec_no_key_aliasing
subq %r15,%rsp
.Ldec_no_key_aliasing:
vmovdqu 80(%rdi),%xmm7
leaq (%rdi),%r14
vmovdqu 64(%rdi),%xmm4
leaq -192(%rdi,%rdx,1),%r15
vmovdqu 48(%rdi),%xmm5
shrq $4,%rdx
xorq %r10,%r10
vmovdqu 32(%rdi),%xmm6
vpshufb %xmm0,%xmm7,%xmm7
vmovdqu 16(%rdi),%xmm2
vpshufb %xmm0,%xmm4,%xmm4
vmovdqu (%rdi),%xmm3
vpshufb %xmm0,%xmm5,%xmm5
vmovdqu %xmm4,48(%rsp)
vpshufb %xmm0,%xmm6,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm2,%xmm2
vmovdqu %xmm6,80(%rsp)
vpshufb %xmm0,%xmm3,%xmm3
vmovdqu %xmm2,96(%rsp)
vmovdqu %xmm3,112(%rsp)
call _aesni_ctr32_ghash_6x
vmovups %xmm9,-96(%rsi)
vmovups %xmm10,-80(%rsi)
vmovups %xmm11,-64(%rsi)
vmovups %xmm12,-48(%rsi)
vmovups %xmm13,-32(%rsi)
vmovups %xmm14,-16(%rsi)
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
.Lgcm_dec_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
.type _aesni_ctr32_6x,@function
.align 32
_aesni_ctr32_6x:
vmovdqu 0-128(%rcx),%xmm4
vmovdqu 32(%r11),%xmm2
leaq -1(%rbp),%r13
vmovups 16-128(%rcx),%xmm15
leaq 32-128(%rcx),%r12
vpxor %xmm4,%xmm1,%xmm9
addl $100663296,%ebx
jc .Lhandle_ctr32_2
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddb %xmm2,%xmm11,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddb %xmm2,%xmm12,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
.align 16
.Loop_ctr32:
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vmovups (%r12),%xmm15
leaq 16(%r12),%r12
decl %r13d
jnz .Loop_ctr32
vmovdqu (%r12),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor 0(%rdi),%xmm3,%xmm4
vaesenc %xmm15,%xmm10,%xmm10
vpxor 16(%rdi),%xmm3,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
vpxor 32(%rdi),%xmm3,%xmm6
vaesenc %xmm15,%xmm12,%xmm12
vpxor 48(%rdi),%xmm3,%xmm8
vaesenc %xmm15,%xmm13,%xmm13
vpxor 64(%rdi),%xmm3,%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vpxor 80(%rdi),%xmm3,%xmm3
leaq 96(%rdi),%rdi
vaesenclast %xmm4,%xmm9,%xmm9
vaesenclast %xmm5,%xmm10,%xmm10
vaesenclast %xmm6,%xmm11,%xmm11
vaesenclast %xmm8,%xmm12,%xmm12
vaesenclast %xmm2,%xmm13,%xmm13
vaesenclast %xmm3,%xmm14,%xmm14
vmovups %xmm9,0(%rsi)
vmovups %xmm10,16(%rsi)
vmovups %xmm11,32(%rsi)
vmovups %xmm12,48(%rsi)
vmovups %xmm13,64(%rsi)
vmovups %xmm14,80(%rsi)
leaq 96(%rsi),%rsi
.byte 0xf3,0xc3
.align 32
.Lhandle_ctr32_2:
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpshufb %xmm0,%xmm14,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpshufb %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
.size _aesni_ctr32_6x,.-_aesni_ctr32_6x
.globl aesni_gcm_encrypt
.type aesni_gcm_encrypt,@function
.align 32
aesni_gcm_encrypt:
xorq %r10,%r10
cmpq $288,%rdx
jb .Lgcm_enc_abort
leaq (%rsp),%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq .Lbswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
leaq 128(%rcx),%rcx
vmovdqu (%r11),%xmm0
andq $-128,%rsp
movl 240-128(%rcx),%ebp
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc .Lenc_no_key_aliasing
cmpq $768,%r15
jnc .Lenc_no_key_aliasing
subq %r15,%rsp
.Lenc_no_key_aliasing:
leaq (%rsi),%r14
leaq -192(%rsi,%rdx,1),%r15
shrq $4,%rdx
call _aesni_ctr32_6x
vpshufb %xmm0,%xmm9,%xmm8
vpshufb %xmm0,%xmm10,%xmm2
vmovdqu %xmm8,112(%rsp)
vpshufb %xmm0,%xmm11,%xmm4
vmovdqu %xmm2,96(%rsp)
vpshufb %xmm0,%xmm12,%xmm5
vmovdqu %xmm4,80(%rsp)
vpshufb %xmm0,%xmm13,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm14,%xmm7
vmovdqu %xmm6,48(%rsp)
call _aesni_ctr32_6x
vmovdqu (%r9),%xmm8
leaq 32+32(%r9),%r9
subq $12,%rdx
movq $192,%r10
vpshufb %xmm0,%xmm8,%xmm8
call _aesni_ctr32_ghash_6x
vmovdqu 32(%rsp),%xmm7
vmovdqu (%r11),%xmm0
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm7,%xmm7,%xmm1
vmovdqu 32-32(%r9),%xmm15
vmovups %xmm9,-96(%rsi)
vpshufb %xmm0,%xmm9,%xmm9
vpxor %xmm7,%xmm1,%xmm1
vmovups %xmm10,-80(%rsi)
vpshufb %xmm0,%xmm10,%xmm10
vmovups %xmm11,-64(%rsi)
vpshufb %xmm0,%xmm11,%xmm11
vmovups %xmm12,-48(%rsi)
vpshufb %xmm0,%xmm12,%xmm12
vmovups %xmm13,-32(%rsi)
vpshufb %xmm0,%xmm13,%xmm13
vmovups %xmm14,-16(%rsi)
vpshufb %xmm0,%xmm14,%xmm14
vmovdqu %xmm9,16(%rsp)
vmovdqu 48(%rsp),%xmm6
vmovdqu 16-32(%r9),%xmm0
vpunpckhqdq %xmm6,%xmm6,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5
vpxor %xmm6,%xmm2,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vmovdqu 64(%rsp),%xmm9
vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm9,%xmm9,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6
vpxor %xmm9,%xmm5,%xmm5
vpxor %xmm7,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vmovdqu 80(%rsp),%xmm1
vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm4,%xmm7,%xmm7
vpunpckhqdq %xmm1,%xmm1,%xmm4
vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpxor %xmm6,%xmm9,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 96(%rsp),%xmm2
vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm7,%xmm6,%xmm6
vpunpckhqdq %xmm2,%xmm2,%xmm7
vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1
vpxor %xmm2,%xmm7,%xmm7
vpxor %xmm9,%xmm1,%xmm1
vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm5,%xmm4,%xmm4
vpxor 112(%rsp),%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5
vmovdqu 112-32(%r9),%xmm0
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpxor %xmm6,%xmm5,%xmm5
vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7
vpxor %xmm4,%xmm7,%xmm4
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm14,%xmm14,%xmm1
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8
vpxor %xmm14,%xmm1,%xmm1
vpxor %xmm5,%xmm6,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9
vmovdqu 32-32(%r9),%xmm15
vpxor %xmm2,%xmm8,%xmm7
vpxor %xmm4,%xmm9,%xmm6
vmovdqu 16-32(%r9),%xmm0
vpxor %xmm5,%xmm7,%xmm9
vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4
vpxor %xmm9,%xmm6,%xmm6
vpunpckhqdq %xmm13,%xmm13,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14
vpxor %xmm13,%xmm2,%xmm2
vpslldq $8,%xmm6,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vpxor %xmm9,%xmm5,%xmm8
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm6,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm12,%xmm12,%xmm9
vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13
vpxor %xmm12,%xmm9,%xmm9
vpxor %xmm14,%xmm13,%xmm13
vpalignr $8,%xmm8,%xmm8,%xmm14
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm11,%xmm11,%xmm1
vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12
vpxor %xmm11,%xmm1,%xmm1
vpxor %xmm13,%xmm12,%xmm12
vxorps 16(%rsp),%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm9,%xmm9
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm10,%xmm10,%xmm2
vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11
vpxor %xmm10,%xmm2,%xmm2
vpalignr $8,%xmm8,%xmm8,%xmm14
vpxor %xmm12,%xmm11,%xmm11
vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm9,%xmm1,%xmm1
vxorps %xmm7,%xmm14,%xmm14
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4
vmovdqu 112-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm11,%xmm10,%xmm10
vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7
vpxor %xmm4,%xmm5,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6
vpxor %xmm10,%xmm7,%xmm7
vpxor %xmm2,%xmm6,%xmm6
vpxor %xmm5,%xmm7,%xmm4
vpxor %xmm4,%xmm6,%xmm6
vpslldq $8,%xmm6,%xmm1
vmovdqu 16(%r11),%xmm3
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm1,%xmm5,%xmm8
vpxor %xmm6,%xmm7,%xmm7
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm2,%xmm8,%xmm8
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm7,%xmm2,%xmm2
vpxor %xmm2,%xmm8,%xmm8
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
.Lgcm_enc_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lpoly:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.Lone_msb:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Ltwo_lsb:
.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.Lone_lsb:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64

523
deps/openssl/asm/x64-elf-gas/modes/ghash-x86_64.s

@@ -661,10 +661,10 @@ gcm_ghash_4bit:
gcm_init_clmul:
.L_init_clmul:
movdqu (%rsi),%xmm2
pshufd $0b01001110,%xmm2,%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $0b11111111,%xmm2,%xmm4
pshufd $255,%xmm2,%xmm4
movdqa %xmm2,%xmm3
psllq $1,%xmm2
pxor %xmm5,%xmm5
@@ -678,11 +678,11 @@ gcm_init_clmul:
pxor %xmm5,%xmm2
pshufd $0b01001110,%xmm2,%xmm6
pshufd $78,%xmm2,%xmm6
movdqa %xmm2,%xmm0
pxor %xmm2,%xmm6
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@@ -718,8 +718,8 @@ gcm_init_clmul:
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $0b01001110,%xmm2,%xmm3
pshufd $0b01001110,%xmm0,%xmm4
pshufd $78,%xmm2,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm2,%xmm3
movdqu %xmm2,0(%rdi)
pxor %xmm0,%xmm4
@@ -727,7 +727,7 @@ gcm_init_clmul:
.byte 102,15,58,15,227,8
movdqu %xmm4,32(%rdi)
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@@ -765,7 +765,7 @@ gcm_init_clmul:
pxor %xmm1,%xmm0
movdqa %xmm0,%xmm5
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@@ -801,8 +801,8 @@ gcm_init_clmul:
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $0b01001110,%xmm5,%xmm3
pshufd $0b01001110,%xmm0,%xmm4
pshufd $78,%xmm5,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm5,%xmm3
movdqu %xmm5,48(%rdi)
pxor %xmm0,%xmm4
@@ -822,7 +822,7 @@ gcm_gmult_clmul:
movdqu 32(%rsi),%xmm4
.byte 102,15,56,0,197
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@@ -899,14 +899,14 @@ gcm_ghash_clmul:
.byte 102,65,15,56,0,218
.byte 102,69,15,56,0,218
movdqa %xmm3,%xmm5
pshufd $0b01001110,%xmm3,%xmm4
pshufd $78,%xmm3,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0
movdqa %xmm11,%xmm13
pshufd $0b01001110,%xmm11,%xmm12
pshufd $78,%xmm11,%xmm12
pxor %xmm11,%xmm12
.byte 102,68,15,58,68,222,0
.byte 102,68,15,58,68,238,17
@@ -921,12 +921,12 @@ gcm_ghash_clmul:
.byte 102,69,15,56,0,218
.byte 102,69,15,56,0,194
movdqa %xmm11,%xmm13
pshufd $0b01001110,%xmm11,%xmm12
pshufd $78,%xmm11,%xmm12
pxor %xmm8,%xmm0
pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm8
pshufd $78,%xmm0,%xmm8
pxor %xmm0,%xmm8
.byte 102,69,15,58,68,238,17
.byte 102,68,15,58,68,231,0
@@ -949,14 +949,14 @@ gcm_ghash_clmul:
movdqu 32(%rdx),%xmm3
movdqa %xmm11,%xmm13
.byte 102,68,15,58,68,199,16
pshufd $0b01001110,%xmm11,%xmm12
pshufd $78,%xmm11,%xmm12
xorps %xmm5,%xmm1
pxor %xmm11,%xmm12
.byte 102,65,15,56,0,218
movups 32(%rsi),%xmm7
xorps %xmm4,%xmm8
.byte 102,68,15,58,68,218,0
pshufd $0b01001110,%xmm3,%xmm4
pshufd $78,%xmm3,%xmm4
pxor %xmm0,%xmm8
movdqa %xmm3,%xmm5
@@ -1000,7 +1000,7 @@ gcm_ghash_clmul:
movdqa %xmm11,%xmm13
pxor %xmm12,%xmm4
pshufd $0b01001110,%xmm11,%xmm12
pshufd $78,%xmm11,%xmm12
pxor %xmm9,%xmm0
pxor %xmm8,%xmm1
pxor %xmm11,%xmm12
@@ -1010,7 +1010,7 @@ gcm_ghash_clmul:
movdqa %xmm0,%xmm1
.byte 102,69,15,58,68,238,17
xorps %xmm11,%xmm3
pshufd $0b01001110,%xmm0,%xmm8
pshufd $78,%xmm0,%xmm8
pxor %xmm0,%xmm8
.byte 102,68,15,58,68,231,0
@@ -1079,7 +1079,7 @@ gcm_ghash_clmul:
pxor %xmm8,%xmm0
movdqa %xmm3,%xmm5
pshufd $0b01001110,%xmm3,%xmm4
pshufd $78,%xmm3,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
@@ -1096,7 +1096,7 @@ gcm_ghash_clmul:
.Lmod_loop:
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm8
pshufd $0b01001110,%xmm0,%xmm4
pshufd $78,%xmm0,%xmm4
pxor %xmm0,%xmm4
.byte 102,15,58,68,198,0
@@ -1134,7 +1134,7 @@ gcm_ghash_clmul:
pslldq $8,%xmm0
psrldq $8,%xmm8
pxor %xmm9,%xmm0
pshufd $0b01001110,%xmm5,%xmm4
pshufd $78,%xmm5,%xmm4
pxor %xmm8,%xmm1
pxor %xmm5,%xmm4
@@ -1156,7 +1156,7 @@ gcm_ghash_clmul:
.Leven_tail:
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm8
pshufd $0b01001110,%xmm0,%xmm4
pshufd $78,%xmm0,%xmm4
pxor %xmm0,%xmm4
.byte 102,15,58,68,198,0
@@ -1204,7 +1204,7 @@ gcm_ghash_clmul:
.byte 102,69,15,56,0,194
pxor %xmm8,%xmm0
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@@ -1249,7 +1249,108 @@ gcm_ghash_clmul:
.type gcm_init_avx,@function
.align 32
gcm_init_avx:
jmp .L_init_clmul
vzeroupper
vmovdqu (%rsi),%xmm2
vpshufd $78,%xmm2,%xmm2
vpshufd $255,%xmm2,%xmm4
vpsrlq $63,%xmm2,%xmm3
vpsllq $1,%xmm2,%xmm2
vpxor %xmm5,%xmm5,%xmm5
vpcmpgtd %xmm4,%xmm5,%xmm5
vpslldq $8,%xmm3,%xmm3
vpor %xmm3,%xmm2,%xmm2
vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5
vpxor %xmm5,%xmm2,%xmm2
vpunpckhqdq %xmm2,%xmm2,%xmm6
vmovdqa %xmm2,%xmm0
vpxor %xmm2,%xmm6,%xmm6
movq $4,%r10
jmp .Linit_start_avx
.align 32
.Linit_loop_avx:
vpalignr $8,%xmm3,%xmm4,%xmm5
vmovdqu %xmm5,-16(%rdi)
vpunpckhqdq %xmm0,%xmm0,%xmm3
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
vpxor %xmm0,%xmm1,%xmm4
vpxor %xmm4,%xmm3,%xmm3
vpslldq $8,%xmm3,%xmm4
vpsrldq $8,%xmm3,%xmm3
vpxor %xmm4,%xmm0,%xmm0
vpxor %xmm3,%xmm1,%xmm1
vpsllq $57,%xmm0,%xmm3
vpsllq $62,%xmm0,%xmm4
vpxor %xmm3,%xmm4,%xmm4
vpsllq $63,%xmm0,%xmm3
vpxor %xmm3,%xmm4,%xmm4
vpslldq $8,%xmm4,%xmm3
vpsrldq $8,%xmm4,%xmm4
vpxor %xmm3,%xmm0,%xmm0
vpxor %xmm4,%xmm1,%xmm1
vpsrlq $1,%xmm0,%xmm4
vpxor %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $5,%xmm4,%xmm4
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $1,%xmm0,%xmm0
vpxor %xmm1,%xmm0,%xmm0
.Linit_start_avx:
vmovdqa %xmm0,%xmm5
vpunpckhqdq %xmm0,%xmm0,%xmm3
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
vpxor %xmm0,%xmm1,%xmm4
vpxor %xmm4,%xmm3,%xmm3
vpslldq $8,%xmm3,%xmm4
vpsrldq $8,%xmm3,%xmm3
vpxor %xmm4,%xmm0,%xmm0
vpxor %xmm3,%xmm1,%xmm1
vpsllq $57,%xmm0,%xmm3
vpsllq $62,%xmm0,%xmm4
vpxor %xmm3,%xmm4,%xmm4
vpsllq $63,%xmm0,%xmm3
vpxor %xmm3,%xmm4,%xmm4
vpslldq $8,%xmm4,%xmm3
vpsrldq $8,%xmm4,%xmm4
vpxor %xmm3,%xmm0,%xmm0
vpxor %xmm4,%xmm1,%xmm1
vpsrlq $1,%xmm0,%xmm4
vpxor %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $5,%xmm4,%xmm4
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $1,%xmm0,%xmm0
vpxor %xmm1,%xmm0,%xmm0
vpshufd $78,%xmm5,%xmm3
vpshufd $78,%xmm0,%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqu %xmm5,0(%rdi)
vpxor %xmm0,%xmm4,%xmm4
vmovdqu %xmm0,16(%rdi)
leaq 48(%rdi),%rdi
subq $1,%r10
jnz .Linit_loop_avx
vpalignr $8,%xmm4,%xmm3,%xmm5
vmovdqu %xmm5,-16(%rdi)
vzeroupper
.byte 0xf3,0xc3
.size gcm_init_avx,.-gcm_init_avx
.globl gcm_gmult_avx
.type gcm_gmult_avx,@function
@@ -1261,7 +1362,377 @@ gcm_gmult_avx:
.type gcm_ghash_avx,@function
.align 32
gcm_ghash_avx:
jmp .L_ghash_clmul
vzeroupper
vmovdqu (%rdi),%xmm10
leaq .L0x1c2_polynomial(%rip),%r10
leaq 64(%rsi),%rsi
vmovdqu .Lbswap_mask(%rip),%xmm13
vpshufb %xmm13,%xmm10,%xmm10
cmpq $0x80,%rcx
jb .Lshort_avx
subq $0x80,%rcx
vmovdqu 112(%rdx),%xmm14
vmovdqu 0-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm14
vmovdqu 32-64(%rsi),%xmm7
vpunpckhqdq %xmm14,%xmm14,%xmm9
vmovdqu 96(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm14,%xmm9,%xmm9
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 16-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vmovdqu 80(%rdx),%xmm14
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 48-64(%rsi),%xmm6
vpxor %xmm14,%xmm9,%xmm9
vmovdqu 64(%rdx),%xmm15
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 80-64(%rsi),%xmm7
vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vmovdqu 48(%rdx),%xmm14
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm4,%xmm1,%xmm1
vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 96-64(%rsi),%xmm6
vpxor %xmm5,%xmm2,%xmm2
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 128-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vmovdqu 32(%rdx),%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vmovdqu 16(%rdx),%xmm14
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm4,%xmm1,%xmm1
vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 144-64(%rsi),%xmm6
vpxor %xmm5,%xmm2,%xmm2
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 176-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vmovdqu (%rdx),%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 160-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
leaq 128(%rdx),%rdx
cmpq $0x80,%rcx
jb .Ltail_avx
vpxor %xmm10,%xmm15,%xmm15
subq $0x80,%rcx
jmp .Loop8x_avx
.align 32
.Loop8x_avx:
vpunpckhqdq %xmm15,%xmm15,%xmm8
vmovdqu 112(%rdx),%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpxor %xmm15,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11
vmovdqu 0-64(%rsi),%xmm6
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12
vmovdqu 32-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vmovdqu 96(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm3,%xmm10,%xmm10
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vxorps %xmm4,%xmm11,%xmm11
vmovdqu 16-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm5,%xmm12,%xmm12
vxorps %xmm15,%xmm8,%xmm8
vmovdqu 80(%rdx),%xmm14
vpxor %xmm10,%xmm12,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm11,%xmm12,%xmm12
vpslldq $8,%xmm12,%xmm9
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vpsrldq $8,%xmm12,%xmm12
vpxor %xmm9,%xmm10,%xmm10
vmovdqu 48-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm14
vxorps %xmm12,%xmm11,%xmm11
vpxor %xmm1,%xmm4,%xmm4
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 80-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 64(%rdx),%xmm15
vpalignr $8,%xmm10,%xmm10,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm4,%xmm1,%xmm1
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vxorps %xmm15,%xmm8,%xmm8
vpxor %xmm5,%xmm2,%xmm2
vmovdqu 48(%rdx),%xmm14
vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 96-64(%rsi),%xmm6
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 128-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 32(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm4,%xmm1,%xmm1
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vpxor %xmm5,%xmm2,%xmm2
vxorps %xmm12,%xmm10,%xmm10
vmovdqu 16(%rdx),%xmm14
vpalignr $8,%xmm10,%xmm10,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 144-64(%rsi),%xmm6
vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
vxorps %xmm11,%xmm12,%xmm12
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 176-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vmovdqu (%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 160-64(%rsi),%xmm6
vpxor %xmm12,%xmm15,%xmm15
vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
vpxor %xmm10,%xmm15,%xmm15
leaq 128(%rdx),%rdx
subq $0x80,%rcx
jnc .Loop8x_avx
addq $0x80,%rcx
jmp .Ltail_no_xor_avx
.align 32
.Lshort_avx:
vmovdqu -16(%rdx,%rcx,1),%xmm14
leaq (%rdx,%rcx,1),%rdx
vmovdqu 0-64(%rsi),%xmm6
vmovdqu 32-64(%rsi),%xmm7
vpshufb %xmm13,%xmm14,%xmm15
vmovdqa %xmm0,%xmm3
vmovdqa %xmm1,%xmm4
vmovdqa %xmm2,%xmm5
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -32(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 16-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -48(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 48-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovdqu 80-64(%rsi),%xmm7
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -64(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -80(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 96-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovdqu 128-64(%rsi),%xmm7
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -96(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -112(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 144-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovq 184-64(%rsi),%xmm7
subq $0x10,%rcx
jmp .Ltail_avx
.align 32
.Ltail_avx:
vpxor %xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovdqu (%r10),%xmm12
vpxor %xmm0,%xmm3,%xmm10
vpxor %xmm1,%xmm4,%xmm11
vpxor %xmm2,%xmm5,%xmm5
vpxor %xmm10,%xmm5,%xmm5
vpxor %xmm11,%xmm5,%xmm5
vpslldq $8,%xmm5,%xmm9
vpsrldq $8,%xmm5,%xmm5
vpxor %xmm9,%xmm10,%xmm10
vpxor %xmm5,%xmm11,%xmm11
vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
vpalignr $8,%xmm10,%xmm10,%xmm10
vpxor %xmm9,%xmm10,%xmm10
vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
vpalignr $8,%xmm10,%xmm10,%xmm10
vpxor %xmm11,%xmm10,%xmm10
vpxor %xmm9,%xmm10,%xmm10
cmpq $0,%rcx
jne .Lshort_avx
vpshufb %xmm13,%xmm10,%xmm10
vmovdqu %xmm10,(%rdi)
vzeroupper
.byte 0xf3,0xc3
.size gcm_ghash_avx,.-gcm_ghash_avx
.align 64
.Lbswap_mask:

4299
deps/openssl/asm/x64-elf-gas/sha/sha1-mb-x86_64.s

File diff suppressed because it is too large

2813
deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s

File diff suppressed because it is too large

4660
deps/openssl/asm/x64-elf-gas/sha/sha256-mb-x86_64.s

File diff suppressed because it is too large

2309
deps/openssl/asm/x64-elf-gas/sha/sha256-x86_64.s

File diff suppressed because it is too large

3582
deps/openssl/asm/x64-elf-gas/sha/sha512-x86_64.s

File diff suppressed because it is too large

48
deps/openssl/asm/x64-elf-gas/x86_64cpuid.s

@@ -44,43 +44,43 @@ OPENSSL_ia32_cpuid:
movl %eax,%r11d
xorl %eax,%eax
cmpl $1970169159,%ebx
cmpl $0x756e6547,%ebx
setne %al
movl %eax,%r9d
cmpl $1231384169,%edx
cmpl $0x49656e69,%edx
setne %al
orl %eax,%r9d
cmpl $1818588270,%ecx
cmpl $0x6c65746e,%ecx
setne %al
orl %eax,%r9d
jz .Lintel
cmpl $1752462657,%ebx
cmpl $0x68747541,%ebx
setne %al
movl %eax,%r10d
cmpl $1769238117,%edx
cmpl $0x69746E65,%edx
setne %al
orl %eax,%r10d
cmpl $1145913699,%ecx
cmpl $0x444D4163,%ecx
setne %al
orl %eax,%r10d
jnz .Lintel
movl $2147483648,%eax
movl $0x80000000,%eax
cpuid
cmpl $2147483649,%eax
cmpl $0x80000001,%eax
jb .Lintel
movl %eax,%r10d
movl $2147483649,%eax
movl $0x80000001,%eax
cpuid
orl %ecx,%r9d
andl $2049,%r9d
andl $0x00000801,%r9d
cmpl $2147483656,%r10d
cmpl $0x80000008,%r10d
jb .Lintel
movl $2147483656,%eax
movl $0x80000008,%eax
cpuid
movzbq %cl,%r10
incq %r10
@@ -92,7 +92,7 @@ OPENSSL_ia32_cpuid:
shrl $16,%ebx
cmpb %r10b,%bl
ja .Lgeneric
andl $4026531839,%edx
andl $0xefffffff,%edx
jmp .Lgeneric
.Lintel:
@@ -105,7 +105,7 @@ OPENSSL_ia32_cpuid:
cpuid
movl %eax,%r10d
shrl $14,%r10d
andl $4095,%r10d
andl $0xfff,%r10d
cmpl $7,%r11d
jb .Lnocacheinfo
@@ -118,29 +118,29 @@ OPENSSL_ia32_cpuid:
.Lnocacheinfo:
movl $1,%eax
cpuid
andl $3220176895,%edx
andl $0xbfefffff,%edx
cmpl $0,%r9d
jne .Lnotintel
orl $1073741824,%edx
orl $0x40000000,%edx
andb $15,%ah
cmpb $15,%ah
jne .Lnotintel
orl $1048576,%edx
orl $0x00100000,%edx
.Lnotintel:
btl $28,%edx
jnc .Lgeneric
andl $4026531839,%edx
andl $0xefffffff,%edx
cmpl $0,%r10d
je .Lgeneric
orl $268435456,%edx
orl $0x10000000,%edx
shrl $16,%ebx
cmpb $1,%bl
ja .Lgeneric
andl $4026531839,%edx
andl $0xefffffff,%edx
.Lgeneric:
andl $2048,%r9d
andl $4294965247,%ecx
andl $0x00000800,%r9d
andl $0xfffff7ff,%ecx
orl %ecx,%r9d
movl %edx,%r10d
@@ -152,9 +152,9 @@ OPENSSL_ia32_cpuid:
cmpl $6,%eax
je .Ldone
.Lclear_avx:
movl $4026525695,%eax
movl $0xefffe7ff,%eax
andl %eax,%r9d
andl $4294967263,8(%rdi)
andl $0xffffffdf,8(%rdi)
.Ldone:
shlq $32,%r9
movl %r10d,%eax

70
deps/openssl/asm/x64-macosx-gas/aes/aes-x86_64.s

@@ -81,8 +81,8 @@ L$enc_loop:
movl 0(%r14,%rdi,8),%edi
movl 0(%r14,%rbp,8),%ebp
andl $65280,%edi
andl $65280,%ebp
andl $0x0000ff00,%edi
andl $0x0000ff00,%ebp
xorl %edi,%r10d
xorl %ebp,%r11d
@@ -94,8 +94,8 @@ L$enc_loop:
movl 0(%r14,%rsi,8),%esi
movl 0(%r14,%rdi,8),%edi
andl $65280,%esi
andl $65280,%edi
andl $0x0000ff00,%esi
andl $0x0000ff00,%edi
shrl $16,%ebx
xorl %esi,%r12d
xorl %edi,%r8d
@@ -108,9 +108,9 @@ L$enc_loop:
movl 0(%r14,%rdi,8),%edi
movl 0(%r14,%rbp,8),%ebp
andl $16711680,%esi
andl $16711680,%edi
andl $16711680,%ebp
andl $0x00ff0000,%esi
andl $0x00ff0000,%edi
andl $0x00ff0000,%ebp
xorl %esi,%r10d
xorl %edi,%r11d
@@ -123,9 +123,9 @@ L$enc_loop:
movl 2(%r14,%rdi,8),%edi
movl 2(%r14,%rbp,8),%ebp
andl $16711680,%esi
andl $4278190080,%edi
andl $4278190080,%ebp
andl $0x00ff0000,%esi
andl $0xff000000,%edi
andl $0xff000000,%ebp
xorl %esi,%r8d
xorl %edi,%r10d
@@ -138,8 +138,8 @@ L$enc_loop:
movl 2(%r14,%rdi,8),%edi
movl 16+0(%r15),%eax
andl $4278190080,%esi
andl $4278190080,%edi
andl $0xff000000,%esi
andl $0xff000000,%edi
xorl %esi,%r12d
xorl %edi,%r8d
@@ -241,8 +241,8 @@ L$enc_loop_compact:
xorl %r8d,%edx
cmpq 16(%rsp),%r15
je L$enc_compact_done
movl $2155905152,%r10d
movl $2155905152,%r11d
movl $0x80808080,%r10d
movl $0x80808080,%r11d
andl %eax,%r10d
andl %ebx,%r11d
movl %r10d,%esi
@@ -253,10 +253,10 @@ L$enc_loop_compact:
leal (%rbx,%rbx,1),%r9d
subl %r10d,%esi
subl %r11d,%edi
andl $4278124286,%r8d
andl $4278124286,%r9d
andl $454761243,%esi
andl $454761243,%edi
andl $0xfefefefe,%r8d
andl $0xfefefefe,%r9d
andl $0x1b1b1b1b,%esi
andl $0x1b1b1b1b,%edi
movl %eax,%r10d
movl %ebx,%r11d
xorl %esi,%r8d
@@ -264,9 +264,9 @@ L$enc_loop_compact:
xorl %r8d,%eax
xorl %r9d,%ebx
movl $2155905152,%r12d
movl $0x80808080,%r12d
roll $24,%eax
movl $2155905152,%ebp
movl $0x80808080,%ebp
roll $24,%ebx
andl %ecx,%r12d
andl %edx,%ebp
@@ -289,10 +289,10 @@ L$enc_loop_compact:
xorl %r10d,%eax
xorl %r11d,%ebx
andl $4278124286,%r8d
andl $4278124286,%r9d
andl $454761243,%esi
andl $454761243,%edi
andl $0xfefefefe,%r8d
andl $0xfefefefe,%r9d
andl $0x1b1b1b1b,%esi
andl $0x1b1b1b1b,%edi
movl %ecx,%r12d
movl %edx,%ebp
xorl %esi,%r8d
@@ -345,7 +345,7 @@ _AES_encrypt:
andq $-64,%rsp
subq %rsp,%rcx
negq %rcx
andq $960,%rcx
andq $0x3c0,%rcx
subq %rcx,%rsp
subq $32,%rsp
@@ -370,7 +370,7 @@ L$enc_prologue:
leaq L$AES_Te+2048(%rip),%r14
leaq 768(%rsp),%rbp
subq %r14,%rbp
andq $768,%rbp
andq $0x300,%rbp
leaq (%r14,%rbp,1),%r14
call _x86_64_AES_encrypt_compact
@@ -792,7 +792,7 @@ _AES_decrypt:
andq $-64,%rsp
subq %rsp,%rcx
negq %rcx
andq $960,%rcx
andq $0x3c0,%rcx
subq %rcx,%rsp
subq $32,%rsp
@@ -817,7 +817,7 @@ L$dec_prologue:
leaq L$AES_Td+2048(%rip),%r14
leaq 768(%rsp),%rbp
subq %r14,%rbp
andq $768,%rbp
andq $0x300,%rbp
leaq (%r14,%rbp,1),%r14
shrq $3,%rbp
addq %rbp,%r14
@@ -1333,9 +1333,9 @@ L$cbc_picked_te:
movq %r14,%r10
leaq 2304(%r14),%r11
movq %r15,%r12
andq $4095,%r10
andq $4095,%r11
andq $4095,%r12
andq $0xFFF,%r10
andq $0xFFF,%r11
andq $0xFFF,%r12
cmpq %r11,%r12
jb L$cbc_te_break_out
@@ -1344,7 +1344,7 @@ L$cbc_picked_te:
jmp L$cbc_te_ok
L$cbc_te_break_out:
subq %r10,%r12
andq $4095,%r12
andq $0xFFF,%r12
addq $320,%r12
subq %r12,%r15
.p2align 2
@@ -1370,7 +1370,7 @@ L$cbc_fast_body:
movq %r15,%r10
subq %r14,%r10
andq $4095,%r10
andq $0xfff,%r10
cmpq $2304,%r10
jb L$cbc_do_ecopy
cmpq $4096-248,%r10
@@ -1557,7 +1557,7 @@ L$cbc_slow_prologue:
leaq -88-63(%rcx),%r10
subq %rbp,%r10
negq %r10
andq $960,%r10
andq $0x3c0,%r10
subq %r10,%rbp
xchgq %rsp,%rbp
@@ -1586,7 +1586,7 @@ L$cbc_slow_body:
leaq 2048(%r14),%r14
leaq 768-8(%rsp),%rax
subq %r14,%rax
andq $768,%rax
andq $0x300,%rax
leaq (%r14,%rax,1),%r14
cmpq $0,%rbx

929
deps/openssl/asm/x64-macosx-gas/aes/aesni-mb-x86_64.s

@@ -6,6 +6,14 @@
.p2align 5
_aesni_multi_cbc_encrypt:
cmpl $2,%edx
jb L$enc_non_avx
movl _OPENSSL_ia32cap_P+4(%rip),%ecx
testl $268435456,%ecx
jnz _avx_cbc_enc_shortcut
jmp L$enc_non_avx
.p2align 4
L$enc_non_avx:
movq %rsp,%rax
pushq %rbx
pushq %rbp
@@ -262,6 +270,14 @@ L$enc4x_epilogue:
.p2align 5
_aesni_multi_cbc_decrypt:
cmpl $2,%edx
jb L$dec_non_avx
movl _OPENSSL_ia32cap_P+4(%rip),%ecx
testl $268435456,%ecx
jnz _avx_cbc_dec_shortcut
jmp L$dec_non_avx
.p2align 4
L$dec_non_avx:
movq %rsp,%rax
pushq %rbx
pushq %rbp
@@ -503,3 +519,916 @@ L$dec4x_done:
leaq (%rax),%rsp
L$dec4x_epilogue:
.byte 0xf3,0xc3
.p2align 5
aesni_multi_cbc_encrypt_avx:
_avx_cbc_enc_shortcut:
movq %rsp,%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $192,%rsp
andq $-128,%rsp
movq %rax,16(%rsp)
L$enc8x_body:
vzeroupper
vmovdqu (%rsi),%xmm15
leaq 120(%rsi),%rsi
leaq 160(%rdi),%rdi
shrl $1,%edx
L$enc8x_loop_grande:
xorl %edx,%edx
movl -144(%rdi),%ecx
movq -160(%rdi),%r8
cmpl %edx,%ecx
movq -152(%rdi),%rbx
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -136(%rdi),%xmm2
movl %ecx,32(%rsp)
cmovleq %rsp,%r8
subq %r8,%rbx
movq %rbx,64(%rsp)
movl -104(%rdi),%ecx
movq -120(%rdi),%r9
cmpl %edx,%ecx
movq -112(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -96(%rdi),%xmm3
movl %ecx,36(%rsp)
cmovleq %rsp,%r9
subq %r9,%rbp
movq %rbp,72(%rsp)
movl -64(%rdi),%ecx
movq -80(%rdi),%r10
cmpl %edx,%ecx
movq -72(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -56(%rdi),%xmm4
movl %ecx,40(%rsp)
cmovleq %rsp,%r10
subq %r10,%rbp
movq %rbp,80(%rsp)
movl -24(%rdi),%ecx
movq -40(%rdi),%r11
cmpl %edx,%ecx
movq -32(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -16(%rdi),%xmm5
movl %ecx,44(%rsp)
cmovleq %rsp,%r11
subq %r11,%rbp
movq %rbp,88(%rsp)
movl 16(%rdi),%ecx
movq 0(%rdi),%r12
cmpl %edx,%ecx
movq 8(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 24(%rdi),%xmm6
movl %ecx,48(%rsp)
cmovleq %rsp,%r12
subq %r12,%rbp
movq %rbp,96(%rsp)
movl 56(%rdi),%ecx
movq 40(%rdi),%r13
cmpl %edx,%ecx
movq 48(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 64(%rdi),%xmm7
movl %ecx,52(%rsp)
cmovleq %rsp,%r13
subq %r13,%rbp
movq %rbp,104(%rsp)
movl 96(%rdi),%ecx
movq 80(%rdi),%r14
cmpl %edx,%ecx
movq 88(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 104(%rdi),%xmm8
movl %ecx,56(%rsp)
cmovleq %rsp,%r14
subq %r14,%rbp
movq %rbp,112(%rsp)
movl 136(%rdi),%ecx
movq 120(%rdi),%r15
cmpl %edx,%ecx
movq 128(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 144(%rdi),%xmm9
movl %ecx,60(%rsp)
cmovleq %rsp,%r15
subq %r15,%rbp
movq %rbp,120(%rsp)
testl %edx,%edx
jz L$enc8x_done
vmovups 16-120(%rsi),%xmm1
vmovups 32-120(%rsi),%xmm0
movl 240-120(%rsi),%eax
vpxor (%r8),%xmm15,%xmm10
leaq 128(%rsp),%rbp
vpxor (%r9),%xmm15,%xmm11
vpxor (%r10),%xmm15,%xmm12
vpxor (%r11),%xmm15,%xmm13
vpxor %xmm10,%xmm2,%xmm2
vpxor (%r12),%xmm15,%xmm10
vpxor %xmm11,%xmm3,%xmm3
vpxor (%r13),%xmm15,%xmm11
vpxor %xmm12,%xmm4,%xmm4
vpxor (%r14),%xmm15,%xmm12
vpxor %xmm13,%xmm5,%xmm5
vpxor (%r15),%xmm15,%xmm13
vpxor %xmm10,%xmm6,%xmm6
movl $1,%ecx
vpxor %xmm11,%xmm7,%xmm7
vpxor %xmm12,%xmm8,%xmm8
vpxor %xmm13,%xmm9,%xmm9
jmp L$oop_enc8x
.p2align 5
L$oop_enc8x:
vaesenc %xmm1,%xmm2,%xmm2
cmpl 32+0(%rsp),%ecx
vaesenc %xmm1,%xmm3,%xmm3
prefetcht0 31(%r8)
vaesenc %xmm1,%xmm4,%xmm4
vaesenc %xmm1,%xmm5,%xmm5
leaq (%r8,%rbx,1),%rbx
cmovgeq %rsp,%r8
vaesenc %xmm1,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesenc %xmm1,%xmm7,%xmm7
subq %r8,%rbx
vaesenc %xmm1,%xmm8,%xmm8
vpxor 16(%r8),%xmm15,%xmm10
movq %rbx,64+0(%rsp)
vaesenc %xmm1,%xmm9,%xmm9
vmovups -72(%rsi),%xmm1
leaq 16(%r8,%rbx,1),%r8
vmovdqu %xmm10,0(%rbp)
vaesenc %xmm0,%xmm2,%xmm2
cmpl 32+4(%rsp),%ecx
movq 64+8(%rsp),%rbx
vaesenc %xmm0,%xmm3,%xmm3
prefetcht0 31(%r9)
vaesenc %xmm0,%xmm4,%xmm4
vaesenc %xmm0,%xmm5,%xmm5
leaq (%r9,%rbx,1),%rbx
cmovgeq %rsp,%r9
vaesenc %xmm0,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesenc %xmm0,%xmm7,%xmm7
subq %r9,%rbx
vaesenc %xmm0,%xmm8,%xmm8
vpxor 16(%r9),%xmm15,%xmm11
movq %rbx,64+8(%rsp)
vaesenc %xmm0,%xmm9,%xmm9
vmovups -56(%rsi),%xmm0
leaq 16(%r9,%rbx,1),%r9
vmovdqu %xmm11,16(%rbp)
vaesenc %xmm1,%xmm2,%xmm2
cmpl 32+8(%rsp),%ecx
movq 64+16(%rsp),%rbx
vaesenc %xmm1,%xmm3,%xmm3
prefetcht0 31(%r10)
vaesenc %xmm1,%xmm4,%xmm4
prefetcht0 15(%r8)
vaesenc %xmm1,%xmm5,%xmm5
leaq (%r10,%rbx,1),%rbx
cmovgeq %rsp,%r10
vaesenc %xmm1,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesenc %xmm1,%xmm7,%xmm7
subq %r10,%rbx
vaesenc %xmm1,%xmm8,%xmm8
vpxor 16(%r10),%xmm15,%xmm12
movq %rbx,64+16(%rsp)
vaesenc %xmm1,%xmm9,%xmm9
vmovups -40(%rsi),%xmm1
leaq 16(%r10,%rbx,1),%r10
vmovdqu %xmm12,32(%rbp)
vaesenc %xmm0,%xmm2,%xmm2
cmpl 32+12(%rsp),%ecx
movq 64+24(%rsp),%rbx
vaesenc %xmm0,%xmm3,%xmm3
prefetcht0 31(%r11)
vaesenc %xmm0,%xmm4,%xmm4
prefetcht0 15(%r9)
vaesenc %xmm0,%xmm5,%xmm5
leaq (%r11,%rbx,1),%rbx
cmovgeq %rsp,%r11
vaesenc %xmm0,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesenc %xmm0,%xmm7,%xmm7
subq %r11,%rbx
vaesenc %xmm0,%xmm8,%xmm8
vpxor 16(%r11),%xmm15,%xmm13
movq %rbx,64+24(%rsp)
vaesenc %xmm0,%xmm9,%xmm9
vmovups -24(%rsi),%xmm0
leaq 16(%r11,%rbx,1),%r11
vmovdqu %xmm13,48(%rbp)
vaesenc %xmm1,%xmm2,%xmm2
cmpl 32+16(%rsp),%ecx
movq 64+32(%rsp),%rbx
vaesenc %xmm1,%xmm3,%xmm3
prefetcht0 31(%r12)
vaesenc %xmm1,%xmm4,%xmm4
prefetcht0 15(%r10)
vaesenc %xmm1,%xmm5,%xmm5
leaq (%r12,%rbx,1),%rbx
cmovgeq %rsp,%r12
vaesenc %xmm1,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesenc %xmm1,%xmm7,%xmm7
subq %r12,%rbx
vaesenc %xmm1,%xmm8,%xmm8
vpxor 16(%r12),%xmm15,%xmm10
movq %rbx,64+32(%rsp)
vaesenc %xmm1,%xmm9,%xmm9
vmovups -8(%rsi),%xmm1
leaq 16(%r12,%rbx,1),%r12
vaesenc %xmm0,%xmm2,%xmm2
cmpl 32+20(%rsp),%ecx
movq 64+40(%rsp),%rbx
vaesenc %xmm0,%xmm3,%xmm3
prefetcht0 31(%r13)
vaesenc %xmm0,%xmm4,%xmm4
prefetcht0 15(%r11)
vaesenc %xmm0,%xmm5,%xmm5
leaq (%rbx,%r13,1),%rbx
cmovgeq %rsp,%r13
vaesenc %xmm0,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesenc %xmm0,%xmm7,%xmm7
subq %r13,%rbx
vaesenc %xmm0,%xmm8,%xmm8
vpxor 16(%r13),%xmm15,%xmm11
movq %rbx,64+40(%rsp)
vaesenc %xmm0,%xmm9,%xmm9
vmovups 8(%rsi),%xmm0
leaq 16(%r13,%rbx,1),%r13
vaesenc %xmm1,%xmm2,%xmm2
cmpl 32+24(%rsp),%ecx
movq 64+48(%rsp),%rbx
vaesenc %xmm1,%xmm3,%xmm3
prefetcht0 31(%r14)
vaesenc %xmm1,%xmm4,%xmm4
prefetcht0 15(%r12)
vaesenc %xmm1,%xmm5,%xmm5
leaq (%r14,%rbx,1),%rbx
cmovgeq %rsp,%r14
vaesenc %xmm1,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesenc %xmm1,%xmm7,%xmm7
subq %r14,%rbx
vaesenc %xmm1,%xmm8,%xmm8
vpxor 16(%r14),%xmm15,%xmm12
movq %rbx,64+48(%rsp)
vaesenc %xmm1,%xmm9,%xmm9
vmovups 24(%rsi),%xmm1
leaq 16(%r14,%rbx,1),%r14
vaesenc %xmm0,%xmm2,%xmm2
cmpl 32+28(%rsp),%ecx
movq 64+56(%rsp),%rbx
vaesenc %xmm0,%xmm3,%xmm3
prefetcht0 31(%r15)
vaesenc %xmm0,%xmm4,%xmm4
prefetcht0 15(%r13)
vaesenc %xmm0,%xmm5,%xmm5
leaq (%r15,%rbx,1),%rbx
cmovgeq %rsp,%r15
vaesenc %xmm0,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesenc %xmm0,%xmm7,%xmm7
subq %r15,%rbx
vaesenc %xmm0,%xmm8,%xmm8
vpxor 16(%r15),%xmm15,%xmm13
movq %rbx,64+56(%rsp)
vaesenc %xmm0,%xmm9,%xmm9
vmovups 40(%rsi),%xmm0
leaq 16(%r15,%rbx,1),%r15
vmovdqu 32(%rsp),%xmm14
prefetcht0 15(%r14)
prefetcht0 15(%r15)
cmpl $11,%eax
jb L$enc8x_tail
vaesenc %xmm1,%xmm2,%xmm2
vaesenc %xmm1,%xmm3,%xmm3
vaesenc %xmm1,%xmm4,%xmm4
vaesenc %xmm1,%xmm5,%xmm5
vaesenc %xmm1,%xmm6,%xmm6
vaesenc %xmm1,%xmm7,%xmm7
vaesenc %xmm1,%xmm8,%xmm8
vaesenc %xmm1,%xmm9,%xmm9
vmovups 176-120(%rsi),%xmm1
vaesenc %xmm0,%xmm2,%xmm2
vaesenc %xmm0,%xmm3,%xmm3
vaesenc %xmm0,%xmm4,%xmm4
vaesenc %xmm0,%xmm5,%xmm5
vaesenc %xmm0,%xmm6,%xmm6
vaesenc %xmm0,%xmm7,%xmm7
vaesenc %xmm0,%xmm8,%xmm8
vaesenc %xmm0,%xmm9,%xmm9
vmovups 192-120(%rsi),%xmm0
je L$enc8x_tail
vaesenc %xmm1,%xmm2,%xmm2
vaesenc %xmm1,%xmm3,%xmm3
vaesenc %xmm1,%xmm4,%xmm4
vaesenc %xmm1,%xmm5,%xmm5
vaesenc %xmm1,%xmm6,%xmm6
vaesenc %xmm1,%xmm7,%xmm7
vaesenc %xmm1,%xmm8,%xmm8
vaesenc %xmm1,%xmm9,%xmm9
vmovups 208-120(%rsi),%xmm1
vaesenc %xmm0,%xmm2,%xmm2
vaesenc %xmm0,%xmm3,%xmm3
vaesenc %xmm0,%xmm4,%xmm4
vaesenc %xmm0,%xmm5,%xmm5
vaesenc %xmm0,%xmm6,%xmm6
vaesenc %xmm0,%xmm7,%xmm7
vaesenc %xmm0,%xmm8,%xmm8
vaesenc %xmm0,%xmm9,%xmm9
vmovups 224-120(%rsi),%xmm0
L$enc8x_tail:
vaesenc %xmm1,%xmm2,%xmm2
vpxor %xmm15,%xmm15,%xmm15
vaesenc %xmm1,%xmm3,%xmm3
vaesenc %xmm1,%xmm4,%xmm4
vpcmpgtd %xmm15,%xmm14,%xmm15
vaesenc %xmm1,%xmm5,%xmm5
vaesenc %xmm1,%xmm6,%xmm6
vpaddd %xmm14,%xmm15,%xmm15
vmovdqu 48(%rsp),%xmm14
vaesenc %xmm1,%xmm7,%xmm7
movq 64(%rsp),%rbx
vaesenc %xmm1,%xmm8,%xmm8
vaesenc %xmm1,%xmm9,%xmm9
vmovups 16-120(%rsi),%xmm1
vaesenclast %xmm0,%xmm2,%xmm2
vmovdqa %xmm15,32(%rsp)
vpxor %xmm15,%xmm15,%xmm15
vaesenclast %xmm0,%xmm3,%xmm3
vaesenclast %xmm0,%xmm4,%xmm4
vpcmpgtd %xmm15,%xmm14,%xmm15
vaesenclast %xmm0,%xmm5,%xmm5
vaesenclast %xmm0,%xmm6,%xmm6
vpaddd %xmm15,%xmm14,%xmm14
vmovdqu -120(%rsi),%xmm15
vaesenclast %xmm0,%xmm7,%xmm7
vaesenclast %xmm0,%xmm8,%xmm8
vmovdqa %xmm14,48(%rsp)
vaesenclast %xmm0,%xmm9,%xmm9
vmovups 32-120(%rsi),%xmm0
vmovups %xmm2,-16(%r8)
subq %rbx,%r8
vpxor 0(%rbp),%xmm2,%xmm2
vmovups %xmm3,-16(%r9)
subq 72(%rsp),%r9
vpxor 16(%rbp),%xmm3,%xmm3
vmovups %xmm4,-16(%r10)
subq 80(%rsp),%r10
vpxor 32(%rbp),%xmm4,%xmm4
vmovups %xmm5,-16(%r11)
subq 88(%rsp),%r11
vpxor 48(%rbp),%xmm5,%xmm5
vmovups %xmm6,-16(%r12)
subq 96(%rsp),%r12
vpxor %xmm10,%xmm6,%xmm6
vmovups %xmm7,-16(%r13)
subq 104(%rsp),%r13
vpxor %xmm11,%xmm7,%xmm7
vmovups %xmm8,-16(%r14)
subq 112(%rsp),%r14
vpxor %xmm12,%xmm8,%xmm8
vmovups %xmm9,-16(%r15)
subq 120(%rsp),%r15
vpxor %xmm13,%xmm9,%xmm9
decl %edx
jnz L$oop_enc8x
movq 16(%rsp),%rax
L$enc8x_done:
vzeroupper
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
L$enc8x_epilogue:
.byte 0xf3,0xc3
.p2align 5
aesni_multi_cbc_decrypt_avx:
_avx_cbc_dec_shortcut:
movq %rsp,%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $256,%rsp
andq $-256,%rsp
subq $192,%rsp
movq %rax,16(%rsp)
L$dec8x_body:
vzeroupper
vmovdqu (%rsi),%xmm15
leaq 120(%rsi),%rsi
leaq 160(%rdi),%rdi
shrl $1,%edx
L$dec8x_loop_grande:
xorl %edx,%edx
movl -144(%rdi),%ecx
movq -160(%rdi),%r8
cmpl %edx,%ecx
movq -152(%rdi),%rbx
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -136(%rdi),%xmm2
movl %ecx,32(%rsp)
cmovleq %rsp,%r8
subq %r8,%rbx
movq %rbx,64(%rsp)
vmovdqu %xmm2,192(%rsp)
movl -104(%rdi),%ecx
movq -120(%rdi),%r9
cmpl %edx,%ecx
movq -112(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -96(%rdi),%xmm3
movl %ecx,36(%rsp)
cmovleq %rsp,%r9
subq %r9,%rbp
movq %rbp,72(%rsp)
vmovdqu %xmm3,208(%rsp)
movl -64(%rdi),%ecx
movq -80(%rdi),%r10
cmpl %edx,%ecx
movq -72(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -56(%rdi),%xmm4
movl %ecx,40(%rsp)
cmovleq %rsp,%r10
subq %r10,%rbp
movq %rbp,80(%rsp)
vmovdqu %xmm4,224(%rsp)
movl -24(%rdi),%ecx
movq -40(%rdi),%r11
cmpl %edx,%ecx
movq -32(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -16(%rdi),%xmm5
movl %ecx,44(%rsp)
cmovleq %rsp,%r11
subq %r11,%rbp
movq %rbp,88(%rsp)
vmovdqu %xmm5,240(%rsp)
movl 16(%rdi),%ecx
movq 0(%rdi),%r12
cmpl %edx,%ecx
movq 8(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 24(%rdi),%xmm6
movl %ecx,48(%rsp)
cmovleq %rsp,%r12
subq %r12,%rbp
movq %rbp,96(%rsp)
vmovdqu %xmm6,256(%rsp)
movl 56(%rdi),%ecx
movq 40(%rdi),%r13
cmpl %edx,%ecx
movq 48(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 64(%rdi),%xmm7
movl %ecx,52(%rsp)
cmovleq %rsp,%r13
subq %r13,%rbp
movq %rbp,104(%rsp)
vmovdqu %xmm7,272(%rsp)
movl 96(%rdi),%ecx
movq 80(%rdi),%r14
cmpl %edx,%ecx
movq 88(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 104(%rdi),%xmm8
movl %ecx,56(%rsp)
cmovleq %rsp,%r14
subq %r14,%rbp
movq %rbp,112(%rsp)
vmovdqu %xmm8,288(%rsp)
movl 136(%rdi),%ecx
movq 120(%rdi),%r15
cmpl %edx,%ecx
movq 128(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 144(%rdi),%xmm9
movl %ecx,60(%rsp)
cmovleq %rsp,%r15
subq %r15,%rbp
movq %rbp,120(%rsp)
vmovdqu %xmm9,304(%rsp)
testl %edx,%edx
jz L$dec8x_done
vmovups 16-120(%rsi),%xmm1
vmovups 32-120(%rsi),%xmm0
movl 240-120(%rsi),%eax
leaq 192+128(%rsp),%rbp
vmovdqu (%r8),%xmm2
vmovdqu (%r9),%xmm3
vmovdqu (%r10),%xmm4
vmovdqu (%r11),%xmm5
vmovdqu (%r12),%xmm6
vmovdqu (%r13),%xmm7
vmovdqu (%r14),%xmm8
vmovdqu (%r15),%xmm9
vmovdqu %xmm2,0(%rbp)
vpxor %xmm15,%xmm2,%xmm2
vmovdqu %xmm3,16(%rbp)
vpxor %xmm15,%xmm3,%xmm3
vmovdqu %xmm4,32(%rbp)
vpxor %xmm15,%xmm4,%xmm4
vmovdqu %xmm5,48(%rbp)
vpxor %xmm15,%xmm5,%xmm5
vmovdqu %xmm6,64(%rbp)
vpxor %xmm15,%xmm6,%xmm6
vmovdqu %xmm7,80(%rbp)
vpxor %xmm15,%xmm7,%xmm7
vmovdqu %xmm8,96(%rbp)
vpxor %xmm15,%xmm8,%xmm8
vmovdqu %xmm9,112(%rbp)
vpxor %xmm15,%xmm9,%xmm9
xorq $0x80,%rbp
movl $1,%ecx
jmp L$oop_dec8x
.p2align 5
L$oop_dec8x:
vaesdec %xmm1,%xmm2,%xmm2
cmpl 32+0(%rsp),%ecx
vaesdec %xmm1,%xmm3,%xmm3
prefetcht0 31(%r8)
vaesdec %xmm1,%xmm4,%xmm4
vaesdec %xmm1,%xmm5,%xmm5
leaq (%r8,%rbx,1),%rbx
cmovgeq %rsp,%r8
vaesdec %xmm1,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesdec %xmm1,%xmm7,%xmm7
subq %r8,%rbx
vaesdec %xmm1,%xmm8,%xmm8
vmovdqu 16(%r8),%xmm10
movq %rbx,64+0(%rsp)
vaesdec %xmm1,%xmm9,%xmm9
vmovups -72(%rsi),%xmm1
leaq 16(%r8,%rbx,1),%r8
vmovdqu %xmm10,128(%rsp)
vaesdec %xmm0,%xmm2,%xmm2
cmpl 32+4(%rsp),%ecx
movq 64+8(%rsp),%rbx
vaesdec %xmm0,%xmm3,%xmm3
prefetcht0 31(%r9)
vaesdec %xmm0,%xmm4,%xmm4
vaesdec %xmm0,%xmm5,%xmm5
leaq (%r9,%rbx,1),%rbx
cmovgeq %rsp,%r9
vaesdec %xmm0,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesdec %xmm0,%xmm7,%xmm7
subq %r9,%rbx
vaesdec %xmm0,%xmm8,%xmm8
vmovdqu 16(%r9),%xmm11
movq %rbx,64+8(%rsp)
vaesdec %xmm0,%xmm9,%xmm9
vmovups -56(%rsi),%xmm0
leaq 16(%r9,%rbx,1),%r9
vmovdqu %xmm11,144(%rsp)
vaesdec %xmm1,%xmm2,%xmm2
cmpl 32+8(%rsp),%ecx
movq 64+16(%rsp),%rbx
vaesdec %xmm1,%xmm3,%xmm3
prefetcht0 31(%r10)
vaesdec %xmm1,%xmm4,%xmm4
prefetcht0 15(%r8)
vaesdec %xmm1,%xmm5,%xmm5
leaq (%r10,%rbx,1),%rbx
cmovgeq %rsp,%r10
vaesdec %xmm1,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesdec %xmm1,%xmm7,%xmm7
subq %r10,%rbx
vaesdec %xmm1,%xmm8,%xmm8
vmovdqu 16(%r10),%xmm12
movq %rbx,64+16(%rsp)
vaesdec %xmm1,%xmm9,%xmm9
vmovups -40(%rsi),%xmm1
leaq 16(%r10,%rbx,1),%r10
vmovdqu %xmm12,160(%rsp)
vaesdec %xmm0,%xmm2,%xmm2
cmpl 32+12(%rsp),%ecx
movq 64+24(%rsp),%rbx
vaesdec %xmm0,%xmm3,%xmm3
prefetcht0 31(%r11)
vaesdec %xmm0,%xmm4,%xmm4
prefetcht0 15(%r9)
vaesdec %xmm0,%xmm5,%xmm5
leaq (%r11,%rbx,1),%rbx
cmovgeq %rsp,%r11
vaesdec %xmm0,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesdec %xmm0,%xmm7,%xmm7
subq %r11,%rbx
vaesdec %xmm0,%xmm8,%xmm8
vmovdqu 16(%r11),%xmm13
movq %rbx,64+24(%rsp)
vaesdec %xmm0,%xmm9,%xmm9
vmovups -24(%rsi),%xmm0
leaq 16(%r11,%rbx,1),%r11
vmovdqu %xmm13,176(%rsp)
vaesdec %xmm1,%xmm2,%xmm2
cmpl 32+16(%rsp),%ecx
movq 64+32(%rsp),%rbx
vaesdec %xmm1,%xmm3,%xmm3
prefetcht0 31(%r12)
vaesdec %xmm1,%xmm4,%xmm4
prefetcht0 15(%r10)
vaesdec %xmm1,%xmm5,%xmm5
leaq (%r12,%rbx,1),%rbx
cmovgeq %rsp,%r12
vaesdec %xmm1,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesdec %xmm1,%xmm7,%xmm7
subq %r12,%rbx
vaesdec %xmm1,%xmm8,%xmm8
vmovdqu 16(%r12),%xmm10
movq %rbx,64+32(%rsp)
vaesdec %xmm1,%xmm9,%xmm9
vmovups -8(%rsi),%xmm1
leaq 16(%r12,%rbx,1),%r12
vaesdec %xmm0,%xmm2,%xmm2
cmpl 32+20(%rsp),%ecx
movq 64+40(%rsp),%rbx
vaesdec %xmm0,%xmm3,%xmm3
prefetcht0 31(%r13)
vaesdec %xmm0,%xmm4,%xmm4
prefetcht0 15(%r11)
vaesdec %xmm0,%xmm5,%xmm5
leaq (%rbx,%r13,1),%rbx
cmovgeq %rsp,%r13
vaesdec %xmm0,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesdec %xmm0,%xmm7,%xmm7
subq %r13,%rbx
vaesdec %xmm0,%xmm8,%xmm8
vmovdqu 16(%r13),%xmm11
movq %rbx,64+40(%rsp)
vaesdec %xmm0,%xmm9,%xmm9
vmovups 8(%rsi),%xmm0
leaq 16(%r13,%rbx,1),%r13
vaesdec %xmm1,%xmm2,%xmm2
cmpl 32+24(%rsp),%ecx
movq 64+48(%rsp),%rbx
vaesdec %xmm1,%xmm3,%xmm3
prefetcht0 31(%r14)
vaesdec %xmm1,%xmm4,%xmm4
prefetcht0 15(%r12)
vaesdec %xmm1,%xmm5,%xmm5
leaq (%r14,%rbx,1),%rbx
cmovgeq %rsp,%r14
vaesdec %xmm1,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesdec %xmm1,%xmm7,%xmm7
subq %r14,%rbx
vaesdec %xmm1,%xmm8,%xmm8
vmovdqu 16(%r14),%xmm12
movq %rbx,64+48(%rsp)
vaesdec %xmm1,%xmm9,%xmm9
vmovups 24(%rsi),%xmm1
leaq 16(%r14,%rbx,1),%r14
vaesdec %xmm0,%xmm2,%xmm2
cmpl 32+28(%rsp),%ecx
movq 64+56(%rsp),%rbx
vaesdec %xmm0,%xmm3,%xmm3
prefetcht0 31(%r15)
vaesdec %xmm0,%xmm4,%xmm4
prefetcht0 15(%r13)
vaesdec %xmm0,%xmm5,%xmm5
leaq (%r15,%rbx,1),%rbx
cmovgeq %rsp,%r15
vaesdec %xmm0,%xmm6,%xmm6
cmovgq %rsp,%rbx
vaesdec %xmm0,%xmm7,%xmm7
subq %r15,%rbx
vaesdec %xmm0,%xmm8,%xmm8
vmovdqu 16(%r15),%xmm13
movq %rbx,64+56(%rsp)
vaesdec %xmm0,%xmm9,%xmm9
vmovups 40(%rsi),%xmm0
leaq 16(%r15,%rbx,1),%r15
vmovdqu 32(%rsp),%xmm14
prefetcht0 15(%r14)
prefetcht0 15(%r15)
cmpl $11,%eax
jb L$dec8x_tail
vaesdec %xmm1,%xmm2,%xmm2
vaesdec %xmm1,%xmm3,%xmm3
vaesdec %xmm1,%xmm4,%xmm4
vaesdec %xmm1,%xmm5,%xmm5
vaesdec %xmm1,%xmm6,%xmm6
vaesdec %xmm1,%xmm7,%xmm7
vaesdec %xmm1,%xmm8,%xmm8
vaesdec %xmm1,%xmm9,%xmm9
vmovups 176-120(%rsi),%xmm1
vaesdec %xmm0,%xmm2,%xmm2
vaesdec %xmm0,%xmm3,%xmm3
vaesdec %xmm0,%xmm4,%xmm4
vaesdec %xmm0,%xmm5,%xmm5
vaesdec %xmm0,%xmm6,%xmm6
vaesdec %xmm0,%xmm7,%xmm7
vaesdec %xmm0,%xmm8,%xmm8
vaesdec %xmm0,%xmm9,%xmm9
vmovups 192-120(%rsi),%xmm0
je L$dec8x_tail
vaesdec %xmm1,%xmm2,%xmm2
vaesdec %xmm1,%xmm3,%xmm3
vaesdec %xmm1,%xmm4,%xmm4
vaesdec %xmm1,%xmm5,%xmm5
vaesdec %xmm1,%xmm6,%xmm6
vaesdec %xmm1,%xmm7,%xmm7
vaesdec %xmm1,%xmm8,%xmm8
vaesdec %xmm1,%xmm9,%xmm9
vmovups 208-120(%rsi),%xmm1
vaesdec %xmm0,%xmm2,%xmm2
vaesdec %xmm0,%xmm3,%xmm3
vaesdec %xmm0,%xmm4,%xmm4
vaesdec %xmm0,%xmm5,%xmm5
vaesdec %xmm0,%xmm6,%xmm6
vaesdec %xmm0,%xmm7,%xmm7
vaesdec %xmm0,%xmm8,%xmm8
vaesdec %xmm0,%xmm9,%xmm9
vmovups 224-120(%rsi),%xmm0
L$dec8x_tail:
vaesdec %xmm1,%xmm2,%xmm2
vpxor %xmm15,%xmm15,%xmm15
vaesdec %xmm1,%xmm3,%xmm3
vaesdec %xmm1,%xmm4,%xmm4
vpcmpgtd %xmm15,%xmm14,%xmm15
vaesdec %xmm1,%xmm5,%xmm5
vaesdec %xmm1,%xmm6,%xmm6
vpaddd %xmm14,%xmm15,%xmm15
vmovdqu 48(%rsp),%xmm14
vaesdec %xmm1,%xmm7,%xmm7
movq 64(%rsp),%rbx
vaesdec %xmm1,%xmm8,%xmm8
vaesdec %xmm1,%xmm9,%xmm9
vmovups 16-120(%rsi),%xmm1
vaesdeclast %xmm0,%xmm2,%xmm2
vmovdqa %xmm15,32(%rsp)
vpxor %xmm15,%xmm15,%xmm15
vaesdeclast %xmm0,%xmm3,%xmm3
vpxor 0(%rbp),%xmm2,%xmm2
vaesdeclast %xmm0,%xmm4,%xmm4
vpxor 16(%rbp),%xmm3,%xmm3
vpcmpgtd %xmm15,%xmm14,%xmm15
vaesdeclast %xmm0,%xmm5,%xmm5
vpxor 32(%rbp),%xmm4,%xmm4
vaesdeclast %xmm0,%xmm6,%xmm6
vpxor 48(%rbp),%xmm5,%xmm5
vpaddd %xmm15,%xmm14,%xmm14
vmovdqu -120(%rsi),%xmm15
vaesdeclast %xmm0,%xmm7,%xmm7
vpxor 64(%rbp),%xmm6,%xmm6
vaesdeclast %xmm0,%xmm8,%xmm8
vpxor 80(%rbp),%xmm7,%xmm7
vmovdqa %xmm14,48(%rsp)
vaesdeclast %xmm0,%xmm9,%xmm9
vpxor 96(%rbp),%xmm8,%xmm8
vmovups 32-120(%rsi),%xmm0
vmovups %xmm2,-16(%r8)
subq %rbx,%r8
vmovdqu 128+0(%rsp),%xmm2
vpxor 112(%rbp),%xmm9,%xmm9
vmovups %xmm3,-16(%r9)
subq 72(%rsp),%r9
vmovdqu %xmm2,0(%rbp)
vpxor %xmm15,%xmm2,%xmm2
vmovdqu 128+16(%rsp),%xmm3
vmovups %xmm4,-16(%r10)
subq 80(%rsp),%r10
vmovdqu %xmm3,16(%rbp)
vpxor %xmm15,%xmm3,%xmm3
vmovdqu 128+32(%rsp),%xmm4
vmovups %xmm5,-16(%r11)
subq 88(%rsp),%r11
vmovdqu %xmm4,32(%rbp)
vpxor %xmm15,%xmm4,%xmm4
vmovdqu 128+48(%rsp),%xmm5
vmovups %xmm6,-16(%r12)
subq 96(%rsp),%r12
vmovdqu %xmm5,48(%rbp)
vpxor %xmm15,%xmm5,%xmm5
vmovdqu %xmm10,64(%rbp)
vpxor %xmm10,%xmm15,%xmm6
vmovups %xmm7,-16(%r13)
subq 104(%rsp),%r13
vmovdqu %xmm11,80(%rbp)
vpxor %xmm11,%xmm15,%xmm7
vmovups %xmm8,-16(%r14)
subq 112(%rsp),%r14
vmovdqu %xmm12,96(%rbp)
vpxor %xmm12,%xmm15,%xmm8
vmovups %xmm9,-16(%r15)
subq 120(%rsp),%r15
vmovdqu %xmm13,112(%rbp)
vpxor %xmm13,%xmm15,%xmm9
xorq $128,%rbp
decl %edx
jnz L$oop_dec8x
movq 16(%rsp),%rax
L$dec8x_done:
vzeroupper
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
L$dec8x_epilogue:
.byte 0xf3,0xc3

1335
deps/openssl/asm/x64-macosx-gas/aes/aesni-sha1-x86_64.s

File diff suppressed because it is too large

4296
deps/openssl/asm/x64-macosx-gas/aes/aesni-sha256-x86_64.s

File diff suppressed because it is too large

82
deps/openssl/asm/x64-macosx-gas/aes/aesni-x86_64.s

@@ -503,7 +503,7 @@ _aesni_ecb_encrypt:
testl %r8d,%r8d
jz L$ecb_decrypt
cmpq $128,%rdx
cmpq $0x80,%rdx
jb L$ecb_enc_tail
movdqu (%rdi),%xmm2
@@ -515,7 +515,7 @@ _aesni_ecb_encrypt:
movdqu 96(%rdi),%xmm8
movdqu 112(%rdi),%xmm9
leaq 128(%rdi),%rdi
subq $128,%rdx
subq $0x80,%rdx
jmp L$ecb_enc_loop8_enter
.p2align 4
L$ecb_enc_loop8:
@@ -543,7 +543,7 @@ L$ecb_enc_loop8_enter:
call _aesni_encrypt8
subq $128,%rdx
subq $0x80,%rdx
jnc L$ecb_enc_loop8
movups %xmm2,(%rsi)
@@ -557,22 +557,22 @@ L$ecb_enc_loop8_enter:
movups %xmm8,96(%rsi)
movups %xmm9,112(%rsi)
leaq 128(%rsi),%rsi
addq $128,%rdx
addq $0x80,%rdx
jz L$ecb_ret
L$ecb_enc_tail:
movups (%rdi),%xmm2
cmpq $32,%rdx
cmpq $0x20,%rdx
jb L$ecb_enc_one
movups 16(%rdi),%xmm3
je L$ecb_enc_two
movups 32(%rdi),%xmm4
cmpq $64,%rdx
cmpq $0x40,%rdx
jb L$ecb_enc_three
movups 48(%rdi),%xmm5
je L$ecb_enc_four
movups 64(%rdi),%xmm6
cmpq $96,%rdx
cmpq $0x60,%rdx
jb L$ecb_enc_five
movups 80(%rdi),%xmm7
je L$ecb_enc_six
@@ -646,7 +646,7 @@ L$ecb_enc_six:
.p2align 4
L$ecb_decrypt:
cmpq $128,%rdx
cmpq $0x80,%rdx
jb L$ecb_dec_tail
movdqu (%rdi),%xmm2
@@ -658,7 +658,7 @@ L$ecb_decrypt:
movdqu 96(%rdi),%xmm8
movdqu 112(%rdi),%xmm9
leaq 128(%rdi),%rdi
subq $128,%rdx
subq $0x80,%rdx
jmp L$ecb_dec_loop8_enter
.p2align 4
L$ecb_dec_loop8:
@@ -687,7 +687,7 @@ L$ecb_dec_loop8_enter:
call _aesni_decrypt8
movups (%r11),%xmm0
subq $128,%rdx
subq $0x80,%rdx
jnc L$ecb_dec_loop8
movups %xmm2,(%rsi)
@@ -709,22 +709,22 @@ L$ecb_dec_loop8_enter:
movups %xmm9,112(%rsi)
pxor %xmm9,%xmm9
leaq 128(%rsi),%rsi
addq $128,%rdx
addq $0x80,%rdx
jz L$ecb_ret
L$ecb_dec_tail:
movups (%rdi),%xmm2
cmpq $32,%rdx
cmpq $0x20,%rdx
jb L$ecb_dec_one
movups 16(%rdi),%xmm3
je L$ecb_dec_two
movups 32(%rdi),%xmm4
cmpq $64,%rdx
cmpq $0x40,%rdx
jb L$ecb_dec_three
movups 48(%rdi),%xmm5
je L$ecb_dec_four
movups 64(%rdi),%xmm6
cmpq $96,%rdx
cmpq $0x60,%rdx
jb L$ecb_dec_five
movups 80(%rdi),%xmm7
je L$ecb_dec_six
@@ -1598,7 +1598,7 @@ L$oop_enc1_8:
movdqa L$xts_magic(%rip),%xmm8
movdqa %xmm2,%xmm15
pshufd $95,%xmm2,%xmm9
pshufd $0x5f,%xmm2,%xmm9
pxor %xmm0,%xmm1
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
@@ -1697,7 +1697,7 @@ L$xts_enc_grandloop:
.byte 102,15,56,220,248
movups 64(%r11),%xmm0
movdqa %xmm8,80(%rsp)
pshufd $95,%xmm15,%xmm9
pshufd $0x5f,%xmm15,%xmm9
jmp L$xts_enc_loop6
.p2align 5
L$xts_enc_loop6:
@@ -1836,13 +1836,13 @@ L$xts_enc_short:
jz L$xts_enc_done
pxor %xmm0,%xmm11
cmpq $32,%rdx
cmpq $0x20,%rdx
jb L$xts_enc_one
pxor %xmm0,%xmm12
je L$xts_enc_two
pxor %xmm0,%xmm13
cmpq $64,%rdx
cmpq $0x40,%rdx
jb L$xts_enc_three
pxor %xmm0,%xmm14
je L$xts_enc_four
@@ -2069,7 +2069,7 @@ L$oop_enc1_11:
movdqa L$xts_magic(%rip),%xmm8
movdqa %xmm2,%xmm15
pshufd $95,%xmm2,%xmm9
pshufd $0x5f,%xmm2,%xmm9
pxor %xmm0,%xmm1
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
@@ -2168,7 +2168,7 @@ L$xts_dec_grandloop:
.byte 102,15,56,222,248
movups 64(%r11),%xmm0
movdqa %xmm8,80(%rsp)
pshufd $95,%xmm15,%xmm9
pshufd $0x5f,%xmm15,%xmm9
jmp L$xts_dec_loop6
.p2align 5
L$xts_dec_loop6:
@@ -2308,13 +2308,13 @@ L$xts_dec_short:
jz L$xts_dec_done
pxor %xmm0,%xmm12
cmpq $32,%rdx
cmpq $0x20,%rdx
jb L$xts_dec_one
pxor %xmm0,%xmm13
je L$xts_dec_two
pxor %xmm0,%xmm14
cmpq $64,%rdx
cmpq $0x40,%rdx
jb L$xts_dec_three
je L$xts_dec_four
@@ -2345,7 +2345,7 @@ L$xts_dec_short:
pcmpgtd %xmm15,%xmm14
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
pshufd $19,%xmm14,%xmm11
pshufd $0x13,%xmm14,%xmm11
andq $15,%r9
jz L$xts_dec_ret
@@ -2634,7 +2634,7 @@ L$cbc_decrypt_bulk:
leaq -8(%rax),%rbp
movups (%r8),%xmm10
movl %r10d,%eax
cmpq $80,%rdx
cmpq $0x50,%rdx
jbe L$cbc_dec_tail
movups (%rcx),%xmm0
@@ -2650,14 +2650,14 @@ L$cbc_decrypt_bulk:
movdqu 80(%rdi),%xmm7
movdqa %xmm6,%xmm15
movl _OPENSSL_ia32cap_P+4(%rip),%r9d
cmpq $112,%rdx
cmpq $0x70,%rdx
jbe L$cbc_dec_six_or_seven
andl $71303168,%r9d
subq $80,%rdx
subq $0x50,%rdx
cmpl $4194304,%r9d
je L$cbc_dec_loop6_enter
subq $32,%rdx
subq $0x20,%rdx
leaq 112(%rcx),%rcx
jmp L$cbc_dec_loop8_enter
.p2align 4
@@ -2672,7 +2672,7 @@ L$cbc_dec_loop8_enter:
movups 16-112(%rcx),%xmm1
pxor %xmm0,%xmm4
xorq %r11,%r11
cmpq $112,%rdx
cmpq $0x70,%rdx
pxor %xmm0,%xmm5
pxor %xmm0,%xmm6
pxor %xmm0,%xmm7
@@ -2857,21 +2857,21 @@ L$cbc_dec_done:
movups %xmm8,96(%rsi)
leaq 112(%rsi),%rsi
subq $128,%rdx
subq $0x80,%rdx
ja L$cbc_dec_loop8
movaps %xmm9,%xmm2
leaq -112(%rcx),%rcx
addq $112,%rdx
addq $0x70,%rdx
jle L$cbc_dec_clear_tail_collected
movups %xmm9,(%rsi)
leaq 16(%rsi),%rsi
cmpq $80,%rdx
cmpq $0x50,%rdx
jbe L$cbc_dec_tail
movaps %xmm11,%xmm2
L$cbc_dec_six_or_seven:
cmpq $96,%rdx
cmpq $0x60,%rdx
ja L$cbc_dec_seven
movaps %xmm7,%xmm8
@@ -2964,33 +2964,33 @@ L$cbc_dec_loop6_enter:
movl %r10d,%eax
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
subq $96,%rdx
subq $0x60,%rdx
ja L$cbc_dec_loop6
movdqa %xmm7,%xmm2
addq $80,%rdx
addq $0x50,%rdx
jle L$cbc_dec_clear_tail_collected
movups %xmm7,(%rsi)
leaq 16(%rsi),%rsi
L$cbc_dec_tail:
movups (%rdi),%xmm2
subq $16,%rdx
subq $0x10,%rdx
jbe L$cbc_dec_one
movups 16(%rdi),%xmm3
movaps %xmm2,%xmm11
subq $16,%rdx
subq $0x10,%rdx
jbe L$cbc_dec_two
movups 32(%rdi),%xmm4
movaps %xmm3,%xmm12
subq $16,%rdx
subq $0x10,%rdx
jbe L$cbc_dec_three
movups 48(%rdi),%xmm5
movaps %xmm4,%xmm13
subq $16,%rdx
subq $0x10,%rdx
jbe L$cbc_dec_four
movups 64(%rdi),%xmm6
@@ -3015,7 +3015,7 @@ L$cbc_dec_tail:
movdqa %xmm6,%xmm2
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
subq $16,%rdx
subq $0x10,%rdx
jmp L$cbc_dec_tail_collected
.p2align 4
@@ -3332,7 +3332,7 @@ L$oop_key192:
pslldq $4,%xmm0
pxor %xmm3,%xmm0
pshufd $255,%xmm0,%xmm3
pshufd $0xff,%xmm0,%xmm3
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
@@ -3419,7 +3419,7 @@ L$oop_key256:
decl %r10d
jz L$done_key256
pshufd $255,%xmm0,%xmm2
pshufd $0xff,%xmm0,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,221,211

158
deps/openssl/asm/x64-macosx-gas/aes/bsaes-x86_64.s

@@ -324,45 +324,45 @@ L$enc_sbox:
pxor %xmm2,%xmm5
decl %r10d
jl L$enc_done
pshufd $147,%xmm15,%xmm7
pshufd $147,%xmm0,%xmm8
pshufd $0x93,%xmm15,%xmm7
pshufd $0x93,%xmm0,%xmm8
pxor %xmm7,%xmm15
pshufd $147,%xmm3,%xmm9
pshufd $0x93,%xmm3,%xmm9
pxor %xmm8,%xmm0
pshufd $147,%xmm5,%xmm10
pshufd $0x93,%xmm5,%xmm10
pxor %xmm9,%xmm3
pshufd $147,%xmm2,%xmm11
pshufd $0x93,%xmm2,%xmm11
pxor %xmm10,%xmm5
pshufd $147,%xmm6,%xmm12
pshufd $0x93,%xmm6,%xmm12
pxor %xmm11,%xmm2
pshufd $147,%xmm1,%xmm13
pshufd $0x93,%xmm1,%xmm13
pxor %xmm12,%xmm6
pshufd $147,%xmm4,%xmm14
pshufd $0x93,%xmm4,%xmm14
pxor %xmm13,%xmm1
pxor %xmm14,%xmm4
pxor %xmm15,%xmm8
pxor %xmm4,%xmm7
pxor %xmm4,%xmm8
pshufd $78,%xmm15,%xmm15
pshufd $0x4E,%xmm15,%xmm15
pxor %xmm0,%xmm9
pshufd $78,%xmm0,%xmm0
pshufd $0x4E,%xmm0,%xmm0
pxor %xmm2,%xmm12
pxor %xmm7,%xmm15
pxor %xmm6,%xmm13
pxor %xmm8,%xmm0
pxor %xmm5,%xmm11
pshufd $78,%xmm2,%xmm7
pshufd $0x4E,%xmm2,%xmm7
pxor %xmm1,%xmm14
pshufd $78,%xmm6,%xmm8
pshufd $0x4E,%xmm6,%xmm8
pxor %xmm3,%xmm10
pshufd $78,%xmm5,%xmm2
pshufd $0x4E,%xmm5,%xmm2
pxor %xmm4,%xmm10
pshufd $78,%xmm4,%xmm6
pshufd $0x4E,%xmm4,%xmm6
pxor %xmm4,%xmm11
pshufd $78,%xmm1,%xmm5
pshufd $0x4E,%xmm1,%xmm5
pxor %xmm11,%xmm7
pshufd $78,%xmm3,%xmm1
pshufd $0x4E,%xmm3,%xmm1
pxor %xmm12,%xmm8
pxor %xmm10,%xmm2
pxor %xmm14,%xmm6
@@ -796,24 +796,24 @@ L$dec_sbox:
decl %r10d
jl L$dec_done
pshufd $78,%xmm15,%xmm7
pshufd $78,%xmm2,%xmm13
pshufd $0x4E,%xmm15,%xmm7
pshufd $0x4E,%xmm2,%xmm13
pxor %xmm15,%xmm7
pshufd $78,%xmm4,%xmm14
pshufd $0x4E,%xmm4,%xmm14
pxor %xmm2,%xmm13
pshufd $78,%xmm0,%xmm8
pshufd $0x4E,%xmm0,%xmm8
pxor %xmm4,%xmm14
pshufd $78,%xmm5,%xmm9
pshufd $0x4E,%xmm5,%xmm9
pxor %xmm0,%xmm8
pshufd $78,%xmm3,%xmm10
pshufd $0x4E,%xmm3,%xmm10
pxor %xmm5,%xmm9
pxor %xmm13,%xmm15
pxor %xmm13,%xmm0
pshufd $78,%xmm1,%xmm11
pshufd $0x4E,%xmm1,%xmm11
pxor %xmm3,%xmm10
pxor %xmm7,%xmm5
pxor %xmm8,%xmm3
pshufd $78,%xmm6,%xmm12
pshufd $0x4E,%xmm6,%xmm12
pxor %xmm1,%xmm11
pxor %xmm14,%xmm0
pxor %xmm9,%xmm1
@@ -827,45 +827,45 @@ L$dec_sbox:
pxor %xmm14,%xmm1
pxor %xmm14,%xmm6
pxor %xmm12,%xmm4
pshufd $147,%xmm15,%xmm7
pshufd $147,%xmm0,%xmm8
pshufd $0x93,%xmm15,%xmm7
pshufd $0x93,%xmm0,%xmm8
pxor %xmm7,%xmm15
pshufd $147,%xmm5,%xmm9
pshufd $0x93,%xmm5,%xmm9
pxor %xmm8,%xmm0
pshufd $147,%xmm3,%xmm10
pshufd $0x93,%xmm3,%xmm10
pxor %xmm9,%xmm5
pshufd $147,%xmm1,%xmm11
pshufd $0x93,%xmm1,%xmm11
pxor %xmm10,%xmm3
pshufd $147,%xmm6,%xmm12
pshufd $0x93,%xmm6,%xmm12
pxor %xmm11,%xmm1
pshufd $147,%xmm2,%xmm13
pshufd $0x93,%xmm2,%xmm13
pxor %xmm12,%xmm6
pshufd $147,%xmm4,%xmm14
pshufd $0x93,%xmm4,%xmm14
pxor %xmm13,%xmm2
pxor %xmm14,%xmm4
pxor %xmm15,%xmm8
pxor %xmm4,%xmm7
pxor %xmm4,%xmm8
pshufd $78,%xmm15,%xmm15
pshufd $0x4E,%xmm15,%xmm15
pxor %xmm0,%xmm9
pshufd $78,%xmm0,%xmm0
pshufd $0x4E,%xmm0,%xmm0
pxor %xmm1,%xmm12
pxor %xmm7,%xmm15
pxor %xmm6,%xmm13
pxor %xmm8,%xmm0
pxor %xmm3,%xmm11
pshufd $78,%xmm1,%xmm7
pshufd $0x4E,%xmm1,%xmm7
pxor %xmm2,%xmm14
pshufd $78,%xmm6,%xmm8
pshufd $0x4E,%xmm6,%xmm8
pxor %xmm5,%xmm10
pshufd $78,%xmm3,%xmm1
pshufd $0x4E,%xmm3,%xmm1
pxor %xmm4,%xmm10
pshufd $78,%xmm4,%xmm6
pshufd $0x4E,%xmm4,%xmm6
pxor %xmm4,%xmm11
pshufd $78,%xmm2,%xmm3
pshufd $0x4E,%xmm2,%xmm3
pxor %xmm11,%xmm7
pshufd $78,%xmm5,%xmm2
pshufd $0x4E,%xmm5,%xmm2
pxor %xmm12,%xmm8
pxor %xmm1,%xmm10
pxor %xmm14,%xmm6
@@ -1552,20 +1552,20 @@ L$xts_enc_prologue:
movdqa %xmm7,(%rax)
andq $-16,%r14
subq $128,%rsp
subq $0x80,%rsp
movdqa 32(%rbp),%xmm6
pxor %xmm14,%xmm14
movdqa L$xts_magic(%rip),%xmm12
pcmpgtd %xmm6,%xmm14
subq $128,%r14
subq $0x80,%r14
jc L$xts_enc_short
jmp L$xts_enc_loop
.p2align 4
L$xts_enc_loop:
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm15
movdqa %xmm6,0(%rsp)
@@ -1573,7 +1573,7 @@ L$xts_enc_loop:
pand %xmm12,%xmm13
pcmpgtd %xmm6,%xmm14
pxor %xmm13,%xmm6
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm0
movdqa %xmm6,16(%rsp)
@@ -1582,7 +1582,7 @@ L$xts_enc_loop:
pcmpgtd %xmm6,%xmm14
pxor %xmm13,%xmm6
movdqu 0(%r12),%xmm7
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm1
movdqa %xmm6,32(%rsp)
@@ -1592,7 +1592,7 @@ L$xts_enc_loop:
pxor %xmm13,%xmm6
movdqu 16(%r12),%xmm8
pxor %xmm7,%xmm15
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm2
movdqa %xmm6,48(%rsp)
@@ -1602,7 +1602,7 @@ L$xts_enc_loop:
pxor %xmm13,%xmm6
movdqu 32(%r12),%xmm9
pxor %xmm8,%xmm0
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm3
movdqa %xmm6,64(%rsp)
@@ -1612,7 +1612,7 @@ L$xts_enc_loop:
pxor %xmm13,%xmm6
movdqu 48(%r12),%xmm10
pxor %xmm9,%xmm1
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm4
movdqa %xmm6,80(%rsp)
@@ -1622,7 +1622,7 @@ L$xts_enc_loop:
pxor %xmm13,%xmm6
movdqu 64(%r12),%xmm11
pxor %xmm10,%xmm2
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm5
movdqa %xmm6,96(%rsp)
@@ -1666,20 +1666,20 @@ L$xts_enc_loop:
pxor %xmm14,%xmm14
movdqa L$xts_magic(%rip),%xmm12
pcmpgtd %xmm6,%xmm14
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
paddq %xmm6,%xmm6
pand %xmm12,%xmm13
pcmpgtd %xmm6,%xmm14
pxor %xmm13,%xmm6
subq $128,%r14
subq $0x80,%r14
jnc L$xts_enc_loop
L$xts_enc_short:
addq $128,%r14
addq $0x80,%r14
jz L$xts_enc_done
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm15
movdqa %xmm6,0(%rsp)
@@ -1687,7 +1687,7 @@ L$xts_enc_short:
pand %xmm12,%xmm13
pcmpgtd %xmm6,%xmm14
pxor %xmm13,%xmm6
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm0
movdqa %xmm6,16(%rsp)
@@ -1698,7 +1698,7 @@ L$xts_enc_short:
movdqu 0(%r12),%xmm7
cmpq $16,%r14
je L$xts_enc_1
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm1
movdqa %xmm6,32(%rsp)
@@ -1710,7 +1710,7 @@ L$xts_enc_short:
cmpq $32,%r14
je L$xts_enc_2
pxor %xmm7,%xmm15
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm2
movdqa %xmm6,48(%rsp)
@@ -1722,7 +1722,7 @@ L$xts_enc_short:
cmpq $48,%r14
je L$xts_enc_3
pxor %xmm8,%xmm0
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm3
movdqa %xmm6,64(%rsp)
@@ -1734,7 +1734,7 @@ L$xts_enc_short:
cmpq $64,%r14
je L$xts_enc_4
pxor %xmm9,%xmm1
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm4
movdqa %xmm6,80(%rsp)
@@ -1746,7 +1746,7 @@ L$xts_enc_short:
cmpq $80,%r14
je L$xts_enc_5
pxor %xmm10,%xmm2
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm5
movdqa %xmm6,96(%rsp)
@@ -2011,20 +2011,20 @@ L$xts_dec_prologue:
shlq $4,%rax
subq %rax,%r14
subq $128,%rsp
subq $0x80,%rsp
movdqa 32(%rbp),%xmm6
pxor %xmm14,%xmm14
movdqa L$xts_magic(%rip),%xmm12
pcmpgtd %xmm6,%xmm14
subq $128,%r14
subq $0x80,%r14
jc L$xts_dec_short
jmp L$xts_dec_loop
.p2align 4
L$xts_dec_loop:
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm15
movdqa %xmm6,0(%rsp)
@@ -2032,7 +2032,7 @@ L$xts_dec_loop:
pand %xmm12,%xmm13
pcmpgtd %xmm6,%xmm14
pxor %xmm13,%xmm6
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm0
movdqa %xmm6,16(%rsp)
@@ -2041,7 +2041,7 @@ L$xts_dec_loop:
pcmpgtd %xmm6,%xmm14
pxor %xmm13,%xmm6
movdqu 0(%r12),%xmm7
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm1
movdqa %xmm6,32(%rsp)
@@ -2051,7 +2051,7 @@ L$xts_dec_loop:
pxor %xmm13,%xmm6
movdqu 16(%r12),%xmm8
pxor %xmm7,%xmm15
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm2
movdqa %xmm6,48(%rsp)
@@ -2061,7 +2061,7 @@ L$xts_dec_loop:
pxor %xmm13,%xmm6
movdqu 32(%r12),%xmm9
pxor %xmm8,%xmm0
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm3
movdqa %xmm6,64(%rsp)
@@ -2071,7 +2071,7 @@ L$xts_dec_loop:
pxor %xmm13,%xmm6
movdqu 48(%r12),%xmm10
pxor %xmm9,%xmm1
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm4
movdqa %xmm6,80(%rsp)
@@ -2081,7 +2081,7 @@ L$xts_dec_loop:
pxor %xmm13,%xmm6
movdqu 64(%r12),%xmm11
pxor %xmm10,%xmm2
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm5
movdqa %xmm6,96(%rsp)
@@ -2125,20 +2125,20 @@ L$xts_dec_loop:
pxor %xmm14,%xmm14
movdqa L$xts_magic(%rip),%xmm12
pcmpgtd %xmm6,%xmm14
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
paddq %xmm6,%xmm6
pand %xmm12,%xmm13
pcmpgtd %xmm6,%xmm14
pxor %xmm13,%xmm6
subq $128,%r14
subq $0x80,%r14
jnc L$xts_dec_loop
L$xts_dec_short:
addq $128,%r14
addq $0x80,%r14
jz L$xts_dec_done
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm15
movdqa %xmm6,0(%rsp)
@@ -2146,7 +2146,7 @@ L$xts_dec_short:
pand %xmm12,%xmm13
pcmpgtd %xmm6,%xmm14
pxor %xmm13,%xmm6
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm0
movdqa %xmm6,16(%rsp)
@@ -2157,7 +2157,7 @@ L$xts_dec_short:
movdqu 0(%r12),%xmm7
cmpq $16,%r14
je L$xts_dec_1
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm1
movdqa %xmm6,32(%rsp)
@@ -2169,7 +2169,7 @@ L$xts_dec_short:
cmpq $32,%r14
je L$xts_dec_2
pxor %xmm7,%xmm15
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm2
movdqa %xmm6,48(%rsp)
@@ -2181,7 +2181,7 @@ L$xts_dec_short:
cmpq $48,%r14
je L$xts_dec_3
pxor %xmm8,%xmm0
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm3
movdqa %xmm6,64(%rsp)
@@ -2193,7 +2193,7 @@ L$xts_dec_short:
cmpq $64,%r14
je L$xts_dec_4
pxor %xmm9,%xmm1
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm4
movdqa %xmm6,80(%rsp)
@@ -2205,7 +2205,7 @@ L$xts_dec_short:
cmpq $80,%r14
je L$xts_dec_5
pxor %xmm10,%xmm2
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
pxor %xmm14,%xmm14
movdqa %xmm6,%xmm5
movdqa %xmm6,96(%rsp)
@@ -2382,7 +2382,7 @@ L$xts_dec_done:
pxor %xmm14,%xmm14
movdqa L$xts_magic(%rip),%xmm12
pcmpgtd %xmm6,%xmm14
pshufd $19,%xmm14,%xmm13
pshufd $0x13,%xmm14,%xmm13
movdqa %xmm6,%xmm5
paddq %xmm6,%xmm6
pand %xmm12,%xmm13

20
deps/openssl/asm/x64-macosx-gas/aes/vpaes-x86_64.s

@@ -60,7 +60,7 @@ L$enc_loop:
addq $16,%r11
pxor %xmm0,%xmm3
.byte 102,15,56,0,193
andq $48,%r11
andq $0x30,%r11
subq $1,%rax
pxor %xmm3,%xmm0
@@ -120,10 +120,10 @@ _vpaes_decrypt_core:
pand %xmm9,%xmm0
.byte 102,15,56,0,208
movdqa L$k_dipt+16(%rip),%xmm0
xorq $48,%r11
xorq $0x30,%r11
leaq L$k_dsbd(%rip),%r10
.byte 102,15,56,0,193
andq $48,%r11
andq $0x30,%r11
pxor %xmm5,%xmm2
movdqa L$k_mc_forward+48(%rip),%xmm5
pxor %xmm2,%xmm0
@@ -242,7 +242,7 @@ L$schedule_am_decrypting:
movdqa (%r8,%r10,1),%xmm1
.byte 102,15,56,0,217
movdqu %xmm3,(%rdx)
xorq $48,%r8
xorq $0x30,%r8
L$schedule_go:
cmpl $192,%esi
@@ -332,7 +332,7 @@ L$oop_schedule_256:
call _vpaes_schedule_mangle
pshufd $255,%xmm0,%xmm0
pshufd $0xFF,%xmm0,%xmm0
movdqa %xmm7,%xmm5
movdqa %xmm6,%xmm7
call _vpaes_schedule_low_round
@@ -399,8 +399,8 @@ L$schedule_mangle_last_dec:
.p2align 4
_vpaes_schedule_192_smear:
pshufd $128,%xmm6,%xmm1
pshufd $254,%xmm7,%xmm0
pshufd $0x80,%xmm6,%xmm1
pshufd $0xFE,%xmm7,%xmm0
pxor %xmm1,%xmm6
pxor %xmm1,%xmm1
pxor %xmm0,%xmm6
@@ -437,7 +437,7 @@ _vpaes_schedule_round:
pxor %xmm1,%xmm7
pshufd $255,%xmm0,%xmm0
pshufd $0xFF,%xmm0,%xmm0
.byte 102,15,58,15,192,1
@@ -596,7 +596,7 @@ L$schedule_mangle_both:
movdqa (%r8,%r10,1),%xmm1
.byte 102,15,56,0,217
addq $-16,%r8
andq $48,%r8
andq $0x30,%r8
movdqu %xmm3,(%rdx)
.byte 0xf3,0xc3
@@ -614,7 +614,7 @@ _vpaes_set_encrypt_key:
movl %eax,240(%rdx)
movl $0,%ecx
movl $48,%r8d
movl $0x30,%r8d
call _vpaes_schedule_core
xorl %eax,%eax
.byte 0xf3,0xc3

1696
deps/openssl/asm/x64-macosx-gas/bn/rsaz-avx2.s

File diff suppressed because it is too large

654
deps/openssl/asm/x64-macosx-gas/bn/rsaz-x86_64.s

@@ -19,6 +19,10 @@ L$sqr_body:
movq (%rsi),%rdx
movq 8(%rsi),%rax
movq %rcx,128(%rsp)
movl $0x80100,%r11d
andl _OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je L$oop_sqrx
jmp L$oop_sqr
.p2align 5
@@ -382,6 +386,276 @@ L$oop_sqr:
decl %r8d
jnz L$oop_sqr
jmp L$sqr_tail
.p2align 5
L$oop_sqrx:
movl %r8d,128+8(%rsp)
.byte 102,72,15,110,199
.byte 102,72,15,110,205
mulxq %rax,%r8,%r9
mulxq 16(%rsi),%rcx,%r10
xorq %rbp,%rbp
mulxq 24(%rsi),%rax,%r11
adcxq %rcx,%r9
mulxq 32(%rsi),%rcx,%r12
adcxq %rax,%r10
mulxq 40(%rsi),%rax,%r13
adcxq %rcx,%r11
.byte 0xc4,0x62,0xf3,0xf6,0xb6,0x30,0x00,0x00,0x00
adcxq %rax,%r12
adcxq %rcx,%r13
.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
adcxq %rax,%r14
adcxq %rbp,%r15
movq %r9,%rcx
shldq $1,%r8,%r9
shlq $1,%r8
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rdx,%r8
movq 8(%rsi),%rdx
adcxq %rbp,%r9
movq %rax,(%rsp)
movq %r8,8(%rsp)
mulxq 16(%rsi),%rax,%rbx
adoxq %rax,%r10
adcxq %rbx,%r11
.byte 0xc4,0x62,0xc3,0xf6,0x86,0x18,0x00,0x00,0x00
adoxq %rdi,%r11
adcxq %r8,%r12
mulxq 32(%rsi),%rax,%rbx
adoxq %rax,%r12
adcxq %rbx,%r13
mulxq 40(%rsi),%rdi,%r8
adoxq %rdi,%r13
adcxq %r8,%r14
.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
adoxq %rax,%r14
adcxq %rbx,%r15
.byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00
adoxq %rdi,%r15
adcxq %rbp,%r8
adoxq %rbp,%r8
movq %r11,%rbx
shldq $1,%r10,%r11
shldq $1,%rcx,%r10
xorl %ebp,%ebp
mulxq %rdx,%rax,%rcx
movq 16(%rsi),%rdx
adcxq %rax,%r9
adcxq %rcx,%r10
adcxq %rbp,%r11
movq %r9,16(%rsp)
.byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00
.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00
adoxq %rdi,%r12
adcxq %r9,%r13
mulxq 32(%rsi),%rax,%rcx
adoxq %rax,%r13
adcxq %rcx,%r14
mulxq 40(%rsi),%rdi,%r9
adoxq %rdi,%r14
adcxq %r9,%r15
.byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00
adoxq %rax,%r15
adcxq %rcx,%r8
.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x38,0x00,0x00,0x00
adoxq %rdi,%r8
adcxq %rbp,%r9
adoxq %rbp,%r9
movq %r13,%rcx
shldq $1,%r12,%r13
shldq $1,%rbx,%r12
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rax,%r11
adcxq %rdx,%r12
movq 24(%rsi),%rdx
adcxq %rbp,%r13
movq %r11,32(%rsp)
.byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00
.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00
adoxq %rax,%r14
adcxq %rbx,%r15
mulxq 40(%rsi),%rdi,%r10
adoxq %rdi,%r15
adcxq %r10,%r8
mulxq 48(%rsi),%rax,%rbx
adoxq %rax,%r8
adcxq %rbx,%r9
mulxq 56(%rsi),%rdi,%r10
adoxq %rdi,%r9
adcxq %rbp,%r10
adoxq %rbp,%r10
.byte 0x66
movq %r15,%rbx
shldq $1,%r14,%r15
shldq $1,%rcx,%r14
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rax,%r13
adcxq %rdx,%r14
movq 32(%rsi),%rdx
adcxq %rbp,%r15
movq %r13,48(%rsp)
movq %r14,56(%rsp)
.byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00
adoxq %rdi,%r8
adcxq %r11,%r9
mulxq 48(%rsi),%rax,%rcx
adoxq %rax,%r9
adcxq %rcx,%r10
mulxq 56(%rsi),%rdi,%r11
adoxq %rdi,%r10
adcxq %rbp,%r11
adoxq %rbp,%r11
movq %r9,%rcx
shldq $1,%r8,%r9
shldq $1,%rbx,%r8
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rax,%r15
adcxq %rdx,%r8
movq 40(%rsi),%rdx
adcxq %rbp,%r9
movq %r15,64(%rsp)
movq %r8,72(%rsp)
.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
adoxq %rax,%r10
adcxq %rbx,%r11
.byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00
adoxq %rdi,%r11
adcxq %rbp,%r12
adoxq %rbp,%r12
movq %r11,%rbx
shldq $1,%r10,%r11
shldq $1,%rcx,%r10
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rax,%r9
adcxq %rdx,%r10
movq 48(%rsi),%rdx
adcxq %rbp,%r11
movq %r9,80(%rsp)
movq %r10,88(%rsp)
.byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00
adoxq %rax,%r12
adoxq %rbp,%r13
xorq %r14,%r14
shldq $1,%r13,%r14
shldq $1,%r12,%r13
shldq $1,%rbx,%r12
xorl %ebp,%ebp
mulxq %rdx,%rax,%rdx
adcxq %rax,%r11
adcxq %rdx,%r12
movq 56(%rsi),%rdx
adcxq %rbp,%r13
.byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00
.byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00
mulxq %rdx,%rax,%rdx
adoxq %rax,%r13
adoxq %rbp,%rdx
.byte 0x66
addq %rdx,%r14
movq %r13,112(%rsp)
movq %r14,120(%rsp)
.byte 102,72,15,126,199
.byte 102,72,15,126,205
movq 128(%rsp),%rdx
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
call __rsaz_512_reducex
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
adcq 88(%rsp),%r11
adcq 96(%rsp),%r12
adcq 104(%rsp),%r13
adcq 112(%rsp),%r14
adcq 120(%rsp),%r15
sbbq %rcx,%rcx
call __rsaz_512_subtract
movq %r8,%rdx
movq %r9,%rax
movl 128+8(%rsp),%r8d
movq %rdi,%rsi
decl %r8d
jnz L$oop_sqrx
L$sqr_tail:
leaq 128+24+48(%rsp),%rax
movq -48(%rax),%r15
@@ -410,6 +684,10 @@ L$mul_body:
.byte 102,72,15,110,199
.byte 102,72,15,110,201
movq %r8,128(%rsp)
movl $0x80100,%r11d
andl _OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je L$mulx
movq (%rdx),%rbx
movq %rdx,%rbp
call __rsaz_512_mul
@@ -427,6 +705,29 @@ L$mul_body:
movq 56(%rsp),%r15
call __rsaz_512_reduce
jmp L$mul_tail
.p2align 5
L$mulx:
movq %rdx,%rbp
movq (%rdx),%rdx
call __rsaz_512_mulx
.byte 102,72,15,126,199
.byte 102,72,15,126,205
movq 128(%rsp),%rdx
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
call __rsaz_512_reducex
L$mul_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
@@ -517,6 +818,10 @@ L$mul_gather4_body:
por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
movl $0x80100,%r11d
andl _OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je L$mulx_gather
.byte 102,76,15,126,195
movq %r8,128(%rsp)
@@ -697,6 +1002,142 @@ L$oop_mul_gather:
movq 56(%rsp),%r15
call __rsaz_512_reduce
jmp L$mul_gather_tail
.p2align 5
L$mulx_gather:
.byte 102,76,15,126,194
movq %r8,128(%rsp)
movq %rdi,128+8(%rsp)
movq %rcx,128+16(%rsp)
mulxq (%rsi),%rbx,%r8
movq %rbx,(%rsp)
xorl %edi,%edi
mulxq 8(%rsi),%rax,%r9
mulxq 16(%rsi),%rbx,%r10
adcxq %rax,%r8
mulxq 24(%rsi),%rax,%r11
adcxq %rbx,%r9
mulxq 32(%rsi),%rbx,%r12
adcxq %rax,%r10
mulxq 40(%rsi),%rax,%r13
adcxq %rbx,%r11
mulxq 48(%rsi),%rbx,%r14
adcxq %rax,%r12
mulxq 56(%rsi),%rax,%r15
adcxq %rbx,%r13
adcxq %rax,%r14
.byte 0x67
movq %r8,%rbx
adcxq %rdi,%r15
movq $-7,%rcx
jmp L$oop_mulx_gather
.p2align 5
L$oop_mulx_gather:
movdqa 0(%rbp),%xmm8
movdqa 16(%rbp),%xmm9
movdqa 32(%rbp),%xmm10
movdqa 48(%rbp),%xmm11
pand %xmm0,%xmm8
movdqa 64(%rbp),%xmm12
pand %xmm1,%xmm9
movdqa 80(%rbp),%xmm13
pand %xmm2,%xmm10
movdqa 96(%rbp),%xmm14
pand %xmm3,%xmm11
movdqa 112(%rbp),%xmm15
leaq 128(%rbp),%rbp
pand %xmm4,%xmm12
pand %xmm5,%xmm13
pand %xmm6,%xmm14
pand %xmm7,%xmm15
por %xmm10,%xmm8
por %xmm11,%xmm9
por %xmm12,%xmm8
por %xmm13,%xmm9
por %xmm14,%xmm8
por %xmm15,%xmm9
por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
.byte 102,76,15,126,194
.byte 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00
adcxq %rax,%rbx
adoxq %r9,%r8
mulxq 8(%rsi),%rax,%r9
adcxq %rax,%r8
adoxq %r10,%r9
mulxq 16(%rsi),%rax,%r10
adcxq %rax,%r9
adoxq %r11,%r10
.byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00
adcxq %rax,%r10
adoxq %r12,%r11
mulxq 32(%rsi),%rax,%r12
adcxq %rax,%r11
adoxq %r13,%r12
mulxq 40(%rsi),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13
.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
adcxq %rax,%r13
.byte 0x67
adoxq %r15,%r14
mulxq 56(%rsi),%rax,%r15
movq %rbx,64(%rsp,%rcx,8)
adcxq %rax,%r14
adoxq %rdi,%r15
movq %r8,%rbx
adcxq %rdi,%r15
incq %rcx
jnz L$oop_mulx_gather
movq %r8,64(%rsp)
movq %r9,64+8(%rsp)
movq %r10,64+16(%rsp)
movq %r11,64+24(%rsp)
movq %r12,64+32(%rsp)
movq %r13,64+40(%rsp)
movq %r14,64+48(%rsp)
movq %r15,64+56(%rsp)
movq 128(%rsp),%rdx
movq 128+8(%rsp),%rdi
movq 128+16(%rsp),%rbp
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
call __rsaz_512_reducex
L$mul_gather_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
@@ -741,6 +1182,10 @@ L$mul_scatter4_body:
movq %rcx,128(%rsp)
movq %rdi,%rbp
movl $0x80100,%r11d
andl _OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je L$mulx_scatter
movq (%rdi),%rbx
call __rsaz_512_mul
@@ -757,6 +1202,29 @@ L$mul_scatter4_body:
movq 56(%rsp),%r15
call __rsaz_512_reduce
jmp L$mul_scatter_tail
.p2align 5
L$mulx_scatter:
movq (%rdi),%rdx
call __rsaz_512_mulx
.byte 102,72,15,126,199
.byte 102,72,15,126,205
movq 128(%rsp),%rdx
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
call __rsaz_512_reducex
L$mul_scatter_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
@@ -803,6 +1271,7 @@ _rsaz_512_mul_by_one:
subq $128+24,%rsp
L$mul_by_one_body:
movl _OPENSSL_ia32cap_P+8(%rip),%eax
movq %rdx,%rbp
movq %rcx,128(%rsp)
@@ -823,7 +1292,16 @@ L$mul_by_one_body:
movdqa %xmm0,64(%rsp)
movdqa %xmm0,80(%rsp)
movdqa %xmm0,96(%rsp)
andl $0x80100,%eax
cmpl $0x80100,%eax
je L$by_one_callx
call __rsaz_512_reduce
jmp L$by_one_tail
.p2align 5
L$by_one_callx:
movq 128(%rsp),%rdx
call __rsaz_512_reducex
L$by_one_tail:
movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
@@ -928,6 +1406,62 @@ L$reduction_loop:
.byte 0xf3,0xc3
.p2align 5
__rsaz_512_reducex:
imulq %r8,%rdx
xorq %rsi,%rsi
movl $8,%ecx
jmp L$reduction_loopx
.p2align 5
L$reduction_loopx:
movq %r8,%rbx
mulxq 0(%rbp),%rax,%r8
adcxq %rbx,%rax
adoxq %r9,%r8
mulxq 8(%rbp),%rax,%r9
adcxq %rax,%r8
adoxq %r10,%r9
mulxq 16(%rbp),%rbx,%r10
adcxq %rbx,%r9
adoxq %r11,%r10
mulxq 24(%rbp),%rbx,%r11
adcxq %rbx,%r10
adoxq %r12,%r11
.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
movq %rdx,%rax
movq %r8,%rdx
adcxq %rbx,%r11
adoxq %r13,%r12
mulxq 128+8(%rsp),%rbx,%rdx
movq %rax,%rdx
mulxq 40(%rbp),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13
.byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00
adcxq %rax,%r13
adoxq %r15,%r14
mulxq 56(%rbp),%rax,%r15
movq %rbx,%rdx
adcxq %rax,%r14
adoxq %rsi,%r15
adcxq %rsi,%r15
decl %ecx
jne L$reduction_loopx
.byte 0xf3,0xc3
.p2align 5
__rsaz_512_subtract:
movq %r8,(%rdi)
@@ -1126,6 +1660,126 @@ L$oop_mul:
.byte 0xf3,0xc3
.p2align 5
__rsaz_512_mulx:
mulxq (%rsi),%rbx,%r8
movq $-6,%rcx
mulxq 8(%rsi),%rax,%r9
movq %rbx,8(%rsp)
mulxq 16(%rsi),%rbx,%r10
adcq %rax,%r8
mulxq 24(%rsi),%rax,%r11
adcq %rbx,%r9
mulxq 32(%rsi),%rbx,%r12
adcq %rax,%r10
mulxq 40(%rsi),%rax,%r13
adcq %rbx,%r11
mulxq 48(%rsi),%rbx,%r14
adcq %rax,%r12
mulxq 56(%rsi),%rax,%r15
movq 8(%rbp),%rdx
adcq %rbx,%r13
adcq %rax,%r14
adcq $0,%r15
xorq %rdi,%rdi
jmp L$oop_mulx
.p2align 5
L$oop_mulx:
movq %r8,%rbx
mulxq (%rsi),%rax,%r8
adcxq %rax,%rbx
adoxq %r9,%r8
mulxq 8(%rsi),%rax,%r9
adcxq %rax,%r8
adoxq %r10,%r9
mulxq 16(%rsi),%rax,%r10
adcxq %rax,%r9
adoxq %r11,%r10
mulxq 24(%rsi),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11
.byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00
adcxq %rax,%r11
adoxq %r13,%r12
mulxq 40(%rsi),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13
mulxq 48(%rsi),%rax,%r14
adcxq %rax,%r13
adoxq %r15,%r14
mulxq 56(%rsi),%rax,%r15
movq 64(%rbp,%rcx,8),%rdx
movq %rbx,8+64-8(%rsp,%rcx,8)
adcxq %rax,%r14
adoxq %rdi,%r15
adcxq %rdi,%r15
incq %rcx
jnz L$oop_mulx
movq %r8,%rbx
mulxq (%rsi),%rax,%r8
adcxq %rax,%rbx
adoxq %r9,%r8
.byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00
adcxq %rax,%r8
adoxq %r10,%r9
.byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00
adcxq %rax,%r9
adoxq %r11,%r10
mulxq 24(%rsi),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11
mulxq 32(%rsi),%rax,%r12
adcxq %rax,%r11
adoxq %r13,%r12
mulxq 40(%rsi),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13
.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
adcxq %rax,%r13
adoxq %r15,%r14
.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
adcxq %rax,%r14
adoxq %rdi,%r15
adcxq %rdi,%r15
movq %rbx,8+64-8(%rsp)
movq %r8,8+64(%rsp)
movq %r9,8+64+8(%rsp)
movq %r10,8+64+16(%rsp)
movq %r11,8+64+24(%rsp)
movq %r12,8+64+32(%rsp)
movq %r13,8+64+40(%rsp)
movq %r14,8+64+48(%rsp)
movq %r15,8+64+56(%rsp)
.byte 0xf3,0xc3
.globl _rsaz_512_scatter4
.p2align 4

386
deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s

@@ -10,6 +10,7 @@ _bn_mul_mont:
jnz L$mul_enter
cmpl $8,%r9d
jb L$mul_enter
movl _OPENSSL_ia32cap_P+8(%rip),%r11d
cmpq %rsi,%rdx
jne L$mul4x_enter
testl $7,%r9d
@@ -34,6 +35,20 @@ L$mul_enter:
movq %r11,8(%rsp,%r9,8)
L$mul_body:
subq %rsp,%r11
andq $-4096,%r11
L$mul_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x66,0x2e
jnc L$mul_page_walk
movq %rdx,%r12
movq (%r8),%r8
movq (%r12),%rbx
@@ -215,6 +230,9 @@ L$mul_epilogue:
.p2align 4
bn_mul4x_mont:
L$mul4x_enter:
andl $0x80100,%r11d
cmpl $0x80100,%r11d
je L$mulx4x_enter
pushq %rbx
pushq %rbp
pushq %r12
@@ -231,6 +249,14 @@ L$mul4x_enter:
movq %r11,8(%rsp,%r9,8)
L$mul4x_body:
subq %rsp,%r11
andq $-4096,%r11
L$mul4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$mul4x_page_walk
movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12
movq (%r8),%r8
@@ -612,6 +638,7 @@ L$mul4x_epilogue:
.p2align 5
bn_sqr8x_mont:
L$sqr8x_enter:
@@ -653,6 +680,15 @@ L$sqr8x_sp_alt:
subq %r11,%rsp
L$sqr8x_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-4096,%r11
L$sqr8x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$sqr8x_page_walk
movq %r9,%r10
negq %r9
@@ -664,6 +700,25 @@ L$sqr8x_body:
pxor %xmm0,%xmm0
.byte 102,72,15,110,207
.byte 102,73,15,110,218
movl _OPENSSL_ia32cap_P+8(%rip),%eax
andl $0x80100,%eax
cmpl $0x80100,%eax
jne L$sqr8x_nox
call _bn_sqrx8x_internal
leaq (%r8,%rcx,1),%rbx
movq %rcx,%r9
movq %rcx,%rdx
.byte 102,72,15,126,207
sarq $3+2,%rcx
jmp L$sqr8x_sub
.p2align 5
L$sqr8x_nox:
call _bn_sqr8x_internal
@@ -742,5 +797,336 @@ L$sqr8x_cond_copy:
L$sqr8x_epilogue:
.byte 0xf3,0xc3
.p2align 5
bn_mulx4x_mont:
L$mulx4x_enter:
movq %rsp,%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
shll $3,%r9d
.byte 0x67
xorq %r10,%r10
subq %r9,%r10
movq (%r8),%r8
leaq -72(%rsp,%r10,1),%rsp
andq $-128,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-4096,%r11
L$mulx4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x66,0x2e
jnc L$mulx4x_page_walk
leaq (%rdx,%r9,1),%r10
movq %r9,0(%rsp)
shrq $5,%r9
movq %r10,16(%rsp)
subq $1,%r9
movq %r8,24(%rsp)
movq %rdi,32(%rsp)
movq %rax,40(%rsp)
movq %r9,48(%rsp)
jmp L$mulx4x_body
.p2align 5
L$mulx4x_body:
leaq 8(%rdx),%rdi
movq (%rdx),%rdx
leaq 64+32(%rsp),%rbx
movq %rdx,%r9
mulxq 0(%rsi),%r8,%rax
mulxq 8(%rsi),%r11,%r14
addq %rax,%r11
movq %rdi,8(%rsp)
mulxq 16(%rsi),%r12,%r13
adcq %r14,%r12
adcq $0,%r13
movq %r8,%rdi
imulq 24(%rsp),%r8
xorq %rbp,%rbp
mulxq 24(%rsi),%rax,%r14
movq %r8,%rdx
leaq 32(%rsi),%rsi
adcxq %rax,%r13
adcxq %rbp,%r14
mulxq 0(%rcx),%rax,%r10
adcxq %rax,%rdi
adoxq %r11,%r10
mulxq 8(%rcx),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11
.byte 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00
movq 48(%rsp),%rdi
movq %r10,-32(%rbx)
adcxq %rax,%r11
adoxq %r13,%r12
mulxq 24(%rcx),%rax,%r15
movq %r9,%rdx
movq %r11,-24(%rbx)
adcxq %rax,%r12
adoxq %rbp,%r15
leaq 32(%rcx),%rcx
movq %r12,-16(%rbx)
jmp L$mulx4x_1st
.p2align 5
L$mulx4x_1st:
adcxq %rbp,%r15
mulxq 0(%rsi),%r10,%rax
adcxq %r14,%r10
mulxq 8(%rsi),%r11,%r14
adcxq %rax,%r11
mulxq 16(%rsi),%r12,%rax
adcxq %r14,%r12
mulxq 24(%rsi),%r13,%r14
.byte 0x67,0x67
movq %r8,%rdx
adcxq %rax,%r13
adcxq %rbp,%r14
leaq 32(%rsi),%rsi
leaq 32(%rbx),%rbx
adoxq %r15,%r10
mulxq 0(%rcx),%rax,%r15
adcxq %rax,%r10
adoxq %r15,%r11
mulxq 8(%rcx),%rax,%r15
adcxq %rax,%r11
adoxq %r15,%r12
mulxq 16(%rcx),%rax,%r15
movq %r10,-40(%rbx)
adcxq %rax,%r12
movq %r11,-32(%rbx)
adoxq %r15,%r13
mulxq 24(%rcx),%rax,%r15
movq %r9,%rdx
movq %r12,-24(%rbx)
adcxq %rax,%r13
adoxq %rbp,%r15
leaq 32(%rcx),%rcx
movq %r13,-16(%rbx)
decq %rdi
jnz L$mulx4x_1st
movq 0(%rsp),%rax
movq 8(%rsp),%rdi
adcq %rbp,%r15
addq %r15,%r14
sbbq %r15,%r15
movq %r14,-8(%rbx)
jmp L$mulx4x_outer
.p2align 5
L$mulx4x_outer:
movq (%rdi),%rdx
leaq 8(%rdi),%rdi
subq %rax,%rsi
movq %r15,(%rbx)
leaq 64+32(%rsp),%rbx
subq %rax,%rcx
mulxq 0(%rsi),%r8,%r11
xorl %ebp,%ebp
movq %rdx,%r9
mulxq 8(%rsi),%r14,%r12
adoxq -32(%rbx),%r8
adcxq %r14,%r11
mulxq 16(%rsi),%r15,%r13
adoxq -24(%rbx),%r11
adcxq %r15,%r12
adoxq %rbp,%r12
adcxq %rbp,%r13
movq %rdi,8(%rsp)
.byte 0x67
movq %r8,%r15
imulq 24(%rsp),%r8
xorl %ebp,%ebp
mulxq 24(%rsi),%rax,%r14
movq %r8,%rdx
adoxq -16(%rbx),%r12
adcxq %rax,%r13
adoxq -8(%rbx),%r13
adcxq %rbp,%r14
leaq 32(%rsi),%rsi
adoxq %rbp,%r14
mulxq 0(%rcx),%rax,%r10
adcxq %rax,%r15
adoxq %r11,%r10
mulxq 8(%rcx),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11
mulxq 16(%rcx),%rax,%r12
movq %r10,-32(%rbx)
adcxq %rax,%r11
adoxq %r13,%r12
mulxq 24(%rcx),%rax,%r15
movq %r9,%rdx
movq %r11,-24(%rbx)
leaq 32(%rcx),%rcx
adcxq %rax,%r12
adoxq %rbp,%r15
movq 48(%rsp),%rdi
movq %r12,-16(%rbx)
jmp L$mulx4x_inner
.p2align 5
L$mulx4x_inner:
mulxq 0(%rsi),%r10,%rax
adcxq %rbp,%r15
adoxq %r14,%r10
mulxq 8(%rsi),%r11,%r14
adcxq 0(%rbx),%r10
adoxq %rax,%r11
mulxq 16(%rsi),%r12,%rax
adcxq 8(%rbx),%r11
adoxq %r14,%r12
mulxq 24(%rsi),%r13,%r14
movq %r8,%rdx
adcxq 16(%rbx),%r12
adoxq %rax,%r13
adcxq 24(%rbx),%r13
adoxq %rbp,%r14
leaq 32(%rsi),%rsi
leaq 32(%rbx),%rbx
adcxq %rbp,%r14
adoxq %r15,%r10
mulxq 0(%rcx),%rax,%r15
adcxq %rax,%r10
adoxq %r15,%r11
mulxq 8(%rcx),%rax,%r15
adcxq %rax,%r11
adoxq %r15,%r12
mulxq 16(%rcx),%rax,%r15
movq %r10,-40(%rbx)
adcxq %rax,%r12
adoxq %r15,%r13
mulxq 24(%rcx),%rax,%r15
movq %r9,%rdx
movq %r11,-32(%rbx)
movq %r12,-24(%rbx)
adcxq %rax,%r13
adoxq %rbp,%r15
leaq 32(%rcx),%rcx
movq %r13,-16(%rbx)
decq %rdi
jnz L$mulx4x_inner
movq 0(%rsp),%rax
movq 8(%rsp),%rdi
adcq %rbp,%r15
subq 0(%rbx),%rbp
adcq %r15,%r14
sbbq %r15,%r15
movq %r14,-8(%rbx)
cmpq 16(%rsp),%rdi
jne L$mulx4x_outer
leaq 64(%rsp),%rbx
subq %rax,%rcx
negq %r15
movq %rax,%rdx
shrq $3+2,%rax
movq 32(%rsp),%rdi
jmp L$mulx4x_sub
.p2align 5
L$mulx4x_sub:
movq 0(%rbx),%r11
movq 8(%rbx),%r12
movq 16(%rbx),%r13
movq 24(%rbx),%r14
leaq 32(%rbx),%rbx
sbbq 0(%rcx),%r11
sbbq 8(%rcx),%r12
sbbq 16(%rcx),%r13
sbbq 24(%rcx),%r14
leaq 32(%rcx),%rcx
movq %r11,0(%rdi)
movq %r12,8(%rdi)
movq %r13,16(%rdi)
movq %r14,24(%rdi)
leaq 32(%rdi),%rdi
decq %rax
jnz L$mulx4x_sub
sbbq $0,%r15
leaq 64(%rsp),%rbx
subq %rdx,%rdi
.byte 102,73,15,110,207
pxor %xmm0,%xmm0
pshufd $0,%xmm1,%xmm1
movq 40(%rsp),%rsi
jmp L$mulx4x_cond_copy
.p2align 5
L$mulx4x_cond_copy:
movdqa 0(%rbx),%xmm2
movdqa 16(%rbx),%xmm3
leaq 32(%rbx),%rbx
movdqu 0(%rdi),%xmm4
movdqu 16(%rdi),%xmm5
leaq 32(%rdi),%rdi
movdqa %xmm0,-32(%rbx)
movdqa %xmm0,-16(%rbx)
pcmpeqd %xmm1,%xmm0
pand %xmm1,%xmm2
pand %xmm1,%xmm3
pand %xmm0,%xmm4
pand %xmm0,%xmm5
pxor %xmm0,%xmm0
por %xmm2,%xmm4
por %xmm3,%xmm5
movdqu %xmm4,-32(%rdi)
movdqu %xmm5,-16(%rdi)
subq $32,%rdx
jnz L$mulx4x_cond_copy
movq %rdx,(%rbx)
movq $1,%rax
movq -48(%rsi),%r15
movq -40(%rsi),%r14
movq -32(%rsi),%r13
movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
L$mulx4x_epilogue:
.byte 0xf3,0xc3
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 4

1340
deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s

File diff suppressed because it is too large

2
deps/openssl/asm/x64-macosx-gas/camellia/cmll-x86_64.s

@@ -1624,7 +1624,7 @@ L$cbc_prologue:
leaq -64-63(%rcx),%r10
subq %rsp,%r10
negq %r10
andq $960,%r10
andq $0x3C0,%r10
subq %r10,%rsp

1511
deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s

File diff suppressed because it is too large

34
deps/openssl/asm/x64-macosx-gas/md5/md5-x86_64.s

@@ -493,14 +493,14 @@ L$loop:
movl %ecx,%r11d
addl %ecx,%ebx
movl 0(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
xorl %edx,%r11d
leal -198630844(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 28(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
@@ -509,7 +509,7 @@ L$loop:
xorl %ebx,%r11d
addl %r11d,%edx
movl 56(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
@@ -518,7 +518,7 @@ L$loop:
xorl %eax,%r11d
addl %r11d,%ecx
movl 20(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
@@ -527,7 +527,7 @@ L$loop:
xorl %edx,%r11d
addl %r11d,%ebx
movl 48(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
@@ -536,7 +536,7 @@ L$loop:
xorl %ecx,%r11d
addl %r11d,%eax
movl 12(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
@@ -545,7 +545,7 @@ L$loop:
xorl %ebx,%r11d
addl %r11d,%edx
movl 40(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
@@ -554,7 +554,7 @@ L$loop:
xorl %eax,%r11d
addl %r11d,%ecx
movl 4(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
@@ -563,7 +563,7 @@ L$loop:
xorl %edx,%r11d
addl %r11d,%ebx
movl 32(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
@@ -572,7 +572,7 @@ L$loop:
xorl %ecx,%r11d
addl %r11d,%eax
movl 60(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
@@ -581,7 +581,7 @@ L$loop:
xorl %ebx,%r11d
addl %r11d,%edx
movl 24(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
@@ -590,7 +590,7 @@ L$loop:
xorl %eax,%r11d
addl %r11d,%ecx
movl 52(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
@@ -599,7 +599,7 @@ L$loop:
xorl %edx,%r11d
addl %r11d,%ebx
movl 16(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
@@ -608,7 +608,7 @@ L$loop:
xorl %ecx,%r11d
addl %r11d,%eax
movl 44(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
@@ -617,7 +617,7 @@ L$loop:
xorl %ebx,%r11d
addl %r11d,%edx
movl 8(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
@@ -626,7 +626,7 @@ L$loop:
xorl %eax,%r11d
addl %r11d,%ecx
movl 36(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
@@ -635,7 +635,7 @@ L$loop:
xorl %edx,%r11d
addl %r11d,%ebx
movl 0(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx

749
deps/openssl/asm/x64-macosx-gas/modes/aesni-gcm-x86_64.s

@@ -1,14 +1,753 @@
.text
.globl _aesni_gcm_encrypt
_aesni_gcm_encrypt:
xorl %eax,%eax
.byte 0xf3,0xc3
.p2align 5
_aesni_ctr32_ghash_6x:
vmovdqu 32(%r11),%xmm2
subq $6,%rdx
vpxor %xmm4,%xmm4,%xmm4
vmovdqu 0-128(%rcx),%xmm15
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpaddb %xmm2,%xmm11,%xmm12
vpaddb %xmm2,%xmm12,%xmm13
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm15,%xmm1,%xmm9
vmovdqu %xmm4,16+8(%rsp)
jmp L$oop6x
.p2align 5
L$oop6x:
addl $100663296,%ebx
jc L$handle_ctr32
vmovdqu 0-32(%r9),%xmm3
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm15,%xmm10,%xmm10
vpxor %xmm15,%xmm11,%xmm11
L$resume_ctr32:
vmovdqu %xmm1,(%r8)
vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
vpxor %xmm15,%xmm12,%xmm12
vmovups 16-128(%rcx),%xmm2
vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
xorq %r12,%r12
cmpq %r14,%r15
vaesenc %xmm2,%xmm9,%xmm9
vmovdqu 48+8(%rsp),%xmm0
vpxor %xmm15,%xmm13,%xmm13
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
vaesenc %xmm2,%xmm10,%xmm10
vpxor %xmm15,%xmm14,%xmm14
setnc %r12b
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vaesenc %xmm2,%xmm11,%xmm11
vmovdqu 16-32(%r9),%xmm3
negq %r12
vaesenc %xmm2,%xmm12,%xmm12
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
vpxor %xmm4,%xmm8,%xmm8
vaesenc %xmm2,%xmm13,%xmm13
vpxor %xmm5,%xmm1,%xmm4
andq $0x60,%r12
vmovups 32-128(%rcx),%xmm15
vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
vaesenc %xmm2,%xmm14,%xmm14
vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
leaq (%r14,%r12,1),%r14
vaesenc %xmm15,%xmm9,%xmm9
vpxor 16+8(%rsp),%xmm8,%xmm8
vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
vmovdqu 64+8(%rsp),%xmm0
vaesenc %xmm15,%xmm10,%xmm10
movbeq 88(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 80(%r14),%r12
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,32+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,40+8(%rsp)
vmovdqu 48-32(%r9),%xmm5
vaesenc %xmm15,%xmm14,%xmm14
vmovups 48-128(%rcx),%xmm15
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm3,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3
vaesenc %xmm15,%xmm11,%xmm11
vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5
vmovdqu 80+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vpxor %xmm1,%xmm4,%xmm4
vmovdqu 64-32(%r9),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vmovups 64-128(%rcx),%xmm15
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3
vaesenc %xmm15,%xmm10,%xmm10
movbeq 72(%r14),%r13
vpxor %xmm5,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
movbeq 64(%r14),%r12
vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1
vmovdqu 96+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,48+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,56+8(%rsp)
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 96-32(%r9),%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vmovups 80-128(%rcx),%xmm15
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5
vaesenc %xmm15,%xmm10,%xmm10
movbeq 56(%r14),%r13
vpxor %xmm1,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1
vpxor 112+8(%rsp),%xmm8,%xmm8
vaesenc %xmm15,%xmm11,%xmm11
movbeq 48(%r14),%r12
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,64+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,72+8(%rsp)
vpxor %xmm3,%xmm4,%xmm4
vmovdqu 112-32(%r9),%xmm3
vaesenc %xmm15,%xmm14,%xmm14
vmovups 96-128(%rcx),%xmm15
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1
vaesenc %xmm15,%xmm10,%xmm10
movbeq 40(%r14),%r13
vpxor %xmm2,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
movbeq 32(%r14),%r12
vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,80+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,88+8(%rsp)
vpxor %xmm5,%xmm6,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor %xmm1,%xmm6,%xmm6
vmovups 112-128(%rcx),%xmm15
vpslldq $8,%xmm6,%xmm5
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 16(%r11),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm8,%xmm7,%xmm7
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm5,%xmm4,%xmm4
movbeq 24(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 16(%r14),%r12
vpalignr $8,%xmm4,%xmm4,%xmm0
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
movq %r13,96+8(%rsp)
vaesenc %xmm15,%xmm12,%xmm12
movq %r12,104+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
vmovups 128-128(%rcx),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vmovups 144-128(%rcx),%xmm15
vaesenc %xmm1,%xmm10,%xmm10
vpsrldq $8,%xmm6,%xmm6
vaesenc %xmm1,%xmm11,%xmm11
vpxor %xmm6,%xmm7,%xmm7
vaesenc %xmm1,%xmm12,%xmm12
vpxor %xmm0,%xmm4,%xmm4
movbeq 8(%r14),%r13
vaesenc %xmm1,%xmm13,%xmm13
movbeq 0(%r14),%r12
vaesenc %xmm1,%xmm14,%xmm14
vmovups 160-128(%rcx),%xmm1
cmpl $11,%ebp
jb L$enc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 176-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 192-128(%rcx),%xmm1
je L$enc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 208-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 224-128(%rcx),%xmm1
jmp L$enc_tail
.p2align 5
L$handle_ctr32:
vmovdqu (%r11),%xmm0
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vmovdqu 0-32(%r9),%xmm3
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm15,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm15,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpshufb %xmm0,%xmm14,%xmm14
vpshufb %xmm0,%xmm1,%xmm1
jmp L$resume_ctr32
.p2align 5
L$enc_tail:
vaesenc %xmm15,%xmm9,%xmm9
vmovdqu %xmm7,16+8(%rsp)
vpalignr $8,%xmm4,%xmm4,%xmm8
vaesenc %xmm15,%xmm10,%xmm10
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
vpxor 0(%rdi),%xmm1,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
vpxor 16(%rdi),%xmm1,%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vpxor 32(%rdi),%xmm1,%xmm5
vaesenc %xmm15,%xmm13,%xmm13
vpxor 48(%rdi),%xmm1,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor 64(%rdi),%xmm1,%xmm7
vpxor 80(%rdi),%xmm1,%xmm3
vmovdqu (%r8),%xmm1
vaesenclast %xmm2,%xmm9,%xmm9
vmovdqu 32(%r11),%xmm2
vaesenclast %xmm0,%xmm10,%xmm10
vpaddb %xmm2,%xmm1,%xmm0
movq %r13,112+8(%rsp)
leaq 96(%rdi),%rdi
vaesenclast %xmm5,%xmm11,%xmm11
vpaddb %xmm2,%xmm0,%xmm5
movq %r12,120+8(%rsp)
leaq 96(%rsi),%rsi
vmovdqu 0-128(%rcx),%xmm15
vaesenclast %xmm6,%xmm12,%xmm12
vpaddb %xmm2,%xmm5,%xmm6
vaesenclast %xmm7,%xmm13,%xmm13
vpaddb %xmm2,%xmm6,%xmm7
vaesenclast %xmm3,%xmm14,%xmm14
vpaddb %xmm2,%xmm7,%xmm3
addq $0x60,%r10
subq $0x6,%rdx
jc L$6x_done
vmovups %xmm9,-96(%rsi)
vpxor %xmm15,%xmm1,%xmm9
vmovups %xmm10,-80(%rsi)
vmovdqa %xmm0,%xmm10
vmovups %xmm11,-64(%rsi)
vmovdqa %xmm5,%xmm11
vmovups %xmm12,-48(%rsi)
vmovdqa %xmm6,%xmm12
vmovups %xmm13,-32(%rsi)
vmovdqa %xmm7,%xmm13
vmovups %xmm14,-16(%rsi)
vmovdqa %xmm3,%xmm14
vmovdqu 32+8(%rsp),%xmm7
jmp L$oop6x
L$6x_done:
vpxor 16+8(%rsp),%xmm8,%xmm8
vpxor %xmm4,%xmm8,%xmm8
.byte 0xf3,0xc3
.globl _aesni_gcm_decrypt
.p2align 5
_aesni_gcm_decrypt:
xorl %eax,%eax
xorq %r10,%r10
cmpq $0x60,%rdx
jb L$gcm_dec_abort
leaq (%rsp),%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq L$bswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
vmovdqu (%r9),%xmm8
andq $-128,%rsp
vmovdqu (%r11),%xmm0
leaq 128(%rcx),%rcx
leaq 32+32(%r9),%r9
movl 240-128(%rcx),%ebp
vpshufb %xmm0,%xmm8,%xmm8
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc L$dec_no_key_aliasing
cmpq $768,%r15
jnc L$dec_no_key_aliasing
subq %r15,%rsp
L$dec_no_key_aliasing:
vmovdqu 80(%rdi),%xmm7
leaq (%rdi),%r14
vmovdqu 64(%rdi),%xmm4
leaq -192(%rdi,%rdx,1),%r15
vmovdqu 48(%rdi),%xmm5
shrq $4,%rdx
xorq %r10,%r10
vmovdqu 32(%rdi),%xmm6
vpshufb %xmm0,%xmm7,%xmm7
vmovdqu 16(%rdi),%xmm2
vpshufb %xmm0,%xmm4,%xmm4
vmovdqu (%rdi),%xmm3
vpshufb %xmm0,%xmm5,%xmm5
vmovdqu %xmm4,48(%rsp)
vpshufb %xmm0,%xmm6,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm2,%xmm2
vmovdqu %xmm6,80(%rsp)
vpshufb %xmm0,%xmm3,%xmm3
vmovdqu %xmm2,96(%rsp)
vmovdqu %xmm3,112(%rsp)
call _aesni_ctr32_ghash_6x
vmovups %xmm9,-96(%rsi)
vmovups %xmm10,-80(%rsi)
vmovups %xmm11,-64(%rsi)
vmovups %xmm12,-48(%rsi)
vmovups %xmm13,-32(%rsi)
vmovups %xmm14,-16(%rsi)
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
L$gcm_dec_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.p2align 5
_aesni_ctr32_6x:
vmovdqu 0-128(%rcx),%xmm4
vmovdqu 32(%r11),%xmm2
leaq -1(%rbp),%r13
vmovups 16-128(%rcx),%xmm15
leaq 32-128(%rcx),%r12
vpxor %xmm4,%xmm1,%xmm9
addl $100663296,%ebx
jc L$handle_ctr32_2
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddb %xmm2,%xmm11,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddb %xmm2,%xmm12,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp L$oop_ctr32
.p2align 4
L$oop_ctr32:
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vmovups (%r12),%xmm15
leaq 16(%r12),%r12
decl %r13d
jnz L$oop_ctr32
vmovdqu (%r12),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor 0(%rdi),%xmm3,%xmm4
vaesenc %xmm15,%xmm10,%xmm10
vpxor 16(%rdi),%xmm3,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
vpxor 32(%rdi),%xmm3,%xmm6
vaesenc %xmm15,%xmm12,%xmm12
vpxor 48(%rdi),%xmm3,%xmm8
vaesenc %xmm15,%xmm13,%xmm13
vpxor 64(%rdi),%xmm3,%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vpxor 80(%rdi),%xmm3,%xmm3
leaq 96(%rdi),%rdi
vaesenclast %xmm4,%xmm9,%xmm9
vaesenclast %xmm5,%xmm10,%xmm10
vaesenclast %xmm6,%xmm11,%xmm11
vaesenclast %xmm8,%xmm12,%xmm12
vaesenclast %xmm2,%xmm13,%xmm13
vaesenclast %xmm3,%xmm14,%xmm14
vmovups %xmm9,0(%rsi)
vmovups %xmm10,16(%rsi)
vmovups %xmm11,32(%rsi)
vmovups %xmm12,48(%rsi)
vmovups %xmm13,64(%rsi)
vmovups %xmm14,80(%rsi)
leaq 96(%rsi),%rsi
.byte 0xf3,0xc3
.p2align 5
L$handle_ctr32_2:
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpshufb %xmm0,%xmm14,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpshufb %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp L$oop_ctr32
.globl _aesni_gcm_encrypt
.p2align 5
_aesni_gcm_encrypt:
xorq %r10,%r10
cmpq $288,%rdx
jb L$gcm_enc_abort
leaq (%rsp),%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq L$bswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
leaq 128(%rcx),%rcx
vmovdqu (%r11),%xmm0
andq $-128,%rsp
movl 240-128(%rcx),%ebp
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc L$enc_no_key_aliasing
cmpq $768,%r15
jnc L$enc_no_key_aliasing
subq %r15,%rsp
L$enc_no_key_aliasing:
leaq (%rsi),%r14
leaq -192(%rsi,%rdx,1),%r15
shrq $4,%rdx
call _aesni_ctr32_6x
vpshufb %xmm0,%xmm9,%xmm8
vpshufb %xmm0,%xmm10,%xmm2
vmovdqu %xmm8,112(%rsp)
vpshufb %xmm0,%xmm11,%xmm4
vmovdqu %xmm2,96(%rsp)
vpshufb %xmm0,%xmm12,%xmm5
vmovdqu %xmm4,80(%rsp)
vpshufb %xmm0,%xmm13,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm14,%xmm7
vmovdqu %xmm6,48(%rsp)
call _aesni_ctr32_6x
vmovdqu (%r9),%xmm8
leaq 32+32(%r9),%r9
subq $12,%rdx
movq $192,%r10
vpshufb %xmm0,%xmm8,%xmm8
call _aesni_ctr32_ghash_6x
vmovdqu 32(%rsp),%xmm7
vmovdqu (%r11),%xmm0
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm7,%xmm7,%xmm1
vmovdqu 32-32(%r9),%xmm15
vmovups %xmm9,-96(%rsi)
vpshufb %xmm0,%xmm9,%xmm9
vpxor %xmm7,%xmm1,%xmm1
vmovups %xmm10,-80(%rsi)
vpshufb %xmm0,%xmm10,%xmm10
vmovups %xmm11,-64(%rsi)
vpshufb %xmm0,%xmm11,%xmm11
vmovups %xmm12,-48(%rsi)
vpshufb %xmm0,%xmm12,%xmm12
vmovups %xmm13,-32(%rsi)
vpshufb %xmm0,%xmm13,%xmm13
vmovups %xmm14,-16(%rsi)
vpshufb %xmm0,%xmm14,%xmm14
vmovdqu %xmm9,16(%rsp)
vmovdqu 48(%rsp),%xmm6
vmovdqu 16-32(%r9),%xmm0
vpunpckhqdq %xmm6,%xmm6,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5
vpxor %xmm6,%xmm2,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vmovdqu 64(%rsp),%xmm9
vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm9,%xmm9,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6
vpxor %xmm9,%xmm5,%xmm5
vpxor %xmm7,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vmovdqu 80(%rsp),%xmm1
vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm4,%xmm7,%xmm7
vpunpckhqdq %xmm1,%xmm1,%xmm4
vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpxor %xmm6,%xmm9,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 96(%rsp),%xmm2
vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm7,%xmm6,%xmm6
vpunpckhqdq %xmm2,%xmm2,%xmm7
vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1
vpxor %xmm2,%xmm7,%xmm7
vpxor %xmm9,%xmm1,%xmm1
vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm5,%xmm4,%xmm4
vpxor 112(%rsp),%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5
vmovdqu 112-32(%r9),%xmm0
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpxor %xmm6,%xmm5,%xmm5
vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7
vpxor %xmm4,%xmm7,%xmm4
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm14,%xmm14,%xmm1
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8
vpxor %xmm14,%xmm1,%xmm1
vpxor %xmm5,%xmm6,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9
vmovdqu 32-32(%r9),%xmm15
vpxor %xmm2,%xmm8,%xmm7
vpxor %xmm4,%xmm9,%xmm6
vmovdqu 16-32(%r9),%xmm0
vpxor %xmm5,%xmm7,%xmm9
vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4
vpxor %xmm9,%xmm6,%xmm6
vpunpckhqdq %xmm13,%xmm13,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14
vpxor %xmm13,%xmm2,%xmm2
vpslldq $8,%xmm6,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vpxor %xmm9,%xmm5,%xmm8
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm6,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm12,%xmm12,%xmm9
vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13
vpxor %xmm12,%xmm9,%xmm9
vpxor %xmm14,%xmm13,%xmm13
vpalignr $8,%xmm8,%xmm8,%xmm14
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm11,%xmm11,%xmm1
vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12
vpxor %xmm11,%xmm1,%xmm1
vpxor %xmm13,%xmm12,%xmm12
vxorps 16(%rsp),%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm9,%xmm9
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm10,%xmm10,%xmm2
vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11
vpxor %xmm10,%xmm2,%xmm2
vpalignr $8,%xmm8,%xmm8,%xmm14
vpxor %xmm12,%xmm11,%xmm11
vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm9,%xmm1,%xmm1
vxorps %xmm7,%xmm14,%xmm14
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4
vmovdqu 112-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm11,%xmm10,%xmm10
vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7
vpxor %xmm4,%xmm5,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6
vpxor %xmm10,%xmm7,%xmm7
vpxor %xmm2,%xmm6,%xmm6
vpxor %xmm5,%xmm7,%xmm4
vpxor %xmm4,%xmm6,%xmm6
vpslldq $8,%xmm6,%xmm1
vmovdqu 16(%r11),%xmm3
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm1,%xmm5,%xmm8
vpxor %xmm6,%xmm7,%xmm7
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm2,%xmm8,%xmm8
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm7,%xmm2,%xmm2
vpxor %xmm2,%xmm8,%xmm8
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
L$gcm_enc_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.p2align 6
L$bswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
L$poly:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
L$one_msb:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
L$two_lsb:
.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
L$one_lsb:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6

523
deps/openssl/asm/x64-macosx-gas/modes/ghash-x86_64.s

@ -661,10 +661,10 @@ L$ghash_epilogue:
_gcm_init_clmul:
L$_init_clmul:
movdqu (%rsi),%xmm2
pshufd $0b01001110,%xmm2,%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $0b11111111,%xmm2,%xmm4
pshufd $255,%xmm2,%xmm4
movdqa %xmm2,%xmm3
psllq $1,%xmm2
pxor %xmm5,%xmm5
@ -678,11 +678,11 @@ L$_init_clmul:
pxor %xmm5,%xmm2
pshufd $0b01001110,%xmm2,%xmm6
pshufd $78,%xmm2,%xmm6
movdqa %xmm2,%xmm0
pxor %xmm2,%xmm6
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@ -718,8 +718,8 @@ L$_init_clmul:
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $0b01001110,%xmm2,%xmm3
pshufd $0b01001110,%xmm0,%xmm4
pshufd $78,%xmm2,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm2,%xmm3
movdqu %xmm2,0(%rdi)
pxor %xmm0,%xmm4
@ -727,7 +727,7 @@ L$_init_clmul:
.byte 102,15,58,15,227,8
movdqu %xmm4,32(%rdi)
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@ -765,7 +765,7 @@ L$_init_clmul:
pxor %xmm1,%xmm0
movdqa %xmm0,%xmm5
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@ -801,8 +801,8 @@ L$_init_clmul:
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $0b01001110,%xmm5,%xmm3
pshufd $0b01001110,%xmm0,%xmm4
pshufd $78,%xmm5,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm5,%xmm3
movdqu %xmm5,48(%rdi)
pxor %xmm0,%xmm4
@ -822,7 +822,7 @@ L$_gmult_clmul:
movdqu 32(%rsi),%xmm4
.byte 102,15,56,0,197
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@ -899,14 +899,14 @@ L$_ghash_clmul:
.byte 102,65,15,56,0,218
.byte 102,69,15,56,0,218
movdqa %xmm3,%xmm5
pshufd $0b01001110,%xmm3,%xmm4
pshufd $78,%xmm3,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0
movdqa %xmm11,%xmm13
pshufd $0b01001110,%xmm11,%xmm12
pshufd $78,%xmm11,%xmm12
pxor %xmm11,%xmm12
.byte 102,68,15,58,68,222,0
.byte 102,68,15,58,68,238,17
@ -921,12 +921,12 @@ L$_ghash_clmul:
.byte 102,69,15,56,0,218
.byte 102,69,15,56,0,194
movdqa %xmm11,%xmm13
pshufd $0b01001110,%xmm11,%xmm12
pshufd $78,%xmm11,%xmm12
pxor %xmm8,%xmm0
pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm8
pshufd $78,%xmm0,%xmm8
pxor %xmm0,%xmm8
.byte 102,69,15,58,68,238,17
.byte 102,68,15,58,68,231,0
@ -949,14 +949,14 @@ L$mod4_loop:
movdqu 32(%rdx),%xmm3
movdqa %xmm11,%xmm13
.byte 102,68,15,58,68,199,16
pshufd $0b01001110,%xmm11,%xmm12
pshufd $78,%xmm11,%xmm12
xorps %xmm5,%xmm1
pxor %xmm11,%xmm12
.byte 102,65,15,56,0,218
movups 32(%rsi),%xmm7
xorps %xmm4,%xmm8
.byte 102,68,15,58,68,218,0
pshufd $0b01001110,%xmm3,%xmm4
pshufd $78,%xmm3,%xmm4
pxor %xmm0,%xmm8
movdqa %xmm3,%xmm5
@ -1000,7 +1000,7 @@ L$mod4_loop:
movdqa %xmm11,%xmm13
pxor %xmm12,%xmm4
pshufd $0b01001110,%xmm11,%xmm12
pshufd $78,%xmm11,%xmm12
pxor %xmm9,%xmm0
pxor %xmm8,%xmm1
pxor %xmm11,%xmm12
@ -1010,7 +1010,7 @@ L$mod4_loop:
movdqa %xmm0,%xmm1
.byte 102,69,15,58,68,238,17
xorps %xmm11,%xmm3
pshufd $0b01001110,%xmm0,%xmm8
pshufd $78,%xmm0,%xmm8
pxor %xmm0,%xmm8
.byte 102,68,15,58,68,231,0
@ -1079,7 +1079,7 @@ L$skip4x:
pxor %xmm8,%xmm0
movdqa %xmm3,%xmm5
pshufd $0b01001110,%xmm3,%xmm4
pshufd $78,%xmm3,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
@ -1096,7 +1096,7 @@ L$skip4x:
L$mod_loop:
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm8
pshufd $0b01001110,%xmm0,%xmm4
pshufd $78,%xmm0,%xmm4
pxor %xmm0,%xmm4
.byte 102,15,58,68,198,0
@ -1134,7 +1134,7 @@ L$mod_loop:
pslldq $8,%xmm0
psrldq $8,%xmm8
pxor %xmm9,%xmm0
pshufd $0b01001110,%xmm5,%xmm4
pshufd $78,%xmm5,%xmm4
pxor %xmm8,%xmm1
pxor %xmm5,%xmm4
@ -1156,7 +1156,7 @@ L$mod_loop:
L$even_tail:
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm8
pshufd $0b01001110,%xmm0,%xmm4
pshufd $78,%xmm0,%xmm4
pxor %xmm0,%xmm4
.byte 102,15,58,68,198,0
@ -1204,7 +1204,7 @@ L$odd_tail:
.byte 102,69,15,56,0,194
pxor %xmm8,%xmm0
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@ -1249,7 +1249,108 @@ L$done:
.p2align 5
_gcm_init_avx:
jmp L$_init_clmul
vzeroupper
vmovdqu (%rsi),%xmm2
vpshufd $78,%xmm2,%xmm2
vpshufd $255,%xmm2,%xmm4
vpsrlq $63,%xmm2,%xmm3
vpsllq $1,%xmm2,%xmm2
vpxor %xmm5,%xmm5,%xmm5
vpcmpgtd %xmm4,%xmm5,%xmm5
vpslldq $8,%xmm3,%xmm3
vpor %xmm3,%xmm2,%xmm2
vpand L$0x1c2_polynomial(%rip),%xmm5,%xmm5
vpxor %xmm5,%xmm2,%xmm2
vpunpckhqdq %xmm2,%xmm2,%xmm6
vmovdqa %xmm2,%xmm0
vpxor %xmm2,%xmm6,%xmm6
movq $4,%r10
jmp L$init_start_avx
.p2align 5
L$init_loop_avx:
vpalignr $8,%xmm3,%xmm4,%xmm5
vmovdqu %xmm5,-16(%rdi)
vpunpckhqdq %xmm0,%xmm0,%xmm3
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
vpxor %xmm0,%xmm1,%xmm4
vpxor %xmm4,%xmm3,%xmm3
vpslldq $8,%xmm3,%xmm4
vpsrldq $8,%xmm3,%xmm3
vpxor %xmm4,%xmm0,%xmm0
vpxor %xmm3,%xmm1,%xmm1
vpsllq $57,%xmm0,%xmm3
vpsllq $62,%xmm0,%xmm4
vpxor %xmm3,%xmm4,%xmm4
vpsllq $63,%xmm0,%xmm3
vpxor %xmm3,%xmm4,%xmm4
vpslldq $8,%xmm4,%xmm3
vpsrldq $8,%xmm4,%xmm4
vpxor %xmm3,%xmm0,%xmm0
vpxor %xmm4,%xmm1,%xmm1
vpsrlq $1,%xmm0,%xmm4
vpxor %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $5,%xmm4,%xmm4
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $1,%xmm0,%xmm0
vpxor %xmm1,%xmm0,%xmm0
L$init_start_avx:
vmovdqa %xmm0,%xmm5
vpunpckhqdq %xmm0,%xmm0,%xmm3
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
vpxor %xmm0,%xmm1,%xmm4
vpxor %xmm4,%xmm3,%xmm3
vpslldq $8,%xmm3,%xmm4
vpsrldq $8,%xmm3,%xmm3
vpxor %xmm4,%xmm0,%xmm0
vpxor %xmm3,%xmm1,%xmm1
vpsllq $57,%xmm0,%xmm3
vpsllq $62,%xmm0,%xmm4
vpxor %xmm3,%xmm4,%xmm4
vpsllq $63,%xmm0,%xmm3
vpxor %xmm3,%xmm4,%xmm4
vpslldq $8,%xmm4,%xmm3
vpsrldq $8,%xmm4,%xmm4
vpxor %xmm3,%xmm0,%xmm0
vpxor %xmm4,%xmm1,%xmm1
vpsrlq $1,%xmm0,%xmm4
vpxor %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $5,%xmm4,%xmm4
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $1,%xmm0,%xmm0
vpxor %xmm1,%xmm0,%xmm0
vpshufd $78,%xmm5,%xmm3
vpshufd $78,%xmm0,%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqu %xmm5,0(%rdi)
vpxor %xmm0,%xmm4,%xmm4
vmovdqu %xmm0,16(%rdi)
leaq 48(%rdi),%rdi
subq $1,%r10
jnz L$init_loop_avx
vpalignr $8,%xmm4,%xmm3,%xmm5
vmovdqu %xmm5,-16(%rdi)
vzeroupper
.byte 0xf3,0xc3
.globl _gcm_gmult_avx
@ -1261,7 +1362,377 @@ _gcm_gmult_avx:
.p2align 5
_gcm_ghash_avx:
jmp L$_ghash_clmul
vzeroupper
vmovdqu (%rdi),%xmm10
leaq L$0x1c2_polynomial(%rip),%r10
leaq 64(%rsi),%rsi
vmovdqu L$bswap_mask(%rip),%xmm13
vpshufb %xmm13,%xmm10,%xmm10
cmpq $0x80,%rcx
jb L$short_avx
subq $0x80,%rcx
vmovdqu 112(%rdx),%xmm14
vmovdqu 0-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm14
vmovdqu 32-64(%rsi),%xmm7
vpunpckhqdq %xmm14,%xmm14,%xmm9
vmovdqu 96(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm14,%xmm9,%xmm9
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 16-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vmovdqu 80(%rdx),%xmm14
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 48-64(%rsi),%xmm6
vpxor %xmm14,%xmm9,%xmm9
vmovdqu 64(%rdx),%xmm15
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 80-64(%rsi),%xmm7
vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vmovdqu 48(%rdx),%xmm14
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm4,%xmm1,%xmm1
vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 96-64(%rsi),%xmm6
vpxor %xmm5,%xmm2,%xmm2
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 128-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vmovdqu 32(%rdx),%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vmovdqu 16(%rdx),%xmm14
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm4,%xmm1,%xmm1
vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 144-64(%rsi),%xmm6
vpxor %xmm5,%xmm2,%xmm2
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 176-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vmovdqu (%rdx),%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 160-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
leaq 128(%rdx),%rdx
cmpq $0x80,%rcx
jb L$tail_avx
vpxor %xmm10,%xmm15,%xmm15
subq $0x80,%rcx
jmp L$oop8x_avx
.p2align 5
L$oop8x_avx:
vpunpckhqdq %xmm15,%xmm15,%xmm8
vmovdqu 112(%rdx),%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpxor %xmm15,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11
vmovdqu 0-64(%rsi),%xmm6
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12
vmovdqu 32-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vmovdqu 96(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm3,%xmm10,%xmm10
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vxorps %xmm4,%xmm11,%xmm11
vmovdqu 16-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm5,%xmm12,%xmm12
vxorps %xmm15,%xmm8,%xmm8
vmovdqu 80(%rdx),%xmm14
vpxor %xmm10,%xmm12,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm11,%xmm12,%xmm12
vpslldq $8,%xmm12,%xmm9
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vpsrldq $8,%xmm12,%xmm12
vpxor %xmm9,%xmm10,%xmm10
vmovdqu 48-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm14
vxorps %xmm12,%xmm11,%xmm11
vpxor %xmm1,%xmm4,%xmm4
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 80-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 64(%rdx),%xmm15
vpalignr $8,%xmm10,%xmm10,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm4,%xmm1,%xmm1
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vxorps %xmm15,%xmm8,%xmm8
vpxor %xmm5,%xmm2,%xmm2
vmovdqu 48(%rdx),%xmm14
vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 96-64(%rsi),%xmm6
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 128-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 32(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm4,%xmm1,%xmm1
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vpxor %xmm5,%xmm2,%xmm2
vxorps %xmm12,%xmm10,%xmm10
vmovdqu 16(%rdx),%xmm14
vpalignr $8,%xmm10,%xmm10,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 144-64(%rsi),%xmm6
vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
vxorps %xmm11,%xmm12,%xmm12
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 176-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vmovdqu (%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 160-64(%rsi),%xmm6
vpxor %xmm12,%xmm15,%xmm15
vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
vpxor %xmm10,%xmm15,%xmm15
leaq 128(%rdx),%rdx
subq $0x80,%rcx
jnc L$oop8x_avx
addq $0x80,%rcx
jmp L$tail_no_xor_avx
.p2align 5
L$short_avx:
vmovdqu -16(%rdx,%rcx,1),%xmm14
leaq (%rdx,%rcx,1),%rdx
vmovdqu 0-64(%rsi),%xmm6
vmovdqu 32-64(%rsi),%xmm7
vpshufb %xmm13,%xmm14,%xmm15
vmovdqa %xmm0,%xmm3
vmovdqa %xmm1,%xmm4
vmovdqa %xmm2,%xmm5
subq $0x10,%rcx
jz L$tail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -32(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 16-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz L$tail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -48(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 48-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovdqu 80-64(%rsi),%xmm7
subq $0x10,%rcx
jz L$tail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -64(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz L$tail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -80(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 96-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovdqu 128-64(%rsi),%xmm7
subq $0x10,%rcx
jz L$tail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -96(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz L$tail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -112(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 144-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovq 184-64(%rsi),%xmm7
subq $0x10,%rcx
jmp L$tail_avx
.p2align 5
L$tail_avx:
vpxor %xmm10,%xmm15,%xmm15
L$tail_no_xor_avx:
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovdqu (%r10),%xmm12
vpxor %xmm0,%xmm3,%xmm10
vpxor %xmm1,%xmm4,%xmm11
vpxor %xmm2,%xmm5,%xmm5
vpxor %xmm10,%xmm5,%xmm5
vpxor %xmm11,%xmm5,%xmm5
vpslldq $8,%xmm5,%xmm9
vpsrldq $8,%xmm5,%xmm5
vpxor %xmm9,%xmm10,%xmm10
vpxor %xmm5,%xmm11,%xmm11
vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
vpalignr $8,%xmm10,%xmm10,%xmm10
vpxor %xmm9,%xmm10,%xmm10
vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
vpalignr $8,%xmm10,%xmm10,%xmm10
vpxor %xmm11,%xmm10,%xmm10
vpxor %xmm9,%xmm10,%xmm10
cmpq $0,%rcx
jne L$short_avx
vpshufb %xmm13,%xmm10,%xmm10
vmovdqu %xmm10,(%rdi)
vzeroupper
.byte 0xf3,0xc3
.p2align 6
L$bswap_mask:

4299
deps/openssl/asm/x64-macosx-gas/sha/sha1-mb-x86_64.s

File diff suppressed because it is too large

2813
deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s

File diff suppressed because it is too large

4660
deps/openssl/asm/x64-macosx-gas/sha/sha256-mb-x86_64.s

File diff suppressed because it is too large

2309
deps/openssl/asm/x64-macosx-gas/sha/sha256-x86_64.s

File diff suppressed because it is too large

3581
deps/openssl/asm/x64-macosx-gas/sha/sha512-x86_64.s

File diff suppressed because it is too large

48
deps/openssl/asm/x64-macosx-gas/x86_64cpuid.s

@ -45,43 +45,43 @@ _OPENSSL_ia32_cpuid:
movl %eax,%r11d
xorl %eax,%eax
cmpl $1970169159,%ebx
cmpl $0x756e6547,%ebx
setne %al
movl %eax,%r9d
cmpl $1231384169,%edx
cmpl $0x49656e69,%edx
setne %al
orl %eax,%r9d
cmpl $1818588270,%ecx
cmpl $0x6c65746e,%ecx
setne %al
orl %eax,%r9d
jz L$intel
cmpl $1752462657,%ebx
cmpl $0x68747541,%ebx
setne %al
movl %eax,%r10d
cmpl $1769238117,%edx
cmpl $0x69746E65,%edx
setne %al
orl %eax,%r10d
cmpl $1145913699,%ecx
cmpl $0x444D4163,%ecx
setne %al
orl %eax,%r10d
jnz L$intel
movl $2147483648,%eax
movl $0x80000000,%eax
cpuid
cmpl $2147483649,%eax
cmpl $0x80000001,%eax
jb L$intel
movl %eax,%r10d
movl $2147483649,%eax
movl $0x80000001,%eax
cpuid
orl %ecx,%r9d
andl $2049,%r9d
andl $0x00000801,%r9d
cmpl $2147483656,%r10d
cmpl $0x80000008,%r10d
jb L$intel
movl $2147483656,%eax
movl $0x80000008,%eax
cpuid
movzbq %cl,%r10
incq %r10
@ -93,7 +93,7 @@ _OPENSSL_ia32_cpuid:
shrl $16,%ebx
cmpb %r10b,%bl
ja L$generic
andl $4026531839,%edx
andl $0xefffffff,%edx
jmp L$generic
L$intel:
@ -106,7 +106,7 @@ L$intel:
cpuid
movl %eax,%r10d
shrl $14,%r10d
andl $4095,%r10d
andl $0xfff,%r10d
cmpl $7,%r11d
jb L$nocacheinfo
@ -119,29 +119,29 @@ L$intel:
L$nocacheinfo:
movl $1,%eax
cpuid
andl $3220176895,%edx
andl $0xbfefffff,%edx
cmpl $0,%r9d
jne L$notintel
orl $1073741824,%edx
orl $0x40000000,%edx
andb $15,%ah
cmpb $15,%ah
jne L$notintel
orl $1048576,%edx
orl $0x00100000,%edx
L$notintel:
btl $28,%edx
jnc L$generic
andl $4026531839,%edx
andl $0xefffffff,%edx
cmpl $0,%r10d
je L$generic
orl $268435456,%edx
orl $0x10000000,%edx
shrl $16,%ebx
cmpb $1,%bl
ja L$generic
andl $4026531839,%edx
andl $0xefffffff,%edx
L$generic:
andl $2048,%r9d
andl $4294965247,%ecx
andl $0x00000800,%r9d
andl $0xfffff7ff,%ecx
orl %ecx,%r9d
movl %edx,%r10d
@ -153,9 +153,9 @@ L$generic:
cmpl $6,%eax
je L$done
L$clear_avx:
movl $4026525695,%eax
movl $0xefffe7ff,%eax
andl %eax,%r9d
andl $4294967263,8(%rdi)
andl $0xffffffdf,8(%rdi)
L$done:
shlq $32,%r9
movl %r10d,%eax

42
deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm

@ -48,6 +48,20 @@ $L$mul_enter::
mov QWORD PTR[8+r9*8+rsp],r11
$L$mul_body::
sub r11,rsp
and r11,-4096
$L$mul_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 066h,02eh
jnc $L$mul_page_walk
mov r12,rdx
mov r8,QWORD PTR[r8]
mov rbx,QWORD PTR[r12]
@ -263,6 +277,14 @@ $L$mul4x_enter::
mov QWORD PTR[8+r9*8+rsp],r11
$L$mul4x_body::
sub r11,rsp
and r11,-4096
$L$mul4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$mul4x_page_walk
mov QWORD PTR[16+r9*8+rsp],rdi
mov r12,rdx
mov r8,QWORD PTR[r8]
@ -701,6 +723,15 @@ $L$sqr8x_sp_alt::
sub rsp,r11
$L$sqr8x_sp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and r11,-4096
$L$sqr8x_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$sqr8x_page_walk
mov r10,r9
neg r9
@ -842,8 +873,17 @@ DB 067h
sub r10,r9
mov r8,QWORD PTR[r8]
lea rsp,QWORD PTR[((-72))+r10*1+rsp]
lea r10,QWORD PTR[r9*1+rdx]
and rsp,-128
mov r11,rax
sub r11,rsp
and r11,-4096
$L$mulx4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 066h,02eh
jnc $L$mulx4x_page_walk
lea r10,QWORD PTR[r9*1+rdx]

59
deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm

@ -44,6 +44,20 @@ $L$mul_enter::
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul_body::
sub rax,rsp
and rax,-4096
$L$mul_page_walk::
mov r11,QWORD PTR[rax*1+rsp]
sub rax,4096
DB 02eh
jnc $L$mul_page_walk
lea r12,QWORD PTR[128+rdx]
movdqa xmm0,XMMWORD PTR[r10]
movdqa xmm1,XMMWORD PTR[16+r10]
@ -474,6 +488,15 @@ $L$mul4xsp_alt::
sub rsp,r11
$L$mul4xsp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and r11,-4096
$L$mul4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$mul4x_page_walk
neg r9
mov QWORD PTR[40+rsp],rax
@ -1082,6 +1105,15 @@ $L$pwr_sp_alt::
sub rsp,r11
$L$pwr_sp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and r11,-4096
$L$pwr_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$pwr_page_walk
mov r10,r9
neg r9
@ -2038,6 +2070,15 @@ $L$from_sp_alt::
sub rsp,r11
$L$from_sp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and r11,-4096
$L$from_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$from_page_walk
mov r10,r9
neg r9
@ -2186,6 +2227,15 @@ $L$mulx4xsp_alt::
sub rsp,r11
$L$mulx4xsp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and r11,-4096
$L$mulx4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$mulx4x_page_walk
@ -2697,6 +2747,15 @@ $L$pwrx_sp_alt::
sub rsp,r11
$L$pwrx_sp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and r11,-4096
$L$pwrx_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$pwrx_page_walk
mov r10,r9
neg r9

78
deps/openssl/asm/x86-elf-gas/bn/x86-mont.s

@ -29,6 +29,14 @@ bn_mul_mont:
xorl $2048,%edx
subl %edx,%esp
andl $-64,%esp
movl %ebp,%eax
subl %esp,%eax
andl $-4096,%eax
.L001page_walk:
movl (%esp,%eax,1),%edx
subl $4096,%eax
.byte 46
jnc .L001page_walk
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
@ -44,7 +52,7 @@ bn_mul_mont:
movl %ebp,24(%esp)
leal OPENSSL_ia32cap_P,%eax
btl $26,(%eax)
jnc .L001non_sse2
jnc .L002non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
@ -68,7 +76,7 @@ bn_mul_mont:
psrlq $32,%mm3
incl %ecx
.align 16
.L0021st:
.L0031st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -83,7 +91,7 @@ bn_mul_mont:
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl .L0021st
jl .L0031st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -97,7 +105,7 @@ bn_mul_mont:
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
.L003outer:
.L004outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
@ -119,7 +127,7 @@ bn_mul_mont:
paddq %mm6,%mm2
incl %ecx
decl %ebx
.L004inner:
.L005inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -136,7 +144,7 @@ bn_mul_mont:
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
jnz .L004inner
jnz .L005inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
@ -154,11 +162,11 @@ bn_mul_mont:
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle .L003outer
jle .L004outer
emms
jmp .L005common_tail
jmp .L006common_tail
.align 16
.L001non_sse2:
.L002non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@ -169,12 +177,12 @@ bn_mul_mont:
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz .L006bn_sqr_mont
jz .L007bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 16
.L007mull:
.L008mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@ -183,7 +191,7 @@ bn_mul_mont:
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L007mull
jl .L008mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@ -201,9 +209,9 @@ bn_mul_mont:
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp .L0082ndmadd
jmp .L0092ndmadd
.align 16
.L0091stmadd:
.L0101stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -214,7 +222,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L0091stmadd
jl .L0101stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@ -237,7 +245,7 @@ bn_mul_mont:
adcl $0,%edx
movl $1,%ecx
.align 16
.L0082ndmadd:
.L0092ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -248,7 +256,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0082ndmadd
jl .L0092ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -264,16 +272,16 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je .L005common_tail
je .L006common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp .L0091stmadd
jmp .L0101stmadd
.align 16
.L006bn_sqr_mont:
.L007bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@ -284,7 +292,7 @@ bn_mul_mont:
andl $1,%ebx
incl %ecx
.align 16
.L010sqr:
.L011sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -296,7 +304,7 @@ bn_mul_mont:
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl .L010sqr
jl .L011sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -320,7 +328,7 @@ bn_mul_mont:
movl 4(%esi),%eax
movl $1,%ecx
.align 16
.L0113rdmadd:
.L0123rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -339,7 +347,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0113rdmadd
jl .L0123rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -355,7 +363,7 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je .L005common_tail
je .L006common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
@ -367,12 +375,12 @@ bn_mul_mont:
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je .L012sqrlast
je .L013sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 16
.L013sqradd:
.L014sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -388,13 +396,13 @@ bn_mul_mont:
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle .L013sqradd
jle .L014sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
.L012sqrlast:
.L013sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@ -409,9 +417,9 @@ bn_mul_mont:
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp .L0113rdmadd
jmp .L0123rdmadd
.align 16
.L005common_tail:
.L006common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
@ -419,13 +427,13 @@ bn_mul_mont:
movl %ebx,%ecx
xorl %edx,%edx
.align 16
.L014sub:
.L015sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge .L014sub
jge .L015sub
sbbl $0,%eax
andl %eax,%esi
notl %eax
@ -433,12 +441,12 @@ bn_mul_mont:
andl %eax,%ebp
orl %ebp,%esi
.align 16
.L015copy:
.L016copy:
movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4)
decl %ebx
jge .L015copy
jge .L016copy
movl 24(%esp),%esp
movl $1,%eax
.L000just_leave:

1175
deps/openssl/asm/x86-elf-gas/sha/sha1-586.s

File diff suppressed because it is too large

2248
deps/openssl/asm/x86-elf-gas/sha/sha256-586.s

File diff suppressed because it is too large

84
deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s

@ -28,6 +28,14 @@ L_bn_mul_mont_begin:
xorl $2048,%edx
subl %edx,%esp
andl $-64,%esp
movl %ebp,%eax
subl %esp,%eax
andl $-4096,%eax
L001page_walk:
movl (%esp,%eax,1),%edx
subl $4096,%eax
.byte 46
jnc L001page_walk
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
@ -41,12 +49,12 @@ L_bn_mul_mont_begin:
movl %esi,20(%esp)
leal -3(%edi),%ebx
movl %ebp,24(%esp)
call L001PIC_me_up
L001PIC_me_up:
call L002PIC_me_up
L002PIC_me_up:
popl %eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001PIC_me_up(%eax),%eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L002PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc L002non_sse2
jnc L003non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
@ -70,7 +78,7 @@ L001PIC_me_up:
psrlq $32,%mm3
incl %ecx
.align 4,0x90
L0031st:
L0041st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -85,7 +93,7 @@ L0031st:
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl L0031st
jl L0041st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -99,7 +107,7 @@ L0031st:
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
L004outer:
L005outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
@ -121,7 +129,7 @@ L004outer:
paddq %mm6,%mm2
incl %ecx
decl %ebx
L005inner:
L006inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -138,7 +146,7 @@ L005inner:
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
jnz L005inner
jnz L006inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
@ -156,11 +164,11 @@ L005inner:
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle L004outer
jle L005outer
emms
jmp L006common_tail
jmp L007common_tail
.align 4,0x90
L002non_sse2:
L003non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@ -171,12 +179,12 @@ L002non_sse2:
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz L007bn_sqr_mont
jz L008bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 4,0x90
L008mull:
L009mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@ -185,7 +193,7 @@ L008mull:
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl L008mull
jl L009mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@ -203,9 +211,9 @@ L008mull:
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp L0092ndmadd
jmp L0102ndmadd
.align 4,0x90
L0101stmadd:
L0111stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -216,7 +224,7 @@ L0101stmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl L0101stmadd
jl L0111stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@ -239,7 +247,7 @@ L0101stmadd:
adcl $0,%edx
movl $1,%ecx
.align 4,0x90
L0092ndmadd:
L0102ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -250,7 +258,7 @@ L0092ndmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl L0092ndmadd
jl L0102ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -266,16 +274,16 @@ L0092ndmadd:
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je L006common_tail
je L007common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp L0101stmadd
jmp L0111stmadd
.align 4,0x90
L007bn_sqr_mont:
L008bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@ -286,7 +294,7 @@ L007bn_sqr_mont:
andl $1,%ebx
incl %ecx
.align 4,0x90
L011sqr:
L012sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -298,7 +306,7 @@ L011sqr:
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl L011sqr
jl L012sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -322,7 +330,7 @@ L011sqr:
movl 4(%esi),%eax
movl $1,%ecx
.align 4,0x90
L0123rdmadd:
L0133rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -341,7 +349,7 @@ L0123rdmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl L0123rdmadd
jl L0133rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -357,7 +365,7 @@ L0123rdmadd:
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je L006common_tail
je L007common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
@ -369,12 +377,12 @@ L0123rdmadd:
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je L013sqrlast
je L014sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 4,0x90
L014sqradd:
L015sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -390,13 +398,13 @@ L014sqradd:
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle L014sqradd
jle L015sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
L013sqrlast:
L014sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@ -411,9 +419,9 @@ L013sqrlast:
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp L0123rdmadd
jmp L0133rdmadd
.align 4,0x90
L006common_tail:
L007common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
@ -421,13 +429,13 @@ L006common_tail:
movl %ebx,%ecx
xorl %edx,%edx
.align 4,0x90
L015sub:
L016sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge L015sub
jge L016sub
sbbl $0,%eax
andl %eax,%esi
notl %eax
@ -435,12 +443,12 @@ L015sub:
andl %eax,%ebp
orl %ebp,%esi
.align 4,0x90
L016copy:
L017copy:
movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4)
decl %ebx
jge L016copy
jge L017copy
movl 24(%esp),%esp
movl $1,%eax
L000just_leave:

1173
deps/openssl/asm/x86-macosx-gas/sha/sha1-586.s

File diff suppressed because it is too large

2248
deps/openssl/asm/x86-macosx-gas/sha/sha256-586.s

File diff suppressed because it is too large

78
deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm

@ -45,6 +45,14 @@ $L_bn_mul_mont_begin::
xor edx,2048
sub esp,edx
and esp,-64
mov eax,ebp
sub eax,esp
and eax,-4096
$L001page_walk:
mov edx,DWORD PTR [eax*1+esp]
sub eax,4096
DB 46
jnc $L001page_walk
mov eax,DWORD PTR [esi]
mov ebx,DWORD PTR 4[esi]
mov ecx,DWORD PTR 8[esi]
@ -60,7 +68,7 @@ $L_bn_mul_mont_begin::
mov DWORD PTR 24[esp],ebp
lea eax,DWORD PTR _OPENSSL_ia32cap_P
bt DWORD PTR [eax],26
jnc $L001non_sse2
jnc $L002non_sse2
mov eax,-1
movd mm7,eax
mov esi,DWORD PTR 8[esp]
@ -84,7 +92,7 @@ $L_bn_mul_mont_begin::
psrlq mm3,32
inc ecx
ALIGN 16
$L0021st:
$L0031st:
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@ -99,7 +107,7 @@ $L0021st:
psrlq mm3,32
lea ecx,DWORD PTR 1[ecx]
cmp ecx,ebx
jl $L0021st
jl $L0031st
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@ -113,7 +121,7 @@ $L0021st:
paddq mm3,mm2
movq QWORD PTR 32[ebx*4+esp],mm3
inc edx
$L003outer:
$L004outer:
xor ecx,ecx
movd mm4,DWORD PTR [edx*4+edi]
movd mm5,DWORD PTR [esi]
@ -135,7 +143,7 @@ $L003outer:
paddq mm2,mm6
inc ecx
dec ebx
$L004inner:
$L005inner:
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@ -152,7 +160,7 @@ $L004inner:
paddq mm2,mm6
dec ebx
lea ecx,DWORD PTR 1[ecx]
jnz $L004inner
jnz $L005inner
mov ebx,ecx
pmuludq mm0,mm4
pmuludq mm1,mm5
@ -170,11 +178,11 @@ $L004inner:
movq QWORD PTR 32[ebx*4+esp],mm3
lea edx,DWORD PTR 1[edx]
cmp edx,ebx
jle $L003outer
jle $L004outer
emms
jmp $L005common_tail
jmp $L006common_tail
ALIGN 16
$L001non_sse2:
$L002non_sse2:
mov esi,DWORD PTR 8[esp]
lea ebp,DWORD PTR 1[ebx]
mov edi,DWORD PTR 12[esp]
@ -185,12 +193,12 @@ $L001non_sse2:
lea eax,DWORD PTR 4[ebx*4+edi]
or ebp,edx
mov edi,DWORD PTR [edi]
jz $L006bn_sqr_mont
jz $L007bn_sqr_mont
mov DWORD PTR 28[esp],eax
mov eax,DWORD PTR [esi]
xor edx,edx
ALIGN 16
$L007mull:
$L008mull:
mov ebp,edx
mul edi
add ebp,eax
@ -199,7 +207,7 @@ $L007mull:
mov eax,DWORD PTR [ecx*4+esi]
cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp
jl $L007mull
jl $L008mull
mov ebp,edx
mul edi
mov edi,DWORD PTR 20[esp]
@ -217,9 +225,9 @@ $L007mull:
mov eax,DWORD PTR 4[esi]
adc edx,0
inc ecx
jmp $L0082ndmadd
jmp $L0092ndmadd
ALIGN 16
$L0091stmadd:
$L0101stmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@ -230,7 +238,7 @@ $L0091stmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp
jl $L0091stmadd
jl $L0101stmadd
mov ebp,edx
mul edi
add eax,DWORD PTR 32[ebx*4+esp]
@ -253,7 +261,7 @@ $L0091stmadd:
adc edx,0
mov ecx,1
ALIGN 16
$L0082ndmadd:
$L0092ndmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@ -264,7 +272,7 @@ $L0082ndmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp
jl $L0082ndmadd
jl $L0092ndmadd
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ebx*4+esp]
@ -280,16 +288,16 @@ $L0082ndmadd:
mov DWORD PTR 32[ebx*4+esp],edx
cmp ecx,DWORD PTR 28[esp]
mov DWORD PTR 36[ebx*4+esp],eax
je $L005common_tail
je $L006common_tail
mov edi,DWORD PTR [ecx]
mov esi,DWORD PTR 8[esp]
mov DWORD PTR 12[esp],ecx
xor ecx,ecx
xor edx,edx
mov eax,DWORD PTR [esi]
jmp $L0091stmadd
jmp $L0101stmadd
ALIGN 16
$L006bn_sqr_mont:
$L007bn_sqr_mont:
mov DWORD PTR [esp],ebx
mov DWORD PTR 12[esp],ecx
mov eax,edi
@ -300,7 +308,7 @@ $L006bn_sqr_mont:
and ebx,1
inc ecx
ALIGN 16
$L010sqr:
$L011sqr:
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@ -312,7 +320,7 @@ $L010sqr:
cmp ecx,DWORD PTR [esp]
mov ebx,eax
mov DWORD PTR 28[ecx*4+esp],ebp
jl $L010sqr
jl $L011sqr
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@ -336,7 +344,7 @@ $L010sqr:
mov eax,DWORD PTR 4[esi]
mov ecx,1
ALIGN 16
$L0113rdmadd:
$L0123rdmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@ -355,7 +363,7 @@ $L0113rdmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp
jl $L0113rdmadd
jl $L0123rdmadd
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ebx*4+esp]
@ -371,7 +379,7 @@ $L0113rdmadd:
mov DWORD PTR 32[ebx*4+esp],edx
cmp ecx,ebx
mov DWORD PTR 36[ebx*4+esp],eax
je $L005common_tail
je $L006common_tail
mov edi,DWORD PTR 4[ecx*4+esi]
lea ecx,DWORD PTR 1[ecx]
mov eax,edi
@ -383,12 +391,12 @@ $L0113rdmadd:
xor ebp,ebp
cmp ecx,ebx
lea ecx,DWORD PTR 1[ecx]
je $L012sqrlast
je $L013sqrlast
mov ebx,edx
shr edx,1
and ebx,1
ALIGN 16
$L013sqradd:
$L014sqradd:
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@ -404,13 +412,13 @@ $L013sqradd:
cmp ecx,DWORD PTR [esp]
mov DWORD PTR 28[ecx*4+esp],ebp
mov ebx,eax
jle $L013sqradd
jle $L014sqradd
mov ebp,edx
add edx,edx
shr ebp,31
add edx,ebx
adc ebp,0
$L012sqrlast:
$L013sqrlast:
mov edi,DWORD PTR 20[esp]
mov esi,DWORD PTR 16[esp]
imul edi,DWORD PTR 32[esp]
@ -425,9 +433,9 @@ $L012sqrlast:
adc edx,0
mov ecx,1
mov eax,DWORD PTR 4[esi]
jmp $L0113rdmadd
jmp $L0123rdmadd
ALIGN 16
$L005common_tail:
$L006common_tail:
mov ebp,DWORD PTR 16[esp]
mov edi,DWORD PTR 4[esp]
lea esi,DWORD PTR 32[esp]
@ -435,13 +443,13 @@ $L005common_tail:
mov ecx,ebx
xor edx,edx
ALIGN 16
$L014sub:
$L015sub:
sbb eax,DWORD PTR [edx*4+ebp]
mov DWORD PTR [edx*4+edi],eax
dec ecx
mov eax,DWORD PTR 4[edx*4+esi]
lea edx,DWORD PTR 1[edx]
jge $L014sub
jge $L015sub
sbb eax,0
and esi,eax
not eax
@ -449,12 +457,12 @@ $L014sub:
and ebp,eax
or esi,ebp
ALIGN 16
$L015copy:
$L016copy:
mov eax,DWORD PTR [ebx*4+esi]
mov DWORD PTR [ebx*4+edi],eax
mov DWORD PTR 32[ebx*4+esp],ecx
dec ebx
jge $L015copy
jge $L016copy
mov esp,DWORD PTR 24[esp]
mov eax,1
$L000just_leave:

1174
deps/openssl/asm/x86-win32-masm/sha/sha1-586.asm

File diff suppressed because it is too large

2248
deps/openssl/asm/x86-win32-masm/sha/sha256-586.asm

File diff suppressed because it is too large

8
deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-sha1-x86_64.s

@ -1392,8 +1392,8 @@ aesni_cbc_sha1_enc_shaext:
movups 16(%rcx),%xmm0
leaq 112(%rcx),%rcx
pshufd $0b00011011,%xmm8,%xmm8
pshufd $0b00011011,%xmm9,%xmm9
pshufd $27,%xmm8,%xmm8
pshufd $27,%xmm9,%xmm9
jmp .Loop_shaext
.align 16
@ -1672,8 +1672,8 @@ aesni_cbc_sha1_enc_shaext:
leaq 64(%rdi),%rdi
jnz .Loop_shaext
pshufd $0b00011011,%xmm8,%xmm8
pshufd $0b00011011,%xmm9,%xmm9
pshufd $27,%xmm8,%xmm8
pshufd $27,%xmm9,%xmm9
movups %xmm2,(%r8)
movdqu %xmm8,(%r9)
movd %xmm9,16(%r9)

30
deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-x86_64.s

@ -3462,11 +3462,11 @@ __aesni_set_encrypt_key:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
.Lkey_expansion_128_cold:
shufps $0b00010000,%xmm0,%xmm4
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $0b10001100,%xmm0,%xmm4
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $0b11111111,%xmm1,%xmm1
shufps $255,%xmm1,%xmm1
xorps %xmm1,%xmm0
.byte 0xf3,0xc3
@ -3477,25 +3477,25 @@ __aesni_set_encrypt_key:
.Lkey_expansion_192a_cold:
movaps %xmm2,%xmm5
.Lkey_expansion_192b_warm:
shufps $0b00010000,%xmm0,%xmm4
shufps $16,%xmm0,%xmm4
movdqa %xmm2,%xmm3
xorps %xmm4,%xmm0
shufps $0b10001100,%xmm0,%xmm4
shufps $140,%xmm0,%xmm4
pslldq $4,%xmm3
xorps %xmm4,%xmm0
pshufd $0b01010101,%xmm1,%xmm1
pshufd $85,%xmm1,%xmm1
pxor %xmm3,%xmm2
pxor %xmm1,%xmm0
pshufd $0b11111111,%xmm0,%xmm3
pshufd $255,%xmm0,%xmm3
pxor %xmm3,%xmm2
.byte 0xf3,0xc3
.align 16
.Lkey_expansion_192b:
movaps %xmm0,%xmm3
shufps $0b01000100,%xmm0,%xmm5
shufps $68,%xmm0,%xmm5
movups %xmm5,(%rax)
shufps $0b01001110,%xmm2,%xmm3
shufps $78,%xmm2,%xmm3
movups %xmm3,16(%rax)
leaq 32(%rax),%rax
jmp .Lkey_expansion_192b_warm
@ -3505,11 +3505,11 @@ __aesni_set_encrypt_key:
movups %xmm2,(%rax)
leaq 16(%rax),%rax
.Lkey_expansion_256a_cold:
shufps $0b00010000,%xmm0,%xmm4
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $0b10001100,%xmm0,%xmm4
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $0b11111111,%xmm1,%xmm1
shufps $255,%xmm1,%xmm1
xorps %xmm1,%xmm0
.byte 0xf3,0xc3
@ -3518,11 +3518,11 @@ __aesni_set_encrypt_key:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
shufps $0b00010000,%xmm2,%xmm4
shufps $16,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $0b10001100,%xmm2,%xmm4
shufps $140,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $0b10101010,%xmm1,%xmm1
shufps $170,%xmm1,%xmm1
xorps %xmm1,%xmm2
.byte 0xf3,0xc3
.size aesni_set_encrypt_key,.-aesni_set_encrypt_key

31
deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s

@ -34,6 +34,20 @@ bn_mul_mont:
movq %r11,8(%rsp,%r9,8)
.Lmul_body:
subq %rsp,%r11
andq $-4096,%r11
.Lmul_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x66,0x2e
jnc .Lmul_page_walk
movq %rdx,%r12
movq (%r8),%r8
movq (%r12),%rbx
@ -231,6 +245,14 @@ bn_mul4x_mont:
movq %r11,8(%rsp,%r9,8)
.Lmul4x_body:
subq %rsp,%r11
andq $-4096,%r11
.Lmul4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lmul4x_page_walk
movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12
movq (%r8),%r8
@ -653,6 +675,15 @@ bn_sqr8x_mont:
subq %r11,%rsp
.Lsqr8x_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-4096,%r11
.Lsqr8x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lsqr8x_page_walk
movq %r9,%r10
negq %r9

41
deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s

@ -30,6 +30,20 @@ bn_mul_mont_gather5:
movq %rax,8(%rsp,%r9,8)
.Lmul_body:
subq %rsp,%rax
andq $-4096,%rax
.Lmul_page_walk:
movq (%rsp,%rax,1),%r11
subq $4096,%rax
.byte 0x2e
jnc .Lmul_page_walk
leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0
movdqa 16(%r10),%xmm1
@ -442,6 +456,15 @@ bn_mul4x_mont_gather5:
subq %r11,%rsp
.Lmul4xsp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-4096,%r11
.Lmul4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lmul4x_page_walk
negq %r9
movq %rax,40(%rsp)
@ -1031,6 +1054,15 @@ bn_power5:
subq %r11,%rsp
.Lpwr_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-4096,%r11
.Lpwr_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lpwr_page_walk
movq %r9,%r10
negq %r9
@ -1972,6 +2004,15 @@ bn_from_mont8x:
subq %r11,%rsp
.Lfrom_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-4096,%r11
.Lfrom_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lfrom_page_walk
movq %r9,%r10
negq %r9

34
deps/openssl/asm_obsolete/x64-elf-gas/md5/md5-x86_64.s

@ -493,14 +493,14 @@ md5_block_asm_data_order:
movl %ecx,%r11d
addl %ecx,%ebx
movl 0(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
xorl %edx,%r11d
leal -198630844(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 28(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
@ -509,7 +509,7 @@ md5_block_asm_data_order:
xorl %ebx,%r11d
addl %r11d,%edx
movl 56(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
@ -518,7 +518,7 @@ md5_block_asm_data_order:
xorl %eax,%r11d
addl %r11d,%ecx
movl 20(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
@ -527,7 +527,7 @@ md5_block_asm_data_order:
xorl %edx,%r11d
addl %r11d,%ebx
movl 48(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
@ -536,7 +536,7 @@ md5_block_asm_data_order:
xorl %ecx,%r11d
addl %r11d,%eax
movl 12(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
@ -545,7 +545,7 @@ md5_block_asm_data_order:
xorl %ebx,%r11d
addl %r11d,%edx
movl 40(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
@ -554,7 +554,7 @@ md5_block_asm_data_order:
xorl %eax,%r11d
addl %r11d,%ecx
movl 4(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
@ -563,7 +563,7 @@ md5_block_asm_data_order:
xorl %edx,%r11d
addl %r11d,%ebx
movl 32(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
@ -572,7 +572,7 @@ md5_block_asm_data_order:
xorl %ecx,%r11d
addl %r11d,%eax
movl 60(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
@ -581,7 +581,7 @@ md5_block_asm_data_order:
xorl %ebx,%r11d
addl %r11d,%edx
movl 24(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
@ -590,7 +590,7 @@ md5_block_asm_data_order:
xorl %eax,%r11d
addl %r11d,%ecx
movl 52(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
@ -599,7 +599,7 @@ md5_block_asm_data_order:
xorl %edx,%r11d
addl %r11d,%ebx
movl 16(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
@ -608,7 +608,7 @@ md5_block_asm_data_order:
xorl %ecx,%r11d
addl %r11d,%eax
movl 44(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
@ -617,7 +617,7 @@ md5_block_asm_data_order:
xorl %ebx,%r11d
addl %r11d,%edx
movl 8(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
@ -626,7 +626,7 @@ md5_block_asm_data_order:
xorl %eax,%r11d
addl %r11d,%ecx
movl 36(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
@ -635,7 +635,7 @@ md5_block_asm_data_order:
xorl %edx,%r11d
addl %r11d,%ebx
movl 0(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
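
Editorial note: the md5 hunks in this section (and in the macosx copy below) change only the spelling of a constant. $4294967295 and $0xffffffff are the same 32-bit all-ones mask, so the generated code is unchanged. A trivial C check, illustrative only:

    /* The two spellings used in the md5 hunks denote the same value. */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        assert(4294967295u == 0xffffffffu);
        assert(0xffffffffu == UINT32_MAX);
        return 0;
    }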

48
deps/openssl/asm_obsolete/x64-elf-gas/modes/ghash-x86_64.s

@ -661,10 +661,10 @@ gcm_ghash_4bit:
gcm_init_clmul:
.L_init_clmul:
movdqu (%rsi),%xmm2
pshufd $0b01001110,%xmm2,%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $0b11111111,%xmm2,%xmm4
pshufd $255,%xmm2,%xmm4
movdqa %xmm2,%xmm3
psllq $1,%xmm2
pxor %xmm5,%xmm5
@ -678,11 +678,11 @@ gcm_init_clmul:
pxor %xmm5,%xmm2
pshufd $0b01001110,%xmm2,%xmm6
pshufd $78,%xmm2,%xmm6
movdqa %xmm2,%xmm0
pxor %xmm2,%xmm6
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@ -718,8 +718,8 @@ gcm_init_clmul:
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $0b01001110,%xmm2,%xmm3
pshufd $0b01001110,%xmm0,%xmm4
pshufd $78,%xmm2,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm2,%xmm3
movdqu %xmm2,0(%rdi)
pxor %xmm0,%xmm4
@ -727,7 +727,7 @@ gcm_init_clmul:
.byte 102,15,58,15,227,8
movdqu %xmm4,32(%rdi)
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@ -765,7 +765,7 @@ gcm_init_clmul:
pxor %xmm1,%xmm0
movdqa %xmm0,%xmm5
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@ -801,8 +801,8 @@ gcm_init_clmul:
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $0b01001110,%xmm5,%xmm3
pshufd $0b01001110,%xmm0,%xmm4
pshufd $78,%xmm5,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm5,%xmm3
movdqu %xmm5,48(%rdi)
pxor %xmm0,%xmm4
@ -822,7 +822,7 @@ gcm_gmult_clmul:
movdqu 32(%rsi),%xmm4
.byte 102,15,56,0,197
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@ -899,14 +899,14 @@ gcm_ghash_clmul:
.byte 102,65,15,56,0,218
.byte 102,69,15,56,0,218
movdqa %xmm3,%xmm5
pshufd $0b01001110,%xmm3,%xmm4
pshufd $78,%xmm3,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0
movdqa %xmm11,%xmm13
pshufd $0b01001110,%xmm11,%xmm12
pshufd $78,%xmm11,%xmm12
pxor %xmm11,%xmm12
.byte 102,68,15,58,68,222,0
.byte 102,68,15,58,68,238,17
@ -921,12 +921,12 @@ gcm_ghash_clmul:
.byte 102,69,15,56,0,218
.byte 102,69,15,56,0,194
movdqa %xmm11,%xmm13
pshufd $0b01001110,%xmm11,%xmm12
pshufd $78,%xmm11,%xmm12
pxor %xmm8,%xmm0
pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm8
pshufd $78,%xmm0,%xmm8
pxor %xmm0,%xmm8
.byte 102,69,15,58,68,238,17
.byte 102,68,15,58,68,231,0
@ -949,14 +949,14 @@ gcm_ghash_clmul:
movdqu 32(%rdx),%xmm3
movdqa %xmm11,%xmm13
.byte 102,68,15,58,68,199,16
pshufd $0b01001110,%xmm11,%xmm12
pshufd $78,%xmm11,%xmm12
xorps %xmm5,%xmm1
pxor %xmm11,%xmm12
.byte 102,65,15,56,0,218
movups 32(%rsi),%xmm7
xorps %xmm4,%xmm8
.byte 102,68,15,58,68,218,0
pshufd $0b01001110,%xmm3,%xmm4
pshufd $78,%xmm3,%xmm4
pxor %xmm0,%xmm8
movdqa %xmm3,%xmm5
@ -1000,7 +1000,7 @@ gcm_ghash_clmul:
movdqa %xmm11,%xmm13
pxor %xmm12,%xmm4
pshufd $0b01001110,%xmm11,%xmm12
pshufd $78,%xmm11,%xmm12
pxor %xmm9,%xmm0
pxor %xmm8,%xmm1
pxor %xmm11,%xmm12
@ -1010,7 +1010,7 @@ gcm_ghash_clmul:
movdqa %xmm0,%xmm1
.byte 102,69,15,58,68,238,17
xorps %xmm11,%xmm3
pshufd $0b01001110,%xmm0,%xmm8
pshufd $78,%xmm0,%xmm8
pxor %xmm0,%xmm8
.byte 102,68,15,58,68,231,0
@ -1079,7 +1079,7 @@ gcm_ghash_clmul:
pxor %xmm8,%xmm0
movdqa %xmm3,%xmm5
pshufd $0b01001110,%xmm3,%xmm4
pshufd $78,%xmm3,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
@ -1096,7 +1096,7 @@ gcm_ghash_clmul:
.Lmod_loop:
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm8
pshufd $0b01001110,%xmm0,%xmm4
pshufd $78,%xmm0,%xmm4
pxor %xmm0,%xmm4
.byte 102,15,58,68,198,0
@ -1134,7 +1134,7 @@ gcm_ghash_clmul:
pslldq $8,%xmm0
psrldq $8,%xmm8
pxor %xmm9,%xmm0
pshufd $0b01001110,%xmm5,%xmm4
pshufd $78,%xmm5,%xmm4
pxor %xmm8,%xmm1
pxor %xmm5,%xmm4
@ -1156,7 +1156,7 @@ gcm_ghash_clmul:
.Leven_tail:
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm8
pshufd $0b01001110,%xmm0,%xmm4
pshufd $78,%xmm0,%xmm4
pxor %xmm0,%xmm4
.byte 102,15,58,68,198,0
@ -1204,7 +1204,7 @@ gcm_ghash_clmul:
.byte 102,69,15,56,0,194
pxor %xmm8,%xmm0
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17

12
deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-mb-x86_64.s

@ -2599,10 +2599,10 @@ _shaext_shortcut:
punpcklqdq %xmm5,%xmm0
punpckhqdq %xmm5,%xmm8
pshufd $0b00111111,%xmm7,%xmm1
pshufd $0b01111111,%xmm7,%xmm9
pshufd $0b00011011,%xmm0,%xmm0
pshufd $0b00011011,%xmm8,%xmm8
pshufd $63,%xmm7,%xmm1
pshufd $127,%xmm7,%xmm9
pshufd $27,%xmm0,%xmm0
pshufd $27,%xmm8,%xmm8
jmp .Loop_shaext
.align 32
@ -2888,8 +2888,8 @@ _shaext_shortcut:
movl 280(%rsp),%edx
pshufd $0b00011011,%xmm0,%xmm0
pshufd $0b00011011,%xmm8,%xmm8
pshufd $27,%xmm0,%xmm0
pshufd $27,%xmm8,%xmm8
movdqa %xmm0,%xmm6
punpckldq %xmm8,%xmm0

8
deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s

@ -1240,9 +1240,9 @@ _shaext_shortcut:
movdqa K_XX_XX+160(%rip),%xmm3
movdqu (%rsi),%xmm4
pshufd $0b00011011,%xmm0,%xmm0
pshufd $27,%xmm0,%xmm0
movdqu 16(%rsi),%xmm5
pshufd $0b00011011,%xmm1,%xmm1
pshufd $27,%xmm1,%xmm1
movdqu 32(%rsi),%xmm6
.byte 102,15,56,0,227
movdqu 48(%rsi),%xmm7
@ -1392,8 +1392,8 @@ _shaext_shortcut:
jnz .Loop_shaext
pshufd $0b00011011,%xmm0,%xmm0
pshufd $0b00011011,%xmm1,%xmm1
pshufd $27,%xmm0,%xmm0
pshufd $27,%xmm1,%xmm1
movdqu %xmm0,(%rdi)
movd %xmm1,16(%rdi)
.byte 0xf3,0xc3

16
deps/openssl/asm_obsolete/x64-elf-gas/sha/sha256-mb-x86_64.s

@ -2677,10 +2677,10 @@ _shaext_shortcut:
punpckhqdq %xmm8,%xmm14
punpckhqdq %xmm10,%xmm15
pshufd $0b00011011,%xmm12,%xmm12
pshufd $0b00011011,%xmm13,%xmm13
pshufd $0b00011011,%xmm14,%xmm14
pshufd $0b00011011,%xmm15,%xmm15
pshufd $27,%xmm12,%xmm12
pshufd $27,%xmm13,%xmm13
pshufd $27,%xmm14,%xmm14
pshufd $27,%xmm15,%xmm15
jmp .Loop_shaext
.align 32
@ -3066,10 +3066,10 @@ _shaext_shortcut:
movl 280(%rsp),%edx
pshufd $0b00011011,%xmm12,%xmm12
pshufd $0b00011011,%xmm13,%xmm13
pshufd $0b00011011,%xmm14,%xmm14
pshufd $0b00011011,%xmm15,%xmm15
pshufd $27,%xmm12,%xmm12
pshufd $27,%xmm13,%xmm13
pshufd $27,%xmm14,%xmm14
pshufd $27,%xmm15,%xmm15
movdqa %xmm12,%xmm5
movdqa %xmm13,%xmm6

8
deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-sha1-x86_64.s

@ -1392,8 +1392,8 @@ aesni_cbc_sha1_enc_shaext:
movups 16(%rcx),%xmm0
leaq 112(%rcx),%rcx
pshufd $0b00011011,%xmm8,%xmm8
pshufd $0b00011011,%xmm9,%xmm9
pshufd $27,%xmm8,%xmm8
pshufd $27,%xmm9,%xmm9
jmp L$oop_shaext
.p2align 4
@ -1672,8 +1672,8 @@ L$aesenclast9:
leaq 64(%rdi),%rdi
jnz L$oop_shaext
pshufd $0b00011011,%xmm8,%xmm8
pshufd $0b00011011,%xmm9,%xmm9
pshufd $27,%xmm8,%xmm8
pshufd $27,%xmm9,%xmm9
movups %xmm2,(%r8)
movdqu %xmm8,(%r9)
movd %xmm9,16(%r9)

30
deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-x86_64.s

@ -3462,11 +3462,11 @@ L$key_expansion_128:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
L$key_expansion_128_cold:
shufps $0b00010000,%xmm0,%xmm4
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $0b10001100,%xmm0,%xmm4
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $0b11111111,%xmm1,%xmm1
shufps $255,%xmm1,%xmm1
xorps %xmm1,%xmm0
.byte 0xf3,0xc3
@ -3477,25 +3477,25 @@ L$key_expansion_192a:
L$key_expansion_192a_cold:
movaps %xmm2,%xmm5
L$key_expansion_192b_warm:
shufps $0b00010000,%xmm0,%xmm4
shufps $16,%xmm0,%xmm4
movdqa %xmm2,%xmm3
xorps %xmm4,%xmm0
shufps $0b10001100,%xmm0,%xmm4
shufps $140,%xmm0,%xmm4
pslldq $4,%xmm3
xorps %xmm4,%xmm0
pshufd $0b01010101,%xmm1,%xmm1
pshufd $85,%xmm1,%xmm1
pxor %xmm3,%xmm2
pxor %xmm1,%xmm0
pshufd $0b11111111,%xmm0,%xmm3
pshufd $255,%xmm0,%xmm3
pxor %xmm3,%xmm2
.byte 0xf3,0xc3
.p2align 4
L$key_expansion_192b:
movaps %xmm0,%xmm3
shufps $0b01000100,%xmm0,%xmm5
shufps $68,%xmm0,%xmm5
movups %xmm5,(%rax)
shufps $0b01001110,%xmm2,%xmm3
shufps $78,%xmm2,%xmm3
movups %xmm3,16(%rax)
leaq 32(%rax),%rax
jmp L$key_expansion_192b_warm
@ -3505,11 +3505,11 @@ L$key_expansion_256a:
movups %xmm2,(%rax)
leaq 16(%rax),%rax
L$key_expansion_256a_cold:
shufps $0b00010000,%xmm0,%xmm4
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $0b10001100,%xmm0,%xmm4
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $0b11111111,%xmm1,%xmm1
shufps $255,%xmm1,%xmm1
xorps %xmm1,%xmm0
.byte 0xf3,0xc3
@ -3518,11 +3518,11 @@ L$key_expansion_256b:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
shufps $0b00010000,%xmm2,%xmm4
shufps $16,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $0b10001100,%xmm2,%xmm4
shufps $140,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $0b10101010,%xmm1,%xmm1
shufps $170,%xmm1,%xmm1
xorps %xmm1,%xmm2
.byte 0xf3,0xc3

31
deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s

@ -34,6 +34,20 @@ L$mul_enter:
movq %r11,8(%rsp,%r9,8)
L$mul_body:
subq %rsp,%r11
andq $-4096,%r11
L$mul_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x66,0x2e
jnc L$mul_page_walk
movq %rdx,%r12
movq (%r8),%r8
movq (%r12),%rbx
@ -231,6 +245,14 @@ L$mul4x_enter:
movq %r11,8(%rsp,%r9,8)
L$mul4x_body:
subq %rsp,%r11
andq $-4096,%r11
L$mul4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$mul4x_page_walk
movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12
movq (%r8),%r8
@ -653,6 +675,15 @@ L$sqr8x_sp_alt:
subq %r11,%rsp
L$sqr8x_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-4096,%r11
L$sqr8x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$sqr8x_page_walk
movq %r9,%r10
negq %r9

41
deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s

@ -30,6 +30,20 @@ L$mul_enter:
movq %rax,8(%rsp,%r9,8)
L$mul_body:
subq %rsp,%rax
andq $-4096,%rax
L$mul_page_walk:
movq (%rsp,%rax,1),%r11
subq $4096,%rax
.byte 0x2e
jnc L$mul_page_walk
leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0
movdqa 16(%r10),%xmm1
@ -442,6 +456,15 @@ L$mul4xsp_alt:
subq %r11,%rsp
L$mul4xsp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-4096,%r11
L$mul4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$mul4x_page_walk
negq %r9
movq %rax,40(%rsp)
@ -1031,6 +1054,15 @@ L$pwr_sp_alt:
subq %r11,%rsp
L$pwr_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-4096,%r11
L$pwr_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$pwr_page_walk
movq %r9,%r10
negq %r9
@ -1972,6 +2004,15 @@ L$from_sp_alt:
subq %r11,%rsp
L$from_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-4096,%r11
L$from_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$from_page_walk
movq %r9,%r10
negq %r9

34
deps/openssl/asm_obsolete/x64-macosx-gas/md5/md5-x86_64.s

@ -493,14 +493,14 @@ L$loop:
movl %ecx,%r11d
addl %ecx,%ebx
movl 0(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
xorl %edx,%r11d
leal -198630844(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 28(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
@ -509,7 +509,7 @@ L$loop:
xorl %ebx,%r11d
addl %r11d,%edx
movl 56(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
@ -518,7 +518,7 @@ L$loop:
xorl %eax,%r11d
addl %r11d,%ecx
movl 20(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
@ -527,7 +527,7 @@ L$loop:
xorl %edx,%r11d
addl %r11d,%ebx
movl 48(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
@ -536,7 +536,7 @@ L$loop:
xorl %ecx,%r11d
addl %r11d,%eax
movl 12(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
@ -545,7 +545,7 @@ L$loop:
xorl %ebx,%r11d
addl %r11d,%edx
movl 40(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
@ -554,7 +554,7 @@ L$loop:
xorl %eax,%r11d
addl %r11d,%ecx
movl 4(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
@ -563,7 +563,7 @@ L$loop:
xorl %edx,%r11d
addl %r11d,%ebx
movl 32(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
@ -572,7 +572,7 @@ L$loop:
xorl %ecx,%r11d
addl %r11d,%eax
movl 60(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
@ -581,7 +581,7 @@ L$loop:
xorl %ebx,%r11d
addl %r11d,%edx
movl 24(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
@ -590,7 +590,7 @@ L$loop:
xorl %eax,%r11d
addl %r11d,%ecx
movl 52(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
@ -599,7 +599,7 @@ L$loop:
xorl %edx,%r11d
addl %r11d,%ebx
movl 16(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
@ -608,7 +608,7 @@ L$loop:
xorl %ecx,%r11d
addl %r11d,%eax
movl 44(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
@ -617,7 +617,7 @@ L$loop:
xorl %ebx,%r11d
addl %r11d,%edx
movl 8(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
@ -626,7 +626,7 @@ L$loop:
xorl %eax,%r11d
addl %r11d,%ecx
movl 36(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
@ -635,7 +635,7 @@ L$loop:
xorl %edx,%r11d
addl %r11d,%ebx
movl 0(%rsi),%r10d
movl $4294967295,%r11d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx

48
deps/openssl/asm_obsolete/x64-macosx-gas/modes/ghash-x86_64.s

@ -661,10 +661,10 @@ L$ghash_epilogue:
_gcm_init_clmul:
L$_init_clmul:
movdqu (%rsi),%xmm2
pshufd $0b01001110,%xmm2,%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $0b11111111,%xmm2,%xmm4
pshufd $255,%xmm2,%xmm4
movdqa %xmm2,%xmm3
psllq $1,%xmm2
pxor %xmm5,%xmm5
@ -678,11 +678,11 @@ L$_init_clmul:
pxor %xmm5,%xmm2
pshufd $0b01001110,%xmm2,%xmm6
pshufd $78,%xmm2,%xmm6
movdqa %xmm2,%xmm0
pxor %xmm2,%xmm6
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@ -718,8 +718,8 @@ L$_init_clmul:
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $0b01001110,%xmm2,%xmm3
pshufd $0b01001110,%xmm0,%xmm4
pshufd $78,%xmm2,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm2,%xmm3
movdqu %xmm2,0(%rdi)
pxor %xmm0,%xmm4
@ -727,7 +727,7 @@ L$_init_clmul:
.byte 102,15,58,15,227,8
movdqu %xmm4,32(%rdi)
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@ -765,7 +765,7 @@ L$_init_clmul:
pxor %xmm1,%xmm0
movdqa %xmm0,%xmm5
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@ -801,8 +801,8 @@ L$_init_clmul:
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $0b01001110,%xmm5,%xmm3
pshufd $0b01001110,%xmm0,%xmm4
pshufd $78,%xmm5,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm5,%xmm3
movdqu %xmm5,48(%rdi)
pxor %xmm0,%xmm4
@ -822,7 +822,7 @@ L$_gmult_clmul:
movdqu 32(%rsi),%xmm4
.byte 102,15,56,0,197
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
@ -899,14 +899,14 @@ L$_ghash_clmul:
.byte 102,65,15,56,0,218
.byte 102,69,15,56,0,218
movdqa %xmm3,%xmm5
pshufd $0b01001110,%xmm3,%xmm4
pshufd $78,%xmm3,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0
movdqa %xmm11,%xmm13
pshufd $0b01001110,%xmm11,%xmm12
pshufd $78,%xmm11,%xmm12
pxor %xmm11,%xmm12
.byte 102,68,15,58,68,222,0
.byte 102,68,15,58,68,238,17
@ -921,12 +921,12 @@ L$_ghash_clmul:
.byte 102,69,15,56,0,218
.byte 102,69,15,56,0,194
movdqa %xmm11,%xmm13
pshufd $0b01001110,%xmm11,%xmm12
pshufd $78,%xmm11,%xmm12
pxor %xmm8,%xmm0
pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm8
pshufd $78,%xmm0,%xmm8
pxor %xmm0,%xmm8
.byte 102,69,15,58,68,238,17
.byte 102,68,15,58,68,231,0
@ -949,14 +949,14 @@ L$mod4_loop:
movdqu 32(%rdx),%xmm3
movdqa %xmm11,%xmm13
.byte 102,68,15,58,68,199,16
pshufd $0b01001110,%xmm11,%xmm12
pshufd $78,%xmm11,%xmm12
xorps %xmm5,%xmm1
pxor %xmm11,%xmm12
.byte 102,65,15,56,0,218
movups 32(%rsi),%xmm7
xorps %xmm4,%xmm8
.byte 102,68,15,58,68,218,0
pshufd $0b01001110,%xmm3,%xmm4
pshufd $78,%xmm3,%xmm4
pxor %xmm0,%xmm8
movdqa %xmm3,%xmm5
@ -1000,7 +1000,7 @@ L$mod4_loop:
movdqa %xmm11,%xmm13
pxor %xmm12,%xmm4
pshufd $0b01001110,%xmm11,%xmm12
pshufd $78,%xmm11,%xmm12
pxor %xmm9,%xmm0
pxor %xmm8,%xmm1
pxor %xmm11,%xmm12
@ -1010,7 +1010,7 @@ L$mod4_loop:
movdqa %xmm0,%xmm1
.byte 102,69,15,58,68,238,17
xorps %xmm11,%xmm3
pshufd $0b01001110,%xmm0,%xmm8
pshufd $78,%xmm0,%xmm8
pxor %xmm0,%xmm8
.byte 102,68,15,58,68,231,0
@ -1079,7 +1079,7 @@ L$skip4x:
pxor %xmm8,%xmm0
movdqa %xmm3,%xmm5
pshufd $0b01001110,%xmm3,%xmm4
pshufd $78,%xmm3,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
@ -1096,7 +1096,7 @@ L$skip4x:
L$mod_loop:
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm8
pshufd $0b01001110,%xmm0,%xmm4
pshufd $78,%xmm0,%xmm4
pxor %xmm0,%xmm4
.byte 102,15,58,68,198,0
@ -1134,7 +1134,7 @@ L$mod_loop:
pslldq $8,%xmm0
psrldq $8,%xmm8
pxor %xmm9,%xmm0
pshufd $0b01001110,%xmm5,%xmm4
pshufd $78,%xmm5,%xmm4
pxor %xmm8,%xmm1
pxor %xmm5,%xmm4
@ -1156,7 +1156,7 @@ L$mod_loop:
L$even_tail:
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm8
pshufd $0b01001110,%xmm0,%xmm4
pshufd $78,%xmm0,%xmm4
pxor %xmm0,%xmm4
.byte 102,15,58,68,198,0
@ -1204,7 +1204,7 @@ L$odd_tail:
.byte 102,69,15,56,0,194
pxor %xmm8,%xmm0
movdqa %xmm0,%xmm1
pshufd $0b01001110,%xmm0,%xmm3
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17

12
deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-mb-x86_64.s

@ -2599,10 +2599,10 @@ L$oop_grande_shaext:
punpcklqdq %xmm5,%xmm0
punpckhqdq %xmm5,%xmm8
pshufd $0b00111111,%xmm7,%xmm1
pshufd $0b01111111,%xmm7,%xmm9
pshufd $0b00011011,%xmm0,%xmm0
pshufd $0b00011011,%xmm8,%xmm8
pshufd $63,%xmm7,%xmm1
pshufd $127,%xmm7,%xmm9
pshufd $27,%xmm0,%xmm0
pshufd $27,%xmm8,%xmm8
jmp L$oop_shaext
.p2align 5
@ -2888,8 +2888,8 @@ L$oop_shaext:
movl 280(%rsp),%edx
pshufd $0b00011011,%xmm0,%xmm0
pshufd $0b00011011,%xmm8,%xmm8
pshufd $27,%xmm0,%xmm0
pshufd $27,%xmm8,%xmm8
movdqa %xmm0,%xmm6
punpckldq %xmm8,%xmm0

8
deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s

@ -1240,9 +1240,9 @@ _shaext_shortcut:
movdqa K_XX_XX+160(%rip),%xmm3
movdqu (%rsi),%xmm4
pshufd $0b00011011,%xmm0,%xmm0
pshufd $27,%xmm0,%xmm0
movdqu 16(%rsi),%xmm5
pshufd $0b00011011,%xmm1,%xmm1
pshufd $27,%xmm1,%xmm1
movdqu 32(%rsi),%xmm6
.byte 102,15,56,0,227
movdqu 48(%rsi),%xmm7
@ -1392,8 +1392,8 @@ L$oop_shaext:
jnz L$oop_shaext
pshufd $0b00011011,%xmm0,%xmm0
pshufd $0b00011011,%xmm1,%xmm1
pshufd $27,%xmm0,%xmm0
pshufd $27,%xmm1,%xmm1
movdqu %xmm0,(%rdi)
movd %xmm1,16(%rdi)
.byte 0xf3,0xc3

16
deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha256-mb-x86_64.s

@ -2677,10 +2677,10 @@ L$oop_grande_shaext:
punpckhqdq %xmm8,%xmm14
punpckhqdq %xmm10,%xmm15
pshufd $0b00011011,%xmm12,%xmm12
pshufd $0b00011011,%xmm13,%xmm13
pshufd $0b00011011,%xmm14,%xmm14
pshufd $0b00011011,%xmm15,%xmm15
pshufd $27,%xmm12,%xmm12
pshufd $27,%xmm13,%xmm13
pshufd $27,%xmm14,%xmm14
pshufd $27,%xmm15,%xmm15
jmp L$oop_shaext
.p2align 5
@ -3066,10 +3066,10 @@ L$oop_shaext:
movl 280(%rsp),%edx
pshufd $0b00011011,%xmm12,%xmm12
pshufd $0b00011011,%xmm13,%xmm13
pshufd $0b00011011,%xmm14,%xmm14
pshufd $0b00011011,%xmm15,%xmm15
pshufd $27,%xmm12,%xmm12
pshufd $27,%xmm13,%xmm13
pshufd $27,%xmm14,%xmm14
pshufd $27,%xmm15,%xmm15
movdqa %xmm12,%xmm5
movdqa %xmm13,%xmm6

31
deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm

@ -47,6 +47,20 @@ $L$mul_enter::
mov QWORD PTR[8+r9*8+rsp],r11
$L$mul_body::
sub r11,rsp
and r11,-4096
$L$mul_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 066h,02eh
jnc $L$mul_page_walk
mov r12,rdx
mov r8,QWORD PTR[r8]
mov rbx,QWORD PTR[r12]
@ -259,6 +273,14 @@ $L$mul4x_enter::
mov QWORD PTR[8+r9*8+rsp],r11
$L$mul4x_body::
sub r11,rsp
and r11,-4096
$L$mul4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$mul4x_page_walk
mov QWORD PTR[16+r9*8+rsp],rdi
mov r12,rdx
mov r8,QWORD PTR[r8]
@ -696,6 +718,15 @@ $L$sqr8x_sp_alt::
sub rsp,r11
$L$sqr8x_sp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and r11,-4096
$L$sqr8x_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$sqr8x_page_walk
mov r10,r9
neg r9

41
deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm

@ -43,6 +43,20 @@ $L$mul_enter::
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul_body::
sub rax,rsp
and rax,-4096
$L$mul_page_walk::
mov r11,QWORD PTR[rax*1+rsp]
sub rax,4096
DB 02eh
jnc $L$mul_page_walk
lea r12,QWORD PTR[128+rdx]
movdqa xmm0,XMMWORD PTR[r10]
movdqa xmm1,XMMWORD PTR[16+r10]
@ -470,6 +484,15 @@ $L$mul4xsp_alt::
sub rsp,r11
$L$mul4xsp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and r11,-4096
$L$mul4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$mul4x_page_walk
neg r9
mov QWORD PTR[40+rsp],rax
@ -1074,6 +1097,15 @@ $L$pwr_sp_alt::
sub rsp,r11
$L$pwr_sp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and r11,-4096
$L$pwr_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$pwr_page_walk
mov r10,r9
neg r9
@ -2030,6 +2062,15 @@ $L$from_sp_alt::
sub rsp,r11
$L$from_sp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and r11,-4096
$L$from_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$from_page_walk
mov r10,r9
neg r9

78
deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s

@ -29,6 +29,14 @@ bn_mul_mont:
xorl $2048,%edx
subl %edx,%esp
andl $-64,%esp
movl %ebp,%eax
subl %esp,%eax
andl $-4096,%eax
.L001page_walk:
movl (%esp,%eax,1),%edx
subl $4096,%eax
.byte 46
jnc .L001page_walk
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
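
Editorial note: apart from the page-walk insertion above, the rest of the churn in this file (and in the x86-macosx-gas and x86-win32-masm copies that follow) is mechanical. The generator numbers local labels sequentially in order of emission, so inserting the new .L001page_walk label shifts every later auto-numbered label by one (.L001non_sse2 becomes .L002non_sse2, .L0021st becomes .L0031st, and so on) with no change to the instructions around them.
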
@ -44,7 +52,7 @@ bn_mul_mont:
movl %ebp,24(%esp)
leal OPENSSL_ia32cap_P,%eax
btl $26,(%eax)
jnc .L001non_sse2
jnc .L002non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
@ -68,7 +76,7 @@ bn_mul_mont:
psrlq $32,%mm3
incl %ecx
.align 16
.L0021st:
.L0031st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -83,7 +91,7 @@ bn_mul_mont:
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl .L0021st
jl .L0031st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -97,7 +105,7 @@ bn_mul_mont:
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
.L003outer:
.L004outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
@ -119,7 +127,7 @@ bn_mul_mont:
paddq %mm6,%mm2
incl %ecx
decl %ebx
.L004inner:
.L005inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -136,7 +144,7 @@ bn_mul_mont:
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
jnz .L004inner
jnz .L005inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
@ -154,11 +162,11 @@ bn_mul_mont:
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle .L003outer
jle .L004outer
emms
jmp .L005common_tail
jmp .L006common_tail
.align 16
.L001non_sse2:
.L002non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@ -169,12 +177,12 @@ bn_mul_mont:
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz .L006bn_sqr_mont
jz .L007bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 16
.L007mull:
.L008mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@ -183,7 +191,7 @@ bn_mul_mont:
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L007mull
jl .L008mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@ -201,9 +209,9 @@ bn_mul_mont:
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp .L0082ndmadd
jmp .L0092ndmadd
.align 16
.L0091stmadd:
.L0101stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -214,7 +222,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L0091stmadd
jl .L0101stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@ -237,7 +245,7 @@ bn_mul_mont:
adcl $0,%edx
movl $1,%ecx
.align 16
.L0082ndmadd:
.L0092ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -248,7 +256,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0082ndmadd
jl .L0092ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -264,16 +272,16 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je .L005common_tail
je .L006common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp .L0091stmadd
jmp .L0101stmadd
.align 16
.L006bn_sqr_mont:
.L007bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@ -284,7 +292,7 @@ bn_mul_mont:
andl $1,%ebx
incl %ecx
.align 16
.L010sqr:
.L011sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -296,7 +304,7 @@ bn_mul_mont:
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl .L010sqr
jl .L011sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -320,7 +328,7 @@ bn_mul_mont:
movl 4(%esi),%eax
movl $1,%ecx
.align 16
.L0113rdmadd:
.L0123rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -339,7 +347,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0113rdmadd
jl .L0123rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -355,7 +363,7 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je .L005common_tail
je .L006common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
@ -367,12 +375,12 @@ bn_mul_mont:
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je .L012sqrlast
je .L013sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 16
.L013sqradd:
.L014sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -388,13 +396,13 @@ bn_mul_mont:
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle .L013sqradd
jle .L014sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
.L012sqrlast:
.L013sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@ -409,9 +417,9 @@ bn_mul_mont:
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp .L0113rdmadd
jmp .L0123rdmadd
.align 16
.L005common_tail:
.L006common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
@ -419,13 +427,13 @@ bn_mul_mont:
movl %ebx,%ecx
xorl %edx,%edx
.align 16
.L014sub:
.L015sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge .L014sub
jge .L015sub
sbbl $0,%eax
andl %eax,%esi
notl %eax
@ -433,12 +441,12 @@ bn_mul_mont:
andl %eax,%ebp
orl %ebp,%esi
.align 16
.L015copy:
.L016copy:
movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4)
decl %ebx
jge .L015copy
jge .L016copy
movl 24(%esp),%esp
movl $1,%eax
.L000just_leave:

84
deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s

@ -28,6 +28,14 @@ L_bn_mul_mont_begin:
xorl $2048,%edx
subl %edx,%esp
andl $-64,%esp
movl %ebp,%eax
subl %esp,%eax
andl $-4096,%eax
L001page_walk:
movl (%esp,%eax,1),%edx
subl $4096,%eax
.byte 46
jnc L001page_walk
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
@ -41,12 +49,12 @@ L_bn_mul_mont_begin:
movl %esi,20(%esp)
leal -3(%edi),%ebx
movl %ebp,24(%esp)
call L001PIC_me_up
L001PIC_me_up:
call L002PIC_me_up
L002PIC_me_up:
popl %eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001PIC_me_up(%eax),%eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L002PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc L002non_sse2
jnc L003non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
@ -70,7 +78,7 @@ L001PIC_me_up:
psrlq $32,%mm3
incl %ecx
.align 4,0x90
L0031st:
L0041st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -85,7 +93,7 @@ L0031st:
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl L0031st
jl L0041st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -99,7 +107,7 @@ L0031st:
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
L004outer:
L005outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
@ -121,7 +129,7 @@ L004outer:
paddq %mm6,%mm2
incl %ecx
decl %ebx
L005inner:
L006inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -138,7 +146,7 @@ L005inner:
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
jnz L005inner
jnz L006inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
@ -156,11 +164,11 @@ L005inner:
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle L004outer
jle L005outer
emms
jmp L006common_tail
jmp L007common_tail
.align 4,0x90
L002non_sse2:
L003non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@ -171,12 +179,12 @@ L002non_sse2:
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz L007bn_sqr_mont
jz L008bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 4,0x90
L008mull:
L009mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@ -185,7 +193,7 @@ L008mull:
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl L008mull
jl L009mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@ -203,9 +211,9 @@ L008mull:
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp L0092ndmadd
jmp L0102ndmadd
.align 4,0x90
L0101stmadd:
L0111stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -216,7 +224,7 @@ L0101stmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl L0101stmadd
jl L0111stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@ -239,7 +247,7 @@ L0101stmadd:
adcl $0,%edx
movl $1,%ecx
.align 4,0x90
L0092ndmadd:
L0102ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -250,7 +258,7 @@ L0092ndmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl L0092ndmadd
jl L0102ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -266,16 +274,16 @@ L0092ndmadd:
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je L006common_tail
je L007common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp L0101stmadd
jmp L0111stmadd
.align 4,0x90
L007bn_sqr_mont:
L008bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@ -286,7 +294,7 @@ L007bn_sqr_mont:
andl $1,%ebx
incl %ecx
.align 4,0x90
L011sqr:
L012sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -298,7 +306,7 @@ L011sqr:
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl L011sqr
jl L012sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -322,7 +330,7 @@ L011sqr:
movl 4(%esi),%eax
movl $1,%ecx
.align 4,0x90
L0123rdmadd:
L0133rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -341,7 +349,7 @@ L0123rdmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl L0123rdmadd
jl L0133rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -357,7 +365,7 @@ L0123rdmadd:
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je L006common_tail
je L007common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
@ -369,12 +377,12 @@ L0123rdmadd:
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je L013sqrlast
je L014sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 4,0x90
L014sqradd:
L015sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -390,13 +398,13 @@ L014sqradd:
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle L014sqradd
jle L015sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
L013sqrlast:
L014sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@ -411,9 +419,9 @@ L013sqrlast:
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp L0123rdmadd
jmp L0133rdmadd
.align 4,0x90
L006common_tail:
L007common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
@ -421,13 +429,13 @@ L006common_tail:
movl %ebx,%ecx
xorl %edx,%edx
.align 4,0x90
L015sub:
L016sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge L015sub
jge L016sub
sbbl $0,%eax
andl %eax,%esi
notl %eax
@ -435,12 +443,12 @@ L015sub:
andl %eax,%ebp
orl %ebp,%esi
.align 4,0x90
L016copy:
L017copy:
movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4)
decl %ebx
jge L016copy
jge L017copy
movl 24(%esp),%esp
movl $1,%eax
L000just_leave:

78
deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm

@ -45,6 +45,14 @@ $L_bn_mul_mont_begin::
xor edx,2048
sub esp,edx
and esp,-64
mov eax,ebp
sub eax,esp
and eax,-4096
$L001page_walk:
mov edx,DWORD PTR [eax*1+esp]
sub eax,4096
DB 46
jnc $L001page_walk
mov eax,DWORD PTR [esi]
mov ebx,DWORD PTR 4[esi]
mov ecx,DWORD PTR 8[esi]
@ -60,7 +68,7 @@ $L_bn_mul_mont_begin::
mov DWORD PTR 24[esp],ebp
lea eax,DWORD PTR _OPENSSL_ia32cap_P
bt DWORD PTR [eax],26
jnc $L001non_sse2
jnc $L002non_sse2
mov eax,-1
movd mm7,eax
mov esi,DWORD PTR 8[esp]
@ -84,7 +92,7 @@ $L_bn_mul_mont_begin::
psrlq mm3,32
inc ecx
ALIGN 16
$L0021st:
$L0031st:
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@ -99,7 +107,7 @@ $L0021st:
psrlq mm3,32
lea ecx,DWORD PTR 1[ecx]
cmp ecx,ebx
jl $L0021st
jl $L0031st
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@ -113,7 +121,7 @@ $L0021st:
paddq mm3,mm2
movq QWORD PTR 32[ebx*4+esp],mm3
inc edx
$L003outer:
$L004outer:
xor ecx,ecx
movd mm4,DWORD PTR [edx*4+edi]
movd mm5,DWORD PTR [esi]
@ -135,7 +143,7 @@ $L003outer:
paddq mm2,mm6
inc ecx
dec ebx
$L004inner:
$L005inner:
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@ -152,7 +160,7 @@ $L004inner:
paddq mm2,mm6
dec ebx
lea ecx,DWORD PTR 1[ecx]
jnz $L004inner
jnz $L005inner
mov ebx,ecx
pmuludq mm0,mm4
pmuludq mm1,mm5
@ -170,11 +178,11 @@ $L004inner:
movq QWORD PTR 32[ebx*4+esp],mm3
lea edx,DWORD PTR 1[edx]
cmp edx,ebx
jle $L003outer
jle $L004outer
emms
jmp $L005common_tail
jmp $L006common_tail
ALIGN 16
$L001non_sse2:
$L002non_sse2:
mov esi,DWORD PTR 8[esp]
lea ebp,DWORD PTR 1[ebx]
mov edi,DWORD PTR 12[esp]
@ -185,12 +193,12 @@ $L001non_sse2:
lea eax,DWORD PTR 4[ebx*4+edi]
or ebp,edx
mov edi,DWORD PTR [edi]
jz $L006bn_sqr_mont
jz $L007bn_sqr_mont
mov DWORD PTR 28[esp],eax
mov eax,DWORD PTR [esi]
xor edx,edx
ALIGN 16
$L007mull:
$L008mull:
mov ebp,edx
mul edi
add ebp,eax
@ -199,7 +207,7 @@ $L007mull:
mov eax,DWORD PTR [ecx*4+esi]
cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp
jl $L007mull
jl $L008mull
mov ebp,edx
mul edi
mov edi,DWORD PTR 20[esp]
@ -217,9 +225,9 @@ $L007mull:
mov eax,DWORD PTR 4[esi]
adc edx,0
inc ecx
jmp $L0082ndmadd
jmp $L0092ndmadd
ALIGN 16
$L0091stmadd:
$L0101stmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@ -230,7 +238,7 @@ $L0091stmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp
jl $L0091stmadd
jl $L0101stmadd
mov ebp,edx
mul edi
add eax,DWORD PTR 32[ebx*4+esp]
@ -253,7 +261,7 @@ $L0091stmadd:
adc edx,0
mov ecx,1
ALIGN 16
$L0082ndmadd:
$L0092ndmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@ -264,7 +272,7 @@ $L0082ndmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp
jl $L0082ndmadd
jl $L0092ndmadd
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ebx*4+esp]
@ -280,16 +288,16 @@ $L0082ndmadd:
mov DWORD PTR 32[ebx*4+esp],edx
cmp ecx,DWORD PTR 28[esp]
mov DWORD PTR 36[ebx*4+esp],eax
je $L005common_tail
je $L006common_tail
mov edi,DWORD PTR [ecx]
mov esi,DWORD PTR 8[esp]
mov DWORD PTR 12[esp],ecx
xor ecx,ecx
xor edx,edx
mov eax,DWORD PTR [esi]
jmp $L0091stmadd
jmp $L0101stmadd
ALIGN 16
$L006bn_sqr_mont:
$L007bn_sqr_mont:
mov DWORD PTR [esp],ebx
mov DWORD PTR 12[esp],ecx
mov eax,edi
@ -300,7 +308,7 @@ $L006bn_sqr_mont:
and ebx,1
inc ecx
ALIGN 16
$L010sqr:
$L011sqr:
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@ -312,7 +320,7 @@ $L010sqr:
cmp ecx,DWORD PTR [esp]
mov ebx,eax
mov DWORD PTR 28[ecx*4+esp],ebp
jl $L010sqr
jl $L011sqr
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@ -336,7 +344,7 @@ $L010sqr:
mov eax,DWORD PTR 4[esi]
mov ecx,1
ALIGN 16
$L0113rdmadd:
$L0123rdmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@ -355,7 +363,7 @@ $L0113rdmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp
jl $L0113rdmadd
jl $L0123rdmadd
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ebx*4+esp]
@ -371,7 +379,7 @@ $L0113rdmadd:
mov DWORD PTR 32[ebx*4+esp],edx
cmp ecx,ebx
mov DWORD PTR 36[ebx*4+esp],eax
je $L005common_tail
je $L006common_tail
mov edi,DWORD PTR 4[ecx*4+esi]
lea ecx,DWORD PTR 1[ecx]
mov eax,edi
@ -383,12 +391,12 @@ $L0113rdmadd:
xor ebp,ebp
cmp ecx,ebx
lea ecx,DWORD PTR 1[ecx]
je $L012sqrlast
je $L013sqrlast
mov ebx,edx
shr edx,1
and ebx,1
ALIGN 16
$L013sqradd:
$L014sqradd:
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@ -404,13 +412,13 @@ $L013sqradd:
cmp ecx,DWORD PTR [esp]
mov DWORD PTR 28[ecx*4+esp],ebp
mov ebx,eax
jle $L013sqradd
jle $L014sqradd
mov ebp,edx
add edx,edx
shr ebp,31
add edx,ebx
adc ebp,0
$L012sqrlast:
$L013sqrlast:
mov edi,DWORD PTR 20[esp]
mov esi,DWORD PTR 16[esp]
imul edi,DWORD PTR 32[esp]
@ -425,9 +433,9 @@ $L012sqrlast:
adc edx,0
mov ecx,1
mov eax,DWORD PTR 4[esi]
jmp $L0113rdmadd
jmp $L0123rdmadd
ALIGN 16
$L005common_tail:
$L006common_tail:
mov ebp,DWORD PTR 16[esp]
mov edi,DWORD PTR 4[esp]
lea esi,DWORD PTR 32[esp]
@ -435,13 +443,13 @@ $L005common_tail:
mov ecx,ebx
xor edx,edx
ALIGN 16
$L014sub:
$L015sub:
sbb eax,DWORD PTR [edx*4+ebp]
mov DWORD PTR [edx*4+edi],eax
dec ecx
mov eax,DWORD PTR 4[edx*4+esi]
lea edx,DWORD PTR 1[edx]
jge $L014sub
jge $L015sub
sbb eax,0
and esi,eax
not eax
@ -449,12 +457,12 @@ $L014sub:
and ebp,eax
or esi,ebp
ALIGN 16
$L015copy:
$L016copy:
mov eax,DWORD PTR [ebx*4+esi]
mov DWORD PTR [ebx*4+edi],eax
mov DWORD PTR 32[ebx*4+esp],ecx
dec ebx
jge $L015copy
jge $L016copy
mov esp,DWORD PTR 24[esp]
mov eax,1
$L000just_leave:
