deps: update openssl asm and asm_obsolete files

Regenerate the asm files with the Makefile, setting CC=gcc and ASM=gcc
where gcc is version 5.4.0. The asm files in the asm_obsolete dir, which
support older compilers and assemblers, are regenerated without the CC
and ASM envs (a sketch of both invocations follows below).

PR-URL: https://github.com/nodejs/node/pull/8714
Reviewed-By: Fedor Indutny <fedor@indutny.com>
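
As a minimal sketch of the two regeneration runs described above (the
Makefile locations and the way the CC/ASM environment variables are
consumed are assumptions drawn from the commit message, not verified
against the build scripts):

    # Primary asm files: gcc 5.4.0 acts as both compiler and assembler.
    cd deps/openssl/asm
    make clean
    CC=gcc ASM=gcc make

    # asm_obsolete files for older compilers/assemblers: CC/ASM unset.
    cd ../asm_obsolete
    make clean
    make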
Branch: v6
Author: Shigeki Ohtsu (8 years ago)
Commit: c17a1fedd8
  1. deps/openssl/asm/arm-void-gas/aes/bsaes-armv7.S (24 lines changed)
  2. deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s (155 lines changed)
  3. deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s (204 lines changed)
  4. deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s (189 lines changed)
  5. deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s (4 lines changed)
  6. deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s (155 lines changed)
  7. deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s (204 lines changed)
  8. deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s (189 lines changed)
  9. deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s (4 lines changed)
  10. deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm (185 lines changed)
  11. deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm (227 lines changed)
  12. deps/openssl/asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm (189 lines changed)
  13. deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm (4 lines changed)
  14. deps/openssl/asm/x86-elf-gas/bn/x86-mont.s (109 lines changed)
  15. deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s (115 lines changed)
  16. deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm (109 lines changed)
  17. deps/openssl/asm_obsolete/arm-void-gas/aes/bsaes-armv7.S (24 lines changed)
  18. deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s (127 lines changed)
  19. deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s (134 lines changed)
  20. deps/openssl/asm_obsolete/x64-elf-gas/ec/ecp_nistz256-x86_64.s (119 lines changed)
  21. deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s (4 lines changed)
  22. deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s (127 lines changed)
  23. deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s (134 lines changed)
  24. deps/openssl/asm_obsolete/x64-macosx-gas/ec/ecp_nistz256-x86_64.s (119 lines changed)
  25. deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s (4 lines changed)
  26. deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm (154 lines changed)
  27. deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm (153 lines changed)
  28. deps/openssl/asm_obsolete/x64-win32-masm/ec/ecp_nistz256-x86_64.asm (119 lines changed)
  29. deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-x86_64.asm (4 lines changed)
  30. deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s (109 lines changed)
  31. deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s (115 lines changed)
  32. deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm (109 lines changed)

deps/openssl/asm/arm-void-gas/aes/bsaes-armv7.S (24 lines changed)

@ -1816,8 +1816,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done
.align 4
.Lxts_enc_6:
vst1.64 {q14}, [r0,:128] @ next round tweak
veor q4, q4, q12
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -1853,8 +1851,6 @@ bsaes_xts_encrypt:
.align 5
.Lxts_enc_5:
vst1.64 {q13}, [r0,:128] @ next round tweak
veor q3, q3, q11
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -1883,8 +1879,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done
.align 4
.Lxts_enc_4:
vst1.64 {q12}, [r0,:128] @ next round tweak
veor q2, q2, q10
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -1910,8 +1904,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done
.align 4
.Lxts_enc_3:
vst1.64 {q11}, [r0,:128] @ next round tweak
veor q1, q1, q9
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -1936,8 +1928,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done
.align 4
.Lxts_enc_2:
vst1.64 {q10}, [r0,:128] @ next round tweak
veor q0, q0, q8
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -1960,7 +1950,7 @@ bsaes_xts_encrypt:
.align 4
.Lxts_enc_1:
mov r0, sp
veor q0, q8
veor q0, q0, q8
mov r1, sp
vst1.8 {q0}, [sp,:128]
mov r2, r10
@ -2346,8 +2336,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done
.align 4
.Lxts_dec_5:
vst1.64 {q13}, [r0,:128] @ next round tweak
veor q3, q3, q11
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -2376,8 +2364,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done
.align 4
.Lxts_dec_4:
vst1.64 {q12}, [r0,:128] @ next round tweak
veor q2, q2, q10
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -2403,8 +2389,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done
.align 4
.Lxts_dec_3:
vst1.64 {q11}, [r0,:128] @ next round tweak
veor q1, q1, q9
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -2429,8 +2413,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done
.align 4
.Lxts_dec_2:
vst1.64 {q10}, [r0,:128] @ next round tweak
veor q0, q0, q8
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -2453,12 +2435,12 @@ bsaes_xts_decrypt:
.align 4
.Lxts_dec_1:
mov r0, sp
veor q0, q8
veor q0, q0, q8
mov r1, sp
vst1.8 {q0}, [sp,:128]
mov r5, r2 @ preserve magic
mov r2, r10
mov r4, r3 @ preserve fp
mov r5, r2 @ preserve magic
bl AES_decrypt

deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s (155 lines changed)

@ -6,6 +6,8 @@
.type bn_mul_mont,@function
.align 16
bn_mul_mont:
movl %r9d,%r9d
movq %rsp,%rax
testl $3,%r9d
jnz .Lmul_enter
cmpl $8,%r9d
@ -26,29 +28,36 @@ bn_mul_mont:
pushq %r14
pushq %r15
movl %r9d,%r9d
leaq 2(%r9),%r10
negq %r9
movq %rsp,%r11
negq %r10
leaq (%rsp,%r10,8),%rsp
andq $-1024,%rsp
leaq -16(%rsp,%r9,8),%r10
negq %r9
andq $-1024,%r10
movq %r11,8(%rsp,%r9,8)
.Lmul_body:
subq %rsp,%r11
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
jmp .Lmul_page_walk_done
.align 16
.Lmul_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x66,0x2e
jnc .Lmul_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
.Lmul_page_walk_done:
movq %rax,8(%rsp,%r9,8)
.Lmul_body:
movq %rdx,%r12
movq (%r8),%r8
movq (%r12),%rbx
@ -216,19 +225,21 @@ bn_mul_mont:
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
movq -40(%rsi),%r14
movq -32(%rsi),%r13
movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
.Lmul_epilogue:
.byte 0xf3,0xc3
.size bn_mul_mont,.-bn_mul_mont
.type bn_mul4x_mont,@function
.align 16
bn_mul4x_mont:
movl %r9d,%r9d
movq %rsp,%rax
.Lmul4x_enter:
andl $0x80100,%r11d
cmpl $0x80100,%r11d
@ -240,23 +251,29 @@ bn_mul4x_mont:
pushq %r14
pushq %r15
movl %r9d,%r9d
leaq 4(%r9),%r10
negq %r9
movq %rsp,%r11
negq %r10
leaq (%rsp,%r10,8),%rsp
andq $-1024,%rsp
leaq -32(%rsp,%r9,8),%r10
negq %r9
andq $-1024,%r10
movq %r11,8(%rsp,%r9,8)
.Lmul4x_body:
subq %rsp,%r11
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul4x_page_walk
jmp .Lmul4x_page_walk_done
.Lmul4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lmul4x_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul4x_page_walk
.Lmul4x_page_walk_done:
movq %rax,8(%rsp,%r9,8)
.Lmul4x_body:
movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12
movq (%r8),%r8
@ -625,13 +642,13 @@ bn_mul4x_mont:
movdqu %xmm2,16(%rdi,%r14,1)
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
movq -40(%rsi),%r14
movq -32(%rsi),%r13
movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
.Lmul4x_epilogue:
.byte 0xf3,0xc3
.size bn_mul4x_mont,.-bn_mul4x_mont
@ -641,14 +658,15 @@ bn_mul4x_mont:
.type bn_sqr8x_mont,@function
.align 32
bn_sqr8x_mont:
.Lsqr8x_enter:
movq %rsp,%rax
.Lsqr8x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
.Lsqr8x_prologue:
movl %r9d,%r10d
shll $3,%r9d
@ -661,33 +679,42 @@ bn_sqr8x_mont:
leaq -64(%rsp,%r9,2),%r11
movq %rsp,%rbp
movq (%r8),%r8
subq %rsi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lsqr8x_sp_alt
subq %r11,%rsp
leaq -64(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -64(%rbp,%r9,2),%rbp
jmp .Lsqr8x_sp_done
.align 32
.Lsqr8x_sp_alt:
leaq 4096-64(,%r9,2),%r10
leaq -64(%rsp,%r9,2),%rsp
leaq -64(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
.Lsqr8x_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lsqr8x_page_walk
jmp .Lsqr8x_page_walk_done
.align 16
.Lsqr8x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lsqr8x_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lsqr8x_page_walk
.Lsqr8x_page_walk_done:
movq %r9,%r10
negq %r9
@ -800,30 +827,38 @@ bn_sqr8x_mont:
.type bn_mulx4x_mont,@function
.align 32
bn_mulx4x_mont:
.Lmulx4x_enter:
movq %rsp,%rax
.Lmulx4x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
.Lmulx4x_prologue:
shll $3,%r9d
.byte 0x67
xorq %r10,%r10
subq %r9,%r10
movq (%r8),%r8
leaq -72(%rsp,%r10,1),%rsp
andq $-128,%rsp
movq %rax,%r11
subq %rsp,%r11
leaq -72(%rsp,%r10,1),%rbp
andq $-128,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmulx4x_page_walk
jmp .Lmulx4x_page_walk_done
.align 16
.Lmulx4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x66,0x2e
jnc .Lmulx4x_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmulx4x_page_walk
.Lmulx4x_page_walk_done:
leaq (%rdx,%r9,1),%r10

deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s (204 lines changed)

@ -6,6 +6,8 @@
.type bn_mul_mont_gather5,@function
.align 64
bn_mul_mont_gather5:
movl %r9d,%r9d
movq %rsp,%rax
testl $7,%r9d
jnz .Lmul_enter
movl OPENSSL_ia32cap_P+8(%rip),%r11d
@ -13,10 +15,7 @@ bn_mul_mont_gather5:
.align 16
.Lmul_enter:
movl %r9d,%r9d
movq %rsp,%rax
movd 8(%rsp),%xmm5
leaq .Linc(%rip),%r10
pushq %rbx
pushq %rbp
pushq %r12
@ -24,26 +23,36 @@ bn_mul_mont_gather5:
pushq %r14
pushq %r15
leaq 2(%r9),%r11
negq %r11
leaq -264(%rsp,%r11,8),%rsp
andq $-1024,%rsp
negq %r9
movq %rsp,%r11
leaq -280(%rsp,%r9,8),%r10
negq %r9
andq $-1024,%r10
movq %rax,8(%rsp,%r9,8)
.Lmul_body:
subq %rsp,%rax
andq $-4096,%rax
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
jmp .Lmul_page_walk_done
.Lmul_page_walk:
movq (%rsp,%rax,1),%r11
subq $4096,%rax
.byte 0x2e
jnc .Lmul_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
.Lmul_page_walk_done:
leaq .Linc(%rip),%r10
movq %rax,8(%rsp,%r9,8)
.Lmul_body:
leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0
@ -414,18 +423,19 @@ bn_mul_mont_gather5:
.type bn_mul4x_mont_gather5,@function
.align 32
bn_mul4x_mont_gather5:
.byte 0x67
movq %rsp,%rax
.Lmul4x_enter:
andl $0x80108,%r11d
cmpl $0x80108,%r11d
je .Lmulx4x_enter
.byte 0x67
movq %rsp,%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
.Lmul4x_prologue:
.byte 0x67
shll $3,%r9d
@ -442,32 +452,40 @@ bn_mul4x_mont_gather5:
leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lmul4xsp_alt
subq %r11,%rsp
leaq -320(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp .Lmul4xsp_done
.align 32
.Lmul4xsp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
.Lmul4xsp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmul4x_page_walk
jmp .Lmul4x_page_walk_done
.Lmul4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lmul4x_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmul4x_page_walk
.Lmul4x_page_walk_done:
negq %r9
@ -1019,17 +1037,18 @@ mul4x_internal:
.type bn_power5,@function
.align 32
bn_power5:
movq %rsp,%rax
movl OPENSSL_ia32cap_P+8(%rip),%r11d
andl $0x80108,%r11d
cmpl $0x80108,%r11d
je .Lpowerx5_enter
movq %rsp,%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
.Lpower5_prologue:
shll $3,%r9d
leal (%r9,%r9,2),%r10d
@ -1044,32 +1063,40 @@ bn_power5:
leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lpwr_sp_alt
subq %r11,%rsp
leaq -320(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp .Lpwr_sp_done
.align 32
.Lpwr_sp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
.Lpwr_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lpwr_page_walk
jmp .Lpwr_page_walk_done
.Lpwr_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lpwr_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lpwr_page_walk
.Lpwr_page_walk_done:
movq %r9,%r10
negq %r9
@ -1980,6 +2007,7 @@ bn_from_mont8x:
pushq %r13
pushq %r14
pushq %r15
.Lfrom_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
@ -1994,32 +2022,40 @@ bn_from_mont8x:
leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lfrom_sp_alt
subq %r11,%rsp
leaq -320(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp .Lfrom_sp_done
.align 32
.Lfrom_sp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
.Lfrom_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lfrom_page_walk
jmp .Lfrom_page_walk_done
.Lfrom_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lfrom_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lfrom_page_walk
.Lfrom_page_walk_done:
movq %r9,%r10
negq %r9
@ -2113,14 +2149,15 @@ bn_from_mont8x:
.type bn_mulx4x_mont_gather5,@function
.align 32
bn_mulx4x_mont_gather5:
.Lmulx4x_enter:
movq %rsp,%rax
.Lmulx4x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
.Lmulx4x_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
@ -2137,31 +2174,39 @@ bn_mulx4x_mont_gather5:
leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lmulx4xsp_alt
subq %r11,%rsp
leaq -320(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp .Lmulx4xsp_done
.Lmulx4xsp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
.Lmulx4xsp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmulx4x_page_walk
jmp .Lmulx4x_page_walk_done
.Lmulx4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lmulx4x_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmulx4x_page_walk
.Lmulx4x_page_walk_done:
@ -2619,14 +2664,15 @@ mulx4x_internal:
.type bn_powerx5,@function
.align 32
bn_powerx5:
.Lpowerx5_enter:
movq %rsp,%rax
.Lpowerx5_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
.Lpowerx5_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
@ -2641,32 +2687,40 @@ bn_powerx5:
leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lpwrx_sp_alt
subq %r11,%rsp
leaq -320(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp .Lpwrx_sp_done
.align 32
.Lpwrx_sp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
.Lpwrx_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lpwrx_page_walk
jmp .Lpwrx_page_walk_done
.Lpwrx_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lpwrx_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lpwrx_page_walk
.Lpwrx_page_walk_done:
movq %r9,%r10
negq %r9

deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s (189 lines changed)

@ -27,6 +27,7 @@ ecp_nistz256_mul_by_2:
pushq %r13
movq 0(%rsi),%r8
xorq %r13,%r13
movq 8(%rsi),%r9
addq %r8,%r8
movq 16(%rsi),%r10
@ -37,7 +38,7 @@ ecp_nistz256_mul_by_2:
adcq %r10,%r10
adcq %r11,%r11
movq %r9,%rdx
sbbq %r13,%r13
adcq $0,%r13
subq 0(%rsi),%r8
movq %r10,%rcx
@ -45,14 +46,14 @@ ecp_nistz256_mul_by_2:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
testq %r13,%r13
sbbq $0,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
cmovcq %rax,%r8
cmovcq %rdx,%r9
movq %r8,0(%rdi)
cmovzq %rcx,%r10
cmovcq %rcx,%r10
movq %r9,8(%rdi)
cmovzq %r12,%r11
cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@ -149,12 +150,12 @@ ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq .Lpoly+24(%rip),%r11
testq %r13,%r13
sbbq $0,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
cmovzq %rcx,%r10
cmovzq %r12,%r11
cmovcq %rax,%r8
cmovcq %rdx,%r9
cmovcq %rcx,%r10
cmovcq %r12,%r11
xorq %r13,%r13
addq 0(%rsi),%r8
@ -171,14 +172,14 @@ ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq .Lpoly+24(%rip),%r11
testq %r13,%r13
sbbq $0,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
cmovcq %rax,%r8
cmovcq %rdx,%r9
movq %r8,0(%rdi)
cmovzq %rcx,%r10
cmovcq %rcx,%r10
movq %r9,8(%rdi)
cmovzq %r12,%r11
cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@ -217,14 +218,14 @@ ecp_nistz256_add:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
testq %r13,%r13
sbbq $0,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
cmovcq %rax,%r8
cmovcq %rdx,%r9
movq %r8,0(%rdi)
cmovzq %rcx,%r10
cmovcq %rcx,%r10
movq %r9,8(%rdi)
cmovzq %r12,%r11
cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@ -1461,13 +1462,14 @@ ecp_nistz256_avx2_select_w7:
.type __ecp_nistz256_add_toq,@function
.align 32
__ecp_nistz256_add_toq:
xorq %r11,%r11
addq 0(%rbx),%r12
adcq 8(%rbx),%r13
movq %r12,%rax
adcq 16(%rbx),%r8
adcq 24(%rbx),%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -1475,14 +1477,14 @@ __ecp_nistz256_add_toq:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovzq %rbp,%r13
cmovcq %rax,%r12
cmovcq %rbp,%r13
movq %r12,0(%rdi)
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq %r13,8(%rdi)
cmovzq %r10,%r9
cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@ -1550,13 +1552,14 @@ __ecp_nistz256_subq:
.type __ecp_nistz256_mul_by_2q,@function
.align 32
__ecp_nistz256_mul_by_2q:
xorq %r11,%r11
addq %r12,%r12
adcq %r13,%r13
movq %r12,%rax
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -1564,14 +1567,14 @@ __ecp_nistz256_mul_by_2q:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovzq %rbp,%r13
cmovcq %rax,%r12
cmovcq %rbp,%r13
movq %r12,0(%rdi)
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq %r13,8(%rdi)
cmovzq %r10,%r9
cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@ -1809,16 +1812,14 @@ ecp_nistz256_point_add:
movq %rdx,%rsi
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
por %xmm1,%xmm3
por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0
pshufd $0xb1,%xmm3,%xmm5
pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
@ -1830,14 +1831,14 @@ ecp_nistz256_point_add:
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
por %xmm0,%xmm1
.byte 102,72,15,110,199
movdqu 64(%rsi),%xmm0
movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
por %xmm2,%xmm3
por %xmm4,%xmm5
pxor %xmm4,%xmm4
por %xmm1,%xmm3
por %xmm0,%xmm1
.byte 102,72,15,110,199
leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp)
@ -1848,8 +1849,8 @@ ecp_nistz256_point_add:
call __ecp_nistz256_sqr_montq
pcmpeqd %xmm4,%xmm5
pshufd $0xb1,%xmm3,%xmm4
por %xmm3,%xmm4
pshufd $0xb1,%xmm1,%xmm4
por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
@ -2032,6 +2033,7 @@ ecp_nistz256_point_add:
xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi
adcq %r13,%r13
@ -2039,7 +2041,7 @@ ecp_nistz256_point_add:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -2047,15 +2049,15 @@ ecp_nistz256_point_add:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovcq %rax,%r12
movq 0(%rsi),%rax
cmovzq %rbp,%r13
cmovcq %rbp,%r13
movq 8(%rsi),%rbp
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq 16(%rsi),%rcx
cmovzq %r10,%r9
cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
@ -2213,16 +2215,14 @@ ecp_nistz256_point_add_affine:
movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp)
por %xmm1,%xmm3
por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0
pshufd $0xb1,%xmm3,%xmm5
pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5
@ -2340,6 +2340,7 @@ ecp_nistz256_point_add_affine:
xorq %r11,%r11
addq %r12,%r12
leaq 192(%rsp),%rsi
adcq %r13,%r13
@ -2347,7 +2348,7 @@ ecp_nistz256_point_add_affine:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -2355,15 +2356,15 @@ ecp_nistz256_point_add_affine:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovcq %rax,%r12
movq 0(%rsi),%rax
cmovzq %rbp,%r13
cmovcq %rbp,%r13
movq 8(%rsi),%rbp
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq 16(%rsi),%rcx
cmovzq %r10,%r9
cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
@ -2510,14 +2511,14 @@ __ecp_nistz256_add_tox:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
sbbq $0,%r11
btq $0,%r11
cmovncq %rax,%r12
cmovncq %rbp,%r13
cmovcq %rax,%r12
cmovcq %rbp,%r13
movq %r12,0(%rdi)
cmovncq %rcx,%r8
cmovcq %rcx,%r8
movq %r13,8(%rdi)
cmovncq %r10,%r9
cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@ -2605,14 +2606,14 @@ __ecp_nistz256_mul_by_2x:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
sbbq $0,%r11
btq $0,%r11
cmovncq %rax,%r12
cmovncq %rbp,%r13
cmovcq %rax,%r12
cmovcq %rbp,%r13
movq %r12,0(%rdi)
cmovncq %rcx,%r8
cmovcq %rcx,%r8
movq %r13,8(%rdi)
cmovncq %r10,%r9
cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@ -2842,16 +2843,14 @@ ecp_nistz256_point_addx:
movq %rdx,%rsi
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
por %xmm1,%xmm3
por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0
pshufd $0xb1,%xmm3,%xmm5
pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
@ -2863,14 +2862,14 @@ ecp_nistz256_point_addx:
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
por %xmm0,%xmm1
.byte 102,72,15,110,199
movdqu 64(%rsi),%xmm0
movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
por %xmm2,%xmm3
por %xmm4,%xmm5
pxor %xmm4,%xmm4
por %xmm1,%xmm3
por %xmm0,%xmm1
.byte 102,72,15,110,199
leaq 64-128(%rsi),%rsi
movq %rdx,544+0(%rsp)
@ -2881,8 +2880,8 @@ ecp_nistz256_point_addx:
call __ecp_nistz256_sqr_montx
pcmpeqd %xmm4,%xmm5
pshufd $0xb1,%xmm3,%xmm4
por %xmm3,%xmm4
pshufd $0xb1,%xmm1,%xmm4
por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
@ -3065,6 +3064,7 @@ ecp_nistz256_point_addx:
xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi
adcq %r13,%r13
@ -3072,7 +3072,7 @@ ecp_nistz256_point_addx:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -3080,15 +3080,15 @@ ecp_nistz256_point_addx:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovcq %rax,%r12
movq 0(%rsi),%rax
cmovzq %rbp,%r13
cmovcq %rbp,%r13
movq 8(%rsi),%rbp
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq 16(%rsi),%rcx
cmovzq %r10,%r9
cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subx
@ -3242,16 +3242,14 @@ ecp_nistz256_point_add_affinex:
movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp)
por %xmm1,%xmm3
por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0
pshufd $0xb1,%xmm3,%xmm5
pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5
@ -3369,6 +3367,7 @@ ecp_nistz256_point_add_affinex:
xorq %r11,%r11
addq %r12,%r12
leaq 192(%rsp),%rsi
adcq %r13,%r13
@ -3376,7 +3375,7 @@ ecp_nistz256_point_add_affinex:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -3384,15 +3383,15 @@ ecp_nistz256_point_add_affinex:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovcq %rax,%r12
movq 0(%rsi),%rax
cmovzq %rbp,%r13
cmovcq %rbp,%r13
movq 8(%rsi),%rbp
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq 16(%rsi),%rcx
cmovzq %r10,%r9
cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subx

deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s (4 lines changed)

@ -1263,9 +1263,9 @@ _shaext_shortcut:
.align 16
.Loop_shaext:
decq %rdx
leaq 64(%rsi),%rax
leaq 64(%rsi),%r8
paddd %xmm4,%xmm1
cmovneq %rax,%rsi
cmovneq %r8,%rsi
movdqa %xmm0,%xmm8
.byte 15,56,201,229
movdqa %xmm0,%xmm2

deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s (155 lines changed)

@ -6,6 +6,8 @@
.p2align 4
_bn_mul_mont:
movl %r9d,%r9d
movq %rsp,%rax
testl $3,%r9d
jnz L$mul_enter
cmpl $8,%r9d
@ -26,29 +28,36 @@ L$mul_enter:
pushq %r14
pushq %r15
movl %r9d,%r9d
leaq 2(%r9),%r10
negq %r9
movq %rsp,%r11
negq %r10
leaq (%rsp,%r10,8),%rsp
andq $-1024,%rsp
leaq -16(%rsp,%r9,8),%r10
negq %r9
andq $-1024,%r10
movq %r11,8(%rsp,%r9,8)
L$mul_body:
subq %rsp,%r11
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul_page_walk
jmp L$mul_page_walk_done
.p2align 4
L$mul_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x66,0x2e
jnc L$mul_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul_page_walk
L$mul_page_walk_done:
movq %rax,8(%rsp,%r9,8)
L$mul_body:
movq %rdx,%r12
movq (%r8),%r8
movq (%r12),%rbx
@ -216,19 +225,21 @@ L$copy:
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
movq -40(%rsi),%r14
movq -32(%rsi),%r13
movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
L$mul_epilogue:
.byte 0xf3,0xc3
.p2align 4
bn_mul4x_mont:
movl %r9d,%r9d
movq %rsp,%rax
L$mul4x_enter:
andl $0x80100,%r11d
cmpl $0x80100,%r11d
@ -240,23 +251,29 @@ L$mul4x_enter:
pushq %r14
pushq %r15
movl %r9d,%r9d
leaq 4(%r9),%r10
negq %r9
movq %rsp,%r11
negq %r10
leaq (%rsp,%r10,8),%rsp
andq $-1024,%rsp
leaq -32(%rsp,%r9,8),%r10
negq %r9
andq $-1024,%r10
movq %r11,8(%rsp,%r9,8)
L$mul4x_body:
subq %rsp,%r11
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul4x_page_walk
jmp L$mul4x_page_walk_done
L$mul4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$mul4x_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul4x_page_walk
L$mul4x_page_walk_done:
movq %rax,8(%rsp,%r9,8)
L$mul4x_body:
movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12
movq (%r8),%r8
@ -625,13 +642,13 @@ L$copy4x:
movdqu %xmm2,16(%rdi,%r14,1)
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
movq -40(%rsi),%r14
movq -32(%rsi),%r13
movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
L$mul4x_epilogue:
.byte 0xf3,0xc3
@ -641,14 +658,15 @@ L$mul4x_epilogue:
.p2align 5
bn_sqr8x_mont:
L$sqr8x_enter:
movq %rsp,%rax
L$sqr8x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
L$sqr8x_prologue:
movl %r9d,%r10d
shll $3,%r9d
@ -661,33 +679,42 @@ L$sqr8x_enter:
leaq -64(%rsp,%r9,2),%r11
movq %rsp,%rbp
movq (%r8),%r8
subq %rsi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$sqr8x_sp_alt
subq %r11,%rsp
leaq -64(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -64(%rbp,%r9,2),%rbp
jmp L$sqr8x_sp_done
.p2align 5
L$sqr8x_sp_alt:
leaq 4096-64(,%r9,2),%r10
leaq -64(%rsp,%r9,2),%rsp
leaq -64(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
L$sqr8x_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$sqr8x_page_walk
jmp L$sqr8x_page_walk_done
.p2align 4
L$sqr8x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$sqr8x_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$sqr8x_page_walk
L$sqr8x_page_walk_done:
movq %r9,%r10
negq %r9
@ -800,30 +827,38 @@ L$sqr8x_epilogue:
.p2align 5
bn_mulx4x_mont:
L$mulx4x_enter:
movq %rsp,%rax
L$mulx4x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
L$mulx4x_prologue:
shll $3,%r9d
.byte 0x67
xorq %r10,%r10
subq %r9,%r10
movq (%r8),%r8
leaq -72(%rsp,%r10,1),%rsp
andq $-128,%rsp
movq %rax,%r11
subq %rsp,%r11
leaq -72(%rsp,%r10,1),%rbp
andq $-128,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$mulx4x_page_walk
jmp L$mulx4x_page_walk_done
.p2align 4
L$mulx4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x66,0x2e
jnc L$mulx4x_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$mulx4x_page_walk
L$mulx4x_page_walk_done:
leaq (%rdx,%r9,1),%r10

deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s (204 lines changed)

@ -6,6 +6,8 @@
.p2align 6
_bn_mul_mont_gather5:
movl %r9d,%r9d
movq %rsp,%rax
testl $7,%r9d
jnz L$mul_enter
movl _OPENSSL_ia32cap_P+8(%rip),%r11d
@ -13,10 +15,7 @@ _bn_mul_mont_gather5:
.p2align 4
L$mul_enter:
movl %r9d,%r9d
movq %rsp,%rax
movd 8(%rsp),%xmm5
leaq L$inc(%rip),%r10
pushq %rbx
pushq %rbp
pushq %r12
@ -24,26 +23,36 @@ L$mul_enter:
pushq %r14
pushq %r15
leaq 2(%r9),%r11
negq %r11
leaq -264(%rsp,%r11,8),%rsp
andq $-1024,%rsp
negq %r9
movq %rsp,%r11
leaq -280(%rsp,%r9,8),%r10
negq %r9
andq $-1024,%r10
movq %rax,8(%rsp,%r9,8)
L$mul_body:
subq %rsp,%rax
andq $-4096,%rax
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul_page_walk
jmp L$mul_page_walk_done
L$mul_page_walk:
movq (%rsp,%rax,1),%r11
subq $4096,%rax
.byte 0x2e
jnc L$mul_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul_page_walk
L$mul_page_walk_done:
leaq L$inc(%rip),%r10
movq %rax,8(%rsp,%r9,8)
L$mul_body:
leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0
@ -414,18 +423,19 @@ L$mul_epilogue:
.p2align 5
bn_mul4x_mont_gather5:
.byte 0x67
movq %rsp,%rax
L$mul4x_enter:
andl $0x80108,%r11d
cmpl $0x80108,%r11d
je L$mulx4x_enter
.byte 0x67
movq %rsp,%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
L$mul4x_prologue:
.byte 0x67
shll $3,%r9d
@ -442,32 +452,40 @@ L$mul4x_enter:
leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$mul4xsp_alt
subq %r11,%rsp
leaq -320(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp L$mul4xsp_done
.p2align 5
L$mul4xsp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
L$mul4xsp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$mul4x_page_walk
jmp L$mul4x_page_walk_done
L$mul4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$mul4x_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$mul4x_page_walk
L$mul4x_page_walk_done:
negq %r9
@ -1019,17 +1037,18 @@ L$inner4x:
.p2align 5
_bn_power5:
movq %rsp,%rax
movl _OPENSSL_ia32cap_P+8(%rip),%r11d
andl $0x80108,%r11d
cmpl $0x80108,%r11d
je L$powerx5_enter
movq %rsp,%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
L$power5_prologue:
shll $3,%r9d
leal (%r9,%r9,2),%r10d
@ -1044,32 +1063,40 @@ _bn_power5:
leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$pwr_sp_alt
subq %r11,%rsp
leaq -320(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp L$pwr_sp_done
.p2align 5
L$pwr_sp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
L$pwr_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$pwr_page_walk
jmp L$pwr_page_walk_done
L$pwr_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$pwr_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$pwr_page_walk
L$pwr_page_walk_done:
movq %r9,%r10
negq %r9
@ -1980,6 +2007,7 @@ bn_from_mont8x:
pushq %r13
pushq %r14
pushq %r15
L$from_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
@ -1994,32 +2022,40 @@ bn_from_mont8x:
leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$from_sp_alt
subq %r11,%rsp
leaq -320(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp L$from_sp_done
.p2align 5
L$from_sp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
L$from_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$from_page_walk
jmp L$from_page_walk_done
L$from_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$from_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$from_page_walk
L$from_page_walk_done:
movq %r9,%r10
negq %r9
@ -2113,14 +2149,15 @@ L$from_epilogue:
.p2align 5
bn_mulx4x_mont_gather5:
L$mulx4x_enter:
movq %rsp,%rax
L$mulx4x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
L$mulx4x_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
@ -2137,31 +2174,39 @@ L$mulx4x_enter:
leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$mulx4xsp_alt
subq %r11,%rsp
leaq -320(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp L$mulx4xsp_done
L$mulx4xsp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
L$mulx4xsp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$mulx4x_page_walk
jmp L$mulx4x_page_walk_done
L$mulx4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$mulx4x_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$mulx4x_page_walk
L$mulx4x_page_walk_done:
@ -2619,14 +2664,15 @@ L$mulx4x_inner:
.p2align 5
bn_powerx5:
L$powerx5_enter:
movq %rsp,%rax
L$powerx5_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
L$powerx5_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
@ -2641,32 +2687,40 @@ L$powerx5_enter:
leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$pwrx_sp_alt
subq %r11,%rsp
leaq -320(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp L$pwrx_sp_done
.p2align 5
L$pwrx_sp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
L$pwrx_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$pwrx_page_walk
jmp L$pwrx_page_walk_done
L$pwrx_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$pwrx_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$pwrx_page_walk
L$pwrx_page_walk_done:
movq %r9,%r10
negq %r9

deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s (189 lines changed)

@ -27,6 +27,7 @@ _ecp_nistz256_mul_by_2:
pushq %r13
movq 0(%rsi),%r8
xorq %r13,%r13
movq 8(%rsi),%r9
addq %r8,%r8
movq 16(%rsi),%r10
@ -37,7 +38,7 @@ _ecp_nistz256_mul_by_2:
adcq %r10,%r10
adcq %r11,%r11
movq %r9,%rdx
sbbq %r13,%r13
adcq $0,%r13
subq 0(%rsi),%r8
movq %r10,%rcx
@ -45,14 +46,14 @@ _ecp_nistz256_mul_by_2:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
testq %r13,%r13
sbbq $0,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
cmovcq %rax,%r8
cmovcq %rdx,%r9
movq %r8,0(%rdi)
cmovzq %rcx,%r10
cmovcq %rcx,%r10
movq %r9,8(%rdi)
cmovzq %r12,%r11
cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@ -149,12 +150,12 @@ _ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq L$poly+24(%rip),%r11
testq %r13,%r13
sbbq $0,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
cmovzq %rcx,%r10
cmovzq %r12,%r11
cmovcq %rax,%r8
cmovcq %rdx,%r9
cmovcq %rcx,%r10
cmovcq %r12,%r11
xorq %r13,%r13
addq 0(%rsi),%r8
@ -171,14 +172,14 @@ _ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq L$poly+24(%rip),%r11
testq %r13,%r13
sbbq $0,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
cmovcq %rax,%r8
cmovcq %rdx,%r9
movq %r8,0(%rdi)
cmovzq %rcx,%r10
cmovcq %rcx,%r10
movq %r9,8(%rdi)
cmovzq %r12,%r11
cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@ -217,14 +218,14 @@ _ecp_nistz256_add:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
testq %r13,%r13
sbbq $0,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
cmovcq %rax,%r8
cmovcq %rdx,%r9
movq %r8,0(%rdi)
cmovzq %rcx,%r10
cmovcq %rcx,%r10
movq %r9,8(%rdi)
cmovzq %r12,%r11
cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@ -1461,13 +1462,14 @@ L$select_loop_avx2_w7:
.p2align 5
__ecp_nistz256_add_toq:
xorq %r11,%r11
addq 0(%rbx),%r12
adcq 8(%rbx),%r13
movq %r12,%rax
adcq 16(%rbx),%r8
adcq 24(%rbx),%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -1475,14 +1477,14 @@ __ecp_nistz256_add_toq:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovzq %rbp,%r13
cmovcq %rax,%r12
cmovcq %rbp,%r13
movq %r12,0(%rdi)
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq %r13,8(%rdi)
cmovzq %r10,%r9
cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@ -1550,13 +1552,14 @@ __ecp_nistz256_subq:
.p2align 5
__ecp_nistz256_mul_by_2q:
xorq %r11,%r11
addq %r12,%r12
adcq %r13,%r13
movq %r12,%rax
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -1564,14 +1567,14 @@ __ecp_nistz256_mul_by_2q:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovzq %rbp,%r13
cmovcq %rax,%r12
cmovcq %rbp,%r13
movq %r12,0(%rdi)
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq %r13,8(%rdi)
cmovzq %r10,%r9
cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@ -1809,16 +1812,14 @@ _ecp_nistz256_point_add:
movq %rdx,%rsi
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
por %xmm1,%xmm3
por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0
pshufd $0xb1,%xmm3,%xmm5
pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
@ -1830,14 +1831,14 @@ _ecp_nistz256_point_add:
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
por %xmm0,%xmm1
.byte 102,72,15,110,199
movdqu 64(%rsi),%xmm0
movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
por %xmm2,%xmm3
por %xmm4,%xmm5
pxor %xmm4,%xmm4
por %xmm1,%xmm3
por %xmm0,%xmm1
.byte 102,72,15,110,199
leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp)
@ -1848,8 +1849,8 @@ _ecp_nistz256_point_add:
call __ecp_nistz256_sqr_montq
pcmpeqd %xmm4,%xmm5
pshufd $0xb1,%xmm3,%xmm4
por %xmm3,%xmm4
pshufd $0xb1,%xmm1,%xmm4
por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
@ -2032,6 +2033,7 @@ L$add_proceedq:
xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi
adcq %r13,%r13
@ -2039,7 +2041,7 @@ L$add_proceedq:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -2047,15 +2049,15 @@ L$add_proceedq:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovcq %rax,%r12
movq 0(%rsi),%rax
cmovzq %rbp,%r13
cmovcq %rbp,%r13
movq 8(%rsi),%rbp
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq 16(%rsi),%rcx
cmovzq %r10,%r9
cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
@ -2213,16 +2215,14 @@ _ecp_nistz256_point_add_affine:
movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp)
por %xmm1,%xmm3
por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0
pshufd $0xb1,%xmm3,%xmm5
pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5
@ -2340,6 +2340,7 @@ _ecp_nistz256_point_add_affine:
xorq %r11,%r11
addq %r12,%r12
leaq 192(%rsp),%rsi
adcq %r13,%r13
@ -2347,7 +2348,7 @@ _ecp_nistz256_point_add_affine:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -2355,15 +2356,15 @@ _ecp_nistz256_point_add_affine:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovcq %rax,%r12
movq 0(%rsi),%rax
cmovzq %rbp,%r13
cmovcq %rbp,%r13
movq 8(%rsi),%rbp
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq 16(%rsi),%rcx
cmovzq %r10,%r9
cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
@ -2510,14 +2511,14 @@ __ecp_nistz256_add_tox:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
sbbq $0,%r11
btq $0,%r11
cmovncq %rax,%r12
cmovncq %rbp,%r13
cmovcq %rax,%r12
cmovcq %rbp,%r13
movq %r12,0(%rdi)
cmovncq %rcx,%r8
cmovcq %rcx,%r8
movq %r13,8(%rdi)
cmovncq %r10,%r9
cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@ -2605,14 +2606,14 @@ __ecp_nistz256_mul_by_2x:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
sbbq $0,%r11
btq $0,%r11
cmovncq %rax,%r12
cmovncq %rbp,%r13
cmovcq %rax,%r12
cmovcq %rbp,%r13
movq %r12,0(%rdi)
cmovncq %rcx,%r8
cmovcq %rcx,%r8
movq %r13,8(%rdi)
cmovncq %r10,%r9
cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@ -2842,16 +2843,14 @@ L$point_addx:
movq %rdx,%rsi
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
por %xmm1,%xmm3
por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0
pshufd $0xb1,%xmm3,%xmm5
pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
@ -2863,14 +2862,14 @@ L$point_addx:
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
por %xmm0,%xmm1
.byte 102,72,15,110,199
movdqu 64(%rsi),%xmm0
movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
por %xmm2,%xmm3
por %xmm4,%xmm5
pxor %xmm4,%xmm4
por %xmm1,%xmm3
por %xmm0,%xmm1
.byte 102,72,15,110,199
leaq 64-128(%rsi),%rsi
movq %rdx,544+0(%rsp)
@ -2881,8 +2880,8 @@ L$point_addx:
call __ecp_nistz256_sqr_montx
pcmpeqd %xmm4,%xmm5
pshufd $0xb1,%xmm3,%xmm4
por %xmm3,%xmm4
pshufd $0xb1,%xmm1,%xmm4
por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
@ -3065,6 +3064,7 @@ L$add_proceedx:
xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi
adcq %r13,%r13
@ -3072,7 +3072,7 @@ L$add_proceedx:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -3080,15 +3080,15 @@ L$add_proceedx:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovcq %rax,%r12
movq 0(%rsi),%rax
cmovzq %rbp,%r13
cmovcq %rbp,%r13
movq 8(%rsi),%rbp
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq 16(%rsi),%rcx
cmovzq %r10,%r9
cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subx
@ -3242,16 +3242,14 @@ L$point_add_affinex:
movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp)
por %xmm1,%xmm3
por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0
pshufd $0xb1,%xmm3,%xmm5
pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5
@ -3369,6 +3367,7 @@ L$point_add_affinex:
xorq %r11,%r11
addq %r12,%r12
leaq 192(%rsp),%rsi
adcq %r13,%r13
@ -3376,7 +3375,7 @@ L$point_add_affinex:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -3384,15 +3383,15 @@ L$point_add_affinex:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovcq %rax,%r12
movq 0(%rsi),%rax
cmovzq %rbp,%r13
cmovcq %rbp,%r13
movq 8(%rsi),%rbp
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq 16(%rsi),%rcx
cmovzq %r10,%r9
cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subx

deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s (4 lines changed)

@ -1263,9 +1263,9 @@ _shaext_shortcut:
.p2align 4
L$oop_shaext:
decq %rdx
leaq 64(%rsi),%rax
leaq 64(%rsi),%r8
paddd %xmm4,%xmm1
cmovneq %rax,%rsi
cmovneq %r8,%rsi
movdqa %xmm0,%xmm8
.byte 15,56,201,229
movdqa %xmm0,%xmm2

deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm (185 lines changed)

@ -19,6 +19,8 @@ $L$SEH_begin_bn_mul_mont::
mov r9,QWORD PTR[48+rsp]
mov r9d,r9d
mov rax,rsp
test r9d,3
jnz $L$mul_enter
cmp r9d,8
@ -39,29 +41,36 @@ $L$mul_enter::
push r14
push r15
mov r9d,r9d
lea r10,QWORD PTR[2+r9]
neg r9
mov r11,rsp
neg r10
lea rsp,QWORD PTR[r10*8+rsp]
and rsp,-1024
lea r10,QWORD PTR[((-16))+r9*8+rsp]
neg r9
and r10,-1024
mov QWORD PTR[8+r9*8+rsp],r11
$L$mul_body::
sub r11,rsp
sub r11,r10
and r11,-4096
lea rsp,QWORD PTR[r11*1+r10]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul_page_walk
jmp $L$mul_page_walk_done
ALIGN 16
$L$mul_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 066h,02eh
jnc $L$mul_page_walk
lea rsp,QWORD PTR[((-4096))+rsp]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul_page_walk
$L$mul_page_walk_done::
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul_body::
mov r12,rdx
mov r8,QWORD PTR[r8]
mov rbx,QWORD PTR[r12]
@ -229,13 +238,13 @@ $L$copy::
mov rsi,QWORD PTR[8+r9*8+rsp]
mov rax,1
mov r15,QWORD PTR[rsi]
mov r14,QWORD PTR[8+rsi]
mov r13,QWORD PTR[16+rsi]
mov r12,QWORD PTR[24+rsi]
mov rbp,QWORD PTR[32+rsi]
mov rbx,QWORD PTR[40+rsi]
lea rsp,QWORD PTR[48+rsi]
mov r15,QWORD PTR[((-48))+rsi]
mov r14,QWORD PTR[((-40))+rsi]
mov r13,QWORD PTR[((-32))+rsi]
mov r12,QWORD PTR[((-24))+rsi]
mov rbp,QWORD PTR[((-16))+rsi]
mov rbx,QWORD PTR[((-8))+rsi]
lea rsp,QWORD PTR[rsi]
$L$mul_epilogue::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
@ -257,6 +266,8 @@ $L$SEH_begin_bn_mul4x_mont::
mov r9,QWORD PTR[48+rsp]
mov r9d,r9d
mov rax,rsp
$L$mul4x_enter::
and r11d,080100h
cmp r11d,080100h
@ -268,23 +279,29 @@ $L$mul4x_enter::
push r14
push r15
mov r9d,r9d
lea r10,QWORD PTR[4+r9]
neg r9
mov r11,rsp
neg r10
lea rsp,QWORD PTR[r10*8+rsp]
and rsp,-1024
lea r10,QWORD PTR[((-32))+r9*8+rsp]
neg r9
and r10,-1024
mov QWORD PTR[8+r9*8+rsp],r11
$L$mul4x_body::
sub r11,rsp
sub r11,r10
and r11,-4096
lea rsp,QWORD PTR[r11*1+r10]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul4x_page_walk
jmp $L$mul4x_page_walk_done
$L$mul4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$mul4x_page_walk
lea rsp,QWORD PTR[((-4096))+rsp]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul4x_page_walk
$L$mul4x_page_walk_done::
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul4x_body::
mov QWORD PTR[16+r9*8+rsp],rdi
mov r12,rdx
mov r8,QWORD PTR[r8]
@ -653,13 +670,13 @@ $L$copy4x::
movdqu XMMWORD PTR[16+r14*1+rdi],xmm2
mov rsi,QWORD PTR[8+r9*8+rsp]
mov rax,1
mov r15,QWORD PTR[rsi]
mov r14,QWORD PTR[8+rsi]
mov r13,QWORD PTR[16+rsi]
mov r12,QWORD PTR[24+rsi]
mov rbp,QWORD PTR[32+rsi]
mov rbx,QWORD PTR[40+rsi]
lea rsp,QWORD PTR[48+rsi]
mov r15,QWORD PTR[((-48))+rsi]
mov r14,QWORD PTR[((-40))+rsi]
mov r13,QWORD PTR[((-32))+rsi]
mov r12,QWORD PTR[((-24))+rsi]
mov rbp,QWORD PTR[((-16))+rsi]
mov rbx,QWORD PTR[((-8))+rsi]
lea rsp,QWORD PTR[rsi]
$L$mul4x_epilogue::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
@ -684,14 +701,15 @@ $L$SEH_begin_bn_sqr8x_mont::
mov r9,QWORD PTR[48+rsp]
$L$sqr8x_enter::
mov rax,rsp
$L$sqr8x_enter::
push rbx
push rbp
push r12
push r13
push r14
push r15
$L$sqr8x_prologue::
mov r10d,r9d
shl r9d,3
@ -704,33 +722,42 @@ $L$sqr8x_enter::
lea r11,QWORD PTR[((-64))+r9*2+rsp]
mov rbp,rsp
mov r8,QWORD PTR[r8]
sub r11,rsi
and r11,4095
cmp r10,r11
jb $L$sqr8x_sp_alt
sub rsp,r11
lea rsp,QWORD PTR[((-64))+r9*2+rsp]
sub rbp,r11
lea rbp,QWORD PTR[((-64))+r9*2+rbp]
jmp $L$sqr8x_sp_done
ALIGN 32
$L$sqr8x_sp_alt::
lea r10,QWORD PTR[((4096-64))+r9*2]
lea rsp,QWORD PTR[((-64))+r9*2+rsp]
lea rbp,QWORD PTR[((-64))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
sub rsp,r11
sub rbp,r11
$L$sqr8x_sp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and rbp,-64
mov r11,rsp
sub r11,rbp
and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$sqr8x_page_walk
jmp $L$sqr8x_page_walk_done
ALIGN 16
$L$sqr8x_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$sqr8x_page_walk
lea rsp,QWORD PTR[((-4096))+rsp]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$sqr8x_page_walk
$L$sqr8x_page_walk_done::
mov r10,r9
neg r9
@ -858,30 +885,38 @@ $L$SEH_begin_bn_mulx4x_mont::
mov r9,QWORD PTR[48+rsp]
$L$mulx4x_enter::
mov rax,rsp
$L$mulx4x_enter::
push rbx
push rbp
push r12
push r13
push r14
push r15
$L$mulx4x_prologue::
shl r9d,3
DB 067h
xor r10,r10
sub r10,r9
mov r8,QWORD PTR[r8]
lea rsp,QWORD PTR[((-72))+r10*1+rsp]
and rsp,-128
mov r11,rax
sub r11,rsp
lea rbp,QWORD PTR[((-72))+r10*1+rsp]
and rbp,-128
mov r11,rsp
sub r11,rbp
and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$mulx4x_page_walk
jmp $L$mulx4x_page_walk_done
ALIGN 16
$L$mulx4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 066h,02eh
jnc $L$mulx4x_page_walk
lea rsp,QWORD PTR[((-4096))+rsp]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$mulx4x_page_walk
$L$mulx4x_page_walk_done::
lea r10,QWORD PTR[r9*1+rdx]
@ -1230,22 +1265,8 @@ mul_handler PROC PRIVATE
mov r10,QWORD PTR[192+r8]
mov rax,QWORD PTR[8+r10*8+rax]
lea rax,QWORD PTR[48+rax]
mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax]
mov r13,QWORD PTR[((-32))+rax]
mov r14,QWORD PTR[((-40))+rax]
mov r15,QWORD PTR[((-48))+rax]
mov QWORD PTR[144+r8],rbx
mov QWORD PTR[160+r8],rbp
mov QWORD PTR[216+r8],r12
mov QWORD PTR[224+r8],r13
mov QWORD PTR[232+r8],r14
mov QWORD PTR[240+r8],r15
jmp $L$common_seh_tail
jmp $L$common_pop_regs
mul_handler ENDP
@ -1273,15 +1294,21 @@ sqr_handler PROC PRIVATE
cmp rbx,r10
jb $L$common_seh_tail
mov r10d,DWORD PTR[4+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jb $L$common_pop_regs
mov rax,QWORD PTR[152+r8]
mov r10d,DWORD PTR[4+r11]
mov r10d,DWORD PTR[8+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jae $L$common_seh_tail
mov rax,QWORD PTR[40+rax]
$L$common_pop_regs::
mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax]
@ -1366,11 +1393,13 @@ DB 9,0,0,0
$L$SEH_info_bn_sqr8x_mont::
DB 9,0,0,0
DD imagerel sqr_handler
DD imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue
DD imagerel $L$sqr8x_prologue,imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue
ALIGN 8
$L$SEH_info_bn_mulx4x_mont::
DB 9,0,0,0
DD imagerel sqr_handler
DD imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue
DD imagerel $L$mulx4x_prologue,imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue
ALIGN 8
.xdata ENDS
END

deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm (227 lines changed)

@ -19,6 +19,8 @@ $L$SEH_begin_bn_mul_mont_gather5::
mov r9,QWORD PTR[48+rsp]
mov r9d,r9d
mov rax,rsp
test r9d,7
jnz $L$mul_enter
mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))]
@ -26,10 +28,7 @@ $L$SEH_begin_bn_mul_mont_gather5::
ALIGN 16
$L$mul_enter::
mov r9d,r9d
mov rax,rsp
movd xmm5,DWORD PTR[56+rsp]
lea r10,QWORD PTR[$L$inc]
push rbx
push rbp
push r12
@ -37,26 +36,36 @@ $L$mul_enter::
push r14
push r15
lea r11,QWORD PTR[2+r9]
neg r11
lea rsp,QWORD PTR[((-264))+r11*8+rsp]
and rsp,-1024
neg r9
mov r11,rsp
lea r10,QWORD PTR[((-280))+r9*8+rsp]
neg r9
and r10,-1024
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul_body::
sub r11,r10
and r11,-4096
lea rsp,QWORD PTR[r11*1+r10]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul_page_walk
jmp $L$mul_page_walk_done
sub rax,rsp
and rax,-4096
$L$mul_page_walk::
mov r11,QWORD PTR[rax*1+rsp]
sub rax,4096
DB 02eh
jnc $L$mul_page_walk
lea rsp,QWORD PTR[((-4096))+rsp]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul_page_walk
$L$mul_page_walk_done::
lea r10,QWORD PTR[$L$inc]
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul_body::
lea r12,QWORD PTR[128+rdx]
movdqa xmm0,XMMWORD PTR[r10]
@ -442,18 +451,19 @@ $L$SEH_begin_bn_mul4x_mont_gather5::
mov r9,QWORD PTR[48+rsp]
DB 067h
mov rax,rsp
$L$mul4x_enter::
and r11d,080108h
cmp r11d,080108h
je $L$mulx4x_enter
DB 067h
mov rax,rsp
push rbx
push rbp
push r12
push r13
push r14
push r15
$L$mul4x_prologue::
DB 067h
shl r9d,3
@ -470,32 +480,40 @@ DB 067h
lea r11,QWORD PTR[((-320))+r9*2+rsp]
mov rbp,rsp
sub r11,rdi
and r11,4095
cmp r10,r11
jb $L$mul4xsp_alt
sub rsp,r11
lea rsp,QWORD PTR[((-320))+r9*2+rsp]
sub rbp,r11
lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$mul4xsp_done
ALIGN 32
$L$mul4xsp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2]
lea rsp,QWORD PTR[((-320))+r9*2+rsp]
lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
sub rsp,r11
sub rbp,r11
$L$mul4xsp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and rbp,-64
mov r11,rsp
sub r11,rbp
and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$mul4x_page_walk
jmp $L$mul4x_page_walk_done
$L$mul4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$mul4x_page_walk
lea rsp,QWORD PTR[((-4096))+rsp]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$mul4x_page_walk
$L$mul4x_page_walk_done::
neg r9
@ -1062,17 +1080,18 @@ $L$SEH_begin_bn_power5::
mov r9,QWORD PTR[48+rsp]
mov rax,rsp
mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))]
and r11d,080108h
cmp r11d,080108h
je $L$powerx5_enter
mov rax,rsp
push rbx
push rbp
push r12
push r13
push r14
push r15
$L$power5_prologue::
shl r9d,3
lea r10d,DWORD PTR[r9*2+r9]
@ -1087,32 +1106,40 @@ $L$SEH_begin_bn_power5::
lea r11,QWORD PTR[((-320))+r9*2+rsp]
mov rbp,rsp
sub r11,rdi
and r11,4095
cmp r10,r11
jb $L$pwr_sp_alt
sub rsp,r11
lea rsp,QWORD PTR[((-320))+r9*2+rsp]
sub rbp,r11
lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$pwr_sp_done
ALIGN 32
$L$pwr_sp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2]
lea rsp,QWORD PTR[((-320))+r9*2+rsp]
lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
sub rsp,r11
sub rbp,r11
$L$pwr_sp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and rbp,-64
mov r11,rsp
sub r11,rbp
and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$pwr_page_walk
jmp $L$pwr_page_walk_done
$L$pwr_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$pwr_page_walk
lea rsp,QWORD PTR[((-4096))+rsp]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$pwr_page_walk
$L$pwr_page_walk_done::
mov r10,r9
neg r9
@ -2038,6 +2065,7 @@ DB 067h
push r13
push r14
push r15
$L$from_prologue::
shl r9d,3
lea r10,QWORD PTR[r9*2+r9]
@ -2052,32 +2080,40 @@ DB 067h
lea r11,QWORD PTR[((-320))+r9*2+rsp]
mov rbp,rsp
sub r11,rdi
and r11,4095
cmp r10,r11
jb $L$from_sp_alt
sub rsp,r11
lea rsp,QWORD PTR[((-320))+r9*2+rsp]
sub rbp,r11
lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$from_sp_done
ALIGN 32
$L$from_sp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2]
lea rsp,QWORD PTR[((-320))+r9*2+rsp]
lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
sub rsp,r11
sub rbp,r11
$L$from_sp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and rbp,-64
mov r11,rsp
sub r11,rbp
and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$from_page_walk
jmp $L$from_page_walk_done
$L$from_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$from_page_walk
lea rsp,QWORD PTR[((-4096))+rsp]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$from_page_walk
$L$from_page_walk_done::
mov r10,r9
neg r9
@ -2186,14 +2222,15 @@ $L$SEH_begin_bn_mulx4x_mont_gather5::
mov r9,QWORD PTR[48+rsp]
$L$mulx4x_enter::
mov rax,rsp
$L$mulx4x_enter::
push rbx
push rbp
push r12
push r13
push r14
push r15
$L$mulx4x_prologue::
shl r9d,3
lea r10,QWORD PTR[r9*2+r9]
@ -2210,31 +2247,39 @@ $L$mulx4x_enter::
lea r11,QWORD PTR[((-320))+r9*2+rsp]
mov rbp,rsp
sub r11,rdi
and r11,4095
cmp r10,r11
jb $L$mulx4xsp_alt
sub rsp,r11
lea rsp,QWORD PTR[((-320))+r9*2+rsp]
sub rbp,r11
lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$mulx4xsp_done
$L$mulx4xsp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2]
lea rsp,QWORD PTR[((-320))+r9*2+rsp]
lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
sub rsp,r11
sub rbp,r11
$L$mulx4xsp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and rbp,-64
mov r11,rsp
sub r11,rbp
and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$mulx4x_page_walk
jmp $L$mulx4x_page_walk_done
$L$mulx4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$mulx4x_page_walk
lea rsp,QWORD PTR[((-4096))+rsp]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$mulx4x_page_walk
$L$mulx4x_page_walk_done::
@ -2707,14 +2752,15 @@ $L$SEH_begin_bn_powerx5::
mov r9,QWORD PTR[48+rsp]
$L$powerx5_enter::
mov rax,rsp
$L$powerx5_enter::
push rbx
push rbp
push r12
push r13
push r14
push r15
$L$powerx5_prologue::
shl r9d,3
lea r10,QWORD PTR[r9*2+r9]
@ -2729,32 +2775,40 @@ $L$powerx5_enter::
lea r11,QWORD PTR[((-320))+r9*2+rsp]
mov rbp,rsp
sub r11,rdi
and r11,4095
cmp r10,r11
jb $L$pwrx_sp_alt
sub rsp,r11
lea rsp,QWORD PTR[((-320))+r9*2+rsp]
sub rbp,r11
lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$pwrx_sp_done
ALIGN 32
$L$pwrx_sp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2]
lea rsp,QWORD PTR[((-320))+r9*2+rsp]
lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
sub rsp,r11
sub rbp,r11
$L$pwrx_sp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and rbp,-64
mov r11,rsp
sub r11,rbp
and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$pwrx_page_walk
jmp $L$pwrx_page_walk_done
$L$pwrx_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$pwrx_page_walk
lea rsp,QWORD PTR[((-4096))+rsp]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$pwrx_page_walk
$L$pwrx_page_walk_done::
mov r10,r9
neg r9
@ -3712,9 +3766,14 @@ mul_handler PROC PRIVATE
cmp rbx,r10
jb $L$common_seh_tail
mov r10d,DWORD PTR[4+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jb $L$common_pop_regs
mov rax,QWORD PTR[152+r8]
mov r10d,DWORD PTR[4+r11]
mov r10d,DWORD PTR[8+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jae $L$common_seh_tail
@ -3726,11 +3785,11 @@ mul_handler PROC PRIVATE
mov r10,QWORD PTR[192+r8]
mov rax,QWORD PTR[8+r10*8+rax]
jmp $L$body_proceed
jmp $L$common_pop_regs
$L$body_40::
mov rax,QWORD PTR[40+rax]
$L$body_proceed::
$L$common_pop_regs::
mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax]
@ -3819,32 +3878,32 @@ ALIGN 8
$L$SEH_info_bn_mul_mont_gather5::
DB 9,0,0,0
DD imagerel mul_handler
DD imagerel $L$mul_body,imagerel $L$mul_epilogue
DD imagerel $L$mul_body,imagerel $L$mul_body,imagerel $L$mul_epilogue
ALIGN 8
$L$SEH_info_bn_mul4x_mont_gather5::
DB 9,0,0,0
DD imagerel mul_handler
DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
DD imagerel $L$mul4x_prologue,imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
ALIGN 8
$L$SEH_info_bn_power5::
DB 9,0,0,0
DD imagerel mul_handler
DD imagerel $L$power5_body,imagerel $L$power5_epilogue
DD imagerel $L$power5_prologue,imagerel $L$power5_body,imagerel $L$power5_epilogue
ALIGN 8
$L$SEH_info_bn_from_mont8x::
DB 9,0,0,0
DD imagerel mul_handler
DD imagerel $L$from_body,imagerel $L$from_epilogue
DD imagerel $L$from_prologue,imagerel $L$from_body,imagerel $L$from_epilogue
ALIGN 8
$L$SEH_info_bn_mulx4x_mont_gather5::
DB 9,0,0,0
DD imagerel mul_handler
DD imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue
DD imagerel $L$mulx4x_prologue,imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue
ALIGN 8
$L$SEH_info_bn_powerx5::
DB 9,0,0,0
DD imagerel mul_handler
DD imagerel $L$powerx5_body,imagerel $L$powerx5_epilogue
DD imagerel $L$powerx5_prologue,imagerel $L$powerx5_body,imagerel $L$powerx5_epilogue
ALIGN 8
$L$SEH_info_bn_gather5::
DB 001h,00bh,003h,00ah
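Note on the hunks above: every variable-size frame in this file now keeps the
candidate stack top in a scratch register, moves rsp down at most one page at
a time, and loads from each page as it goes (the *_page_walk loops). Windows
arms a guard page only immediately below the committed stack, so one large
rsp displacement can skip it; probing each 4KB page in order is the portable
fix. The new *_prologue labels and the third imagerel field in each
$L$SEH_info_* record let mul_handler distinguish prologue, body, and epilogue
when unwinding. A minimal C sketch of the probing idea (illustration only;
the real code must stay in asm so the probes cannot be reordered or elided):

    #include <stdint.h>

    /* Touch every 4096-byte page from just below the old stack top down
     * to the new one, so the OS guard page is always hit in order. */
    static void page_walk(volatile uint8_t *new_top, volatile uint8_t *old_top)
    {
        volatile uint8_t *p = old_top;
        while (p - 4096 >= new_top) {
            p -= 4096;
            (void)*p;          /* probe one page */
        }
        (void)*new_top;        /* final probe at the new top */
    }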

189
deps/openssl/asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm

@ -36,6 +36,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
push r13
mov r8,QWORD PTR[rsi]
xor r13,r13
mov r9,QWORD PTR[8+rsi]
add r8,r8
mov r10,QWORD PTR[16+rsi]
@ -46,7 +47,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
adc r10,r10
adc r11,r11
mov rdx,r9
sbb r13,r13
adc r13,0
sub r8,QWORD PTR[rsi]
mov rcx,r10
@ -54,14 +55,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
sbb r10,QWORD PTR[16+rsi]
mov r12,r11
sbb r11,QWORD PTR[24+rsi]
test r13,r13
sbb r13,0
cmovz r8,rax
cmovz r9,rdx
cmovc r8,rax
cmovc r9,rdx
mov QWORD PTR[rdi],r8
cmovz r10,rcx
cmovc r10,rcx
mov QWORD PTR[8+rdi],r9
cmovz r11,r12
cmovc r11,r12
mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11
@ -180,12 +181,12 @@ $L$SEH_begin_ecp_nistz256_mul_by_3::
sbb r10,0
mov r12,r11
sbb r11,QWORD PTR[(($L$poly+24))]
test r13,r13
sbb r13,0
cmovz r8,rax
cmovz r9,rdx
cmovz r10,rcx
cmovz r11,r12
cmovc r8,rax
cmovc r9,rdx
cmovc r10,rcx
cmovc r11,r12
xor r13,r13
add r8,QWORD PTR[rsi]
@ -202,14 +203,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_3::
sbb r10,0
mov r12,r11
sbb r11,QWORD PTR[(($L$poly+24))]
test r13,r13
sbb r13,0
cmovz r8,rax
cmovz r9,rdx
cmovc r8,rax
cmovc r9,rdx
mov QWORD PTR[rdi],r8
cmovz r10,rcx
cmovc r10,rcx
mov QWORD PTR[8+rdi],r9
cmovz r11,r12
cmovc r11,r12
mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11
@ -260,14 +261,14 @@ $L$SEH_begin_ecp_nistz256_add::
sbb r10,QWORD PTR[16+rsi]
mov r12,r11
sbb r11,QWORD PTR[24+rsi]
test r13,r13
sbb r13,0
cmovz r8,rax
cmovz r9,rdx
cmovc r8,rax
cmovc r9,rdx
mov QWORD PTR[rdi],r8
cmovz r10,rcx
cmovc r10,rcx
mov QWORD PTR[8+rdi],r9
cmovz r11,r12
cmovc r11,r12
mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11
@ -1673,13 +1674,14 @@ ecp_nistz256_avx2_select_w7 ENDP
ALIGN 32
__ecp_nistz256_add_toq PROC PRIVATE
xor r11,r11
add r12,QWORD PTR[rbx]
adc r13,QWORD PTR[8+rbx]
mov rax,r12
adc r8,QWORD PTR[16+rbx]
adc r9,QWORD PTR[24+rbx]
mov rbp,r13
sbb r11,r11
adc r11,0
sub r12,-1
mov rcx,r8
@ -1687,14 +1689,14 @@ __ecp_nistz256_add_toq PROC PRIVATE
sbb r8,0
mov r10,r9
sbb r9,r15
test r11,r11
sbb r11,0
cmovz r12,rax
cmovz r13,rbp
cmovc r12,rax
cmovc r13,rbp
mov QWORD PTR[rdi],r12
cmovz r8,rcx
cmovc r8,rcx
mov QWORD PTR[8+rdi],r13
cmovz r9,r10
cmovc r9,r10
mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9
@ -1762,13 +1764,14 @@ __ecp_nistz256_subq ENDP
ALIGN 32
__ecp_nistz256_mul_by_2q PROC PRIVATE
xor r11,r11
add r12,r12
adc r13,r13
mov rax,r12
adc r8,r8
adc r9,r9
mov rbp,r13
sbb r11,r11
adc r11,0
sub r12,-1
mov rcx,r8
@ -1776,14 +1779,14 @@ __ecp_nistz256_mul_by_2q PROC PRIVATE
sbb r8,0
mov r10,r9
sbb r9,r15
test r11,r11
sbb r11,0
cmovz r12,rax
cmovz r13,rbp
cmovc r12,rax
cmovc r13,rbp
mov QWORD PTR[rdi],r12
cmovz r8,rcx
cmovc r8,rcx
mov QWORD PTR[8+rdi],r13
cmovz r9,r10
cmovc r9,r10
mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9
@ -2041,16 +2044,14 @@ $L$SEH_begin_ecp_nistz256_point_add::
mov rsi,rdx
movdqa XMMWORD PTR[384+rsp],xmm0
movdqa XMMWORD PTR[(384+16)+rsp],xmm1
por xmm1,xmm0
movdqa XMMWORD PTR[416+rsp],xmm2
movdqa XMMWORD PTR[(416+16)+rsp],xmm3
por xmm3,xmm2
movdqa XMMWORD PTR[448+rsp],xmm4
movdqa XMMWORD PTR[(448+16)+rsp],xmm5
por xmm3,xmm1
por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rsi]
pshufd xmm5,xmm3,0b1h
pshufd xmm3,xmm5,0b1h
movdqu xmm1,XMMWORD PTR[16+rsi]
movdqu xmm2,XMMWORD PTR[32+rsi]
por xmm5,xmm3
@ -2062,14 +2063,14 @@ $L$SEH_begin_ecp_nistz256_point_add::
movdqa XMMWORD PTR[480+rsp],xmm0
pshufd xmm4,xmm5,01eh
movdqa XMMWORD PTR[(480+16)+rsp],xmm1
por xmm1,xmm0
DB 102,72,15,110,199
movdqu xmm0,XMMWORD PTR[64+rsi]
movdqu xmm1,XMMWORD PTR[80+rsi]
movdqa XMMWORD PTR[512+rsp],xmm2
movdqa XMMWORD PTR[(512+16)+rsp],xmm3
por xmm3,xmm2
por xmm5,xmm4
pxor xmm4,xmm4
por xmm3,xmm1
por xmm1,xmm0
DB 102,72,15,110,199
lea rsi,QWORD PTR[((64-0))+rsi]
mov QWORD PTR[((544+0))+rsp],rax
@ -2080,8 +2081,8 @@ DB 102,72,15,110,199
call __ecp_nistz256_sqr_montq
pcmpeqd xmm5,xmm4
pshufd xmm4,xmm3,0b1h
por xmm4,xmm3
pshufd xmm4,xmm1,0b1h
por xmm4,xmm1
pshufd xmm5,xmm5,0
pshufd xmm3,xmm4,01eh
por xmm4,xmm3
@ -2264,6 +2265,7 @@ $L$add_proceedq::
xor r11,r11
add r12,r12
lea rsi,QWORD PTR[96+rsp]
adc r13,r13
@ -2271,7 +2273,7 @@ $L$add_proceedq::
adc r8,r8
adc r9,r9
mov rbp,r13
sbb r11,r11
adc r11,0
sub r12,-1
mov rcx,r8
@ -2279,15 +2281,15 @@ $L$add_proceedq::
sbb r8,0
mov r10,r9
sbb r9,r15
test r11,r11
sbb r11,0
cmovz r12,rax
cmovc r12,rax
mov rax,QWORD PTR[rsi]
cmovz r13,rbp
cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi]
cmovz r8,rcx
cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi]
cmovz r9,r10
cmovc r9,r10
mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subq
@ -2457,16 +2459,14 @@ $L$SEH_begin_ecp_nistz256_point_add_affine::
mov r8,QWORD PTR[((64+24))+rsi]
movdqa XMMWORD PTR[320+rsp],xmm0
movdqa XMMWORD PTR[(320+16)+rsp],xmm1
por xmm1,xmm0
movdqa XMMWORD PTR[352+rsp],xmm2
movdqa XMMWORD PTR[(352+16)+rsp],xmm3
por xmm3,xmm2
movdqa XMMWORD PTR[384+rsp],xmm4
movdqa XMMWORD PTR[(384+16)+rsp],xmm5
por xmm3,xmm1
por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rbx]
pshufd xmm5,xmm3,0b1h
pshufd xmm3,xmm5,0b1h
movdqu xmm1,XMMWORD PTR[16+rbx]
movdqu xmm2,XMMWORD PTR[32+rbx]
por xmm5,xmm3
@ -2584,6 +2584,7 @@ DB 102,72,15,110,199
xor r11,r11
add r12,r12
lea rsi,QWORD PTR[192+rsp]
adc r13,r13
@ -2591,7 +2592,7 @@ DB 102,72,15,110,199
adc r8,r8
adc r9,r9
mov rbp,r13
sbb r11,r11
adc r11,0
sub r12,-1
mov rcx,r8
@ -2599,15 +2600,15 @@ DB 102,72,15,110,199
sbb r8,0
mov r10,r9
sbb r9,r15
test r11,r11
sbb r11,0
cmovz r12,rax
cmovc r12,rax
mov rax,QWORD PTR[rsi]
cmovz r13,rbp
cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi]
cmovz r8,rcx
cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi]
cmovz r9,r10
cmovc r9,r10
mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subq
@ -2757,14 +2758,14 @@ __ecp_nistz256_add_tox PROC PRIVATE
sbb r8,0
mov r10,r9
sbb r9,r15
sbb r11,0
bt r11,0
cmovnc r12,rax
cmovnc r13,rbp
cmovc r12,rax
cmovc r13,rbp
mov QWORD PTR[rdi],r12
cmovnc r8,rcx
cmovc r8,rcx
mov QWORD PTR[8+rdi],r13
cmovnc r9,r10
cmovc r9,r10
mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9
@ -2852,14 +2853,14 @@ __ecp_nistz256_mul_by_2x PROC PRIVATE
sbb r8,0
mov r10,r9
sbb r9,r15
sbb r11,0
bt r11,0
cmovnc r12,rax
cmovnc r13,rbp
cmovc r12,rax
cmovc r13,rbp
mov QWORD PTR[rdi],r12
cmovnc r8,rcx
cmovc r8,rcx
mov QWORD PTR[8+rdi],r13
cmovnc r9,r10
cmovc r9,r10
mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9
@ -3109,16 +3110,14 @@ $L$point_addx::
mov rsi,rdx
movdqa XMMWORD PTR[384+rsp],xmm0
movdqa XMMWORD PTR[(384+16)+rsp],xmm1
por xmm1,xmm0
movdqa XMMWORD PTR[416+rsp],xmm2
movdqa XMMWORD PTR[(416+16)+rsp],xmm3
por xmm3,xmm2
movdqa XMMWORD PTR[448+rsp],xmm4
movdqa XMMWORD PTR[(448+16)+rsp],xmm5
por xmm3,xmm1
por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rsi]
pshufd xmm5,xmm3,0b1h
pshufd xmm3,xmm5,0b1h
movdqu xmm1,XMMWORD PTR[16+rsi]
movdqu xmm2,XMMWORD PTR[32+rsi]
por xmm5,xmm3
@ -3130,14 +3129,14 @@ $L$point_addx::
movdqa XMMWORD PTR[480+rsp],xmm0
pshufd xmm4,xmm5,01eh
movdqa XMMWORD PTR[(480+16)+rsp],xmm1
por xmm1,xmm0
DB 102,72,15,110,199
movdqu xmm0,XMMWORD PTR[64+rsi]
movdqu xmm1,XMMWORD PTR[80+rsi]
movdqa XMMWORD PTR[512+rsp],xmm2
movdqa XMMWORD PTR[(512+16)+rsp],xmm3
por xmm3,xmm2
por xmm5,xmm4
pxor xmm4,xmm4
por xmm3,xmm1
por xmm1,xmm0
DB 102,72,15,110,199
lea rsi,QWORD PTR[((64-128))+rsi]
mov QWORD PTR[((544+0))+rsp],rdx
@ -3148,8 +3147,8 @@ DB 102,72,15,110,199
call __ecp_nistz256_sqr_montx
pcmpeqd xmm5,xmm4
pshufd xmm4,xmm3,0b1h
por xmm4,xmm3
pshufd xmm4,xmm1,0b1h
por xmm4,xmm1
pshufd xmm5,xmm5,0
pshufd xmm3,xmm4,01eh
por xmm4,xmm3
@ -3332,6 +3331,7 @@ $L$add_proceedx::
xor r11,r11
add r12,r12
lea rsi,QWORD PTR[96+rsp]
adc r13,r13
@ -3339,7 +3339,7 @@ $L$add_proceedx::
adc r8,r8
adc r9,r9
mov rbp,r13
sbb r11,r11
adc r11,0
sub r12,-1
mov rcx,r8
@ -3347,15 +3347,15 @@ $L$add_proceedx::
sbb r8,0
mov r10,r9
sbb r9,r15
test r11,r11
sbb r11,0
cmovz r12,rax
cmovc r12,rax
mov rax,QWORD PTR[rsi]
cmovz r13,rbp
cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi]
cmovz r8,rcx
cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi]
cmovz r9,r10
cmovc r9,r10
mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subx
@ -3521,16 +3521,14 @@ $L$point_add_affinex::
mov r8,QWORD PTR[((64+24))+rsi]
movdqa XMMWORD PTR[320+rsp],xmm0
movdqa XMMWORD PTR[(320+16)+rsp],xmm1
por xmm1,xmm0
movdqa XMMWORD PTR[352+rsp],xmm2
movdqa XMMWORD PTR[(352+16)+rsp],xmm3
por xmm3,xmm2
movdqa XMMWORD PTR[384+rsp],xmm4
movdqa XMMWORD PTR[(384+16)+rsp],xmm5
por xmm3,xmm1
por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rbx]
pshufd xmm5,xmm3,0b1h
pshufd xmm3,xmm5,0b1h
movdqu xmm1,XMMWORD PTR[16+rbx]
movdqu xmm2,XMMWORD PTR[32+rbx]
por xmm5,xmm3
@ -3648,6 +3646,7 @@ DB 102,72,15,110,199
xor r11,r11
add r12,r12
lea rsi,QWORD PTR[192+rsp]
adc r13,r13
@ -3655,7 +3654,7 @@ DB 102,72,15,110,199
adc r8,r8
adc r9,r9
mov rbp,r13
sbb r11,r11
adc r11,0
sub r12,-1
mov rcx,r8
@ -3663,15 +3662,15 @@ DB 102,72,15,110,199
sbb r8,0
mov r10,r9
sbb r9,r15
test r11,r11
sbb r11,0
cmovz r12,rax
cmovc r12,rax
mov rax,QWORD PTR[rsi]
cmovz r13,rbp
cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi]
cmovz r8,rcx
cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi]
cmovz r9,r10
cmovc r9,r10
mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subx
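Note on this file's hunks: the repeated sbb r13,r13 -> adc r13,0 (and
sbb r11,r11 -> adc r11,0) rewrites, with cmovz/cmovnc turning into cmovc,
are one fix applied everywhere. The old code collapsed the carry out of an
addition or doubling into an all-ones mask and then tested that mask, which
cannot see a borrow produced by the trailing reduction subtraction itself;
the new code carries a 0/1 count, folds the borrow in with the trailing
sbb ...,0, and selects on CF. The xmm por/pshufd reshuffling in the
point_add paths appears to be the companion rework of how the all-zero
(point-at-infinity) test is accumulated. A sketch of the corrected carry
pattern for a 4x64-bit modular add (illustrative C, not the generated
code; __uint128_t assumes GCC/Clang):

    #include <stdint.h>

    /* r = a + b mod p, with a, b < p.  Keep the pre-subtraction value
     * exactly when t - p borrows past the incoming carry. */
    static void mod_add_256(uint64_t r[4], const uint64_t a[4],
                            const uint64_t b[4], const uint64_t p[4])
    {
        uint64_t t[4], u[4], carry = 0, borrow = 0;
        for (int i = 0; i < 4; i++) {
            __uint128_t s = (__uint128_t)a[i] + b[i] + carry;
            t[i] = (uint64_t)s;
            carry = (uint64_t)(s >> 64);         /* adc ... ; adc r,0 */
        }
        for (int i = 0; i < 4; i++) {
            __uint128_t d = (__uint128_t)t[i] - p[i] - borrow;
            u[i] = (uint64_t)d;
            borrow = (uint64_t)((d >> 64) & 1);  /* sub/sbb chain */
        }
        uint64_t keep_t = (uint64_t)0 - (uint64_t)(borrow > carry);
        for (int i = 0; i < 4; i++)              /* the cmovc selects */
            r[i] = (t[i] & keep_t) | (u[i] & ~keep_t);
    }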

4
deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm

@ -1291,9 +1291,9 @@ DB 102,15,56,0,251
ALIGN 16
$L$oop_shaext::
dec rdx
lea rax,QWORD PTR[64+rsi]
lea r8,QWORD PTR[64+rsi]
paddd xmm1,xmm4
cmovne rsi,rax
cmovne rsi,r8
movdqa xmm8,xmm0
DB 15,56,201,229
movdqa xmm2,xmm0
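Note: the only change in the SHA-NI path is the scratch register for the
branchless pointer advance, rax -> r8; rax appears to be kept free because
the reworked prologues elsewhere in this update use it to hold the saved
stack pointer. The idiom itself is just a conditional advance to the next
64-byte block, roughly (illustrative C):

    #include <stddef.h>
    #include <stdint.h>

    /* lea r8,[64+rsi] ; cmovne rsi,r8 -- advance only while blocks remain. */
    static const uint8_t *next_block(const uint8_t *p, size_t blocks_left)
    {
        const uint8_t *cand = p + 64;
        return blocks_left != 0 ? cand : p;
    }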

109
deps/openssl/asm/x86-elf-gas/bn/x86-mont.s

@ -15,44 +15,51 @@ bn_mul_mont:
jl .L000just_leave
leal 20(%esp),%esi
leal 24(%esp),%edx
movl %esp,%ebp
addl $2,%edi
negl %edi
leal -32(%esp,%edi,4),%esp
leal -32(%esp,%edi,4),%ebp
negl %edi
movl %esp,%eax
movl %ebp,%eax
subl %edx,%eax
andl $2047,%eax
subl %eax,%esp
xorl %esp,%edx
subl %eax,%ebp
xorl %ebp,%edx
andl $2048,%edx
xorl $2048,%edx
subl %edx,%esp
andl $-64,%esp
movl %ebp,%eax
subl %esp,%eax
subl %edx,%ebp
andl $-64,%ebp
movl %esp,%eax
subl %ebp,%eax
andl $-4096,%eax
movl %esp,%edx
leal (%ebp,%eax,1),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja .L001page_walk
jmp .L002page_walk_done
.align 16
.L001page_walk:
movl (%esp,%eax,1),%edx
subl $4096,%eax
.byte 46
jnc .L001page_walk
leal -4096(%esp),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja .L001page_walk
.L002page_walk_done:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl 12(%esi),%ebp
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
movl %edx,16(%esp)
movl %ebp,16(%esp)
movl %esi,20(%esp)
leal -3(%edi),%ebx
movl %ebp,24(%esp)
movl %edx,24(%esp)
leal OPENSSL_ia32cap_P,%eax
btl $26,(%eax)
jnc .L002non_sse2
jnc .L003non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
@ -76,7 +83,7 @@ bn_mul_mont:
psrlq $32,%mm3
incl %ecx
.align 16
.L0031st:
.L0041st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -91,7 +98,7 @@ bn_mul_mont:
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl .L0031st
jl .L0041st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -105,7 +112,7 @@ bn_mul_mont:
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
.L004outer:
.L005outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
@ -127,7 +134,7 @@ bn_mul_mont:
paddq %mm6,%mm2
incl %ecx
decl %ebx
.L005inner:
.L006inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -144,7 +151,7 @@ bn_mul_mont:
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
jnz .L005inner
jnz .L006inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
@ -162,11 +169,11 @@ bn_mul_mont:
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle .L004outer
jle .L005outer
emms
jmp .L006common_tail
jmp .L007common_tail
.align 16
.L002non_sse2:
.L003non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@ -177,12 +184,12 @@ bn_mul_mont:
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz .L007bn_sqr_mont
jz .L008bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 16
.L008mull:
.L009mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@ -191,7 +198,7 @@ bn_mul_mont:
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L008mull
jl .L009mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@ -209,9 +216,9 @@ bn_mul_mont:
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp .L0092ndmadd
jmp .L0102ndmadd
.align 16
.L0101stmadd:
.L0111stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -222,7 +229,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L0101stmadd
jl .L0111stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@ -245,7 +252,7 @@ bn_mul_mont:
adcl $0,%edx
movl $1,%ecx
.align 16
.L0092ndmadd:
.L0102ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -256,7 +263,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0092ndmadd
jl .L0102ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -272,16 +279,16 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je .L006common_tail
je .L007common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp .L0101stmadd
jmp .L0111stmadd
.align 16
.L007bn_sqr_mont:
.L008bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@ -292,7 +299,7 @@ bn_mul_mont:
andl $1,%ebx
incl %ecx
.align 16
.L011sqr:
.L012sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -304,7 +311,7 @@ bn_mul_mont:
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl .L011sqr
jl .L012sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -328,7 +335,7 @@ bn_mul_mont:
movl 4(%esi),%eax
movl $1,%ecx
.align 16
.L0123rdmadd:
.L0133rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -347,7 +354,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0123rdmadd
jl .L0133rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -363,7 +370,7 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je .L006common_tail
je .L007common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
@ -375,12 +382,12 @@ bn_mul_mont:
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je .L013sqrlast
je .L014sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 16
.L014sqradd:
.L015sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -396,13 +403,13 @@ bn_mul_mont:
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle .L014sqradd
jle .L015sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
.L013sqrlast:
.L014sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@ -417,9 +424,9 @@ bn_mul_mont:
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp .L0123rdmadd
jmp .L0133rdmadd
.align 16
.L006common_tail:
.L007common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
@ -427,13 +434,13 @@ bn_mul_mont:
movl %ebx,%ecx
xorl %edx,%edx
.align 16
.L015sub:
.L016sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge .L015sub
jge .L016sub
sbbl $0,%eax
andl %eax,%esi
notl %eax
@ -441,12 +448,12 @@ bn_mul_mont:
andl %eax,%ebp
orl %ebp,%esi
.align 16
.L016copy:
.L017copy:
movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4)
decl %ebx
jge .L016copy
jge .L017copy
movl 24(%esp),%esp
movl $1,%eax
.L000just_leave:
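Note on the 32-bit rewrite above: the frame is now computed in %ebp and
%esp is walked down to it one page at a time, probing each page before
%esp crosses it; the .L003non_sse2 and later label shifts that dominate
the rest of the diff are mechanical renumbering from the inserted
.L002page_walk_done label. The surviving 2047/2048 arithmetic places the
frame a half-page away from the caller's argument block; my reading (not
an authoritative statement of intent) is that it keeps the two regions
from aliasing in the same 2KB window:

    #include <stdint.h>

    /* Given a candidate frame top fp and the argument pointer argp:
     * match their offsets within 2KB, force opposite 2KB halves of the
     * 4KB page, then align down to 64 bytes. */
    static uintptr_t place_frame(uintptr_t fp, uintptr_t argp)
    {
        fp -= (fp - argp) & 2047;           /* and $2047; sub  */
        fp -= ((fp ^ argp) & 2048) ^ 2048;  /* xor/and/xor $2048; sub */
        return fp & ~(uintptr_t)63;         /* and $-64        */
    }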

115
deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s

@ -14,47 +14,54 @@ L_bn_mul_mont_begin:
jl L000just_leave
leal 20(%esp),%esi
leal 24(%esp),%edx
movl %esp,%ebp
addl $2,%edi
negl %edi
leal -32(%esp,%edi,4),%esp
leal -32(%esp,%edi,4),%ebp
negl %edi
movl %esp,%eax
movl %ebp,%eax
subl %edx,%eax
andl $2047,%eax
subl %eax,%esp
xorl %esp,%edx
subl %eax,%ebp
xorl %ebp,%edx
andl $2048,%edx
xorl $2048,%edx
subl %edx,%esp
andl $-64,%esp
movl %ebp,%eax
subl %esp,%eax
subl %edx,%ebp
andl $-64,%ebp
movl %esp,%eax
subl %ebp,%eax
andl $-4096,%eax
movl %esp,%edx
leal (%ebp,%eax,1),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja L001page_walk
jmp L002page_walk_done
.align 4,0x90
L001page_walk:
movl (%esp,%eax,1),%edx
subl $4096,%eax
.byte 46
jnc L001page_walk
leal -4096(%esp),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja L001page_walk
L002page_walk_done:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl 12(%esi),%ebp
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
movl %edx,16(%esp)
movl %ebp,16(%esp)
movl %esi,20(%esp)
leal -3(%edi),%ebx
movl %ebp,24(%esp)
call L002PIC_me_up
L002PIC_me_up:
movl %edx,24(%esp)
call L003PIC_me_up
L003PIC_me_up:
popl %eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L002PIC_me_up(%eax),%eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc L003non_sse2
jnc L004non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
@ -78,7 +85,7 @@ L002PIC_me_up:
psrlq $32,%mm3
incl %ecx
.align 4,0x90
L0041st:
L0051st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -93,7 +100,7 @@ L0041st:
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl L0041st
jl L0051st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -107,7 +114,7 @@ L0041st:
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
L005outer:
L006outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
@ -129,7 +136,7 @@ L005outer:
paddq %mm6,%mm2
incl %ecx
decl %ebx
L006inner:
L007inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -146,7 +153,7 @@ L006inner:
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
jnz L006inner
jnz L007inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
@ -164,11 +171,11 @@ L006inner:
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle L005outer
jle L006outer
emms
jmp L007common_tail
jmp L008common_tail
.align 4,0x90
L003non_sse2:
L004non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@ -179,12 +186,12 @@ L003non_sse2:
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz L008bn_sqr_mont
jz L009bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 4,0x90
L009mull:
L010mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@ -193,7 +200,7 @@ L009mull:
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl L009mull
jl L010mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@ -211,9 +218,9 @@ L009mull:
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp L0102ndmadd
jmp L0112ndmadd
.align 4,0x90
L0111stmadd:
L0121stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -224,7 +231,7 @@ L0111stmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl L0111stmadd
jl L0121stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@ -247,7 +254,7 @@ L0111stmadd:
adcl $0,%edx
movl $1,%ecx
.align 4,0x90
L0102ndmadd:
L0112ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -258,7 +265,7 @@ L0102ndmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl L0102ndmadd
jl L0112ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -274,16 +281,16 @@ L0102ndmadd:
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je L007common_tail
je L008common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp L0111stmadd
jmp L0121stmadd
.align 4,0x90
L008bn_sqr_mont:
L009bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@ -294,7 +301,7 @@ L008bn_sqr_mont:
andl $1,%ebx
incl %ecx
.align 4,0x90
L012sqr:
L013sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -306,7 +313,7 @@ L012sqr:
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl L012sqr
jl L013sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -330,7 +337,7 @@ L012sqr:
movl 4(%esi),%eax
movl $1,%ecx
.align 4,0x90
L0133rdmadd:
L0143rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -349,7 +356,7 @@ L0133rdmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl L0133rdmadd
jl L0143rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -365,7 +372,7 @@ L0133rdmadd:
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je L007common_tail
je L008common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
@ -377,12 +384,12 @@ L0133rdmadd:
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je L014sqrlast
je L015sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 4,0x90
L015sqradd:
L016sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -398,13 +405,13 @@ L015sqradd:
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle L015sqradd
jle L016sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
L014sqrlast:
L015sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@ -419,9 +426,9 @@ L014sqrlast:
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp L0133rdmadd
jmp L0143rdmadd
.align 4,0x90
L007common_tail:
L008common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
@ -429,13 +436,13 @@ L007common_tail:
movl %ebx,%ecx
xorl %edx,%edx
.align 4,0x90
L016sub:
L017sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge L016sub
jge L017sub
sbbl $0,%eax
andl %eax,%esi
notl %eax
@ -443,12 +450,12 @@ L016sub:
andl %eax,%ebp
orl %ebp,%esi
.align 4,0x90
L017copy:
L018copy:
movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4)
decl %ebx
jge L017copy
jge L018copy
movl 24(%esp),%esp
movl $1,%eax
L000just_leave:

109
deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm

@ -31,44 +31,51 @@ $L_bn_mul_mont_begin::
jl $L000just_leave
lea esi,DWORD PTR 20[esp]
lea edx,DWORD PTR 24[esp]
mov ebp,esp
add edi,2
neg edi
lea esp,DWORD PTR [edi*4+esp-32]
lea ebp,DWORD PTR [edi*4+esp-32]
neg edi
mov eax,esp
mov eax,ebp
sub eax,edx
and eax,2047
sub esp,eax
xor edx,esp
sub ebp,eax
xor edx,ebp
and edx,2048
xor edx,2048
sub esp,edx
and esp,-64
mov eax,ebp
sub eax,esp
sub ebp,edx
and ebp,-64
mov eax,esp
sub eax,ebp
and eax,-4096
mov edx,esp
lea esp,DWORD PTR [eax*1+ebp]
mov eax,DWORD PTR [esp]
cmp esp,ebp
ja $L001page_walk
jmp $L002page_walk_done
ALIGN 16
$L001page_walk:
mov edx,DWORD PTR [eax*1+esp]
sub eax,4096
DB 46
jnc $L001page_walk
lea esp,DWORD PTR [esp-4096]
mov eax,DWORD PTR [esp]
cmp esp,ebp
ja $L001page_walk
$L002page_walk_done:
mov eax,DWORD PTR [esi]
mov ebx,DWORD PTR 4[esi]
mov ecx,DWORD PTR 8[esi]
mov edx,DWORD PTR 12[esi]
mov ebp,DWORD PTR 12[esi]
mov esi,DWORD PTR 16[esi]
mov esi,DWORD PTR [esi]
mov DWORD PTR 4[esp],eax
mov DWORD PTR 8[esp],ebx
mov DWORD PTR 12[esp],ecx
mov DWORD PTR 16[esp],edx
mov DWORD PTR 16[esp],ebp
mov DWORD PTR 20[esp],esi
lea ebx,DWORD PTR [edi-3]
mov DWORD PTR 24[esp],ebp
mov DWORD PTR 24[esp],edx
lea eax,DWORD PTR _OPENSSL_ia32cap_P
bt DWORD PTR [eax],26
jnc $L002non_sse2
jnc $L003non_sse2
mov eax,-1
movd mm7,eax
mov esi,DWORD PTR 8[esp]
@ -92,7 +99,7 @@ DB 46
psrlq mm3,32
inc ecx
ALIGN 16
$L0031st:
$L0041st:
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@ -107,7 +114,7 @@ $L0031st:
psrlq mm3,32
lea ecx,DWORD PTR 1[ecx]
cmp ecx,ebx
jl $L0031st
jl $L0041st
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@ -121,7 +128,7 @@ $L0031st:
paddq mm3,mm2
movq QWORD PTR 32[ebx*4+esp],mm3
inc edx
$L004outer:
$L005outer:
xor ecx,ecx
movd mm4,DWORD PTR [edx*4+edi]
movd mm5,DWORD PTR [esi]
@ -143,7 +150,7 @@ $L004outer:
paddq mm2,mm6
inc ecx
dec ebx
$L005inner:
$L006inner:
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@ -160,7 +167,7 @@ $L005inner:
paddq mm2,mm6
dec ebx
lea ecx,DWORD PTR 1[ecx]
jnz $L005inner
jnz $L006inner
mov ebx,ecx
pmuludq mm0,mm4
pmuludq mm1,mm5
@ -178,11 +185,11 @@ $L005inner:
movq QWORD PTR 32[ebx*4+esp],mm3
lea edx,DWORD PTR 1[edx]
cmp edx,ebx
jle $L004outer
jle $L005outer
emms
jmp $L006common_tail
jmp $L007common_tail
ALIGN 16
$L002non_sse2:
$L003non_sse2:
mov esi,DWORD PTR 8[esp]
lea ebp,DWORD PTR 1[ebx]
mov edi,DWORD PTR 12[esp]
@ -193,12 +200,12 @@ $L002non_sse2:
lea eax,DWORD PTR 4[ebx*4+edi]
or ebp,edx
mov edi,DWORD PTR [edi]
jz $L007bn_sqr_mont
jz $L008bn_sqr_mont
mov DWORD PTR 28[esp],eax
mov eax,DWORD PTR [esi]
xor edx,edx
ALIGN 16
$L008mull:
$L009mull:
mov ebp,edx
mul edi
add ebp,eax
@ -207,7 +214,7 @@ $L008mull:
mov eax,DWORD PTR [ecx*4+esi]
cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp
jl $L008mull
jl $L009mull
mov ebp,edx
mul edi
mov edi,DWORD PTR 20[esp]
@ -225,9 +232,9 @@ $L008mull:
mov eax,DWORD PTR 4[esi]
adc edx,0
inc ecx
jmp $L0092ndmadd
jmp $L0102ndmadd
ALIGN 16
$L0101stmadd:
$L0111stmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@ -238,7 +245,7 @@ $L0101stmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp
jl $L0101stmadd
jl $L0111stmadd
mov ebp,edx
mul edi
add eax,DWORD PTR 32[ebx*4+esp]
@ -261,7 +268,7 @@ $L0101stmadd:
adc edx,0
mov ecx,1
ALIGN 16
$L0092ndmadd:
$L0102ndmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@ -272,7 +279,7 @@ $L0092ndmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp
jl $L0092ndmadd
jl $L0102ndmadd
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ebx*4+esp]
@ -288,16 +295,16 @@ $L0092ndmadd:
mov DWORD PTR 32[ebx*4+esp],edx
cmp ecx,DWORD PTR 28[esp]
mov DWORD PTR 36[ebx*4+esp],eax
je $L006common_tail
je $L007common_tail
mov edi,DWORD PTR [ecx]
mov esi,DWORD PTR 8[esp]
mov DWORD PTR 12[esp],ecx
xor ecx,ecx
xor edx,edx
mov eax,DWORD PTR [esi]
jmp $L0101stmadd
jmp $L0111stmadd
ALIGN 16
$L007bn_sqr_mont:
$L008bn_sqr_mont:
mov DWORD PTR [esp],ebx
mov DWORD PTR 12[esp],ecx
mov eax,edi
@ -308,7 +315,7 @@ $L007bn_sqr_mont:
and ebx,1
inc ecx
ALIGN 16
$L011sqr:
$L012sqr:
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@ -320,7 +327,7 @@ $L011sqr:
cmp ecx,DWORD PTR [esp]
mov ebx,eax
mov DWORD PTR 28[ecx*4+esp],ebp
jl $L011sqr
jl $L012sqr
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@ -344,7 +351,7 @@ $L011sqr:
mov eax,DWORD PTR 4[esi]
mov ecx,1
ALIGN 16
$L0123rdmadd:
$L0133rdmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@ -363,7 +370,7 @@ $L0123rdmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp
jl $L0123rdmadd
jl $L0133rdmadd
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ebx*4+esp]
@ -379,7 +386,7 @@ $L0123rdmadd:
mov DWORD PTR 32[ebx*4+esp],edx
cmp ecx,ebx
mov DWORD PTR 36[ebx*4+esp],eax
je $L006common_tail
je $L007common_tail
mov edi,DWORD PTR 4[ecx*4+esi]
lea ecx,DWORD PTR 1[ecx]
mov eax,edi
@ -391,12 +398,12 @@ $L0123rdmadd:
xor ebp,ebp
cmp ecx,ebx
lea ecx,DWORD PTR 1[ecx]
je $L013sqrlast
je $L014sqrlast
mov ebx,edx
shr edx,1
and ebx,1
ALIGN 16
$L014sqradd:
$L015sqradd:
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@ -412,13 +419,13 @@ $L014sqradd:
cmp ecx,DWORD PTR [esp]
mov DWORD PTR 28[ecx*4+esp],ebp
mov ebx,eax
jle $L014sqradd
jle $L015sqradd
mov ebp,edx
add edx,edx
shr ebp,31
add edx,ebx
adc ebp,0
$L013sqrlast:
$L014sqrlast:
mov edi,DWORD PTR 20[esp]
mov esi,DWORD PTR 16[esp]
imul edi,DWORD PTR 32[esp]
@ -433,9 +440,9 @@ $L013sqrlast:
adc edx,0
mov ecx,1
mov eax,DWORD PTR 4[esi]
jmp $L0123rdmadd
jmp $L0133rdmadd
ALIGN 16
$L006common_tail:
$L007common_tail:
mov ebp,DWORD PTR 16[esp]
mov edi,DWORD PTR 4[esp]
lea esi,DWORD PTR 32[esp]
@ -443,13 +450,13 @@ $L006common_tail:
mov ecx,ebx
xor edx,edx
ALIGN 16
$L015sub:
$L016sub:
sbb eax,DWORD PTR [edx*4+ebp]
mov DWORD PTR [edx*4+edi],eax
dec ecx
mov eax,DWORD PTR 4[edx*4+esi]
lea edx,DWORD PTR 1[edx]
jge $L015sub
jge $L016sub
sbb eax,0
and esi,eax
not eax
@ -457,12 +464,12 @@ $L015sub:
and ebp,eax
or esi,ebp
ALIGN 16
$L016copy:
$L017copy:
mov eax,DWORD PTR [ebx*4+esi]
mov DWORD PTR [ebx*4+edi],eax
mov DWORD PTR 32[ebx*4+esp],ecx
dec ebx
jge $L016copy
jge $L017copy
mov esp,DWORD PTR 24[esp]
mov eax,1
$L000just_leave:

24
deps/openssl/asm_obsolete/arm-void-gas/aes/bsaes-armv7.S

@ -1816,8 +1816,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done
.align 4
.Lxts_enc_6:
vst1.64 {q14}, [r0,:128] @ next round tweak
veor q4, q4, q12
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -1853,8 +1851,6 @@ bsaes_xts_encrypt:
.align 5
.Lxts_enc_5:
vst1.64 {q13}, [r0,:128] @ next round tweak
veor q3, q3, q11
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -1883,8 +1879,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done
.align 4
.Lxts_enc_4:
vst1.64 {q12}, [r0,:128] @ next round tweak
veor q2, q2, q10
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -1910,8 +1904,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done
.align 4
.Lxts_enc_3:
vst1.64 {q11}, [r0,:128] @ next round tweak
veor q1, q1, q9
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -1936,8 +1928,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done
.align 4
.Lxts_enc_2:
vst1.64 {q10}, [r0,:128] @ next round tweak
veor q0, q0, q8
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -1960,7 +1950,7 @@ bsaes_xts_encrypt:
.align 4
.Lxts_enc_1:
mov r0, sp
veor q0, q8
veor q0, q0, q8
mov r1, sp
vst1.8 {q0}, [sp,:128]
mov r2, r10
@ -2346,8 +2336,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done
.align 4
.Lxts_dec_5:
vst1.64 {q13}, [r0,:128] @ next round tweak
veor q3, q3, q11
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -2376,8 +2364,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done
.align 4
.Lxts_dec_4:
vst1.64 {q12}, [r0,:128] @ next round tweak
veor q2, q2, q10
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -2403,8 +2389,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done
.align 4
.Lxts_dec_3:
vst1.64 {q11}, [r0,:128] @ next round tweak
veor q1, q1, q9
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -2429,8 +2413,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done
.align 4
.Lxts_dec_2:
vst1.64 {q10}, [r0,:128] @ next round tweak
veor q0, q0, q8
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@ -2453,12 +2435,12 @@ bsaes_xts_decrypt:
.align 4
.Lxts_dec_1:
mov r0, sp
veor q0, q8
veor q0, q0, q8
mov r1, sp
vst1.8 {q0}, [sp,:128]
mov r5, r2 @ preserve magic
mov r2, r10
mov r4, r3 @ preserve fp
mov r5, r2 @ preserve magic
bl AES_decrypt
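Note on the ARM XTS hunks (the same change lands in the asm/ copy indexed
at the top of this commit): the tail paths drop their vst1.64 stores of
the next-round tweak (dead in the regenerated code, as far as the diff
shows), and the two-operand veor q0, q8 in the single-block paths becomes
the explicit veor q0, q0, q8 form that older assemblers accept. Each
block still computes the standard XTS composition; as a reference sketch
with a generic cipher callback (hypothetical types, not this file's
interface):

    #include <stdint.h>

    typedef void (*block_fn)(const uint8_t in[16], uint8_t out[16],
                             const void *key);

    /* C = E_K1(P xor T) xor T: the veor / AES call / veor sequence of
     * the .Lxts_enc_1 path. */
    static void xts_one_block(uint8_t c[16], const uint8_t p[16],
                              const uint8_t t[16], block_fn enc,
                              const void *k1)
    {
        uint8_t buf[16];
        for (int i = 0; i < 16; i++)
            buf[i] = p[i] ^ t[i];      /* veor q0, q0, q8 */
        enc(buf, buf, k1);             /* bl AES_encrypt  */
        for (int i = 0; i < 16; i++)
            c[i] = buf[i] ^ t[i];      /* xor the tweak back in */
    }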

127
deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s

@ -6,6 +6,8 @@
.type bn_mul_mont,@function
.align 16
bn_mul_mont:
movl %r9d,%r9d
movq %rsp,%rax
testl $3,%r9d
jnz .Lmul_enter
cmpl $8,%r9d
@ -25,29 +27,36 @@ bn_mul_mont:
pushq %r14
pushq %r15
movl %r9d,%r9d
leaq 2(%r9),%r10
negq %r9
movq %rsp,%r11
negq %r10
leaq (%rsp,%r10,8),%rsp
andq $-1024,%rsp
leaq -16(%rsp,%r9,8),%r10
negq %r9
andq $-1024,%r10
movq %r11,8(%rsp,%r9,8)
.Lmul_body:
subq %rsp,%r11
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
jmp .Lmul_page_walk_done
.align 16
.Lmul_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x66,0x2e
jnc .Lmul_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
.Lmul_page_walk_done:
movq %rax,8(%rsp,%r9,8)
.Lmul_body:
movq %rdx,%r12
movq (%r8),%r8
movq (%r12),%rbx
@ -215,19 +224,21 @@ bn_mul_mont:
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
movq -40(%rsi),%r14
movq -32(%rsi),%r13
movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
.Lmul_epilogue:
.byte 0xf3,0xc3
.size bn_mul_mont,.-bn_mul_mont
.type bn_mul4x_mont,@function
.align 16
bn_mul4x_mont:
movl %r9d,%r9d
movq %rsp,%rax
.Lmul4x_enter:
pushq %rbx
pushq %rbp
@ -236,23 +247,29 @@ bn_mul4x_mont:
pushq %r14
pushq %r15
movl %r9d,%r9d
leaq 4(%r9),%r10
negq %r9
movq %rsp,%r11
negq %r10
leaq (%rsp,%r10,8),%rsp
andq $-1024,%rsp
leaq -32(%rsp,%r9,8),%r10
negq %r9
andq $-1024,%r10
movq %r11,8(%rsp,%r9,8)
.Lmul4x_body:
subq %rsp,%r11
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul4x_page_walk
jmp .Lmul4x_page_walk_done
.Lmul4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lmul4x_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul4x_page_walk
.Lmul4x_page_walk_done:
movq %rax,8(%rsp,%r9,8)
.Lmul4x_body:
movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12
movq (%r8),%r8
@ -621,13 +638,13 @@ bn_mul4x_mont:
movdqu %xmm2,16(%rdi,%r14,1)
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
movq -40(%rsi),%r14
movq -32(%rsi),%r13
movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
.Lmul4x_epilogue:
.byte 0xf3,0xc3
.size bn_mul4x_mont,.-bn_mul4x_mont
@ -636,14 +653,15 @@ bn_mul4x_mont:
.type bn_sqr8x_mont,@function
.align 32
bn_sqr8x_mont:
.Lsqr8x_enter:
movq %rsp,%rax
.Lsqr8x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
.Lsqr8x_prologue:
movl %r9d,%r10d
shll $3,%r9d
@ -656,33 +674,42 @@ bn_sqr8x_mont:
leaq -64(%rsp,%r9,2),%r11
movq %rsp,%rbp
movq (%r8),%r8
subq %rsi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lsqr8x_sp_alt
subq %r11,%rsp
leaq -64(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -64(%rbp,%r9,2),%rbp
jmp .Lsqr8x_sp_done
.align 32
.Lsqr8x_sp_alt:
leaq 4096-64(,%r9,2),%r10
leaq -64(%rsp,%r9,2),%rsp
leaq -64(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
.Lsqr8x_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lsqr8x_page_walk
jmp .Lsqr8x_page_walk_done
.align 16
.Lsqr8x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lsqr8x_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lsqr8x_page_walk
.Lsqr8x_page_walk_done:
movq %r9,%r10
negq %r9
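Note on the x86_64-mont hunks: the math is untouched; the changes capture
the original %rsp in %rax before anything moves, compute the frame limit
in %r10 first, run the page walk, and make the epilogue restore the
callee-saved registers from fixed negative offsets off the saved pointer.
For orientation, the arithmetic being framed is word-serial Montgomery
multiplication; a compact textbook sketch follows (not the unrolled code
here, and the final selection is shown with a branch for clarity where
the asm is branchless):

    #include <stdint.h>

    /* r = a*b*R^-1 mod n, with R = 2^(64*len) and n0 = -n^-1 mod 2^64
     * precomputed by the caller.  __uint128_t assumes GCC/Clang. */
    static void mont_mul(uint64_t *r, const uint64_t *a, const uint64_t *b,
                         const uint64_t *n, uint64_t n0, int len)
    {
        uint64_t t[len + 2];                     /* C99 VLA accumulator */
        for (int i = 0; i < len + 2; i++) t[i] = 0;

        for (int i = 0; i < len; i++) {
            __uint128_t acc = 0;                 /* t += a[i] * b */
            for (int j = 0; j < len; j++) {
                acc += (__uint128_t)a[i] * b[j] + t[j];
                t[j] = (uint64_t)acc;
                acc >>= 64;
            }
            acc += t[len];
            t[len] = (uint64_t)acc;
            t[len + 1] += (uint64_t)(acc >> 64);

            uint64_t m = t[0] * n0;              /* zero the low word */
            acc = 0;                             /* t += m * n        */
            for (int j = 0; j < len; j++) {
                acc += (__uint128_t)m * n[j] + t[j];
                t[j] = (uint64_t)acc;
                acc >>= 64;
            }
            acc += t[len];
            t[len] = (uint64_t)acc;
            t[len + 1] += (uint64_t)(acc >> 64);

            for (int j = 0; j <= len; j++)       /* exact divide by 2^64 */
                t[j] = t[j + 1];
            t[len + 1] = 0;
        }

        uint64_t u[len], borrow = 0;             /* conditional t - n */
        for (int j = 0; j < len; j++) {
            __uint128_t d = (__uint128_t)t[j] - n[j] - borrow;
            u[j] = (uint64_t)d;
            borrow = (uint64_t)((d >> 64) & 1);
        }
        for (int j = 0; j < len; j++)
            r[j] = borrow > t[len] ? t[j] : u[j];
    }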

134
deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s

@ -6,16 +6,15 @@
.type bn_mul_mont_gather5,@function
.align 64
bn_mul_mont_gather5:
movl %r9d,%r9d
movq %rsp,%rax
testl $7,%r9d
jnz .Lmul_enter
jmp .Lmul4x_enter
.align 16
.Lmul_enter:
movl %r9d,%r9d
movq %rsp,%rax
movd 8(%rsp),%xmm5
leaq .Linc(%rip),%r10
pushq %rbx
pushq %rbp
pushq %r12
@ -23,26 +22,36 @@ bn_mul_mont_gather5:
pushq %r14
pushq %r15
leaq 2(%r9),%r11
negq %r11
leaq -264(%rsp,%r11,8),%rsp
andq $-1024,%rsp
negq %r9
movq %rsp,%r11
leaq -280(%rsp,%r9,8),%r10
negq %r9
andq $-1024,%r10
movq %rax,8(%rsp,%r9,8)
.Lmul_body:
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
jmp .Lmul_page_walk_done
subq %rsp,%rax
andq $-4096,%rax
.Lmul_page_walk:
movq (%rsp,%rax,1),%r11
subq $4096,%rax
.byte 0x2e
jnc .Lmul_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
.Lmul_page_walk_done:
leaq .Linc(%rip),%r10
movq %rax,8(%rsp,%r9,8)
.Lmul_body:
leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0
@ -413,15 +422,16 @@ bn_mul_mont_gather5:
.type bn_mul4x_mont_gather5,@function
.align 32
bn_mul4x_mont_gather5:
.Lmul4x_enter:
.byte 0x67
movq %rsp,%rax
.Lmul4x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
.Lmul4x_prologue:
.byte 0x67
shll $3,%r9d
@ -438,32 +448,40 @@ bn_mul4x_mont_gather5:
leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lmul4xsp_alt
subq %r11,%rsp
leaq -320(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp .Lmul4xsp_done
.align 32
.Lmul4xsp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
.Lmul4xsp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmul4x_page_walk
jmp .Lmul4x_page_walk_done
.Lmul4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lmul4x_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmul4x_page_walk
.Lmul4x_page_walk_done:
negq %r9
@ -1022,6 +1040,7 @@ bn_power5:
pushq %r13
pushq %r14
pushq %r15
.Lpower5_prologue:
shll $3,%r9d
leal (%r9,%r9,2),%r10d
@ -1036,32 +1055,40 @@ bn_power5:
leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lpwr_sp_alt
subq %r11,%rsp
leaq -320(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp .Lpwr_sp_done
.align 32
.Lpwr_sp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
.Lpwr_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lpwr_page_walk
jmp .Lpwr_page_walk_done
.Lpwr_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lpwr_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lpwr_page_walk
.Lpwr_page_walk_done:
movq %r9,%r10
negq %r9
@ -1972,6 +1999,7 @@ bn_from_mont8x:
pushq %r13
pushq %r14
pushq %r15
.Lfrom_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
@ -1986,32 +2014,40 @@ bn_from_mont8x:
leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lfrom_sp_alt
subq %r11,%rsp
leaq -320(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp .Lfrom_sp_done
.align 32
.Lfrom_sp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
.Lfrom_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lfrom_page_walk
jmp .Lfrom_page_walk_done
.Lfrom_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc .Lfrom_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lfrom_page_walk
.Lfrom_page_walk_done:
movq %r9,%r10
negq %r9
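Note on the mont5 entry changes: bn_mul_mont_gather5 now pulls the power
index off the stack into %xmm5 and sets up an increment table at .Linc
before the main loop, consistent with a constant-time gather that scans
the whole window table and mask-selects one entry, so the cache lines
touched no longer depend on the secret index. A sketch of that selection
pattern, under the assumption that this is the intent (sizes and names
illustrative):

    #include <stdint.h>
    #include <string.h>

    /* Read entry idx from a 32-entry table by scanning every slot and
     * keeping only the match, without an index-dependent load. */
    static void ct_gather(uint64_t out[4], const uint64_t table[32][4],
                          uint64_t idx)
    {
        memset(out, 0, 4 * sizeof out[0]);
        for (uint64_t i = 0; i < 32; i++) {
            uint64_t diff = i ^ idx;
            /* mask is all-ones iff diff == 0, computed branch-free */
            uint64_t mask = ((diff | (0 - diff)) >> 63) - 1;
            for (int j = 0; j < 4; j++)
                out[j] |= table[i][j] & mask;
        }
    }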

119
deps/openssl/asm_obsolete/x64-elf-gas/ec/ecp_nistz256-x86_64.s

@ -27,6 +27,7 @@ ecp_nistz256_mul_by_2:
pushq %r13
movq 0(%rsi),%r8
xorq %r13,%r13
movq 8(%rsi),%r9
addq %r8,%r8
movq 16(%rsi),%r10
@ -37,7 +38,7 @@ ecp_nistz256_mul_by_2:
adcq %r10,%r10
adcq %r11,%r11
movq %r9,%rdx
sbbq %r13,%r13
adcq $0,%r13
subq 0(%rsi),%r8
movq %r10,%rcx
@ -45,14 +46,14 @@ ecp_nistz256_mul_by_2:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
testq %r13,%r13
sbbq $0,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
cmovcq %rax,%r8
cmovcq %rdx,%r9
movq %r8,0(%rdi)
cmovzq %rcx,%r10
cmovcq %rcx,%r10
movq %r9,8(%rdi)
cmovzq %r12,%r11
cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@ -149,12 +150,12 @@ ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq .Lpoly+24(%rip),%r11
testq %r13,%r13
sbbq $0,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
cmovzq %rcx,%r10
cmovzq %r12,%r11
cmovcq %rax,%r8
cmovcq %rdx,%r9
cmovcq %rcx,%r10
cmovcq %r12,%r11
xorq %r13,%r13
addq 0(%rsi),%r8
@ -171,14 +172,14 @@ ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq .Lpoly+24(%rip),%r11
testq %r13,%r13
sbbq $0,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
cmovcq %rax,%r8
cmovcq %rdx,%r9
movq %r8,0(%rdi)
cmovzq %rcx,%r10
cmovcq %rcx,%r10
movq %r9,8(%rdi)
cmovzq %r12,%r11
cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@ -217,14 +218,14 @@ ecp_nistz256_add:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
testq %r13,%r13
sbbq $0,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
cmovcq %rax,%r8
cmovcq %rdx,%r9
movq %r8,0(%rdi)
cmovzq %rcx,%r10
cmovcq %rcx,%r10
movq %r9,8(%rdi)
cmovzq %r12,%r11
cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@ -993,13 +994,14 @@ ecp_nistz256_avx2_select_w7:
.type __ecp_nistz256_add_toq,@function
.align 32
__ecp_nistz256_add_toq:
xorq %r11,%r11
addq 0(%rbx),%r12
adcq 8(%rbx),%r13
movq %r12,%rax
adcq 16(%rbx),%r8
adcq 24(%rbx),%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -1007,14 +1009,14 @@ __ecp_nistz256_add_toq:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovzq %rbp,%r13
cmovcq %rax,%r12
cmovcq %rbp,%r13
movq %r12,0(%rdi)
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq %r13,8(%rdi)
cmovzq %r10,%r9
cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@ -1082,13 +1084,14 @@ __ecp_nistz256_subq:
.type __ecp_nistz256_mul_by_2q,@function
.align 32
__ecp_nistz256_mul_by_2q:
xorq %r11,%r11
addq %r12,%r12
adcq %r13,%r13
movq %r12,%rax
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -1096,14 +1099,14 @@ __ecp_nistz256_mul_by_2q:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovzq %rbp,%r13
cmovcq %rax,%r12
cmovcq %rbp,%r13
movq %r12,0(%rdi)
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq %r13,8(%rdi)
cmovzq %r10,%r9
cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@ -1333,16 +1336,14 @@ ecp_nistz256_point_add:
movq %rdx,%rsi
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
por %xmm1,%xmm3
por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0
pshufd $0xb1,%xmm3,%xmm5
pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
@ -1354,14 +1355,14 @@ ecp_nistz256_point_add:
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
por %xmm0,%xmm1
.byte 102,72,15,110,199
movdqu 64(%rsi),%xmm0
movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
por %xmm2,%xmm3
por %xmm4,%xmm5
pxor %xmm4,%xmm4
por %xmm1,%xmm3
por %xmm0,%xmm1
.byte 102,72,15,110,199
leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp)
@ -1372,8 +1373,8 @@ ecp_nistz256_point_add:
call __ecp_nistz256_sqr_montq
pcmpeqd %xmm4,%xmm5
pshufd $0xb1,%xmm3,%xmm4
por %xmm3,%xmm4
pshufd $0xb1,%xmm1,%xmm4
por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
@ -1556,6 +1557,7 @@ ecp_nistz256_point_add:
xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi
adcq %r13,%r13
@ -1563,7 +1565,7 @@ ecp_nistz256_point_add:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -1571,15 +1573,15 @@ ecp_nistz256_point_add:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovcq %rax,%r12
movq 0(%rsi),%rax
cmovzq %rbp,%r13
cmovcq %rbp,%r13
movq 8(%rsi),%rbp
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq 16(%rsi),%rcx
cmovzq %r10,%r9
cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
@ -1733,16 +1735,14 @@ ecp_nistz256_point_add_affine:
movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp)
por %xmm1,%xmm3
por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0
pshufd $0xb1,%xmm3,%xmm5
pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5
@ -1860,6 +1860,7 @@ ecp_nistz256_point_add_affine:
xorq %r11,%r11
addq %r12,%r12
leaq 192(%rsp),%rsi
adcq %r13,%r13
@ -1867,7 +1868,7 @@ ecp_nistz256_point_add_affine:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -1875,15 +1876,15 @@ ecp_nistz256_point_add_affine:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovcq %rax,%r12
movq 0(%rsi),%rax
cmovzq %rbp,%r13
cmovcq %rbp,%r13
movq 8(%rsi),%rbp
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq 16(%rsi),%rcx
cmovzq %r10,%r9
cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq

4
deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s

@ -1255,9 +1255,9 @@ _shaext_shortcut:
.align 16
.Loop_shaext:
decq %rdx
leaq 64(%rsi),%rax
leaq 64(%rsi),%r8
paddd %xmm4,%xmm1
cmovneq %rax,%rsi
cmovneq %r8,%rsi
movdqa %xmm0,%xmm8
.byte 15,56,201,229
movdqa %xmm0,%xmm2

127
deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s

@ -6,6 +6,8 @@
.p2align 4
_bn_mul_mont:
movl %r9d,%r9d
movq %rsp,%rax
testl $3,%r9d
jnz L$mul_enter
cmpl $8,%r9d
@ -25,29 +27,36 @@ L$mul_enter:
pushq %r14
pushq %r15
movl %r9d,%r9d
leaq 2(%r9),%r10
negq %r9
movq %rsp,%r11
negq %r10
leaq (%rsp,%r10,8),%rsp
andq $-1024,%rsp
leaq -16(%rsp,%r9,8),%r10
negq %r9
andq $-1024,%r10
movq %r11,8(%rsp,%r9,8)
L$mul_body:
subq %rsp,%r11
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul_page_walk
jmp L$mul_page_walk_done
.p2align 4
L$mul_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x66,0x2e
jnc L$mul_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul_page_walk
L$mul_page_walk_done:
movq %rax,8(%rsp,%r9,8)
L$mul_body:
movq %rdx,%r12
movq (%r8),%r8
movq (%r12),%rbx
@ -215,19 +224,21 @@ L$copy:
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
movq -40(%rsi),%r14
movq -32(%rsi),%r13
movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
L$mul_epilogue:
.byte 0xf3,0xc3
.p2align 4
bn_mul4x_mont:
movl %r9d,%r9d
movq %rsp,%rax
L$mul4x_enter:
pushq %rbx
pushq %rbp
@ -236,23 +247,29 @@ L$mul4x_enter:
pushq %r14
pushq %r15
movl %r9d,%r9d
leaq 4(%r9),%r10
negq %r9
movq %rsp,%r11
negq %r10
leaq (%rsp,%r10,8),%rsp
andq $-1024,%rsp
leaq -32(%rsp,%r9,8),%r10
negq %r9
andq $-1024,%r10
movq %r11,8(%rsp,%r9,8)
L$mul4x_body:
subq %rsp,%r11
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul4x_page_walk
jmp L$mul4x_page_walk_done
L$mul4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$mul4x_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul4x_page_walk
L$mul4x_page_walk_done:
movq %rax,8(%rsp,%r9,8)
L$mul4x_body:
movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12
movq (%r8),%r8
@ -621,13 +638,13 @@ L$copy4x:
movdqu %xmm2,16(%rdi,%r14,1)
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
movq -48(%rsi),%r15
movq -40(%rsi),%r14
movq -32(%rsi),%r13
movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
L$mul4x_epilogue:
.byte 0xf3,0xc3
@ -636,14 +653,15 @@ L$mul4x_epilogue:
.p2align 5
bn_sqr8x_mont:
L$sqr8x_enter:
movq %rsp,%rax
L$sqr8x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
L$sqr8x_prologue:
movl %r9d,%r10d
shll $3,%r9d
@ -656,33 +674,42 @@ L$sqr8x_enter:
leaq -64(%rsp,%r9,2),%r11
movq %rsp,%rbp
movq (%r8),%r8
subq %rsi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$sqr8x_sp_alt
subq %r11,%rsp
leaq -64(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -64(%rbp,%r9,2),%rbp
jmp L$sqr8x_sp_done
.p2align 5
L$sqr8x_sp_alt:
leaq 4096-64(,%r9,2),%r10
leaq -64(%rsp,%r9,2),%rsp
leaq -64(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
L$sqr8x_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$sqr8x_page_walk
jmp L$sqr8x_page_walk_done
.p2align 4
L$sqr8x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$sqr8x_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$sqr8x_page_walk
L$sqr8x_page_walk_done:
movq %r9,%r10
negq %r9

134
deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s

@ -6,16 +6,15 @@
.p2align 6
_bn_mul_mont_gather5:
movl %r9d,%r9d
movq %rsp,%rax
testl $7,%r9d
jnz L$mul_enter
jmp L$mul4x_enter
.p2align 4
L$mul_enter:
movl %r9d,%r9d
movq %rsp,%rax
movd 8(%rsp),%xmm5
leaq L$inc(%rip),%r10
pushq %rbx
pushq %rbp
pushq %r12
@ -23,26 +22,36 @@ L$mul_enter:
pushq %r14
pushq %r15
leaq 2(%r9),%r11
negq %r11
leaq -264(%rsp,%r11,8),%rsp
andq $-1024,%rsp
negq %r9
movq %rsp,%r11
leaq -280(%rsp,%r9,8),%r10
negq %r9
andq $-1024,%r10
movq %rax,8(%rsp,%r9,8)
L$mul_body:
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul_page_walk
jmp L$mul_page_walk_done
subq %rsp,%rax
andq $-4096,%rax
L$mul_page_walk:
movq (%rsp,%rax,1),%r11
subq $4096,%rax
.byte 0x2e
jnc L$mul_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul_page_walk
L$mul_page_walk_done:
leaq L$inc(%rip),%r10
movq %rax,8(%rsp,%r9,8)
L$mul_body:
leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0
@ -413,15 +422,16 @@ L$mul_epilogue:
.p2align 5
bn_mul4x_mont_gather5:
L$mul4x_enter:
.byte 0x67
movq %rsp,%rax
L$mul4x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
L$mul4x_prologue:
.byte 0x67
shll $3,%r9d
@ -438,32 +448,40 @@ L$mul4x_enter:
leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$mul4xsp_alt
subq %r11,%rsp
leaq -320(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp L$mul4xsp_done
.p2align 5
L$mul4xsp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
L$mul4xsp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$mul4x_page_walk
jmp L$mul4x_page_walk_done
L$mul4x_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$mul4x_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$mul4x_page_walk
L$mul4x_page_walk_done:
negq %r9
@ -1022,6 +1040,7 @@ _bn_power5:
pushq %r13
pushq %r14
pushq %r15
L$power5_prologue:
shll $3,%r9d
leal (%r9,%r9,2),%r10d
@ -1036,32 +1055,40 @@ _bn_power5:
leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$pwr_sp_alt
subq %r11,%rsp
leaq -320(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp L$pwr_sp_done
.p2align 5
L$pwr_sp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
L$pwr_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$pwr_page_walk
jmp L$pwr_page_walk_done
L$pwr_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$pwr_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$pwr_page_walk
L$pwr_page_walk_done:
movq %r9,%r10
negq %r9
@ -1972,6 +1999,7 @@ bn_from_mont8x:
pushq %r13
pushq %r14
pushq %r15
L$from_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
@ -1986,32 +2014,40 @@ bn_from_mont8x:
leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$from_sp_alt
subq %r11,%rsp
leaq -320(%rsp,%r9,2),%rsp
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp L$from_sp_done
.p2align 5
L$from_sp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
subq %r11,%rbp
L$from_sp_done:
andq $-64,%rsp
movq %rax,%r11
subq %rsp,%r11
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$from_page_walk
jmp L$from_page_walk_done
L$from_page_walk:
movq (%rsp,%r11,1),%r10
subq $4096,%r11
.byte 0x2e
jnc L$from_page_walk
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$from_page_walk
L$from_page_walk_done:
movq %r9,%r10
negq %r9

119
deps/openssl/asm_obsolete/x64-macosx-gas/ec/ecp_nistz256-x86_64.s

@ -27,6 +27,7 @@ _ecp_nistz256_mul_by_2:
pushq %r13
movq 0(%rsi),%r8
xorq %r13,%r13
movq 8(%rsi),%r9
addq %r8,%r8
movq 16(%rsi),%r10
@ -37,7 +38,7 @@ _ecp_nistz256_mul_by_2:
adcq %r10,%r10
adcq %r11,%r11
movq %r9,%rdx
sbbq %r13,%r13
adcq $0,%r13
subq 0(%rsi),%r8
movq %r10,%rcx
@ -45,14 +46,14 @@ _ecp_nistz256_mul_by_2:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
testq %r13,%r13
sbbq $0,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
cmovcq %rax,%r8
cmovcq %rdx,%r9
movq %r8,0(%rdi)
cmovzq %rcx,%r10
cmovcq %rcx,%r10
movq %r9,8(%rdi)
cmovzq %r12,%r11
cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@ -149,12 +150,12 @@ _ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq L$poly+24(%rip),%r11
testq %r13,%r13
sbbq $0,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
cmovzq %rcx,%r10
cmovzq %r12,%r11
cmovcq %rax,%r8
cmovcq %rdx,%r9
cmovcq %rcx,%r10
cmovcq %r12,%r11
xorq %r13,%r13
addq 0(%rsi),%r8
@ -171,14 +172,14 @@ _ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq L$poly+24(%rip),%r11
testq %r13,%r13
sbbq $0,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
cmovcq %rax,%r8
cmovcq %rdx,%r9
movq %r8,0(%rdi)
cmovzq %rcx,%r10
cmovcq %rcx,%r10
movq %r9,8(%rdi)
cmovzq %r12,%r11
cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@ -217,14 +218,14 @@ _ecp_nistz256_add:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
testq %r13,%r13
sbbq $0,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
cmovcq %rax,%r8
cmovcq %rdx,%r9
movq %r8,0(%rdi)
cmovzq %rcx,%r10
cmovcq %rcx,%r10
movq %r9,8(%rdi)
cmovzq %r12,%r11
cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@ -993,13 +994,14 @@ _ecp_nistz256_avx2_select_w7:
.p2align 5
__ecp_nistz256_add_toq:
xorq %r11,%r11
addq 0(%rbx),%r12
adcq 8(%rbx),%r13
movq %r12,%rax
adcq 16(%rbx),%r8
adcq 24(%rbx),%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -1007,14 +1009,14 @@ __ecp_nistz256_add_toq:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovzq %rbp,%r13
cmovcq %rax,%r12
cmovcq %rbp,%r13
movq %r12,0(%rdi)
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq %r13,8(%rdi)
cmovzq %r10,%r9
cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@ -1082,13 +1084,14 @@ __ecp_nistz256_subq:
.p2align 5
__ecp_nistz256_mul_by_2q:
xorq %r11,%r11
addq %r12,%r12
adcq %r13,%r13
movq %r12,%rax
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -1096,14 +1099,14 @@ __ecp_nistz256_mul_by_2q:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovzq %rbp,%r13
cmovcq %rax,%r12
cmovcq %rbp,%r13
movq %r12,0(%rdi)
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq %r13,8(%rdi)
cmovzq %r10,%r9
cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@ -1333,16 +1336,14 @@ _ecp_nistz256_point_add:
movq %rdx,%rsi
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
por %xmm1,%xmm3
por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0
pshufd $0xb1,%xmm3,%xmm5
pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
@ -1354,14 +1355,14 @@ _ecp_nistz256_point_add:
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
por %xmm0,%xmm1
.byte 102,72,15,110,199
movdqu 64(%rsi),%xmm0
movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
por %xmm2,%xmm3
por %xmm4,%xmm5
pxor %xmm4,%xmm4
por %xmm1,%xmm3
por %xmm0,%xmm1
.byte 102,72,15,110,199
leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp)
@ -1372,8 +1373,8 @@ _ecp_nistz256_point_add:
call __ecp_nistz256_sqr_montq
pcmpeqd %xmm4,%xmm5
pshufd $0xb1,%xmm3,%xmm4
por %xmm3,%xmm4
pshufd $0xb1,%xmm1,%xmm4
por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
@ -1556,6 +1557,7 @@ L$add_proceedq:
xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi
adcq %r13,%r13
@ -1563,7 +1565,7 @@ L$add_proceedq:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -1571,15 +1573,15 @@ L$add_proceedq:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovcq %rax,%r12
movq 0(%rsi),%rax
cmovzq %rbp,%r13
cmovcq %rbp,%r13
movq 8(%rsi),%rbp
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq 16(%rsi),%rcx
cmovzq %r10,%r9
cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
@ -1733,16 +1735,14 @@ _ecp_nistz256_point_add_affine:
movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp)
por %xmm1,%xmm3
por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0
pshufd $0xb1,%xmm3,%xmm5
pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5
@ -1860,6 +1860,7 @@ _ecp_nistz256_point_add_affine:
xorq %r11,%r11
addq %r12,%r12
leaq 192(%rsp),%rsi
adcq %r13,%r13
@ -1867,7 +1868,7 @@ _ecp_nistz256_point_add_affine:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
sbbq %r11,%r11
adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@ -1875,15 +1876,15 @@ _ecp_nistz256_point_add_affine:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
testq %r11,%r11
sbbq $0,%r11
cmovzq %rax,%r12
cmovcq %rax,%r12
movq 0(%rsi),%rax
cmovzq %rbp,%r13
cmovcq %rbp,%r13
movq 8(%rsi),%rbp
cmovzq %rcx,%r8
cmovcq %rcx,%r8
movq 16(%rsi),%rcx
cmovzq %r10,%r9
cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
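
In the ecp_nistz256 hunks above, the regenerated reductions stop smearing the carry into an all-ones mask (sbbq %r13,%r13 followed by testq/cmovz) and instead keep the actual carry bit (adcq $0), fold the trial subtraction's borrow back into it (sbbq $0), and select on the carry flag (cmovc). After t = a + b (or 2a, 3a), the modulus is subtracted and the difference is kept only when the full five-limb value does not underflow. A hedged C sketch of that pattern, with illustrative names; a production version would select with a mask rather than a branch:

#include <stdint.h>

typedef unsigned __int128 u128;   /* assumes a compiler with __int128 */

/* Sketch only: r = (a + b) mod p for 4x64-bit little-endian limbs,
 * with a and b already reduced below p. */
static void add_mod_sketch(uint64_t r[4], const uint64_t a[4],
                           const uint64_t b[4], const uint64_t p[4])
{
    uint64_t t[4], u[4], carry = 0, borrow = 0;
    for (int i = 0; i < 4; i++) {             /* t = a + b, carry out */
        u128 s = (u128)a[i] + b[i] + carry;
        t[i] = (uint64_t)s;
        carry = (uint64_t)(s >> 64);
    }
    for (int i = 0; i < 4; i++) {             /* u = t - p, borrow out */
        u128 d = (u128)t[i] - p[i] - borrow;
        u[i] = (uint64_t)d;
        borrow = (uint64_t)(d >> 64) & 1;
    }
    /* the asm's final sbb $0 / cmovc: (carry:t) - p underflows exactly
     * when carry < borrow, and then the unreduced sum is kept */
    int keep_sum = carry < borrow;
    for (int i = 0; i < 4; i++)
        r[i] = keep_sum ? t[i] : u[i];
}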

4
deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s

@ -1255,9 +1255,9 @@ _shaext_shortcut:
.p2align 4
L$oop_shaext:
decq %rdx
leaq 64(%rsi),%rax
leaq 64(%rsi),%r8
paddd %xmm4,%xmm1
cmovneq %rax,%rsi
cmovneq %r8,%rsi
movdqa %xmm0,%xmm8
.byte 15,56,201,229
movdqa %xmm0,%xmm2

154
deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm

@ -19,6 +19,8 @@ $L$SEH_begin_bn_mul_mont::
mov r9,QWORD PTR[48+rsp]
mov r9d,r9d
mov rax,rsp
test r9d,3
jnz $L$mul_enter
cmp r9d,8
@ -38,29 +40,36 @@ $L$mul_enter::
push r14
push r15
mov r9d,r9d
lea r10,QWORD PTR[2+r9]
neg r9
mov r11,rsp
neg r10
lea rsp,QWORD PTR[r10*8+rsp]
and rsp,-1024
lea r10,QWORD PTR[((-16))+r9*8+rsp]
neg r9
and r10,-1024
mov QWORD PTR[8+r9*8+rsp],r11
$L$mul_body::
sub r11,rsp
sub r11,r10
and r11,-4096
lea rsp,QWORD PTR[r11*1+r10]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul_page_walk
jmp $L$mul_page_walk_done
ALIGN 16
$L$mul_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 066h,02eh
jnc $L$mul_page_walk
lea rsp,QWORD PTR[((-4096))+rsp]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul_page_walk
$L$mul_page_walk_done::
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul_body::
mov r12,rdx
mov r8,QWORD PTR[r8]
mov rbx,QWORD PTR[r12]
@ -228,13 +237,13 @@ $L$copy::
mov rsi,QWORD PTR[8+r9*8+rsp]
mov rax,1
mov r15,QWORD PTR[rsi]
mov r14,QWORD PTR[8+rsi]
mov r13,QWORD PTR[16+rsi]
mov r12,QWORD PTR[24+rsi]
mov rbp,QWORD PTR[32+rsi]
mov rbx,QWORD PTR[40+rsi]
lea rsp,QWORD PTR[48+rsi]
mov r15,QWORD PTR[((-48))+rsi]
mov r14,QWORD PTR[((-40))+rsi]
mov r13,QWORD PTR[((-32))+rsi]
mov r12,QWORD PTR[((-24))+rsi]
mov rbp,QWORD PTR[((-16))+rsi]
mov rbx,QWORD PTR[((-8))+rsi]
lea rsp,QWORD PTR[rsi]
$L$mul_epilogue::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
@ -256,6 +265,8 @@ $L$SEH_begin_bn_mul4x_mont::
mov r9,QWORD PTR[48+rsp]
mov r9d,r9d
mov rax,rsp
$L$mul4x_enter::
push rbx
push rbp
@ -264,23 +275,29 @@ $L$mul4x_enter::
push r14
push r15
mov r9d,r9d
lea r10,QWORD PTR[4+r9]
neg r9
mov r11,rsp
neg r10
lea rsp,QWORD PTR[r10*8+rsp]
and rsp,-1024
lea r10,QWORD PTR[((-32))+r9*8+rsp]
neg r9
and r10,-1024
mov QWORD PTR[8+r9*8+rsp],r11
$L$mul4x_body::
sub r11,rsp
sub r11,r10
and r11,-4096
lea rsp,QWORD PTR[r11*1+r10]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul4x_page_walk
jmp $L$mul4x_page_walk_done
$L$mul4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$mul4x_page_walk
lea rsp,QWORD PTR[((-4096))+rsp]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul4x_page_walk
$L$mul4x_page_walk_done::
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul4x_body::
mov QWORD PTR[16+r9*8+rsp],rdi
mov r12,rdx
mov r8,QWORD PTR[r8]
@ -649,13 +666,13 @@ $L$copy4x::
movdqu XMMWORD PTR[16+r14*1+rdi],xmm2
mov rsi,QWORD PTR[8+r9*8+rsp]
mov rax,1
mov r15,QWORD PTR[rsi]
mov r14,QWORD PTR[8+rsi]
mov r13,QWORD PTR[16+rsi]
mov r12,QWORD PTR[24+rsi]
mov rbp,QWORD PTR[32+rsi]
mov rbx,QWORD PTR[40+rsi]
lea rsp,QWORD PTR[48+rsi]
mov r15,QWORD PTR[((-48))+rsi]
mov r14,QWORD PTR[((-40))+rsi]
mov r13,QWORD PTR[((-32))+rsi]
mov r12,QWORD PTR[((-24))+rsi]
mov rbp,QWORD PTR[((-16))+rsi]
mov rbx,QWORD PTR[((-8))+rsi]
lea rsp,QWORD PTR[rsi]
$L$mul4x_epilogue::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
@ -679,14 +696,15 @@ $L$SEH_begin_bn_sqr8x_mont::
mov r9,QWORD PTR[48+rsp]
$L$sqr8x_enter::
mov rax,rsp
$L$sqr8x_enter::
push rbx
push rbp
push r12
push r13
push r14
push r15
$L$sqr8x_prologue::
mov r10d,r9d
shl r9d,3
@ -699,33 +717,42 @@ $L$sqr8x_enter::
lea r11,QWORD PTR[((-64))+r9*2+rsp]
mov rbp,rsp
mov r8,QWORD PTR[r8]
sub r11,rsi
and r11,4095
cmp r10,r11
jb $L$sqr8x_sp_alt
sub rsp,r11
lea rsp,QWORD PTR[((-64))+r9*2+rsp]
sub rbp,r11
lea rbp,QWORD PTR[((-64))+r9*2+rbp]
jmp $L$sqr8x_sp_done
ALIGN 32
$L$sqr8x_sp_alt::
lea r10,QWORD PTR[((4096-64))+r9*2]
lea rsp,QWORD PTR[((-64))+r9*2+rsp]
lea rbp,QWORD PTR[((-64))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
sub rsp,r11
sub rbp,r11
$L$sqr8x_sp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and rbp,-64
mov r11,rsp
sub r11,rbp
and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$sqr8x_page_walk
jmp $L$sqr8x_page_walk_done
ALIGN 16
$L$sqr8x_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$sqr8x_page_walk
lea rsp,QWORD PTR[((-4096))+rsp]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$sqr8x_page_walk
$L$sqr8x_page_walk_done::
mov r10,r9
neg r9
@ -860,22 +887,8 @@ mul_handler PROC PRIVATE
mov r10,QWORD PTR[192+r8]
mov rax,QWORD PTR[8+r10*8+rax]
lea rax,QWORD PTR[48+rax]
mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax]
mov r13,QWORD PTR[((-32))+rax]
mov r14,QWORD PTR[((-40))+rax]
mov r15,QWORD PTR[((-48))+rax]
mov QWORD PTR[144+r8],rbx
mov QWORD PTR[160+r8],rbp
mov QWORD PTR[216+r8],r12
mov QWORD PTR[224+r8],r13
mov QWORD PTR[232+r8],r14
mov QWORD PTR[240+r8],r15
jmp $L$common_seh_tail
jmp $L$common_pop_regs
mul_handler ENDP
@ -903,15 +916,21 @@ sqr_handler PROC PRIVATE
cmp rbx,r10
jb $L$common_seh_tail
mov r10d,DWORD PTR[4+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jb $L$common_pop_regs
mov rax,QWORD PTR[152+r8]
mov r10d,DWORD PTR[4+r11]
mov r10d,DWORD PTR[8+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jae $L$common_seh_tail
mov rax,QWORD PTR[40+rax]
$L$common_pop_regs::
mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax]
@ -993,7 +1012,8 @@ DB 9,0,0,0
$L$SEH_info_bn_sqr8x_mont::
DB 9,0,0,0
DD imagerel sqr_handler
DD imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue
DD imagerel $L$sqr8x_prologue,imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue
ALIGN 8
.xdata ENDS
END

153
deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm

@ -19,16 +19,15 @@ $L$SEH_begin_bn_mul_mont_gather5::
mov r9,QWORD PTR[48+rsp]
mov r9d,r9d
mov rax,rsp
test r9d,7
jnz $L$mul_enter
jmp $L$mul4x_enter
ALIGN 16
$L$mul_enter::
mov r9d,r9d
mov rax,rsp
movd xmm5,DWORD PTR[56+rsp]
lea r10,QWORD PTR[$L$inc]
push rbx
push rbp
push r12
@ -36,26 +35,36 @@ $L$mul_enter::
push r14
push r15
lea r11,QWORD PTR[2+r9]
neg r11
lea rsp,QWORD PTR[((-264))+r11*8+rsp]
and rsp,-1024
neg r9
mov r11,rsp
lea r10,QWORD PTR[((-280))+r9*8+rsp]
neg r9
and r10,-1024
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul_body::
sub r11,r10
and r11,-4096
lea rsp,QWORD PTR[r11*1+r10]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul_page_walk
jmp $L$mul_page_walk_done
sub rax,rsp
and rax,-4096
$L$mul_page_walk::
mov r11,QWORD PTR[rax*1+rsp]
sub rax,4096
DB 02eh
jnc $L$mul_page_walk
lea rsp,QWORD PTR[((-4096))+rsp]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul_page_walk
$L$mul_page_walk_done::
lea r10,QWORD PTR[$L$inc]
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul_body::
lea r12,QWORD PTR[128+rdx]
movdqa xmm0,XMMWORD PTR[r10]
@ -441,15 +450,16 @@ $L$SEH_begin_bn_mul4x_mont_gather5::
mov r9,QWORD PTR[48+rsp]
$L$mul4x_enter::
DB 067h
mov rax,rsp
$L$mul4x_enter::
push rbx
push rbp
push r12
push r13
push r14
push r15
$L$mul4x_prologue::
DB 067h
shl r9d,3
@ -466,32 +476,40 @@ DB 067h
lea r11,QWORD PTR[((-320))+r9*2+rsp]
mov rbp,rsp
sub r11,rdi
and r11,4095
cmp r10,r11
jb $L$mul4xsp_alt
sub rsp,r11
lea rsp,QWORD PTR[((-320))+r9*2+rsp]
sub rbp,r11
lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$mul4xsp_done
ALIGN 32
$L$mul4xsp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2]
lea rsp,QWORD PTR[((-320))+r9*2+rsp]
lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
sub rsp,r11
sub rbp,r11
$L$mul4xsp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and rbp,-64
mov r11,rsp
sub r11,rbp
and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$mul4x_page_walk
jmp $L$mul4x_page_walk_done
$L$mul4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$mul4x_page_walk
lea rsp,QWORD PTR[((-4096))+rsp]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$mul4x_page_walk
$L$mul4x_page_walk_done::
neg r9
@ -1065,6 +1083,7 @@ $L$SEH_begin_bn_power5::
push r13
push r14
push r15
$L$power5_prologue::
shl r9d,3
lea r10d,DWORD PTR[r9*2+r9]
@ -1079,32 +1098,40 @@ $L$SEH_begin_bn_power5::
lea r11,QWORD PTR[((-320))+r9*2+rsp]
mov rbp,rsp
sub r11,rdi
and r11,4095
cmp r10,r11
jb $L$pwr_sp_alt
sub rsp,r11
lea rsp,QWORD PTR[((-320))+r9*2+rsp]
sub rbp,r11
lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$pwr_sp_done
ALIGN 32
$L$pwr_sp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2]
lea rsp,QWORD PTR[((-320))+r9*2+rsp]
lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
sub rsp,r11
sub rbp,r11
$L$pwr_sp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and rbp,-64
mov r11,rsp
sub r11,rbp
and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$pwr_page_walk
jmp $L$pwr_page_walk_done
$L$pwr_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$pwr_page_walk
lea rsp,QWORD PTR[((-4096))+rsp]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$pwr_page_walk
$L$pwr_page_walk_done::
mov r10,r9
neg r9
@ -2030,6 +2057,7 @@ DB 067h
push r13
push r14
push r15
$L$from_prologue::
shl r9d,3
lea r10,QWORD PTR[r9*2+r9]
@ -2044,32 +2072,40 @@ DB 067h
lea r11,QWORD PTR[((-320))+r9*2+rsp]
mov rbp,rsp
sub r11,rdi
and r11,4095
cmp r10,r11
jb $L$from_sp_alt
sub rsp,r11
lea rsp,QWORD PTR[((-320))+r9*2+rsp]
sub rbp,r11
lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$from_sp_done
ALIGN 32
$L$from_sp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2]
lea rsp,QWORD PTR[((-320))+r9*2+rsp]
lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
sub rsp,r11
sub rbp,r11
$L$from_sp_done::
and rsp,-64
mov r11,rax
sub r11,rsp
and rbp,-64
mov r11,rsp
sub r11,rbp
and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$from_page_walk
jmp $L$from_page_walk_done
$L$from_page_walk::
mov r10,QWORD PTR[r11*1+rsp]
sub r11,4096
DB 02eh
jnc $L$from_page_walk
lea rsp,QWORD PTR[((-4096))+rsp]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$from_page_walk
$L$from_page_walk_done::
mov r10,r9
neg r9
@ -2383,9 +2419,14 @@ mul_handler PROC PRIVATE
cmp rbx,r10
jb $L$common_seh_tail
mov r10d,DWORD PTR[4+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jb $L$common_pop_regs
mov rax,QWORD PTR[152+r8]
mov r10d,DWORD PTR[4+r11]
mov r10d,DWORD PTR[8+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jae $L$common_seh_tail
@ -2397,11 +2438,11 @@ mul_handler PROC PRIVATE
mov r10,QWORD PTR[192+r8]
mov rax,QWORD PTR[8+r10*8+rax]
jmp $L$body_proceed
jmp $L$common_pop_regs
$L$body_40::
mov rax,QWORD PTR[40+rax]
$L$body_proceed::
$L$common_pop_regs::
mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax]
@ -2483,22 +2524,22 @@ ALIGN 8
$L$SEH_info_bn_mul_mont_gather5::
DB 9,0,0,0
DD imagerel mul_handler
DD imagerel $L$mul_body,imagerel $L$mul_epilogue
DD imagerel $L$mul_body,imagerel $L$mul_body,imagerel $L$mul_epilogue
ALIGN 8
$L$SEH_info_bn_mul4x_mont_gather5::
DB 9,0,0,0
DD imagerel mul_handler
DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
DD imagerel $L$mul4x_prologue,imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
ALIGN 8
$L$SEH_info_bn_power5::
DB 9,0,0,0
DD imagerel mul_handler
DD imagerel $L$power5_body,imagerel $L$power5_epilogue
DD imagerel $L$power5_prologue,imagerel $L$power5_body,imagerel $L$power5_epilogue
ALIGN 8
$L$SEH_info_bn_from_mont8x::
DB 9,0,0,0
DD imagerel mul_handler
DD imagerel $L$from_body,imagerel $L$from_epilogue
DD imagerel $L$from_prologue,imagerel $L$from_body,imagerel $L$from_epilogue
ALIGN 8
$L$SEH_info_bn_gather5::
DB 001h,00bh,003h,00ah

119
deps/openssl/asm_obsolete/x64-win32-masm/ec/ecp_nistz256-x86_64.asm

@ -36,6 +36,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
push r13
mov r8,QWORD PTR[rsi]
xor r13,r13
mov r9,QWORD PTR[8+rsi]
add r8,r8
mov r10,QWORD PTR[16+rsi]
@ -46,7 +47,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
adc r10,r10
adc r11,r11
mov rdx,r9
sbb r13,r13
adc r13,0
sub r8,QWORD PTR[rsi]
mov rcx,r10
@ -54,14 +55,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
sbb r10,QWORD PTR[16+rsi]
mov r12,r11
sbb r11,QWORD PTR[24+rsi]
test r13,r13
sbb r13,0
cmovz r8,rax
cmovz r9,rdx
cmovc r8,rax
cmovc r9,rdx
mov QWORD PTR[rdi],r8
cmovz r10,rcx
cmovc r10,rcx
mov QWORD PTR[8+rdi],r9
cmovz r11,r12
cmovc r11,r12
mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11
@ -180,12 +181,12 @@ $L$SEH_begin_ecp_nistz256_mul_by_3::
sbb r10,0
mov r12,r11
sbb r11,QWORD PTR[(($L$poly+24))]
test r13,r13
sbb r13,0
cmovz r8,rax
cmovz r9,rdx
cmovz r10,rcx
cmovz r11,r12
cmovc r8,rax
cmovc r9,rdx
cmovc r10,rcx
cmovc r11,r12
xor r13,r13
add r8,QWORD PTR[rsi]
@ -202,14 +203,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_3::
sbb r10,0
mov r12,r11
sbb r11,QWORD PTR[(($L$poly+24))]
test r13,r13
sbb r13,0
cmovz r8,rax
cmovz r9,rdx
cmovc r8,rax
cmovc r9,rdx
mov QWORD PTR[rdi],r8
cmovz r10,rcx
cmovc r10,rcx
mov QWORD PTR[8+rdi],r9
cmovz r11,r12
cmovc r11,r12
mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11
@ -260,14 +261,14 @@ $L$SEH_begin_ecp_nistz256_add::
sbb r10,QWORD PTR[16+rsi]
mov r12,r11
sbb r11,QWORD PTR[24+rsi]
test r13,r13
sbb r13,0
cmovz r8,rax
cmovz r9,rdx
cmovc r8,rax
cmovc r9,rdx
mov QWORD PTR[rdi],r8
cmovz r10,rcx
cmovc r10,rcx
mov QWORD PTR[8+rdi],r9
cmovz r11,r12
cmovc r11,r12
mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11
@ -1167,13 +1168,14 @@ ecp_nistz256_avx2_select_w7 ENDP
ALIGN 32
__ecp_nistz256_add_toq PROC PRIVATE
xor r11,r11
add r12,QWORD PTR[rbx]
adc r13,QWORD PTR[8+rbx]
mov rax,r12
adc r8,QWORD PTR[16+rbx]
adc r9,QWORD PTR[24+rbx]
mov rbp,r13
sbb r11,r11
adc r11,0
sub r12,-1
mov rcx,r8
@ -1181,14 +1183,14 @@ __ecp_nistz256_add_toq PROC PRIVATE
sbb r8,0
mov r10,r9
sbb r9,r15
test r11,r11
sbb r11,0
cmovz r12,rax
cmovz r13,rbp
cmovc r12,rax
cmovc r13,rbp
mov QWORD PTR[rdi],r12
cmovz r8,rcx
cmovc r8,rcx
mov QWORD PTR[8+rdi],r13
cmovz r9,r10
cmovc r9,r10
mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9
@ -1256,13 +1258,14 @@ __ecp_nistz256_subq ENDP
ALIGN 32
__ecp_nistz256_mul_by_2q PROC PRIVATE
xor r11,r11
add r12,r12
adc r13,r13
mov rax,r12
adc r8,r8
adc r9,r9
mov rbp,r13
sbb r11,r11
adc r11,0
sub r12,-1
mov rcx,r8
@ -1270,14 +1273,14 @@ __ecp_nistz256_mul_by_2q PROC PRIVATE
sbb r8,0
mov r10,r9
sbb r9,r15
test r11,r11
sbb r11,0
cmovz r12,rax
cmovz r13,rbp
cmovc r12,rax
cmovc r13,rbp
mov QWORD PTR[rdi],r12
cmovz r8,rcx
cmovc r8,rcx
mov QWORD PTR[8+rdi],r13
cmovz r9,r10
cmovc r9,r10
mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9
@ -1527,16 +1530,14 @@ $L$SEH_begin_ecp_nistz256_point_add::
mov rsi,rdx
movdqa XMMWORD PTR[384+rsp],xmm0
movdqa XMMWORD PTR[(384+16)+rsp],xmm1
por xmm1,xmm0
movdqa XMMWORD PTR[416+rsp],xmm2
movdqa XMMWORD PTR[(416+16)+rsp],xmm3
por xmm3,xmm2
movdqa XMMWORD PTR[448+rsp],xmm4
movdqa XMMWORD PTR[(448+16)+rsp],xmm5
por xmm3,xmm1
por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rsi]
pshufd xmm5,xmm3,0b1h
pshufd xmm3,xmm5,0b1h
movdqu xmm1,XMMWORD PTR[16+rsi]
movdqu xmm2,XMMWORD PTR[32+rsi]
por xmm5,xmm3
@ -1548,14 +1549,14 @@ $L$SEH_begin_ecp_nistz256_point_add::
movdqa XMMWORD PTR[480+rsp],xmm0
pshufd xmm4,xmm5,01eh
movdqa XMMWORD PTR[(480+16)+rsp],xmm1
por xmm1,xmm0
DB 102,72,15,110,199
movdqu xmm0,XMMWORD PTR[64+rsi]
movdqu xmm1,XMMWORD PTR[80+rsi]
movdqa XMMWORD PTR[512+rsp],xmm2
movdqa XMMWORD PTR[(512+16)+rsp],xmm3
por xmm3,xmm2
por xmm5,xmm4
pxor xmm4,xmm4
por xmm3,xmm1
por xmm1,xmm0
DB 102,72,15,110,199
lea rsi,QWORD PTR[((64-0))+rsi]
mov QWORD PTR[((544+0))+rsp],rax
@ -1566,8 +1567,8 @@ DB 102,72,15,110,199
call __ecp_nistz256_sqr_montq
pcmpeqd xmm5,xmm4
pshufd xmm4,xmm3,0b1h
por xmm4,xmm3
pshufd xmm4,xmm1,0b1h
por xmm4,xmm1
pshufd xmm5,xmm5,0
pshufd xmm3,xmm4,01eh
por xmm4,xmm3
@ -1750,6 +1751,7 @@ $L$add_proceedq::
xor r11,r11
add r12,r12
lea rsi,QWORD PTR[96+rsp]
adc r13,r13
@ -1757,7 +1759,7 @@ $L$add_proceedq::
adc r8,r8
adc r9,r9
mov rbp,r13
sbb r11,r11
adc r11,0
sub r12,-1
mov rcx,r8
@ -1765,15 +1767,15 @@ $L$add_proceedq::
sbb r8,0
mov r10,r9
sbb r9,r15
test r11,r11
sbb r11,0
cmovz r12,rax
cmovc r12,rax
mov rax,QWORD PTR[rsi]
cmovz r13,rbp
cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi]
cmovz r8,rcx
cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi]
cmovz r9,r10
cmovc r9,r10
mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subq
@ -1939,16 +1941,14 @@ $L$SEH_begin_ecp_nistz256_point_add_affine::
mov r8,QWORD PTR[((64+24))+rsi]
movdqa XMMWORD PTR[320+rsp],xmm0
movdqa XMMWORD PTR[(320+16)+rsp],xmm1
por xmm1,xmm0
movdqa XMMWORD PTR[352+rsp],xmm2
movdqa XMMWORD PTR[(352+16)+rsp],xmm3
por xmm3,xmm2
movdqa XMMWORD PTR[384+rsp],xmm4
movdqa XMMWORD PTR[(384+16)+rsp],xmm5
por xmm3,xmm1
por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rbx]
pshufd xmm5,xmm3,0b1h
pshufd xmm3,xmm5,0b1h
movdqu xmm1,XMMWORD PTR[16+rbx]
movdqu xmm2,XMMWORD PTR[32+rbx]
por xmm5,xmm3
@ -2066,6 +2066,7 @@ DB 102,72,15,110,199
xor r11,r11
add r12,r12
lea rsi,QWORD PTR[192+rsp]
adc r13,r13
@ -2073,7 +2074,7 @@ DB 102,72,15,110,199
adc r8,r8
adc r9,r9
mov rbp,r13
sbb r11,r11
adc r11,0
sub r12,-1
mov rcx,r8
@ -2081,15 +2082,15 @@ DB 102,72,15,110,199
sbb r8,0
mov r10,r9
sbb r9,r15
test r11,r11
sbb r11,0
cmovz r12,rax
cmovc r12,rax
mov rax,QWORD PTR[rsi]
cmovz r13,rbp
cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi]
cmovz r8,rcx
cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi]
cmovz r9,r10
cmovc r9,r10
mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subq
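
The reshuffled por/pshufd sequences in the point_add and point_add_affine hunks above change which XMM registers accumulate the coordinates feeding the point-at-infinity check: the limbs of the tested value are OR-folded, the 128-bit halves are swapped with pshufd 0b1h and OR-ed again, and pcmpeqd against a zeroed register turns "all bits zero" into a select mask. Roughly, it is the zero test sketched below in C; illustrative only:

#include <stdint.h>

/* Sketch only: a 256-bit coordinate is zero iff the OR-fold of its
 * limbs is zero; the asm derives a SIMD select mask from the same
 * fold instead of returning a boolean. */
static int is_all_zero_256(const uint64_t z[4])
{
    uint64_t acc = (z[0] | z[1]) | (z[2] | z[3]);
    return acc == 0;
}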

4
deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-x86_64.asm

@ -1283,9 +1283,9 @@ DB 102,15,56,0,251
ALIGN 16
$L$oop_shaext::
dec rdx
lea rax,QWORD PTR[64+rsi]
lea r8,QWORD PTR[64+rsi]
paddd xmm1,xmm4
cmovne rsi,rax
cmovne rsi,r8
movdqa xmm8,xmm0
DB 15,56,201,229
movdqa xmm2,xmm0

109
deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s

@ -15,44 +15,51 @@ bn_mul_mont:
jl .L000just_leave
leal 20(%esp),%esi
leal 24(%esp),%edx
movl %esp,%ebp
addl $2,%edi
negl %edi
leal -32(%esp,%edi,4),%esp
leal -32(%esp,%edi,4),%ebp
negl %edi
movl %esp,%eax
movl %ebp,%eax
subl %edx,%eax
andl $2047,%eax
subl %eax,%esp
xorl %esp,%edx
subl %eax,%ebp
xorl %ebp,%edx
andl $2048,%edx
xorl $2048,%edx
subl %edx,%esp
andl $-64,%esp
movl %ebp,%eax
subl %esp,%eax
subl %edx,%ebp
andl $-64,%ebp
movl %esp,%eax
subl %ebp,%eax
andl $-4096,%eax
movl %esp,%edx
leal (%ebp,%eax,1),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja .L001page_walk
jmp .L002page_walk_done
.align 16
.L001page_walk:
movl (%esp,%eax,1),%edx
subl $4096,%eax
.byte 46
jnc .L001page_walk
leal -4096(%esp),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja .L001page_walk
.L002page_walk_done:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl 12(%esi),%ebp
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
movl %edx,16(%esp)
movl %ebp,16(%esp)
movl %esi,20(%esp)
leal -3(%edi),%ebx
movl %ebp,24(%esp)
movl %edx,24(%esp)
leal OPENSSL_ia32cap_P,%eax
btl $26,(%eax)
jnc .L002non_sse2
jnc .L003non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
@ -76,7 +83,7 @@ bn_mul_mont:
psrlq $32,%mm3
incl %ecx
.align 16
.L0031st:
.L0041st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -91,7 +98,7 @@ bn_mul_mont:
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl .L0031st
jl .L0041st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -105,7 +112,7 @@ bn_mul_mont:
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
.L004outer:
.L005outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
@ -127,7 +134,7 @@ bn_mul_mont:
paddq %mm6,%mm2
incl %ecx
decl %ebx
.L005inner:
.L006inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -144,7 +151,7 @@ bn_mul_mont:
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
jnz .L005inner
jnz .L006inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
@ -162,11 +169,11 @@ bn_mul_mont:
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle .L004outer
jle .L005outer
emms
jmp .L006common_tail
jmp .L007common_tail
.align 16
.L002non_sse2:
.L003non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@ -177,12 +184,12 @@ bn_mul_mont:
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz .L007bn_sqr_mont
jz .L008bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 16
.L008mull:
.L009mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@ -191,7 +198,7 @@ bn_mul_mont:
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L008mull
jl .L009mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@ -209,9 +216,9 @@ bn_mul_mont:
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp .L0092ndmadd
jmp .L0102ndmadd
.align 16
.L0101stmadd:
.L0111stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -222,7 +229,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L0101stmadd
jl .L0111stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@ -245,7 +252,7 @@ bn_mul_mont:
adcl $0,%edx
movl $1,%ecx
.align 16
.L0092ndmadd:
.L0102ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -256,7 +263,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0092ndmadd
jl .L0102ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -272,16 +279,16 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je .L006common_tail
je .L007common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp .L0101stmadd
jmp .L0111stmadd
.align 16
.L007bn_sqr_mont:
.L008bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@ -292,7 +299,7 @@ bn_mul_mont:
andl $1,%ebx
incl %ecx
.align 16
.L011sqr:
.L012sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -304,7 +311,7 @@ bn_mul_mont:
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl .L011sqr
jl .L012sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -328,7 +335,7 @@ bn_mul_mont:
movl 4(%esi),%eax
movl $1,%ecx
.align 16
.L0123rdmadd:
.L0133rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -347,7 +354,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0123rdmadd
jl .L0133rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -363,7 +370,7 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je .L006common_tail
je .L007common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
@ -375,12 +382,12 @@ bn_mul_mont:
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je .L013sqrlast
je .L014sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 16
.L014sqradd:
.L015sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -396,13 +403,13 @@ bn_mul_mont:
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle .L014sqradd
jle .L015sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
.L013sqrlast:
.L014sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@ -417,9 +424,9 @@ bn_mul_mont:
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp .L0123rdmadd
jmp .L0133rdmadd
.align 16
.L006common_tail:
.L007common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
@ -427,13 +434,13 @@ bn_mul_mont:
movl %ebx,%ecx
xorl %edx,%edx
.align 16
.L015sub:
.L016sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge .L015sub
jge .L016sub
sbbl $0,%eax
andl %eax,%esi
notl %eax
@ -441,12 +448,12 @@ bn_mul_mont:
andl %eax,%ebp
orl %ebp,%esi
.align 16
.L016copy:
.L017copy:
movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4)
decl %ebx
jge .L016copy
jge .L017copy
movl 24(%esp),%esp
movl $1,%eax
.L000just_leave:
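
In the 32-bit bn_mul_mont hunks above (repeated for the macosx and win32 variants that follow), the prologue now computes the frame address in %ebp instead of moving %esp speculatively: the scratch frame is 64-byte aligned and nudged by 2KB so it lands in the opposite 2KB half from the argument block (apparently a cache-aliasing precaution), and only then is the allocation committed with the same page walk as the 64-bit code. A C sketch of the placement arithmetic, mirroring the asm with hypothetical names:

#include <stdint.h>

/* Sketch only: decide where the scratch frame goes before any stack
 * memory is actually committed. */
static uintptr_t place_frame_sketch(uintptr_t sp, uintptr_t args, int num)
{
    uintptr_t fp = sp - 32 - 4u * (uintptr_t)(num + 2); /* tentative frame */
    fp -= (fp - args) & 2047;            /* align to args within 2KB...    */
    fp -= ((args ^ fp) & 2048) ^ 2048;   /* ...then force the other half   */
    fp &= (uintptr_t)-64;                /* 64-byte align                  */
    return fp;                           /* page walk commits sp down here */
}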

115
deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s

@ -14,47 +14,54 @@ L_bn_mul_mont_begin:
jl L000just_leave
leal 20(%esp),%esi
leal 24(%esp),%edx
movl %esp,%ebp
addl $2,%edi
negl %edi
leal -32(%esp,%edi,4),%esp
leal -32(%esp,%edi,4),%ebp
negl %edi
movl %esp,%eax
movl %ebp,%eax
subl %edx,%eax
andl $2047,%eax
subl %eax,%esp
xorl %esp,%edx
subl %eax,%ebp
xorl %ebp,%edx
andl $2048,%edx
xorl $2048,%edx
subl %edx,%esp
andl $-64,%esp
movl %ebp,%eax
subl %esp,%eax
subl %edx,%ebp
andl $-64,%ebp
movl %esp,%eax
subl %ebp,%eax
andl $-4096,%eax
movl %esp,%edx
leal (%ebp,%eax,1),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja L001page_walk
jmp L002page_walk_done
.align 4,0x90
L001page_walk:
movl (%esp,%eax,1),%edx
subl $4096,%eax
.byte 46
jnc L001page_walk
leal -4096(%esp),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja L001page_walk
L002page_walk_done:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl 12(%esi),%ebp
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
movl %edx,16(%esp)
movl %ebp,16(%esp)
movl %esi,20(%esp)
leal -3(%edi),%ebx
movl %ebp,24(%esp)
call L002PIC_me_up
L002PIC_me_up:
movl %edx,24(%esp)
call L003PIC_me_up
L003PIC_me_up:
popl %eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L002PIC_me_up(%eax),%eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc L003non_sse2
jnc L004non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
@ -78,7 +85,7 @@ L002PIC_me_up:
psrlq $32,%mm3
incl %ecx
.align 4,0x90
L0041st:
L0051st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -93,7 +100,7 @@ L0041st:
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl L0041st
jl L0051st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -107,7 +114,7 @@ L0041st:
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
L005outer:
L006outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
@ -129,7 +136,7 @@ L005outer:
paddq %mm6,%mm2
incl %ecx
decl %ebx
L006inner:
L007inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@ -146,7 +153,7 @@ L006inner:
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
jnz L006inner
jnz L007inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
@ -164,11 +171,11 @@ L006inner:
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle L005outer
jle L006outer
emms
jmp L007common_tail
jmp L008common_tail
.align 4,0x90
L003non_sse2:
L004non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@ -179,12 +186,12 @@ L003non_sse2:
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz L008bn_sqr_mont
jz L009bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 4,0x90
L009mull:
L010mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@ -193,7 +200,7 @@ L009mull:
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl L009mull
jl L010mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@ -211,9 +218,9 @@ L009mull:
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp L0102ndmadd
jmp L0112ndmadd
.align 4,0x90
L0111stmadd:
L0121stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -224,7 +231,7 @@ L0111stmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl L0111stmadd
jl L0121stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@ -247,7 +254,7 @@ L0111stmadd:
adcl $0,%edx
movl $1,%ecx
.align 4,0x90
L0102ndmadd:
L0112ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -258,7 +265,7 @@ L0102ndmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl L0102ndmadd
jl L0112ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -274,16 +281,16 @@ L0102ndmadd:
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je L007common_tail
je L008common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp L0111stmadd
jmp L0121stmadd
.align 4,0x90
L008bn_sqr_mont:
L009bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@ -294,7 +301,7 @@ L008bn_sqr_mont:
andl $1,%ebx
incl %ecx
.align 4,0x90
L012sqr:
L013sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -306,7 +313,7 @@ L012sqr:
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl L012sqr
jl L013sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -330,7 +337,7 @@ L012sqr:
movl 4(%esi),%eax
movl $1,%ecx
.align 4,0x90
L0133rdmadd:
L0143rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@ -349,7 +356,7 @@ L0133rdmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl L0133rdmadd
jl L0143rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@ -365,7 +372,7 @@ L0133rdmadd:
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je L007common_tail
je L008common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
@ -377,12 +384,12 @@ L0133rdmadd:
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je L014sqrlast
je L015sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 4,0x90
L015sqradd:
L016sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@ -398,13 +405,13 @@ L015sqradd:
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle L015sqradd
jle L016sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
L014sqrlast:
L015sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@ -419,9 +426,9 @@ L014sqrlast:
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp L0133rdmadd
jmp L0143rdmadd
.align 4,0x90
L007common_tail:
L008common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
@ -429,13 +436,13 @@ L007common_tail:
movl %ebx,%ecx
xorl %edx,%edx
.align 4,0x90
L016sub:
L017sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge L016sub
jge L017sub
sbbl $0,%eax
andl %eax,%esi
notl %eax
@ -443,12 +450,12 @@ L016sub:
andl %eax,%ebp
orl %ebp,%esi
.align 4,0x90
L017copy:
L018copy:
movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4)
decl %ebx
jge L017copy
jge L018copy
movl 24(%esp),%esp
movl $1,%eax
L000just_leave:

109
deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm

@ -31,44 +31,51 @@ $L_bn_mul_mont_begin::
jl $L000just_leave
lea esi,DWORD PTR 20[esp]
lea edx,DWORD PTR 24[esp]
mov ebp,esp
add edi,2
neg edi
lea esp,DWORD PTR [edi*4+esp-32]
lea ebp,DWORD PTR [edi*4+esp-32]
neg edi
mov eax,esp
mov eax,ebp
sub eax,edx
and eax,2047
sub esp,eax
xor edx,esp
sub ebp,eax
xor edx,ebp
and edx,2048
xor edx,2048
sub esp,edx
and esp,-64
mov eax,ebp
sub eax,esp
sub ebp,edx
and ebp,-64
mov eax,esp
sub eax,ebp
and eax,-4096
mov edx,esp
lea esp,DWORD PTR [eax*1+ebp]
mov eax,DWORD PTR [esp]
cmp esp,ebp
ja $L001page_walk
jmp $L002page_walk_done
ALIGN 16
$L001page_walk:
mov edx,DWORD PTR [eax*1+esp]
sub eax,4096
DB 46
jnc $L001page_walk
lea esp,DWORD PTR [esp-4096]
mov eax,DWORD PTR [esp]
cmp esp,ebp
ja $L001page_walk
$L002page_walk_done:
mov eax,DWORD PTR [esi]
mov ebx,DWORD PTR 4[esi]
mov ecx,DWORD PTR 8[esi]
mov edx,DWORD PTR 12[esi]
mov ebp,DWORD PTR 12[esi]
mov esi,DWORD PTR 16[esi]
mov esi,DWORD PTR [esi]
mov DWORD PTR 4[esp],eax
mov DWORD PTR 8[esp],ebx
mov DWORD PTR 12[esp],ecx
mov DWORD PTR 16[esp],edx
mov DWORD PTR 16[esp],ebp
mov DWORD PTR 20[esp],esi
lea ebx,DWORD PTR [edi-3]
mov DWORD PTR 24[esp],ebp
mov DWORD PTR 24[esp],edx
lea eax,DWORD PTR _OPENSSL_ia32cap_P
bt DWORD PTR [eax],26
jnc $L002non_sse2
jnc $L003non_sse2
mov eax,-1
movd mm7,eax
mov esi,DWORD PTR 8[esp]
@ -92,7 +99,7 @@ DB 46
psrlq mm3,32
inc ecx
ALIGN 16
$L0031st:
$L0041st:
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@ -107,7 +114,7 @@ $L0031st:
psrlq mm3,32
lea ecx,DWORD PTR 1[ecx]
cmp ecx,ebx
jl $L0031st
jl $L0041st
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@ -121,7 +128,7 @@ $L0031st:
paddq mm3,mm2
movq QWORD PTR 32[ebx*4+esp],mm3
inc edx
$L004outer:
$L005outer:
xor ecx,ecx
movd mm4,DWORD PTR [edx*4+edi]
movd mm5,DWORD PTR [esi]
@ -143,7 +150,7 @@ $L004outer:
paddq mm2,mm6
inc ecx
dec ebx
$L005inner:
$L006inner:
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@ -160,7 +167,7 @@ $L005inner:
paddq mm2,mm6
dec ebx
lea ecx,DWORD PTR 1[ecx]
jnz $L005inner
jnz $L006inner
mov ebx,ecx
pmuludq mm0,mm4
pmuludq mm1,mm5
@ -178,11 +185,11 @@ $L005inner:
movq QWORD PTR 32[ebx*4+esp],mm3
lea edx,DWORD PTR 1[edx]
cmp edx,ebx
jle $L004outer
jle $L005outer
emms
jmp $L006common_tail
jmp $L007common_tail
ALIGN 16
$L002non_sse2:
$L003non_sse2:
mov esi,DWORD PTR 8[esp]
lea ebp,DWORD PTR 1[ebx]
mov edi,DWORD PTR 12[esp]
@ -193,12 +200,12 @@ $L002non_sse2:
lea eax,DWORD PTR 4[ebx*4+edi]
or ebp,edx
mov edi,DWORD PTR [edi]
jz $L007bn_sqr_mont
jz $L008bn_sqr_mont
mov DWORD PTR 28[esp],eax
mov eax,DWORD PTR [esi]
xor edx,edx
ALIGN 16
$L008mull:
$L009mull:
mov ebp,edx
mul edi
add ebp,eax
@ -207,7 +214,7 @@ $L008mull:
mov eax,DWORD PTR [ecx*4+esi]
cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp
jl $L008mull
jl $L009mull
mov ebp,edx
mul edi
mov edi,DWORD PTR 20[esp]
@ -225,9 +232,9 @@ $L008mull:
mov eax,DWORD PTR 4[esi]
adc edx,0
inc ecx
jmp $L0092ndmadd
jmp $L0102ndmadd
ALIGN 16
$L0101stmadd:
$L0111stmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@ -238,7 +245,7 @@ $L0101stmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp
jl $L0101stmadd
jl $L0111stmadd
mov ebp,edx
mul edi
add eax,DWORD PTR 32[ebx*4+esp]
@ -261,7 +268,7 @@ $L0101stmadd:
adc edx,0
mov ecx,1
ALIGN 16
$L0092ndmadd:
$L0102ndmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@ -272,7 +279,7 @@ $L0092ndmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp
jl $L0092ndmadd
jl $L0102ndmadd
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ebx*4+esp]
@ -288,16 +295,16 @@ $L0092ndmadd:
mov DWORD PTR 32[ebx*4+esp],edx
cmp ecx,DWORD PTR 28[esp]
mov DWORD PTR 36[ebx*4+esp],eax
je $L006common_tail
je $L007common_tail
mov edi,DWORD PTR [ecx]
mov esi,DWORD PTR 8[esp]
mov DWORD PTR 12[esp],ecx
xor ecx,ecx
xor edx,edx
mov eax,DWORD PTR [esi]
jmp $L0101stmadd
jmp $L0111stmadd
ALIGN 16
$L007bn_sqr_mont:
$L008bn_sqr_mont:
mov DWORD PTR [esp],ebx
mov DWORD PTR 12[esp],ecx
mov eax,edi
@ -308,7 +315,7 @@ $L007bn_sqr_mont:
and ebx,1
inc ecx
ALIGN 16
$L011sqr:
$L012sqr:
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@ -320,7 +327,7 @@ $L011sqr:
cmp ecx,DWORD PTR [esp]
mov ebx,eax
mov DWORD PTR 28[ecx*4+esp],ebp
jl $L011sqr
jl $L012sqr
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@ -344,7 +351,7 @@ $L011sqr:
mov eax,DWORD PTR 4[esi]
mov ecx,1
ALIGN 16
$L0123rdmadd:
$L0133rdmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@ -363,7 +370,7 @@ $L0123rdmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp
jl $L0123rdmadd
jl $L0133rdmadd
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ebx*4+esp]
@ -379,7 +386,7 @@ $L0123rdmadd:
mov DWORD PTR 32[ebx*4+esp],edx
cmp ecx,ebx
mov DWORD PTR 36[ebx*4+esp],eax
je $L006common_tail
je $L007common_tail
mov edi,DWORD PTR 4[ecx*4+esi]
lea ecx,DWORD PTR 1[ecx]
mov eax,edi
@ -391,12 +398,12 @@ $L0123rdmadd:
xor ebp,ebp
cmp ecx,ebx
lea ecx,DWORD PTR 1[ecx]
je $L013sqrlast
je $L014sqrlast
mov ebx,edx
shr edx,1
and ebx,1
ALIGN 16
$L014sqradd:
$L015sqradd:
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@ -412,13 +419,13 @@ $L014sqradd:
cmp ecx,DWORD PTR [esp]
mov DWORD PTR 28[ecx*4+esp],ebp
mov ebx,eax
jle $L014sqradd
jle $L015sqradd
mov ebp,edx
add edx,edx
shr ebp,31
add edx,ebx
adc ebp,0
$L013sqrlast:
$L014sqrlast:
mov edi,DWORD PTR 20[esp]
mov esi,DWORD PTR 16[esp]
imul edi,DWORD PTR 32[esp]
@ -433,9 +440,9 @@ $L013sqrlast:
adc edx,0
mov ecx,1
mov eax,DWORD PTR 4[esi]
jmp $L0123rdmadd
jmp $L0133rdmadd
ALIGN 16
$L006common_tail:
$L007common_tail:
mov ebp,DWORD PTR 16[esp]
mov edi,DWORD PTR 4[esp]
lea esi,DWORD PTR 32[esp]
@ -443,13 +450,13 @@ $L006common_tail:
mov ecx,ebx
xor edx,edx
ALIGN 16
$L015sub:
$L016sub:
sbb eax,DWORD PTR [edx*4+ebp]
mov DWORD PTR [edx*4+edi],eax
dec ecx
mov eax,DWORD PTR 4[edx*4+esi]
lea edx,DWORD PTR 1[edx]
jge $L015sub
jge $L016sub
sbb eax,0
and esi,eax
not eax
@ -457,12 +464,12 @@ $L015sub:
and ebp,eax
or esi,ebp
ALIGN 16
$L016copy:
$L017copy:
mov eax,DWORD PTR [ebx*4+esi]
mov DWORD PTR [ebx*4+edi],eax
mov DWORD PTR 32[ebx*4+esp],ecx
dec ebx
jge $L016copy
jge $L017copy
mov esp,DWORD PTR 24[esp]
mov eax,1
$L000just_leave:
