
deps: update openssl asm and asm_obsolete files

Regenerate the asm files with the Makefile, using CC=gcc and ASM=gcc,
where gcc is version 5.4.0. The asm files in the asm_obsolete dir,
which support old compilers and assemblers, are regenerated without
the CC and ASM env vars.

PR-URL: https://github.com/nodejs/node/pull/8714
Reviewed-By: Fedor Indutny <fedor@indutny.com>
Branch: v6.x
Author: Shigeki Ohtsu (9 years ago)
Committed by: Myles Borins
Commit: 2573efc9df
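
For reference, the regeneration roughly amounts to the commands below. The
Makefile locations and invocations are an assumption based on the Node.js
source layout; they are not spelled out in this commit.

    # asm: generated with a current toolchain (gcc 5.4.0 here)
    cd deps/openssl/asm
    make clean
    CC=gcc ASM=gcc make

    # asm_obsolete: generated without CC/ASM overrides so the output
    # stays compatible with old compilers and assemblers
    cd ../asm_obsolete
    make clean
    make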
32 changed files:

  1.  24  deps/openssl/asm/arm-void-gas/aes/bsaes-armv7.S
  2. 155  deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s
  3. 204  deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s
  4. 189  deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s
  5.   4  deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s
  6. 155  deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s
  7. 204  deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s
  8. 189  deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s
  9.   4  deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s
 10. 185  deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm
 11. 227  deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm
 12. 189  deps/openssl/asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm
 13.   4  deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm
 14. 109  deps/openssl/asm/x86-elf-gas/bn/x86-mont.s
 15. 115  deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s
 16. 109  deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm
 17.  24  deps/openssl/asm_obsolete/arm-void-gas/aes/bsaes-armv7.S
 18. 127  deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s
 19. 134  deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s
 20. 119  deps/openssl/asm_obsolete/x64-elf-gas/ec/ecp_nistz256-x86_64.s
 21.   4  deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s
 22. 127  deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s
 23. 134  deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s
 24. 119  deps/openssl/asm_obsolete/x64-macosx-gas/ec/ecp_nistz256-x86_64.s
 25.   4  deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s
 26. 154  deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm
 27. 153  deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm
 28. 119  deps/openssl/asm_obsolete/x64-win32-masm/ec/ecp_nistz256-x86_64.asm
 29.   4  deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-x86_64.asm
 30. 109  deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s
 31. 115  deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s
 32. 109  deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm

24  deps/openssl/asm/arm-void-gas/aes/bsaes-armv7.S

@ -1816,8 +1816,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done b .Lxts_enc_done
.align 4 .align 4
.Lxts_enc_6: .Lxts_enc_6:
vst1.64 {q14}, [r0,:128] @ next round tweak
veor q4, q4, q12 veor q4, q4, q12
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -1853,8 +1851,6 @@ bsaes_xts_encrypt:
.align 5 .align 5
.Lxts_enc_5: .Lxts_enc_5:
vst1.64 {q13}, [r0,:128] @ next round tweak
veor q3, q3, q11 veor q3, q3, q11
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -1883,8 +1879,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done b .Lxts_enc_done
.align 4 .align 4
.Lxts_enc_4: .Lxts_enc_4:
vst1.64 {q12}, [r0,:128] @ next round tweak
veor q2, q2, q10 veor q2, q2, q10
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -1910,8 +1904,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done b .Lxts_enc_done
.align 4 .align 4
.Lxts_enc_3: .Lxts_enc_3:
vst1.64 {q11}, [r0,:128] @ next round tweak
veor q1, q1, q9 veor q1, q1, q9
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -1936,8 +1928,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done b .Lxts_enc_done
.align 4 .align 4
.Lxts_enc_2: .Lxts_enc_2:
vst1.64 {q10}, [r0,:128] @ next round tweak
veor q0, q0, q8 veor q0, q0, q8
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -1960,7 +1950,7 @@ bsaes_xts_encrypt:
.align 4 .align 4
.Lxts_enc_1: .Lxts_enc_1:
mov r0, sp mov r0, sp
veor q0, q8 veor q0, q0, q8
mov r1, sp mov r1, sp
vst1.8 {q0}, [sp,:128] vst1.8 {q0}, [sp,:128]
mov r2, r10 mov r2, r10
@ -2346,8 +2336,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done b .Lxts_dec_done
.align 4 .align 4
.Lxts_dec_5: .Lxts_dec_5:
vst1.64 {q13}, [r0,:128] @ next round tweak
veor q3, q3, q11 veor q3, q3, q11
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -2376,8 +2364,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done b .Lxts_dec_done
.align 4 .align 4
.Lxts_dec_4: .Lxts_dec_4:
vst1.64 {q12}, [r0,:128] @ next round tweak
veor q2, q2, q10 veor q2, q2, q10
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -2403,8 +2389,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done b .Lxts_dec_done
.align 4 .align 4
.Lxts_dec_3: .Lxts_dec_3:
vst1.64 {q11}, [r0,:128] @ next round tweak
veor q1, q1, q9 veor q1, q1, q9
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -2429,8 +2413,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done b .Lxts_dec_done
.align 4 .align 4
.Lxts_dec_2: .Lxts_dec_2:
vst1.64 {q10}, [r0,:128] @ next round tweak
veor q0, q0, q8 veor q0, q0, q8
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -2453,12 +2435,12 @@ bsaes_xts_decrypt:
.align 4 .align 4
.Lxts_dec_1: .Lxts_dec_1:
mov r0, sp mov r0, sp
veor q0, q8 veor q0, q0, q8
mov r1, sp mov r1, sp
vst1.8 {q0}, [sp,:128] vst1.8 {q0}, [sp,:128]
mov r5, r2 @ preserve magic
mov r2, r10 mov r2, r10
mov r4, r3 @ preserve fp mov r4, r3 @ preserve fp
mov r5, r2 @ preserve magic
bl AES_decrypt bl AES_decrypt

155  deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s

@ -6,6 +6,8 @@
.type bn_mul_mont,@function .type bn_mul_mont,@function
.align 16 .align 16
bn_mul_mont: bn_mul_mont:
movl %r9d,%r9d
movq %rsp,%rax
testl $3,%r9d testl $3,%r9d
jnz .Lmul_enter jnz .Lmul_enter
cmpl $8,%r9d cmpl $8,%r9d
@ -26,29 +28,36 @@ bn_mul_mont:
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movl %r9d,%r9d negq %r9
leaq 2(%r9),%r10
movq %rsp,%r11 movq %rsp,%r11
negq %r10 leaq -16(%rsp,%r9,8),%r10
leaq (%rsp,%r10,8),%rsp negq %r9
andq $-1024,%rsp andq $-1024,%r10
movq %r11,8(%rsp,%r9,8)
.Lmul_body:
subq %rsp,%r11 subq %r10,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
jmp .Lmul_page_walk_done
.align 16
.Lmul_page_walk: .Lmul_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r11
.byte 0x66,0x2e cmpq %r10,%rsp
jnc .Lmul_page_walk ja .Lmul_page_walk
.Lmul_page_walk_done:
movq %rax,8(%rsp,%r9,8)
.Lmul_body:
movq %rdx,%r12 movq %rdx,%r12
movq (%r8),%r8 movq (%r8),%r8
movq (%r12),%rbx movq (%r12),%rbx
@ -216,19 +225,21 @@ bn_mul_mont:
movq 8(%rsp,%r9,8),%rsi movq 8(%rsp,%r9,8),%rsi
movq $1,%rax movq $1,%rax
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
.Lmul_epilogue: .Lmul_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size bn_mul_mont,.-bn_mul_mont .size bn_mul_mont,.-bn_mul_mont
.type bn_mul4x_mont,@function .type bn_mul4x_mont,@function
.align 16 .align 16
bn_mul4x_mont: bn_mul4x_mont:
movl %r9d,%r9d
movq %rsp,%rax
.Lmul4x_enter: .Lmul4x_enter:
andl $0x80100,%r11d andl $0x80100,%r11d
cmpl $0x80100,%r11d cmpl $0x80100,%r11d
@ -240,23 +251,29 @@ bn_mul4x_mont:
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movl %r9d,%r9d negq %r9
leaq 4(%r9),%r10
movq %rsp,%r11 movq %rsp,%r11
negq %r10 leaq -32(%rsp,%r9,8),%r10
leaq (%rsp,%r10,8),%rsp negq %r9
andq $-1024,%rsp andq $-1024,%r10
movq %r11,8(%rsp,%r9,8) subq %r10,%r11
.Lmul4x_body:
subq %rsp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul4x_page_walk
jmp .Lmul4x_page_walk_done
.Lmul4x_page_walk: .Lmul4x_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r11
.byte 0x2e cmpq %r10,%rsp
jnc .Lmul4x_page_walk ja .Lmul4x_page_walk
.Lmul4x_page_walk_done:
movq %rax,8(%rsp,%r9,8)
.Lmul4x_body:
movq %rdi,16(%rsp,%r9,8) movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12 movq %rdx,%r12
movq (%r8),%r8 movq (%r8),%r8
@ -625,13 +642,13 @@ bn_mul4x_mont:
movdqu %xmm2,16(%rdi,%r14,1) movdqu %xmm2,16(%rdi,%r14,1)
movq 8(%rsp,%r9,8),%rsi movq 8(%rsp,%r9,8),%rsi
movq $1,%rax movq $1,%rax
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
.Lmul4x_epilogue: .Lmul4x_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size bn_mul4x_mont,.-bn_mul4x_mont .size bn_mul4x_mont,.-bn_mul4x_mont
@ -641,14 +658,15 @@ bn_mul4x_mont:
.type bn_sqr8x_mont,@function .type bn_sqr8x_mont,@function
.align 32 .align 32
bn_sqr8x_mont: bn_sqr8x_mont:
.Lsqr8x_enter:
movq %rsp,%rax movq %rsp,%rax
.Lsqr8x_enter:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
.Lsqr8x_prologue:
movl %r9d,%r10d movl %r9d,%r10d
shll $3,%r9d shll $3,%r9d
@ -661,33 +679,42 @@ bn_sqr8x_mont:
leaq -64(%rsp,%r9,2),%r11 leaq -64(%rsp,%r9,2),%r11
movq %rsp,%rbp
movq (%r8),%r8 movq (%r8),%r8
subq %rsi,%r11 subq %rsi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb .Lsqr8x_sp_alt jb .Lsqr8x_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -64(%rsp,%r9,2),%rsp leaq -64(%rbp,%r9,2),%rbp
jmp .Lsqr8x_sp_done jmp .Lsqr8x_sp_done
.align 32 .align 32
.Lsqr8x_sp_alt: .Lsqr8x_sp_alt:
leaq 4096-64(,%r9,2),%r10 leaq 4096-64(,%r9,2),%r10
leaq -64(%rsp,%r9,2),%rsp leaq -64(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
.Lsqr8x_sp_done: .Lsqr8x_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lsqr8x_page_walk
jmp .Lsqr8x_page_walk_done
.align 16
.Lsqr8x_page_walk: .Lsqr8x_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc .Lsqr8x_page_walk ja .Lsqr8x_page_walk
.Lsqr8x_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9
@ -800,30 +827,38 @@ bn_sqr8x_mont:
.type bn_mulx4x_mont,@function .type bn_mulx4x_mont,@function
.align 32 .align 32
bn_mulx4x_mont: bn_mulx4x_mont:
.Lmulx4x_enter:
movq %rsp,%rax movq %rsp,%rax
.Lmulx4x_enter:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
.Lmulx4x_prologue:
shll $3,%r9d shll $3,%r9d
.byte 0x67
xorq %r10,%r10 xorq %r10,%r10
subq %r9,%r10 subq %r9,%r10
movq (%r8),%r8 movq (%r8),%r8
leaq -72(%rsp,%r10,1),%rsp leaq -72(%rsp,%r10,1),%rbp
andq $-128,%rsp andq $-128,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmulx4x_page_walk
jmp .Lmulx4x_page_walk_done
.align 16
.Lmulx4x_page_walk: .Lmulx4x_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x66,0x2e cmpq %rbp,%rsp
jnc .Lmulx4x_page_walk ja .Lmulx4x_page_walk
.Lmulx4x_page_walk_done:
leaq (%rdx,%r9,1),%r10 leaq (%rdx,%r9,1),%r10

204  deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s

@ -6,6 +6,8 @@
.type bn_mul_mont_gather5,@function .type bn_mul_mont_gather5,@function
.align 64 .align 64
bn_mul_mont_gather5: bn_mul_mont_gather5:
movl %r9d,%r9d
movq %rsp,%rax
testl $7,%r9d testl $7,%r9d
jnz .Lmul_enter jnz .Lmul_enter
movl OPENSSL_ia32cap_P+8(%rip),%r11d movl OPENSSL_ia32cap_P+8(%rip),%r11d
@ -13,10 +15,7 @@ bn_mul_mont_gather5:
.align 16 .align 16
.Lmul_enter: .Lmul_enter:
movl %r9d,%r9d
movq %rsp,%rax
movd 8(%rsp),%xmm5 movd 8(%rsp),%xmm5
leaq .Linc(%rip),%r10
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
@ -24,26 +23,36 @@ bn_mul_mont_gather5:
pushq %r14 pushq %r14
pushq %r15 pushq %r15
leaq 2(%r9),%r11 negq %r9
negq %r11 movq %rsp,%r11
leaq -264(%rsp,%r11,8),%rsp leaq -280(%rsp,%r9,8),%r10
andq $-1024,%rsp negq %r9
andq $-1024,%r10
movq %rax,8(%rsp,%r9,8)
.Lmul_body:
subq %rsp,%rax subq %r10,%r11
andq $-4096,%rax andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
jmp .Lmul_page_walk_done
.Lmul_page_walk: .Lmul_page_walk:
movq (%rsp,%rax,1),%r11 leaq -4096(%rsp),%rsp
subq $4096,%rax movq (%rsp),%r11
.byte 0x2e cmpq %r10,%rsp
jnc .Lmul_page_walk ja .Lmul_page_walk
.Lmul_page_walk_done:
leaq .Linc(%rip),%r10
movq %rax,8(%rsp,%r9,8)
.Lmul_body:
leaq 128(%rdx),%r12 leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0 movdqa 0(%r10),%xmm0
@ -414,18 +423,19 @@ bn_mul_mont_gather5:
.type bn_mul4x_mont_gather5,@function .type bn_mul4x_mont_gather5,@function
.align 32 .align 32
bn_mul4x_mont_gather5: bn_mul4x_mont_gather5:
.byte 0x67
movq %rsp,%rax
.Lmul4x_enter: .Lmul4x_enter:
andl $0x80108,%r11d andl $0x80108,%r11d
cmpl $0x80108,%r11d cmpl $0x80108,%r11d
je .Lmulx4x_enter je .Lmulx4x_enter
.byte 0x67
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
.Lmul4x_prologue:
.byte 0x67 .byte 0x67
shll $3,%r9d shll $3,%r9d
@ -442,32 +452,40 @@ bn_mul4x_mont_gather5:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb .Lmul4xsp_alt jb .Lmul4xsp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp .Lmul4xsp_done jmp .Lmul4xsp_done
.align 32 .align 32
.Lmul4xsp_alt: .Lmul4xsp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
.Lmul4xsp_done: .Lmul4xsp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmul4x_page_walk
jmp .Lmul4x_page_walk_done
.Lmul4x_page_walk: .Lmul4x_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc .Lmul4x_page_walk ja .Lmul4x_page_walk
.Lmul4x_page_walk_done:
negq %r9 negq %r9
@ -1019,17 +1037,18 @@ mul4x_internal:
.type bn_power5,@function .type bn_power5,@function
.align 32 .align 32
bn_power5: bn_power5:
movq %rsp,%rax
movl OPENSSL_ia32cap_P+8(%rip),%r11d movl OPENSSL_ia32cap_P+8(%rip),%r11d
andl $0x80108,%r11d andl $0x80108,%r11d
cmpl $0x80108,%r11d cmpl $0x80108,%r11d
je .Lpowerx5_enter je .Lpowerx5_enter
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
.Lpower5_prologue:
shll $3,%r9d shll $3,%r9d
leal (%r9,%r9,2),%r10d leal (%r9,%r9,2),%r10d
@ -1044,32 +1063,40 @@ bn_power5:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb .Lpwr_sp_alt jb .Lpwr_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp .Lpwr_sp_done jmp .Lpwr_sp_done
.align 32 .align 32
.Lpwr_sp_alt: .Lpwr_sp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
.Lpwr_sp_done: .Lpwr_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lpwr_page_walk
jmp .Lpwr_page_walk_done
.Lpwr_page_walk: .Lpwr_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc .Lpwr_page_walk ja .Lpwr_page_walk
.Lpwr_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9
@ -1980,6 +2007,7 @@ bn_from_mont8x:
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
.Lfrom_prologue:
shll $3,%r9d shll $3,%r9d
leaq (%r9,%r9,2),%r10 leaq (%r9,%r9,2),%r10
@ -1994,32 +2022,40 @@ bn_from_mont8x:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb .Lfrom_sp_alt jb .Lfrom_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp .Lfrom_sp_done jmp .Lfrom_sp_done
.align 32 .align 32
.Lfrom_sp_alt: .Lfrom_sp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
.Lfrom_sp_done: .Lfrom_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lfrom_page_walk
jmp .Lfrom_page_walk_done
.Lfrom_page_walk: .Lfrom_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc .Lfrom_page_walk ja .Lfrom_page_walk
.Lfrom_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9
@ -2113,14 +2149,15 @@ bn_from_mont8x:
.type bn_mulx4x_mont_gather5,@function .type bn_mulx4x_mont_gather5,@function
.align 32 .align 32
bn_mulx4x_mont_gather5: bn_mulx4x_mont_gather5:
.Lmulx4x_enter:
movq %rsp,%rax movq %rsp,%rax
.Lmulx4x_enter:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
.Lmulx4x_prologue:
shll $3,%r9d shll $3,%r9d
leaq (%r9,%r9,2),%r10 leaq (%r9,%r9,2),%r10
@ -2137,31 +2174,39 @@ bn_mulx4x_mont_gather5:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb .Lmulx4xsp_alt jb .Lmulx4xsp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp .Lmulx4xsp_done jmp .Lmulx4xsp_done
.Lmulx4xsp_alt: .Lmulx4xsp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
.Lmulx4xsp_done: .Lmulx4xsp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmulx4x_page_walk
jmp .Lmulx4x_page_walk_done
.Lmulx4x_page_walk: .Lmulx4x_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc .Lmulx4x_page_walk ja .Lmulx4x_page_walk
.Lmulx4x_page_walk_done:
@ -2619,14 +2664,15 @@ mulx4x_internal:
.type bn_powerx5,@function .type bn_powerx5,@function
.align 32 .align 32
bn_powerx5: bn_powerx5:
.Lpowerx5_enter:
movq %rsp,%rax movq %rsp,%rax
.Lpowerx5_enter:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
.Lpowerx5_prologue:
shll $3,%r9d shll $3,%r9d
leaq (%r9,%r9,2),%r10 leaq (%r9,%r9,2),%r10
@ -2641,32 +2687,40 @@ bn_powerx5:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb .Lpwrx_sp_alt jb .Lpwrx_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp .Lpwrx_sp_done jmp .Lpwrx_sp_done
.align 32 .align 32
.Lpwrx_sp_alt: .Lpwrx_sp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
.Lpwrx_sp_done: .Lpwrx_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lpwrx_page_walk
jmp .Lpwrx_page_walk_done
.Lpwrx_page_walk: .Lpwrx_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc .Lpwrx_page_walk ja .Lpwrx_page_walk
.Lpwrx_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9

189  deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s

@ -27,6 +27,7 @@ ecp_nistz256_mul_by_2:
pushq %r13 pushq %r13
movq 0(%rsi),%r8 movq 0(%rsi),%r8
xorq %r13,%r13
movq 8(%rsi),%r9 movq 8(%rsi),%r9
addq %r8,%r8 addq %r8,%r8
movq 16(%rsi),%r10 movq 16(%rsi),%r10
@ -37,7 +38,7 @@ ecp_nistz256_mul_by_2:
adcq %r10,%r10 adcq %r10,%r10
adcq %r11,%r11 adcq %r11,%r11
movq %r9,%rdx movq %r9,%rdx
sbbq %r13,%r13 adcq $0,%r13
subq 0(%rsi),%r8 subq 0(%rsi),%r8
movq %r10,%rcx movq %r10,%rcx
@ -45,14 +46,14 @@ ecp_nistz256_mul_by_2:
sbbq 16(%rsi),%r10 sbbq 16(%rsi),%r10
movq %r11,%r12 movq %r11,%r12
sbbq 24(%rsi),%r11 sbbq 24(%rsi),%r11
testq %r13,%r13 sbbq $0,%r13
cmovzq %rax,%r8 cmovcq %rax,%r8
cmovzq %rdx,%r9 cmovcq %rdx,%r9
movq %r8,0(%rdi) movq %r8,0(%rdi)
cmovzq %rcx,%r10 cmovcq %rcx,%r10
movq %r9,8(%rdi) movq %r9,8(%rdi)
cmovzq %r12,%r11 cmovcq %r12,%r11
movq %r10,16(%rdi) movq %r10,16(%rdi)
movq %r11,24(%rdi) movq %r11,24(%rdi)
@ -149,12 +150,12 @@ ecp_nistz256_mul_by_3:
sbbq $0,%r10 sbbq $0,%r10
movq %r11,%r12 movq %r11,%r12
sbbq .Lpoly+24(%rip),%r11 sbbq .Lpoly+24(%rip),%r11
testq %r13,%r13 sbbq $0,%r13
cmovzq %rax,%r8 cmovcq %rax,%r8
cmovzq %rdx,%r9 cmovcq %rdx,%r9
cmovzq %rcx,%r10 cmovcq %rcx,%r10
cmovzq %r12,%r11 cmovcq %r12,%r11
xorq %r13,%r13 xorq %r13,%r13
addq 0(%rsi),%r8 addq 0(%rsi),%r8
@ -171,14 +172,14 @@ ecp_nistz256_mul_by_3:
sbbq $0,%r10 sbbq $0,%r10
movq %r11,%r12 movq %r11,%r12
sbbq .Lpoly+24(%rip),%r11 sbbq .Lpoly+24(%rip),%r11
testq %r13,%r13 sbbq $0,%r13
cmovzq %rax,%r8 cmovcq %rax,%r8
cmovzq %rdx,%r9 cmovcq %rdx,%r9
movq %r8,0(%rdi) movq %r8,0(%rdi)
cmovzq %rcx,%r10 cmovcq %rcx,%r10
movq %r9,8(%rdi) movq %r9,8(%rdi)
cmovzq %r12,%r11 cmovcq %r12,%r11
movq %r10,16(%rdi) movq %r10,16(%rdi)
movq %r11,24(%rdi) movq %r11,24(%rdi)
@ -217,14 +218,14 @@ ecp_nistz256_add:
sbbq 16(%rsi),%r10 sbbq 16(%rsi),%r10
movq %r11,%r12 movq %r11,%r12
sbbq 24(%rsi),%r11 sbbq 24(%rsi),%r11
testq %r13,%r13 sbbq $0,%r13
cmovzq %rax,%r8 cmovcq %rax,%r8
cmovzq %rdx,%r9 cmovcq %rdx,%r9
movq %r8,0(%rdi) movq %r8,0(%rdi)
cmovzq %rcx,%r10 cmovcq %rcx,%r10
movq %r9,8(%rdi) movq %r9,8(%rdi)
cmovzq %r12,%r11 cmovcq %r12,%r11
movq %r10,16(%rdi) movq %r10,16(%rdi)
movq %r11,24(%rdi) movq %r11,24(%rdi)
@ -1461,13 +1462,14 @@ ecp_nistz256_avx2_select_w7:
.type __ecp_nistz256_add_toq,@function .type __ecp_nistz256_add_toq,@function
.align 32 .align 32
__ecp_nistz256_add_toq: __ecp_nistz256_add_toq:
xorq %r11,%r11
addq 0(%rbx),%r12 addq 0(%rbx),%r12
adcq 8(%rbx),%r13 adcq 8(%rbx),%r13
movq %r12,%rax movq %r12,%rax
adcq 16(%rbx),%r8 adcq 16(%rbx),%r8
adcq 24(%rbx),%r9 adcq 24(%rbx),%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -1475,14 +1477,14 @@ __ecp_nistz256_add_toq:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq %r12,0(%rdi) movq %r12,0(%rdi)
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq %r13,8(%rdi) movq %r13,8(%rdi)
cmovzq %r10,%r9 cmovcq %r10,%r9
movq %r8,16(%rdi) movq %r8,16(%rdi)
movq %r9,24(%rdi) movq %r9,24(%rdi)
@ -1550,13 +1552,14 @@ __ecp_nistz256_subq:
.type __ecp_nistz256_mul_by_2q,@function .type __ecp_nistz256_mul_by_2q,@function
.align 32 .align 32
__ecp_nistz256_mul_by_2q: __ecp_nistz256_mul_by_2q:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
adcq %r13,%r13 adcq %r13,%r13
movq %r12,%rax movq %r12,%rax
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -1564,14 +1567,14 @@ __ecp_nistz256_mul_by_2q:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq %r12,0(%rdi) movq %r12,0(%rdi)
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq %r13,8(%rdi) movq %r13,8(%rdi)
cmovzq %r10,%r9 cmovcq %r10,%r9
movq %r8,16(%rdi) movq %r8,16(%rdi)
movq %r9,24(%rdi) movq %r9,24(%rdi)
@ -1809,16 +1812,14 @@ ecp_nistz256_point_add:
movq %rdx,%rsi movq %rdx,%rsi
movdqa %xmm0,384(%rsp) movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp) movdqa %xmm1,384+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,416(%rsp) movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp) movdqa %xmm3,416+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,448(%rsp) movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp) movdqa %xmm5,448+16(%rsp)
por %xmm1,%xmm3 por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0 movdqu 0(%rsi),%xmm0
pshufd $0xb1,%xmm3,%xmm5 pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1 movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2 movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5 por %xmm3,%xmm5
@ -1830,14 +1831,14 @@ ecp_nistz256_point_add:
movdqa %xmm0,480(%rsp) movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4 pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp) movdqa %xmm1,480+16(%rsp)
por %xmm0,%xmm1 movdqu 64(%rsi),%xmm0
.byte 102,72,15,110,199 movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp) movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp) movdqa %xmm3,512+16(%rsp)
por %xmm2,%xmm3
por %xmm4,%xmm5 por %xmm4,%xmm5
pxor %xmm4,%xmm4 pxor %xmm4,%xmm4
por %xmm1,%xmm3 por %xmm0,%xmm1
.byte 102,72,15,110,199
leaq 64-0(%rsi),%rsi leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp) movq %rax,544+0(%rsp)
@ -1848,8 +1849,8 @@ ecp_nistz256_point_add:
call __ecp_nistz256_sqr_montq call __ecp_nistz256_sqr_montq
pcmpeqd %xmm4,%xmm5 pcmpeqd %xmm4,%xmm5
pshufd $0xb1,%xmm3,%xmm4 pshufd $0xb1,%xmm1,%xmm4
por %xmm3,%xmm4 por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5 pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3 pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4 por %xmm3,%xmm4
@ -2032,6 +2033,7 @@ ecp_nistz256_point_add:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
leaq 96(%rsp),%rsi leaq 96(%rsp),%rsi
adcq %r13,%r13 adcq %r13,%r13
@ -2039,7 +2041,7 @@ ecp_nistz256_point_add:
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -2047,15 +2049,15 @@ ecp_nistz256_point_add:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
movq 0(%rsi),%rax movq 0(%rsi),%rax
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq 8(%rsi),%rbp movq 8(%rsi),%rbp
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq 16(%rsi),%rcx movq 16(%rsi),%rcx
cmovzq %r10,%r9 cmovcq %r10,%r9
movq 24(%rsi),%r10 movq 24(%rsi),%r10
call __ecp_nistz256_subq call __ecp_nistz256_subq
@ -2213,16 +2215,14 @@ ecp_nistz256_point_add_affine:
movq 64+24(%rsi),%r8 movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp) movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp) movdqa %xmm1,320+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,352(%rsp) movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp) movdqa %xmm3,352+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,384(%rsp) movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp) movdqa %xmm5,384+16(%rsp)
por %xmm1,%xmm3 por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0 movdqu 0(%rbx),%xmm0
pshufd $0xb1,%xmm3,%xmm5 pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1 movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2 movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5 por %xmm3,%xmm5
@ -2340,6 +2340,7 @@ ecp_nistz256_point_add_affine:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
leaq 192(%rsp),%rsi leaq 192(%rsp),%rsi
adcq %r13,%r13 adcq %r13,%r13
@ -2347,7 +2348,7 @@ ecp_nistz256_point_add_affine:
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -2355,15 +2356,15 @@ ecp_nistz256_point_add_affine:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
movq 0(%rsi),%rax movq 0(%rsi),%rax
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq 8(%rsi),%rbp movq 8(%rsi),%rbp
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq 16(%rsi),%rcx movq 16(%rsi),%rcx
cmovzq %r10,%r9 cmovcq %r10,%r9
movq 24(%rsi),%r10 movq 24(%rsi),%r10
call __ecp_nistz256_subq call __ecp_nistz256_subq
@ -2510,14 +2511,14 @@ __ecp_nistz256_add_tox:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
sbbq $0,%r11
btq $0,%r11 cmovcq %rax,%r12
cmovncq %rax,%r12 cmovcq %rbp,%r13
cmovncq %rbp,%r13
movq %r12,0(%rdi) movq %r12,0(%rdi)
cmovncq %rcx,%r8 cmovcq %rcx,%r8
movq %r13,8(%rdi) movq %r13,8(%rdi)
cmovncq %r10,%r9 cmovcq %r10,%r9
movq %r8,16(%rdi) movq %r8,16(%rdi)
movq %r9,24(%rdi) movq %r9,24(%rdi)
@ -2605,14 +2606,14 @@ __ecp_nistz256_mul_by_2x:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
sbbq $0,%r11
btq $0,%r11 cmovcq %rax,%r12
cmovncq %rax,%r12 cmovcq %rbp,%r13
cmovncq %rbp,%r13
movq %r12,0(%rdi) movq %r12,0(%rdi)
cmovncq %rcx,%r8 cmovcq %rcx,%r8
movq %r13,8(%rdi) movq %r13,8(%rdi)
cmovncq %r10,%r9 cmovcq %r10,%r9
movq %r8,16(%rdi) movq %r8,16(%rdi)
movq %r9,24(%rdi) movq %r9,24(%rdi)
@ -2842,16 +2843,14 @@ ecp_nistz256_point_addx:
movq %rdx,%rsi movq %rdx,%rsi
movdqa %xmm0,384(%rsp) movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp) movdqa %xmm1,384+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,416(%rsp) movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp) movdqa %xmm3,416+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,448(%rsp) movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp) movdqa %xmm5,448+16(%rsp)
por %xmm1,%xmm3 por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0 movdqu 0(%rsi),%xmm0
pshufd $0xb1,%xmm3,%xmm5 pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1 movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2 movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5 por %xmm3,%xmm5
@ -2863,14 +2862,14 @@ ecp_nistz256_point_addx:
movdqa %xmm0,480(%rsp) movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4 pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp) movdqa %xmm1,480+16(%rsp)
por %xmm0,%xmm1 movdqu 64(%rsi),%xmm0
.byte 102,72,15,110,199 movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp) movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp) movdqa %xmm3,512+16(%rsp)
por %xmm2,%xmm3
por %xmm4,%xmm5 por %xmm4,%xmm5
pxor %xmm4,%xmm4 pxor %xmm4,%xmm4
por %xmm1,%xmm3 por %xmm0,%xmm1
.byte 102,72,15,110,199
leaq 64-128(%rsi),%rsi leaq 64-128(%rsi),%rsi
movq %rdx,544+0(%rsp) movq %rdx,544+0(%rsp)
@ -2881,8 +2880,8 @@ ecp_nistz256_point_addx:
call __ecp_nistz256_sqr_montx call __ecp_nistz256_sqr_montx
pcmpeqd %xmm4,%xmm5 pcmpeqd %xmm4,%xmm5
pshufd $0xb1,%xmm3,%xmm4 pshufd $0xb1,%xmm1,%xmm4
por %xmm3,%xmm4 por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5 pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3 pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4 por %xmm3,%xmm4
@ -3065,6 +3064,7 @@ ecp_nistz256_point_addx:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
leaq 96(%rsp),%rsi leaq 96(%rsp),%rsi
adcq %r13,%r13 adcq %r13,%r13
@ -3072,7 +3072,7 @@ ecp_nistz256_point_addx:
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -3080,15 +3080,15 @@ ecp_nistz256_point_addx:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
movq 0(%rsi),%rax movq 0(%rsi),%rax
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq 8(%rsi),%rbp movq 8(%rsi),%rbp
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq 16(%rsi),%rcx movq 16(%rsi),%rcx
cmovzq %r10,%r9 cmovcq %r10,%r9
movq 24(%rsi),%r10 movq 24(%rsi),%r10
call __ecp_nistz256_subx call __ecp_nistz256_subx
@ -3242,16 +3242,14 @@ ecp_nistz256_point_add_affinex:
movq 64+24(%rsi),%r8 movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp) movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp) movdqa %xmm1,320+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,352(%rsp) movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp) movdqa %xmm3,352+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,384(%rsp) movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp) movdqa %xmm5,384+16(%rsp)
por %xmm1,%xmm3 por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0 movdqu 0(%rbx),%xmm0
pshufd $0xb1,%xmm3,%xmm5 pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1 movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2 movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5 por %xmm3,%xmm5
@ -3369,6 +3367,7 @@ ecp_nistz256_point_add_affinex:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
leaq 192(%rsp),%rsi leaq 192(%rsp),%rsi
adcq %r13,%r13 adcq %r13,%r13
@ -3376,7 +3375,7 @@ ecp_nistz256_point_add_affinex:
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -3384,15 +3383,15 @@ ecp_nistz256_point_add_affinex:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
movq 0(%rsi),%rax movq 0(%rsi),%rax
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq 8(%rsi),%rbp movq 8(%rsi),%rbp
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq 16(%rsi),%rcx movq 16(%rsi),%rcx
cmovzq %r10,%r9 cmovcq %r10,%r9
movq 24(%rsi),%r10 movq 24(%rsi),%r10
call __ecp_nistz256_subx call __ecp_nistz256_subx

4  deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s

@ -1263,9 +1263,9 @@ _shaext_shortcut:
.align 16 .align 16
.Loop_shaext: .Loop_shaext:
decq %rdx decq %rdx
leaq 64(%rsi),%rax leaq 64(%rsi),%r8
paddd %xmm4,%xmm1 paddd %xmm4,%xmm1
cmovneq %rax,%rsi cmovneq %r8,%rsi
movdqa %xmm0,%xmm8 movdqa %xmm0,%xmm8
.byte 15,56,201,229 .byte 15,56,201,229
movdqa %xmm0,%xmm2 movdqa %xmm0,%xmm2

155  deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s

@ -6,6 +6,8 @@
.p2align 4 .p2align 4
_bn_mul_mont: _bn_mul_mont:
movl %r9d,%r9d
movq %rsp,%rax
testl $3,%r9d testl $3,%r9d
jnz L$mul_enter jnz L$mul_enter
cmpl $8,%r9d cmpl $8,%r9d
@ -26,29 +28,36 @@ L$mul_enter:
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movl %r9d,%r9d negq %r9
leaq 2(%r9),%r10
movq %rsp,%r11 movq %rsp,%r11
negq %r10 leaq -16(%rsp,%r9,8),%r10
leaq (%rsp,%r10,8),%rsp negq %r9
andq $-1024,%rsp andq $-1024,%r10
movq %r11,8(%rsp,%r9,8)
L$mul_body:
subq %rsp,%r11 subq %r10,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul_page_walk
jmp L$mul_page_walk_done
.p2align 4
L$mul_page_walk: L$mul_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r11
.byte 0x66,0x2e cmpq %r10,%rsp
jnc L$mul_page_walk ja L$mul_page_walk
L$mul_page_walk_done:
movq %rax,8(%rsp,%r9,8)
L$mul_body:
movq %rdx,%r12 movq %rdx,%r12
movq (%r8),%r8 movq (%r8),%r8
movq (%r12),%rbx movq (%r12),%rbx
@ -216,19 +225,21 @@ L$copy:
movq 8(%rsp,%r9,8),%rsi movq 8(%rsp,%r9,8),%rsi
movq $1,%rax movq $1,%rax
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
L$mul_epilogue: L$mul_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.p2align 4 .p2align 4
bn_mul4x_mont: bn_mul4x_mont:
movl %r9d,%r9d
movq %rsp,%rax
L$mul4x_enter: L$mul4x_enter:
andl $0x80100,%r11d andl $0x80100,%r11d
cmpl $0x80100,%r11d cmpl $0x80100,%r11d
@ -240,23 +251,29 @@ L$mul4x_enter:
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movl %r9d,%r9d negq %r9
leaq 4(%r9),%r10
movq %rsp,%r11 movq %rsp,%r11
negq %r10 leaq -32(%rsp,%r9,8),%r10
leaq (%rsp,%r10,8),%rsp negq %r9
andq $-1024,%rsp andq $-1024,%r10
movq %r11,8(%rsp,%r9,8) subq %r10,%r11
L$mul4x_body:
subq %rsp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul4x_page_walk
jmp L$mul4x_page_walk_done
L$mul4x_page_walk: L$mul4x_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r11
.byte 0x2e cmpq %r10,%rsp
jnc L$mul4x_page_walk ja L$mul4x_page_walk
L$mul4x_page_walk_done:
movq %rax,8(%rsp,%r9,8)
L$mul4x_body:
movq %rdi,16(%rsp,%r9,8) movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12 movq %rdx,%r12
movq (%r8),%r8 movq (%r8),%r8
@ -625,13 +642,13 @@ L$copy4x:
movdqu %xmm2,16(%rdi,%r14,1) movdqu %xmm2,16(%rdi,%r14,1)
movq 8(%rsp,%r9,8),%rsi movq 8(%rsp,%r9,8),%rsi
movq $1,%rax movq $1,%rax
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
L$mul4x_epilogue: L$mul4x_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
@ -641,14 +658,15 @@ L$mul4x_epilogue:
.p2align 5 .p2align 5
bn_sqr8x_mont: bn_sqr8x_mont:
L$sqr8x_enter:
movq %rsp,%rax movq %rsp,%rax
L$sqr8x_enter:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
L$sqr8x_prologue:
movl %r9d,%r10d movl %r9d,%r10d
shll $3,%r9d shll $3,%r9d
@ -661,33 +679,42 @@ L$sqr8x_enter:
leaq -64(%rsp,%r9,2),%r11 leaq -64(%rsp,%r9,2),%r11
movq %rsp,%rbp
movq (%r8),%r8 movq (%r8),%r8
subq %rsi,%r11 subq %rsi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb L$sqr8x_sp_alt jb L$sqr8x_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -64(%rsp,%r9,2),%rsp leaq -64(%rbp,%r9,2),%rbp
jmp L$sqr8x_sp_done jmp L$sqr8x_sp_done
.p2align 5 .p2align 5
L$sqr8x_sp_alt: L$sqr8x_sp_alt:
leaq 4096-64(,%r9,2),%r10 leaq 4096-64(,%r9,2),%r10
leaq -64(%rsp,%r9,2),%rsp leaq -64(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
L$sqr8x_sp_done: L$sqr8x_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$sqr8x_page_walk
jmp L$sqr8x_page_walk_done
.p2align 4
L$sqr8x_page_walk: L$sqr8x_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc L$sqr8x_page_walk ja L$sqr8x_page_walk
L$sqr8x_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9
@ -800,30 +827,38 @@ L$sqr8x_epilogue:
.p2align 5 .p2align 5
bn_mulx4x_mont: bn_mulx4x_mont:
L$mulx4x_enter:
movq %rsp,%rax movq %rsp,%rax
L$mulx4x_enter:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
L$mulx4x_prologue:
shll $3,%r9d shll $3,%r9d
.byte 0x67
xorq %r10,%r10 xorq %r10,%r10
subq %r9,%r10 subq %r9,%r10
movq (%r8),%r8 movq (%r8),%r8
leaq -72(%rsp,%r10,1),%rsp leaq -72(%rsp,%r10,1),%rbp
andq $-128,%rsp andq $-128,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$mulx4x_page_walk
jmp L$mulx4x_page_walk_done
.p2align 4
L$mulx4x_page_walk: L$mulx4x_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x66,0x2e cmpq %rbp,%rsp
jnc L$mulx4x_page_walk ja L$mulx4x_page_walk
L$mulx4x_page_walk_done:
leaq (%rdx,%r9,1),%r10 leaq (%rdx,%r9,1),%r10

204  deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s

@ -6,6 +6,8 @@
.p2align 6 .p2align 6
_bn_mul_mont_gather5: _bn_mul_mont_gather5:
movl %r9d,%r9d
movq %rsp,%rax
testl $7,%r9d testl $7,%r9d
jnz L$mul_enter jnz L$mul_enter
movl _OPENSSL_ia32cap_P+8(%rip),%r11d movl _OPENSSL_ia32cap_P+8(%rip),%r11d
@ -13,10 +15,7 @@ _bn_mul_mont_gather5:
.p2align 4 .p2align 4
L$mul_enter: L$mul_enter:
movl %r9d,%r9d
movq %rsp,%rax
movd 8(%rsp),%xmm5 movd 8(%rsp),%xmm5
leaq L$inc(%rip),%r10
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
@ -24,26 +23,36 @@ L$mul_enter:
pushq %r14 pushq %r14
pushq %r15 pushq %r15
leaq 2(%r9),%r11 negq %r9
negq %r11 movq %rsp,%r11
leaq -264(%rsp,%r11,8),%rsp leaq -280(%rsp,%r9,8),%r10
andq $-1024,%rsp negq %r9
andq $-1024,%r10
movq %rax,8(%rsp,%r9,8)
L$mul_body:
subq %rsp,%rax subq %r10,%r11
andq $-4096,%rax andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul_page_walk
jmp L$mul_page_walk_done
L$mul_page_walk: L$mul_page_walk:
movq (%rsp,%rax,1),%r11 leaq -4096(%rsp),%rsp
subq $4096,%rax movq (%rsp),%r11
.byte 0x2e cmpq %r10,%rsp
jnc L$mul_page_walk ja L$mul_page_walk
L$mul_page_walk_done:
leaq L$inc(%rip),%r10
movq %rax,8(%rsp,%r9,8)
L$mul_body:
leaq 128(%rdx),%r12 leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0 movdqa 0(%r10),%xmm0
@ -414,18 +423,19 @@ L$mul_epilogue:
.p2align 5 .p2align 5
bn_mul4x_mont_gather5: bn_mul4x_mont_gather5:
.byte 0x67
movq %rsp,%rax
L$mul4x_enter: L$mul4x_enter:
andl $0x80108,%r11d andl $0x80108,%r11d
cmpl $0x80108,%r11d cmpl $0x80108,%r11d
je L$mulx4x_enter je L$mulx4x_enter
.byte 0x67
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
L$mul4x_prologue:
.byte 0x67 .byte 0x67
shll $3,%r9d shll $3,%r9d
@ -442,32 +452,40 @@ L$mul4x_enter:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb L$mul4xsp_alt jb L$mul4xsp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp L$mul4xsp_done jmp L$mul4xsp_done
.p2align 5 .p2align 5
L$mul4xsp_alt: L$mul4xsp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
L$mul4xsp_done: L$mul4xsp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$mul4x_page_walk
jmp L$mul4x_page_walk_done
L$mul4x_page_walk: L$mul4x_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc L$mul4x_page_walk ja L$mul4x_page_walk
L$mul4x_page_walk_done:
negq %r9 negq %r9
@ -1019,17 +1037,18 @@ L$inner4x:
.p2align 5 .p2align 5
_bn_power5: _bn_power5:
movq %rsp,%rax
movl _OPENSSL_ia32cap_P+8(%rip),%r11d movl _OPENSSL_ia32cap_P+8(%rip),%r11d
andl $0x80108,%r11d andl $0x80108,%r11d
cmpl $0x80108,%r11d cmpl $0x80108,%r11d
je L$powerx5_enter je L$powerx5_enter
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
L$power5_prologue:
shll $3,%r9d shll $3,%r9d
leal (%r9,%r9,2),%r10d leal (%r9,%r9,2),%r10d
@ -1044,32 +1063,40 @@ _bn_power5:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb L$pwr_sp_alt jb L$pwr_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp L$pwr_sp_done jmp L$pwr_sp_done
.p2align 5 .p2align 5
L$pwr_sp_alt: L$pwr_sp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
L$pwr_sp_done: L$pwr_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$pwr_page_walk
jmp L$pwr_page_walk_done
L$pwr_page_walk: L$pwr_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc L$pwr_page_walk ja L$pwr_page_walk
L$pwr_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9
@ -1980,6 +2007,7 @@ bn_from_mont8x:
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
L$from_prologue:
shll $3,%r9d shll $3,%r9d
leaq (%r9,%r9,2),%r10 leaq (%r9,%r9,2),%r10
@ -1994,32 +2022,40 @@ bn_from_mont8x:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb L$from_sp_alt jb L$from_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp L$from_sp_done jmp L$from_sp_done
.p2align 5 .p2align 5
L$from_sp_alt: L$from_sp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
L$from_sp_done: L$from_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$from_page_walk
jmp L$from_page_walk_done
L$from_page_walk: L$from_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc L$from_page_walk ja L$from_page_walk
L$from_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9
@ -2113,14 +2149,15 @@ L$from_epilogue:
.p2align 5 .p2align 5
bn_mulx4x_mont_gather5: bn_mulx4x_mont_gather5:
L$mulx4x_enter:
movq %rsp,%rax movq %rsp,%rax
L$mulx4x_enter:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
L$mulx4x_prologue:
shll $3,%r9d shll $3,%r9d
leaq (%r9,%r9,2),%r10 leaq (%r9,%r9,2),%r10
@ -2137,31 +2174,39 @@ L$mulx4x_enter:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb L$mulx4xsp_alt jb L$mulx4xsp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp L$mulx4xsp_done jmp L$mulx4xsp_done
L$mulx4xsp_alt: L$mulx4xsp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
L$mulx4xsp_done: L$mulx4xsp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$mulx4x_page_walk
jmp L$mulx4x_page_walk_done
L$mulx4x_page_walk: L$mulx4x_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc L$mulx4x_page_walk ja L$mulx4x_page_walk
L$mulx4x_page_walk_done:
@ -2619,14 +2664,15 @@ L$mulx4x_inner:
.p2align 5 .p2align 5
bn_powerx5: bn_powerx5:
L$powerx5_enter:
movq %rsp,%rax movq %rsp,%rax
L$powerx5_enter:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
L$powerx5_prologue:
shll $3,%r9d shll $3,%r9d
leaq (%r9,%r9,2),%r10 leaq (%r9,%r9,2),%r10
@ -2641,32 +2687,40 @@ L$powerx5_enter:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb L$pwrx_sp_alt jb L$pwrx_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp L$pwrx_sp_done jmp L$pwrx_sp_done
.p2align 5 .p2align 5
L$pwrx_sp_alt: L$pwrx_sp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
L$pwrx_sp_done: L$pwrx_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$pwrx_page_walk
jmp L$pwrx_page_walk_done
L$pwrx_page_walk: L$pwrx_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc L$pwrx_page_walk ja L$pwrx_page_walk
L$pwrx_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9

189  deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s

@ -27,6 +27,7 @@ _ecp_nistz256_mul_by_2:
pushq %r13 pushq %r13
movq 0(%rsi),%r8 movq 0(%rsi),%r8
xorq %r13,%r13
movq 8(%rsi),%r9 movq 8(%rsi),%r9
addq %r8,%r8 addq %r8,%r8
movq 16(%rsi),%r10 movq 16(%rsi),%r10
@ -37,7 +38,7 @@ _ecp_nistz256_mul_by_2:
adcq %r10,%r10 adcq %r10,%r10
adcq %r11,%r11 adcq %r11,%r11
movq %r9,%rdx movq %r9,%rdx
sbbq %r13,%r13 adcq $0,%r13
subq 0(%rsi),%r8 subq 0(%rsi),%r8
movq %r10,%rcx movq %r10,%rcx
@ -45,14 +46,14 @@ _ecp_nistz256_mul_by_2:
sbbq 16(%rsi),%r10 sbbq 16(%rsi),%r10
movq %r11,%r12 movq %r11,%r12
sbbq 24(%rsi),%r11 sbbq 24(%rsi),%r11
testq %r13,%r13 sbbq $0,%r13
cmovzq %rax,%r8 cmovcq %rax,%r8
cmovzq %rdx,%r9 cmovcq %rdx,%r9
movq %r8,0(%rdi) movq %r8,0(%rdi)
cmovzq %rcx,%r10 cmovcq %rcx,%r10
movq %r9,8(%rdi) movq %r9,8(%rdi)
cmovzq %r12,%r11 cmovcq %r12,%r11
movq %r10,16(%rdi) movq %r10,16(%rdi)
movq %r11,24(%rdi) movq %r11,24(%rdi)
@ -149,12 +150,12 @@ _ecp_nistz256_mul_by_3:
sbbq $0,%r10 sbbq $0,%r10
movq %r11,%r12 movq %r11,%r12
sbbq L$poly+24(%rip),%r11 sbbq L$poly+24(%rip),%r11
testq %r13,%r13 sbbq $0,%r13
cmovzq %rax,%r8 cmovcq %rax,%r8
cmovzq %rdx,%r9 cmovcq %rdx,%r9
cmovzq %rcx,%r10 cmovcq %rcx,%r10
cmovzq %r12,%r11 cmovcq %r12,%r11
xorq %r13,%r13 xorq %r13,%r13
addq 0(%rsi),%r8 addq 0(%rsi),%r8
@ -171,14 +172,14 @@ _ecp_nistz256_mul_by_3:
sbbq $0,%r10 sbbq $0,%r10
movq %r11,%r12 movq %r11,%r12
sbbq L$poly+24(%rip),%r11 sbbq L$poly+24(%rip),%r11
testq %r13,%r13 sbbq $0,%r13
cmovzq %rax,%r8 cmovcq %rax,%r8
cmovzq %rdx,%r9 cmovcq %rdx,%r9
movq %r8,0(%rdi) movq %r8,0(%rdi)
cmovzq %rcx,%r10 cmovcq %rcx,%r10
movq %r9,8(%rdi) movq %r9,8(%rdi)
cmovzq %r12,%r11 cmovcq %r12,%r11
movq %r10,16(%rdi) movq %r10,16(%rdi)
movq %r11,24(%rdi) movq %r11,24(%rdi)
@ -217,14 +218,14 @@ _ecp_nistz256_add:
sbbq 16(%rsi),%r10 sbbq 16(%rsi),%r10
movq %r11,%r12 movq %r11,%r12
sbbq 24(%rsi),%r11 sbbq 24(%rsi),%r11
testq %r13,%r13 sbbq $0,%r13
cmovzq %rax,%r8 cmovcq %rax,%r8
cmovzq %rdx,%r9 cmovcq %rdx,%r9
movq %r8,0(%rdi) movq %r8,0(%rdi)
cmovzq %rcx,%r10 cmovcq %rcx,%r10
movq %r9,8(%rdi) movq %r9,8(%rdi)
cmovzq %r12,%r11 cmovcq %r12,%r11
movq %r10,16(%rdi) movq %r10,16(%rdi)
movq %r11,24(%rdi) movq %r11,24(%rdi)
@ -1461,13 +1462,14 @@ L$select_loop_avx2_w7:
.p2align 5 .p2align 5
__ecp_nistz256_add_toq: __ecp_nistz256_add_toq:
xorq %r11,%r11
addq 0(%rbx),%r12 addq 0(%rbx),%r12
adcq 8(%rbx),%r13 adcq 8(%rbx),%r13
movq %r12,%rax movq %r12,%rax
adcq 16(%rbx),%r8 adcq 16(%rbx),%r8
adcq 24(%rbx),%r9 adcq 24(%rbx),%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -1475,14 +1477,14 @@ __ecp_nistz256_add_toq:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq %r12,0(%rdi) movq %r12,0(%rdi)
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq %r13,8(%rdi) movq %r13,8(%rdi)
cmovzq %r10,%r9 cmovcq %r10,%r9
movq %r8,16(%rdi) movq %r8,16(%rdi)
movq %r9,24(%rdi) movq %r9,24(%rdi)
@ -1550,13 +1552,14 @@ __ecp_nistz256_subq:
.p2align 5 .p2align 5
__ecp_nistz256_mul_by_2q: __ecp_nistz256_mul_by_2q:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
adcq %r13,%r13 adcq %r13,%r13
movq %r12,%rax movq %r12,%rax
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -1564,14 +1567,14 @@ __ecp_nistz256_mul_by_2q:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq %r12,0(%rdi) movq %r12,0(%rdi)
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq %r13,8(%rdi) movq %r13,8(%rdi)
cmovzq %r10,%r9 cmovcq %r10,%r9
movq %r8,16(%rdi) movq %r8,16(%rdi)
movq %r9,24(%rdi) movq %r9,24(%rdi)
@ -1809,16 +1812,14 @@ _ecp_nistz256_point_add:
movq %rdx,%rsi movq %rdx,%rsi
movdqa %xmm0,384(%rsp) movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp) movdqa %xmm1,384+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,416(%rsp) movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp) movdqa %xmm3,416+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,448(%rsp) movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp) movdqa %xmm5,448+16(%rsp)
por %xmm1,%xmm3 por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0 movdqu 0(%rsi),%xmm0
pshufd $0xb1,%xmm3,%xmm5 pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1 movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2 movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5 por %xmm3,%xmm5
@ -1830,14 +1831,14 @@ _ecp_nistz256_point_add:
movdqa %xmm0,480(%rsp) movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4 pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp) movdqa %xmm1,480+16(%rsp)
por %xmm0,%xmm1 movdqu 64(%rsi),%xmm0
.byte 102,72,15,110,199 movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp) movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp) movdqa %xmm3,512+16(%rsp)
por %xmm2,%xmm3
por %xmm4,%xmm5 por %xmm4,%xmm5
pxor %xmm4,%xmm4 pxor %xmm4,%xmm4
por %xmm1,%xmm3 por %xmm0,%xmm1
.byte 102,72,15,110,199
leaq 64-0(%rsi),%rsi leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp) movq %rax,544+0(%rsp)
@ -1848,8 +1849,8 @@ _ecp_nistz256_point_add:
call __ecp_nistz256_sqr_montq call __ecp_nistz256_sqr_montq
pcmpeqd %xmm4,%xmm5 pcmpeqd %xmm4,%xmm5
pshufd $0xb1,%xmm3,%xmm4 pshufd $0xb1,%xmm1,%xmm4
por %xmm3,%xmm4 por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5 pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3 pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4 por %xmm3,%xmm4
@ -2032,6 +2033,7 @@ L$add_proceedq:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
leaq 96(%rsp),%rsi leaq 96(%rsp),%rsi
adcq %r13,%r13 adcq %r13,%r13
@ -2039,7 +2041,7 @@ L$add_proceedq:
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -2047,15 +2049,15 @@ L$add_proceedq:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
movq 0(%rsi),%rax movq 0(%rsi),%rax
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq 8(%rsi),%rbp movq 8(%rsi),%rbp
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq 16(%rsi),%rcx movq 16(%rsi),%rcx
cmovzq %r10,%r9 cmovcq %r10,%r9
movq 24(%rsi),%r10 movq 24(%rsi),%r10
call __ecp_nistz256_subq call __ecp_nistz256_subq
@ -2213,16 +2215,14 @@ _ecp_nistz256_point_add_affine:
movq 64+24(%rsi),%r8 movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp) movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp) movdqa %xmm1,320+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,352(%rsp) movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp) movdqa %xmm3,352+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,384(%rsp) movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp) movdqa %xmm5,384+16(%rsp)
por %xmm1,%xmm3 por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0 movdqu 0(%rbx),%xmm0
pshufd $0xb1,%xmm3,%xmm5 pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1 movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2 movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5 por %xmm3,%xmm5
@ -2340,6 +2340,7 @@ _ecp_nistz256_point_add_affine:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
leaq 192(%rsp),%rsi leaq 192(%rsp),%rsi
adcq %r13,%r13 adcq %r13,%r13
@ -2347,7 +2348,7 @@ _ecp_nistz256_point_add_affine:
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -2355,15 +2356,15 @@ _ecp_nistz256_point_add_affine:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
movq 0(%rsi),%rax movq 0(%rsi),%rax
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq 8(%rsi),%rbp movq 8(%rsi),%rbp
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq 16(%rsi),%rcx movq 16(%rsi),%rcx
cmovzq %r10,%r9 cmovcq %r10,%r9
movq 24(%rsi),%r10 movq 24(%rsi),%r10
call __ecp_nistz256_subq call __ecp_nistz256_subq
@ -2510,14 +2511,14 @@ __ecp_nistz256_add_tox:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
sbbq $0,%r11
btq $0,%r11 cmovcq %rax,%r12
cmovncq %rax,%r12 cmovcq %rbp,%r13
cmovncq %rbp,%r13
movq %r12,0(%rdi) movq %r12,0(%rdi)
cmovncq %rcx,%r8 cmovcq %rcx,%r8
movq %r13,8(%rdi) movq %r13,8(%rdi)
cmovncq %r10,%r9 cmovcq %r10,%r9
movq %r8,16(%rdi) movq %r8,16(%rdi)
movq %r9,24(%rdi) movq %r9,24(%rdi)
@ -2605,14 +2606,14 @@ __ecp_nistz256_mul_by_2x:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
sbbq $0,%r11
btq $0,%r11 cmovcq %rax,%r12
cmovncq %rax,%r12 cmovcq %rbp,%r13
cmovncq %rbp,%r13
movq %r12,0(%rdi) movq %r12,0(%rdi)
cmovncq %rcx,%r8 cmovcq %rcx,%r8
movq %r13,8(%rdi) movq %r13,8(%rdi)
cmovncq %r10,%r9 cmovcq %r10,%r9
movq %r8,16(%rdi) movq %r8,16(%rdi)
movq %r9,24(%rdi) movq %r9,24(%rdi)
@ -2842,16 +2843,14 @@ L$point_addx:
movq %rdx,%rsi movq %rdx,%rsi
movdqa %xmm0,384(%rsp) movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp) movdqa %xmm1,384+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,416(%rsp) movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp) movdqa %xmm3,416+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,448(%rsp) movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp) movdqa %xmm5,448+16(%rsp)
por %xmm1,%xmm3 por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0 movdqu 0(%rsi),%xmm0
pshufd $0xb1,%xmm3,%xmm5 pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1 movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2 movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5 por %xmm3,%xmm5
@ -2863,14 +2862,14 @@ L$point_addx:
movdqa %xmm0,480(%rsp) movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4 pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp) movdqa %xmm1,480+16(%rsp)
por %xmm0,%xmm1 movdqu 64(%rsi),%xmm0
.byte 102,72,15,110,199 movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp) movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp) movdqa %xmm3,512+16(%rsp)
por %xmm2,%xmm3
por %xmm4,%xmm5 por %xmm4,%xmm5
pxor %xmm4,%xmm4 pxor %xmm4,%xmm4
por %xmm1,%xmm3 por %xmm0,%xmm1
.byte 102,72,15,110,199
leaq 64-128(%rsi),%rsi leaq 64-128(%rsi),%rsi
movq %rdx,544+0(%rsp) movq %rdx,544+0(%rsp)
@ -2881,8 +2880,8 @@ L$point_addx:
call __ecp_nistz256_sqr_montx call __ecp_nistz256_sqr_montx
pcmpeqd %xmm4,%xmm5 pcmpeqd %xmm4,%xmm5
pshufd $0xb1,%xmm3,%xmm4 pshufd $0xb1,%xmm1,%xmm4
por %xmm3,%xmm4 por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5 pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3 pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4 por %xmm3,%xmm4
@ -3065,6 +3064,7 @@ L$add_proceedx:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
leaq 96(%rsp),%rsi leaq 96(%rsp),%rsi
adcq %r13,%r13 adcq %r13,%r13
@ -3072,7 +3072,7 @@ L$add_proceedx:
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -3080,15 +3080,15 @@ L$add_proceedx:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
movq 0(%rsi),%rax movq 0(%rsi),%rax
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq 8(%rsi),%rbp movq 8(%rsi),%rbp
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq 16(%rsi),%rcx movq 16(%rsi),%rcx
cmovzq %r10,%r9 cmovcq %r10,%r9
movq 24(%rsi),%r10 movq 24(%rsi),%r10
call __ecp_nistz256_subx call __ecp_nistz256_subx
@ -3242,16 +3242,14 @@ L$point_add_affinex:
movq 64+24(%rsi),%r8 movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp) movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp) movdqa %xmm1,320+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,352(%rsp) movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp) movdqa %xmm3,352+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,384(%rsp) movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp) movdqa %xmm5,384+16(%rsp)
por %xmm1,%xmm3 por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0 movdqu 0(%rbx),%xmm0
pshufd $0xb1,%xmm3,%xmm5 pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1 movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2 movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5 por %xmm3,%xmm5
@ -3369,6 +3367,7 @@ L$point_add_affinex:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
leaq 192(%rsp),%rsi leaq 192(%rsp),%rsi
adcq %r13,%r13 adcq %r13,%r13
@ -3376,7 +3375,7 @@ L$point_add_affinex:
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -3384,15 +3383,15 @@ L$point_add_affinex:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
movq 0(%rsi),%rax movq 0(%rsi),%rax
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq 8(%rsi),%rbp movq 8(%rsi),%rbp
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq 16(%rsi),%rcx movq 16(%rsi),%rcx
cmovzq %r10,%r9 cmovcq %r10,%r9
movq 24(%rsi),%r10 movq 24(%rsi),%r10
call __ecp_nistz256_subx call __ecp_nistz256_subx

4
deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s

@ -1263,9 +1263,9 @@ _shaext_shortcut:
.p2align 4 .p2align 4
L$oop_shaext: L$oop_shaext:
decq %rdx decq %rdx
leaq 64(%rsi),%rax leaq 64(%rsi),%r8
paddd %xmm4,%xmm1 paddd %xmm4,%xmm1
cmovneq %rax,%rsi cmovneq %r8,%rsi
movdqa %xmm0,%xmm8 movdqa %xmm0,%xmm8
.byte 15,56,201,229 .byte 15,56,201,229
movdqa %xmm0,%xmm2 movdqa %xmm0,%xmm2

185
deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm

@ -19,6 +19,8 @@ $L$SEH_begin_bn_mul_mont::
mov r9,QWORD PTR[48+rsp] mov r9,QWORD PTR[48+rsp]
mov r9d,r9d
mov rax,rsp
test r9d,3 test r9d,3
jnz $L$mul_enter jnz $L$mul_enter
cmp r9d,8 cmp r9d,8
@ -39,29 +41,36 @@ $L$mul_enter::
push r14 push r14
push r15 push r15
mov r9d,r9d neg r9
lea r10,QWORD PTR[2+r9]
mov r11,rsp mov r11,rsp
neg r10 lea r10,QWORD PTR[((-16))+r9*8+rsp]
lea rsp,QWORD PTR[r10*8+rsp] neg r9
and rsp,-1024 and r10,-1024
mov QWORD PTR[8+r9*8+rsp],r11
$L$mul_body::
sub r11,rsp sub r11,r10
and r11,-4096 and r11,-4096
lea rsp,QWORD PTR[r11*1+r10]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul_page_walk
jmp $L$mul_page_walk_done
ALIGN 16
$L$mul_page_walk:: $L$mul_page_walk::
mov r10,QWORD PTR[r11*1+rsp] lea rsp,QWORD PTR[((-4096))+rsp]
sub r11,4096 mov r11,QWORD PTR[rsp]
DB 066h,02eh cmp rsp,r10
jnc $L$mul_page_walk ja $L$mul_page_walk
$L$mul_page_walk_done::
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul_body::
mov r12,rdx mov r12,rdx
mov r8,QWORD PTR[r8] mov r8,QWORD PTR[r8]
mov rbx,QWORD PTR[r12] mov rbx,QWORD PTR[r12]
@ -229,13 +238,13 @@ $L$copy::
mov rsi,QWORD PTR[8+r9*8+rsp] mov rsi,QWORD PTR[8+r9*8+rsp]
mov rax,1 mov rax,1
mov r15,QWORD PTR[rsi] mov r15,QWORD PTR[((-48))+rsi]
mov r14,QWORD PTR[8+rsi] mov r14,QWORD PTR[((-40))+rsi]
mov r13,QWORD PTR[16+rsi] mov r13,QWORD PTR[((-32))+rsi]
mov r12,QWORD PTR[24+rsi] mov r12,QWORD PTR[((-24))+rsi]
mov rbp,QWORD PTR[32+rsi] mov rbp,QWORD PTR[((-16))+rsi]
mov rbx,QWORD PTR[40+rsi] mov rbx,QWORD PTR[((-8))+rsi]
lea rsp,QWORD PTR[48+rsi] lea rsp,QWORD PTR[rsi]
$L$mul_epilogue:: $L$mul_epilogue::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp] mov rsi,QWORD PTR[16+rsp]
@ -257,6 +266,8 @@ $L$SEH_begin_bn_mul4x_mont::
mov r9,QWORD PTR[48+rsp] mov r9,QWORD PTR[48+rsp]
mov r9d,r9d
mov rax,rsp
$L$mul4x_enter:: $L$mul4x_enter::
and r11d,080100h and r11d,080100h
cmp r11d,080100h cmp r11d,080100h
@ -268,23 +279,29 @@ $L$mul4x_enter::
push r14 push r14
push r15 push r15
mov r9d,r9d neg r9
lea r10,QWORD PTR[4+r9]
mov r11,rsp mov r11,rsp
neg r10 lea r10,QWORD PTR[((-32))+r9*8+rsp]
lea rsp,QWORD PTR[r10*8+rsp] neg r9
and rsp,-1024 and r10,-1024
mov QWORD PTR[8+r9*8+rsp],r11 sub r11,r10
$L$mul4x_body::
sub r11,rsp
and r11,-4096 and r11,-4096
lea rsp,QWORD PTR[r11*1+r10]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul4x_page_walk
jmp $L$mul4x_page_walk_done
$L$mul4x_page_walk:: $L$mul4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp] lea rsp,QWORD PTR[((-4096))+rsp]
sub r11,4096 mov r11,QWORD PTR[rsp]
DB 02eh cmp rsp,r10
jnc $L$mul4x_page_walk ja $L$mul4x_page_walk
$L$mul4x_page_walk_done::
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul4x_body::
mov QWORD PTR[16+r9*8+rsp],rdi mov QWORD PTR[16+r9*8+rsp],rdi
mov r12,rdx mov r12,rdx
mov r8,QWORD PTR[r8] mov r8,QWORD PTR[r8]
@ -653,13 +670,13 @@ $L$copy4x::
movdqu XMMWORD PTR[16+r14*1+rdi],xmm2 movdqu XMMWORD PTR[16+r14*1+rdi],xmm2
mov rsi,QWORD PTR[8+r9*8+rsp] mov rsi,QWORD PTR[8+r9*8+rsp]
mov rax,1 mov rax,1
mov r15,QWORD PTR[rsi] mov r15,QWORD PTR[((-48))+rsi]
mov r14,QWORD PTR[8+rsi] mov r14,QWORD PTR[((-40))+rsi]
mov r13,QWORD PTR[16+rsi] mov r13,QWORD PTR[((-32))+rsi]
mov r12,QWORD PTR[24+rsi] mov r12,QWORD PTR[((-24))+rsi]
mov rbp,QWORD PTR[32+rsi] mov rbp,QWORD PTR[((-16))+rsi]
mov rbx,QWORD PTR[40+rsi] mov rbx,QWORD PTR[((-8))+rsi]
lea rsp,QWORD PTR[48+rsi] lea rsp,QWORD PTR[rsi]
$L$mul4x_epilogue:: $L$mul4x_epilogue::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp] mov rsi,QWORD PTR[16+rsp]
@ -684,14 +701,15 @@ $L$SEH_begin_bn_sqr8x_mont::
mov r9,QWORD PTR[48+rsp] mov r9,QWORD PTR[48+rsp]
$L$sqr8x_enter::
mov rax,rsp mov rax,rsp
$L$sqr8x_enter::
push rbx push rbx
push rbp push rbp
push r12 push r12
push r13 push r13
push r14 push r14
push r15 push r15
$L$sqr8x_prologue::
mov r10d,r9d mov r10d,r9d
shl r9d,3 shl r9d,3
@ -704,33 +722,42 @@ $L$sqr8x_enter::
lea r11,QWORD PTR[((-64))+r9*2+rsp] lea r11,QWORD PTR[((-64))+r9*2+rsp]
mov rbp,rsp
mov r8,QWORD PTR[r8] mov r8,QWORD PTR[r8]
sub r11,rsi sub r11,rsi
and r11,4095 and r11,4095
cmp r10,r11 cmp r10,r11
jb $L$sqr8x_sp_alt jb $L$sqr8x_sp_alt
sub rsp,r11 sub rbp,r11
lea rsp,QWORD PTR[((-64))+r9*2+rsp] lea rbp,QWORD PTR[((-64))+r9*2+rbp]
jmp $L$sqr8x_sp_done jmp $L$sqr8x_sp_done
ALIGN 32 ALIGN 32
$L$sqr8x_sp_alt:: $L$sqr8x_sp_alt::
lea r10,QWORD PTR[((4096-64))+r9*2] lea r10,QWORD PTR[((4096-64))+r9*2]
lea rsp,QWORD PTR[((-64))+r9*2+rsp] lea rbp,QWORD PTR[((-64))+r9*2+rbp]
sub r11,r10 sub r11,r10
mov r10,0 mov r10,0
cmovc r11,r10 cmovc r11,r10
sub rsp,r11 sub rbp,r11
$L$sqr8x_sp_done:: $L$sqr8x_sp_done::
and rsp,-64 and rbp,-64
mov r11,rax mov r11,rsp
sub r11,rsp sub r11,rbp
and r11,-4096 and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$sqr8x_page_walk
jmp $L$sqr8x_page_walk_done
ALIGN 16
$L$sqr8x_page_walk:: $L$sqr8x_page_walk::
mov r10,QWORD PTR[r11*1+rsp] lea rsp,QWORD PTR[((-4096))+rsp]
sub r11,4096 mov r10,QWORD PTR[rsp]
DB 02eh cmp rsp,rbp
jnc $L$sqr8x_page_walk ja $L$sqr8x_page_walk
$L$sqr8x_page_walk_done::
mov r10,r9 mov r10,r9
neg r9 neg r9
@ -858,30 +885,38 @@ $L$SEH_begin_bn_mulx4x_mont::
mov r9,QWORD PTR[48+rsp] mov r9,QWORD PTR[48+rsp]
$L$mulx4x_enter::
mov rax,rsp mov rax,rsp
$L$mulx4x_enter::
push rbx push rbx
push rbp push rbp
push r12 push r12
push r13 push r13
push r14 push r14
push r15 push r15
$L$mulx4x_prologue::
shl r9d,3 shl r9d,3
DB 067h
xor r10,r10 xor r10,r10
sub r10,r9 sub r10,r9
mov r8,QWORD PTR[r8] mov r8,QWORD PTR[r8]
lea rsp,QWORD PTR[((-72))+r10*1+rsp] lea rbp,QWORD PTR[((-72))+r10*1+rsp]
and rsp,-128 and rbp,-128
mov r11,rax mov r11,rsp
sub r11,rsp sub r11,rbp
and r11,-4096 and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$mulx4x_page_walk
jmp $L$mulx4x_page_walk_done
ALIGN 16
$L$mulx4x_page_walk:: $L$mulx4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp] lea rsp,QWORD PTR[((-4096))+rsp]
sub r11,4096 mov r10,QWORD PTR[rsp]
DB 066h,02eh cmp rsp,rbp
jnc $L$mulx4x_page_walk ja $L$mulx4x_page_walk
$L$mulx4x_page_walk_done::
lea r10,QWORD PTR[r9*1+rdx] lea r10,QWORD PTR[r9*1+rdx]
@ -1230,22 +1265,8 @@ mul_handler PROC PRIVATE
mov r10,QWORD PTR[192+r8] mov r10,QWORD PTR[192+r8]
mov rax,QWORD PTR[8+r10*8+rax] mov rax,QWORD PTR[8+r10*8+rax]
lea rax,QWORD PTR[48+rax]
mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax]
mov r13,QWORD PTR[((-32))+rax]
mov r14,QWORD PTR[((-40))+rax]
mov r15,QWORD PTR[((-48))+rax]
mov QWORD PTR[144+r8],rbx
mov QWORD PTR[160+r8],rbp
mov QWORD PTR[216+r8],r12
mov QWORD PTR[224+r8],r13
mov QWORD PTR[232+r8],r14
mov QWORD PTR[240+r8],r15
jmp $L$common_seh_tail jmp $L$common_pop_regs
mul_handler ENDP mul_handler ENDP
@ -1273,15 +1294,21 @@ sqr_handler PROC PRIVATE
cmp rbx,r10 cmp rbx,r10
jb $L$common_seh_tail jb $L$common_seh_tail
mov r10d,DWORD PTR[4+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jb $L$common_pop_regs
mov rax,QWORD PTR[152+r8] mov rax,QWORD PTR[152+r8]
mov r10d,DWORD PTR[4+r11] mov r10d,DWORD PTR[8+r11]
lea r10,QWORD PTR[r10*1+rsi] lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10 cmp rbx,r10
jae $L$common_seh_tail jae $L$common_seh_tail
mov rax,QWORD PTR[40+rax] mov rax,QWORD PTR[40+rax]
$L$common_pop_regs::
mov rbx,QWORD PTR[((-8))+rax] mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax] mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax] mov r12,QWORD PTR[((-24))+rax]
@ -1366,11 +1393,13 @@ DB 9,0,0,0
$L$SEH_info_bn_sqr8x_mont:: $L$SEH_info_bn_sqr8x_mont::
DB 9,0,0,0 DB 9,0,0,0
DD imagerel sqr_handler DD imagerel sqr_handler
DD imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue DD imagerel $L$sqr8x_prologue,imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue
ALIGN 8
$L$SEH_info_bn_mulx4x_mont:: $L$SEH_info_bn_mulx4x_mont::
DB 9,0,0,0 DB 9,0,0,0
DD imagerel sqr_handler DD imagerel sqr_handler
DD imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue DD imagerel $L$mulx4x_prologue,imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue
ALIGN 8
.xdata ENDS .xdata ENDS
END END

227
deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm

@ -19,6 +19,8 @@ $L$SEH_begin_bn_mul_mont_gather5::
mov r9,QWORD PTR[48+rsp] mov r9,QWORD PTR[48+rsp]
mov r9d,r9d
mov rax,rsp
test r9d,7 test r9d,7
jnz $L$mul_enter jnz $L$mul_enter
mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))] mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))]
@ -26,10 +28,7 @@ $L$SEH_begin_bn_mul_mont_gather5::
ALIGN 16 ALIGN 16
$L$mul_enter:: $L$mul_enter::
mov r9d,r9d
mov rax,rsp
movd xmm5,DWORD PTR[56+rsp] movd xmm5,DWORD PTR[56+rsp]
lea r10,QWORD PTR[$L$inc]
push rbx push rbx
push rbp push rbp
push r12 push r12
@ -37,26 +36,36 @@ $L$mul_enter::
push r14 push r14
push r15 push r15
lea r11,QWORD PTR[2+r9] neg r9
neg r11 mov r11,rsp
lea rsp,QWORD PTR[((-264))+r11*8+rsp] lea r10,QWORD PTR[((-280))+r9*8+rsp]
and rsp,-1024 neg r9
and r10,-1024
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul_body::
sub r11,r10
and r11,-4096
lea rsp,QWORD PTR[r11*1+r10]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul_page_walk
jmp $L$mul_page_walk_done
sub rax,rsp
and rax,-4096
$L$mul_page_walk:: $L$mul_page_walk::
mov r11,QWORD PTR[rax*1+rsp] lea rsp,QWORD PTR[((-4096))+rsp]
sub rax,4096 mov r11,QWORD PTR[rsp]
DB 02eh cmp rsp,r10
jnc $L$mul_page_walk ja $L$mul_page_walk
$L$mul_page_walk_done::
lea r10,QWORD PTR[$L$inc]
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul_body::
lea r12,QWORD PTR[128+rdx] lea r12,QWORD PTR[128+rdx]
movdqa xmm0,XMMWORD PTR[r10] movdqa xmm0,XMMWORD PTR[r10]
@ -442,18 +451,19 @@ $L$SEH_begin_bn_mul4x_mont_gather5::
mov r9,QWORD PTR[48+rsp] mov r9,QWORD PTR[48+rsp]
DB 067h
mov rax,rsp
$L$mul4x_enter:: $L$mul4x_enter::
and r11d,080108h and r11d,080108h
cmp r11d,080108h cmp r11d,080108h
je $L$mulx4x_enter je $L$mulx4x_enter
DB 067h
mov rax,rsp
push rbx push rbx
push rbp push rbp
push r12 push r12
push r13 push r13
push r14 push r14
push r15 push r15
$L$mul4x_prologue::
DB 067h DB 067h
shl r9d,3 shl r9d,3
@ -470,32 +480,40 @@ DB 067h
lea r11,QWORD PTR[((-320))+r9*2+rsp] lea r11,QWORD PTR[((-320))+r9*2+rsp]
mov rbp,rsp
sub r11,rdi sub r11,rdi
and r11,4095 and r11,4095
cmp r10,r11 cmp r10,r11
jb $L$mul4xsp_alt jb $L$mul4xsp_alt
sub rsp,r11 sub rbp,r11
lea rsp,QWORD PTR[((-320))+r9*2+rsp] lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$mul4xsp_done jmp $L$mul4xsp_done
ALIGN 32 ALIGN 32
$L$mul4xsp_alt:: $L$mul4xsp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2] lea r10,QWORD PTR[((4096-320))+r9*2]
lea rsp,QWORD PTR[((-320))+r9*2+rsp] lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10 sub r11,r10
mov r10,0 mov r10,0
cmovc r11,r10 cmovc r11,r10
sub rsp,r11 sub rbp,r11
$L$mul4xsp_done:: $L$mul4xsp_done::
and rsp,-64 and rbp,-64
mov r11,rax mov r11,rsp
sub r11,rsp sub r11,rbp
and r11,-4096 and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$mul4x_page_walk
jmp $L$mul4x_page_walk_done
$L$mul4x_page_walk:: $L$mul4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp] lea rsp,QWORD PTR[((-4096))+rsp]
sub r11,4096 mov r10,QWORD PTR[rsp]
DB 02eh cmp rsp,rbp
jnc $L$mul4x_page_walk ja $L$mul4x_page_walk
$L$mul4x_page_walk_done::
neg r9 neg r9
@ -1062,17 +1080,18 @@ $L$SEH_begin_bn_power5::
mov r9,QWORD PTR[48+rsp] mov r9,QWORD PTR[48+rsp]
mov rax,rsp
mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))] mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))]
and r11d,080108h and r11d,080108h
cmp r11d,080108h cmp r11d,080108h
je $L$powerx5_enter je $L$powerx5_enter
mov rax,rsp
push rbx push rbx
push rbp push rbp
push r12 push r12
push r13 push r13
push r14 push r14
push r15 push r15
$L$power5_prologue::
shl r9d,3 shl r9d,3
lea r10d,DWORD PTR[r9*2+r9] lea r10d,DWORD PTR[r9*2+r9]
@ -1087,32 +1106,40 @@ $L$SEH_begin_bn_power5::
lea r11,QWORD PTR[((-320))+r9*2+rsp] lea r11,QWORD PTR[((-320))+r9*2+rsp]
mov rbp,rsp
sub r11,rdi sub r11,rdi
and r11,4095 and r11,4095
cmp r10,r11 cmp r10,r11
jb $L$pwr_sp_alt jb $L$pwr_sp_alt
sub rsp,r11 sub rbp,r11
lea rsp,QWORD PTR[((-320))+r9*2+rsp] lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$pwr_sp_done jmp $L$pwr_sp_done
ALIGN 32 ALIGN 32
$L$pwr_sp_alt:: $L$pwr_sp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2] lea r10,QWORD PTR[((4096-320))+r9*2]
lea rsp,QWORD PTR[((-320))+r9*2+rsp] lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10 sub r11,r10
mov r10,0 mov r10,0
cmovc r11,r10 cmovc r11,r10
sub rsp,r11 sub rbp,r11
$L$pwr_sp_done:: $L$pwr_sp_done::
and rsp,-64 and rbp,-64
mov r11,rax mov r11,rsp
sub r11,rsp sub r11,rbp
and r11,-4096 and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$pwr_page_walk
jmp $L$pwr_page_walk_done
$L$pwr_page_walk:: $L$pwr_page_walk::
mov r10,QWORD PTR[r11*1+rsp] lea rsp,QWORD PTR[((-4096))+rsp]
sub r11,4096 mov r10,QWORD PTR[rsp]
DB 02eh cmp rsp,rbp
jnc $L$pwr_page_walk ja $L$pwr_page_walk
$L$pwr_page_walk_done::
mov r10,r9 mov r10,r9
neg r9 neg r9
@ -2038,6 +2065,7 @@ DB 067h
push r13 push r13
push r14 push r14
push r15 push r15
$L$from_prologue::
shl r9d,3 shl r9d,3
lea r10,QWORD PTR[r9*2+r9] lea r10,QWORD PTR[r9*2+r9]
@ -2052,32 +2080,40 @@ DB 067h
lea r11,QWORD PTR[((-320))+r9*2+rsp] lea r11,QWORD PTR[((-320))+r9*2+rsp]
mov rbp,rsp
sub r11,rdi sub r11,rdi
and r11,4095 and r11,4095
cmp r10,r11 cmp r10,r11
jb $L$from_sp_alt jb $L$from_sp_alt
sub rsp,r11 sub rbp,r11
lea rsp,QWORD PTR[((-320))+r9*2+rsp] lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$from_sp_done jmp $L$from_sp_done
ALIGN 32 ALIGN 32
$L$from_sp_alt:: $L$from_sp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2] lea r10,QWORD PTR[((4096-320))+r9*2]
lea rsp,QWORD PTR[((-320))+r9*2+rsp] lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10 sub r11,r10
mov r10,0 mov r10,0
cmovc r11,r10 cmovc r11,r10
sub rsp,r11 sub rbp,r11
$L$from_sp_done:: $L$from_sp_done::
and rsp,-64 and rbp,-64
mov r11,rax mov r11,rsp
sub r11,rsp sub r11,rbp
and r11,-4096 and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$from_page_walk
jmp $L$from_page_walk_done
$L$from_page_walk:: $L$from_page_walk::
mov r10,QWORD PTR[r11*1+rsp] lea rsp,QWORD PTR[((-4096))+rsp]
sub r11,4096 mov r10,QWORD PTR[rsp]
DB 02eh cmp rsp,rbp
jnc $L$from_page_walk ja $L$from_page_walk
$L$from_page_walk_done::
mov r10,r9 mov r10,r9
neg r9 neg r9
@ -2186,14 +2222,15 @@ $L$SEH_begin_bn_mulx4x_mont_gather5::
mov r9,QWORD PTR[48+rsp] mov r9,QWORD PTR[48+rsp]
$L$mulx4x_enter::
mov rax,rsp mov rax,rsp
$L$mulx4x_enter::
push rbx push rbx
push rbp push rbp
push r12 push r12
push r13 push r13
push r14 push r14
push r15 push r15
$L$mulx4x_prologue::
shl r9d,3 shl r9d,3
lea r10,QWORD PTR[r9*2+r9] lea r10,QWORD PTR[r9*2+r9]
@ -2210,31 +2247,39 @@ $L$mulx4x_enter::
lea r11,QWORD PTR[((-320))+r9*2+rsp] lea r11,QWORD PTR[((-320))+r9*2+rsp]
mov rbp,rsp
sub r11,rdi sub r11,rdi
and r11,4095 and r11,4095
cmp r10,r11 cmp r10,r11
jb $L$mulx4xsp_alt jb $L$mulx4xsp_alt
sub rsp,r11 sub rbp,r11
lea rsp,QWORD PTR[((-320))+r9*2+rsp] lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$mulx4xsp_done jmp $L$mulx4xsp_done
$L$mulx4xsp_alt:: $L$mulx4xsp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2] lea r10,QWORD PTR[((4096-320))+r9*2]
lea rsp,QWORD PTR[((-320))+r9*2+rsp] lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10 sub r11,r10
mov r10,0 mov r10,0
cmovc r11,r10 cmovc r11,r10
sub rsp,r11 sub rbp,r11
$L$mulx4xsp_done:: $L$mulx4xsp_done::
and rsp,-64 and rbp,-64
mov r11,rax mov r11,rsp
sub r11,rsp sub r11,rbp
and r11,-4096 and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$mulx4x_page_walk
jmp $L$mulx4x_page_walk_done
$L$mulx4x_page_walk:: $L$mulx4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp] lea rsp,QWORD PTR[((-4096))+rsp]
sub r11,4096 mov r10,QWORD PTR[rsp]
DB 02eh cmp rsp,rbp
jnc $L$mulx4x_page_walk ja $L$mulx4x_page_walk
$L$mulx4x_page_walk_done::
@ -2707,14 +2752,15 @@ $L$SEH_begin_bn_powerx5::
mov r9,QWORD PTR[48+rsp] mov r9,QWORD PTR[48+rsp]
$L$powerx5_enter::
mov rax,rsp mov rax,rsp
$L$powerx5_enter::
push rbx push rbx
push rbp push rbp
push r12 push r12
push r13 push r13
push r14 push r14
push r15 push r15
$L$powerx5_prologue::
shl r9d,3 shl r9d,3
lea r10,QWORD PTR[r9*2+r9] lea r10,QWORD PTR[r9*2+r9]
@ -2729,32 +2775,40 @@ $L$powerx5_enter::
lea r11,QWORD PTR[((-320))+r9*2+rsp] lea r11,QWORD PTR[((-320))+r9*2+rsp]
mov rbp,rsp
sub r11,rdi sub r11,rdi
and r11,4095 and r11,4095
cmp r10,r11 cmp r10,r11
jb $L$pwrx_sp_alt jb $L$pwrx_sp_alt
sub rsp,r11 sub rbp,r11
lea rsp,QWORD PTR[((-320))+r9*2+rsp] lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$pwrx_sp_done jmp $L$pwrx_sp_done
ALIGN 32 ALIGN 32
$L$pwrx_sp_alt:: $L$pwrx_sp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2] lea r10,QWORD PTR[((4096-320))+r9*2]
lea rsp,QWORD PTR[((-320))+r9*2+rsp] lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10 sub r11,r10
mov r10,0 mov r10,0
cmovc r11,r10 cmovc r11,r10
sub rsp,r11 sub rbp,r11
$L$pwrx_sp_done:: $L$pwrx_sp_done::
and rsp,-64 and rbp,-64
mov r11,rax mov r11,rsp
sub r11,rsp sub r11,rbp
and r11,-4096 and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$pwrx_page_walk
jmp $L$pwrx_page_walk_done
$L$pwrx_page_walk:: $L$pwrx_page_walk::
mov r10,QWORD PTR[r11*1+rsp] lea rsp,QWORD PTR[((-4096))+rsp]
sub r11,4096 mov r10,QWORD PTR[rsp]
DB 02eh cmp rsp,rbp
jnc $L$pwrx_page_walk ja $L$pwrx_page_walk
$L$pwrx_page_walk_done::
mov r10,r9 mov r10,r9
neg r9 neg r9
@ -3712,9 +3766,14 @@ mul_handler PROC PRIVATE
cmp rbx,r10 cmp rbx,r10
jb $L$common_seh_tail jb $L$common_seh_tail
mov r10d,DWORD PTR[4+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jb $L$common_pop_regs
mov rax,QWORD PTR[152+r8] mov rax,QWORD PTR[152+r8]
mov r10d,DWORD PTR[4+r11] mov r10d,DWORD PTR[8+r11]
lea r10,QWORD PTR[r10*1+rsi] lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10 cmp rbx,r10
jae $L$common_seh_tail jae $L$common_seh_tail
@ -3726,11 +3785,11 @@ mul_handler PROC PRIVATE
mov r10,QWORD PTR[192+r8] mov r10,QWORD PTR[192+r8]
mov rax,QWORD PTR[8+r10*8+rax] mov rax,QWORD PTR[8+r10*8+rax]
jmp $L$body_proceed jmp $L$common_pop_regs
$L$body_40:: $L$body_40::
mov rax,QWORD PTR[40+rax] mov rax,QWORD PTR[40+rax]
$L$body_proceed:: $L$common_pop_regs::
mov rbx,QWORD PTR[((-8))+rax] mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax] mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax] mov r12,QWORD PTR[((-24))+rax]
@ -3819,32 +3878,32 @@ ALIGN 8
$L$SEH_info_bn_mul_mont_gather5:: $L$SEH_info_bn_mul_mont_gather5::
DB 9,0,0,0 DB 9,0,0,0
DD imagerel mul_handler DD imagerel mul_handler
DD imagerel $L$mul_body,imagerel $L$mul_epilogue DD imagerel $L$mul_body,imagerel $L$mul_body,imagerel $L$mul_epilogue
ALIGN 8 ALIGN 8
$L$SEH_info_bn_mul4x_mont_gather5:: $L$SEH_info_bn_mul4x_mont_gather5::
DB 9,0,0,0 DB 9,0,0,0
DD imagerel mul_handler DD imagerel mul_handler
DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue DD imagerel $L$mul4x_prologue,imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
ALIGN 8 ALIGN 8
$L$SEH_info_bn_power5:: $L$SEH_info_bn_power5::
DB 9,0,0,0 DB 9,0,0,0
DD imagerel mul_handler DD imagerel mul_handler
DD imagerel $L$power5_body,imagerel $L$power5_epilogue DD imagerel $L$power5_prologue,imagerel $L$power5_body,imagerel $L$power5_epilogue
ALIGN 8 ALIGN 8
$L$SEH_info_bn_from_mont8x:: $L$SEH_info_bn_from_mont8x::
DB 9,0,0,0 DB 9,0,0,0
DD imagerel mul_handler DD imagerel mul_handler
DD imagerel $L$from_body,imagerel $L$from_epilogue DD imagerel $L$from_prologue,imagerel $L$from_body,imagerel $L$from_epilogue
ALIGN 8 ALIGN 8
$L$SEH_info_bn_mulx4x_mont_gather5:: $L$SEH_info_bn_mulx4x_mont_gather5::
DB 9,0,0,0 DB 9,0,0,0
DD imagerel mul_handler DD imagerel mul_handler
DD imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue DD imagerel $L$mulx4x_prologue,imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue
ALIGN 8 ALIGN 8
$L$SEH_info_bn_powerx5:: $L$SEH_info_bn_powerx5::
DB 9,0,0,0 DB 9,0,0,0
DD imagerel mul_handler DD imagerel mul_handler
DD imagerel $L$powerx5_body,imagerel $L$powerx5_epilogue DD imagerel $L$powerx5_prologue,imagerel $L$powerx5_body,imagerel $L$powerx5_epilogue
ALIGN 8 ALIGN 8
$L$SEH_info_bn_gather5:: $L$SEH_info_bn_gather5::
DB 001h,00bh,003h,00ah DB 001h,00bh,003h,00ah

189
deps/openssl/asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm

@ -36,6 +36,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
push r13 push r13
mov r8,QWORD PTR[rsi] mov r8,QWORD PTR[rsi]
xor r13,r13
mov r9,QWORD PTR[8+rsi] mov r9,QWORD PTR[8+rsi]
add r8,r8 add r8,r8
mov r10,QWORD PTR[16+rsi] mov r10,QWORD PTR[16+rsi]
@ -46,7 +47,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
adc r10,r10 adc r10,r10
adc r11,r11 adc r11,r11
mov rdx,r9 mov rdx,r9
sbb r13,r13 adc r13,0
sub r8,QWORD PTR[rsi] sub r8,QWORD PTR[rsi]
mov rcx,r10 mov rcx,r10
@ -54,14 +55,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
sbb r10,QWORD PTR[16+rsi] sbb r10,QWORD PTR[16+rsi]
mov r12,r11 mov r12,r11
sbb r11,QWORD PTR[24+rsi] sbb r11,QWORD PTR[24+rsi]
test r13,r13 sbb r13,0
cmovz r8,rax cmovc r8,rax
cmovz r9,rdx cmovc r9,rdx
mov QWORD PTR[rdi],r8 mov QWORD PTR[rdi],r8
cmovz r10,rcx cmovc r10,rcx
mov QWORD PTR[8+rdi],r9 mov QWORD PTR[8+rdi],r9
cmovz r11,r12 cmovc r11,r12
mov QWORD PTR[16+rdi],r10 mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11 mov QWORD PTR[24+rdi],r11
@ -180,12 +181,12 @@ $L$SEH_begin_ecp_nistz256_mul_by_3::
sbb r10,0 sbb r10,0
mov r12,r11 mov r12,r11
sbb r11,QWORD PTR[(($L$poly+24))] sbb r11,QWORD PTR[(($L$poly+24))]
test r13,r13 sbb r13,0
cmovz r8,rax cmovc r8,rax
cmovz r9,rdx cmovc r9,rdx
cmovz r10,rcx cmovc r10,rcx
cmovz r11,r12 cmovc r11,r12
xor r13,r13 xor r13,r13
add r8,QWORD PTR[rsi] add r8,QWORD PTR[rsi]
@ -202,14 +203,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_3::
sbb r10,0 sbb r10,0
mov r12,r11 mov r12,r11
sbb r11,QWORD PTR[(($L$poly+24))] sbb r11,QWORD PTR[(($L$poly+24))]
test r13,r13 sbb r13,0
cmovz r8,rax cmovc r8,rax
cmovz r9,rdx cmovc r9,rdx
mov QWORD PTR[rdi],r8 mov QWORD PTR[rdi],r8
cmovz r10,rcx cmovc r10,rcx
mov QWORD PTR[8+rdi],r9 mov QWORD PTR[8+rdi],r9
cmovz r11,r12 cmovc r11,r12
mov QWORD PTR[16+rdi],r10 mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11 mov QWORD PTR[24+rdi],r11
@ -260,14 +261,14 @@ $L$SEH_begin_ecp_nistz256_add::
sbb r10,QWORD PTR[16+rsi] sbb r10,QWORD PTR[16+rsi]
mov r12,r11 mov r12,r11
sbb r11,QWORD PTR[24+rsi] sbb r11,QWORD PTR[24+rsi]
test r13,r13 sbb r13,0
cmovz r8,rax cmovc r8,rax
cmovz r9,rdx cmovc r9,rdx
mov QWORD PTR[rdi],r8 mov QWORD PTR[rdi],r8
cmovz r10,rcx cmovc r10,rcx
mov QWORD PTR[8+rdi],r9 mov QWORD PTR[8+rdi],r9
cmovz r11,r12 cmovc r11,r12
mov QWORD PTR[16+rdi],r10 mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11 mov QWORD PTR[24+rdi],r11
@ -1673,13 +1674,14 @@ ecp_nistz256_avx2_select_w7 ENDP
ALIGN 32 ALIGN 32
__ecp_nistz256_add_toq PROC PRIVATE __ecp_nistz256_add_toq PROC PRIVATE
xor r11,r11
add r12,QWORD PTR[rbx] add r12,QWORD PTR[rbx]
adc r13,QWORD PTR[8+rbx] adc r13,QWORD PTR[8+rbx]
mov rax,r12 mov rax,r12
adc r8,QWORD PTR[16+rbx] adc r8,QWORD PTR[16+rbx]
adc r9,QWORD PTR[24+rbx] adc r9,QWORD PTR[24+rbx]
mov rbp,r13 mov rbp,r13
sbb r11,r11 adc r11,0
sub r12,-1 sub r12,-1
mov rcx,r8 mov rcx,r8
@ -1687,14 +1689,14 @@ __ecp_nistz256_add_toq PROC PRIVATE
sbb r8,0 sbb r8,0
mov r10,r9 mov r10,r9
sbb r9,r15 sbb r9,r15
test r11,r11 sbb r11,0
cmovz r12,rax cmovc r12,rax
cmovz r13,rbp cmovc r13,rbp
mov QWORD PTR[rdi],r12 mov QWORD PTR[rdi],r12
cmovz r8,rcx cmovc r8,rcx
mov QWORD PTR[8+rdi],r13 mov QWORD PTR[8+rdi],r13
cmovz r9,r10 cmovc r9,r10
mov QWORD PTR[16+rdi],r8 mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9 mov QWORD PTR[24+rdi],r9
@ -1762,13 +1764,14 @@ __ecp_nistz256_subq ENDP
ALIGN 32 ALIGN 32
__ecp_nistz256_mul_by_2q PROC PRIVATE __ecp_nistz256_mul_by_2q PROC PRIVATE
xor r11,r11
add r12,r12 add r12,r12
adc r13,r13 adc r13,r13
mov rax,r12 mov rax,r12
adc r8,r8 adc r8,r8
adc r9,r9 adc r9,r9
mov rbp,r13 mov rbp,r13
sbb r11,r11 adc r11,0
sub r12,-1 sub r12,-1
mov rcx,r8 mov rcx,r8
@ -1776,14 +1779,14 @@ __ecp_nistz256_mul_by_2q PROC PRIVATE
sbb r8,0 sbb r8,0
mov r10,r9 mov r10,r9
sbb r9,r15 sbb r9,r15
test r11,r11 sbb r11,0
cmovz r12,rax cmovc r12,rax
cmovz r13,rbp cmovc r13,rbp
mov QWORD PTR[rdi],r12 mov QWORD PTR[rdi],r12
cmovz r8,rcx cmovc r8,rcx
mov QWORD PTR[8+rdi],r13 mov QWORD PTR[8+rdi],r13
cmovz r9,r10 cmovc r9,r10
mov QWORD PTR[16+rdi],r8 mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9 mov QWORD PTR[24+rdi],r9
@ -2041,16 +2044,14 @@ $L$SEH_begin_ecp_nistz256_point_add::
mov rsi,rdx mov rsi,rdx
movdqa XMMWORD PTR[384+rsp],xmm0 movdqa XMMWORD PTR[384+rsp],xmm0
movdqa XMMWORD PTR[(384+16)+rsp],xmm1 movdqa XMMWORD PTR[(384+16)+rsp],xmm1
por xmm1,xmm0
movdqa XMMWORD PTR[416+rsp],xmm2 movdqa XMMWORD PTR[416+rsp],xmm2
movdqa XMMWORD PTR[(416+16)+rsp],xmm3 movdqa XMMWORD PTR[(416+16)+rsp],xmm3
por xmm3,xmm2
movdqa XMMWORD PTR[448+rsp],xmm4 movdqa XMMWORD PTR[448+rsp],xmm4
movdqa XMMWORD PTR[(448+16)+rsp],xmm5 movdqa XMMWORD PTR[(448+16)+rsp],xmm5
por xmm3,xmm1 por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rsi] movdqu xmm0,XMMWORD PTR[rsi]
pshufd xmm5,xmm3,1h pshufd xmm3,xmm5,1h
movdqu xmm1,XMMWORD PTR[16+rsi] movdqu xmm1,XMMWORD PTR[16+rsi]
movdqu xmm2,XMMWORD PTR[32+rsi] movdqu xmm2,XMMWORD PTR[32+rsi]
por xmm5,xmm3 por xmm5,xmm3
@ -2062,14 +2063,14 @@ $L$SEH_begin_ecp_nistz256_point_add::
movdqa XMMWORD PTR[480+rsp],xmm0 movdqa XMMWORD PTR[480+rsp],xmm0
pshufd xmm4,xmm5,01eh pshufd xmm4,xmm5,01eh
movdqa XMMWORD PTR[(480+16)+rsp],xmm1 movdqa XMMWORD PTR[(480+16)+rsp],xmm1
por xmm1,xmm0 movdqu xmm0,XMMWORD PTR[64+rsi]
DB 102,72,15,110,199 movdqu xmm1,XMMWORD PTR[80+rsi]
movdqa XMMWORD PTR[512+rsp],xmm2 movdqa XMMWORD PTR[512+rsp],xmm2
movdqa XMMWORD PTR[(512+16)+rsp],xmm3 movdqa XMMWORD PTR[(512+16)+rsp],xmm3
por xmm3,xmm2
por xmm5,xmm4 por xmm5,xmm4
pxor xmm4,xmm4 pxor xmm4,xmm4
por xmm3,xmm1 por xmm1,xmm0
DB 102,72,15,110,199
lea rsi,QWORD PTR[((64-0))+rsi] lea rsi,QWORD PTR[((64-0))+rsi]
mov QWORD PTR[((544+0))+rsp],rax mov QWORD PTR[((544+0))+rsp],rax
@ -2080,8 +2081,8 @@ DB 102,72,15,110,199
call __ecp_nistz256_sqr_montq call __ecp_nistz256_sqr_montq
pcmpeqd xmm5,xmm4 pcmpeqd xmm5,xmm4
pshufd xmm4,xmm3,1h pshufd xmm4,xmm1,1h
por xmm4,xmm3 por xmm4,xmm1
pshufd xmm5,xmm5,0 pshufd xmm5,xmm5,0
pshufd xmm3,xmm4,01eh pshufd xmm3,xmm4,01eh
por xmm4,xmm3 por xmm4,xmm3
@ -2264,6 +2265,7 @@ $L$add_proceedq::
xor r11,r11
add r12,r12 add r12,r12
lea rsi,QWORD PTR[96+rsp] lea rsi,QWORD PTR[96+rsp]
adc r13,r13 adc r13,r13
@ -2271,7 +2273,7 @@ $L$add_proceedq::
adc r8,r8 adc r8,r8
adc r9,r9 adc r9,r9
mov rbp,r13 mov rbp,r13
sbb r11,r11 adc r11,0
sub r12,-1 sub r12,-1
mov rcx,r8 mov rcx,r8
@ -2279,15 +2281,15 @@ $L$add_proceedq::
sbb r8,0 sbb r8,0
mov r10,r9 mov r10,r9
sbb r9,r15 sbb r9,r15
test r11,r11 sbb r11,0
cmovz r12,rax cmovc r12,rax
mov rax,QWORD PTR[rsi] mov rax,QWORD PTR[rsi]
cmovz r13,rbp cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi] mov rbp,QWORD PTR[8+rsi]
cmovz r8,rcx cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi] mov rcx,QWORD PTR[16+rsi]
cmovz r9,r10 cmovc r9,r10
mov r10,QWORD PTR[24+rsi] mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subq call __ecp_nistz256_subq
@ -2457,16 +2459,14 @@ $L$SEH_begin_ecp_nistz256_point_add_affine::
mov r8,QWORD PTR[((64+24))+rsi] mov r8,QWORD PTR[((64+24))+rsi]
movdqa XMMWORD PTR[320+rsp],xmm0 movdqa XMMWORD PTR[320+rsp],xmm0
movdqa XMMWORD PTR[(320+16)+rsp],xmm1 movdqa XMMWORD PTR[(320+16)+rsp],xmm1
por xmm1,xmm0
movdqa XMMWORD PTR[352+rsp],xmm2 movdqa XMMWORD PTR[352+rsp],xmm2
movdqa XMMWORD PTR[(352+16)+rsp],xmm3 movdqa XMMWORD PTR[(352+16)+rsp],xmm3
por xmm3,xmm2
movdqa XMMWORD PTR[384+rsp],xmm4 movdqa XMMWORD PTR[384+rsp],xmm4
movdqa XMMWORD PTR[(384+16)+rsp],xmm5 movdqa XMMWORD PTR[(384+16)+rsp],xmm5
por xmm3,xmm1 por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rbx] movdqu xmm0,XMMWORD PTR[rbx]
pshufd xmm5,xmm3,1h pshufd xmm3,xmm5,1h
movdqu xmm1,XMMWORD PTR[16+rbx] movdqu xmm1,XMMWORD PTR[16+rbx]
movdqu xmm2,XMMWORD PTR[32+rbx] movdqu xmm2,XMMWORD PTR[32+rbx]
por xmm5,xmm3 por xmm5,xmm3
@ -2584,6 +2584,7 @@ DB 102,72,15,110,199
xor r11,r11
add r12,r12 add r12,r12
lea rsi,QWORD PTR[192+rsp] lea rsi,QWORD PTR[192+rsp]
adc r13,r13 adc r13,r13
@ -2591,7 +2592,7 @@ DB 102,72,15,110,199
adc r8,r8 adc r8,r8
adc r9,r9 adc r9,r9
mov rbp,r13 mov rbp,r13
sbb r11,r11 adc r11,0
sub r12,-1 sub r12,-1
mov rcx,r8 mov rcx,r8
@ -2599,15 +2600,15 @@ DB 102,72,15,110,199
sbb r8,0 sbb r8,0
mov r10,r9 mov r10,r9
sbb r9,r15 sbb r9,r15
test r11,r11 sbb r11,0
cmovz r12,rax cmovc r12,rax
mov rax,QWORD PTR[rsi] mov rax,QWORD PTR[rsi]
cmovz r13,rbp cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi] mov rbp,QWORD PTR[8+rsi]
cmovz r8,rcx cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi] mov rcx,QWORD PTR[16+rsi]
cmovz r9,r10 cmovc r9,r10
mov r10,QWORD PTR[24+rsi] mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subq call __ecp_nistz256_subq
@ -2757,14 +2758,14 @@ __ecp_nistz256_add_tox PROC PRIVATE
sbb r8,0 sbb r8,0
mov r10,r9 mov r10,r9
sbb r9,r15 sbb r9,r15
sbb r11,0
bt r11,0 cmovc r12,rax
cmovnc r12,rax cmovc r13,rbp
cmovnc r13,rbp
mov QWORD PTR[rdi],r12 mov QWORD PTR[rdi],r12
cmovnc r8,rcx cmovc r8,rcx
mov QWORD PTR[8+rdi],r13 mov QWORD PTR[8+rdi],r13
cmovnc r9,r10 cmovc r9,r10
mov QWORD PTR[16+rdi],r8 mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9 mov QWORD PTR[24+rdi],r9
@ -2852,14 +2853,14 @@ __ecp_nistz256_mul_by_2x PROC PRIVATE
sbb r8,0 sbb r8,0
mov r10,r9 mov r10,r9
sbb r9,r15 sbb r9,r15
sbb r11,0
bt r11,0 cmovc r12,rax
cmovnc r12,rax cmovc r13,rbp
cmovnc r13,rbp
mov QWORD PTR[rdi],r12 mov QWORD PTR[rdi],r12
cmovnc r8,rcx cmovc r8,rcx
mov QWORD PTR[8+rdi],r13 mov QWORD PTR[8+rdi],r13
cmovnc r9,r10 cmovc r9,r10
mov QWORD PTR[16+rdi],r8 mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9 mov QWORD PTR[24+rdi],r9
@ -3109,16 +3110,14 @@ $L$point_addx::
mov rsi,rdx mov rsi,rdx
movdqa XMMWORD PTR[384+rsp],xmm0 movdqa XMMWORD PTR[384+rsp],xmm0
movdqa XMMWORD PTR[(384+16)+rsp],xmm1 movdqa XMMWORD PTR[(384+16)+rsp],xmm1
por xmm1,xmm0
movdqa XMMWORD PTR[416+rsp],xmm2 movdqa XMMWORD PTR[416+rsp],xmm2
movdqa XMMWORD PTR[(416+16)+rsp],xmm3 movdqa XMMWORD PTR[(416+16)+rsp],xmm3
por xmm3,xmm2
movdqa XMMWORD PTR[448+rsp],xmm4 movdqa XMMWORD PTR[448+rsp],xmm4
movdqa XMMWORD PTR[(448+16)+rsp],xmm5 movdqa XMMWORD PTR[(448+16)+rsp],xmm5
por xmm3,xmm1 por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rsi] movdqu xmm0,XMMWORD PTR[rsi]
pshufd xmm5,xmm3,1h pshufd xmm3,xmm5,1h
movdqu xmm1,XMMWORD PTR[16+rsi] movdqu xmm1,XMMWORD PTR[16+rsi]
movdqu xmm2,XMMWORD PTR[32+rsi] movdqu xmm2,XMMWORD PTR[32+rsi]
por xmm5,xmm3 por xmm5,xmm3
@ -3130,14 +3129,14 @@ $L$point_addx::
movdqa XMMWORD PTR[480+rsp],xmm0 movdqa XMMWORD PTR[480+rsp],xmm0
pshufd xmm4,xmm5,01eh pshufd xmm4,xmm5,01eh
movdqa XMMWORD PTR[(480+16)+rsp],xmm1 movdqa XMMWORD PTR[(480+16)+rsp],xmm1
por xmm1,xmm0 movdqu xmm0,XMMWORD PTR[64+rsi]
DB 102,72,15,110,199 movdqu xmm1,XMMWORD PTR[80+rsi]
movdqa XMMWORD PTR[512+rsp],xmm2 movdqa XMMWORD PTR[512+rsp],xmm2
movdqa XMMWORD PTR[(512+16)+rsp],xmm3 movdqa XMMWORD PTR[(512+16)+rsp],xmm3
por xmm3,xmm2
por xmm5,xmm4 por xmm5,xmm4
pxor xmm4,xmm4 pxor xmm4,xmm4
por xmm3,xmm1 por xmm1,xmm0
DB 102,72,15,110,199
lea rsi,QWORD PTR[((64-128))+rsi] lea rsi,QWORD PTR[((64-128))+rsi]
mov QWORD PTR[((544+0))+rsp],rdx mov QWORD PTR[((544+0))+rsp],rdx
@ -3148,8 +3147,8 @@ DB 102,72,15,110,199
call __ecp_nistz256_sqr_montx call __ecp_nistz256_sqr_montx
pcmpeqd xmm5,xmm4 pcmpeqd xmm5,xmm4
pshufd xmm4,xmm3,1h pshufd xmm4,xmm1,1h
por xmm4,xmm3 por xmm4,xmm1
pshufd xmm5,xmm5,0 pshufd xmm5,xmm5,0
pshufd xmm3,xmm4,01eh pshufd xmm3,xmm4,01eh
por xmm4,xmm3 por xmm4,xmm3
@ -3332,6 +3331,7 @@ $L$add_proceedx::
xor r11,r11
add r12,r12 add r12,r12
lea rsi,QWORD PTR[96+rsp] lea rsi,QWORD PTR[96+rsp]
adc r13,r13 adc r13,r13
@ -3339,7 +3339,7 @@ $L$add_proceedx::
adc r8,r8 adc r8,r8
adc r9,r9 adc r9,r9
mov rbp,r13 mov rbp,r13
sbb r11,r11 adc r11,0
sub r12,-1 sub r12,-1
mov rcx,r8 mov rcx,r8
@ -3347,15 +3347,15 @@ $L$add_proceedx::
sbb r8,0 sbb r8,0
mov r10,r9 mov r10,r9
sbb r9,r15 sbb r9,r15
test r11,r11 sbb r11,0
cmovz r12,rax cmovc r12,rax
mov rax,QWORD PTR[rsi] mov rax,QWORD PTR[rsi]
cmovz r13,rbp cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi] mov rbp,QWORD PTR[8+rsi]
cmovz r8,rcx cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi] mov rcx,QWORD PTR[16+rsi]
cmovz r9,r10 cmovc r9,r10
mov r10,QWORD PTR[24+rsi] mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subx call __ecp_nistz256_subx
@ -3521,16 +3521,14 @@ $L$point_add_affinex::
mov r8,QWORD PTR[((64+24))+rsi] mov r8,QWORD PTR[((64+24))+rsi]
movdqa XMMWORD PTR[320+rsp],xmm0 movdqa XMMWORD PTR[320+rsp],xmm0
movdqa XMMWORD PTR[(320+16)+rsp],xmm1 movdqa XMMWORD PTR[(320+16)+rsp],xmm1
por xmm1,xmm0
movdqa XMMWORD PTR[352+rsp],xmm2 movdqa XMMWORD PTR[352+rsp],xmm2
movdqa XMMWORD PTR[(352+16)+rsp],xmm3 movdqa XMMWORD PTR[(352+16)+rsp],xmm3
por xmm3,xmm2
movdqa XMMWORD PTR[384+rsp],xmm4 movdqa XMMWORD PTR[384+rsp],xmm4
movdqa XMMWORD PTR[(384+16)+rsp],xmm5 movdqa XMMWORD PTR[(384+16)+rsp],xmm5
por xmm3,xmm1 por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rbx] movdqu xmm0,XMMWORD PTR[rbx]
pshufd xmm5,xmm3,1h pshufd xmm3,xmm5,1h
movdqu xmm1,XMMWORD PTR[16+rbx] movdqu xmm1,XMMWORD PTR[16+rbx]
movdqu xmm2,XMMWORD PTR[32+rbx] movdqu xmm2,XMMWORD PTR[32+rbx]
por xmm5,xmm3 por xmm5,xmm3
@ -3648,6 +3646,7 @@ DB 102,72,15,110,199
xor r11,r11
add r12,r12 add r12,r12
lea rsi,QWORD PTR[192+rsp] lea rsi,QWORD PTR[192+rsp]
adc r13,r13 adc r13,r13
@ -3655,7 +3654,7 @@ DB 102,72,15,110,199
adc r8,r8 adc r8,r8
adc r9,r9 adc r9,r9
mov rbp,r13 mov rbp,r13
sbb r11,r11 adc r11,0
sub r12,-1 sub r12,-1
mov rcx,r8 mov rcx,r8
@ -3663,15 +3662,15 @@ DB 102,72,15,110,199
sbb r8,0 sbb r8,0
mov r10,r9 mov r10,r9
sbb r9,r15 sbb r9,r15
test r11,r11 sbb r11,0
cmovz r12,rax cmovc r12,rax
mov rax,QWORD PTR[rsi] mov rax,QWORD PTR[rsi]
cmovz r13,rbp cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi] mov rbp,QWORD PTR[8+rsi]
cmovz r8,rcx cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi] mov rcx,QWORD PTR[16+rsi]
cmovz r9,r10 cmovc r9,r10
mov r10,QWORD PTR[24+rsi] mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subx call __ecp_nistz256_subx

4
deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm

@ -1291,9 +1291,9 @@ DB 102,15,56,0,251
ALIGN 16 ALIGN 16
$L$oop_shaext:: $L$oop_shaext::
dec rdx dec rdx
lea rax,QWORD PTR[64+rsi] lea r8,QWORD PTR[64+rsi]
paddd xmm1,xmm4 paddd xmm1,xmm4
cmovne rsi,rax cmovne rsi,r8
movdqa xmm8,xmm0 movdqa xmm8,xmm0
DB 15,56,201,229 DB 15,56,201,229
movdqa xmm2,xmm0 movdqa xmm2,xmm0

109
deps/openssl/asm/x86-elf-gas/bn/x86-mont.s

@ -15,44 +15,51 @@ bn_mul_mont:
jl .L000just_leave jl .L000just_leave
leal 20(%esp),%esi leal 20(%esp),%esi
leal 24(%esp),%edx leal 24(%esp),%edx
movl %esp,%ebp
addl $2,%edi addl $2,%edi
negl %edi negl %edi
leal -32(%esp,%edi,4),%esp leal -32(%esp,%edi,4),%ebp
negl %edi negl %edi
movl %esp,%eax movl %ebp,%eax
subl %edx,%eax subl %edx,%eax
andl $2047,%eax andl $2047,%eax
subl %eax,%esp subl %eax,%ebp
xorl %esp,%edx xorl %ebp,%edx
andl $2048,%edx andl $2048,%edx
xorl $2048,%edx xorl $2048,%edx
subl %edx,%esp subl %edx,%ebp
andl $-64,%esp andl $-64,%ebp
movl %ebp,%eax movl %esp,%eax
subl %esp,%eax subl %ebp,%eax
andl $-4096,%eax andl $-4096,%eax
movl %esp,%edx
leal (%ebp,%eax,1),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja .L001page_walk
jmp .L002page_walk_done
.align 16
.L001page_walk: .L001page_walk:
movl (%esp,%eax,1),%edx leal -4096(%esp),%esp
subl $4096,%eax movl (%esp),%eax
.byte 46 cmpl %ebp,%esp
jnc .L001page_walk ja .L001page_walk
.L002page_walk_done:
movl (%esi),%eax movl (%esi),%eax
movl 4(%esi),%ebx movl 4(%esi),%ebx
movl 8(%esi),%ecx movl 8(%esi),%ecx
movl 12(%esi),%edx movl 12(%esi),%ebp
movl 16(%esi),%esi movl 16(%esi),%esi
movl (%esi),%esi movl (%esi),%esi
movl %eax,4(%esp) movl %eax,4(%esp)
movl %ebx,8(%esp) movl %ebx,8(%esp)
movl %ecx,12(%esp) movl %ecx,12(%esp)
movl %edx,16(%esp) movl %ebp,16(%esp)
movl %esi,20(%esp) movl %esi,20(%esp)
leal -3(%edi),%ebx leal -3(%edi),%ebx
movl %ebp,24(%esp) movl %edx,24(%esp)
leal OPENSSL_ia32cap_P,%eax leal OPENSSL_ia32cap_P,%eax
btl $26,(%eax) btl $26,(%eax)
jnc .L002non_sse2 jnc .L003non_sse2
movl $-1,%eax movl $-1,%eax
movd %eax,%mm7 movd %eax,%mm7
movl 8(%esp),%esi movl 8(%esp),%esi
@ -76,7 +83,7 @@ bn_mul_mont:
psrlq $32,%mm3 psrlq $32,%mm3
incl %ecx incl %ecx
.align 16 .align 16
.L0031st: .L0041st:
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -91,7 +98,7 @@ bn_mul_mont:
psrlq $32,%mm3 psrlq $32,%mm3
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
cmpl %ebx,%ecx cmpl %ebx,%ecx
jl .L0031st jl .L0041st
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -105,7 +112,7 @@ bn_mul_mont:
paddq %mm2,%mm3 paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4) movq %mm3,32(%esp,%ebx,4)
incl %edx incl %edx
.L004outer: .L005outer:
xorl %ecx,%ecx xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4 movd (%edi,%edx,4),%mm4
movd (%esi),%mm5 movd (%esi),%mm5
@ -127,7 +134,7 @@ bn_mul_mont:
paddq %mm6,%mm2 paddq %mm6,%mm2
incl %ecx incl %ecx
decl %ebx decl %ebx
.L005inner: .L006inner:
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -144,7 +151,7 @@ bn_mul_mont:
paddq %mm6,%mm2 paddq %mm6,%mm2
decl %ebx decl %ebx
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
jnz .L005inner jnz .L006inner
movl %ecx,%ebx movl %ecx,%ebx
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
@ -162,11 +169,11 @@ bn_mul_mont:
movq %mm3,32(%esp,%ebx,4) movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx leal 1(%edx),%edx
cmpl %ebx,%edx cmpl %ebx,%edx
jle .L004outer jle .L005outer
emms emms
jmp .L006common_tail jmp .L007common_tail
.align 16 .align 16
.L002non_sse2: .L003non_sse2:
movl 8(%esp),%esi movl 8(%esp),%esi
leal 1(%ebx),%ebp leal 1(%ebx),%ebp
movl 12(%esp),%edi movl 12(%esp),%edi
@ -177,12 +184,12 @@ bn_mul_mont:
leal 4(%edi,%ebx,4),%eax leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp orl %edx,%ebp
movl (%edi),%edi movl (%edi),%edi
jz .L007bn_sqr_mont jz .L008bn_sqr_mont
movl %eax,28(%esp) movl %eax,28(%esp)
movl (%esi),%eax movl (%esi),%eax
xorl %edx,%edx xorl %edx,%edx
.align 16 .align 16
.L008mull: .L009mull:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl %eax,%ebp addl %eax,%ebp
@ -191,7 +198,7 @@ bn_mul_mont:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl .L008mull jl .L009mull
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
movl 20(%esp),%edi movl 20(%esp),%edi
@ -209,9 +216,9 @@ bn_mul_mont:
movl 4(%esi),%eax movl 4(%esi),%eax
adcl $0,%edx adcl $0,%edx
incl %ecx incl %ecx
jmp .L0092ndmadd jmp .L0102ndmadd
.align 16 .align 16
.L0101stmadd: .L0111stmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -222,7 +229,7 @@ bn_mul_mont:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl .L0101stmadd jl .L0111stmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%eax addl 32(%esp,%ebx,4),%eax
@ -245,7 +252,7 @@ bn_mul_mont:
adcl $0,%edx adcl $0,%edx
movl $1,%ecx movl $1,%ecx
.align 16 .align 16
.L0092ndmadd: .L0102ndmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -256,7 +263,7 @@ bn_mul_mont:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4) movl %ebp,24(%esp,%ecx,4)
jl .L0092ndmadd jl .L0102ndmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%ebp addl 32(%esp,%ebx,4),%ebp
@ -272,16 +279,16 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4) movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4) movl %eax,36(%esp,%ebx,4)
je .L006common_tail je .L007common_tail
movl (%ecx),%edi movl (%ecx),%edi
movl 8(%esp),%esi movl 8(%esp),%esi
movl %ecx,12(%esp) movl %ecx,12(%esp)
xorl %ecx,%ecx xorl %ecx,%ecx
xorl %edx,%edx xorl %edx,%edx
movl (%esi),%eax movl (%esi),%eax
jmp .L0101stmadd jmp .L0111stmadd
.align 16 .align 16
.L007bn_sqr_mont: .L008bn_sqr_mont:
movl %ebx,(%esp) movl %ebx,(%esp)
movl %ecx,12(%esp) movl %ecx,12(%esp)
movl %edi,%eax movl %edi,%eax
@ -292,7 +299,7 @@ bn_mul_mont:
andl $1,%ebx andl $1,%ebx
incl %ecx incl %ecx
.align 16 .align 16
.L011sqr: .L012sqr:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -304,7 +311,7 @@ bn_mul_mont:
cmpl (%esp),%ecx cmpl (%esp),%ecx
movl %eax,%ebx movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl .L011sqr jl .L012sqr
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -328,7 +335,7 @@ bn_mul_mont:
movl 4(%esi),%eax movl 4(%esi),%eax
movl $1,%ecx movl $1,%ecx
.align 16 .align 16
.L0123rdmadd: .L0133rdmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -347,7 +354,7 @@ bn_mul_mont:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4) movl %ebp,24(%esp,%ecx,4)
jl .L0123rdmadd jl .L0133rdmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%ebp addl 32(%esp,%ebx,4),%ebp
@ -363,7 +370,7 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4) movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4) movl %eax,36(%esp,%ebx,4)
je .L006common_tail je .L007common_tail
movl 4(%esi,%ecx,4),%edi movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
movl %edi,%eax movl %edi,%eax
@ -375,12 +382,12 @@ bn_mul_mont:
xorl %ebp,%ebp xorl %ebp,%ebp
cmpl %ebx,%ecx cmpl %ebx,%ecx
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
je .L013sqrlast je .L014sqrlast
movl %edx,%ebx movl %edx,%ebx
shrl $1,%edx shrl $1,%edx
andl $1,%ebx andl $1,%ebx
.align 16 .align 16
.L014sqradd: .L015sqradd:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -396,13 +403,13 @@ bn_mul_mont:
cmpl (%esp),%ecx cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx movl %eax,%ebx
jle .L014sqradd jle .L015sqradd
movl %edx,%ebp movl %edx,%ebp
addl %edx,%edx addl %edx,%edx
shrl $31,%ebp shrl $31,%ebp
addl %ebx,%edx addl %ebx,%edx
adcl $0,%ebp adcl $0,%ebp
.L013sqrlast: .L014sqrlast:
movl 20(%esp),%edi movl 20(%esp),%edi
movl 16(%esp),%esi movl 16(%esp),%esi
imull 32(%esp),%edi imull 32(%esp),%edi
@ -417,9 +424,9 @@ bn_mul_mont:
adcl $0,%edx adcl $0,%edx
movl $1,%ecx movl $1,%ecx
movl 4(%esi),%eax movl 4(%esi),%eax
jmp .L0123rdmadd jmp .L0133rdmadd
.align 16 .align 16
.L006common_tail: .L007common_tail:
movl 16(%esp),%ebp movl 16(%esp),%ebp
movl 4(%esp),%edi movl 4(%esp),%edi
leal 32(%esp),%esi leal 32(%esp),%esi
@ -427,13 +434,13 @@ bn_mul_mont:
movl %ebx,%ecx movl %ebx,%ecx
xorl %edx,%edx xorl %edx,%edx
.align 16 .align 16
.L015sub: .L016sub:
sbbl (%ebp,%edx,4),%eax sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4) movl %eax,(%edi,%edx,4)
decl %ecx decl %ecx
movl 4(%esi,%edx,4),%eax movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx leal 1(%edx),%edx
jge .L015sub jge .L016sub
sbbl $0,%eax sbbl $0,%eax
andl %eax,%esi andl %eax,%esi
notl %eax notl %eax
@ -441,12 +448,12 @@ bn_mul_mont:
andl %eax,%ebp andl %eax,%ebp
orl %ebp,%esi orl %ebp,%esi
.align 16 .align 16
.L016copy: .L017copy:
movl (%esi,%ebx,4),%eax movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4) movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4) movl %ecx,32(%esp,%ebx,4)
decl %ebx decl %ebx
jge .L016copy jge .L017copy
movl 24(%esp),%esp movl 24(%esp),%esp
movl $1,%eax movl $1,%eax
.L000just_leave: .L000just_leave:

115
deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s

@ -14,47 +14,54 @@ L_bn_mul_mont_begin:
jl L000just_leave jl L000just_leave
leal 20(%esp),%esi leal 20(%esp),%esi
leal 24(%esp),%edx leal 24(%esp),%edx
movl %esp,%ebp
addl $2,%edi addl $2,%edi
negl %edi negl %edi
leal -32(%esp,%edi,4),%esp leal -32(%esp,%edi,4),%ebp
negl %edi negl %edi
movl %esp,%eax movl %ebp,%eax
subl %edx,%eax subl %edx,%eax
andl $2047,%eax andl $2047,%eax
subl %eax,%esp subl %eax,%ebp
xorl %esp,%edx xorl %ebp,%edx
andl $2048,%edx andl $2048,%edx
xorl $2048,%edx xorl $2048,%edx
subl %edx,%esp subl %edx,%ebp
andl $-64,%esp andl $-64,%ebp
movl %ebp,%eax movl %esp,%eax
subl %esp,%eax subl %ebp,%eax
andl $-4096,%eax andl $-4096,%eax
movl %esp,%edx
leal (%ebp,%eax,1),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja L001page_walk
jmp L002page_walk_done
.align 4,0x90
L001page_walk: L001page_walk:
movl (%esp,%eax,1),%edx leal -4096(%esp),%esp
subl $4096,%eax movl (%esp),%eax
.byte 46 cmpl %ebp,%esp
jnc L001page_walk ja L001page_walk
L002page_walk_done:
movl (%esi),%eax movl (%esi),%eax
movl 4(%esi),%ebx movl 4(%esi),%ebx
movl 8(%esi),%ecx movl 8(%esi),%ecx
movl 12(%esi),%edx movl 12(%esi),%ebp
movl 16(%esi),%esi movl 16(%esi),%esi
movl (%esi),%esi movl (%esi),%esi
movl %eax,4(%esp) movl %eax,4(%esp)
movl %ebx,8(%esp) movl %ebx,8(%esp)
movl %ecx,12(%esp) movl %ecx,12(%esp)
movl %edx,16(%esp) movl %ebp,16(%esp)
movl %esi,20(%esp) movl %esi,20(%esp)
leal -3(%edi),%ebx leal -3(%edi),%ebx
movl %ebp,24(%esp) movl %edx,24(%esp)
call L002PIC_me_up call L003PIC_me_up
L002PIC_me_up: L003PIC_me_up:
popl %eax popl %eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L002PIC_me_up(%eax),%eax movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax
btl $26,(%eax) btl $26,(%eax)
jnc L003non_sse2 jnc L004non_sse2
movl $-1,%eax movl $-1,%eax
movd %eax,%mm7 movd %eax,%mm7
movl 8(%esp),%esi movl 8(%esp),%esi
@ -78,7 +85,7 @@ L002PIC_me_up:
psrlq $32,%mm3 psrlq $32,%mm3
incl %ecx incl %ecx
.align 4,0x90 .align 4,0x90
L0041st: L0051st:
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -93,7 +100,7 @@ L0041st:
psrlq $32,%mm3 psrlq $32,%mm3
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
cmpl %ebx,%ecx cmpl %ebx,%ecx
jl L0041st jl L0051st
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -107,7 +114,7 @@ L0041st:
paddq %mm2,%mm3 paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4) movq %mm3,32(%esp,%ebx,4)
incl %edx incl %edx
L005outer: L006outer:
xorl %ecx,%ecx xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4 movd (%edi,%edx,4),%mm4
movd (%esi),%mm5 movd (%esi),%mm5
@ -129,7 +136,7 @@ L005outer:
paddq %mm6,%mm2 paddq %mm6,%mm2
incl %ecx incl %ecx
decl %ebx decl %ebx
L006inner: L007inner:
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -146,7 +153,7 @@ L006inner:
paddq %mm6,%mm2 paddq %mm6,%mm2
decl %ebx decl %ebx
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
jnz L006inner jnz L007inner
movl %ecx,%ebx movl %ecx,%ebx
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
@ -164,11 +171,11 @@ L006inner:
movq %mm3,32(%esp,%ebx,4) movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx leal 1(%edx),%edx
cmpl %ebx,%edx cmpl %ebx,%edx
jle L005outer jle L006outer
emms emms
jmp L007common_tail jmp L008common_tail
.align 4,0x90 .align 4,0x90
L003non_sse2: L004non_sse2:
movl 8(%esp),%esi movl 8(%esp),%esi
leal 1(%ebx),%ebp leal 1(%ebx),%ebp
movl 12(%esp),%edi movl 12(%esp),%edi
@ -179,12 +186,12 @@ L003non_sse2:
leal 4(%edi,%ebx,4),%eax leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp orl %edx,%ebp
movl (%edi),%edi movl (%edi),%edi
jz L008bn_sqr_mont jz L009bn_sqr_mont
movl %eax,28(%esp) movl %eax,28(%esp)
movl (%esi),%eax movl (%esi),%eax
xorl %edx,%edx xorl %edx,%edx
.align 4,0x90 .align 4,0x90
L009mull: L010mull:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl %eax,%ebp addl %eax,%ebp
@ -193,7 +200,7 @@ L009mull:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl L009mull jl L010mull
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
movl 20(%esp),%edi movl 20(%esp),%edi
@ -211,9 +218,9 @@ L009mull:
movl 4(%esi),%eax movl 4(%esi),%eax
adcl $0,%edx adcl $0,%edx
incl %ecx incl %ecx
jmp L0102ndmadd jmp L0112ndmadd
.align 4,0x90 .align 4,0x90
L0111stmadd: L0121stmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -224,7 +231,7 @@ L0111stmadd:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl L0111stmadd jl L0121stmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%eax addl 32(%esp,%ebx,4),%eax
@ -247,7 +254,7 @@ L0111stmadd:
adcl $0,%edx adcl $0,%edx
movl $1,%ecx movl $1,%ecx
.align 4,0x90 .align 4,0x90
L0102ndmadd: L0112ndmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -258,7 +265,7 @@ L0102ndmadd:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4) movl %ebp,24(%esp,%ecx,4)
jl L0102ndmadd jl L0112ndmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%ebp addl 32(%esp,%ebx,4),%ebp
@ -274,16 +281,16 @@ L0102ndmadd:
movl %edx,32(%esp,%ebx,4) movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4) movl %eax,36(%esp,%ebx,4)
je L007common_tail je L008common_tail
movl (%ecx),%edi movl (%ecx),%edi
movl 8(%esp),%esi movl 8(%esp),%esi
movl %ecx,12(%esp) movl %ecx,12(%esp)
xorl %ecx,%ecx xorl %ecx,%ecx
xorl %edx,%edx xorl %edx,%edx
movl (%esi),%eax movl (%esi),%eax
jmp L0111stmadd jmp L0121stmadd
.align 4,0x90 .align 4,0x90
L008bn_sqr_mont: L009bn_sqr_mont:
movl %ebx,(%esp) movl %ebx,(%esp)
movl %ecx,12(%esp) movl %ecx,12(%esp)
movl %edi,%eax movl %edi,%eax
@ -294,7 +301,7 @@ L008bn_sqr_mont:
andl $1,%ebx andl $1,%ebx
incl %ecx incl %ecx
.align 4,0x90 .align 4,0x90
L012sqr: L013sqr:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -306,7 +313,7 @@ L012sqr:
cmpl (%esp),%ecx cmpl (%esp),%ecx
movl %eax,%ebx movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl L012sqr jl L013sqr
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -330,7 +337,7 @@ L012sqr:
movl 4(%esi),%eax movl 4(%esi),%eax
movl $1,%ecx movl $1,%ecx
.align 4,0x90 .align 4,0x90
L0133rdmadd: L0143rdmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -349,7 +356,7 @@ L0133rdmadd:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4) movl %ebp,24(%esp,%ecx,4)
jl L0133rdmadd jl L0143rdmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%ebp addl 32(%esp,%ebx,4),%ebp
@ -365,7 +372,7 @@ L0133rdmadd:
movl %edx,32(%esp,%ebx,4) movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4) movl %eax,36(%esp,%ebx,4)
je L007common_tail je L008common_tail
movl 4(%esi,%ecx,4),%edi movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
movl %edi,%eax movl %edi,%eax
@ -377,12 +384,12 @@ L0133rdmadd:
xorl %ebp,%ebp xorl %ebp,%ebp
cmpl %ebx,%ecx cmpl %ebx,%ecx
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
je L014sqrlast je L015sqrlast
movl %edx,%ebx movl %edx,%ebx
shrl $1,%edx shrl $1,%edx
andl $1,%ebx andl $1,%ebx
.align 4,0x90 .align 4,0x90
L015sqradd: L016sqradd:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -398,13 +405,13 @@ L015sqradd:
cmpl (%esp),%ecx cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx movl %eax,%ebx
jle L015sqradd jle L016sqradd
movl %edx,%ebp movl %edx,%ebp
addl %edx,%edx addl %edx,%edx
shrl $31,%ebp shrl $31,%ebp
addl %ebx,%edx addl %ebx,%edx
adcl $0,%ebp adcl $0,%ebp
L014sqrlast: L015sqrlast:
movl 20(%esp),%edi movl 20(%esp),%edi
movl 16(%esp),%esi movl 16(%esp),%esi
imull 32(%esp),%edi imull 32(%esp),%edi
@ -419,9 +426,9 @@ L014sqrlast:
adcl $0,%edx adcl $0,%edx
movl $1,%ecx movl $1,%ecx
movl 4(%esi),%eax movl 4(%esi),%eax
jmp L0133rdmadd jmp L0143rdmadd
.align 4,0x90 .align 4,0x90
L007common_tail: L008common_tail:
movl 16(%esp),%ebp movl 16(%esp),%ebp
movl 4(%esp),%edi movl 4(%esp),%edi
leal 32(%esp),%esi leal 32(%esp),%esi
@ -429,13 +436,13 @@ L007common_tail:
movl %ebx,%ecx movl %ebx,%ecx
xorl %edx,%edx xorl %edx,%edx
.align 4,0x90 .align 4,0x90
L016sub: L017sub:
sbbl (%ebp,%edx,4),%eax sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4) movl %eax,(%edi,%edx,4)
decl %ecx decl %ecx
movl 4(%esi,%edx,4),%eax movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx leal 1(%edx),%edx
jge L016sub jge L017sub
sbbl $0,%eax sbbl $0,%eax
andl %eax,%esi andl %eax,%esi
notl %eax notl %eax
@ -443,12 +450,12 @@ L016sub:
andl %eax,%ebp andl %eax,%ebp
orl %ebp,%esi orl %ebp,%esi
.align 4,0x90 .align 4,0x90
L017copy: L018copy:
movl (%esi,%ebx,4),%eax movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4) movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4) movl %ecx,32(%esp,%ebx,4)
decl %ebx decl %ebx
jge L017copy jge L018copy
movl 24(%esp),%esp movl 24(%esp),%esp
movl $1,%eax movl $1,%eax
L000just_leave: L000just_leave:

109
deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm

@ -31,44 +31,51 @@ $L_bn_mul_mont_begin::
jl $L000just_leave jl $L000just_leave
lea esi,DWORD PTR 20[esp] lea esi,DWORD PTR 20[esp]
lea edx,DWORD PTR 24[esp] lea edx,DWORD PTR 24[esp]
mov ebp,esp
add edi,2 add edi,2
neg edi neg edi
lea esp,DWORD PTR [edi*4+esp-32] lea ebp,DWORD PTR [edi*4+esp-32]
neg edi neg edi
mov eax,esp mov eax,ebp
sub eax,edx sub eax,edx
and eax,2047 and eax,2047
sub esp,eax sub ebp,eax
xor edx,esp xor edx,ebp
and edx,2048 and edx,2048
xor edx,2048 xor edx,2048
sub esp,edx sub ebp,edx
and esp,-64 and ebp,-64
mov eax,ebp mov eax,esp
sub eax,esp sub eax,ebp
and eax,-4096 and eax,-4096
mov edx,esp
lea esp,DWORD PTR [eax*1+ebp]
mov eax,DWORD PTR [esp]
cmp esp,ebp
ja $L001page_walk
jmp $L002page_walk_done
ALIGN 16
$L001page_walk: $L001page_walk:
mov edx,DWORD PTR [eax*1+esp] lea esp,DWORD PTR [esp-4096]
sub eax,4096 mov eax,DWORD PTR [esp]
DB 46 cmp esp,ebp
jnc $L001page_walk ja $L001page_walk
$L002page_walk_done:
mov eax,DWORD PTR [esi] mov eax,DWORD PTR [esi]
mov ebx,DWORD PTR 4[esi] mov ebx,DWORD PTR 4[esi]
mov ecx,DWORD PTR 8[esi] mov ecx,DWORD PTR 8[esi]
mov edx,DWORD PTR 12[esi] mov ebp,DWORD PTR 12[esi]
mov esi,DWORD PTR 16[esi] mov esi,DWORD PTR 16[esi]
mov esi,DWORD PTR [esi] mov esi,DWORD PTR [esi]
mov DWORD PTR 4[esp],eax mov DWORD PTR 4[esp],eax
mov DWORD PTR 8[esp],ebx mov DWORD PTR 8[esp],ebx
mov DWORD PTR 12[esp],ecx mov DWORD PTR 12[esp],ecx
mov DWORD PTR 16[esp],edx mov DWORD PTR 16[esp],ebp
mov DWORD PTR 20[esp],esi mov DWORD PTR 20[esp],esi
lea ebx,DWORD PTR [edi-3] lea ebx,DWORD PTR [edi-3]
mov DWORD PTR 24[esp],ebp mov DWORD PTR 24[esp],edx
lea eax,DWORD PTR _OPENSSL_ia32cap_P lea eax,DWORD PTR _OPENSSL_ia32cap_P
bt DWORD PTR [eax],26 bt DWORD PTR [eax],26
jnc $L002non_sse2 jnc $L003non_sse2
mov eax,-1 mov eax,-1
movd mm7,eax movd mm7,eax
mov esi,DWORD PTR 8[esp] mov esi,DWORD PTR 8[esp]
@ -92,7 +99,7 @@ DB 46
psrlq mm3,32 psrlq mm3,32
inc ecx inc ecx
ALIGN 16 ALIGN 16
$L0031st: $L0041st:
pmuludq mm0,mm4 pmuludq mm0,mm4
pmuludq mm1,mm5 pmuludq mm1,mm5
paddq mm2,mm0 paddq mm2,mm0
@ -107,7 +114,7 @@ $L0031st:
psrlq mm3,32 psrlq mm3,32
lea ecx,DWORD PTR 1[ecx] lea ecx,DWORD PTR 1[ecx]
cmp ecx,ebx cmp ecx,ebx
jl $L0031st jl $L0041st
pmuludq mm0,mm4 pmuludq mm0,mm4
pmuludq mm1,mm5 pmuludq mm1,mm5
paddq mm2,mm0 paddq mm2,mm0
@ -121,7 +128,7 @@ $L0031st:
paddq mm3,mm2 paddq mm3,mm2
movq QWORD PTR 32[ebx*4+esp],mm3 movq QWORD PTR 32[ebx*4+esp],mm3
inc edx inc edx
$L004outer: $L005outer:
xor ecx,ecx xor ecx,ecx
movd mm4,DWORD PTR [edx*4+edi] movd mm4,DWORD PTR [edx*4+edi]
movd mm5,DWORD PTR [esi] movd mm5,DWORD PTR [esi]
@ -143,7 +150,7 @@ $L004outer:
paddq mm2,mm6 paddq mm2,mm6
inc ecx inc ecx
dec ebx dec ebx
$L005inner: $L006inner:
pmuludq mm0,mm4 pmuludq mm0,mm4
pmuludq mm1,mm5 pmuludq mm1,mm5
paddq mm2,mm0 paddq mm2,mm0
@ -160,7 +167,7 @@ $L005inner:
paddq mm2,mm6 paddq mm2,mm6
dec ebx dec ebx
lea ecx,DWORD PTR 1[ecx] lea ecx,DWORD PTR 1[ecx]
jnz $L005inner jnz $L006inner
mov ebx,ecx mov ebx,ecx
pmuludq mm0,mm4 pmuludq mm0,mm4
pmuludq mm1,mm5 pmuludq mm1,mm5
@ -178,11 +185,11 @@ $L005inner:
movq QWORD PTR 32[ebx*4+esp],mm3 movq QWORD PTR 32[ebx*4+esp],mm3
lea edx,DWORD PTR 1[edx] lea edx,DWORD PTR 1[edx]
cmp edx,ebx cmp edx,ebx
jle $L004outer jle $L005outer
emms emms
jmp $L006common_tail jmp $L007common_tail
ALIGN 16 ALIGN 16
$L002non_sse2: $L003non_sse2:
mov esi,DWORD PTR 8[esp] mov esi,DWORD PTR 8[esp]
lea ebp,DWORD PTR 1[ebx] lea ebp,DWORD PTR 1[ebx]
mov edi,DWORD PTR 12[esp] mov edi,DWORD PTR 12[esp]
@ -193,12 +200,12 @@ $L002non_sse2:
lea eax,DWORD PTR 4[ebx*4+edi] lea eax,DWORD PTR 4[ebx*4+edi]
or ebp,edx or ebp,edx
mov edi,DWORD PTR [edi] mov edi,DWORD PTR [edi]
jz $L007bn_sqr_mont jz $L008bn_sqr_mont
mov DWORD PTR 28[esp],eax mov DWORD PTR 28[esp],eax
mov eax,DWORD PTR [esi] mov eax,DWORD PTR [esi]
xor edx,edx xor edx,edx
ALIGN 16 ALIGN 16
$L008mull: $L009mull:
mov ebp,edx mov ebp,edx
mul edi mul edi
add ebp,eax add ebp,eax
@ -207,7 +214,7 @@ $L008mull:
mov eax,DWORD PTR [ecx*4+esi] mov eax,DWORD PTR [ecx*4+esi]
cmp ecx,ebx cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp mov DWORD PTR 28[ecx*4+esp],ebp
jl $L008mull jl $L009mull
mov ebp,edx mov ebp,edx
mul edi mul edi
mov edi,DWORD PTR 20[esp] mov edi,DWORD PTR 20[esp]
@ -225,9 +232,9 @@ $L008mull:
mov eax,DWORD PTR 4[esi] mov eax,DWORD PTR 4[esi]
adc edx,0 adc edx,0
inc ecx inc ecx
jmp $L0092ndmadd jmp $L0102ndmadd
ALIGN 16 ALIGN 16
$L0101stmadd: $L0111stmadd:
mov ebp,edx mov ebp,edx
mul edi mul edi
add ebp,DWORD PTR 32[ecx*4+esp] add ebp,DWORD PTR 32[ecx*4+esp]
@ -238,7 +245,7 @@ $L0101stmadd:
adc edx,0 adc edx,0
cmp ecx,ebx cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp mov DWORD PTR 28[ecx*4+esp],ebp
jl $L0101stmadd jl $L0111stmadd
mov ebp,edx mov ebp,edx
mul edi mul edi
add eax,DWORD PTR 32[ebx*4+esp] add eax,DWORD PTR 32[ebx*4+esp]
@ -261,7 +268,7 @@ $L0101stmadd:
adc edx,0 adc edx,0
mov ecx,1 mov ecx,1
ALIGN 16 ALIGN 16
$L0092ndmadd: $L0102ndmadd:
mov ebp,edx mov ebp,edx
mul edi mul edi
add ebp,DWORD PTR 32[ecx*4+esp] add ebp,DWORD PTR 32[ecx*4+esp]
@ -272,7 +279,7 @@ $L0092ndmadd:
adc edx,0 adc edx,0
cmp ecx,ebx cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp mov DWORD PTR 24[ecx*4+esp],ebp
jl $L0092ndmadd jl $L0102ndmadd
mov ebp,edx mov ebp,edx
mul edi mul edi
add ebp,DWORD PTR 32[ebx*4+esp] add ebp,DWORD PTR 32[ebx*4+esp]
@ -288,16 +295,16 @@ $L0092ndmadd:
mov DWORD PTR 32[ebx*4+esp],edx mov DWORD PTR 32[ebx*4+esp],edx
cmp ecx,DWORD PTR 28[esp] cmp ecx,DWORD PTR 28[esp]
mov DWORD PTR 36[ebx*4+esp],eax mov DWORD PTR 36[ebx*4+esp],eax
je $L006common_tail je $L007common_tail
mov edi,DWORD PTR [ecx] mov edi,DWORD PTR [ecx]
mov esi,DWORD PTR 8[esp] mov esi,DWORD PTR 8[esp]
mov DWORD PTR 12[esp],ecx mov DWORD PTR 12[esp],ecx
xor ecx,ecx xor ecx,ecx
xor edx,edx xor edx,edx
mov eax,DWORD PTR [esi] mov eax,DWORD PTR [esi]
jmp $L0101stmadd jmp $L0111stmadd
ALIGN 16 ALIGN 16
$L007bn_sqr_mont: $L008bn_sqr_mont:
mov DWORD PTR [esp],ebx mov DWORD PTR [esp],ebx
mov DWORD PTR 12[esp],ecx mov DWORD PTR 12[esp],ecx
mov eax,edi mov eax,edi
@ -308,7 +315,7 @@ $L007bn_sqr_mont:
and ebx,1 and ebx,1
inc ecx inc ecx
ALIGN 16 ALIGN 16
$L011sqr: $L012sqr:
mov eax,DWORD PTR [ecx*4+esi] mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx mov ebp,edx
mul edi mul edi
@ -320,7 +327,7 @@ $L011sqr:
cmp ecx,DWORD PTR [esp] cmp ecx,DWORD PTR [esp]
mov ebx,eax mov ebx,eax
mov DWORD PTR 28[ecx*4+esp],ebp mov DWORD PTR 28[ecx*4+esp],ebp
jl $L011sqr jl $L012sqr
mov eax,DWORD PTR [ecx*4+esi] mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx mov ebp,edx
mul edi mul edi
@ -344,7 +351,7 @@ $L011sqr:
mov eax,DWORD PTR 4[esi] mov eax,DWORD PTR 4[esi]
mov ecx,1 mov ecx,1
ALIGN 16 ALIGN 16
$L0123rdmadd: $L0133rdmadd:
mov ebp,edx mov ebp,edx
mul edi mul edi
add ebp,DWORD PTR 32[ecx*4+esp] add ebp,DWORD PTR 32[ecx*4+esp]
@ -363,7 +370,7 @@ $L0123rdmadd:
adc edx,0 adc edx,0
cmp ecx,ebx cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp mov DWORD PTR 24[ecx*4+esp],ebp
jl $L0123rdmadd jl $L0133rdmadd
mov ebp,edx mov ebp,edx
mul edi mul edi
add ebp,DWORD PTR 32[ebx*4+esp] add ebp,DWORD PTR 32[ebx*4+esp]
@ -379,7 +386,7 @@ $L0123rdmadd:
mov DWORD PTR 32[ebx*4+esp],edx mov DWORD PTR 32[ebx*4+esp],edx
cmp ecx,ebx cmp ecx,ebx
mov DWORD PTR 36[ebx*4+esp],eax mov DWORD PTR 36[ebx*4+esp],eax
je $L006common_tail je $L007common_tail
mov edi,DWORD PTR 4[ecx*4+esi] mov edi,DWORD PTR 4[ecx*4+esi]
lea ecx,DWORD PTR 1[ecx] lea ecx,DWORD PTR 1[ecx]
mov eax,edi mov eax,edi
@ -391,12 +398,12 @@ $L0123rdmadd:
xor ebp,ebp xor ebp,ebp
cmp ecx,ebx cmp ecx,ebx
lea ecx,DWORD PTR 1[ecx] lea ecx,DWORD PTR 1[ecx]
je $L013sqrlast je $L014sqrlast
mov ebx,edx mov ebx,edx
shr edx,1 shr edx,1
and ebx,1 and ebx,1
ALIGN 16 ALIGN 16
$L014sqradd: $L015sqradd:
mov eax,DWORD PTR [ecx*4+esi] mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx mov ebp,edx
mul edi mul edi
@ -412,13 +419,13 @@ $L014sqradd:
cmp ecx,DWORD PTR [esp] cmp ecx,DWORD PTR [esp]
mov DWORD PTR 28[ecx*4+esp],ebp mov DWORD PTR 28[ecx*4+esp],ebp
mov ebx,eax mov ebx,eax
jle $L014sqradd jle $L015sqradd
mov ebp,edx mov ebp,edx
add edx,edx add edx,edx
shr ebp,31 shr ebp,31
add edx,ebx add edx,ebx
adc ebp,0 adc ebp,0
$L013sqrlast: $L014sqrlast:
mov edi,DWORD PTR 20[esp] mov edi,DWORD PTR 20[esp]
mov esi,DWORD PTR 16[esp] mov esi,DWORD PTR 16[esp]
imul edi,DWORD PTR 32[esp] imul edi,DWORD PTR 32[esp]
@ -433,9 +440,9 @@ $L013sqrlast:
adc edx,0 adc edx,0
mov ecx,1 mov ecx,1
mov eax,DWORD PTR 4[esi] mov eax,DWORD PTR 4[esi]
jmp $L0123rdmadd jmp $L0133rdmadd
ALIGN 16 ALIGN 16
$L006common_tail: $L007common_tail:
mov ebp,DWORD PTR 16[esp] mov ebp,DWORD PTR 16[esp]
mov edi,DWORD PTR 4[esp] mov edi,DWORD PTR 4[esp]
lea esi,DWORD PTR 32[esp] lea esi,DWORD PTR 32[esp]
@ -443,13 +450,13 @@ $L006common_tail:
mov ecx,ebx mov ecx,ebx
xor edx,edx xor edx,edx
ALIGN 16 ALIGN 16
$L015sub: $L016sub:
sbb eax,DWORD PTR [edx*4+ebp] sbb eax,DWORD PTR [edx*4+ebp]
mov DWORD PTR [edx*4+edi],eax mov DWORD PTR [edx*4+edi],eax
dec ecx dec ecx
mov eax,DWORD PTR 4[edx*4+esi] mov eax,DWORD PTR 4[edx*4+esi]
lea edx,DWORD PTR 1[edx] lea edx,DWORD PTR 1[edx]
jge $L015sub jge $L016sub
sbb eax,0 sbb eax,0
and esi,eax and esi,eax
not eax not eax
@ -457,12 +464,12 @@ $L015sub:
and ebp,eax and ebp,eax
or esi,ebp or esi,ebp
ALIGN 16 ALIGN 16
$L016copy: $L017copy:
mov eax,DWORD PTR [ebx*4+esi] mov eax,DWORD PTR [ebx*4+esi]
mov DWORD PTR [ebx*4+edi],eax mov DWORD PTR [ebx*4+edi],eax
mov DWORD PTR 32[ebx*4+esp],ecx mov DWORD PTR 32[ebx*4+esp],ecx
dec ebx dec ebx
jge $L016copy jge $L017copy
mov esp,DWORD PTR 24[esp] mov esp,DWORD PTR 24[esp]
mov eax,1 mov eax,1
$L000just_leave: $L000just_leave:
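
The $L..page_walk hunks in the Montgomery-multiplication files above all follow the same rewritten probing pattern: instead of counting an offset down from the old frame, the stack pointer itself is lowered one 4096-byte page at a time and each page is read before descending further, so every guard page is touched before the new frame is used. The following is only an illustrative C sketch of that pattern (not part of the patch); the names page_walk and stack_image are hypothetical, and a 4096-byte page size is assumed.

#include <stdio.h>

enum { PAGE = 4096 };

/* Hypothetical sketch of the page-walk probe: step from the old stack
 * top down to the new frame one page at a time, reading a byte from
 * each page so it is faulted in before anything below it is addressed.
 * This mirrors the "lea rsp,[-4096+rsp]" / "mov r11,[rsp]" /
 * "cmp rsp,r10" / "ja ..._page_walk" sequence in the diff. */
static void page_walk(volatile unsigned char *hi, volatile unsigned char *lo)
{
    while (hi > lo) {
        hi -= PAGE;     /* descend one page                  */
        (void)*hi;      /* touch it before going any lower   */
    }
}

int main(void)
{
    static unsigned char stack_image[16 * PAGE];
    page_walk(stack_image + sizeof(stack_image), stack_image);
    puts("probed 16 pages");
    return 0;
}
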

24
deps/openssl/asm_obsolete/arm-void-gas/aes/bsaes-armv7.S

@ -1816,8 +1816,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done b .Lxts_enc_done
.align 4 .align 4
.Lxts_enc_6: .Lxts_enc_6:
vst1.64 {q14}, [r0,:128] @ next round tweak
veor q4, q4, q12 veor q4, q4, q12
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -1853,8 +1851,6 @@ bsaes_xts_encrypt:
.align 5 .align 5
.Lxts_enc_5: .Lxts_enc_5:
vst1.64 {q13}, [r0,:128] @ next round tweak
veor q3, q3, q11 veor q3, q3, q11
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -1883,8 +1879,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done b .Lxts_enc_done
.align 4 .align 4
.Lxts_enc_4: .Lxts_enc_4:
vst1.64 {q12}, [r0,:128] @ next round tweak
veor q2, q2, q10 veor q2, q2, q10
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -1910,8 +1904,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done b .Lxts_enc_done
.align 4 .align 4
.Lxts_enc_3: .Lxts_enc_3:
vst1.64 {q11}, [r0,:128] @ next round tweak
veor q1, q1, q9 veor q1, q1, q9
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -1936,8 +1928,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done b .Lxts_enc_done
.align 4 .align 4
.Lxts_enc_2: .Lxts_enc_2:
vst1.64 {q10}, [r0,:128] @ next round tweak
veor q0, q0, q8 veor q0, q0, q8
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -1960,7 +1950,7 @@ bsaes_xts_encrypt:
.align 4 .align 4
.Lxts_enc_1: .Lxts_enc_1:
mov r0, sp mov r0, sp
veor q0, q8 veor q0, q0, q8
mov r1, sp mov r1, sp
vst1.8 {q0}, [sp,:128] vst1.8 {q0}, [sp,:128]
mov r2, r10 mov r2, r10
@ -2346,8 +2336,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done b .Lxts_dec_done
.align 4 .align 4
.Lxts_dec_5: .Lxts_dec_5:
vst1.64 {q13}, [r0,:128] @ next round tweak
veor q3, q3, q11 veor q3, q3, q11
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -2376,8 +2364,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done b .Lxts_dec_done
.align 4 .align 4
.Lxts_dec_4: .Lxts_dec_4:
vst1.64 {q12}, [r0,:128] @ next round tweak
veor q2, q2, q10 veor q2, q2, q10
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -2403,8 +2389,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done b .Lxts_dec_done
.align 4 .align 4
.Lxts_dec_3: .Lxts_dec_3:
vst1.64 {q11}, [r0,:128] @ next round tweak
veor q1, q1, q9 veor q1, q1, q9
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -2429,8 +2413,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done b .Lxts_dec_done
.align 4 .align 4
.Lxts_dec_2: .Lxts_dec_2:
vst1.64 {q10}, [r0,:128] @ next round tweak
veor q0, q0, q8 veor q0, q0, q8
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -2453,12 +2435,12 @@ bsaes_xts_decrypt:
.align 4 .align 4
.Lxts_dec_1: .Lxts_dec_1:
mov r0, sp mov r0, sp
veor q0, q8 veor q0, q0, q8
mov r1, sp mov r1, sp
vst1.8 {q0}, [sp,:128] vst1.8 {q0}, [sp,:128]
mov r5, r2 @ preserve magic
mov r2, r10 mov r2, r10
mov r4, r3 @ preserve fp mov r4, r3 @ preserve fp
mov r5, r2 @ preserve magic
bl AES_decrypt bl AES_decrypt

127
deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s

@ -6,6 +6,8 @@
.type bn_mul_mont,@function .type bn_mul_mont,@function
.align 16 .align 16
bn_mul_mont: bn_mul_mont:
movl %r9d,%r9d
movq %rsp,%rax
testl $3,%r9d testl $3,%r9d
jnz .Lmul_enter jnz .Lmul_enter
cmpl $8,%r9d cmpl $8,%r9d
@ -25,29 +27,36 @@ bn_mul_mont:
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movl %r9d,%r9d negq %r9
leaq 2(%r9),%r10
movq %rsp,%r11 movq %rsp,%r11
negq %r10 leaq -16(%rsp,%r9,8),%r10
leaq (%rsp,%r10,8),%rsp negq %r9
andq $-1024,%rsp andq $-1024,%r10
movq %r11,8(%rsp,%r9,8)
.Lmul_body:
subq %rsp,%r11 subq %r10,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
jmp .Lmul_page_walk_done
.align 16
.Lmul_page_walk: .Lmul_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r11
.byte 0x66,0x2e cmpq %r10,%rsp
jnc .Lmul_page_walk ja .Lmul_page_walk
.Lmul_page_walk_done:
movq %rax,8(%rsp,%r9,8)
.Lmul_body:
movq %rdx,%r12 movq %rdx,%r12
movq (%r8),%r8 movq (%r8),%r8
movq (%r12),%rbx movq (%r12),%rbx
@ -215,19 +224,21 @@ bn_mul_mont:
movq 8(%rsp,%r9,8),%rsi movq 8(%rsp,%r9,8),%rsi
movq $1,%rax movq $1,%rax
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
.Lmul_epilogue: .Lmul_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size bn_mul_mont,.-bn_mul_mont .size bn_mul_mont,.-bn_mul_mont
.type bn_mul4x_mont,@function .type bn_mul4x_mont,@function
.align 16 .align 16
bn_mul4x_mont: bn_mul4x_mont:
movl %r9d,%r9d
movq %rsp,%rax
.Lmul4x_enter: .Lmul4x_enter:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
@ -236,23 +247,29 @@ bn_mul4x_mont:
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movl %r9d,%r9d negq %r9
leaq 4(%r9),%r10
movq %rsp,%r11 movq %rsp,%r11
negq %r10 leaq -32(%rsp,%r9,8),%r10
leaq (%rsp,%r10,8),%rsp negq %r9
andq $-1024,%rsp andq $-1024,%r10
movq %r11,8(%rsp,%r9,8) subq %r10,%r11
.Lmul4x_body:
subq %rsp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul4x_page_walk
jmp .Lmul4x_page_walk_done
.Lmul4x_page_walk: .Lmul4x_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r11
.byte 0x2e cmpq %r10,%rsp
jnc .Lmul4x_page_walk ja .Lmul4x_page_walk
.Lmul4x_page_walk_done:
movq %rax,8(%rsp,%r9,8)
.Lmul4x_body:
movq %rdi,16(%rsp,%r9,8) movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12 movq %rdx,%r12
movq (%r8),%r8 movq (%r8),%r8
@ -621,13 +638,13 @@ bn_mul4x_mont:
movdqu %xmm2,16(%rdi,%r14,1) movdqu %xmm2,16(%rdi,%r14,1)
movq 8(%rsp,%r9,8),%rsi movq 8(%rsp,%r9,8),%rsi
movq $1,%rax movq $1,%rax
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
.Lmul4x_epilogue: .Lmul4x_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size bn_mul4x_mont,.-bn_mul4x_mont .size bn_mul4x_mont,.-bn_mul4x_mont
@ -636,14 +653,15 @@ bn_mul4x_mont:
.type bn_sqr8x_mont,@function .type bn_sqr8x_mont,@function
.align 32 .align 32
bn_sqr8x_mont: bn_sqr8x_mont:
.Lsqr8x_enter:
movq %rsp,%rax movq %rsp,%rax
.Lsqr8x_enter:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
.Lsqr8x_prologue:
movl %r9d,%r10d movl %r9d,%r10d
shll $3,%r9d shll $3,%r9d
@ -656,33 +674,42 @@ bn_sqr8x_mont:
leaq -64(%rsp,%r9,2),%r11 leaq -64(%rsp,%r9,2),%r11
movq %rsp,%rbp
movq (%r8),%r8 movq (%r8),%r8
subq %rsi,%r11 subq %rsi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb .Lsqr8x_sp_alt jb .Lsqr8x_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -64(%rsp,%r9,2),%rsp leaq -64(%rbp,%r9,2),%rbp
jmp .Lsqr8x_sp_done jmp .Lsqr8x_sp_done
.align 32 .align 32
.Lsqr8x_sp_alt: .Lsqr8x_sp_alt:
leaq 4096-64(,%r9,2),%r10 leaq 4096-64(,%r9,2),%r10
leaq -64(%rsp,%r9,2),%rsp leaq -64(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
.Lsqr8x_sp_done: .Lsqr8x_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lsqr8x_page_walk
jmp .Lsqr8x_page_walk_done
.align 16
.Lsqr8x_page_walk: .Lsqr8x_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc .Lsqr8x_page_walk ja .Lsqr8x_page_walk
.Lsqr8x_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9

134
deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s

@ -6,16 +6,15 @@
.type bn_mul_mont_gather5,@function .type bn_mul_mont_gather5,@function
.align 64 .align 64
bn_mul_mont_gather5: bn_mul_mont_gather5:
movl %r9d,%r9d
movq %rsp,%rax
testl $7,%r9d testl $7,%r9d
jnz .Lmul_enter jnz .Lmul_enter
jmp .Lmul4x_enter jmp .Lmul4x_enter
.align 16 .align 16
.Lmul_enter: .Lmul_enter:
movl %r9d,%r9d
movq %rsp,%rax
movd 8(%rsp),%xmm5 movd 8(%rsp),%xmm5
leaq .Linc(%rip),%r10
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
@ -23,26 +22,36 @@ bn_mul_mont_gather5:
pushq %r14 pushq %r14
pushq %r15 pushq %r15
leaq 2(%r9),%r11 negq %r9
negq %r11 movq %rsp,%r11
leaq -264(%rsp,%r11,8),%rsp leaq -280(%rsp,%r9,8),%r10
andq $-1024,%rsp negq %r9
andq $-1024,%r10
movq %rax,8(%rsp,%r9,8)
.Lmul_body:
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
jmp .Lmul_page_walk_done
subq %rsp,%rax
andq $-4096,%rax
.Lmul_page_walk: .Lmul_page_walk:
movq (%rsp,%rax,1),%r11 leaq -4096(%rsp),%rsp
subq $4096,%rax movq (%rsp),%r11
.byte 0x2e cmpq %r10,%rsp
jnc .Lmul_page_walk ja .Lmul_page_walk
.Lmul_page_walk_done:
leaq .Linc(%rip),%r10
movq %rax,8(%rsp,%r9,8)
.Lmul_body:
leaq 128(%rdx),%r12 leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0 movdqa 0(%r10),%xmm0
@ -413,15 +422,16 @@ bn_mul_mont_gather5:
.type bn_mul4x_mont_gather5,@function .type bn_mul4x_mont_gather5,@function
.align 32 .align 32
bn_mul4x_mont_gather5: bn_mul4x_mont_gather5:
.Lmul4x_enter:
.byte 0x67 .byte 0x67
movq %rsp,%rax movq %rsp,%rax
.Lmul4x_enter:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
.Lmul4x_prologue:
.byte 0x67 .byte 0x67
shll $3,%r9d shll $3,%r9d
@ -438,32 +448,40 @@ bn_mul4x_mont_gather5:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb .Lmul4xsp_alt jb .Lmul4xsp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp .Lmul4xsp_done jmp .Lmul4xsp_done
.align 32 .align 32
.Lmul4xsp_alt: .Lmul4xsp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
.Lmul4xsp_done: .Lmul4xsp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmul4x_page_walk
jmp .Lmul4x_page_walk_done
.Lmul4x_page_walk: .Lmul4x_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc .Lmul4x_page_walk ja .Lmul4x_page_walk
.Lmul4x_page_walk_done:
negq %r9 negq %r9
@ -1022,6 +1040,7 @@ bn_power5:
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
.Lpower5_prologue:
shll $3,%r9d shll $3,%r9d
leal (%r9,%r9,2),%r10d leal (%r9,%r9,2),%r10d
@ -1036,32 +1055,40 @@ bn_power5:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb .Lpwr_sp_alt jb .Lpwr_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp .Lpwr_sp_done jmp .Lpwr_sp_done
.align 32 .align 32
.Lpwr_sp_alt: .Lpwr_sp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
.Lpwr_sp_done: .Lpwr_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lpwr_page_walk
jmp .Lpwr_page_walk_done
.Lpwr_page_walk: .Lpwr_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc .Lpwr_page_walk ja .Lpwr_page_walk
.Lpwr_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9
@ -1972,6 +1999,7 @@ bn_from_mont8x:
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
.Lfrom_prologue:
shll $3,%r9d shll $3,%r9d
leaq (%r9,%r9,2),%r10 leaq (%r9,%r9,2),%r10
@ -1986,32 +2014,40 @@ bn_from_mont8x:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb .Lfrom_sp_alt jb .Lfrom_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp .Lfrom_sp_done jmp .Lfrom_sp_done
.align 32 .align 32
.Lfrom_sp_alt: .Lfrom_sp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
.Lfrom_sp_done: .Lfrom_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lfrom_page_walk
jmp .Lfrom_page_walk_done
.Lfrom_page_walk: .Lfrom_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc .Lfrom_page_walk ja .Lfrom_page_walk
.Lfrom_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9

119
deps/openssl/asm_obsolete/x64-elf-gas/ec/ecp_nistz256-x86_64.s

@ -27,6 +27,7 @@ ecp_nistz256_mul_by_2:
pushq %r13 pushq %r13
movq 0(%rsi),%r8 movq 0(%rsi),%r8
xorq %r13,%r13
movq 8(%rsi),%r9 movq 8(%rsi),%r9
addq %r8,%r8 addq %r8,%r8
movq 16(%rsi),%r10 movq 16(%rsi),%r10
@ -37,7 +38,7 @@ ecp_nistz256_mul_by_2:
adcq %r10,%r10 adcq %r10,%r10
adcq %r11,%r11 adcq %r11,%r11
movq %r9,%rdx movq %r9,%rdx
sbbq %r13,%r13 adcq $0,%r13
subq 0(%rsi),%r8 subq 0(%rsi),%r8
movq %r10,%rcx movq %r10,%rcx
@ -45,14 +46,14 @@ ecp_nistz256_mul_by_2:
sbbq 16(%rsi),%r10 sbbq 16(%rsi),%r10
movq %r11,%r12 movq %r11,%r12
sbbq 24(%rsi),%r11 sbbq 24(%rsi),%r11
testq %r13,%r13 sbbq $0,%r13
cmovzq %rax,%r8 cmovcq %rax,%r8
cmovzq %rdx,%r9 cmovcq %rdx,%r9
movq %r8,0(%rdi) movq %r8,0(%rdi)
cmovzq %rcx,%r10 cmovcq %rcx,%r10
movq %r9,8(%rdi) movq %r9,8(%rdi)
cmovzq %r12,%r11 cmovcq %r12,%r11
movq %r10,16(%rdi) movq %r10,16(%rdi)
movq %r11,24(%rdi) movq %r11,24(%rdi)
@ -149,12 +150,12 @@ ecp_nistz256_mul_by_3:
sbbq $0,%r10 sbbq $0,%r10
movq %r11,%r12 movq %r11,%r12
sbbq .Lpoly+24(%rip),%r11 sbbq .Lpoly+24(%rip),%r11
testq %r13,%r13 sbbq $0,%r13
cmovzq %rax,%r8 cmovcq %rax,%r8
cmovzq %rdx,%r9 cmovcq %rdx,%r9
cmovzq %rcx,%r10 cmovcq %rcx,%r10
cmovzq %r12,%r11 cmovcq %r12,%r11
xorq %r13,%r13 xorq %r13,%r13
addq 0(%rsi),%r8 addq 0(%rsi),%r8
@ -171,14 +172,14 @@ ecp_nistz256_mul_by_3:
sbbq $0,%r10 sbbq $0,%r10
movq %r11,%r12 movq %r11,%r12
sbbq .Lpoly+24(%rip),%r11 sbbq .Lpoly+24(%rip),%r11
testq %r13,%r13 sbbq $0,%r13
cmovzq %rax,%r8 cmovcq %rax,%r8
cmovzq %rdx,%r9 cmovcq %rdx,%r9
movq %r8,0(%rdi) movq %r8,0(%rdi)
cmovzq %rcx,%r10 cmovcq %rcx,%r10
movq %r9,8(%rdi) movq %r9,8(%rdi)
cmovzq %r12,%r11 cmovcq %r12,%r11
movq %r10,16(%rdi) movq %r10,16(%rdi)
movq %r11,24(%rdi) movq %r11,24(%rdi)
@ -217,14 +218,14 @@ ecp_nistz256_add:
sbbq 16(%rsi),%r10 sbbq 16(%rsi),%r10
movq %r11,%r12 movq %r11,%r12
sbbq 24(%rsi),%r11 sbbq 24(%rsi),%r11
testq %r13,%r13 sbbq $0,%r13
cmovzq %rax,%r8 cmovcq %rax,%r8
cmovzq %rdx,%r9 cmovcq %rdx,%r9
movq %r8,0(%rdi) movq %r8,0(%rdi)
cmovzq %rcx,%r10 cmovcq %rcx,%r10
movq %r9,8(%rdi) movq %r9,8(%rdi)
cmovzq %r12,%r11 cmovcq %r12,%r11
movq %r10,16(%rdi) movq %r10,16(%rdi)
movq %r11,24(%rdi) movq %r11,24(%rdi)
@ -993,13 +994,14 @@ ecp_nistz256_avx2_select_w7:
.type __ecp_nistz256_add_toq,@function .type __ecp_nistz256_add_toq,@function
.align 32 .align 32
__ecp_nistz256_add_toq: __ecp_nistz256_add_toq:
xorq %r11,%r11
addq 0(%rbx),%r12 addq 0(%rbx),%r12
adcq 8(%rbx),%r13 adcq 8(%rbx),%r13
movq %r12,%rax movq %r12,%rax
adcq 16(%rbx),%r8 adcq 16(%rbx),%r8
adcq 24(%rbx),%r9 adcq 24(%rbx),%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -1007,14 +1009,14 @@ __ecp_nistz256_add_toq:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq %r12,0(%rdi) movq %r12,0(%rdi)
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq %r13,8(%rdi) movq %r13,8(%rdi)
cmovzq %r10,%r9 cmovcq %r10,%r9
movq %r8,16(%rdi) movq %r8,16(%rdi)
movq %r9,24(%rdi) movq %r9,24(%rdi)
@ -1082,13 +1084,14 @@ __ecp_nistz256_subq:
.type __ecp_nistz256_mul_by_2q,@function .type __ecp_nistz256_mul_by_2q,@function
.align 32 .align 32
__ecp_nistz256_mul_by_2q: __ecp_nistz256_mul_by_2q:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
adcq %r13,%r13 adcq %r13,%r13
movq %r12,%rax movq %r12,%rax
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -1096,14 +1099,14 @@ __ecp_nistz256_mul_by_2q:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq %r12,0(%rdi) movq %r12,0(%rdi)
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq %r13,8(%rdi) movq %r13,8(%rdi)
cmovzq %r10,%r9 cmovcq %r10,%r9
movq %r8,16(%rdi) movq %r8,16(%rdi)
movq %r9,24(%rdi) movq %r9,24(%rdi)
@ -1333,16 +1336,14 @@ ecp_nistz256_point_add:
movq %rdx,%rsi movq %rdx,%rsi
movdqa %xmm0,384(%rsp) movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp) movdqa %xmm1,384+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,416(%rsp) movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp) movdqa %xmm3,416+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,448(%rsp) movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp) movdqa %xmm5,448+16(%rsp)
por %xmm1,%xmm3 por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0 movdqu 0(%rsi),%xmm0
pshufd $0xb1,%xmm3,%xmm5 pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1 movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2 movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5 por %xmm3,%xmm5
@ -1354,14 +1355,14 @@ ecp_nistz256_point_add:
movdqa %xmm0,480(%rsp) movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4 pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp) movdqa %xmm1,480+16(%rsp)
por %xmm0,%xmm1 movdqu 64(%rsi),%xmm0
.byte 102,72,15,110,199 movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp) movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp) movdqa %xmm3,512+16(%rsp)
por %xmm2,%xmm3
por %xmm4,%xmm5 por %xmm4,%xmm5
pxor %xmm4,%xmm4 pxor %xmm4,%xmm4
por %xmm1,%xmm3 por %xmm0,%xmm1
.byte 102,72,15,110,199
leaq 64-0(%rsi),%rsi leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp) movq %rax,544+0(%rsp)
@ -1372,8 +1373,8 @@ ecp_nistz256_point_add:
call __ecp_nistz256_sqr_montq call __ecp_nistz256_sqr_montq
pcmpeqd %xmm4,%xmm5 pcmpeqd %xmm4,%xmm5
pshufd $0xb1,%xmm3,%xmm4 pshufd $0xb1,%xmm1,%xmm4
por %xmm3,%xmm4 por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5 pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3 pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4 por %xmm3,%xmm4
@ -1556,6 +1557,7 @@ ecp_nistz256_point_add:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
leaq 96(%rsp),%rsi leaq 96(%rsp),%rsi
adcq %r13,%r13 adcq %r13,%r13
@ -1563,7 +1565,7 @@ ecp_nistz256_point_add:
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -1571,15 +1573,15 @@ ecp_nistz256_point_add:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
movq 0(%rsi),%rax movq 0(%rsi),%rax
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq 8(%rsi),%rbp movq 8(%rsi),%rbp
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq 16(%rsi),%rcx movq 16(%rsi),%rcx
cmovzq %r10,%r9 cmovcq %r10,%r9
movq 24(%rsi),%r10 movq 24(%rsi),%r10
call __ecp_nistz256_subq call __ecp_nistz256_subq
@ -1733,16 +1735,14 @@ ecp_nistz256_point_add_affine:
movq 64+24(%rsi),%r8 movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp) movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp) movdqa %xmm1,320+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,352(%rsp) movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp) movdqa %xmm3,352+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,384(%rsp) movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp) movdqa %xmm5,384+16(%rsp)
por %xmm1,%xmm3 por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0 movdqu 0(%rbx),%xmm0
pshufd $0xb1,%xmm3,%xmm5 pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1 movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2 movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5 por %xmm3,%xmm5
@ -1860,6 +1860,7 @@ ecp_nistz256_point_add_affine:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
leaq 192(%rsp),%rsi leaq 192(%rsp),%rsi
adcq %r13,%r13 adcq %r13,%r13
@ -1867,7 +1868,7 @@ ecp_nistz256_point_add_affine:
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -1875,15 +1876,15 @@ ecp_nistz256_point_add_affine:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
movq 0(%rsi),%rax movq 0(%rsi),%rax
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq 8(%rsi),%rbp movq 8(%rsi),%rbp
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq 16(%rsi),%rcx movq 16(%rsi),%rcx
cmovzq %r10,%r9 cmovcq %r10,%r9
movq 24(%rsi),%r10 movq 24(%rsi),%r10
call __ecp_nistz256_subq call __ecp_nistz256_subq

4
deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s

@ -1255,9 +1255,9 @@ _shaext_shortcut:
.align 16 .align 16
.Loop_shaext: .Loop_shaext:
decq %rdx decq %rdx
leaq 64(%rsi),%rax leaq 64(%rsi),%r8
paddd %xmm4,%xmm1 paddd %xmm4,%xmm1
cmovneq %rax,%rsi cmovneq %r8,%rsi
movdqa %xmm0,%xmm8 movdqa %xmm0,%xmm8
.byte 15,56,201,229 .byte 15,56,201,229
movdqa %xmm0,%xmm2 movdqa %xmm0,%xmm2

127
deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s

@ -6,6 +6,8 @@
.p2align 4 .p2align 4
_bn_mul_mont: _bn_mul_mont:
movl %r9d,%r9d
movq %rsp,%rax
testl $3,%r9d testl $3,%r9d
jnz L$mul_enter jnz L$mul_enter
cmpl $8,%r9d cmpl $8,%r9d
@ -25,29 +27,36 @@ L$mul_enter:
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movl %r9d,%r9d negq %r9
leaq 2(%r9),%r10
movq %rsp,%r11 movq %rsp,%r11
negq %r10 leaq -16(%rsp,%r9,8),%r10
leaq (%rsp,%r10,8),%rsp negq %r9
andq $-1024,%rsp andq $-1024,%r10
movq %r11,8(%rsp,%r9,8)
L$mul_body:
subq %rsp,%r11 subq %r10,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul_page_walk
jmp L$mul_page_walk_done
.p2align 4
L$mul_page_walk: L$mul_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r11
.byte 0x66,0x2e cmpq %r10,%rsp
jnc L$mul_page_walk ja L$mul_page_walk
L$mul_page_walk_done:
movq %rax,8(%rsp,%r9,8)
L$mul_body:
movq %rdx,%r12 movq %rdx,%r12
movq (%r8),%r8 movq (%r8),%r8
movq (%r12),%rbx movq (%r12),%rbx
@ -215,19 +224,21 @@ L$copy:
movq 8(%rsp,%r9,8),%rsi movq 8(%rsp,%r9,8),%rsi
movq $1,%rax movq $1,%rax
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
L$mul_epilogue: L$mul_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.p2align 4 .p2align 4
bn_mul4x_mont: bn_mul4x_mont:
movl %r9d,%r9d
movq %rsp,%rax
L$mul4x_enter: L$mul4x_enter:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
@ -236,23 +247,29 @@ L$mul4x_enter:
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movl %r9d,%r9d negq %r9
leaq 4(%r9),%r10
movq %rsp,%r11 movq %rsp,%r11
negq %r10 leaq -32(%rsp,%r9,8),%r10
leaq (%rsp,%r10,8),%rsp negq %r9
andq $-1024,%rsp andq $-1024,%r10
movq %r11,8(%rsp,%r9,8) subq %r10,%r11
L$mul4x_body:
subq %rsp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul4x_page_walk
jmp L$mul4x_page_walk_done
L$mul4x_page_walk: L$mul4x_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r11
.byte 0x2e cmpq %r10,%rsp
jnc L$mul4x_page_walk ja L$mul4x_page_walk
L$mul4x_page_walk_done:
movq %rax,8(%rsp,%r9,8)
L$mul4x_body:
movq %rdi,16(%rsp,%r9,8) movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12 movq %rdx,%r12
movq (%r8),%r8 movq (%r8),%r8
@ -621,13 +638,13 @@ L$copy4x:
movdqu %xmm2,16(%rdi,%r14,1) movdqu %xmm2,16(%rdi,%r14,1)
movq 8(%rsp,%r9,8),%rsi movq 8(%rsp,%r9,8),%rsi
movq $1,%rax movq $1,%rax
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
L$mul4x_epilogue: L$mul4x_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
@ -636,14 +653,15 @@ L$mul4x_epilogue:
.p2align 5 .p2align 5
bn_sqr8x_mont: bn_sqr8x_mont:
L$sqr8x_enter:
movq %rsp,%rax movq %rsp,%rax
L$sqr8x_enter:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
L$sqr8x_prologue:
movl %r9d,%r10d movl %r9d,%r10d
shll $3,%r9d shll $3,%r9d
@ -656,33 +674,42 @@ L$sqr8x_enter:
leaq -64(%rsp,%r9,2),%r11 leaq -64(%rsp,%r9,2),%r11
movq %rsp,%rbp
movq (%r8),%r8 movq (%r8),%r8
subq %rsi,%r11 subq %rsi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb L$sqr8x_sp_alt jb L$sqr8x_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -64(%rsp,%r9,2),%rsp leaq -64(%rbp,%r9,2),%rbp
jmp L$sqr8x_sp_done jmp L$sqr8x_sp_done
.p2align 5 .p2align 5
L$sqr8x_sp_alt: L$sqr8x_sp_alt:
leaq 4096-64(,%r9,2),%r10 leaq 4096-64(,%r9,2),%r10
leaq -64(%rsp,%r9,2),%rsp leaq -64(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
L$sqr8x_sp_done: L$sqr8x_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$sqr8x_page_walk
jmp L$sqr8x_page_walk_done
.p2align 4
L$sqr8x_page_walk: L$sqr8x_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc L$sqr8x_page_walk ja L$sqr8x_page_walk
L$sqr8x_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9

134
deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s

@ -6,16 +6,15 @@
.p2align 6 .p2align 6
_bn_mul_mont_gather5: _bn_mul_mont_gather5:
movl %r9d,%r9d
movq %rsp,%rax
testl $7,%r9d testl $7,%r9d
jnz L$mul_enter jnz L$mul_enter
jmp L$mul4x_enter jmp L$mul4x_enter
.p2align 4 .p2align 4
L$mul_enter: L$mul_enter:
movl %r9d,%r9d
movq %rsp,%rax
movd 8(%rsp),%xmm5 movd 8(%rsp),%xmm5
leaq L$inc(%rip),%r10
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
@ -23,26 +22,36 @@ L$mul_enter:
pushq %r14 pushq %r14
pushq %r15 pushq %r15
leaq 2(%r9),%r11 negq %r9
negq %r11 movq %rsp,%r11
leaq -264(%rsp,%r11,8),%rsp leaq -280(%rsp,%r9,8),%r10
andq $-1024,%rsp negq %r9
andq $-1024,%r10
movq %rax,8(%rsp,%r9,8)
L$mul_body:
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul_page_walk
jmp L$mul_page_walk_done
subq %rsp,%rax
andq $-4096,%rax
L$mul_page_walk: L$mul_page_walk:
movq (%rsp,%rax,1),%r11 leaq -4096(%rsp),%rsp
subq $4096,%rax movq (%rsp),%r11
.byte 0x2e cmpq %r10,%rsp
jnc L$mul_page_walk ja L$mul_page_walk
L$mul_page_walk_done:
leaq L$inc(%rip),%r10
movq %rax,8(%rsp,%r9,8)
L$mul_body:
leaq 128(%rdx),%r12 leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0 movdqa 0(%r10),%xmm0
@ -413,15 +422,16 @@ L$mul_epilogue:
.p2align 5 .p2align 5
bn_mul4x_mont_gather5: bn_mul4x_mont_gather5:
L$mul4x_enter:
.byte 0x67 .byte 0x67
movq %rsp,%rax movq %rsp,%rax
L$mul4x_enter:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
L$mul4x_prologue:
.byte 0x67 .byte 0x67
shll $3,%r9d shll $3,%r9d
@ -438,32 +448,40 @@ L$mul4x_enter:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb L$mul4xsp_alt jb L$mul4xsp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp L$mul4xsp_done jmp L$mul4xsp_done
.p2align 5 .p2align 5
L$mul4xsp_alt: L$mul4xsp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
L$mul4xsp_done: L$mul4xsp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$mul4x_page_walk
jmp L$mul4x_page_walk_done
L$mul4x_page_walk: L$mul4x_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc L$mul4x_page_walk ja L$mul4x_page_walk
L$mul4x_page_walk_done:
negq %r9 negq %r9
@ -1022,6 +1040,7 @@ _bn_power5:
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
L$power5_prologue:
shll $3,%r9d shll $3,%r9d
leal (%r9,%r9,2),%r10d leal (%r9,%r9,2),%r10d
@ -1036,32 +1055,40 @@ _bn_power5:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb L$pwr_sp_alt jb L$pwr_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp L$pwr_sp_done jmp L$pwr_sp_done
.p2align 5 .p2align 5
L$pwr_sp_alt: L$pwr_sp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
L$pwr_sp_done: L$pwr_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$pwr_page_walk
jmp L$pwr_page_walk_done
L$pwr_page_walk: L$pwr_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc L$pwr_page_walk ja L$pwr_page_walk
L$pwr_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9
@ -1972,6 +1999,7 @@ bn_from_mont8x:
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
L$from_prologue:
shll $3,%r9d shll $3,%r9d
leaq (%r9,%r9,2),%r10 leaq (%r9,%r9,2),%r10
@ -1986,32 +2014,40 @@ bn_from_mont8x:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb L$from_sp_alt jb L$from_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp L$from_sp_done jmp L$from_sp_done
.p2align 5 .p2align 5
L$from_sp_alt: L$from_sp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
L$from_sp_done: L$from_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rax,%r11 movq %rsp,%r11
subq %rsp,%r11 subq %rbp,%r11
andq $-4096,%r11 andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$from_page_walk
jmp L$from_page_walk_done
L$from_page_walk: L$from_page_walk:
movq (%rsp,%r11,1),%r10 leaq -4096(%rsp),%rsp
subq $4096,%r11 movq (%rsp),%r10
.byte 0x2e cmpq %rbp,%rsp
jnc L$from_page_walk ja L$from_page_walk
L$from_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9

119
deps/openssl/asm_obsolete/x64-macosx-gas/ec/ecp_nistz256-x86_64.s

@ -27,6 +27,7 @@ _ecp_nistz256_mul_by_2:
pushq %r13 pushq %r13
movq 0(%rsi),%r8 movq 0(%rsi),%r8
xorq %r13,%r13
movq 8(%rsi),%r9 movq 8(%rsi),%r9
addq %r8,%r8 addq %r8,%r8
movq 16(%rsi),%r10 movq 16(%rsi),%r10
@ -37,7 +38,7 @@ _ecp_nistz256_mul_by_2:
adcq %r10,%r10 adcq %r10,%r10
adcq %r11,%r11 adcq %r11,%r11
movq %r9,%rdx movq %r9,%rdx
sbbq %r13,%r13 adcq $0,%r13
subq 0(%rsi),%r8 subq 0(%rsi),%r8
movq %r10,%rcx movq %r10,%rcx
@ -45,14 +46,14 @@ _ecp_nistz256_mul_by_2:
sbbq 16(%rsi),%r10 sbbq 16(%rsi),%r10
movq %r11,%r12 movq %r11,%r12
sbbq 24(%rsi),%r11 sbbq 24(%rsi),%r11
testq %r13,%r13 sbbq $0,%r13
cmovzq %rax,%r8 cmovcq %rax,%r8
cmovzq %rdx,%r9 cmovcq %rdx,%r9
movq %r8,0(%rdi) movq %r8,0(%rdi)
cmovzq %rcx,%r10 cmovcq %rcx,%r10
movq %r9,8(%rdi) movq %r9,8(%rdi)
cmovzq %r12,%r11 cmovcq %r12,%r11
movq %r10,16(%rdi) movq %r10,16(%rdi)
movq %r11,24(%rdi) movq %r11,24(%rdi)
@ -149,12 +150,12 @@ _ecp_nistz256_mul_by_3:
sbbq $0,%r10 sbbq $0,%r10
movq %r11,%r12 movq %r11,%r12
sbbq L$poly+24(%rip),%r11 sbbq L$poly+24(%rip),%r11
testq %r13,%r13 sbbq $0,%r13
cmovzq %rax,%r8 cmovcq %rax,%r8
cmovzq %rdx,%r9 cmovcq %rdx,%r9
cmovzq %rcx,%r10 cmovcq %rcx,%r10
cmovzq %r12,%r11 cmovcq %r12,%r11
xorq %r13,%r13 xorq %r13,%r13
addq 0(%rsi),%r8 addq 0(%rsi),%r8
@ -171,14 +172,14 @@ _ecp_nistz256_mul_by_3:
sbbq $0,%r10 sbbq $0,%r10
movq %r11,%r12 movq %r11,%r12
sbbq L$poly+24(%rip),%r11 sbbq L$poly+24(%rip),%r11
testq %r13,%r13 sbbq $0,%r13
cmovzq %rax,%r8 cmovcq %rax,%r8
cmovzq %rdx,%r9 cmovcq %rdx,%r9
movq %r8,0(%rdi) movq %r8,0(%rdi)
cmovzq %rcx,%r10 cmovcq %rcx,%r10
movq %r9,8(%rdi) movq %r9,8(%rdi)
cmovzq %r12,%r11 cmovcq %r12,%r11
movq %r10,16(%rdi) movq %r10,16(%rdi)
movq %r11,24(%rdi) movq %r11,24(%rdi)
@ -217,14 +218,14 @@ _ecp_nistz256_add:
sbbq 16(%rsi),%r10 sbbq 16(%rsi),%r10
movq %r11,%r12 movq %r11,%r12
sbbq 24(%rsi),%r11 sbbq 24(%rsi),%r11
testq %r13,%r13 sbbq $0,%r13
cmovzq %rax,%r8 cmovcq %rax,%r8
cmovzq %rdx,%r9 cmovcq %rdx,%r9
movq %r8,0(%rdi) movq %r8,0(%rdi)
cmovzq %rcx,%r10 cmovcq %rcx,%r10
movq %r9,8(%rdi) movq %r9,8(%rdi)
cmovzq %r12,%r11 cmovcq %r12,%r11
movq %r10,16(%rdi) movq %r10,16(%rdi)
movq %r11,24(%rdi) movq %r11,24(%rdi)
@ -993,13 +994,14 @@ _ecp_nistz256_avx2_select_w7:
.p2align 5 .p2align 5
__ecp_nistz256_add_toq: __ecp_nistz256_add_toq:
xorq %r11,%r11
addq 0(%rbx),%r12 addq 0(%rbx),%r12
adcq 8(%rbx),%r13 adcq 8(%rbx),%r13
movq %r12,%rax movq %r12,%rax
adcq 16(%rbx),%r8 adcq 16(%rbx),%r8
adcq 24(%rbx),%r9 adcq 24(%rbx),%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -1007,14 +1009,14 @@ __ecp_nistz256_add_toq:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq %r12,0(%rdi) movq %r12,0(%rdi)
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq %r13,8(%rdi) movq %r13,8(%rdi)
cmovzq %r10,%r9 cmovcq %r10,%r9
movq %r8,16(%rdi) movq %r8,16(%rdi)
movq %r9,24(%rdi) movq %r9,24(%rdi)
@ -1082,13 +1084,14 @@ __ecp_nistz256_subq:
.p2align 5 .p2align 5
__ecp_nistz256_mul_by_2q: __ecp_nistz256_mul_by_2q:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
adcq %r13,%r13 adcq %r13,%r13
movq %r12,%rax movq %r12,%rax
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -1096,14 +1099,14 @@ __ecp_nistz256_mul_by_2q:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq %r12,0(%rdi) movq %r12,0(%rdi)
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq %r13,8(%rdi) movq %r13,8(%rdi)
cmovzq %r10,%r9 cmovcq %r10,%r9
movq %r8,16(%rdi) movq %r8,16(%rdi)
movq %r9,24(%rdi) movq %r9,24(%rdi)
@ -1333,16 +1336,14 @@ _ecp_nistz256_point_add:
movq %rdx,%rsi movq %rdx,%rsi
movdqa %xmm0,384(%rsp) movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp) movdqa %xmm1,384+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,416(%rsp) movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp) movdqa %xmm3,416+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,448(%rsp) movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp) movdqa %xmm5,448+16(%rsp)
por %xmm1,%xmm3 por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0 movdqu 0(%rsi),%xmm0
pshufd $0xb1,%xmm3,%xmm5 pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1 movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2 movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5 por %xmm3,%xmm5
@ -1354,14 +1355,14 @@ _ecp_nistz256_point_add:
movdqa %xmm0,480(%rsp) movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4 pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp) movdqa %xmm1,480+16(%rsp)
por %xmm0,%xmm1 movdqu 64(%rsi),%xmm0
.byte 102,72,15,110,199 movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp) movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp) movdqa %xmm3,512+16(%rsp)
por %xmm2,%xmm3
por %xmm4,%xmm5 por %xmm4,%xmm5
pxor %xmm4,%xmm4 pxor %xmm4,%xmm4
por %xmm1,%xmm3 por %xmm0,%xmm1
.byte 102,72,15,110,199
leaq 64-0(%rsi),%rsi leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp) movq %rax,544+0(%rsp)
@ -1372,8 +1373,8 @@ _ecp_nistz256_point_add:
call __ecp_nistz256_sqr_montq call __ecp_nistz256_sqr_montq
pcmpeqd %xmm4,%xmm5 pcmpeqd %xmm4,%xmm5
pshufd $0xb1,%xmm3,%xmm4 pshufd $0xb1,%xmm1,%xmm4
por %xmm3,%xmm4 por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5 pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3 pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4 por %xmm3,%xmm4
@ -1556,6 +1557,7 @@ L$add_proceedq:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
leaq 96(%rsp),%rsi leaq 96(%rsp),%rsi
adcq %r13,%r13 adcq %r13,%r13
@ -1563,7 +1565,7 @@ L$add_proceedq:
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -1571,15 +1573,15 @@ L$add_proceedq:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
movq 0(%rsi),%rax movq 0(%rsi),%rax
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq 8(%rsi),%rbp movq 8(%rsi),%rbp
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq 16(%rsi),%rcx movq 16(%rsi),%rcx
cmovzq %r10,%r9 cmovcq %r10,%r9
movq 24(%rsi),%r10 movq 24(%rsi),%r10
call __ecp_nistz256_subq call __ecp_nistz256_subq
@ -1733,16 +1735,14 @@ _ecp_nistz256_point_add_affine:
movq 64+24(%rsi),%r8 movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp) movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp) movdqa %xmm1,320+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,352(%rsp) movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp) movdqa %xmm3,352+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,384(%rsp) movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp) movdqa %xmm5,384+16(%rsp)
por %xmm1,%xmm3 por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0 movdqu 0(%rbx),%xmm0
pshufd $0xb1,%xmm3,%xmm5 pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1 movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2 movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5 por %xmm3,%xmm5
@ -1860,6 +1860,7 @@ _ecp_nistz256_point_add_affine:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
leaq 192(%rsp),%rsi leaq 192(%rsp),%rsi
adcq %r13,%r13 adcq %r13,%r13
@ -1867,7 +1868,7 @@ _ecp_nistz256_point_add_affine:
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -1875,15 +1876,15 @@ _ecp_nistz256_point_add_affine:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
movq 0(%rsi),%rax movq 0(%rsi),%rax
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq 8(%rsi),%rbp movq 8(%rsi),%rbp
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq 16(%rsi),%rcx movq 16(%rsi),%rcx
cmovzq %r10,%r9 cmovcq %r10,%r9
movq 24(%rsi),%r10 movq 24(%rsi),%r10
call __ecp_nistz256_subq call __ecp_nistz256_subq

4
deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s

@ -1255,9 +1255,9 @@ _shaext_shortcut:
.p2align 4 .p2align 4
L$oop_shaext: L$oop_shaext:
decq %rdx decq %rdx
leaq 64(%rsi),%rax leaq 64(%rsi),%r8
paddd %xmm4,%xmm1 paddd %xmm4,%xmm1
cmovneq %rax,%rsi cmovneq %r8,%rsi
movdqa %xmm0,%xmm8 movdqa %xmm0,%xmm8
.byte 15,56,201,229 .byte 15,56,201,229
movdqa %xmm0,%xmm2 movdqa %xmm0,%xmm2

154
deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm

@ -19,6 +19,8 @@ $L$SEH_begin_bn_mul_mont::
mov r9,QWORD PTR[48+rsp] mov r9,QWORD PTR[48+rsp]
mov r9d,r9d
mov rax,rsp
test r9d,3 test r9d,3
jnz $L$mul_enter jnz $L$mul_enter
cmp r9d,8 cmp r9d,8
@ -38,29 +40,36 @@ $L$mul_enter::
push r14 push r14
push r15 push r15
mov r9d,r9d neg r9
lea r10,QWORD PTR[2+r9]
mov r11,rsp mov r11,rsp
neg r10 lea r10,QWORD PTR[((-16))+r9*8+rsp]
lea rsp,QWORD PTR[r10*8+rsp] neg r9
and rsp,-1024 and r10,-1024
mov QWORD PTR[8+r9*8+rsp],r11
$L$mul_body::
sub r11,rsp sub r11,r10
and r11,-4096 and r11,-4096
lea rsp,QWORD PTR[r11*1+r10]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul_page_walk
jmp $L$mul_page_walk_done
ALIGN 16
$L$mul_page_walk:: $L$mul_page_walk::
mov r10,QWORD PTR[r11*1+rsp] lea rsp,QWORD PTR[((-4096))+rsp]
sub r11,4096 mov r11,QWORD PTR[rsp]
DB 066h,02eh cmp rsp,r10
jnc $L$mul_page_walk ja $L$mul_page_walk
$L$mul_page_walk_done::
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul_body::
mov r12,rdx mov r12,rdx
mov r8,QWORD PTR[r8] mov r8,QWORD PTR[r8]
mov rbx,QWORD PTR[r12] mov rbx,QWORD PTR[r12]
@ -228,13 +237,13 @@ $L$copy::
mov rsi,QWORD PTR[8+r9*8+rsp] mov rsi,QWORD PTR[8+r9*8+rsp]
mov rax,1 mov rax,1
mov r15,QWORD PTR[rsi] mov r15,QWORD PTR[((-48))+rsi]
mov r14,QWORD PTR[8+rsi] mov r14,QWORD PTR[((-40))+rsi]
mov r13,QWORD PTR[16+rsi] mov r13,QWORD PTR[((-32))+rsi]
mov r12,QWORD PTR[24+rsi] mov r12,QWORD PTR[((-24))+rsi]
mov rbp,QWORD PTR[32+rsi] mov rbp,QWORD PTR[((-16))+rsi]
mov rbx,QWORD PTR[40+rsi] mov rbx,QWORD PTR[((-8))+rsi]
lea rsp,QWORD PTR[48+rsi] lea rsp,QWORD PTR[rsi]
$L$mul_epilogue:: $L$mul_epilogue::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp] mov rsi,QWORD PTR[16+rsp]
@ -256,6 +265,8 @@ $L$SEH_begin_bn_mul4x_mont::
mov r9,QWORD PTR[48+rsp] mov r9,QWORD PTR[48+rsp]
mov r9d,r9d
mov rax,rsp
$L$mul4x_enter:: $L$mul4x_enter::
push rbx push rbx
push rbp push rbp
@ -264,23 +275,29 @@ $L$mul4x_enter::
push r14 push r14
push r15 push r15
mov r9d,r9d neg r9
lea r10,QWORD PTR[4+r9]
mov r11,rsp mov r11,rsp
neg r10 lea r10,QWORD PTR[((-32))+r9*8+rsp]
lea rsp,QWORD PTR[r10*8+rsp] neg r9
and rsp,-1024 and r10,-1024
mov QWORD PTR[8+r9*8+rsp],r11 sub r11,r10
$L$mul4x_body::
sub r11,rsp
and r11,-4096 and r11,-4096
lea rsp,QWORD PTR[r11*1+r10]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul4x_page_walk
jmp $L$mul4x_page_walk_done
$L$mul4x_page_walk:: $L$mul4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp] lea rsp,QWORD PTR[((-4096))+rsp]
sub r11,4096 mov r11,QWORD PTR[rsp]
DB 02eh cmp rsp,r10
jnc $L$mul4x_page_walk ja $L$mul4x_page_walk
$L$mul4x_page_walk_done::
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul4x_body::
mov QWORD PTR[16+r9*8+rsp],rdi mov QWORD PTR[16+r9*8+rsp],rdi
mov r12,rdx mov r12,rdx
mov r8,QWORD PTR[r8] mov r8,QWORD PTR[r8]
@ -649,13 +666,13 @@ $L$copy4x::
movdqu XMMWORD PTR[16+r14*1+rdi],xmm2 movdqu XMMWORD PTR[16+r14*1+rdi],xmm2
mov rsi,QWORD PTR[8+r9*8+rsp] mov rsi,QWORD PTR[8+r9*8+rsp]
mov rax,1 mov rax,1
mov r15,QWORD PTR[rsi] mov r15,QWORD PTR[((-48))+rsi]
mov r14,QWORD PTR[8+rsi] mov r14,QWORD PTR[((-40))+rsi]
mov r13,QWORD PTR[16+rsi] mov r13,QWORD PTR[((-32))+rsi]
mov r12,QWORD PTR[24+rsi] mov r12,QWORD PTR[((-24))+rsi]
mov rbp,QWORD PTR[32+rsi] mov rbp,QWORD PTR[((-16))+rsi]
mov rbx,QWORD PTR[40+rsi] mov rbx,QWORD PTR[((-8))+rsi]
lea rsp,QWORD PTR[48+rsi] lea rsp,QWORD PTR[rsi]
$L$mul4x_epilogue:: $L$mul4x_epilogue::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp] mov rsi,QWORD PTR[16+rsp]
@ -679,14 +696,15 @@ $L$SEH_begin_bn_sqr8x_mont::
mov r9,QWORD PTR[48+rsp] mov r9,QWORD PTR[48+rsp]
$L$sqr8x_enter::
mov rax,rsp mov rax,rsp
$L$sqr8x_enter::
push rbx push rbx
push rbp push rbp
push r12 push r12
push r13 push r13
push r14 push r14
push r15 push r15
$L$sqr8x_prologue::
mov r10d,r9d mov r10d,r9d
shl r9d,3 shl r9d,3
@ -699,33 +717,42 @@ $L$sqr8x_enter::
lea r11,QWORD PTR[((-64))+r9*2+rsp] lea r11,QWORD PTR[((-64))+r9*2+rsp]
mov rbp,rsp
mov r8,QWORD PTR[r8] mov r8,QWORD PTR[r8]
sub r11,rsi sub r11,rsi
and r11,4095 and r11,4095
cmp r10,r11 cmp r10,r11
jb $L$sqr8x_sp_alt jb $L$sqr8x_sp_alt
sub rsp,r11 sub rbp,r11
lea rsp,QWORD PTR[((-64))+r9*2+rsp] lea rbp,QWORD PTR[((-64))+r9*2+rbp]
jmp $L$sqr8x_sp_done jmp $L$sqr8x_sp_done
ALIGN 32 ALIGN 32
$L$sqr8x_sp_alt:: $L$sqr8x_sp_alt::
lea r10,QWORD PTR[((4096-64))+r9*2] lea r10,QWORD PTR[((4096-64))+r9*2]
lea rsp,QWORD PTR[((-64))+r9*2+rsp] lea rbp,QWORD PTR[((-64))+r9*2+rbp]
sub r11,r10 sub r11,r10
mov r10,0 mov r10,0
cmovc r11,r10 cmovc r11,r10
sub rsp,r11 sub rbp,r11
$L$sqr8x_sp_done:: $L$sqr8x_sp_done::
and rsp,-64 and rbp,-64
mov r11,rax mov r11,rsp
sub r11,rsp sub r11,rbp
and r11,-4096 and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$sqr8x_page_walk
jmp $L$sqr8x_page_walk_done
ALIGN 16
$L$sqr8x_page_walk:: $L$sqr8x_page_walk::
mov r10,QWORD PTR[r11*1+rsp] lea rsp,QWORD PTR[((-4096))+rsp]
sub r11,4096 mov r10,QWORD PTR[rsp]
DB 02eh cmp rsp,rbp
jnc $L$sqr8x_page_walk ja $L$sqr8x_page_walk
$L$sqr8x_page_walk_done::
mov r10,r9 mov r10,r9
neg r9 neg r9
@ -860,22 +887,8 @@ mul_handler PROC PRIVATE
mov r10,QWORD PTR[192+r8] mov r10,QWORD PTR[192+r8]
mov rax,QWORD PTR[8+r10*8+rax] mov rax,QWORD PTR[8+r10*8+rax]
lea rax,QWORD PTR[48+rax]
mov rbx,QWORD PTR[((-8))+rax] jmp $L$common_pop_regs
mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax]
mov r13,QWORD PTR[((-32))+rax]
mov r14,QWORD PTR[((-40))+rax]
mov r15,QWORD PTR[((-48))+rax]
mov QWORD PTR[144+r8],rbx
mov QWORD PTR[160+r8],rbp
mov QWORD PTR[216+r8],r12
mov QWORD PTR[224+r8],r13
mov QWORD PTR[232+r8],r14
mov QWORD PTR[240+r8],r15
jmp $L$common_seh_tail
mul_handler ENDP mul_handler ENDP
@ -903,15 +916,21 @@ sqr_handler PROC PRIVATE
cmp rbx,r10 cmp rbx,r10
jb $L$common_seh_tail jb $L$common_seh_tail
mov r10d,DWORD PTR[4+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jb $L$common_pop_regs
mov rax,QWORD PTR[152+r8] mov rax,QWORD PTR[152+r8]
mov r10d,DWORD PTR[4+r11] mov r10d,DWORD PTR[8+r11]
lea r10,QWORD PTR[r10*1+rsi] lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10 cmp rbx,r10
jae $L$common_seh_tail jae $L$common_seh_tail
mov rax,QWORD PTR[40+rax] mov rax,QWORD PTR[40+rax]
$L$common_pop_regs::
mov rbx,QWORD PTR[((-8))+rax] mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax] mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax] mov r12,QWORD PTR[((-24))+rax]
@ -993,7 +1012,8 @@ DB 9,0,0,0
$L$SEH_info_bn_sqr8x_mont:: $L$SEH_info_bn_sqr8x_mont::
DB 9,0,0,0 DB 9,0,0,0
DD imagerel sqr_handler DD imagerel sqr_handler
DD imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue DD imagerel $L$sqr8x_prologue,imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue
ALIGN 8
.xdata ENDS .xdata ENDS
END END

153
deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm

@ -19,16 +19,15 @@ $L$SEH_begin_bn_mul_mont_gather5::
mov r9,QWORD PTR[48+rsp] mov r9,QWORD PTR[48+rsp]
mov r9d,r9d
mov rax,rsp
test r9d,7 test r9d,7
jnz $L$mul_enter jnz $L$mul_enter
jmp $L$mul4x_enter jmp $L$mul4x_enter
ALIGN 16 ALIGN 16
$L$mul_enter:: $L$mul_enter::
mov r9d,r9d
mov rax,rsp
movd xmm5,DWORD PTR[56+rsp] movd xmm5,DWORD PTR[56+rsp]
lea r10,QWORD PTR[$L$inc]
push rbx push rbx
push rbp push rbp
push r12 push r12
@ -36,26 +35,36 @@ $L$mul_enter::
push r14 push r14
push r15 push r15
lea r11,QWORD PTR[2+r9] neg r9
neg r11 mov r11,rsp
lea rsp,QWORD PTR[((-264))+r11*8+rsp] lea r10,QWORD PTR[((-280))+r9*8+rsp]
and rsp,-1024 neg r9
and r10,-1024
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul_body::
sub r11,r10
and r11,-4096
lea rsp,QWORD PTR[r11*1+r10]
mov r11,QWORD PTR[rsp]
cmp rsp,r10
ja $L$mul_page_walk
jmp $L$mul_page_walk_done
sub rax,rsp
and rax,-4096
$L$mul_page_walk:: $L$mul_page_walk::
mov r11,QWORD PTR[rax*1+rsp] lea rsp,QWORD PTR[((-4096))+rsp]
sub rax,4096 mov r11,QWORD PTR[rsp]
DB 02eh cmp rsp,r10
jnc $L$mul_page_walk ja $L$mul_page_walk
$L$mul_page_walk_done::
lea r10,QWORD PTR[$L$inc]
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul_body::
lea r12,QWORD PTR[128+rdx] lea r12,QWORD PTR[128+rdx]
movdqa xmm0,XMMWORD PTR[r10] movdqa xmm0,XMMWORD PTR[r10]
@ -441,15 +450,16 @@ $L$SEH_begin_bn_mul4x_mont_gather5::
mov r9,QWORD PTR[48+rsp] mov r9,QWORD PTR[48+rsp]
$L$mul4x_enter::
DB 067h DB 067h
mov rax,rsp mov rax,rsp
$L$mul4x_enter::
push rbx push rbx
push rbp push rbp
push r12 push r12
push r13 push r13
push r14 push r14
push r15 push r15
$L$mul4x_prologue::
DB 067h DB 067h
shl r9d,3 shl r9d,3
@ -466,32 +476,40 @@ DB 067h
lea r11,QWORD PTR[((-320))+r9*2+rsp] lea r11,QWORD PTR[((-320))+r9*2+rsp]
mov rbp,rsp
sub r11,rdi sub r11,rdi
and r11,4095 and r11,4095
cmp r10,r11 cmp r10,r11
jb $L$mul4xsp_alt jb $L$mul4xsp_alt
sub rsp,r11 sub rbp,r11
lea rsp,QWORD PTR[((-320))+r9*2+rsp] lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$mul4xsp_done jmp $L$mul4xsp_done
ALIGN 32 ALIGN 32
$L$mul4xsp_alt:: $L$mul4xsp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2] lea r10,QWORD PTR[((4096-320))+r9*2]
lea rsp,QWORD PTR[((-320))+r9*2+rsp] lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10 sub r11,r10
mov r10,0 mov r10,0
cmovc r11,r10 cmovc r11,r10
sub rsp,r11 sub rbp,r11
$L$mul4xsp_done:: $L$mul4xsp_done::
and rsp,-64 and rbp,-64
mov r11,rax mov r11,rsp
sub r11,rsp sub r11,rbp
and r11,-4096 and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$mul4x_page_walk
jmp $L$mul4x_page_walk_done
$L$mul4x_page_walk:: $L$mul4x_page_walk::
mov r10,QWORD PTR[r11*1+rsp] lea rsp,QWORD PTR[((-4096))+rsp]
sub r11,4096 mov r10,QWORD PTR[rsp]
DB 02eh cmp rsp,rbp
jnc $L$mul4x_page_walk ja $L$mul4x_page_walk
$L$mul4x_page_walk_done::
neg r9 neg r9
@ -1065,6 +1083,7 @@ $L$SEH_begin_bn_power5::
push r13 push r13
push r14 push r14
push r15 push r15
$L$power5_prologue::
shl r9d,3 shl r9d,3
lea r10d,DWORD PTR[r9*2+r9] lea r10d,DWORD PTR[r9*2+r9]
@ -1079,32 +1098,40 @@ $L$SEH_begin_bn_power5::
lea r11,QWORD PTR[((-320))+r9*2+rsp] lea r11,QWORD PTR[((-320))+r9*2+rsp]
mov rbp,rsp
sub r11,rdi sub r11,rdi
and r11,4095 and r11,4095
cmp r10,r11 cmp r10,r11
jb $L$pwr_sp_alt jb $L$pwr_sp_alt
sub rsp,r11 sub rbp,r11
lea rsp,QWORD PTR[((-320))+r9*2+rsp] lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$pwr_sp_done jmp $L$pwr_sp_done
ALIGN 32 ALIGN 32
$L$pwr_sp_alt:: $L$pwr_sp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2] lea r10,QWORD PTR[((4096-320))+r9*2]
lea rsp,QWORD PTR[((-320))+r9*2+rsp] lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10 sub r11,r10
mov r10,0 mov r10,0
cmovc r11,r10 cmovc r11,r10
sub rsp,r11 sub rbp,r11
$L$pwr_sp_done:: $L$pwr_sp_done::
and rsp,-64 and rbp,-64
mov r11,rax mov r11,rsp
sub r11,rsp sub r11,rbp
and r11,-4096 and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$pwr_page_walk
jmp $L$pwr_page_walk_done
$L$pwr_page_walk:: $L$pwr_page_walk::
mov r10,QWORD PTR[r11*1+rsp] lea rsp,QWORD PTR[((-4096))+rsp]
sub r11,4096 mov r10,QWORD PTR[rsp]
DB 02eh cmp rsp,rbp
jnc $L$pwr_page_walk ja $L$pwr_page_walk
$L$pwr_page_walk_done::
mov r10,r9 mov r10,r9
neg r9 neg r9
@ -2030,6 +2057,7 @@ DB 067h
push r13 push r13
push r14 push r14
push r15 push r15
$L$from_prologue::
shl r9d,3 shl r9d,3
lea r10,QWORD PTR[r9*2+r9] lea r10,QWORD PTR[r9*2+r9]
@ -2044,32 +2072,40 @@ DB 067h
lea r11,QWORD PTR[((-320))+r9*2+rsp] lea r11,QWORD PTR[((-320))+r9*2+rsp]
mov rbp,rsp
sub r11,rdi sub r11,rdi
and r11,4095 and r11,4095
cmp r10,r11 cmp r10,r11
jb $L$from_sp_alt jb $L$from_sp_alt
sub rsp,r11 sub rbp,r11
lea rsp,QWORD PTR[((-320))+r9*2+rsp] lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$from_sp_done jmp $L$from_sp_done
ALIGN 32 ALIGN 32
$L$from_sp_alt:: $L$from_sp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2] lea r10,QWORD PTR[((4096-320))+r9*2]
lea rsp,QWORD PTR[((-320))+r9*2+rsp] lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10 sub r11,r10
mov r10,0 mov r10,0
cmovc r11,r10 cmovc r11,r10
sub rsp,r11 sub rbp,r11
$L$from_sp_done:: $L$from_sp_done::
and rsp,-64 and rbp,-64
mov r11,rax mov r11,rsp
sub r11,rsp sub r11,rbp
and r11,-4096 and r11,-4096
lea rsp,QWORD PTR[rbp*1+r11]
mov r10,QWORD PTR[rsp]
cmp rsp,rbp
ja $L$from_page_walk
jmp $L$from_page_walk_done
$L$from_page_walk:: $L$from_page_walk::
mov r10,QWORD PTR[r11*1+rsp] lea rsp,QWORD PTR[((-4096))+rsp]
sub r11,4096 mov r10,QWORD PTR[rsp]
DB 02eh cmp rsp,rbp
jnc $L$from_page_walk ja $L$from_page_walk
$L$from_page_walk_done::
mov r10,r9 mov r10,r9
neg r9 neg r9
@ -2383,9 +2419,14 @@ mul_handler PROC PRIVATE
cmp rbx,r10 cmp rbx,r10
jb $L$common_seh_tail jb $L$common_seh_tail
mov r10d,DWORD PTR[4+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jb $L$common_pop_regs
mov rax,QWORD PTR[152+r8] mov rax,QWORD PTR[152+r8]
mov r10d,DWORD PTR[4+r11] mov r10d,DWORD PTR[8+r11]
lea r10,QWORD PTR[r10*1+rsi] lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10 cmp rbx,r10
jae $L$common_seh_tail jae $L$common_seh_tail
@ -2397,11 +2438,11 @@ mul_handler PROC PRIVATE
mov r10,QWORD PTR[192+r8] mov r10,QWORD PTR[192+r8]
mov rax,QWORD PTR[8+r10*8+rax] mov rax,QWORD PTR[8+r10*8+rax]
jmp $L$body_proceed jmp $L$common_pop_regs
$L$body_40:: $L$body_40::
mov rax,QWORD PTR[40+rax] mov rax,QWORD PTR[40+rax]
$L$body_proceed:: $L$common_pop_regs::
mov rbx,QWORD PTR[((-8))+rax] mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax] mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax] mov r12,QWORD PTR[((-24))+rax]
@ -2483,22 +2524,22 @@ ALIGN 8
$L$SEH_info_bn_mul_mont_gather5:: $L$SEH_info_bn_mul_mont_gather5::
DB 9,0,0,0 DB 9,0,0,0
DD imagerel mul_handler DD imagerel mul_handler
DD imagerel $L$mul_body,imagerel $L$mul_epilogue DD imagerel $L$mul_body,imagerel $L$mul_body,imagerel $L$mul_epilogue
ALIGN 8 ALIGN 8
$L$SEH_info_bn_mul4x_mont_gather5:: $L$SEH_info_bn_mul4x_mont_gather5::
DB 9,0,0,0 DB 9,0,0,0
DD imagerel mul_handler DD imagerel mul_handler
DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue DD imagerel $L$mul4x_prologue,imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
ALIGN 8 ALIGN 8
$L$SEH_info_bn_power5:: $L$SEH_info_bn_power5::
DB 9,0,0,0 DB 9,0,0,0
DD imagerel mul_handler DD imagerel mul_handler
DD imagerel $L$power5_body,imagerel $L$power5_epilogue DD imagerel $L$power5_prologue,imagerel $L$power5_body,imagerel $L$power5_epilogue
ALIGN 8 ALIGN 8
$L$SEH_info_bn_from_mont8x:: $L$SEH_info_bn_from_mont8x::
DB 9,0,0,0 DB 9,0,0,0
DD imagerel mul_handler DD imagerel mul_handler
DD imagerel $L$from_body,imagerel $L$from_epilogue DD imagerel $L$from_prologue,imagerel $L$from_body,imagerel $L$from_epilogue
ALIGN 8 ALIGN 8
$L$SEH_info_bn_gather5:: $L$SEH_info_bn_gather5::
DB 001h,00bh,003h,00ah DB 001h,00bh,003h,00ah

119
deps/openssl/asm_obsolete/x64-win32-masm/ec/ecp_nistz256-x86_64.asm

@ -36,6 +36,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
push r13 push r13
mov r8,QWORD PTR[rsi] mov r8,QWORD PTR[rsi]
xor r13,r13
mov r9,QWORD PTR[8+rsi] mov r9,QWORD PTR[8+rsi]
add r8,r8 add r8,r8
mov r10,QWORD PTR[16+rsi] mov r10,QWORD PTR[16+rsi]
@ -46,7 +47,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
adc r10,r10 adc r10,r10
adc r11,r11 adc r11,r11
mov rdx,r9 mov rdx,r9
sbb r13,r13 adc r13,0
sub r8,QWORD PTR[rsi] sub r8,QWORD PTR[rsi]
mov rcx,r10 mov rcx,r10
@ -54,14 +55,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
sbb r10,QWORD PTR[16+rsi] sbb r10,QWORD PTR[16+rsi]
mov r12,r11 mov r12,r11
sbb r11,QWORD PTR[24+rsi] sbb r11,QWORD PTR[24+rsi]
test r13,r13 sbb r13,0
cmovz r8,rax cmovc r8,rax
cmovz r9,rdx cmovc r9,rdx
mov QWORD PTR[rdi],r8 mov QWORD PTR[rdi],r8
cmovz r10,rcx cmovc r10,rcx
mov QWORD PTR[8+rdi],r9 mov QWORD PTR[8+rdi],r9
cmovz r11,r12 cmovc r11,r12
mov QWORD PTR[16+rdi],r10 mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11 mov QWORD PTR[24+rdi],r11
@ -180,12 +181,12 @@ $L$SEH_begin_ecp_nistz256_mul_by_3::
sbb r10,0 sbb r10,0
mov r12,r11 mov r12,r11
sbb r11,QWORD PTR[(($L$poly+24))] sbb r11,QWORD PTR[(($L$poly+24))]
test r13,r13 sbb r13,0
cmovz r8,rax cmovc r8,rax
cmovz r9,rdx cmovc r9,rdx
cmovz r10,rcx cmovc r10,rcx
cmovz r11,r12 cmovc r11,r12
xor r13,r13 xor r13,r13
add r8,QWORD PTR[rsi] add r8,QWORD PTR[rsi]
@ -202,14 +203,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_3::
sbb r10,0 sbb r10,0
mov r12,r11 mov r12,r11
sbb r11,QWORD PTR[(($L$poly+24))] sbb r11,QWORD PTR[(($L$poly+24))]
test r13,r13 sbb r13,0
cmovz r8,rax cmovc r8,rax
cmovz r9,rdx cmovc r9,rdx
mov QWORD PTR[rdi],r8 mov QWORD PTR[rdi],r8
cmovz r10,rcx cmovc r10,rcx
mov QWORD PTR[8+rdi],r9 mov QWORD PTR[8+rdi],r9
cmovz r11,r12 cmovc r11,r12
mov QWORD PTR[16+rdi],r10 mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11 mov QWORD PTR[24+rdi],r11
@ -260,14 +261,14 @@ $L$SEH_begin_ecp_nistz256_add::
sbb r10,QWORD PTR[16+rsi] sbb r10,QWORD PTR[16+rsi]
mov r12,r11 mov r12,r11
sbb r11,QWORD PTR[24+rsi] sbb r11,QWORD PTR[24+rsi]
test r13,r13 sbb r13,0
cmovz r8,rax cmovc r8,rax
cmovz r9,rdx cmovc r9,rdx
mov QWORD PTR[rdi],r8 mov QWORD PTR[rdi],r8
cmovz r10,rcx cmovc r10,rcx
mov QWORD PTR[8+rdi],r9 mov QWORD PTR[8+rdi],r9
cmovz r11,r12 cmovc r11,r12
mov QWORD PTR[16+rdi],r10 mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11 mov QWORD PTR[24+rdi],r11
@ -1167,13 +1168,14 @@ ecp_nistz256_avx2_select_w7 ENDP
ALIGN 32 ALIGN 32
__ecp_nistz256_add_toq PROC PRIVATE __ecp_nistz256_add_toq PROC PRIVATE
xor r11,r11
add r12,QWORD PTR[rbx] add r12,QWORD PTR[rbx]
adc r13,QWORD PTR[8+rbx] adc r13,QWORD PTR[8+rbx]
mov rax,r12 mov rax,r12
adc r8,QWORD PTR[16+rbx] adc r8,QWORD PTR[16+rbx]
adc r9,QWORD PTR[24+rbx] adc r9,QWORD PTR[24+rbx]
mov rbp,r13 mov rbp,r13
sbb r11,r11 adc r11,0
sub r12,-1 sub r12,-1
mov rcx,r8 mov rcx,r8
@ -1181,14 +1183,14 @@ __ecp_nistz256_add_toq PROC PRIVATE
sbb r8,0 sbb r8,0
mov r10,r9 mov r10,r9
sbb r9,r15 sbb r9,r15
test r11,r11 sbb r11,0
cmovz r12,rax cmovc r12,rax
cmovz r13,rbp cmovc r13,rbp
mov QWORD PTR[rdi],r12 mov QWORD PTR[rdi],r12
cmovz r8,rcx cmovc r8,rcx
mov QWORD PTR[8+rdi],r13 mov QWORD PTR[8+rdi],r13
cmovz r9,r10 cmovc r9,r10
mov QWORD PTR[16+rdi],r8 mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9 mov QWORD PTR[24+rdi],r9
@ -1256,13 +1258,14 @@ __ecp_nistz256_subq ENDP
ALIGN 32 ALIGN 32
__ecp_nistz256_mul_by_2q PROC PRIVATE __ecp_nistz256_mul_by_2q PROC PRIVATE
xor r11,r11
add r12,r12 add r12,r12
adc r13,r13 adc r13,r13
mov rax,r12 mov rax,r12
adc r8,r8 adc r8,r8
adc r9,r9 adc r9,r9
mov rbp,r13 mov rbp,r13
sbb r11,r11 adc r11,0
sub r12,-1 sub r12,-1
mov rcx,r8 mov rcx,r8
@ -1270,14 +1273,14 @@ __ecp_nistz256_mul_by_2q PROC PRIVATE
sbb r8,0 sbb r8,0
mov r10,r9 mov r10,r9
sbb r9,r15 sbb r9,r15
test r11,r11 sbb r11,0
cmovz r12,rax cmovc r12,rax
cmovz r13,rbp cmovc r13,rbp
mov QWORD PTR[rdi],r12 mov QWORD PTR[rdi],r12
cmovz r8,rcx cmovc r8,rcx
mov QWORD PTR[8+rdi],r13 mov QWORD PTR[8+rdi],r13
cmovz r9,r10 cmovc r9,r10
mov QWORD PTR[16+rdi],r8 mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9 mov QWORD PTR[24+rdi],r9
@ -1527,16 +1530,14 @@ $L$SEH_begin_ecp_nistz256_point_add::
mov rsi,rdx mov rsi,rdx
movdqa XMMWORD PTR[384+rsp],xmm0 movdqa XMMWORD PTR[384+rsp],xmm0
movdqa XMMWORD PTR[(384+16)+rsp],xmm1 movdqa XMMWORD PTR[(384+16)+rsp],xmm1
por xmm1,xmm0
movdqa XMMWORD PTR[416+rsp],xmm2 movdqa XMMWORD PTR[416+rsp],xmm2
movdqa XMMWORD PTR[(416+16)+rsp],xmm3 movdqa XMMWORD PTR[(416+16)+rsp],xmm3
por xmm3,xmm2
movdqa XMMWORD PTR[448+rsp],xmm4 movdqa XMMWORD PTR[448+rsp],xmm4
movdqa XMMWORD PTR[(448+16)+rsp],xmm5 movdqa XMMWORD PTR[(448+16)+rsp],xmm5
por xmm3,xmm1 por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rsi] movdqu xmm0,XMMWORD PTR[rsi]
pshufd xmm5,xmm3,1h pshufd xmm3,xmm5,1h
movdqu xmm1,XMMWORD PTR[16+rsi] movdqu xmm1,XMMWORD PTR[16+rsi]
movdqu xmm2,XMMWORD PTR[32+rsi] movdqu xmm2,XMMWORD PTR[32+rsi]
por xmm5,xmm3 por xmm5,xmm3
@ -1548,14 +1549,14 @@ $L$SEH_begin_ecp_nistz256_point_add::
movdqa XMMWORD PTR[480+rsp],xmm0 movdqa XMMWORD PTR[480+rsp],xmm0
pshufd xmm4,xmm5,01eh pshufd xmm4,xmm5,01eh
movdqa XMMWORD PTR[(480+16)+rsp],xmm1 movdqa XMMWORD PTR[(480+16)+rsp],xmm1
por xmm1,xmm0 movdqu xmm0,XMMWORD PTR[64+rsi]
DB 102,72,15,110,199 movdqu xmm1,XMMWORD PTR[80+rsi]
movdqa XMMWORD PTR[512+rsp],xmm2 movdqa XMMWORD PTR[512+rsp],xmm2
movdqa XMMWORD PTR[(512+16)+rsp],xmm3 movdqa XMMWORD PTR[(512+16)+rsp],xmm3
por xmm3,xmm2
por xmm5,xmm4 por xmm5,xmm4
pxor xmm4,xmm4 pxor xmm4,xmm4
por xmm3,xmm1 por xmm1,xmm0
DB 102,72,15,110,199
lea rsi,QWORD PTR[((64-0))+rsi] lea rsi,QWORD PTR[((64-0))+rsi]
mov QWORD PTR[((544+0))+rsp],rax mov QWORD PTR[((544+0))+rsp],rax
@ -1566,8 +1567,8 @@ DB 102,72,15,110,199
call __ecp_nistz256_sqr_montq call __ecp_nistz256_sqr_montq
pcmpeqd xmm5,xmm4 pcmpeqd xmm5,xmm4
pshufd xmm4,xmm3,1h pshufd xmm4,xmm1,1h
por xmm4,xmm3 por xmm4,xmm1
pshufd xmm5,xmm5,0 pshufd xmm5,xmm5,0
pshufd xmm3,xmm4,01eh pshufd xmm3,xmm4,01eh
por xmm4,xmm3 por xmm4,xmm3
@ -1750,6 +1751,7 @@ $L$add_proceedq::
xor r11,r11
add r12,r12 add r12,r12
lea rsi,QWORD PTR[96+rsp] lea rsi,QWORD PTR[96+rsp]
adc r13,r13 adc r13,r13
@ -1757,7 +1759,7 @@ $L$add_proceedq::
adc r8,r8 adc r8,r8
adc r9,r9 adc r9,r9
mov rbp,r13 mov rbp,r13
sbb r11,r11 adc r11,0
sub r12,-1 sub r12,-1
mov rcx,r8 mov rcx,r8
@ -1765,15 +1767,15 @@ $L$add_proceedq::
sbb r8,0 sbb r8,0
mov r10,r9 mov r10,r9
sbb r9,r15 sbb r9,r15
test r11,r11 sbb r11,0
cmovz r12,rax cmovc r12,rax
mov rax,QWORD PTR[rsi] mov rax,QWORD PTR[rsi]
cmovz r13,rbp cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi] mov rbp,QWORD PTR[8+rsi]
cmovz r8,rcx cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi] mov rcx,QWORD PTR[16+rsi]
cmovz r9,r10 cmovc r9,r10
mov r10,QWORD PTR[24+rsi] mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subq call __ecp_nistz256_subq
@ -1939,16 +1941,14 @@ $L$SEH_begin_ecp_nistz256_point_add_affine::
mov r8,QWORD PTR[((64+24))+rsi] mov r8,QWORD PTR[((64+24))+rsi]
movdqa XMMWORD PTR[320+rsp],xmm0 movdqa XMMWORD PTR[320+rsp],xmm0
movdqa XMMWORD PTR[(320+16)+rsp],xmm1 movdqa XMMWORD PTR[(320+16)+rsp],xmm1
por xmm1,xmm0
movdqa XMMWORD PTR[352+rsp],xmm2 movdqa XMMWORD PTR[352+rsp],xmm2
movdqa XMMWORD PTR[(352+16)+rsp],xmm3 movdqa XMMWORD PTR[(352+16)+rsp],xmm3
por xmm3,xmm2
movdqa XMMWORD PTR[384+rsp],xmm4 movdqa XMMWORD PTR[384+rsp],xmm4
movdqa XMMWORD PTR[(384+16)+rsp],xmm5 movdqa XMMWORD PTR[(384+16)+rsp],xmm5
por xmm3,xmm1 por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rbx] movdqu xmm0,XMMWORD PTR[rbx]
pshufd xmm5,xmm3,1h pshufd xmm3,xmm5,1h
movdqu xmm1,XMMWORD PTR[16+rbx] movdqu xmm1,XMMWORD PTR[16+rbx]
movdqu xmm2,XMMWORD PTR[32+rbx] movdqu xmm2,XMMWORD PTR[32+rbx]
por xmm5,xmm3 por xmm5,xmm3
@ -2066,6 +2066,7 @@ DB 102,72,15,110,199
xor r11,r11
add r12,r12 add r12,r12
lea rsi,QWORD PTR[192+rsp] lea rsi,QWORD PTR[192+rsp]
adc r13,r13 adc r13,r13
@ -2073,7 +2074,7 @@ DB 102,72,15,110,199
adc r8,r8 adc r8,r8
adc r9,r9 adc r9,r9
mov rbp,r13 mov rbp,r13
sbb r11,r11 adc r11,0
sub r12,-1 sub r12,-1
mov rcx,r8 mov rcx,r8
@ -2081,15 +2082,15 @@ DB 102,72,15,110,199
sbb r8,0 sbb r8,0
mov r10,r9 mov r10,r9
sbb r9,r15 sbb r9,r15
test r11,r11 sbb r11,0
cmovz r12,rax cmovc r12,rax
mov rax,QWORD PTR[rsi] mov rax,QWORD PTR[rsi]
cmovz r13,rbp cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi] mov rbp,QWORD PTR[8+rsi]
cmovz r8,rcx cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi] mov rcx,QWORD PTR[16+rsi]
cmovz r9,r10 cmovc r9,r10
mov r10,QWORD PTR[24+rsi] mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subq call __ecp_nistz256_subq

4
deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-x86_64.asm

@ -1283,9 +1283,9 @@ DB 102,15,56,0,251
ALIGN 16 ALIGN 16
$L$oop_shaext:: $L$oop_shaext::
dec rdx dec rdx
lea rax,QWORD PTR[64+rsi] lea r8,QWORD PTR[64+rsi]
paddd xmm1,xmm4 paddd xmm1,xmm4
cmovne rsi,rax cmovne rsi,r8
movdqa xmm8,xmm0 movdqa xmm8,xmm0
DB 15,56,201,229 DB 15,56,201,229
movdqa xmm2,xmm0 movdqa xmm2,xmm0

109
deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s

@ -15,44 +15,51 @@ bn_mul_mont:
jl .L000just_leave jl .L000just_leave
leal 20(%esp),%esi leal 20(%esp),%esi
leal 24(%esp),%edx leal 24(%esp),%edx
movl %esp,%ebp
addl $2,%edi addl $2,%edi
negl %edi negl %edi
leal -32(%esp,%edi,4),%esp leal -32(%esp,%edi,4),%ebp
negl %edi negl %edi
movl %esp,%eax movl %ebp,%eax
subl %edx,%eax subl %edx,%eax
andl $2047,%eax andl $2047,%eax
subl %eax,%esp subl %eax,%ebp
xorl %esp,%edx xorl %ebp,%edx
andl $2048,%edx andl $2048,%edx
xorl $2048,%edx xorl $2048,%edx
subl %edx,%esp subl %edx,%ebp
andl $-64,%esp andl $-64,%ebp
movl %ebp,%eax movl %esp,%eax
subl %esp,%eax subl %ebp,%eax
andl $-4096,%eax andl $-4096,%eax
movl %esp,%edx
leal (%ebp,%eax,1),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja .L001page_walk
jmp .L002page_walk_done
.align 16
.L001page_walk: .L001page_walk:
movl (%esp,%eax,1),%edx leal -4096(%esp),%esp
subl $4096,%eax movl (%esp),%eax
.byte 46 cmpl %ebp,%esp
jnc .L001page_walk ja .L001page_walk
.L002page_walk_done:
movl (%esi),%eax movl (%esi),%eax
movl 4(%esi),%ebx movl 4(%esi),%ebx
movl 8(%esi),%ecx movl 8(%esi),%ecx
movl 12(%esi),%edx movl 12(%esi),%ebp
movl 16(%esi),%esi movl 16(%esi),%esi
movl (%esi),%esi movl (%esi),%esi
movl %eax,4(%esp) movl %eax,4(%esp)
movl %ebx,8(%esp) movl %ebx,8(%esp)
movl %ecx,12(%esp) movl %ecx,12(%esp)
movl %edx,16(%esp) movl %ebp,16(%esp)
movl %esi,20(%esp) movl %esi,20(%esp)
leal -3(%edi),%ebx leal -3(%edi),%ebx
movl %ebp,24(%esp) movl %edx,24(%esp)
leal OPENSSL_ia32cap_P,%eax leal OPENSSL_ia32cap_P,%eax
btl $26,(%eax) btl $26,(%eax)
jnc .L002non_sse2 jnc .L003non_sse2
movl $-1,%eax movl $-1,%eax
movd %eax,%mm7 movd %eax,%mm7
movl 8(%esp),%esi movl 8(%esp),%esi
@ -76,7 +83,7 @@ bn_mul_mont:
psrlq $32,%mm3 psrlq $32,%mm3
incl %ecx incl %ecx
.align 16 .align 16
.L0031st: .L0041st:
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -91,7 +98,7 @@ bn_mul_mont:
psrlq $32,%mm3 psrlq $32,%mm3
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
cmpl %ebx,%ecx cmpl %ebx,%ecx
jl .L0031st jl .L0041st
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -105,7 +112,7 @@ bn_mul_mont:
paddq %mm2,%mm3 paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4) movq %mm3,32(%esp,%ebx,4)
incl %edx incl %edx
.L004outer: .L005outer:
xorl %ecx,%ecx xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4 movd (%edi,%edx,4),%mm4
movd (%esi),%mm5 movd (%esi),%mm5
@ -127,7 +134,7 @@ bn_mul_mont:
paddq %mm6,%mm2 paddq %mm6,%mm2
incl %ecx incl %ecx
decl %ebx decl %ebx
.L005inner: .L006inner:
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -144,7 +151,7 @@ bn_mul_mont:
paddq %mm6,%mm2 paddq %mm6,%mm2
decl %ebx decl %ebx
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
jnz .L005inner jnz .L006inner
movl %ecx,%ebx movl %ecx,%ebx
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
@ -162,11 +169,11 @@ bn_mul_mont:
movq %mm3,32(%esp,%ebx,4) movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx leal 1(%edx),%edx
cmpl %ebx,%edx cmpl %ebx,%edx
jle .L004outer jle .L005outer
emms emms
jmp .L006common_tail jmp .L007common_tail
.align 16 .align 16
.L002non_sse2: .L003non_sse2:
movl 8(%esp),%esi movl 8(%esp),%esi
leal 1(%ebx),%ebp leal 1(%ebx),%ebp
movl 12(%esp),%edi movl 12(%esp),%edi
@ -177,12 +184,12 @@ bn_mul_mont:
leal 4(%edi,%ebx,4),%eax leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp orl %edx,%ebp
movl (%edi),%edi movl (%edi),%edi
jz .L007bn_sqr_mont jz .L008bn_sqr_mont
movl %eax,28(%esp) movl %eax,28(%esp)
movl (%esi),%eax movl (%esi),%eax
xorl %edx,%edx xorl %edx,%edx
.align 16 .align 16
.L008mull: .L009mull:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl %eax,%ebp addl %eax,%ebp
@ -191,7 +198,7 @@ bn_mul_mont:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl .L008mull jl .L009mull
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
movl 20(%esp),%edi movl 20(%esp),%edi
@ -209,9 +216,9 @@ bn_mul_mont:
movl 4(%esi),%eax movl 4(%esi),%eax
adcl $0,%edx adcl $0,%edx
incl %ecx incl %ecx
jmp .L0092ndmadd jmp .L0102ndmadd
.align 16 .align 16
.L0101stmadd: .L0111stmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -222,7 +229,7 @@ bn_mul_mont:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl .L0101stmadd jl .L0111stmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%eax addl 32(%esp,%ebx,4),%eax
@ -245,7 +252,7 @@ bn_mul_mont:
adcl $0,%edx adcl $0,%edx
movl $1,%ecx movl $1,%ecx
.align 16 .align 16
.L0092ndmadd: .L0102ndmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -256,7 +263,7 @@ bn_mul_mont:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4) movl %ebp,24(%esp,%ecx,4)
jl .L0092ndmadd jl .L0102ndmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%ebp addl 32(%esp,%ebx,4),%ebp
@ -272,16 +279,16 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4) movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4) movl %eax,36(%esp,%ebx,4)
je .L006common_tail je .L007common_tail
movl (%ecx),%edi movl (%ecx),%edi
movl 8(%esp),%esi movl 8(%esp),%esi
movl %ecx,12(%esp) movl %ecx,12(%esp)
xorl %ecx,%ecx xorl %ecx,%ecx
xorl %edx,%edx xorl %edx,%edx
movl (%esi),%eax movl (%esi),%eax
jmp .L0101stmadd jmp .L0111stmadd
.align 16 .align 16
.L007bn_sqr_mont: .L008bn_sqr_mont:
movl %ebx,(%esp) movl %ebx,(%esp)
movl %ecx,12(%esp) movl %ecx,12(%esp)
movl %edi,%eax movl %edi,%eax
@ -292,7 +299,7 @@ bn_mul_mont:
andl $1,%ebx andl $1,%ebx
incl %ecx incl %ecx
.align 16 .align 16
.L011sqr: .L012sqr:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -304,7 +311,7 @@ bn_mul_mont:
cmpl (%esp),%ecx cmpl (%esp),%ecx
movl %eax,%ebx movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl .L011sqr jl .L012sqr
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -328,7 +335,7 @@ bn_mul_mont:
movl 4(%esi),%eax movl 4(%esi),%eax
movl $1,%ecx movl $1,%ecx
.align 16 .align 16
.L0123rdmadd: .L0133rdmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -347,7 +354,7 @@ bn_mul_mont:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4) movl %ebp,24(%esp,%ecx,4)
jl .L0123rdmadd jl .L0133rdmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%ebp addl 32(%esp,%ebx,4),%ebp
@ -363,7 +370,7 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4) movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4) movl %eax,36(%esp,%ebx,4)
je .L006common_tail je .L007common_tail
movl 4(%esi,%ecx,4),%edi movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
movl %edi,%eax movl %edi,%eax
@ -375,12 +382,12 @@ bn_mul_mont:
xorl %ebp,%ebp xorl %ebp,%ebp
cmpl %ebx,%ecx cmpl %ebx,%ecx
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
je .L013sqrlast je .L014sqrlast
movl %edx,%ebx movl %edx,%ebx
shrl $1,%edx shrl $1,%edx
andl $1,%ebx andl $1,%ebx
.align 16 .align 16
.L014sqradd: .L015sqradd:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -396,13 +403,13 @@ bn_mul_mont:
cmpl (%esp),%ecx cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx movl %eax,%ebx
jle .L014sqradd jle .L015sqradd
movl %edx,%ebp movl %edx,%ebp
addl %edx,%edx addl %edx,%edx
shrl $31,%ebp shrl $31,%ebp
addl %ebx,%edx addl %ebx,%edx
adcl $0,%ebp adcl $0,%ebp
.L013sqrlast: .L014sqrlast:
movl 20(%esp),%edi movl 20(%esp),%edi
movl 16(%esp),%esi movl 16(%esp),%esi
imull 32(%esp),%edi imull 32(%esp),%edi
@ -417,9 +424,9 @@ bn_mul_mont:
adcl $0,%edx adcl $0,%edx
movl $1,%ecx movl $1,%ecx
movl 4(%esi),%eax movl 4(%esi),%eax
jmp .L0123rdmadd jmp .L0133rdmadd
.align 16 .align 16
.L006common_tail: .L007common_tail:
movl 16(%esp),%ebp movl 16(%esp),%ebp
movl 4(%esp),%edi movl 4(%esp),%edi
leal 32(%esp),%esi leal 32(%esp),%esi
@ -427,13 +434,13 @@ bn_mul_mont:
movl %ebx,%ecx movl %ebx,%ecx
xorl %edx,%edx xorl %edx,%edx
.align 16 .align 16
.L015sub: .L016sub:
sbbl (%ebp,%edx,4),%eax sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4) movl %eax,(%edi,%edx,4)
decl %ecx decl %ecx
movl 4(%esi,%edx,4),%eax movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx leal 1(%edx),%edx
jge .L015sub jge .L016sub
sbbl $0,%eax sbbl $0,%eax
andl %eax,%esi andl %eax,%esi
notl %eax notl %eax
@ -441,12 +448,12 @@ bn_mul_mont:
andl %eax,%ebp andl %eax,%ebp
orl %ebp,%esi orl %ebp,%esi
.align 16 .align 16
.L016copy: .L017copy:
movl (%esi,%ebx,4),%eax movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4) movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4) movl %ecx,32(%esp,%ebx,4)
decl %ebx decl %ebx
jge .L016copy jge .L017copy
movl 24(%esp),%esp movl 24(%esp),%esp
movl $1,%eax movl $1,%eax
.L000just_leave: .L000just_leave:

115
deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s

@ -14,47 +14,54 @@ L_bn_mul_mont_begin:
jl L000just_leave jl L000just_leave
leal 20(%esp),%esi leal 20(%esp),%esi
leal 24(%esp),%edx leal 24(%esp),%edx
movl %esp,%ebp
addl $2,%edi addl $2,%edi
negl %edi negl %edi
leal -32(%esp,%edi,4),%esp leal -32(%esp,%edi,4),%ebp
negl %edi negl %edi
movl %esp,%eax movl %ebp,%eax
subl %edx,%eax subl %edx,%eax
andl $2047,%eax andl $2047,%eax
subl %eax,%esp subl %eax,%ebp
xorl %esp,%edx xorl %ebp,%edx
andl $2048,%edx andl $2048,%edx
xorl $2048,%edx xorl $2048,%edx
subl %edx,%esp subl %edx,%ebp
andl $-64,%esp andl $-64,%ebp
movl %ebp,%eax movl %esp,%eax
subl %esp,%eax subl %ebp,%eax
andl $-4096,%eax andl $-4096,%eax
movl %esp,%edx
leal (%ebp,%eax,1),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja L001page_walk
jmp L002page_walk_done
.align 4,0x90
L001page_walk: L001page_walk:
movl (%esp,%eax,1),%edx leal -4096(%esp),%esp
subl $4096,%eax movl (%esp),%eax
.byte 46 cmpl %ebp,%esp
jnc L001page_walk ja L001page_walk
L002page_walk_done:
movl (%esi),%eax movl (%esi),%eax
movl 4(%esi),%ebx movl 4(%esi),%ebx
movl 8(%esi),%ecx movl 8(%esi),%ecx
movl 12(%esi),%edx movl 12(%esi),%ebp
movl 16(%esi),%esi movl 16(%esi),%esi
movl (%esi),%esi movl (%esi),%esi
movl %eax,4(%esp) movl %eax,4(%esp)
movl %ebx,8(%esp) movl %ebx,8(%esp)
movl %ecx,12(%esp) movl %ecx,12(%esp)
movl %edx,16(%esp) movl %ebp,16(%esp)
movl %esi,20(%esp) movl %esi,20(%esp)
leal -3(%edi),%ebx leal -3(%edi),%ebx
movl %ebp,24(%esp) movl %edx,24(%esp)
call L002PIC_me_up call L003PIC_me_up
L002PIC_me_up: L003PIC_me_up:
popl %eax popl %eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L002PIC_me_up(%eax),%eax movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax
btl $26,(%eax) btl $26,(%eax)
jnc L003non_sse2 jnc L004non_sse2
movl $-1,%eax movl $-1,%eax
movd %eax,%mm7 movd %eax,%mm7
movl 8(%esp),%esi movl 8(%esp),%esi
@ -78,7 +85,7 @@ L002PIC_me_up:
psrlq $32,%mm3 psrlq $32,%mm3
incl %ecx incl %ecx
.align 4,0x90 .align 4,0x90
L0041st: L0051st:
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -93,7 +100,7 @@ L0041st:
psrlq $32,%mm3 psrlq $32,%mm3
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
cmpl %ebx,%ecx cmpl %ebx,%ecx
jl L0041st jl L0051st
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -107,7 +114,7 @@ L0041st:
paddq %mm2,%mm3 paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4) movq %mm3,32(%esp,%ebx,4)
incl %edx incl %edx
L005outer: L006outer:
xorl %ecx,%ecx xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4 movd (%edi,%edx,4),%mm4
movd (%esi),%mm5 movd (%esi),%mm5
@ -129,7 +136,7 @@ L005outer:
paddq %mm6,%mm2 paddq %mm6,%mm2
incl %ecx incl %ecx
decl %ebx decl %ebx
L006inner: L007inner:
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -146,7 +153,7 @@ L006inner:
paddq %mm6,%mm2 paddq %mm6,%mm2
decl %ebx decl %ebx
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
jnz L006inner jnz L007inner
movl %ecx,%ebx movl %ecx,%ebx
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
@ -164,11 +171,11 @@ L006inner:
movq %mm3,32(%esp,%ebx,4) movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx leal 1(%edx),%edx
cmpl %ebx,%edx cmpl %ebx,%edx
jle L005outer jle L006outer
emms emms
jmp L007common_tail jmp L008common_tail
.align 4,0x90 .align 4,0x90
L003non_sse2: L004non_sse2:
movl 8(%esp),%esi movl 8(%esp),%esi
leal 1(%ebx),%ebp leal 1(%ebx),%ebp
movl 12(%esp),%edi movl 12(%esp),%edi
@ -179,12 +186,12 @@ L003non_sse2:
leal 4(%edi,%ebx,4),%eax leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp orl %edx,%ebp
movl (%edi),%edi movl (%edi),%edi
jz L008bn_sqr_mont jz L009bn_sqr_mont
movl %eax,28(%esp) movl %eax,28(%esp)
movl (%esi),%eax movl (%esi),%eax
xorl %edx,%edx xorl %edx,%edx
.align 4,0x90 .align 4,0x90
L009mull: L010mull:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl %eax,%ebp addl %eax,%ebp
@ -193,7 +200,7 @@ L009mull:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl L009mull jl L010mull
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
movl 20(%esp),%edi movl 20(%esp),%edi
@ -211,9 +218,9 @@ L009mull:
movl 4(%esi),%eax movl 4(%esi),%eax
adcl $0,%edx adcl $0,%edx
incl %ecx incl %ecx
jmp L0102ndmadd jmp L0112ndmadd
.align 4,0x90 .align 4,0x90
L0111stmadd: L0121stmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -224,7 +231,7 @@ L0111stmadd:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl L0111stmadd jl L0121stmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%eax addl 32(%esp,%ebx,4),%eax
@ -247,7 +254,7 @@ L0111stmadd:
adcl $0,%edx adcl $0,%edx
movl $1,%ecx movl $1,%ecx
.align 4,0x90 .align 4,0x90
L0102ndmadd: L0112ndmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -258,7 +265,7 @@ L0102ndmadd:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4) movl %ebp,24(%esp,%ecx,4)
jl L0102ndmadd jl L0112ndmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%ebp addl 32(%esp,%ebx,4),%ebp
@ -274,16 +281,16 @@ L0102ndmadd:
movl %edx,32(%esp,%ebx,4) movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4) movl %eax,36(%esp,%ebx,4)
je L007common_tail je L008common_tail
movl (%ecx),%edi movl (%ecx),%edi
movl 8(%esp),%esi movl 8(%esp),%esi
movl %ecx,12(%esp) movl %ecx,12(%esp)
xorl %ecx,%ecx xorl %ecx,%ecx
xorl %edx,%edx xorl %edx,%edx
movl (%esi),%eax movl (%esi),%eax
jmp L0111stmadd jmp L0121stmadd
.align 4,0x90 .align 4,0x90
L008bn_sqr_mont: L009bn_sqr_mont:
movl %ebx,(%esp) movl %ebx,(%esp)
movl %ecx,12(%esp) movl %ecx,12(%esp)
movl %edi,%eax movl %edi,%eax
@ -294,7 +301,7 @@ L008bn_sqr_mont:
andl $1,%ebx andl $1,%ebx
incl %ecx incl %ecx
.align 4,0x90 .align 4,0x90
L012sqr: L013sqr:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -306,7 +313,7 @@ L012sqr:
cmpl (%esp),%ecx cmpl (%esp),%ecx
movl %eax,%ebx movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl L012sqr jl L013sqr
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -330,7 +337,7 @@ L012sqr:
movl 4(%esi),%eax movl 4(%esi),%eax
movl $1,%ecx movl $1,%ecx
.align 4,0x90 .align 4,0x90
L0133rdmadd: L0143rdmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -349,7 +356,7 @@ L0133rdmadd:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4) movl %ebp,24(%esp,%ecx,4)
jl L0133rdmadd jl L0143rdmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%ebp addl 32(%esp,%ebx,4),%ebp
@ -365,7 +372,7 @@ L0133rdmadd:
movl %edx,32(%esp,%ebx,4) movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4) movl %eax,36(%esp,%ebx,4)
je L007common_tail je L008common_tail
movl 4(%esi,%ecx,4),%edi movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
movl %edi,%eax movl %edi,%eax
@ -377,12 +384,12 @@ L0133rdmadd:
xorl %ebp,%ebp xorl %ebp,%ebp
cmpl %ebx,%ecx cmpl %ebx,%ecx
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
je L014sqrlast je L015sqrlast
movl %edx,%ebx movl %edx,%ebx
shrl $1,%edx shrl $1,%edx
andl $1,%ebx andl $1,%ebx
.align 4,0x90 .align 4,0x90
L015sqradd: L016sqradd:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -398,13 +405,13 @@ L015sqradd:
cmpl (%esp),%ecx cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx movl %eax,%ebx
jle L015sqradd jle L016sqradd
movl %edx,%ebp movl %edx,%ebp
addl %edx,%edx addl %edx,%edx
shrl $31,%ebp shrl $31,%ebp
addl %ebx,%edx addl %ebx,%edx
adcl $0,%ebp adcl $0,%ebp
L014sqrlast: L015sqrlast:
movl 20(%esp),%edi movl 20(%esp),%edi
movl 16(%esp),%esi movl 16(%esp),%esi
imull 32(%esp),%edi imull 32(%esp),%edi
@ -419,9 +426,9 @@ L014sqrlast:
adcl $0,%edx adcl $0,%edx
movl $1,%ecx movl $1,%ecx
movl 4(%esi),%eax movl 4(%esi),%eax
jmp L0133rdmadd jmp L0143rdmadd
.align 4,0x90 .align 4,0x90
L007common_tail: L008common_tail:
movl 16(%esp),%ebp movl 16(%esp),%ebp
movl 4(%esp),%edi movl 4(%esp),%edi
leal 32(%esp),%esi leal 32(%esp),%esi
@ -429,13 +436,13 @@ L007common_tail:
movl %ebx,%ecx movl %ebx,%ecx
xorl %edx,%edx xorl %edx,%edx
.align 4,0x90 .align 4,0x90
L016sub: L017sub:
sbbl (%ebp,%edx,4),%eax sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4) movl %eax,(%edi,%edx,4)
decl %ecx decl %ecx
movl 4(%esi,%edx,4),%eax movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx leal 1(%edx),%edx
jge L016sub jge L017sub
sbbl $0,%eax sbbl $0,%eax
andl %eax,%esi andl %eax,%esi
notl %eax notl %eax
@ -443,12 +450,12 @@ L016sub:
andl %eax,%ebp andl %eax,%ebp
orl %ebp,%esi orl %ebp,%esi
.align 4,0x90 .align 4,0x90
L017copy: L018copy:
movl (%esi,%ebx,4),%eax movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4) movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4) movl %ecx,32(%esp,%ebx,4)
decl %ebx decl %ebx
jge L017copy jge L018copy
movl 24(%esp),%esp movl 24(%esp),%esp
movl $1,%eax movl $1,%eax
L000just_leave: L000just_leave:

109
deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm

@ -31,44 +31,51 @@ $L_bn_mul_mont_begin::
jl $L000just_leave jl $L000just_leave
lea esi,DWORD PTR 20[esp] lea esi,DWORD PTR 20[esp]
lea edx,DWORD PTR 24[esp] lea edx,DWORD PTR 24[esp]
mov ebp,esp
add edi,2 add edi,2
neg edi neg edi
lea esp,DWORD PTR [edi*4+esp-32] lea ebp,DWORD PTR [edi*4+esp-32]
neg edi neg edi
mov eax,esp mov eax,ebp
sub eax,edx sub eax,edx
and eax,2047 and eax,2047
sub esp,eax sub ebp,eax
xor edx,esp xor edx,ebp
and edx,2048 and edx,2048
xor edx,2048 xor edx,2048
sub esp,edx sub ebp,edx
and esp,-64 and ebp,-64
mov eax,ebp mov eax,esp
sub eax,esp sub eax,ebp
and eax,-4096 and eax,-4096
mov edx,esp
lea esp,DWORD PTR [eax*1+ebp]
mov eax,DWORD PTR [esp]
cmp esp,ebp
ja $L001page_walk
jmp $L002page_walk_done
ALIGN 16
$L001page_walk: $L001page_walk:
mov edx,DWORD PTR [eax*1+esp] lea esp,DWORD PTR [esp-4096]
sub eax,4096 mov eax,DWORD PTR [esp]
DB 46 cmp esp,ebp
jnc $L001page_walk ja $L001page_walk
$L002page_walk_done:
mov eax,DWORD PTR [esi] mov eax,DWORD PTR [esi]
mov ebx,DWORD PTR 4[esi] mov ebx,DWORD PTR 4[esi]
mov ecx,DWORD PTR 8[esi] mov ecx,DWORD PTR 8[esi]
mov edx,DWORD PTR 12[esi] mov ebp,DWORD PTR 12[esi]
mov esi,DWORD PTR 16[esi] mov esi,DWORD PTR 16[esi]
mov esi,DWORD PTR [esi] mov esi,DWORD PTR [esi]
mov DWORD PTR 4[esp],eax mov DWORD PTR 4[esp],eax
mov DWORD PTR 8[esp],ebx mov DWORD PTR 8[esp],ebx
mov DWORD PTR 12[esp],ecx mov DWORD PTR 12[esp],ecx
mov DWORD PTR 16[esp],edx mov DWORD PTR 16[esp],ebp
mov DWORD PTR 20[esp],esi mov DWORD PTR 20[esp],esi
lea ebx,DWORD PTR [edi-3] lea ebx,DWORD PTR [edi-3]
mov DWORD PTR 24[esp],ebp mov DWORD PTR 24[esp],edx
lea eax,DWORD PTR _OPENSSL_ia32cap_P lea eax,DWORD PTR _OPENSSL_ia32cap_P
bt DWORD PTR [eax],26 bt DWORD PTR [eax],26
jnc $L002non_sse2 jnc $L003non_sse2
mov eax,-1 mov eax,-1
movd mm7,eax movd mm7,eax
mov esi,DWORD PTR 8[esp] mov esi,DWORD PTR 8[esp]
@ -92,7 +99,7 @@ DB 46
psrlq mm3,32 psrlq mm3,32
inc ecx inc ecx
ALIGN 16 ALIGN 16
$L0031st: $L0041st:
pmuludq mm0,mm4 pmuludq mm0,mm4
pmuludq mm1,mm5 pmuludq mm1,mm5
paddq mm2,mm0 paddq mm2,mm0
@ -107,7 +114,7 @@ $L0031st:
psrlq mm3,32 psrlq mm3,32
lea ecx,DWORD PTR 1[ecx] lea ecx,DWORD PTR 1[ecx]
cmp ecx,ebx cmp ecx,ebx
jl $L0031st jl $L0041st
pmuludq mm0,mm4 pmuludq mm0,mm4
pmuludq mm1,mm5 pmuludq mm1,mm5
paddq mm2,mm0 paddq mm2,mm0
@ -121,7 +128,7 @@ $L0031st:
paddq mm3,mm2 paddq mm3,mm2
movq QWORD PTR 32[ebx*4+esp],mm3 movq QWORD PTR 32[ebx*4+esp],mm3
inc edx inc edx
$L004outer: $L005outer:
xor ecx,ecx xor ecx,ecx
movd mm4,DWORD PTR [edx*4+edi] movd mm4,DWORD PTR [edx*4+edi]
movd mm5,DWORD PTR [esi] movd mm5,DWORD PTR [esi]
@ -143,7 +150,7 @@ $L004outer:
paddq mm2,mm6 paddq mm2,mm6
inc ecx inc ecx
dec ebx dec ebx
$L005inner: $L006inner:
pmuludq mm0,mm4 pmuludq mm0,mm4
pmuludq mm1,mm5 pmuludq mm1,mm5
paddq mm2,mm0 paddq mm2,mm0
@ -160,7 +167,7 @@ $L005inner:
paddq mm2,mm6 paddq mm2,mm6
dec ebx dec ebx
lea ecx,DWORD PTR 1[ecx] lea ecx,DWORD PTR 1[ecx]
jnz $L005inner jnz $L006inner
mov ebx,ecx mov ebx,ecx
pmuludq mm0,mm4 pmuludq mm0,mm4
pmuludq mm1,mm5 pmuludq mm1,mm5
@ -178,11 +185,11 @@ $L005inner:
movq QWORD PTR 32[ebx*4+esp],mm3 movq QWORD PTR 32[ebx*4+esp],mm3
lea edx,DWORD PTR 1[edx] lea edx,DWORD PTR 1[edx]
cmp edx,ebx cmp edx,ebx
jle $L004outer jle $L005outer
emms emms
jmp $L006common_tail jmp $L007common_tail
ALIGN 16 ALIGN 16
$L002non_sse2: $L003non_sse2:
mov esi,DWORD PTR 8[esp] mov esi,DWORD PTR 8[esp]
lea ebp,DWORD PTR 1[ebx] lea ebp,DWORD PTR 1[ebx]
mov edi,DWORD PTR 12[esp] mov edi,DWORD PTR 12[esp]
@ -193,12 +200,12 @@ $L002non_sse2:
lea eax,DWORD PTR 4[ebx*4+edi] lea eax,DWORD PTR 4[ebx*4+edi]
or ebp,edx or ebp,edx
mov edi,DWORD PTR [edi] mov edi,DWORD PTR [edi]
jz $L007bn_sqr_mont jz $L008bn_sqr_mont
mov DWORD PTR 28[esp],eax mov DWORD PTR 28[esp],eax
mov eax,DWORD PTR [esi] mov eax,DWORD PTR [esi]
xor edx,edx xor edx,edx
ALIGN 16 ALIGN 16
$L008mull: $L009mull:
mov ebp,edx mov ebp,edx
mul edi mul edi
add ebp,eax add ebp,eax
@ -207,7 +214,7 @@ $L008mull:
mov eax,DWORD PTR [ecx*4+esi] mov eax,DWORD PTR [ecx*4+esi]
cmp ecx,ebx cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp mov DWORD PTR 28[ecx*4+esp],ebp
jl $L008mull jl $L009mull
mov ebp,edx mov ebp,edx
mul edi mul edi
mov edi,DWORD PTR 20[esp] mov edi,DWORD PTR 20[esp]
@ -225,9 +232,9 @@ $L008mull:
mov eax,DWORD PTR 4[esi] mov eax,DWORD PTR 4[esi]
adc edx,0 adc edx,0
inc ecx inc ecx
jmp $L0092ndmadd jmp $L0102ndmadd
ALIGN 16 ALIGN 16
$L0101stmadd: $L0111stmadd:
mov ebp,edx mov ebp,edx
mul edi mul edi
add ebp,DWORD PTR 32[ecx*4+esp] add ebp,DWORD PTR 32[ecx*4+esp]
@ -238,7 +245,7 @@ $L0101stmadd:
adc edx,0 adc edx,0
cmp ecx,ebx cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp mov DWORD PTR 28[ecx*4+esp],ebp
jl $L0101stmadd jl $L0111stmadd
mov ebp,edx mov ebp,edx
mul edi mul edi
add eax,DWORD PTR 32[ebx*4+esp] add eax,DWORD PTR 32[ebx*4+esp]
@ -261,7 +268,7 @@ $L0101stmadd:
adc edx,0 adc edx,0
mov ecx,1 mov ecx,1
ALIGN 16 ALIGN 16
$L0092ndmadd: $L0102ndmadd:
mov ebp,edx mov ebp,edx
mul edi mul edi
add ebp,DWORD PTR 32[ecx*4+esp] add ebp,DWORD PTR 32[ecx*4+esp]
@ -272,7 +279,7 @@ $L0092ndmadd:
adc edx,0 adc edx,0
cmp ecx,ebx cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp mov DWORD PTR 24[ecx*4+esp],ebp
jl $L0092ndmadd jl $L0102ndmadd
mov ebp,edx mov ebp,edx
mul edi mul edi
add ebp,DWORD PTR 32[ebx*4+esp] add ebp,DWORD PTR 32[ebx*4+esp]
@ -288,16 +295,16 @@ $L0092ndmadd:
mov DWORD PTR 32[ebx*4+esp],edx mov DWORD PTR 32[ebx*4+esp],edx
cmp ecx,DWORD PTR 28[esp] cmp ecx,DWORD PTR 28[esp]
mov DWORD PTR 36[ebx*4+esp],eax mov DWORD PTR 36[ebx*4+esp],eax
je $L006common_tail je $L007common_tail
mov edi,DWORD PTR [ecx] mov edi,DWORD PTR [ecx]
mov esi,DWORD PTR 8[esp] mov esi,DWORD PTR 8[esp]
mov DWORD PTR 12[esp],ecx mov DWORD PTR 12[esp],ecx
xor ecx,ecx xor ecx,ecx
xor edx,edx xor edx,edx
mov eax,DWORD PTR [esi] mov eax,DWORD PTR [esi]
jmp $L0101stmadd jmp $L0111stmadd
ALIGN 16 ALIGN 16
$L007bn_sqr_mont: $L008bn_sqr_mont:
mov DWORD PTR [esp],ebx mov DWORD PTR [esp],ebx
mov DWORD PTR 12[esp],ecx mov DWORD PTR 12[esp],ecx
mov eax,edi mov eax,edi
@ -308,7 +315,7 @@ $L007bn_sqr_mont:
and ebx,1 and ebx,1
inc ecx inc ecx
ALIGN 16 ALIGN 16
$L011sqr: $L012sqr:
mov eax,DWORD PTR [ecx*4+esi] mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx mov ebp,edx
mul edi mul edi
@ -320,7 +327,7 @@ $L011sqr:
cmp ecx,DWORD PTR [esp] cmp ecx,DWORD PTR [esp]
mov ebx,eax mov ebx,eax
mov DWORD PTR 28[ecx*4+esp],ebp mov DWORD PTR 28[ecx*4+esp],ebp
jl $L011sqr jl $L012sqr
mov eax,DWORD PTR [ecx*4+esi] mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx mov ebp,edx
mul edi mul edi
@ -344,7 +351,7 @@ $L011sqr:
mov eax,DWORD PTR 4[esi] mov eax,DWORD PTR 4[esi]
mov ecx,1 mov ecx,1
ALIGN 16 ALIGN 16
$L0123rdmadd: $L0133rdmadd:
mov ebp,edx mov ebp,edx
mul edi mul edi
add ebp,DWORD PTR 32[ecx*4+esp] add ebp,DWORD PTR 32[ecx*4+esp]
@ -363,7 +370,7 @@ $L0123rdmadd:
adc edx,0 adc edx,0
cmp ecx,ebx cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp mov DWORD PTR 24[ecx*4+esp],ebp
jl $L0123rdmadd jl $L0133rdmadd
mov ebp,edx mov ebp,edx
mul edi mul edi
add ebp,DWORD PTR 32[ebx*4+esp] add ebp,DWORD PTR 32[ebx*4+esp]
@ -379,7 +386,7 @@ $L0123rdmadd:
mov DWORD PTR 32[ebx*4+esp],edx mov DWORD PTR 32[ebx*4+esp],edx
cmp ecx,ebx cmp ecx,ebx
mov DWORD PTR 36[ebx*4+esp],eax mov DWORD PTR 36[ebx*4+esp],eax
je $L006common_tail je $L007common_tail
mov edi,DWORD PTR 4[ecx*4+esi] mov edi,DWORD PTR 4[ecx*4+esi]
lea ecx,DWORD PTR 1[ecx] lea ecx,DWORD PTR 1[ecx]
mov eax,edi mov eax,edi
@ -391,12 +398,12 @@ $L0123rdmadd:
xor ebp,ebp xor ebp,ebp
cmp ecx,ebx cmp ecx,ebx
lea ecx,DWORD PTR 1[ecx] lea ecx,DWORD PTR 1[ecx]
je $L013sqrlast je $L014sqrlast
mov ebx,edx mov ebx,edx
shr edx,1 shr edx,1
and ebx,1 and ebx,1
ALIGN 16 ALIGN 16
$L014sqradd: $L015sqradd:
mov eax,DWORD PTR [ecx*4+esi] mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx mov ebp,edx
mul edi mul edi
@ -412,13 +419,13 @@ $L014sqradd:
cmp ecx,DWORD PTR [esp] cmp ecx,DWORD PTR [esp]
mov DWORD PTR 28[ecx*4+esp],ebp mov DWORD PTR 28[ecx*4+esp],ebp
mov ebx,eax mov ebx,eax
jle $L014sqradd jle $L015sqradd
mov ebp,edx mov ebp,edx
add edx,edx add edx,edx
shr ebp,31 shr ebp,31
add edx,ebx add edx,ebx
adc ebp,0 adc ebp,0
$L013sqrlast: $L014sqrlast:
mov edi,DWORD PTR 20[esp] mov edi,DWORD PTR 20[esp]
mov esi,DWORD PTR 16[esp] mov esi,DWORD PTR 16[esp]
imul edi,DWORD PTR 32[esp] imul edi,DWORD PTR 32[esp]
@ -433,9 +440,9 @@ $L013sqrlast:
adc edx,0 adc edx,0
mov ecx,1 mov ecx,1
mov eax,DWORD PTR 4[esi] mov eax,DWORD PTR 4[esi]
jmp $L0123rdmadd jmp $L0133rdmadd
ALIGN 16 ALIGN 16
$L006common_tail: $L007common_tail:
mov ebp,DWORD PTR 16[esp] mov ebp,DWORD PTR 16[esp]
mov edi,DWORD PTR 4[esp] mov edi,DWORD PTR 4[esp]
lea esi,DWORD PTR 32[esp] lea esi,DWORD PTR 32[esp]
@ -443,13 +450,13 @@ $L006common_tail:
mov ecx,ebx mov ecx,ebx
xor edx,edx xor edx,edx
ALIGN 16 ALIGN 16
$L015sub: $L016sub:
sbb eax,DWORD PTR [edx*4+ebp] sbb eax,DWORD PTR [edx*4+ebp]
mov DWORD PTR [edx*4+edi],eax mov DWORD PTR [edx*4+edi],eax
dec ecx dec ecx
mov eax,DWORD PTR 4[edx*4+esi] mov eax,DWORD PTR 4[edx*4+esi]
lea edx,DWORD PTR 1[edx] lea edx,DWORD PTR 1[edx]
jge $L015sub jge $L016sub
sbb eax,0 sbb eax,0
and esi,eax and esi,eax
not eax not eax
@ -457,12 +464,12 @@ $L015sub:
and ebp,eax and ebp,eax
or esi,ebp or esi,ebp
ALIGN 16 ALIGN 16
$L016copy: $L017copy:
mov eax,DWORD PTR [ebx*4+esi] mov eax,DWORD PTR [ebx*4+esi]
mov DWORD PTR [ebx*4+edi],eax mov DWORD PTR [ebx*4+edi],eax
mov DWORD PTR 32[ebx*4+esp],ecx mov DWORD PTR 32[ebx*4+esp],ecx
dec ebx dec ebx
jge $L016copy jge $L017copy
mov esp,DWORD PTR 24[esp] mov esp,DWORD PTR 24[esp]
mov eax,1 mov eax,1
$L000just_leave: $L000just_leave:
