
openssl: more asm
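
Add scatter/gather Montgomery multiplication (x86_64-mont5) and
GF(2^m) multiplication (x86_64-gf2m) assembly from upstream OpenSSL
for all three x64 targets: elf-gas, macosx-gas and win32-masm.

The new outputs are wired into asm/Makefile and openssl.gyp, and the
OPENSSL_BN_ASM_MONT5 and OPENSSL_BN_ASM_GF2m defines are enabled so
the C bignum code picks the routines up. bn_GF2m_mul_2x2 dispatches
on OPENSSL_ia32cap_P bit 33 (PCLMULQDQ) and falls back to a
table-driven path on older CPUs.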

v0.11.10-release
Fedor Indutny, 11 years ago
commit 04b92632bd
  1. deps/openssl/asm/Makefile (+12)
  2. deps/openssl/asm/x64-elf-gas/bn/x86_64-gf2m.s (+295)
  3. deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s (+785)
  4. deps/openssl/asm/x64-macosx-gas/bn/x86_64-gf2m.s (+295)
  5. deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s (+785)
  6. deps/openssl/asm/x64-win32-masm/bn/x86_64-gf2m.asm (+404)
  7. deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm (+990)
  8. deps/openssl/openssl.gyp (+19)

deps/openssl/asm/Makefile (+12)

@@ -26,6 +26,8 @@ OUTPUTS = \
x64-elf-gas/aes/aesni-sha1-x86_64.s \
x64-elf-gas/bn/modexp512-x86_64.s \
x64-elf-gas/bn/x86_64-mont.s \
x64-elf-gas/bn/x86_64-mont5.s \
x64-elf-gas/bn/x86_64-gf2m.s \
x64-elf-gas/camellia/cmll-x86_64.s \
x64-elf-gas/md5/md5-x86_64.s \
x64-elf-gas/rc4/rc4-x86_64.s \
@@ -58,6 +60,8 @@ OUTPUTS = \
x64-macosx-gas/aes/aesni-sha1-x86_64.s \
x64-macosx-gas/bn/modexp512-x86_64.s \
x64-macosx-gas/bn/x86_64-mont.s \
x64-macosx-gas/bn/x86_64-mont5.s \
x64-macosx-gas/bn/x86_64-gf2m.s \
x64-macosx-gas/camellia/cmll-x86_64.s \
x64-macosx-gas/md5/md5-x86_64.s \
x64-macosx-gas/rc4/rc4-x86_64.s \
@@ -90,6 +94,8 @@ OUTPUTS = \
x64-win32-masm/aes/aesni-sha1-x86_64.asm \
x64-win32-masm/bn/modexp512-x86_64.asm \
x64-win32-masm/bn/x86_64-mont.asm \
x64-win32-masm/bn/x86_64-mont5.asm \
x64-win32-masm/bn/x86_64-gf2m.asm \
x64-win32-masm/camellia/cmll-x86_64.asm \
x64-win32-masm/md5/md5-x86_64.asm \
x64-win32-masm/rc4/rc4-x86_64.asm \
@@ -129,6 +135,8 @@ x64-elf-gas/aes/aesni-x86_64.s: ../openssl/crypto/aes/asm/aesni-x86_64.pl
x64-elf-gas/aes/aesni-sha1-x86_64.s: ../openssl/crypto/aes/asm/aesni-sha1-x86_64.pl
x64-elf-gas/bn/modexp512-x86_64.s: ../openssl/crypto/bn/asm/modexp512-x86_64.pl
x64-elf-gas/bn/x86_64-mont.s: ../openssl/crypto/bn/asm/x86_64-mont.pl
x64-elf-gas/bn/x86_64-mont5.s: ../openssl/crypto/bn/asm/x86_64-mont5.pl
x64-elf-gas/bn/x86_64-gf2m.s: ../openssl/crypto/bn/asm/x86_64-gf2m.pl
x64-elf-gas/camellia/cmll-x86_64.s: ../openssl/crypto/camellia/asm/cmll-x86_64.pl
x64-elf-gas/md5/md5-x86_64.s: ../openssl/crypto/md5/asm/md5-x86_64.pl
x64-elf-gas/rc4/rc4-x86_64.s: ../openssl/crypto/rc4/asm/rc4-x86_64.pl
@@ -143,6 +151,8 @@ x64-macosx-gas/aes/aesni-x86_64.s: ../openssl/crypto/aes/asm/aesni-x86_64.pl
x64-macosx-gas/aes/aesni-sha1-x86_64.s: ../openssl/crypto/aes/asm/aesni-sha1-x86_64.pl
x64-macosx-gas/bn/modexp512-x86_64.s: ../openssl/crypto/bn/asm/modexp512-x86_64.pl
x64-macosx-gas/bn/x86_64-mont.s: ../openssl/crypto/bn/asm/x86_64-mont.pl
x64-macosx-gas/bn/x86_64-mont5.s: ../openssl/crypto/bn/asm/x86_64-mont5.pl
x64-macosx-gas/bn/x86_64-gf2m.s: ../openssl/crypto/bn/asm/x86_64-gf2m.pl
x64-macosx-gas/camellia/cmll-x86_64.s: ../openssl/crypto/camellia/asm/cmll-x86_64.pl
x64-macosx-gas/md5/md5-x86_64.s: ../openssl/crypto/md5/asm/md5-x86_64.pl
x64-macosx-gas/rc4/rc4-x86_64.s: ../openssl/crypto/rc4/asm/rc4-x86_64.pl
@@ -157,6 +167,8 @@ x64-win32-masm/aes/aesni-x86_64.asm: ../openssl/crypto/aes/asm/aesni-x86_64.pl
x64-win32-masm/aes/aesni-sha1-x86_64.asm: ../openssl/crypto/aes/asm/aesni-sha1-x86_64.pl
x64-win32-masm/bn/modexp512-x86_64.asm: ../openssl/crypto/bn/asm/modexp512-x86_64.pl
x64-win32-masm/bn/x86_64-mont.asm: ../openssl/crypto/bn/asm/x86_64-mont.pl
x64-win32-masm/bn/x86_64-mont5.asm: ../openssl/crypto/bn/asm/x86_64-mont5.pl
x64-win32-masm/bn/x86_64-gf2m.asm: ../openssl/crypto/bn/asm/x86_64-gf2m.pl
x64-win32-masm/camellia/cmll-x86_64.asm: ../openssl/crypto/camellia/asm/cmll-x86_64.pl
x64-win32-masm/md5/md5-x86_64.asm: ../openssl/crypto/md5/asm/md5-x86_64.pl
x64-win32-masm/rc4/rc4-x86_64.asm: ../openssl/crypto/rc4/asm/rc4-x86_64.pl

deps/openssl/asm/x64-elf-gas/bn/x86_64-gf2m.s (+295)

@@ -0,0 +1,295 @@
.text
.type _mul_1x1,@function
.align 16
_mul_1x1:
subq $128+8,%rsp
movq $-1,%r9
leaq (%rax,%rax,1),%rsi
shrq $3,%r9
leaq (,%rax,4),%rdi
andq %rax,%r9
leaq (,%rax,8),%r12
sarq $63,%rax
leaq (%r9,%r9,1),%r10
sarq $63,%rsi
leaq (,%r9,4),%r11
andq %rbp,%rax
sarq $63,%rdi
movq %rax,%rdx
shlq $63,%rax
andq %rbp,%rsi
shrq $1,%rdx
movq %rsi,%rcx
shlq $62,%rsi
andq %rbp,%rdi
shrq $2,%rcx
xorq %rsi,%rax
movq %rdi,%rbx
shlq $61,%rdi
xorq %rcx,%rdx
shrq $3,%rbx
xorq %rdi,%rax
xorq %rbx,%rdx
movq %r9,%r13
movq $0,0(%rsp)
xorq %r10,%r13
movq %r9,8(%rsp)
movq %r11,%r14
movq %r10,16(%rsp)
xorq %r12,%r14
movq %r13,24(%rsp)
xorq %r11,%r9
movq %r11,32(%rsp)
xorq %r11,%r10
movq %r9,40(%rsp)
xorq %r11,%r13
movq %r10,48(%rsp)
xorq %r14,%r9
movq %r13,56(%rsp)
xorq %r14,%r10
movq %r12,64(%rsp)
xorq %r14,%r13
movq %r9,72(%rsp)
xorq %r11,%r9
movq %r10,80(%rsp)
xorq %r11,%r10
movq %r13,88(%rsp)
xorq %r11,%r13
movq %r14,96(%rsp)
movq %r8,%rsi
movq %r9,104(%rsp)
andq %rbp,%rsi
movq %r10,112(%rsp)
shrq $4,%rbp
movq %r13,120(%rsp)
movq %r8,%rdi
andq %rbp,%rdi
shrq $4,%rbp
movq (%rsp,%rsi,8),%xmm0
movq %r8,%rsi
andq %rbp,%rsi
shrq $4,%rbp
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $4,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $60,%rbx
xorq %rcx,%rax
pslldq $1,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $12,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $52,%rbx
xorq %rcx,%rax
pslldq $2,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $20,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $44,%rbx
xorq %rcx,%rax
pslldq $3,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $28,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $36,%rbx
xorq %rcx,%rax
pslldq $4,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $36,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $28,%rbx
xorq %rcx,%rax
pslldq $5,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $44,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $20,%rbx
xorq %rcx,%rax
pslldq $6,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $52,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $12,%rbx
xorq %rcx,%rax
pslldq $7,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %rcx,%rbx
shlq $60,%rcx
.byte 102,72,15,126,198
shrq $4,%rbx
xorq %rcx,%rax
psrldq $8,%xmm0
xorq %rbx,%rdx
.byte 102,72,15,126,199
xorq %rsi,%rax
xorq %rdi,%rdx
addq $128+8,%rsp
.byte 0xf3,0xc3
.Lend_mul_1x1:
.size _mul_1x1,.-_mul_1x1
.globl bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,@function
.align 16
bn_GF2m_mul_2x2:
movq OPENSSL_ia32cap_P(%rip),%rax
btq $33,%rax
jnc .Lvanilla_mul_2x2
.byte 102,72,15,110,198
.byte 102,72,15,110,201
.byte 102,72,15,110,210
.byte 102,73,15,110,216
movdqa %xmm0,%xmm4
movdqa %xmm1,%xmm5
.byte 102,15,58,68,193,0
pxor %xmm2,%xmm4
pxor %xmm3,%xmm5
.byte 102,15,58,68,211,0
.byte 102,15,58,68,229,0
xorps %xmm0,%xmm4
xorps %xmm2,%xmm4
movdqa %xmm4,%xmm5
pslldq $8,%xmm4
psrldq $8,%xmm5
pxor %xmm4,%xmm2
pxor %xmm5,%xmm0
movdqu %xmm2,0(%rdi)
movdqu %xmm0,16(%rdi)
.byte 0xf3,0xc3
.align 16
.Lvanilla_mul_2x2:
leaq -136(%rsp),%rsp
movq %r14,80(%rsp)
movq %r13,88(%rsp)
movq %r12,96(%rsp)
movq %rbp,104(%rsp)
movq %rbx,112(%rsp)
.Lbody_mul_2x2:
movq %rdi,32(%rsp)
movq %rsi,40(%rsp)
movq %rdx,48(%rsp)
movq %rcx,56(%rsp)
movq %r8,64(%rsp)
movq $15,%r8
movq %rsi,%rax
movq %rcx,%rbp
call _mul_1x1
movq %rax,16(%rsp)
movq %rdx,24(%rsp)
movq 48(%rsp),%rax
movq 64(%rsp),%rbp
call _mul_1x1
movq %rax,0(%rsp)
movq %rdx,8(%rsp)
movq 40(%rsp),%rax
movq 56(%rsp),%rbp
xorq 48(%rsp),%rax
xorq 64(%rsp),%rbp
call _mul_1x1
movq 0(%rsp),%rbx
movq 8(%rsp),%rcx
movq 16(%rsp),%rdi
movq 24(%rsp),%rsi
movq 32(%rsp),%rbp
xorq %rdx,%rax
xorq %rcx,%rdx
xorq %rbx,%rax
movq %rbx,0(%rbp)
xorq %rdi,%rdx
movq %rsi,24(%rbp)
xorq %rsi,%rax
xorq %rsi,%rdx
xorq %rdx,%rax
movq %rdx,16(%rbp)
movq %rax,8(%rbp)
movq 80(%rsp),%r14
movq 88(%rsp),%r13
movq 96(%rsp),%r12
movq 104(%rsp),%rbp
movq 112(%rsp),%rbx
leaq 136(%rsp),%rsp
.byte 0xf3,0xc3
.Lend_mul_2x2:
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16
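
The _mul_1x1 routine above (repeated for the macosx and win32 targets
below) computes a 64x64 -> 128-bit carry-less product in GF(2)[x]: it
builds a 16-entry table of small multiples of the first operand and
consumes the second operand four bits at a time, while the lea/sar/and
prologue folds back the top three bits of the operand that the table
entries cannot hold. A minimal C sketch of the same idea follows; it
is illustrative only (not OpenSSL's code) and assumes the top three
bits of a are clear, which the real assembly handles with its
sign-mask corrections:

#include <stdint.h>

/* hi:lo = a (x) b, carry-less multiplication in GF(2)[x].
 * Assumes a's top three bits are clear so tab[] entries fit in 64 bits. */
static void clmul64_sketch(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo)
{
    uint64_t tab[16], h = 0, l = 0;
    int i, s;

    for (i = 0; i < 16; i++) {            /* tab[i] = i (x) a */
        uint64_t t = 0;
        if (i & 1) t ^= a;
        if (i & 2) t ^= a << 1;
        if (i & 4) t ^= a << 2;
        if (i & 8) t ^= a << 3;
        tab[i] = t;
    }
    for (s = 0; s < 64; s += 4) {         /* one nibble of b per step */
        uint64_t t = tab[(b >> s) & 0xf];
        l ^= t << s;
        if (s)
            h ^= t >> (64 - s);
    }
    *hi = h;
    *lo = l;
}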

deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s (+785)

@@ -0,0 +1,785 @@
.text
.globl bn_mul_mont_gather5
.type bn_mul_mont_gather5,@function
.align 64
bn_mul_mont_gather5:
testl $3,%r9d
jnz .Lmul_enter
cmpl $8,%r9d
jb .Lmul_enter
jmp .Lmul4x_enter
.align 16
.Lmul_enter:
movl %r9d,%r9d
movl 8(%rsp),%r10d
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
movq %rsp,%rax
leaq 2(%r9),%r11
negq %r11
leaq (%rsp,%r11,8),%rsp
andq $-1024,%rsp
movq %rax,8(%rsp,%r9,8)
.Lmul_body:
movq %rdx,%r12
movq %r10,%r11
shrq $3,%r10
andq $7,%r11
notq %r10
leaq .Lmagic_masks(%rip),%rax
andq $3,%r10
leaq 96(%r12,%r11,8),%r12
movq 0(%rax,%r10,8),%xmm4
movq 8(%rax,%r10,8),%xmm5
movq 16(%rax,%r10,8),%xmm6
movq 24(%rax,%r10,8),%xmm7
movq -96(%r12),%xmm0
movq -32(%r12),%xmm1
pand %xmm4,%xmm0
movq 32(%r12),%xmm2
pand %xmm5,%xmm1
movq 96(%r12),%xmm3
pand %xmm6,%xmm2
por %xmm1,%xmm0
pand %xmm7,%xmm3
por %xmm2,%xmm0
leaq 256(%r12),%r12
por %xmm3,%xmm0
.byte 102,72,15,126,195
movq (%r8),%r8
movq (%rsi),%rax
xorq %r14,%r14
xorq %r15,%r15
movq -96(%r12),%xmm0
movq -32(%r12),%xmm1
pand %xmm4,%xmm0
movq 32(%r12),%xmm2
pand %xmm5,%xmm1
movq %r8,%rbp
mulq %rbx
movq %rax,%r10
movq (%rcx),%rax
movq 96(%r12),%xmm3
pand %xmm6,%xmm2
por %xmm1,%xmm0
pand %xmm7,%xmm3
imulq %r10,%rbp
movq %rdx,%r11
por %xmm2,%xmm0
leaq 256(%r12),%r12
por %xmm3,%xmm0
mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq %rdx,%r13
leaq 1(%r15),%r15
jmp .L1st_enter
.align 16
.L1st:
addq %rax,%r13
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%r13
movq %r10,%r11
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13
.L1st_enter:
mulq %rbx
addq %rax,%r11
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
leaq 1(%r15),%r15
movq %rdx,%r10
mulq %rbp
cmpq %r9,%r15
jne .L1st
.byte 102,72,15,126,195
addq %rax,%r13
movq (%rsi),%rax
adcq $0,%rdx
addq %r11,%r13
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13
movq %r10,%r11
xorq %rdx,%rdx
addq %r11,%r13
adcq $0,%rdx
movq %r13,-8(%rsp,%r9,8)
movq %rdx,(%rsp,%r9,8)
leaq 1(%r14),%r14
jmp .Louter
.align 16
.Louter:
xorq %r15,%r15
movq %r8,%rbp
movq (%rsp),%r10
movq -96(%r12),%xmm0
movq -32(%r12),%xmm1
pand %xmm4,%xmm0
movq 32(%r12),%xmm2
pand %xmm5,%xmm1
mulq %rbx
addq %rax,%r10
movq (%rcx),%rax
adcq $0,%rdx
movq 96(%r12),%xmm3
pand %xmm6,%xmm2
por %xmm1,%xmm0
pand %xmm7,%xmm3
imulq %r10,%rbp
movq %rdx,%r11
por %xmm2,%xmm0
leaq 256(%r12),%r12
por %xmm3,%xmm0
mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq 8(%rsp),%r10
movq %rdx,%r13
leaq 1(%r15),%r15
jmp .Linner_enter
.align 16
.Linner:
addq %rax,%r13
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
movq (%rsp,%r15,8),%r10
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13
.Linner_enter:
mulq %rbx
addq %rax,%r11
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
addq %r11,%r10
movq %rdx,%r11
adcq $0,%r11
leaq 1(%r15),%r15
mulq %rbp
cmpq %r9,%r15
jne .Linner
.byte 102,72,15,126,195
addq %rax,%r13
movq (%rsi),%rax
adcq $0,%rdx
addq %r10,%r13
movq (%rsp,%r15,8),%r10
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13
xorq %rdx,%rdx
addq %r11,%r13
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-8(%rsp,%r9,8)
movq %rdx,(%rsp,%r9,8)
leaq 1(%r14),%r14
cmpq %r9,%r14
jl .Louter
xorq %r14,%r14
movq (%rsp),%rax
leaq (%rsp),%rsi
movq %r9,%r15
jmp .Lsub
.align 16
.Lsub: sbbq (%rcx,%r14,8),%rax
movq %rax,(%rdi,%r14,8)
movq 8(%rsi,%r14,8),%rax
leaq 1(%r14),%r14
decq %r15
jnz .Lsub
sbbq $0,%rax
xorq %r14,%r14
andq %rax,%rsi
notq %rax
movq %rdi,%rcx
andq %rax,%rcx
movq %r9,%r15
orq %rcx,%rsi
.align 16
.Lcopy:
movq (%rsi,%r14,8),%rax
movq %r14,(%rsp,%r14,8)
movq %rax,(%rdi,%r14,8)
leaq 1(%r14),%r14
subq $1,%r15
jnz .Lcopy
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
.Lmul_epilogue:
.byte 0xf3,0xc3
.size bn_mul_mont_gather5,.-bn_mul_mont_gather5
.type bn_mul4x_mont_gather5,@function
.align 16
bn_mul4x_mont_gather5:
.Lmul4x_enter:
movl %r9d,%r9d
movl 8(%rsp),%r10d
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
movq %rsp,%rax
leaq 4(%r9),%r11
negq %r11
leaq (%rsp,%r11,8),%rsp
andq $-1024,%rsp
movq %rax,8(%rsp,%r9,8)
.Lmul4x_body:
movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12
movq %r10,%r11
shrq $3,%r10
andq $7,%r11
notq %r10
leaq .Lmagic_masks(%rip),%rax
andq $3,%r10
leaq 96(%r12,%r11,8),%r12
movq 0(%rax,%r10,8),%xmm4
movq 8(%rax,%r10,8),%xmm5
movq 16(%rax,%r10,8),%xmm6
movq 24(%rax,%r10,8),%xmm7
movq -96(%r12),%xmm0
movq -32(%r12),%xmm1
pand %xmm4,%xmm0
movq 32(%r12),%xmm2
pand %xmm5,%xmm1
movq 96(%r12),%xmm3
pand %xmm6,%xmm2
por %xmm1,%xmm0
pand %xmm7,%xmm3
por %xmm2,%xmm0
leaq 256(%r12),%r12
por %xmm3,%xmm0
.byte 102,72,15,126,195
movq (%r8),%r8
movq (%rsi),%rax
xorq %r14,%r14
xorq %r15,%r15
movq -96(%r12),%xmm0
movq -32(%r12),%xmm1
pand %xmm4,%xmm0
movq 32(%r12),%xmm2
pand %xmm5,%xmm1
movq %r8,%rbp
mulq %rbx
movq %rax,%r10
movq (%rcx),%rax
movq 96(%r12),%xmm3
pand %xmm6,%xmm2
por %xmm1,%xmm0
pand %xmm7,%xmm3
imulq %r10,%rbp
movq %rdx,%r11
por %xmm2,%xmm0
leaq 256(%r12),%r12
por %xmm3,%xmm0
mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq 8(%rcx),%rax
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq 16(%rsi),%rax
adcq $0,%rdx
addq %r11,%rdi
leaq 4(%r15),%r15
adcq $0,%rdx
movq %rdi,(%rsp)
movq %rdx,%r13
jmp .L1st4x
.align 16
.L1st4x:
mulq %rbx
addq %rax,%r10
movq -16(%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-24(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq -8(%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-16(%rsp,%r15,8)
movq %rdx,%r13
mulq %rbx
addq %rax,%r10
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq 8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-8(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq 8(%rcx,%r15,8),%rax
adcq $0,%rdx
leaq 4(%r15),%r15
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq -16(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-32(%rsp,%r15,8)
movq %rdx,%r13
cmpq %r9,%r15
jl .L1st4x
mulq %rbx
addq %rax,%r10
movq -16(%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-24(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq -8(%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq (%rsi),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-16(%rsp,%r15,8)
movq %rdx,%r13
.byte 102,72,15,126,195
xorq %rdi,%rdi
addq %r10,%r13
adcq $0,%rdi
movq %r13,-8(%rsp,%r15,8)
movq %rdi,(%rsp,%r15,8)
leaq 1(%r14),%r14
.align 4
.Louter4x:
xorq %r15,%r15
movq -96(%r12),%xmm0
movq -32(%r12),%xmm1
pand %xmm4,%xmm0
movq 32(%r12),%xmm2
pand %xmm5,%xmm1
movq (%rsp),%r10
movq %r8,%rbp
mulq %rbx
addq %rax,%r10
movq (%rcx),%rax
adcq $0,%rdx
movq 96(%r12),%xmm3
pand %xmm6,%xmm2
por %xmm1,%xmm0
pand %xmm7,%xmm3
imulq %r10,%rbp
movq %rdx,%r11
por %xmm2,%xmm0
leaq 256(%r12),%r12
por %xmm3,%xmm0
mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq 8(%rcx),%rax
adcq $0,%rdx
addq 8(%rsp),%r11
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq 16(%rsi),%rax
adcq $0,%rdx
addq %r11,%rdi
leaq 4(%r15),%r15
adcq $0,%rdx
movq %rdx,%r13
jmp .Linner4x
.align 16
.Linner4x:
mulq %rbx
addq %rax,%r10
movq -16(%rcx,%r15,8),%rax
adcq $0,%rdx
addq -16(%rsp,%r15,8),%r10
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %rdi,-32(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq -8(%rcx,%r15,8),%rax
adcq $0,%rdx
addq -8(%rsp,%r15,8),%r11
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %r13,-24(%rsp,%r15,8)
movq %rdx,%r13
mulq %rbx
addq %rax,%r10
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
addq (%rsp,%r15,8),%r10
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq 8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %rdi,-16(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq 8(%rcx,%r15,8),%rax
adcq $0,%rdx
addq 8(%rsp,%r15,8),%r11
adcq $0,%rdx
leaq 4(%r15),%r15
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq -16(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %r13,-40(%rsp,%r15,8)
movq %rdx,%r13
cmpq %r9,%r15
jl .Linner4x
mulq %rbx
addq %rax,%r10
movq -16(%rcx,%r15,8),%rax
adcq $0,%rdx
addq -16(%rsp,%r15,8),%r10
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %rdi,-32(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq -8(%rcx,%r15,8),%rax
adcq $0,%rdx
addq -8(%rsp,%r15,8),%r11
adcq $0,%rdx
leaq 1(%r14),%r14
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq (%rsi),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %r13,-24(%rsp,%r15,8)
movq %rdx,%r13
.byte 102,72,15,126,195
movq %rdi,-16(%rsp,%r15,8)
xorq %rdi,%rdi
addq %r10,%r13
adcq $0,%rdi
addq (%rsp,%r9,8),%r13
adcq $0,%rdi
movq %r13,-8(%rsp,%r15,8)
movq %rdi,(%rsp,%r15,8)
cmpq %r9,%r14
jl .Louter4x
movq 16(%rsp,%r9,8),%rdi
movq 0(%rsp),%rax
pxor %xmm0,%xmm0
movq 8(%rsp),%rdx
shrq $2,%r9
leaq (%rsp),%rsi
xorq %r14,%r14
subq 0(%rcx),%rax
movq 16(%rsi),%rbx
movq 24(%rsi),%rbp
sbbq 8(%rcx),%rdx
leaq -1(%r9),%r15
jmp .Lsub4x
.align 16
.Lsub4x:
movq %rax,0(%rdi,%r14,8)
movq %rdx,8(%rdi,%r14,8)
sbbq 16(%rcx,%r14,8),%rbx
movq 32(%rsi,%r14,8),%rax
movq 40(%rsi,%r14,8),%rdx
sbbq 24(%rcx,%r14,8),%rbp
movq %rbx,16(%rdi,%r14,8)
movq %rbp,24(%rdi,%r14,8)
sbbq 32(%rcx,%r14,8),%rax
movq 48(%rsi,%r14,8),%rbx
movq 56(%rsi,%r14,8),%rbp
sbbq 40(%rcx,%r14,8),%rdx
leaq 4(%r14),%r14
decq %r15
jnz .Lsub4x
movq %rax,0(%rdi,%r14,8)
movq 32(%rsi,%r14,8),%rax
sbbq 16(%rcx,%r14,8),%rbx
movq %rdx,8(%rdi,%r14,8)
sbbq 24(%rcx,%r14,8),%rbp
movq %rbx,16(%rdi,%r14,8)
sbbq $0,%rax
movq %rbp,24(%rdi,%r14,8)
xorq %r14,%r14
andq %rax,%rsi
notq %rax
movq %rdi,%rcx
andq %rax,%rcx
leaq -1(%r9),%r15
orq %rcx,%rsi
movdqu (%rsi),%xmm1
movdqa %xmm0,(%rsp)
movdqu %xmm1,(%rdi)
jmp .Lcopy4x
.align 16
.Lcopy4x:
movdqu 16(%rsi,%r14,1),%xmm2
movdqu 32(%rsi,%r14,1),%xmm1
movdqa %xmm0,16(%rsp,%r14,1)
movdqu %xmm2,16(%rdi,%r14,1)
movdqa %xmm0,32(%rsp,%r14,1)
movdqu %xmm1,32(%rdi,%r14,1)
leaq 32(%r14),%r14
decq %r15
jnz .Lcopy4x
shlq $2,%r9
movdqu 16(%rsi,%r14,1),%xmm2
movdqa %xmm0,16(%rsp,%r14,1)
movdqu %xmm2,16(%rdi,%r14,1)
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
.Lmul4x_epilogue:
.byte 0xf3,0xc3
.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
.globl bn_scatter5
.type bn_scatter5,@function
.align 16
bn_scatter5:
cmpq $0,%rsi
jz .Lscatter_epilogue
leaq (%rdx,%rcx,8),%rdx
.Lscatter:
movq (%rdi),%rax
leaq 8(%rdi),%rdi
movq %rax,(%rdx)
leaq 256(%rdx),%rdx
subq $1,%rsi
jnz .Lscatter
.Lscatter_epilogue:
.byte 0xf3,0xc3
.size bn_scatter5,.-bn_scatter5
.globl bn_gather5
.type bn_gather5,@function
.align 16
bn_gather5:
movq %rcx,%r11
shrq $3,%rcx
andq $7,%r11
notq %rcx
leaq .Lmagic_masks(%rip),%rax
andq $3,%rcx
leaq 96(%rdx,%r11,8),%rdx
movq 0(%rax,%rcx,8),%xmm4
movq 8(%rax,%rcx,8),%xmm5
movq 16(%rax,%rcx,8),%xmm6
movq 24(%rax,%rcx,8),%xmm7
jmp .Lgather
.align 16
.Lgather:
movq -96(%rdx),%xmm0
movq -32(%rdx),%xmm1
pand %xmm4,%xmm0
movq 32(%rdx),%xmm2
pand %xmm5,%xmm1
movq 96(%rdx),%xmm3
pand %xmm6,%xmm2
por %xmm1,%xmm0
pand %xmm7,%xmm3
por %xmm2,%xmm0
leaq 256(%rdx),%rdx
por %xmm3,%xmm0
movq %xmm0,(%rdi)
leaq 8(%rdi),%rdi
subq $1,%rsi
jnz .Lgather
.byte 0xf3,0xc3
.LSEH_end_bn_gather5:
.size bn_gather5,.-bn_gather5
.align 64
.Lmagic_masks:
.long 0,0, 0,0, 0,0, -1,-1
.long 0,0, 0,0, 0,0, 0,0
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
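
The bn_gather5 loop above pulls table entries with pand/por against
the .Lmagic_masks constants: the low bits of the index choose a base
offset into the 256-byte-strided table, and the remaining bits choose
which of four candidate words gets the all-ones mask, so exactly one
candidate survives the OR. A minimal C sketch of that masked select
(illustrative names, not OpenSSL's API):

#include <stdint.h>

/* OR together four candidates, each ANDed with a mask that is all-ones
 * for exactly one of them; no branch on the selector is needed. */
static uint64_t masked_select4(const uint64_t cand[4], unsigned which)
{
    uint64_t r = 0;
    unsigned i;

    for (i = 0; i < 4; i++) {
        uint64_t mask = 0 - (uint64_t)(i == which);  /* 0 or ~0 */
        r |= cand[i] & mask;
    }
    return r;
}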

deps/openssl/asm/x64-macosx-gas/bn/x86_64-gf2m.s (+295)

@@ -0,0 +1,295 @@
.text
.p2align 4
_mul_1x1:
subq $128+8,%rsp
movq $-1,%r9
leaq (%rax,%rax,1),%rsi
shrq $3,%r9
leaq (,%rax,4),%rdi
andq %rax,%r9
leaq (,%rax,8),%r12
sarq $63,%rax
leaq (%r9,%r9,1),%r10
sarq $63,%rsi
leaq (,%r9,4),%r11
andq %rbp,%rax
sarq $63,%rdi
movq %rax,%rdx
shlq $63,%rax
andq %rbp,%rsi
shrq $1,%rdx
movq %rsi,%rcx
shlq $62,%rsi
andq %rbp,%rdi
shrq $2,%rcx
xorq %rsi,%rax
movq %rdi,%rbx
shlq $61,%rdi
xorq %rcx,%rdx
shrq $3,%rbx
xorq %rdi,%rax
xorq %rbx,%rdx
movq %r9,%r13
movq $0,0(%rsp)
xorq %r10,%r13
movq %r9,8(%rsp)
movq %r11,%r14
movq %r10,16(%rsp)
xorq %r12,%r14
movq %r13,24(%rsp)
xorq %r11,%r9
movq %r11,32(%rsp)
xorq %r11,%r10
movq %r9,40(%rsp)
xorq %r11,%r13
movq %r10,48(%rsp)
xorq %r14,%r9
movq %r13,56(%rsp)
xorq %r14,%r10
movq %r12,64(%rsp)
xorq %r14,%r13
movq %r9,72(%rsp)
xorq %r11,%r9
movq %r10,80(%rsp)
xorq %r11,%r10
movq %r13,88(%rsp)
xorq %r11,%r13
movq %r14,96(%rsp)
movq %r8,%rsi
movq %r9,104(%rsp)
andq %rbp,%rsi
movq %r10,112(%rsp)
shrq $4,%rbp
movq %r13,120(%rsp)
movq %r8,%rdi
andq %rbp,%rdi
shrq $4,%rbp
movq (%rsp,%rsi,8),%xmm0
movq %r8,%rsi
andq %rbp,%rsi
shrq $4,%rbp
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $4,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $60,%rbx
xorq %rcx,%rax
pslldq $1,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $12,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $52,%rbx
xorq %rcx,%rax
pslldq $2,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $20,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $44,%rbx
xorq %rcx,%rax
pslldq $3,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $28,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $36,%rbx
xorq %rcx,%rax
pslldq $4,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $36,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $28,%rbx
xorq %rcx,%rax
pslldq $5,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $44,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $20,%rbx
xorq %rcx,%rax
pslldq $6,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $52,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $12,%rbx
xorq %rcx,%rax
pslldq $7,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %rcx,%rbx
shlq $60,%rcx
.byte 102,72,15,126,198
shrq $4,%rbx
xorq %rcx,%rax
psrldq $8,%xmm0
xorq %rbx,%rdx
.byte 102,72,15,126,199
xorq %rsi,%rax
xorq %rdi,%rdx
addq $128+8,%rsp
.byte 0xf3,0xc3
L$end_mul_1x1:
.globl _bn_GF2m_mul_2x2
.p2align 4
_bn_GF2m_mul_2x2:
movq _OPENSSL_ia32cap_P(%rip),%rax
btq $33,%rax
jnc L$vanilla_mul_2x2
.byte 102,72,15,110,198
.byte 102,72,15,110,201
.byte 102,72,15,110,210
.byte 102,73,15,110,216
movdqa %xmm0,%xmm4
movdqa %xmm1,%xmm5
.byte 102,15,58,68,193,0
pxor %xmm2,%xmm4
pxor %xmm3,%xmm5
.byte 102,15,58,68,211,0
.byte 102,15,58,68,229,0
xorps %xmm0,%xmm4
xorps %xmm2,%xmm4
movdqa %xmm4,%xmm5
pslldq $8,%xmm4
psrldq $8,%xmm5
pxor %xmm4,%xmm2
pxor %xmm5,%xmm0
movdqu %xmm2,0(%rdi)
movdqu %xmm0,16(%rdi)
.byte 0xf3,0xc3
.p2align 4
L$vanilla_mul_2x2:
leaq -136(%rsp),%rsp
movq %r14,80(%rsp)
movq %r13,88(%rsp)
movq %r12,96(%rsp)
movq %rbp,104(%rsp)
movq %rbx,112(%rsp)
L$body_mul_2x2:
movq %rdi,32(%rsp)
movq %rsi,40(%rsp)
movq %rdx,48(%rsp)
movq %rcx,56(%rsp)
movq %r8,64(%rsp)
movq $15,%r8
movq %rsi,%rax
movq %rcx,%rbp
call _mul_1x1
movq %rax,16(%rsp)
movq %rdx,24(%rsp)
movq 48(%rsp),%rax
movq 64(%rsp),%rbp
call _mul_1x1
movq %rax,0(%rsp)
movq %rdx,8(%rsp)
movq 40(%rsp),%rax
movq 56(%rsp),%rbp
xorq 48(%rsp),%rax
xorq 64(%rsp),%rbp
call _mul_1x1
movq 0(%rsp),%rbx
movq 8(%rsp),%rcx
movq 16(%rsp),%rdi
movq 24(%rsp),%rsi
movq 32(%rsp),%rbp
xorq %rdx,%rax
xorq %rcx,%rdx
xorq %rbx,%rax
movq %rbx,0(%rbp)
xorq %rdi,%rdx
movq %rsi,24(%rbp)
xorq %rsi,%rax
xorq %rsi,%rdx
xorq %rdx,%rax
movq %rdx,16(%rbp)
movq %rax,8(%rbp)
movq 80(%rsp),%r14
movq 88(%rsp),%r13
movq 96(%rsp),%r12
movq 104(%rsp),%rbp
movq 112(%rsp),%rbx
leaq 136(%rsp),%rsp
.byte 0xf3,0xc3
L$end_mul_2x2:
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 4

deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s (+785)

@@ -0,0 +1,785 @@
.text
.globl _bn_mul_mont_gather5
.p2align 6
_bn_mul_mont_gather5:
testl $3,%r9d
jnz L$mul_enter
cmpl $8,%r9d
jb L$mul_enter
jmp L$mul4x_enter
.p2align 4
L$mul_enter:
movl %r9d,%r9d
movl 8(%rsp),%r10d
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
movq %rsp,%rax
leaq 2(%r9),%r11
negq %r11
leaq (%rsp,%r11,8),%rsp
andq $-1024,%rsp
movq %rax,8(%rsp,%r9,8)
L$mul_body:
movq %rdx,%r12
movq %r10,%r11
shrq $3,%r10
andq $7,%r11
notq %r10
leaq L$magic_masks(%rip),%rax
andq $3,%r10
leaq 96(%r12,%r11,8),%r12
movq 0(%rax,%r10,8),%xmm4
movq 8(%rax,%r10,8),%xmm5
movq 16(%rax,%r10,8),%xmm6
movq 24(%rax,%r10,8),%xmm7
movq -96(%r12),%xmm0
movq -32(%r12),%xmm1
pand %xmm4,%xmm0
movq 32(%r12),%xmm2
pand %xmm5,%xmm1
movq 96(%r12),%xmm3
pand %xmm6,%xmm2
por %xmm1,%xmm0
pand %xmm7,%xmm3
por %xmm2,%xmm0
leaq 256(%r12),%r12
por %xmm3,%xmm0
.byte 102,72,15,126,195
movq (%r8),%r8
movq (%rsi),%rax
xorq %r14,%r14
xorq %r15,%r15
movq -96(%r12),%xmm0
movq -32(%r12),%xmm1
pand %xmm4,%xmm0
movq 32(%r12),%xmm2
pand %xmm5,%xmm1
movq %r8,%rbp
mulq %rbx
movq %rax,%r10
movq (%rcx),%rax
movq 96(%r12),%xmm3
pand %xmm6,%xmm2
por %xmm1,%xmm0
pand %xmm7,%xmm3
imulq %r10,%rbp
movq %rdx,%r11
por %xmm2,%xmm0
leaq 256(%r12),%r12
por %xmm3,%xmm0
mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq %rdx,%r13
leaq 1(%r15),%r15
jmp L$1st_enter
.p2align 4
L$1st:
addq %rax,%r13
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%r13
movq %r10,%r11
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13
L$1st_enter:
mulq %rbx
addq %rax,%r11
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
leaq 1(%r15),%r15
movq %rdx,%r10
mulq %rbp
cmpq %r9,%r15
jne L$1st
.byte 102,72,15,126,195
addq %rax,%r13
movq (%rsi),%rax
adcq $0,%rdx
addq %r11,%r13
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13
movq %r10,%r11
xorq %rdx,%rdx
addq %r11,%r13
adcq $0,%rdx
movq %r13,-8(%rsp,%r9,8)
movq %rdx,(%rsp,%r9,8)
leaq 1(%r14),%r14
jmp L$outer
.p2align 4
L$outer:
xorq %r15,%r15
movq %r8,%rbp
movq (%rsp),%r10
movq -96(%r12),%xmm0
movq -32(%r12),%xmm1
pand %xmm4,%xmm0
movq 32(%r12),%xmm2
pand %xmm5,%xmm1
mulq %rbx
addq %rax,%r10
movq (%rcx),%rax
adcq $0,%rdx
movq 96(%r12),%xmm3
pand %xmm6,%xmm2
por %xmm1,%xmm0
pand %xmm7,%xmm3
imulq %r10,%rbp
movq %rdx,%r11
por %xmm2,%xmm0
leaq 256(%r12),%r12
por %xmm3,%xmm0
mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq 8(%rsp),%r10
movq %rdx,%r13
leaq 1(%r15),%r15
jmp L$inner_enter
.p2align 4
L$inner:
addq %rax,%r13
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
movq (%rsp,%r15,8),%r10
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13
L$inner_enter:
mulq %rbx
addq %rax,%r11
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
addq %r11,%r10
movq %rdx,%r11
adcq $0,%r11
leaq 1(%r15),%r15
mulq %rbp
cmpq %r9,%r15
jne L$inner
.byte 102,72,15,126,195
addq %rax,%r13
movq (%rsi),%rax
adcq $0,%rdx
addq %r10,%r13
movq (%rsp,%r15,8),%r10
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13
xorq %rdx,%rdx
addq %r11,%r13
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-8(%rsp,%r9,8)
movq %rdx,(%rsp,%r9,8)
leaq 1(%r14),%r14
cmpq %r9,%r14
jl L$outer
xorq %r14,%r14
movq (%rsp),%rax
leaq (%rsp),%rsi
movq %r9,%r15
jmp L$sub
.p2align 4
L$sub: sbbq (%rcx,%r14,8),%rax
movq %rax,(%rdi,%r14,8)
movq 8(%rsi,%r14,8),%rax
leaq 1(%r14),%r14
decq %r15
jnz L$sub
sbbq $0,%rax
xorq %r14,%r14
andq %rax,%rsi
notq %rax
movq %rdi,%rcx
andq %rax,%rcx
movq %r9,%r15
orq %rcx,%rsi
.p2align 4
L$copy:
movq (%rsi,%r14,8),%rax
movq %r14,(%rsp,%r14,8)
movq %rax,(%rdi,%r14,8)
leaq 1(%r14),%r14
subq $1,%r15
jnz L$copy
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
L$mul_epilogue:
.byte 0xf3,0xc3
.p2align 4
bn_mul4x_mont_gather5:
L$mul4x_enter:
movl %r9d,%r9d
movl 8(%rsp),%r10d
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
movq %rsp,%rax
leaq 4(%r9),%r11
negq %r11
leaq (%rsp,%r11,8),%rsp
andq $-1024,%rsp
movq %rax,8(%rsp,%r9,8)
L$mul4x_body:
movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12
movq %r10,%r11
shrq $3,%r10
andq $7,%r11
notq %r10
leaq L$magic_masks(%rip),%rax
andq $3,%r10
leaq 96(%r12,%r11,8),%r12
movq 0(%rax,%r10,8),%xmm4
movq 8(%rax,%r10,8),%xmm5
movq 16(%rax,%r10,8),%xmm6
movq 24(%rax,%r10,8),%xmm7
movq -96(%r12),%xmm0
movq -32(%r12),%xmm1
pand %xmm4,%xmm0
movq 32(%r12),%xmm2
pand %xmm5,%xmm1
movq 96(%r12),%xmm3
pand %xmm6,%xmm2
por %xmm1,%xmm0
pand %xmm7,%xmm3
por %xmm2,%xmm0
leaq 256(%r12),%r12
por %xmm3,%xmm0
.byte 102,72,15,126,195
movq (%r8),%r8
movq (%rsi),%rax
xorq %r14,%r14
xorq %r15,%r15
movq -96(%r12),%xmm0
movq -32(%r12),%xmm1
pand %xmm4,%xmm0
movq 32(%r12),%xmm2
pand %xmm5,%xmm1
movq %r8,%rbp
mulq %rbx
movq %rax,%r10
movq (%rcx),%rax
movq 96(%r12),%xmm3
pand %xmm6,%xmm2
por %xmm1,%xmm0
pand %xmm7,%xmm3
imulq %r10,%rbp
movq %rdx,%r11
por %xmm2,%xmm0
leaq 256(%r12),%r12
por %xmm3,%xmm0
mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq 8(%rcx),%rax
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq 16(%rsi),%rax
adcq $0,%rdx
addq %r11,%rdi
leaq 4(%r15),%r15
adcq $0,%rdx
movq %rdi,(%rsp)
movq %rdx,%r13
jmp L$1st4x
.p2align 4
L$1st4x:
mulq %rbx
addq %rax,%r10
movq -16(%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-24(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq -8(%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-16(%rsp,%r15,8)
movq %rdx,%r13
mulq %rbx
addq %rax,%r10
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq 8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-8(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq 8(%rcx,%r15,8),%rax
adcq $0,%rdx
leaq 4(%r15),%r15
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq -16(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-32(%rsp,%r15,8)
movq %rdx,%r13
cmpq %r9,%r15
jl L$1st4x
mulq %rbx
addq %rax,%r10
movq -16(%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-24(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq -8(%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq (%rsi),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-16(%rsp,%r15,8)
movq %rdx,%r13
.byte 102,72,15,126,195
xorq %rdi,%rdi
addq %r10,%r13
adcq $0,%rdi
movq %r13,-8(%rsp,%r15,8)
movq %rdi,(%rsp,%r15,8)
leaq 1(%r14),%r14
.p2align 2
L$outer4x:
xorq %r15,%r15
movq -96(%r12),%xmm0
movq -32(%r12),%xmm1
pand %xmm4,%xmm0
movq 32(%r12),%xmm2
pand %xmm5,%xmm1
movq (%rsp),%r10
movq %r8,%rbp
mulq %rbx
addq %rax,%r10
movq (%rcx),%rax
adcq $0,%rdx
movq 96(%r12),%xmm3
pand %xmm6,%xmm2
por %xmm1,%xmm0
pand %xmm7,%xmm3
imulq %r10,%rbp
movq %rdx,%r11
por %xmm2,%xmm0
leaq 256(%r12),%r12
por %xmm3,%xmm0
mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq 8(%rcx),%rax
adcq $0,%rdx
addq 8(%rsp),%r11
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq 16(%rsi),%rax
adcq $0,%rdx
addq %r11,%rdi
leaq 4(%r15),%r15
adcq $0,%rdx
movq %rdx,%r13
jmp L$inner4x
.p2align 4
L$inner4x:
mulq %rbx
addq %rax,%r10
movq -16(%rcx,%r15,8),%rax
adcq $0,%rdx
addq -16(%rsp,%r15,8),%r10
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %rdi,-32(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq -8(%rcx,%r15,8),%rax
adcq $0,%rdx
addq -8(%rsp,%r15,8),%r11
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %r13,-24(%rsp,%r15,8)
movq %rdx,%r13
mulq %rbx
addq %rax,%r10
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
addq (%rsp,%r15,8),%r10
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq 8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %rdi,-16(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq 8(%rcx,%r15,8),%rax
adcq $0,%rdx
addq 8(%rsp,%r15,8),%r11
adcq $0,%rdx
leaq 4(%r15),%r15
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq -16(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %r13,-40(%rsp,%r15,8)
movq %rdx,%r13
cmpq %r9,%r15
jl L$inner4x
mulq %rbx
addq %rax,%r10
movq -16(%rcx,%r15,8),%rax
adcq $0,%rdx
addq -16(%rsp,%r15,8),%r10
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %rdi,-32(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq -8(%rcx,%r15,8),%rax
adcq $0,%rdx
addq -8(%rsp,%r15,8),%r11
adcq $0,%rdx
leaq 1(%r14),%r14
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq (%rsi),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %r13,-24(%rsp,%r15,8)
movq %rdx,%r13
.byte 102,72,15,126,195
movq %rdi,-16(%rsp,%r15,8)
xorq %rdi,%rdi
addq %r10,%r13
adcq $0,%rdi
addq (%rsp,%r9,8),%r13
adcq $0,%rdi
movq %r13,-8(%rsp,%r15,8)
movq %rdi,(%rsp,%r15,8)
cmpq %r9,%r14
jl L$outer4x
movq 16(%rsp,%r9,8),%rdi
movq 0(%rsp),%rax
pxor %xmm0,%xmm0
movq 8(%rsp),%rdx
shrq $2,%r9
leaq (%rsp),%rsi
xorq %r14,%r14
subq 0(%rcx),%rax
movq 16(%rsi),%rbx
movq 24(%rsi),%rbp
sbbq 8(%rcx),%rdx
leaq -1(%r9),%r15
jmp L$sub4x
.p2align 4
L$sub4x:
movq %rax,0(%rdi,%r14,8)
movq %rdx,8(%rdi,%r14,8)
sbbq 16(%rcx,%r14,8),%rbx
movq 32(%rsi,%r14,8),%rax
movq 40(%rsi,%r14,8),%rdx
sbbq 24(%rcx,%r14,8),%rbp
movq %rbx,16(%rdi,%r14,8)
movq %rbp,24(%rdi,%r14,8)
sbbq 32(%rcx,%r14,8),%rax
movq 48(%rsi,%r14,8),%rbx
movq 56(%rsi,%r14,8),%rbp
sbbq 40(%rcx,%r14,8),%rdx
leaq 4(%r14),%r14
decq %r15
jnz L$sub4x
movq %rax,0(%rdi,%r14,8)
movq 32(%rsi,%r14,8),%rax
sbbq 16(%rcx,%r14,8),%rbx
movq %rdx,8(%rdi,%r14,8)
sbbq 24(%rcx,%r14,8),%rbp
movq %rbx,16(%rdi,%r14,8)
sbbq $0,%rax
movq %rbp,24(%rdi,%r14,8)
xorq %r14,%r14
andq %rax,%rsi
notq %rax
movq %rdi,%rcx
andq %rax,%rcx
leaq -1(%r9),%r15
orq %rcx,%rsi
movdqu (%rsi),%xmm1
movdqa %xmm0,(%rsp)
movdqu %xmm1,(%rdi)
jmp L$copy4x
.p2align 4
L$copy4x:
movdqu 16(%rsi,%r14,1),%xmm2
movdqu 32(%rsi,%r14,1),%xmm1
movdqa %xmm0,16(%rsp,%r14,1)
movdqu %xmm2,16(%rdi,%r14,1)
movdqa %xmm0,32(%rsp,%r14,1)
movdqu %xmm1,32(%rdi,%r14,1)
leaq 32(%r14),%r14
decq %r15
jnz L$copy4x
shlq $2,%r9
movdqu 16(%rsi,%r14,1),%xmm2
movdqa %xmm0,16(%rsp,%r14,1)
movdqu %xmm2,16(%rdi,%r14,1)
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
L$mul4x_epilogue:
.byte 0xf3,0xc3
.globl _bn_scatter5
.p2align 4
_bn_scatter5:
cmpq $0,%rsi
jz L$scatter_epilogue
leaq (%rdx,%rcx,8),%rdx
L$scatter:
movq (%rdi),%rax
leaq 8(%rdi),%rdi
movq %rax,(%rdx)
leaq 256(%rdx),%rdx
subq $1,%rsi
jnz L$scatter
L$scatter_epilogue:
.byte 0xf3,0xc3
.globl _bn_gather5
.p2align 4
_bn_gather5:
movq %rcx,%r11
shrq $3,%rcx
andq $7,%r11
notq %rcx
leaq L$magic_masks(%rip),%rax
andq $3,%rcx
leaq 96(%rdx,%r11,8),%rdx
movq 0(%rax,%rcx,8),%xmm4
movq 8(%rax,%rcx,8),%xmm5
movq 16(%rax,%rcx,8),%xmm6
movq 24(%rax,%rcx,8),%xmm7
jmp L$gather
.p2align 4
L$gather:
movq -96(%rdx),%xmm0
movq -32(%rdx),%xmm1
pand %xmm4,%xmm0
movq 32(%rdx),%xmm2
pand %xmm5,%xmm1
movq 96(%rdx),%xmm3
pand %xmm6,%xmm2
por %xmm1,%xmm0
pand %xmm7,%xmm3
por %xmm2,%xmm0
leaq 256(%rdx),%rdx
por %xmm3,%xmm0
movq %xmm0,(%rdi)
leaq 8(%rdi),%rdi
subq $1,%rsi
jnz L$gather
.byte 0xf3,0xc3
L$SEH_end_bn_gather5:
.p2align 6
L$magic_masks:
.long 0,0, 0,0, 0,0, -1,-1
.long 0,0, 0,0, 0,0, 0,0
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0

deps/openssl/asm/x64-win32-masm/bn/x86_64-gf2m.asm (+404)

@@ -0,0 +1,404 @@
OPTION DOTNAME
.text$ SEGMENT ALIGN(64) 'CODE'
ALIGN 16
_mul_1x1 PROC PRIVATE
sub rsp,128+8
mov r9,-1
lea rsi,QWORD PTR[rax*1+rax]
shr r9,3
lea rdi,QWORD PTR[rax*4]
and r9,rax
lea r12,QWORD PTR[rax*8]
sar rax,63
lea r10,QWORD PTR[r9*1+r9]
sar rsi,63
lea r11,QWORD PTR[r9*4]
and rax,rbp
sar rdi,63
mov rdx,rax
shl rax,63
and rsi,rbp
shr rdx,1
mov rcx,rsi
shl rsi,62
and rdi,rbp
shr rcx,2
xor rax,rsi
mov rbx,rdi
shl rdi,61
xor rdx,rcx
shr rbx,3
xor rax,rdi
xor rdx,rbx
mov r13,r9
mov QWORD PTR[rsp],0
xor r13,r10
mov QWORD PTR[8+rsp],r9
mov r14,r11
mov QWORD PTR[16+rsp],r10
xor r14,r12
mov QWORD PTR[24+rsp],r13
xor r9,r11
mov QWORD PTR[32+rsp],r11
xor r10,r11
mov QWORD PTR[40+rsp],r9
xor r13,r11
mov QWORD PTR[48+rsp],r10
xor r9,r14
mov QWORD PTR[56+rsp],r13
xor r10,r14
mov QWORD PTR[64+rsp],r12
xor r13,r14
mov QWORD PTR[72+rsp],r9
xor r9,r11
mov QWORD PTR[80+rsp],r10
xor r10,r11
mov QWORD PTR[88+rsp],r13
xor r13,r11
mov QWORD PTR[96+rsp],r14
mov rsi,r8
mov QWORD PTR[104+rsp],r9
and rsi,rbp
mov QWORD PTR[112+rsp],r10
shr rbp,4
mov QWORD PTR[120+rsp],r13
mov rdi,r8
and rdi,rbp
shr rbp,4
movq xmm0,QWORD PTR[rsi*8+rsp]
mov rsi,r8
and rsi,rbp
shr rbp,4
mov rcx,QWORD PTR[rdi*8+rsp]
mov rdi,r8
mov rbx,rcx
shl rcx,4
and rdi,rbp
movq xmm1,QWORD PTR[rsi*8+rsp]
shr rbx,60
xor rax,rcx
pslldq xmm1,1
mov rsi,r8
shr rbp,4
xor rdx,rbx
and rsi,rbp
shr rbp,4
pxor xmm0,xmm1
mov rcx,QWORD PTR[rdi*8+rsp]
mov rdi,r8
mov rbx,rcx
shl rcx,12
and rdi,rbp
movq xmm1,QWORD PTR[rsi*8+rsp]
shr rbx,52
xor rax,rcx
pslldq xmm1,2
mov rsi,r8
shr rbp,4
xor rdx,rbx
and rsi,rbp
shr rbp,4
pxor xmm0,xmm1
mov rcx,QWORD PTR[rdi*8+rsp]
mov rdi,r8
mov rbx,rcx
shl rcx,20
and rdi,rbp
movq xmm1,QWORD PTR[rsi*8+rsp]
shr rbx,44
xor rax,rcx
pslldq xmm1,3
mov rsi,r8
shr rbp,4
xor rdx,rbx
and rsi,rbp
shr rbp,4
pxor xmm0,xmm1
mov rcx,QWORD PTR[rdi*8+rsp]
mov rdi,r8
mov rbx,rcx
shl rcx,28
and rdi,rbp
movq xmm1,QWORD PTR[rsi*8+rsp]
shr rbx,36
xor rax,rcx
pslldq xmm1,4
mov rsi,r8
shr rbp,4
xor rdx,rbx
and rsi,rbp
shr rbp,4
pxor xmm0,xmm1
mov rcx,QWORD PTR[rdi*8+rsp]
mov rdi,r8
mov rbx,rcx
shl rcx,36
and rdi,rbp
movq xmm1,QWORD PTR[rsi*8+rsp]
shr rbx,28
xor rax,rcx
pslldq xmm1,5
mov rsi,r8
shr rbp,4
xor rdx,rbx
and rsi,rbp
shr rbp,4
pxor xmm0,xmm1
mov rcx,QWORD PTR[rdi*8+rsp]
mov rdi,r8
mov rbx,rcx
shl rcx,44
and rdi,rbp
movq xmm1,QWORD PTR[rsi*8+rsp]
shr rbx,20
xor rax,rcx
pslldq xmm1,6
mov rsi,r8
shr rbp,4
xor rdx,rbx
and rsi,rbp
shr rbp,4
pxor xmm0,xmm1
mov rcx,QWORD PTR[rdi*8+rsp]
mov rdi,r8
mov rbx,rcx
shl rcx,52
and rdi,rbp
movq xmm1,QWORD PTR[rsi*8+rsp]
shr rbx,12
xor rax,rcx
pslldq xmm1,7
mov rsi,r8
shr rbp,4
xor rdx,rbx
and rsi,rbp
shr rbp,4
pxor xmm0,xmm1
mov rcx,QWORD PTR[rdi*8+rsp]
mov rbx,rcx
shl rcx,60
DB 102,72,15,126,198
shr rbx,4
xor rax,rcx
psrldq xmm0,8
xor rdx,rbx
DB 102,72,15,126,199
xor rax,rsi
xor rdx,rdi
add rsp,128+8
DB 0F3h,0C3h ;repret
$L$end_mul_1x1::
_mul_1x1 ENDP
EXTERN OPENSSL_ia32cap_P:NEAR
PUBLIC bn_GF2m_mul_2x2
ALIGN 16
bn_GF2m_mul_2x2 PROC PUBLIC
mov rax,QWORD PTR[OPENSSL_ia32cap_P]
bt rax,33
jnc $L$vanilla_mul_2x2
DB 102,72,15,110,194
DB 102,73,15,110,201
DB 102,73,15,110,208
movq xmm3,QWORD PTR[40+rsp]
movdqa xmm4,xmm0
movdqa xmm5,xmm1
DB 102,15,58,68,193,0
pxor xmm4,xmm2
pxor xmm5,xmm3
DB 102,15,58,68,211,0
DB 102,15,58,68,229,0
xorps xmm4,xmm0
xorps xmm4,xmm2
movdqa xmm5,xmm4
pslldq xmm4,8
psrldq xmm5,8
pxor xmm2,xmm4
pxor xmm0,xmm5
movdqu XMMWORD PTR[rcx],xmm2
movdqu XMMWORD PTR[16+rcx],xmm0
DB 0F3h,0C3h ;repret
ALIGN 16
$L$vanilla_mul_2x2::
lea rsp,QWORD PTR[((-136))+rsp]
mov r10,QWORD PTR[176+rsp]
mov QWORD PTR[120+rsp],rdi
mov QWORD PTR[128+rsp],rsi
mov QWORD PTR[80+rsp],r14
mov QWORD PTR[88+rsp],r13
mov QWORD PTR[96+rsp],r12
mov QWORD PTR[104+rsp],rbp
mov QWORD PTR[112+rsp],rbx
$L$body_mul_2x2::
mov QWORD PTR[32+rsp],rcx
mov QWORD PTR[40+rsp],rdx
mov QWORD PTR[48+rsp],r8
mov QWORD PTR[56+rsp],r9
mov QWORD PTR[64+rsp],r10
mov r8,0fh
mov rax,rdx
mov rbp,r9
call _mul_1x1
mov QWORD PTR[16+rsp],rax
mov QWORD PTR[24+rsp],rdx
mov rax,QWORD PTR[48+rsp]
mov rbp,QWORD PTR[64+rsp]
call _mul_1x1
mov QWORD PTR[rsp],rax
mov QWORD PTR[8+rsp],rdx
mov rax,QWORD PTR[40+rsp]
mov rbp,QWORD PTR[56+rsp]
xor rax,QWORD PTR[48+rsp]
xor rbp,QWORD PTR[64+rsp]
call _mul_1x1
mov rbx,QWORD PTR[rsp]
mov rcx,QWORD PTR[8+rsp]
mov rdi,QWORD PTR[16+rsp]
mov rsi,QWORD PTR[24+rsp]
mov rbp,QWORD PTR[32+rsp]
xor rax,rdx
xor rdx,rcx
xor rax,rbx
mov QWORD PTR[rbp],rbx
xor rdx,rdi
mov QWORD PTR[24+rbp],rsi
xor rax,rsi
xor rdx,rsi
xor rax,rdx
mov QWORD PTR[16+rbp],rdx
mov QWORD PTR[8+rbp],rax
mov r14,QWORD PTR[80+rsp]
mov r13,QWORD PTR[88+rsp]
mov r12,QWORD PTR[96+rsp]
mov rbp,QWORD PTR[104+rsp]
mov rbx,QWORD PTR[112+rsp]
mov rdi,QWORD PTR[120+rsp]
mov rsi,QWORD PTR[128+rsp]
lea rsp,QWORD PTR[136+rsp]
DB 0F3h,0C3h ;repret
$L$end_mul_2x2::
bn_GF2m_mul_2x2 ENDP
DB 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
DB 99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54
DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
DB 111,114,103,62,0
ALIGN 16
EXTERN __imp_RtlVirtualUnwind:NEAR
ALIGN 16
se_handler PROC PRIVATE
push rsi
push rdi
push rbx
push rbp
push r12
push r13
push r14
push r15
pushfq
sub rsp,64
mov rax,QWORD PTR[152+r8]
mov rbx,QWORD PTR[248+r8]
lea r10,QWORD PTR[$L$body_mul_2x2]
cmp rbx,r10
jb $L$in_prologue
mov r14,QWORD PTR[80+rax]
mov r13,QWORD PTR[88+rax]
mov r12,QWORD PTR[96+rax]
mov rbp,QWORD PTR[104+rax]
mov rbx,QWORD PTR[112+rax]
mov rdi,QWORD PTR[120+rax]
mov rsi,QWORD PTR[128+rax]
mov QWORD PTR[144+r8],rbx
mov QWORD PTR[160+r8],rbp
mov QWORD PTR[168+r8],rsi
mov QWORD PTR[176+r8],rdi
mov QWORD PTR[216+r8],r12
mov QWORD PTR[224+r8],r13
mov QWORD PTR[232+r8],r14
$L$in_prologue::
lea rax,QWORD PTR[136+rax]
mov QWORD PTR[152+r8],rax
mov rdi,QWORD PTR[40+r9]
mov rsi,r8
mov ecx,154
DD 0a548f3fch
mov rsi,r9
xor rcx,rcx
mov rdx,QWORD PTR[8+rsi]
mov r8,QWORD PTR[rsi]
mov r9,QWORD PTR[16+rsi]
mov r10,QWORD PTR[40+rsi]
lea r11,QWORD PTR[56+rsi]
lea r12,QWORD PTR[24+rsi]
mov QWORD PTR[32+rsp],r10
mov QWORD PTR[40+rsp],r11
mov QWORD PTR[48+rsp],r12
mov QWORD PTR[56+rsp],rcx
call QWORD PTR[__imp_RtlVirtualUnwind]
mov eax,1
add rsp,64
popfq
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
pop rdi
pop rsi
DB 0F3h,0C3h ;repret
se_handler ENDP
.text$ ENDS
.pdata SEGMENT READONLY ALIGN(4)
ALIGN 4
DD imagerel _mul_1x1
DD imagerel $L$end_mul_1x1
DD imagerel $L$SEH_info_1x1
DD imagerel $L$vanilla_mul_2x2
DD imagerel $L$end_mul_2x2
DD imagerel $L$SEH_info_2x2
.pdata ENDS
.xdata SEGMENT READONLY ALIGN(8)
ALIGN 8
$L$SEH_info_1x1::
DB 001h,007h,002h,000h
DB 007h,001h,011h,000h
$L$SEH_info_2x2::
DB 9,0,0,0
DD imagerel se_handler
.xdata ENDS
END

deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm (+990)

@@ -0,0 +1,990 @@
OPTION DOTNAME
.text$ SEGMENT ALIGN(64) 'CODE'
PUBLIC bn_mul_mont_gather5
ALIGN 64
bn_mul_mont_gather5 PROC PUBLIC
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
mov QWORD PTR[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_bn_mul_mont_gather5::
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD PTR[40+rsp]
mov r9,QWORD PTR[48+rsp]
test r9d,3
jnz $L$mul_enter
cmp r9d,8
jb $L$mul_enter
jmp $L$mul4x_enter
ALIGN 16
$L$mul_enter::
mov r9d,r9d
mov r10d,DWORD PTR[56+rsp]
push rbx
push rbp
push r12
push r13
push r14
push r15
lea rsp,QWORD PTR[((-40))+rsp]
movaps XMMWORD PTR[rsp],xmm6
movaps XMMWORD PTR[16+rsp],xmm7
$L$mul_alloca::
mov rax,rsp
lea r11,QWORD PTR[2+r9]
neg r11
lea rsp,QWORD PTR[r11*8+rsp]
and rsp,-1024
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul_body::
mov r12,rdx
mov r11,r10
shr r10,3
and r11,7
not r10
lea rax,QWORD PTR[$L$magic_masks]
and r10,3
lea r12,QWORD PTR[96+r11*8+r12]
movq xmm4,QWORD PTR[r10*8+rax]
movq xmm5,QWORD PTR[8+r10*8+rax]
movq xmm6,QWORD PTR[16+r10*8+rax]
movq xmm7,QWORD PTR[24+r10*8+rax]
movq xmm0,QWORD PTR[((-96))+r12]
movq xmm1,QWORD PTR[((-32))+r12]
pand xmm0,xmm4
movq xmm2,QWORD PTR[32+r12]
pand xmm1,xmm5
movq xmm3,QWORD PTR[96+r12]
pand xmm2,xmm6
por xmm0,xmm1
pand xmm3,xmm7
por xmm0,xmm2
lea r12,QWORD PTR[256+r12]
por xmm0,xmm3
DB 102,72,15,126,195
mov r8,QWORD PTR[r8]
mov rax,QWORD PTR[rsi]
xor r14,r14
xor r15,r15
movq xmm0,QWORD PTR[((-96))+r12]
movq xmm1,QWORD PTR[((-32))+r12]
pand xmm0,xmm4
movq xmm2,QWORD PTR[32+r12]
pand xmm1,xmm5
mov rbp,r8
mul rbx
mov r10,rax
mov rax,QWORD PTR[rcx]
movq xmm3,QWORD PTR[96+r12]
pand xmm2,xmm6
por xmm0,xmm1
pand xmm3,xmm7
imul rbp,r10
mov r11,rdx
por xmm0,xmm2
lea r12,QWORD PTR[256+r12]
por xmm0,xmm3
mul rbp
add r10,rax
mov rax,QWORD PTR[8+rsi]
adc rdx,0
mov r13,rdx
lea r15,QWORD PTR[1+r15]
jmp $L$1st_enter
ALIGN 16
$L$1st::
add r13,rax
mov rax,QWORD PTR[r15*8+rsi]
adc rdx,0
add r13,r11
mov r11,r10
adc rdx,0
mov QWORD PTR[((-16))+r15*8+rsp],r13
mov r13,rdx
$L$1st_enter::
mul rbx
add r11,rax
mov rax,QWORD PTR[r15*8+rcx]
adc rdx,0
lea r15,QWORD PTR[1+r15]
mov r10,rdx
mul rbp
cmp r15,r9
jne $L$1st
DB 102,72,15,126,195
add r13,rax
mov rax,QWORD PTR[rsi]
adc rdx,0
add r13,r11
adc rdx,0
mov QWORD PTR[((-16))+r15*8+rsp],r13
mov r13,rdx
mov r11,r10
xor rdx,rdx
add r13,r11
adc rdx,0
mov QWORD PTR[((-8))+r9*8+rsp],r13
mov QWORD PTR[r9*8+rsp],rdx
lea r14,QWORD PTR[1+r14]
jmp $L$outer
ALIGN 16
$L$outer::
xor r15,r15
mov rbp,r8
mov r10,QWORD PTR[rsp]
movq xmm0,QWORD PTR[((-96))+r12]
movq xmm1,QWORD PTR[((-32))+r12]
pand xmm0,xmm4
movq xmm2,QWORD PTR[32+r12]
pand xmm1,xmm5
mul rbx
add r10,rax
mov rax,QWORD PTR[rcx]
adc rdx,0
movq xmm3,QWORD PTR[96+r12]
pand xmm2,xmm6
por xmm0,xmm1
pand xmm3,xmm7
imul rbp,r10
mov r11,rdx
por xmm0,xmm2
lea r12,QWORD PTR[256+r12]
por xmm0,xmm3
mul rbp
add r10,rax
mov rax,QWORD PTR[8+rsi]
adc rdx,0
mov r10,QWORD PTR[8+rsp]
mov r13,rdx
lea r15,QWORD PTR[1+r15]
jmp $L$inner_enter
ALIGN 16
$L$inner::
add r13,rax
mov rax,QWORD PTR[r15*8+rsi]
adc rdx,0
add r13,r10
mov r10,QWORD PTR[r15*8+rsp]
adc rdx,0
mov QWORD PTR[((-16))+r15*8+rsp],r13
mov r13,rdx
$L$inner_enter::
mul rbx
add r11,rax
mov rax,QWORD PTR[r15*8+rcx]
adc rdx,0
add r10,r11
mov r11,rdx
adc r11,0
lea r15,QWORD PTR[1+r15]
mul rbp
cmp r15,r9
jne $L$inner
DB 102,72,15,126,195
add r13,rax
mov rax,QWORD PTR[rsi]
adc rdx,0
add r13,r10
mov r10,QWORD PTR[r15*8+rsp]
adc rdx,0
mov QWORD PTR[((-16))+r15*8+rsp],r13
mov r13,rdx
xor rdx,rdx
add r13,r11
adc rdx,0
add r13,r10
adc rdx,0
mov QWORD PTR[((-8))+r9*8+rsp],r13
mov QWORD PTR[r9*8+rsp],rdx
lea r14,QWORD PTR[1+r14]
cmp r14,r9
jl $L$outer
xor r14,r14
mov rax,QWORD PTR[rsp]
lea rsi,QWORD PTR[rsp]
mov r15,r9
jmp $L$sub
ALIGN 16
$L$sub:: sbb rax,QWORD PTR[r14*8+rcx]
mov QWORD PTR[r14*8+rdi],rax
mov rax,QWORD PTR[8+r14*8+rsi]
lea r14,QWORD PTR[1+r14]
dec r15
jnz $L$sub
sbb rax,0
xor r14,r14
and rsi,rax
not rax
mov rcx,rdi
and rcx,rax
mov r15,r9
or rsi,rcx
ALIGN 16
$L$copy::
mov rax,QWORD PTR[r14*8+rsi]
mov QWORD PTR[r14*8+rsp],r14
mov QWORD PTR[r14*8+rdi],rax
lea r14,QWORD PTR[1+r14]
sub r15,1
jnz $L$copy
mov rsi,QWORD PTR[8+r9*8+rsp]
mov rax,1
movaps xmm6,XMMWORD PTR[rsi]
movaps xmm7,XMMWORD PTR[16+rsi]
lea rsi,QWORD PTR[40+rsi]
mov r15,QWORD PTR[rsi]
mov r14,QWORD PTR[8+rsi]
mov r13,QWORD PTR[16+rsi]
mov r12,QWORD PTR[24+rsi]
mov rbp,QWORD PTR[32+rsi]
mov rbx,QWORD PTR[40+rsi]
lea rsp,QWORD PTR[48+rsi]
$L$mul_epilogue::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_bn_mul_mont_gather5::
bn_mul_mont_gather5 ENDP
ALIGN 16
bn_mul4x_mont_gather5 PROC PRIVATE
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
mov QWORD PTR[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_bn_mul4x_mont_gather5::
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD PTR[40+rsp]
mov r9,QWORD PTR[48+rsp]
$L$mul4x_enter::
mov r9d,r9d
mov r10d,DWORD PTR[56+rsp]
push rbx
push rbp
push r12
push r13
push r14
push r15
lea rsp,QWORD PTR[((-40))+rsp]
movaps XMMWORD PTR[rsp],xmm6
movaps XMMWORD PTR[16+rsp],xmm7
$L$mul4x_alloca::
mov rax,rsp
lea r11,QWORD PTR[4+r9]
neg r11
lea rsp,QWORD PTR[r11*8+rsp]
and rsp,-1024
mov QWORD PTR[8+r9*8+rsp],rax
$L$mul4x_body::
mov QWORD PTR[16+r9*8+rsp],rdi
mov r12,rdx
mov r11,r10
shr r10,3
and r11,7
not r10
lea rax,QWORD PTR[$L$magic_masks]
and r10,3
lea r12,QWORD PTR[96+r11*8+r12]
movq xmm4,QWORD PTR[r10*8+rax]
movq xmm5,QWORD PTR[8+r10*8+rax]
movq xmm6,QWORD PTR[16+r10*8+rax]
movq xmm7,QWORD PTR[24+r10*8+rax]
movq xmm0,QWORD PTR[((-96))+r12]
movq xmm1,QWORD PTR[((-32))+r12]
pand xmm0,xmm4
movq xmm2,QWORD PTR[32+r12]
pand xmm1,xmm5
movq xmm3,QWORD PTR[96+r12]
pand xmm2,xmm6
por xmm0,xmm1
pand xmm3,xmm7
por xmm0,xmm2
lea r12,QWORD PTR[256+r12]
por xmm0,xmm3
DB 102,72,15,126,195
mov r8,QWORD PTR[r8]
mov rax,QWORD PTR[rsi]
xor r14,r14
xor r15,r15
movq xmm0,QWORD PTR[((-96))+r12]
movq xmm1,QWORD PTR[((-32))+r12]
pand xmm0,xmm4
movq xmm2,QWORD PTR[32+r12]
pand xmm1,xmm5
mov rbp,r8
mul rbx
mov r10,rax
mov rax,QWORD PTR[rcx]
movq xmm3,QWORD PTR[96+r12]
pand xmm2,xmm6
por xmm0,xmm1
pand xmm3,xmm7
imul rbp,r10
mov r11,rdx
por xmm0,xmm2
lea r12,QWORD PTR[256+r12]
por xmm0,xmm3
mul rbp
add r10,rax
mov rax,QWORD PTR[8+rsi]
adc rdx,0
mov rdi,rdx
mul rbx
add r11,rax
mov rax,QWORD PTR[8+rcx]
adc rdx,0
mov r10,rdx
mul rbp
add rdi,rax
mov rax,QWORD PTR[16+rsi]
adc rdx,0
add rdi,r11
lea r15,QWORD PTR[4+r15]
adc rdx,0
mov QWORD PTR[rsp],rdi
mov r13,rdx
jmp $L$1st4x
ALIGN 16
$L$1st4x::
mul rbx
add r10,rax
mov rax,QWORD PTR[((-16))+r15*8+rcx]
adc rdx,0
mov r11,rdx
mul rbp
add r13,rax
mov rax,QWORD PTR[((-8))+r15*8+rsi]
adc rdx,0
add r13,r10
adc rdx,0
mov QWORD PTR[((-24))+r15*8+rsp],r13
mov rdi,rdx
mul rbx
add r11,rax
mov rax,QWORD PTR[((-8))+r15*8+rcx]
adc rdx,0
mov r10,rdx
mul rbp
add rdi,rax
mov rax,QWORD PTR[r15*8+rsi]
adc rdx,0
add rdi,r11
adc rdx,0
mov QWORD PTR[((-16))+r15*8+rsp],rdi
mov r13,rdx
mul rbx
add r10,rax
mov rax,QWORD PTR[r15*8+rcx]
adc rdx,0
mov r11,rdx
mul rbp
add r13,rax
mov rax,QWORD PTR[8+r15*8+rsi]
adc rdx,0
add r13,r10
adc rdx,0
mov QWORD PTR[((-8))+r15*8+rsp],r13
mov rdi,rdx
mul rbx
add r11,rax
mov rax,QWORD PTR[8+r15*8+rcx]
adc rdx,0
lea r15,QWORD PTR[4+r15]
mov r10,rdx
mul rbp
add rdi,rax
mov rax,QWORD PTR[((-16))+r15*8+rsi]
adc rdx,0
add rdi,r11
adc rdx,0
mov QWORD PTR[((-32))+r15*8+rsp],rdi
mov r13,rdx
cmp r15,r9
jl $L$1st4x
mul rbx
add r10,rax
mov rax,QWORD PTR[((-16))+r15*8+rcx]
adc rdx,0
mov r11,rdx
mul rbp
add r13,rax
mov rax,QWORD PTR[((-8))+r15*8+rsi]
adc rdx,0
add r13,r10
adc rdx,0
mov QWORD PTR[((-24))+r15*8+rsp],r13
mov rdi,rdx
mul rbx
add r11,rax
mov rax,QWORD PTR[((-8))+r15*8+rcx]
adc rdx,0
mov r10,rdx
mul rbp
add rdi,rax
mov rax,QWORD PTR[rsi]
adc rdx,0
add rdi,r11
adc rdx,0
mov QWORD PTR[((-16))+r15*8+rsp],rdi
mov r13,rdx
DB 102,72,15,126,195
xor rdi,rdi
add r13,r10
adc rdi,0
mov QWORD PTR[((-8))+r15*8+rsp],r13
mov QWORD PTR[r15*8+rsp],rdi
lea r14,QWORD PTR[1+r14]
ALIGN 4
$L$outer4x::
xor r15,r15
movq xmm0,QWORD PTR[((-96))+r12]
movq xmm1,QWORD PTR[((-32))+r12]
pand xmm0,xmm4
movq xmm2,QWORD PTR[32+r12]
pand xmm1,xmm5
mov r10,QWORD PTR[rsp]
mov rbp,r8
mul rbx
add r10,rax
mov rax,QWORD PTR[rcx]
adc rdx,0
movq xmm3,QWORD PTR[96+r12]
pand xmm2,xmm6
por xmm0,xmm1
pand xmm3,xmm7
imul rbp,r10
mov r11,rdx
por xmm0,xmm2
lea r12,QWORD PTR[256+r12]
por xmm0,xmm3
mul rbp
add r10,rax
mov rax,QWORD PTR[8+rsi]
adc rdx,0
mov rdi,rdx
mul rbx
add r11,rax
mov rax,QWORD PTR[8+rcx]
adc rdx,0
add r11,QWORD PTR[8+rsp]
adc rdx,0
mov r10,rdx
mul rbp
add rdi,rax
mov rax,QWORD PTR[16+rsi]
adc rdx,0
add rdi,r11
lea r15,QWORD PTR[4+r15]
adc rdx,0
mov r13,rdx
jmp $L$inner4x
ALIGN 16
$L$inner4x::
mul rbx
add r10,rax
mov rax,QWORD PTR[((-16))+r15*8+rcx]
adc rdx,0
add r10,QWORD PTR[((-16))+r15*8+rsp]
adc rdx,0
mov r11,rdx
mul rbp
add r13,rax
mov rax,QWORD PTR[((-8))+r15*8+rsi]
adc rdx,0
add r13,r10
adc rdx,0
mov QWORD PTR[((-32))+r15*8+rsp],rdi
mov rdi,rdx
mul rbx
add r11,rax
mov rax,QWORD PTR[((-8))+r15*8+rcx]
adc rdx,0
add r11,QWORD PTR[((-8))+r15*8+rsp]
adc rdx,0
mov r10,rdx
mul rbp
add rdi,rax
mov rax,QWORD PTR[r15*8+rsi]
adc rdx,0
add rdi,r11
adc rdx,0
mov QWORD PTR[((-24))+r15*8+rsp],r13
mov r13,rdx
mul rbx
add r10,rax
mov rax,QWORD PTR[r15*8+rcx]
adc rdx,0
add r10,QWORD PTR[r15*8+rsp]
adc rdx,0
mov r11,rdx
mul rbp
add r13,rax
mov rax,QWORD PTR[8+r15*8+rsi]
adc rdx,0
add r13,r10
adc rdx,0
mov QWORD PTR[((-16))+r15*8+rsp],rdi
mov rdi,rdx
mul rbx
add r11,rax
mov rax,QWORD PTR[8+r15*8+rcx]
adc rdx,0
add r11,QWORD PTR[8+r15*8+rsp]
adc rdx,0
lea r15,QWORD PTR[4+r15]
mov r10,rdx
mul rbp
add rdi,rax
mov rax,QWORD PTR[((-16))+r15*8+rsi]
adc rdx,0
add rdi,r11
adc rdx,0
mov QWORD PTR[((-40))+r15*8+rsp],r13
mov r13,rdx
cmp r15,r9
jl $L$inner4x
mul rbx
add r10,rax
mov rax,QWORD PTR[((-16))+r15*8+rcx]
adc rdx,0
add r10,QWORD PTR[((-16))+r15*8+rsp]
adc rdx,0
mov r11,rdx
mul rbp
add r13,rax
mov rax,QWORD PTR[((-8))+r15*8+rsi]
adc rdx,0
add r13,r10
adc rdx,0
mov QWORD PTR[((-32))+r15*8+rsp],rdi
mov rdi,rdx
mul rbx
add r11,rax
mov rax,QWORD PTR[((-8))+r15*8+rcx]
adc rdx,0
add r11,QWORD PTR[((-8))+r15*8+rsp]
adc rdx,0
lea r14,QWORD PTR[1+r14]
mov r10,rdx
mul rbp
add rdi,rax
mov rax,QWORD PTR[rsi]
adc rdx,0
add rdi,r11
adc rdx,0
mov QWORD PTR[((-24))+r15*8+rsp],r13
mov r13,rdx
DB 102,72,15,126,195
mov QWORD PTR[((-16))+r15*8+rsp],rdi
xor rdi,rdi
add r13,r10
adc rdi,0
add r13,QWORD PTR[r9*8+rsp]
adc rdi,0
mov QWORD PTR[((-8))+r15*8+rsp],r13
mov QWORD PTR[r15*8+rsp],rdi
cmp r14,r9
jl $L$outer4x
mov rdi,QWORD PTR[16+r9*8+rsp]
mov rax,QWORD PTR[rsp]
pxor xmm0,xmm0
mov rdx,QWORD PTR[8+rsp]
shr r9,2
lea rsi,QWORD PTR[rsp]
xor r14,r14
sub rax,QWORD PTR[rcx]
mov rbx,QWORD PTR[16+rsi]
mov rbp,QWORD PTR[24+rsi]
sbb rdx,QWORD PTR[8+rcx]
lea r15,QWORD PTR[((-1))+r9]
jmp $L$sub4x
ALIGN 16
$L$sub4x::
mov QWORD PTR[r14*8+rdi],rax
mov QWORD PTR[8+r14*8+rdi],rdx
sbb rbx,QWORD PTR[16+r14*8+rcx]
mov rax,QWORD PTR[32+r14*8+rsi]
mov rdx,QWORD PTR[40+r14*8+rsi]
sbb rbp,QWORD PTR[24+r14*8+rcx]
mov QWORD PTR[16+r14*8+rdi],rbx
mov QWORD PTR[24+r14*8+rdi],rbp
sbb rax,QWORD PTR[32+r14*8+rcx]
mov rbx,QWORD PTR[48+r14*8+rsi]
mov rbp,QWORD PTR[56+r14*8+rsi]
sbb rdx,QWORD PTR[40+r14*8+rcx]
lea r14,QWORD PTR[4+r14]
dec r15
jnz $L$sub4x
mov QWORD PTR[r14*8+rdi],rax
mov rax,QWORD PTR[32+r14*8+rsi]
sbb rbx,QWORD PTR[16+r14*8+rcx]
mov QWORD PTR[8+r14*8+rdi],rdx
sbb rbp,QWORD PTR[24+r14*8+rcx]
mov QWORD PTR[16+r14*8+rdi],rbx
sbb rax,0
mov QWORD PTR[24+r14*8+rdi],rbp
xor r14,r14
and rsi,rax
not rax
mov rcx,rdi
and rcx,rax
lea r15,QWORD PTR[((-1))+r9]
or rsi,rcx
movdqu xmm1,XMMWORD PTR[rsi]
movdqa XMMWORD PTR[rsp],xmm0
movdqu XMMWORD PTR[rdi],xmm1
jmp $L$copy4x
ALIGN 16
$L$copy4x::
movdqu xmm2,XMMWORD PTR[16+r14*1+rsi]
movdqu xmm1,XMMWORD PTR[32+r14*1+rsi]
movdqa XMMWORD PTR[16+r14*1+rsp],xmm0
movdqu XMMWORD PTR[16+r14*1+rdi],xmm2
movdqa XMMWORD PTR[32+r14*1+rsp],xmm0
movdqu XMMWORD PTR[32+r14*1+rdi],xmm1
lea r14,QWORD PTR[32+r14]
dec r15
jnz $L$copy4x
shl r9,2
movdqu xmm2,XMMWORD PTR[16+r14*1+rsi]
movdqa XMMWORD PTR[16+r14*1+rsp],xmm0
movdqu XMMWORD PTR[16+r14*1+rdi],xmm2
mov rsi,QWORD PTR[8+r9*8+rsp]
mov rax,1
movaps xmm6,XMMWORD PTR[rsi]
movaps xmm7,XMMWORD PTR[16+rsi]
lea rsi,QWORD PTR[40+rsi]
mov r15,QWORD PTR[rsi]
mov r14,QWORD PTR[8+rsi]
mov r13,QWORD PTR[16+rsi]
mov r12,QWORD PTR[24+rsi]
mov rbp,QWORD PTR[32+rsi]
mov rbx,QWORD PTR[40+rsi]
lea rsp,QWORD PTR[48+rsi]
$L$mul4x_epilogue::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_bn_mul4x_mont_gather5::
bn_mul4x_mont_gather5 ENDP
PUBLIC bn_scatter5
ALIGN 16
bn_scatter5 PROC PUBLIC
cmp rdx,0
jz $L$scatter_epilogue
lea r8,QWORD PTR[r9*8+r8]
$L$scatter::
mov rax,QWORD PTR[rcx]
lea rcx,QWORD PTR[8+rcx]
mov QWORD PTR[r8],rax
lea r8,QWORD PTR[256+r8]
sub rdx,1
jnz $L$scatter
$L$scatter_epilogue::
DB 0F3h,0C3h ;repret
bn_scatter5 ENDP
PUBLIC bn_gather5
ALIGN 16
bn_gather5 PROC PUBLIC
$L$SEH_begin_bn_gather5::
DB 048h,083h,0ech,028h
DB 00fh,029h,034h,024h
DB 00fh,029h,07ch,024h,010h
mov r11,r9
shr r9,3
and r11,7
not r9
lea rax,QWORD PTR[$L$magic_masks]
and r9,3
lea r8,QWORD PTR[96+r11*8+r8]
movq xmm4,QWORD PTR[r9*8+rax]
movq xmm5,QWORD PTR[8+r9*8+rax]
movq xmm6,QWORD PTR[16+r9*8+rax]
movq xmm7,QWORD PTR[24+r9*8+rax]
jmp $L$gather
ALIGN 16
$L$gather::
movq xmm0,QWORD PTR[((-96))+r8]
movq xmm1,QWORD PTR[((-32))+r8]
pand xmm0,xmm4
movq xmm2,QWORD PTR[32+r8]
pand xmm1,xmm5
movq xmm3,QWORD PTR[96+r8]
pand xmm2,xmm6
por xmm0,xmm1
pand xmm3,xmm7
por xmm0,xmm2
lea r8,QWORD PTR[256+r8]
por xmm0,xmm3
movq QWORD PTR[rcx],xmm0
lea rcx,QWORD PTR[8+rcx]
sub rdx,1
jnz $L$gather
movaps XMMWORD PTR[rsp],xmm6
movaps XMMWORD PTR[16+rsp],xmm7
lea rsp,QWORD PTR[40+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_bn_gather5::
bn_gather5 ENDP
ALIGN 64
$L$magic_masks::
DD 0,0,0,0,0,0,-1,-1
DD 0,0,0,0,0,0,0,0
DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
DB 112,101,110,115,115,108,46,111,114,103,62,0
EXTERN __imp_RtlVirtualUnwind:NEAR
ALIGN 16
mul_handler PROC PRIVATE
push rsi
push rdi
push rbx
push rbp
push r12
push r13
push r14
push r15
pushfq
sub rsp,64
mov rax,QWORD PTR[120+r8]
mov rbx,QWORD PTR[248+r8]
mov rsi,QWORD PTR[8+r9]
mov r11,QWORD PTR[56+r9]
mov r10d,DWORD PTR[r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jb $L$common_seh_tail
lea rax,QWORD PTR[88+rax]
mov r10d,DWORD PTR[4+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jb $L$common_seh_tail
mov rax,QWORD PTR[152+r8]
mov r10d,DWORD PTR[8+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jae $L$common_seh_tail
mov r10,QWORD PTR[192+r8]
mov rax,QWORD PTR[8+r10*8+rax]
movaps xmm0,XMMWORD PTR[rax]
movaps xmm1,XMMWORD PTR[16+rax]
lea rax,QWORD PTR[88+rax]
mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax]
mov r13,QWORD PTR[((-32))+rax]
mov r14,QWORD PTR[((-40))+rax]
mov r15,QWORD PTR[((-48))+rax]
mov QWORD PTR[144+r8],rbx
mov QWORD PTR[160+r8],rbp
mov QWORD PTR[216+r8],r12
mov QWORD PTR[224+r8],r13
mov QWORD PTR[232+r8],r14
mov QWORD PTR[240+r8],r15
movups XMMWORD PTR[512+r8],xmm0
movups XMMWORD PTR[528+r8],xmm1
$L$common_seh_tail::
mov rdi,QWORD PTR[8+rax]
mov rsi,QWORD PTR[16+rax]
mov QWORD PTR[152+r8],rax
mov QWORD PTR[168+r8],rsi
mov QWORD PTR[176+r8],rdi
mov rdi,QWORD PTR[40+r9]
mov rsi,r8
mov ecx,154
DD 0a548f3fch
mov rsi,r9
xor rcx,rcx
mov rdx,QWORD PTR[8+rsi]
mov r8,QWORD PTR[rsi]
mov r9,QWORD PTR[16+rsi]
mov r10,QWORD PTR[40+rsi]
lea r11,QWORD PTR[56+rsi]
lea r12,QWORD PTR[24+rsi]
mov QWORD PTR[32+rsp],r10
mov QWORD PTR[40+rsp],r11
mov QWORD PTR[48+rsp],r12
mov QWORD PTR[56+rsp],rcx
call QWORD PTR[__imp_RtlVirtualUnwind]
mov eax,1
add rsp,64
popfq
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
pop rdi
pop rsi
DB 0F3h,0C3h ;repret
mul_handler ENDP
.text$ ENDS
.pdata SEGMENT READONLY ALIGN(4)
ALIGN 4
DD imagerel $L$SEH_begin_bn_mul_mont_gather5
DD imagerel $L$SEH_end_bn_mul_mont_gather5
DD imagerel $L$SEH_info_bn_mul_mont_gather5
DD imagerel $L$SEH_begin_bn_mul4x_mont_gather5
DD imagerel $L$SEH_end_bn_mul4x_mont_gather5
DD imagerel $L$SEH_info_bn_mul4x_mont_gather5
DD imagerel $L$SEH_begin_bn_gather5
DD imagerel $L$SEH_end_bn_gather5
DD imagerel $L$SEH_info_bn_gather5
.pdata ENDS
.xdata SEGMENT READONLY ALIGN(8)
ALIGN 8
$L$SEH_info_bn_mul_mont_gather5::
DB 9,0,0,0
DD imagerel mul_handler
DD imagerel $L$mul_alloca,imagerel $L$mul_body,imagerel $L$mul_epilogue
ALIGN 8
$L$SEH_info_bn_mul4x_mont_gather5::
DB 9,0,0,0
DD imagerel mul_handler
DD imagerel $L$mul4x_alloca,imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
ALIGN 8
$L$SEH_info_bn_gather5::
DB 001h,00dh,005h,000h
DB 00dh,078h,001h,000h
DB 008h,068h,000h,000h
DB 004h,042h,000h,000h
ALIGN 8
.xdata ENDS
END

deps/openssl/openssl.gyp (+19)

@@ -696,6 +696,7 @@
'LIB_BN_ASM',
'MD5_ASM',
'OPENSSL_BN_ASM',
'OPENSSL_BN_ASM_MONT',
'OPENSSL_CPUID_OBJ',
'RIP_ASM',
'RMD160_ASM',
@@ -730,12 +731,18 @@
]
}],
['OS!="win" and OS!="mac" and target_arch=="x64"', {
'defines': [
'OPENSSL_BN_ASM_MONT5',
'OPENSSL_BN_ASM_GF2m',
],
'sources': [
'asm/x64-elf-gas/aes/aes-x86_64.s',
'asm/x64-elf-gas/aes/aesni-x86_64.s',
'asm/x64-elf-gas/aes/aesni-sha1-x86_64.s',
'asm/x64-elf-gas/bn/modexp512-x86_64.s',
'asm/x64-elf-gas/bn/x86_64-mont.s',
'asm/x64-elf-gas/bn/x86_64-mont5.s',
'asm/x64-elf-gas/bn/x86_64-gf2m.s',
'asm/x64-elf-gas/camellia/cmll-x86_64.s',
'asm/x64-elf-gas/md5/md5-x86_64.s',
'asm/x64-elf-gas/rc4/rc4-x86_64.s',
@@ -779,12 +786,18 @@
]
}],
['OS=="mac" and target_arch=="x64"', {
'defines': [
'OPENSSL_BN_ASM_MONT5',
'OPENSSL_BN_ASM_GF2m',
],
'sources': [
'asm/x64-macosx-gas/aes/aes-x86_64.s',
'asm/x64-macosx-gas/aes/aesni-x86_64.s',
'asm/x64-macosx-gas/aes/aesni-sha1-x86_64.s',
'asm/x64-macosx-gas/bn/modexp512-x86_64.s',
'asm/x64-macosx-gas/bn/x86_64-mont.s',
'asm/x64-macosx-gas/bn/x86_64-mont5.s',
'asm/x64-macosx-gas/bn/x86_64-gf2m.s',
'asm/x64-macosx-gas/camellia/cmll-x86_64.s',
'asm/x64-macosx-gas/md5/md5-x86_64.s',
'asm/x64-macosx-gas/rc4/rc4-x86_64.s',
@@ -847,12 +860,18 @@
]
}],
['OS=="win" and target_arch=="x64"', {
'defines': [
'OPENSSL_BN_ASM_MONT5',
'OPENSSL_BN_ASM_GF2m',
],
'sources': [
'asm/x64-win32-masm/aes/aes-x86_64.asm',
'asm/x64-win32-masm/aes/aesni-x86_64.asm',
'asm/x64-win32-masm/aes/aesni-sha1-x86_64.asm',
'asm/x64-win32-masm/bn/modexp512-x86_64.asm',
'asm/x64-win32-masm/bn/x86_64-mont.asm',
'asm/x64-win32-masm/bn/x86_64-mont5.asm',
'asm/x64-win32-masm/bn/x86_64-gf2m.asm',
'asm/x64-win32-masm/camellia/cmll-x86_64.asm',
'asm/x64-win32-masm/md5/md5-x86_64.asm',
'asm/x64-win32-masm/rc4/rc4-x86_64.asm',
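
Taken together, the generated files add these entry points; the
prototypes below are a sketch of the declarations OpenSSL's C bignum
code of this era uses for them (see crypto/bn/bn_gf2m.c and
crypto/bn/bn_exp.c), so exact qualifiers may differ:

#include <stddef.h>

typedef unsigned long BN_ULONG;   /* 64-bit limb on x86_64 builds */

/* x86_64-gf2m: r[0..3] = (a1:a0) (x) (b1:b0) in GF(2)[x] */
void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0,
                     BN_ULONG b1, BN_ULONG b0);

/* x86_64-mont5: Montgomery multiplication fused with the masked table
 * gather, plus the scatter/gather helpers used by modular exponentiation */
int  bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
                         const void *table, const BN_ULONG *np,
                         const BN_ULONG *n0, int num, int power);
void bn_scatter5(const BN_ULONG *inp, size_t num,
                 void *table, size_t power);
void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);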
