mirror of https://github.com/lukechilds/node.git
8 changed files with 3585 additions and 0 deletions
@ -0,0 +1,295 @@ |
|||||
|
.text |
||||
|
|
||||
|
|
||||
|
# _mul_1x1 -- 64x64 -> 128-bit carry-less (XOR-accumulated) multiply.
#
# Private, register-based helper (not callable through the C ABI).
#   In:    %rax = a, %rbp = b, %r8 = 4-bit window mask (the caller in this
#          file passes 15)
#   Out:   %rdx:%rax = product (high:low)
#   Clobb: %rbx, %rcx, %rsi, %rdi, %rbp, %r9-%r14, %xmm0, %xmm1, flags
#   Stack: 128+8 bytes; 0..120(%rsp) hold a 16-entry table of qwords.
#
# Method: a table tab[i] (i = 0..15) of XOR-combinations of a1, a1<<1,
# a1<<2, a1<<3 is built, where a1 is a with its top three bits cleared.
# b is then consumed four bits at a time.  Odd-numbered nibbles are
# accumulated in %rdx:%rax with integer shifts; even-numbered nibbles are
# accumulated in %xmm0 with whole-byte shifts.  The contribution of the
# top three bits of a is folded in up front.
.type	_mul_1x1,@function
.align	16
_mul_1x1:
	subq	$128+8,%rsp
	movq	$-1,%r9
	leaq	(%rax,%rax,1),%rsi	# a<<1
	shrq	$3,%r9
	leaq	(,%rax,4),%rdi		# a<<2
	andq	%rax,%r9		# a1 = a with the top 3 bits cleared
	leaq	(,%rax,8),%r12		# a8 = a<<3
	sarq	$63,%rax		# broadcast bit 63 of a
	leaq	(%r9,%r9,1),%r10	# a2 = a1<<1
	sarq	$63,%rsi		# broadcast bit 62 of a
	leaq	(,%r9,4),%r11		# a4 = a1<<2
	andq	%rbp,%rax
	sarq	$63,%rdi		# broadcast bit 61 of a
	movq	%rax,%rdx
	shlq	$63,%rax
	andq	%rbp,%rsi
	shrq	$1,%rdx
	movq	%rsi,%rcx
	shlq	$62,%rsi
	andq	%rbp,%rdi
	shrq	$2,%rcx
	xorq	%rsi,%rax
	movq	%rdi,%rbx
	shlq	$61,%rdi
	xorq	%rcx,%rdx
	shrq	$3,%rbx
	xorq	%rdi,%rax
	xorq	%rbx,%rdx

	# Build the 16-entry lookup table on the stack.
	movq	%r9,%r13
	movq	$0,0(%rsp)		# tab[0]  = 0
	xorq	%r10,%r13		# a1^a2
	movq	%r9,8(%rsp)		# tab[1]  = a1
	movq	%r11,%r14
	movq	%r10,16(%rsp)		# tab[2]  = a2
	xorq	%r12,%r14		# a4^a8
	movq	%r13,24(%rsp)		# tab[3]  = a1^a2

	xorq	%r11,%r9
	movq	%r11,32(%rsp)		# tab[4]  = a4
	xorq	%r11,%r10
	movq	%r9,40(%rsp)		# tab[5]  = a1^a4
	xorq	%r11,%r13
	movq	%r10,48(%rsp)		# tab[6]  = a2^a4
	xorq	%r14,%r9		# a1^a8
	movq	%r13,56(%rsp)		# tab[7]  = a1^a2^a4
	xorq	%r14,%r10		# a2^a8

	movq	%r12,64(%rsp)		# tab[8]  = a8
	xorq	%r14,%r13		# a1^a2^a8
	movq	%r9,72(%rsp)		# tab[9]  = a1^a8
	xorq	%r11,%r9		# a1^a4^a8
	movq	%r10,80(%rsp)		# tab[10] = a2^a8
	xorq	%r11,%r10		# a2^a4^a8
	movq	%r13,88(%rsp)		# tab[11] = a1^a2^a8

	xorq	%r11,%r13		# a1^a2^a4^a8
	movq	%r14,96(%rsp)		# tab[12] = a4^a8
	movq	%r8,%rsi
	movq	%r9,104(%rsp)		# tab[13] = a1^a4^a8
	andq	%rbp,%rsi		# index for nibble 0 of b
	movq	%r10,112(%rsp)		# tab[14] = a2^a4^a8
	shrq	$4,%rbp
	movq	%r13,120(%rsp)		# tab[15] = a1^a2^a4^a8
	movq	%r8,%rdi
	andq	%rbp,%rdi		# index for nibble 1 of b
	shrq	$4,%rbp

	movq	(%rsp,%rsi,8),%xmm0	# SSE accumulator starts as tab[nibble 0]
	movq	%r8,%rsi
	andq	%rbp,%rsi
	shrq	$4,%rbp

	# nibble 1 (integer, <<4) and nibble 2 (SSE, <<8)
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$4,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$60,%rbx
	xorq	%rcx,%rax
	pslldq	$1,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	# nibble 3 (integer, <<12) and nibble 4 (SSE, <<16)
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$12,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$52,%rbx
	xorq	%rcx,%rax
	pslldq	$2,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	# nibble 5 (integer, <<20) and nibble 6 (SSE, <<24)
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$20,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$44,%rbx
	xorq	%rcx,%rax
	pslldq	$3,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	# nibble 7 (integer, <<28) and nibble 8 (SSE, <<32)
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$28,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$36,%rbx
	xorq	%rcx,%rax
	pslldq	$4,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	# nibble 9 (integer, <<36) and nibble 10 (SSE, <<40)
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$36,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$28,%rbx
	xorq	%rcx,%rax
	pslldq	$5,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	# nibble 11 (integer, <<44) and nibble 12 (SSE, <<48)
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$44,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$20,%rbx
	xorq	%rcx,%rax
	pslldq	$6,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	# nibble 13 (integer, <<52) and nibble 14 (SSE, <<56)
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$52,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$12,%rbx
	xorq	%rcx,%rax
	pslldq	$7,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	# nibble 15 (integer, <<60), then merge the SSE accumulator.
	movq	(%rsp,%rdi,8),%rcx
	movq	%rcx,%rbx
	shlq	$60,%rcx
	.byte	102,72,15,126,198	# movq %xmm0,%rsi  (low half of SSE sum)
	shrq	$4,%rbx
	xorq	%rcx,%rax
	psrldq	$8,%xmm0
	xorq	%rbx,%rdx
	.byte	102,72,15,126,199	# movq %xmm0,%rdi  (high half of SSE sum)
	xorq	%rsi,%rax
	xorq	%rdi,%rdx

	addq	$128+8,%rsp
	.byte	0xf3,0xc3		# rep ret
.Lend_mul_1x1:
.size	_mul_1x1,.-_mul_1x1
||||
|
|
||||
|
# bn_GF2m_mul_2x2 -- 128x128 -> 256-bit carry-less multiply (three-multiply
# Karatsuba scheme).
#
# ABI:  System V AMD64 register arguments.
#   %rdi = pointer to four output qwords
#   %rsi, %rcx = the word pair multiplied to form the upper product
#   %rdx, %r8  = the word pair multiplied to form the lower product
# The low 128 bits of the result are stored at 0(%rdi), the high 128 bits
# at 16(%rdi).
#
# Bit 33 of the 64-bit load from OPENSSL_ia32cap_P selects the path: when
# set, the hardware carry-less multiply (the .byte-encoded PCLMULQDQ) is
# used; otherwise the table-driven helper _mul_1x1 is called three times.
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,@function
.align	16
bn_GF2m_mul_2x2:
	movq	OPENSSL_ia32cap_P(%rip),%rax
	btq	$33,%rax
	jnc	.Lvanilla_mul_2x2

	# ---- PCLMULQDQ path ----
	.byte	102,72,15,110,198	# movq %rsi,%xmm0
	.byte	102,72,15,110,201	# movq %rcx,%xmm1
	.byte	102,72,15,110,210	# movq %rdx,%xmm2
	.byte	102,73,15,110,216	# movq %r8,%xmm3
	movdqa	%xmm0,%xmm4
	movdqa	%xmm1,%xmm5
	.byte	102,15,58,68,193,0	# pclmulqdq $0,%xmm1,%xmm0  (upper product)
	pxor	%xmm2,%xmm4		# sum of the two first-operand words
	pxor	%xmm3,%xmm5		# sum of the two second-operand words
	.byte	102,15,58,68,211,0	# pclmulqdq $0,%xmm3,%xmm2  (lower product)
	.byte	102,15,58,68,229,0	# pclmulqdq $0,%xmm5,%xmm4  (product of sums)
	xorps	%xmm0,%xmm4
	xorps	%xmm2,%xmm4		# middle term
	movdqa	%xmm4,%xmm5
	pslldq	$8,%xmm4
	psrldq	$8,%xmm5
	pxor	%xmm4,%xmm2		# fold low half of middle term into low 128 bits
	pxor	%xmm5,%xmm0		# fold high half of middle term into high 128 bits
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm0,16(%rdi)
	.byte	0xf3,0xc3		# rep ret

	# ---- table-driven path ----
.align	16
.Lvanilla_mul_2x2:
	leaq	-136(%rsp),%rsp
	movq	%r14,80(%rsp)		# save the callee-saved registers that
	movq	%r13,88(%rsp)		# _mul_1x1 overwrites
	movq	%r12,96(%rsp)
	movq	%rbp,104(%rsp)
	movq	%rbx,112(%rsp)
.Lbody_mul_2x2:
	movq	%rdi,32(%rsp)		# spill all five arguments
	movq	%rsi,40(%rsp)
	movq	%rdx,48(%rsp)
	movq	%rcx,56(%rsp)
	movq	%r8,64(%rsp)

	movq	$15,%r8			# nibble mask expected by _mul_1x1
	movq	%rsi,%rax
	movq	%rcx,%rbp
	call	_mul_1x1		# upper product

	movq	%rax,16(%rsp)
	movq	%rdx,24(%rsp)

	movq	48(%rsp),%rax
	movq	64(%rsp),%rbp
	call	_mul_1x1		# lower product

	movq	%rax,0(%rsp)
	movq	%rdx,8(%rsp)

	movq	40(%rsp),%rax
	movq	56(%rsp),%rbp
	xorq	48(%rsp),%rax
	xorq	64(%rsp),%rbp
	call	_mul_1x1		# product of the XOR-sums

	movq	0(%rsp),%rbx		# lower product, low word
	movq	8(%rsp),%rcx		# lower product, high word
	movq	16(%rsp),%rdi		# upper product, low word
	movq	24(%rsp),%rsi		# upper product, high word
	movq	32(%rsp),%rbp		# output pointer

	# Combine the three partial products into the four result words.
	xorq	%rdx,%rax
	xorq	%rcx,%rdx
	xorq	%rbx,%rax
	movq	%rbx,0(%rbp)
	xorq	%rdi,%rdx
	movq	%rsi,24(%rbp)
	xorq	%rsi,%rax
	xorq	%rsi,%rdx
	xorq	%rdx,%rax
	movq	%rdx,16(%rbp)
	movq	%rax,8(%rbp)

	movq	80(%rsp),%r14
	movq	88(%rsp),%r13
	movq	96(%rsp),%r12
	movq	104(%rsp),%rbp
	movq	112(%rsp),%rbx
	leaq	136(%rsp),%rsp
	.byte	0xf3,0xc3		# rep ret
.Lend_mul_2x2:
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
# NUL-terminated ASCII identification string:
# "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by" + author e-mail address
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16
@ -0,0 +1,785 @@ |
|||||
|
.text |
||||
|
|
||||
|
|
||||
|
# bn_mul_mont_gather5 -- word-by-word Montgomery-style multiply whose second
# operand is gathered, one word per outer iteration, from an interleaved
# table using the masks at .Lmagic_masks.
#
# ABI:  System V AMD64.
#   %rdi = result vector (rp)
#   %rsi = first operand vector (ap)
#   %rdx = base of the gather table
#   %rcx = modulus vector (np)
#   %r8  = pointer to the n0 word (dereferenced once)
#   %r9d = number of 64-bit words (num)
#   8(%rsp) at entry = table index ("power"), read as 32 bits
#   Returns 1 in %rax.
#
# When num is a multiple of 4 and at least 8, control transfers to
# .Lmul4x_enter (the 4-way unrolled variant); otherwise the code below runs.
#
# Register roles in the loops:
#   %rbx = current gathered multiplier word     %rbp = m (t[0]*n0, low 64 bits)
#   %r12 = cursor into the gather table         %r14 = outer counter i
#   %r15 = inner counter j                      %r10,%r11,%r13 = running carries
#   (%rsp)... = temporary vector t[0..num]; the caller's %rsp is stored at
#   8(%rsp,num,8).
.globl	bn_mul_mont_gather5
.type	bn_mul_mont_gather5,@function
.align	64
bn_mul_mont_gather5:
	testl	$3,%r9d
	jnz	.Lmul_enter
	cmpl	$8,%r9d
	jb	.Lmul_enter
	jmp	.Lmul4x_enter

.align	16
.Lmul_enter:
	movl	%r9d,%r9d		# zero-extend num
	movl	8(%rsp),%r10d		# table index from the stack argument
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	movq	%rsp,%rax
	leaq	2(%r9),%r11
	negq	%r11
	leaq	(%rsp,%r11,8),%rsp	# reserve num+2 qwords
	andq	$-1024,%rsp		# round the frame down to a 1 KiB boundary

	movq	%rax,8(%rsp,%r9,8)	# remember the pre-allocation %rsp
.Lmul_body:
	movq	%rdx,%r12
	movq	%r10,%r11
	shrq	$3,%r10
	andq	$7,%r11			# index & 7  -> qword offset within a 64-byte row
	notq	%r10
	leaq	.Lmagic_masks(%rip),%rax
	andq	$3,%r10			# ~(index >> 3) & 3 -> starting slot in the mask table
	leaq	96(%r12,%r11,8),%r12
	movq	0(%rax,%r10,8),%xmm4	# four select masks; per the mask table at most one is all-ones
	movq	8(%rax,%r10,8),%xmm5
	movq	16(%rax,%r10,8),%xmm6
	movq	24(%rax,%r10,8),%xmm7

	# Gather multiplier word 0: load four candidates, mask, OR together.
	movq	-96(%r12),%xmm0
	movq	-32(%r12),%xmm1
	pand	%xmm4,%xmm0
	movq	32(%r12),%xmm2
	pand	%xmm5,%xmm1
	movq	96(%r12),%xmm3
	pand	%xmm6,%xmm2
	por	%xmm1,%xmm0
	pand	%xmm7,%xmm3
	por	%xmm2,%xmm0
	leaq	256(%r12),%r12		# advance to the next table row group
	por	%xmm3,%xmm0

	.byte	102,72,15,126,195	# movq %xmm0,%rbx

	movq	(%r8),%r8		# n0 value
	movq	(%rsi),%rax		# ap[0]

	xorq	%r14,%r14		# i = 0
	xorq	%r15,%r15		# j = 0

	# Start gathering the next multiplier word, interleaved with the
	# first multiplications.
	movq	-96(%r12),%xmm0
	movq	-32(%r12),%xmm1
	pand	%xmm4,%xmm0
	movq	32(%r12),%xmm2
	pand	%xmm5,%xmm1

	movq	%r8,%rbp
	mulq	%rbx			# ap[0] * multiplier
	movq	%rax,%r10
	movq	(%rcx),%rax		# np[0]

	movq	96(%r12),%xmm3
	pand	%xmm6,%xmm2
	por	%xmm1,%xmm0
	pand	%xmm7,%xmm3

	imulq	%r10,%rbp		# m = low word * n0
	movq	%rdx,%r11

	por	%xmm2,%xmm0
	leaq	256(%r12),%r12
	por	%xmm3,%xmm0

	mulq	%rbp			# np[0] * m
	addq	%rax,%r10		# low word is discarded; only the carry matters
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%r13

	leaq	1(%r15),%r15		# j = 1
	jmp	.L1st_enter

.align	16
.L1st:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax	# ap[j]
	adcq	$0,%rdx
	addq	%r11,%r13
	movq	%r10,%r11
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)	# t[j-2]
	movq	%rdx,%r13

.L1st_enter:
	mulq	%rbx			# ap[j] * multiplier
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax	# np[j]
	adcq	$0,%rdx
	leaq	1(%r15),%r15		# j++
	movq	%rdx,%r10

	mulq	%rbp			# np[j] * m
	cmpq	%r9,%r15
	jne	.L1st

	.byte	102,72,15,126,195	# movq %xmm0,%rbx  (next multiplier word)

	addq	%rax,%r13
	movq	(%rsi),%rax		# ap[0] for the next outer pass
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)	# t[num-2]
	movq	%rdx,%r13
	movq	%r10,%r11

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)	# t[num-1]
	movq	%rdx,(%rsp,%r9,8)	# t[num] = top carry

	leaq	1(%r14),%r14		# i = 1
	jmp	.Louter
.align	16
.Louter:
	xorq	%r15,%r15		# j = 0
	movq	%r8,%rbp
	movq	(%rsp),%r10		# t[0]

	movq	-96(%r12),%xmm0		# gather the following multiplier word
	movq	-32(%r12),%xmm1
	pand	%xmm4,%xmm0
	movq	32(%r12),%xmm2
	pand	%xmm5,%xmm1

	mulq	%rbx			# ap[0] * multiplier
	addq	%rax,%r10		# + t[0]
	movq	(%rcx),%rax
	adcq	$0,%rdx

	movq	96(%r12),%xmm3
	pand	%xmm6,%xmm2
	por	%xmm1,%xmm0
	pand	%xmm7,%xmm3

	imulq	%r10,%rbp		# m = low word * n0
	movq	%rdx,%r11

	por	%xmm2,%xmm0
	leaq	256(%r12),%r12
	por	%xmm3,%xmm0

	mulq	%rbp			# np[0] * m
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	8(%rsp),%r10		# t[1]
	movq	%rdx,%r13

	leaq	1(%r15),%r15		# j = 1
	jmp	.Linner_enter

.align	16
.Linner:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax	# ap[j]
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10	# t[j]
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)	# t[j-2]
	movq	%rdx,%r13

.Linner_enter:
	mulq	%rbx			# ap[j] * multiplier
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax	# np[j]
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11
	leaq	1(%r15),%r15		# j++

	mulq	%rbp			# np[j] * m
	cmpq	%r9,%r15
	jne	.Linner

	.byte	102,72,15,126,195	# movq %xmm0,%rbx  (next multiplier word)

	addq	%rax,%r13
	movq	(%rsi),%rax		# ap[0] for the next outer pass
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10	# t[num]
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)	# t[num-2]
	movq	%rdx,%r13

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	addq	%r10,%r13		# add the previous top carry word
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)	# t[num-1]
	movq	%rdx,(%rsp,%r9,8)	# t[num]

	leaq	1(%r14),%r14		# i++
	cmpq	%r9,%r14
	jl	.Louter

	# Final step: rp = t - np, then keep either t or rp depending on the
	# borrow, selected with masks rather than a branch.
	xorq	%r14,%r14		# i = 0; also clears CF for the first sbb
	movq	(%rsp),%rax		# t[0]
	leaq	(%rsp),%rsi		# rsi = &t[0]
	movq	%r9,%r15
	jmp	.Lsub
.align	16
.Lsub:
	sbbq	(%rcx,%r14,8),%rax
	movq	%rax,(%rdi,%r14,8)	# rp[i] = t[i] - np[i] - borrow
	movq	8(%rsi,%r14,8),%rax	# t[i+1]
	leaq	1(%r14),%r14		# lea/dec keep CF intact between iterations
	decq	%r15
	jnz	.Lsub

	sbbq	$0,%rax			# rax = t[num] - borrow: all-ones when t < np
	xorq	%r14,%r14
	andq	%rax,%rsi
	notq	%rax
	movq	%rdi,%rcx
	andq	%rax,%rcx
	movq	%r9,%r15
	orq	%rcx,%rsi		# rsi = borrow ? t : rp
.align	16
.Lcopy:
	movq	(%rsi,%r14,8),%rax
	movq	%r14,(%rsp,%r14,8)	# overwrite t[i] with the index value
	movq	%rax,(%rdi,%r14,8)	# rp[i] = selected word
	leaq	1(%r14),%r14
	subq	$1,%r15
	jnz	.Lcopy

	movq	8(%rsp,%r9,8),%rsi	# pre-allocation %rsp
	movq	$1,%rax			# return value
	movq	(%rsi),%r15
	movq	8(%rsi),%r14
	movq	16(%rsi),%r13
	movq	24(%rsi),%r12
	movq	32(%rsi),%rbp
	movq	40(%rsi),%rbx
	leaq	48(%rsi),%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3		# rep ret
.size	bn_mul_mont_gather5,.-bn_mul_mont_gather5
||||
|
# bn_mul4x_mont_gather5 -- 4-way unrolled variant of the gather-based
# Montgomery-style multiply.  Entered through .Lmul4x_enter with the same
# register/stack arguments as bn_mul_mont_gather5:
#   %rdi = rp, %rsi = ap, %rdx = gather table, %rcx = np,
#   %r8 = pointer to n0, %r9d = num, 8(%rsp) = table index.
#   Returns 1 in %rax.
#
# Frame: num+4 qwords, rounded down to a 1 KiB boundary.
#   (%rsp)...            temporary vector t[0..num]
#   8(%rsp,num,8)        caller's %rsp
#   16(%rsp,num,8)       saved rp (because %rdi is reused as a carry register)
#
# Register roles in the loops:
#   %rbx = gathered multiplier word   %rbp = m (t[0]*n0, low 64 bits)
#   %r12 = gather-table cursor        %r14 = outer counter, %r15 = inner counter
#   %r10,%r11 = carries of the ap[] products; %r13,%rdi = carries of the np[] products
.type	bn_mul4x_mont_gather5,@function
.align	16
bn_mul4x_mont_gather5:
.Lmul4x_enter:
	movl	%r9d,%r9d		# zero-extend num
	movl	8(%rsp),%r10d		# table index from the stack argument
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	movq	%rsp,%rax
	leaq	4(%r9),%r11
	negq	%r11
	leaq	(%rsp,%r11,8),%rsp	# reserve num+4 qwords
	andq	$-1024,%rsp

	movq	%rax,8(%rsp,%r9,8)	# remember the pre-allocation %rsp
.Lmul4x_body:
	movq	%rdi,16(%rsp,%r9,8)	# save rp
	movq	%rdx,%r12
	movq	%r10,%r11
	shrq	$3,%r10
	andq	$7,%r11			# index & 7
	notq	%r10
	leaq	.Lmagic_masks(%rip),%rax
	andq	$3,%r10			# ~(index >> 3) & 3
	leaq	96(%r12,%r11,8),%r12
	movq	0(%rax,%r10,8),%xmm4	# four select masks
	movq	8(%rax,%r10,8),%xmm5
	movq	16(%rax,%r10,8),%xmm6
	movq	24(%rax,%r10,8),%xmm7

	# Gather multiplier word 0.
	movq	-96(%r12),%xmm0
	movq	-32(%r12),%xmm1
	pand	%xmm4,%xmm0
	movq	32(%r12),%xmm2
	pand	%xmm5,%xmm1
	movq	96(%r12),%xmm3
	pand	%xmm6,%xmm2
	por	%xmm1,%xmm0
	pand	%xmm7,%xmm3
	por	%xmm2,%xmm0
	leaq	256(%r12),%r12
	por	%xmm3,%xmm0

	.byte	102,72,15,126,195	# movq %xmm0,%rbx
	movq	(%r8),%r8		# n0 value
	movq	(%rsi),%rax		# ap[0]

	xorq	%r14,%r14		# i = 0
	xorq	%r15,%r15		# j = 0

	# Begin gathering the next multiplier word while multiplying.
	movq	-96(%r12),%xmm0
	movq	-32(%r12),%xmm1
	pand	%xmm4,%xmm0
	movq	32(%r12),%xmm2
	pand	%xmm5,%xmm1

	movq	%r8,%rbp
	mulq	%rbx			# ap[0] * multiplier
	movq	%rax,%r10
	movq	(%rcx),%rax		# np[0]

	movq	96(%r12),%xmm3
	pand	%xmm6,%xmm2
	por	%xmm1,%xmm0
	pand	%xmm7,%xmm3

	imulq	%r10,%rbp		# m = low word * n0
	movq	%rdx,%r11

	por	%xmm2,%xmm0
	leaq	256(%r12),%r12
	por	%xmm3,%xmm0

	mulq	%rbp			# np[0] * m
	addq	%rax,%r10		# low word discarded, carry kept
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx			# ap[1] * multiplier
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp			# np[1] * m
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15		# j = 4
	adcq	$0,%rdx
	movq	%rdi,(%rsp)		# t[0]
	movq	%rdx,%r13
	jmp	.L1st4x
.align	16
.L1st4x:
	mulq	%rbx			# ap[j-2] * multiplier
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp			# np[j-2] * m
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)	# t[j-3]
	movq	%rdx,%rdi

	mulq	%rbx			# ap[j-1] * multiplier
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp			# np[j-1] * m
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)	# t[j-2]
	movq	%rdx,%r13

	mulq	%rbx			# ap[j] * multiplier
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp			# np[j] * m
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)	# t[j-1]
	movq	%rdx,%rdi

	mulq	%rbx			# ap[j+1] * multiplier
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	4(%r15),%r15		# j += 4
	movq	%rdx,%r10

	mulq	%rbp			# np[j+1] * m  (j before the increment)
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)	# t[j-4]  (j after the increment)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jl	.L1st4x

	# Tail of the first pass: the last two words.
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax		# ap[0] for the next outer pass
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	.byte	102,72,15,126,195	# movq %xmm0,%rbx  (next multiplier word)

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)	# t[num-1]
	movq	%rdi,(%rsp,%r15,8)	# t[num] = top carry

	leaq	1(%r14),%r14		# i = 1
.align	4
.Louter4x:
	xorq	%r15,%r15		# j = 0
	movq	-96(%r12),%xmm0		# gather the following multiplier word
	movq	-32(%r12),%xmm1
	pand	%xmm4,%xmm0
	movq	32(%r12),%xmm2
	pand	%xmm5,%xmm1

	movq	(%rsp),%r10		# t[0]
	movq	%r8,%rbp
	mulq	%rbx			# ap[0] * multiplier
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	movq	96(%r12),%xmm3
	pand	%xmm6,%xmm2
	por	%xmm1,%xmm0
	pand	%xmm7,%xmm3

	imulq	%r10,%rbp		# m = low word * n0
	movq	%rdx,%r11

	por	%xmm2,%xmm0
	leaq	256(%r12),%r12
	por	%xmm3,%xmm0

	mulq	%rbp			# np[0] * m
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx			# ap[1] * multiplier
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	addq	8(%rsp),%r11		# + t[1]
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp			# np[1] * m
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15		# j = 4
	adcq	$0,%rdx
	movq	%rdx,%r13
	jmp	.Linner4x
.align	16
.Linner4x:
	mulq	%rbx			# ap[j-2] * multiplier
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10	# + t[j-2]
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp			# np[j-2] * m
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)	# t[j-4]
	movq	%rdx,%rdi

	mulq	%rbx			# ap[j-1] * multiplier
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11	# + t[j-1]
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp			# np[j-1] * m
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)	# t[j-3]
	movq	%rdx,%r13

	mulq	%rbx			# ap[j] * multiplier
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	(%rsp,%r15,8),%r10	# + t[j]
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp			# np[j] * m
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)	# t[j-2]
	movq	%rdx,%rdi

	mulq	%rbx			# ap[j+1] * multiplier
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	8(%rsp,%r15,8),%r11	# + t[j+1]
	adcq	$0,%rdx
	leaq	4(%r15),%r15		# j += 4
	movq	%rdx,%r10

	mulq	%rbp			# np[j+1] * m  (j before the increment)
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%r13,-40(%rsp,%r15,8)	# t[j-5]  (j after the increment)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jl	.Linner4x

	# Tail of an outer pass: the last two words.
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	1(%r14),%r14		# i++
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax		# ap[0] for the next outer pass
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%r13

	.byte	102,72,15,126,195	# movq %xmm0,%rbx  (next multiplier word)
	movq	%rdi,-16(%rsp,%r15,8)

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	addq	(%rsp,%r9,8),%r13	# add the previous top carry word
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)	# t[num-1]
	movq	%rdi,(%rsp,%r15,8)	# t[num]

	cmpq	%r9,%r14
	jl	.Louter4x

	# Final step: rp = t - np four words at a time, then select t or rp
	# by mask and wipe the temporary vector with zeros while copying.
	movq	16(%rsp,%r9,8),%rdi	# restore rp
	movq	0(%rsp),%rax		# t[0]
	pxor	%xmm0,%xmm0		# zero used to overwrite t[]
	movq	8(%rsp),%rdx		# t[1]
	shrq	$2,%r9			# num / 4
	leaq	(%rsp),%rsi		# rsi = &t[0]
	xorq	%r14,%r14		# i = 0

	subq	0(%rcx),%rax
	movq	16(%rsi),%rbx		# t[2]
	movq	24(%rsi),%rbp		# t[3]
	sbbq	8(%rcx),%rdx
	leaq	-1(%r9),%r15		# num/4 - 1 full iterations
	jmp	.Lsub4x
.align	16
.Lsub4x:
	movq	%rax,0(%rdi,%r14,8)
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	16(%rcx,%r14,8),%rbx
	movq	32(%rsi,%r14,8),%rax
	movq	40(%rsi,%r14,8),%rdx
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)
	movq	%rbp,24(%rdi,%r14,8)
	sbbq	32(%rcx,%r14,8),%rax
	movq	48(%rsi,%r14,8),%rbx
	movq	56(%rsi,%r14,8),%rbp
	sbbq	40(%rcx,%r14,8),%rdx
	leaq	4(%r14),%r14		# lea/dec keep CF intact between iterations
	decq	%r15
	jnz	.Lsub4x

	movq	%rax,0(%rdi,%r14,8)
	movq	32(%rsi,%r14,8),%rax	# t[num] (top carry word)
	sbbq	16(%rcx,%r14,8),%rbx
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)

	sbbq	$0,%rax			# all-ones when the subtraction borrowed
	movq	%rbp,24(%rdi,%r14,8)
	xorq	%r14,%r14
	andq	%rax,%rsi
	notq	%rax
	movq	%rdi,%rcx
	andq	%rax,%rcx
	leaq	-1(%r9),%r15
	orq	%rcx,%rsi		# rsi = borrow ? t : rp

	movdqu	(%rsi),%xmm1
	movdqa	%xmm0,(%rsp)		# zero t[0..1]
	movdqu	%xmm1,(%rdi)
	jmp	.Lcopy4x
.align	16
.Lcopy4x:
	movdqu	16(%rsi,%r14,1),%xmm2
	movdqu	32(%rsi,%r14,1),%xmm1
	movdqa	%xmm0,16(%rsp,%r14,1)
	movdqu	%xmm2,16(%rdi,%r14,1)
	movdqa	%xmm0,32(%rsp,%r14,1)
	movdqu	%xmm1,32(%rdi,%r14,1)
	leaq	32(%r14),%r14		# byte offset += 32
	decq	%r15
	jnz	.Lcopy4x

	shlq	$2,%r9			# restore num
	movdqu	16(%rsi,%r14,1),%xmm2
	movdqa	%xmm0,16(%rsp,%r14,1)
	movdqu	%xmm2,16(%rdi,%r14,1)
	movq	8(%rsp,%r9,8),%rsi	# pre-allocation %rsp
	movq	$1,%rax			# return value
	movq	(%rsi),%r15
	movq	8(%rsi),%r14
	movq	16(%rsi),%r13
	movq	24(%rsi),%r12
	movq	32(%rsi),%rbp
	movq	40(%rsi),%rbx
	leaq	48(%rsi),%rsp
.Lmul4x_epilogue:
	.byte	0xf3,0xc3		# rep ret
.size	bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
||||
|
# bn_scatter5 -- store a vector of qwords into a strided table.
#
# ABI:  System V AMD64 register arguments.
#   %rdi = source vector
#   %rsi = number of qwords to store (0 is allowed and stores nothing)
#   %rdx = table base
#   %rcx = slot index; word k of the source is written to
#          table + slot*8 + k*256
# Clobb: %rax, %rdx, %rsi, %rdi, flags
.globl	bn_scatter5
.type	bn_scatter5,@function
.align	16
bn_scatter5:
	testq	%rsi,%rsi		# nothing to do for an empty vector
	jz	.Lscatter_epilogue
	leaq	(%rdx,%rcx,8),%rdx	# first destination = table + slot*8
.Lscatter:
	movq	(%rdi),%rax
	leaq	8(%rdi),%rdi		# next source word
	movq	%rax,(%rdx)
	leaq	256(%rdx),%rdx		# next table row (256-byte stride)
	subq	$1,%rsi
	jnz	.Lscatter
.Lscatter_epilogue:
	.byte	0xf3,0xc3		# rep ret
.size	bn_scatter5,.-bn_scatter5
||||
|
|
||||
|
# bn_gather5 -- read one slot's vector back out of a strided table.
#
# ABI:  System V AMD64 register arguments.
#   %rdi = destination vector
#   %rsi = number of qwords to produce
#   %rdx = table base
#   %rcx = slot index
# Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %r11, %xmm0-%xmm7, flags
#
# For every output word four candidates, 64 bytes apart, are loaded at the
# qword offset (index & 7); each is ANDed with one of four masks taken from
# .Lmagic_masks starting at slot ~(index >> 3) & 3, and the results are ORed
# together.  The table cursor then advances by 256 bytes.
#
# A zero count returns immediately, matching bn_scatter5.  Without this
# guard the loop below would store one word and then keep running, because
# the counter is decremented before it is tested.
.globl	bn_gather5
.type	bn_gather5,@function
.align	16
bn_gather5:
	testq	%rsi,%rsi		# empty request: write nothing
	jz	.Lgather_epilogue
	movq	%rcx,%r11
	shrq	$3,%rcx
	andq	$7,%r11			# index & 7
	notq	%rcx
	leaq	.Lmagic_masks(%rip),%rax
	andq	$3,%rcx			# ~(index >> 3) & 3
	leaq	96(%rdx,%r11,8),%rdx
	movq	0(%rax,%rcx,8),%xmm4	# four select masks
	movq	8(%rax,%rcx,8),%xmm5
	movq	16(%rax,%rcx,8),%xmm6
	movq	24(%rax,%rcx,8),%xmm7
	jmp	.Lgather
.align	16
.Lgather:
	movq	-96(%rdx),%xmm0
	movq	-32(%rdx),%xmm1
	pand	%xmm4,%xmm0
	movq	32(%rdx),%xmm2
	pand	%xmm5,%xmm1
	movq	96(%rdx),%xmm3
	pand	%xmm6,%xmm2
	por	%xmm1,%xmm0
	pand	%xmm7,%xmm3
	por	%xmm2,%xmm0
	leaq	256(%rdx),%rdx		# next table row group
	por	%xmm3,%xmm0

	movq	%xmm0,(%rdi)
	leaq	8(%rdi),%rdi		# next destination word
	subq	$1,%rsi
	jnz	.Lgather
.Lgather_epilogue:
	.byte	0xf3,0xc3		# rep ret
.LSEH_end_bn_gather5:
.size	bn_gather5,.-bn_gather5
||||
|
# Select masks for the gather code.  Readers load four consecutive qwords
# starting at slot s = 0..3, so exactly one of the four loaded masks
# (slot 3, the only all-ones entry) is set and the other three are zero.
.align	64
.Lmagic_masks:
	.quad	0,0,0,-1
	.quad	0,0,0,0
# NUL-terminated ASCII identification string:
# "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by"
# followed by the author e-mail address.
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
@ -0,0 +1,295 @@ |
|||||
|
.text |
||||
|
|
||||
|
|
||||
|
|
||||
|
# _mul_1x1 -- 64x64 -> 128-bit carry-less (XOR-accumulated) multiply.
#
# Private, register-based helper (not callable through the C ABI).
#   In:    %rax = a, %rbp = b, %r8 = 4-bit window mask (the caller in this
#          file passes 15)
#   Out:   %rdx:%rax = product (high:low)
#   Clobb: %rbx, %rcx, %rsi, %rdi, %rbp, %r9-%r14, %xmm0, %xmm1, flags
#   Stack: 128+8 bytes; 0..120(%rsp) hold a 16-entry table of qwords.
#
# Method: a table tab[i] (i = 0..15) of XOR-combinations of a1, a1<<1,
# a1<<2, a1<<3 is built, where a1 is a with its top three bits cleared.
# b is then consumed four bits at a time.  Odd-numbered nibbles are
# accumulated in %rdx:%rax with integer shifts; even-numbered nibbles are
# accumulated in %xmm0 with whole-byte shifts.  The contribution of the
# top three bits of a is folded in up front.
.p2align	4
_mul_1x1:
	subq	$128+8,%rsp
	movq	$-1,%r9
	leaq	(%rax,%rax,1),%rsi	# a<<1
	shrq	$3,%r9
	leaq	(,%rax,4),%rdi		# a<<2
	andq	%rax,%r9		# a1 = a with the top 3 bits cleared
	leaq	(,%rax,8),%r12		# a8 = a<<3
	sarq	$63,%rax		# broadcast bit 63 of a
	leaq	(%r9,%r9,1),%r10	# a2 = a1<<1
	sarq	$63,%rsi		# broadcast bit 62 of a
	leaq	(,%r9,4),%r11		# a4 = a1<<2
	andq	%rbp,%rax
	sarq	$63,%rdi		# broadcast bit 61 of a
	movq	%rax,%rdx
	shlq	$63,%rax
	andq	%rbp,%rsi
	shrq	$1,%rdx
	movq	%rsi,%rcx
	shlq	$62,%rsi
	andq	%rbp,%rdi
	shrq	$2,%rcx
	xorq	%rsi,%rax
	movq	%rdi,%rbx
	shlq	$61,%rdi
	xorq	%rcx,%rdx
	shrq	$3,%rbx
	xorq	%rdi,%rax
	xorq	%rbx,%rdx

	# Build the 16-entry lookup table on the stack.
	movq	%r9,%r13
	movq	$0,0(%rsp)		# tab[0]  = 0
	xorq	%r10,%r13		# a1^a2
	movq	%r9,8(%rsp)		# tab[1]  = a1
	movq	%r11,%r14
	movq	%r10,16(%rsp)		# tab[2]  = a2
	xorq	%r12,%r14		# a4^a8
	movq	%r13,24(%rsp)		# tab[3]  = a1^a2

	xorq	%r11,%r9
	movq	%r11,32(%rsp)		# tab[4]  = a4
	xorq	%r11,%r10
	movq	%r9,40(%rsp)		# tab[5]  = a1^a4
	xorq	%r11,%r13
	movq	%r10,48(%rsp)		# tab[6]  = a2^a4
	xorq	%r14,%r9		# a1^a8
	movq	%r13,56(%rsp)		# tab[7]  = a1^a2^a4
	xorq	%r14,%r10		# a2^a8

	movq	%r12,64(%rsp)		# tab[8]  = a8
	xorq	%r14,%r13		# a1^a2^a8
	movq	%r9,72(%rsp)		# tab[9]  = a1^a8
	xorq	%r11,%r9		# a1^a4^a8
	movq	%r10,80(%rsp)		# tab[10] = a2^a8
	xorq	%r11,%r10		# a2^a4^a8
	movq	%r13,88(%rsp)		# tab[11] = a1^a2^a8

	xorq	%r11,%r13		# a1^a2^a4^a8
	movq	%r14,96(%rsp)		# tab[12] = a4^a8
	movq	%r8,%rsi
	movq	%r9,104(%rsp)		# tab[13] = a1^a4^a8
	andq	%rbp,%rsi		# index for nibble 0 of b
	movq	%r10,112(%rsp)		# tab[14] = a2^a4^a8
	shrq	$4,%rbp
	movq	%r13,120(%rsp)		# tab[15] = a1^a2^a4^a8
	movq	%r8,%rdi
	andq	%rbp,%rdi		# index for nibble 1 of b
	shrq	$4,%rbp

	movq	(%rsp,%rsi,8),%xmm0	# SSE accumulator starts as tab[nibble 0]
	movq	%r8,%rsi
	andq	%rbp,%rsi
	shrq	$4,%rbp

	# nibble 1 (integer, <<4) and nibble 2 (SSE, <<8)
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$4,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$60,%rbx
	xorq	%rcx,%rax
	pslldq	$1,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	# nibble 3 (integer, <<12) and nibble 4 (SSE, <<16)
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$12,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$52,%rbx
	xorq	%rcx,%rax
	pslldq	$2,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	# nibble 5 (integer, <<20) and nibble 6 (SSE, <<24)
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$20,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$44,%rbx
	xorq	%rcx,%rax
	pslldq	$3,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	# nibble 7 (integer, <<28) and nibble 8 (SSE, <<32)
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$28,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$36,%rbx
	xorq	%rcx,%rax
	pslldq	$4,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	# nibble 9 (integer, <<36) and nibble 10 (SSE, <<40)
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$36,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$28,%rbx
	xorq	%rcx,%rax
	pslldq	$5,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	# nibble 11 (integer, <<44) and nibble 12 (SSE, <<48)
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$44,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$20,%rbx
	xorq	%rcx,%rax
	pslldq	$6,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	# nibble 13 (integer, <<52) and nibble 14 (SSE, <<56)
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$52,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$12,%rbx
	xorq	%rcx,%rax
	pslldq	$7,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	# nibble 15 (integer, <<60), then merge the SSE accumulator.
	movq	(%rsp,%rdi,8),%rcx
	movq	%rcx,%rbx
	shlq	$60,%rcx
	.byte	102,72,15,126,198	# movq %xmm0,%rsi  (low half of SSE sum)
	shrq	$4,%rbx
	xorq	%rcx,%rax
	psrldq	$8,%xmm0
	xorq	%rbx,%rdx
	.byte	102,72,15,126,199	# movq %xmm0,%rdi  (high half of SSE sum)
	xorq	%rsi,%rax
	xorq	%rdi,%rdx

	addq	$128+8,%rsp
	.byte	0xf3,0xc3		# rep ret
L$end_mul_1x1:
||||
|
|
||||
|
|
||||
|
# _bn_GF2m_mul_2x2 -- 128x128 -> 256-bit carry-less multiply (three-multiply
# Karatsuba scheme).
#
# ABI:  System V AMD64 register arguments.
#   %rdi = pointer to four output qwords
#   %rsi, %rcx = the word pair multiplied to form the upper product
#   %rdx, %r8  = the word pair multiplied to form the lower product
# The low 128 bits of the result are stored at 0(%rdi), the high 128 bits
# at 16(%rdi).
#
# Bit 33 of the 64-bit load from _OPENSSL_ia32cap_P selects the path: when
# set, the hardware carry-less multiply (the .byte-encoded PCLMULQDQ) is
# used; otherwise the table-driven helper _mul_1x1 is called three times.
.globl	_bn_GF2m_mul_2x2

.p2align	4
_bn_GF2m_mul_2x2:
	movq	_OPENSSL_ia32cap_P(%rip),%rax
	btq	$33,%rax
	jnc	L$vanilla_mul_2x2

	# ---- PCLMULQDQ path ----
	.byte	102,72,15,110,198	# movq %rsi,%xmm0
	.byte	102,72,15,110,201	# movq %rcx,%xmm1
	.byte	102,72,15,110,210	# movq %rdx,%xmm2
	.byte	102,73,15,110,216	# movq %r8,%xmm3
	movdqa	%xmm0,%xmm4
	movdqa	%xmm1,%xmm5
	.byte	102,15,58,68,193,0	# pclmulqdq $0,%xmm1,%xmm0  (upper product)
	pxor	%xmm2,%xmm4		# sum of the two first-operand words
	pxor	%xmm3,%xmm5		# sum of the two second-operand words
	.byte	102,15,58,68,211,0	# pclmulqdq $0,%xmm3,%xmm2  (lower product)
	.byte	102,15,58,68,229,0	# pclmulqdq $0,%xmm5,%xmm4  (product of sums)
	xorps	%xmm0,%xmm4
	xorps	%xmm2,%xmm4		# middle term
	movdqa	%xmm4,%xmm5
	pslldq	$8,%xmm4
	psrldq	$8,%xmm5
	pxor	%xmm4,%xmm2		# fold low half of middle term into low 128 bits
	pxor	%xmm5,%xmm0		# fold high half of middle term into high 128 bits
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm0,16(%rdi)
	.byte	0xf3,0xc3		# rep ret

	# ---- table-driven path ----
.p2align	4
L$vanilla_mul_2x2:
	leaq	-136(%rsp),%rsp
	movq	%r14,80(%rsp)		# save the callee-saved registers that
	movq	%r13,88(%rsp)		# _mul_1x1 overwrites
	movq	%r12,96(%rsp)
	movq	%rbp,104(%rsp)
	movq	%rbx,112(%rsp)
L$body_mul_2x2:
	movq	%rdi,32(%rsp)		# spill all five arguments
	movq	%rsi,40(%rsp)
	movq	%rdx,48(%rsp)
	movq	%rcx,56(%rsp)
	movq	%r8,64(%rsp)

	movq	$15,%r8			# nibble mask expected by _mul_1x1
	movq	%rsi,%rax
	movq	%rcx,%rbp
	call	_mul_1x1		# upper product

	movq	%rax,16(%rsp)
	movq	%rdx,24(%rsp)

	movq	48(%rsp),%rax
	movq	64(%rsp),%rbp
	call	_mul_1x1		# lower product

	movq	%rax,0(%rsp)
	movq	%rdx,8(%rsp)

	movq	40(%rsp),%rax
	movq	56(%rsp),%rbp
	xorq	48(%rsp),%rax
	xorq	64(%rsp),%rbp
	call	_mul_1x1		# product of the XOR-sums

	movq	0(%rsp),%rbx		# lower product, low word
	movq	8(%rsp),%rcx		# lower product, high word
	movq	16(%rsp),%rdi		# upper product, low word
	movq	24(%rsp),%rsi		# upper product, high word
	movq	32(%rsp),%rbp		# output pointer

	# Combine the three partial products into the four result words.
	xorq	%rdx,%rax
	xorq	%rcx,%rdx
	xorq	%rbx,%rax
	movq	%rbx,0(%rbp)
	xorq	%rdi,%rdx
	movq	%rsi,24(%rbp)
	xorq	%rsi,%rax
	xorq	%rsi,%rdx
	xorq	%rdx,%rax
	movq	%rdx,16(%rbp)
	movq	%rax,8(%rbp)

	movq	80(%rsp),%r14
	movq	88(%rsp),%r13
	movq	96(%rsp),%r12
	movq	104(%rsp),%rbp
	movq	112(%rsp),%rbx
	leaq	136(%rsp),%rsp
	.byte	0xf3,0xc3		# rep ret
L$end_mul_2x2:

# NUL-terminated ASCII identification string:
# "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by" + author e-mail address
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align	4
@ -0,0 +1,785 @@ |
|||||
|
.text |
||||
|
|
||||
|
|
||||
|
.globl _bn_mul_mont_gather5 |
||||
|
|
||||
|
.p2align 6 |
||||
|
# _bn_mul_mont_gather5 -- entry point, dispatcher and one-word-at-a-time path
# of what the name suggests is a Montgomery multiplication whose second
# operand is gathered from an interleaved table.
# Registers as used below (System V argument order):
#   %rdi    result vector (written in L$sub and L$copy)
#   %rsi    first operand vector, read as (%rsi,i,8)
#   %rdx    base of the table the second operand is gathered from
#   %rcx    vector multiplied by m and subtracted in L$sub
#           (presumably the modulus -- confirm against the caller)
#   %r8     pointer to a single word that seeds the per-pass factor m
#           (presumably n0 -- confirm against the caller)
#   %r9d    word count (num)
#   8(%rsp) 32-bit table index, taken from the stack
# Returns 1 in %rax.  Saves and restores %rbx, %rbp, %r12-%r15.
_bn_mul_mont_gather5: |
||||
|
# Take the 4x-unrolled path only when num is a multiple of 4 and at least 8.
testl $3,%r9d |
||||
|
jnz L$mul_enter |
||||
|
cmpl $8,%r9d |
||||
|
jb L$mul_enter |
||||
|
jmp L$mul4x_enter |
||||
|
|
||||
|
.p2align 4 |
||||
|
L$mul_enter: |
||||
|
# Writing %r9d to itself zero-extends num into the full %r9.
movl %r9d,%r9d |
||||
|
# Fetch the table index before the pushes move %rsp.
movl 8(%rsp),%r10d |
||||
|
pushq %rbx |
||||
|
pushq %rbp |
||||
|
pushq %r12 |
||||
|
pushq %r13 |
||||
|
pushq %r14 |
||||
|
pushq %r15 |
||||
|
# Reserve num+2 qwords for the temporary vector tp[] below the saved
# registers, then round %rsp down to a 1024-byte boundary.
movq %rsp,%rax |
||||
|
leaq 2(%r9),%r11 |
||||
|
negq %r11 |
||||
|
leaq (%rsp,%r11,8),%rsp |
||||
|
andq $-1024,%rsp |
||||
|
|
||||
|
# tp[num+1] keeps the pre-allocation %rsp for the epilogue.
movq %rax,8(%rsp,%r9,8) |
||||
|
L$mul_body: |
||||
|
# Gather setup: %r11 = index & 7 selects the qword inside a 64-byte row,
# %r10 = ~(index >> 3) & 3 selects where in L$magic_masks the four masks
# %xmm4-%xmm7 are read from, so that exactly one of them is all-ones.
movq %rdx,%r12 |
||||
|
movq %r10,%r11 |
||||
|
shrq $3,%r10 |
||||
|
andq $7,%r11 |
||||
|
notq %r10 |
||||
|
leaq L$magic_masks(%rip),%rax |
||||
|
andq $3,%r10 |
||||
|
leaq 96(%r12,%r11,8),%r12 |
||||
|
movq 0(%rax,%r10,8),%xmm4 |
||||
|
movq 8(%rax,%r10,8),%xmm5 |
||||
|
movq 16(%rax,%r10,8),%xmm6 |
||||
|
movq 24(%rax,%r10,8),%xmm7 |
||||
|
|
||||
|
# Gather one word: all four candidate rows (64 bytes apart) are loaded and
# and/or-combined with the masks, so the addresses touched depend only on
# index & 7.  %r12 then advances 256 bytes to the next word of the table.
movq -96(%r12),%xmm0 |
||||
|
movq -32(%r12),%xmm1 |
||||
|
pand %xmm4,%xmm0 |
||||
|
movq 32(%r12),%xmm2 |
||||
|
pand %xmm5,%xmm1 |
||||
|
movq 96(%r12),%xmm3 |
||||
|
pand %xmm6,%xmm2 |
||||
|
por %xmm1,%xmm0 |
||||
|
pand %xmm7,%xmm3 |
||||
|
por %xmm2,%xmm0 |
||||
|
leaq 256(%r12),%r12 |
||||
|
por %xmm3,%xmm0 |
||||
|
|
||||
|
# Hand-encoded movq %xmm0,%rbx: %rbx = first gathered word b[0].
.byte 102,72,15,126,195 |
||||
|
|
||||
|
# %r8 now holds the word it pointed to; %rax = a[0].
movq (%r8),%r8 |
||||
|
movq (%rsi),%rax |
||||
|
|
||||
|
# %r14 = outer index i, %r15 = inner index j.
xorq %r14,%r14 |
||||
|
xorq %r15,%r15 |
||||
|
|
||||
|
# The gather of the next word is interleaved with the first multiplications.
movq -96(%r12),%xmm0 |
||||
|
movq -32(%r12),%xmm1 |
||||
|
pand %xmm4,%xmm0 |
||||
|
movq 32(%r12),%xmm2 |
||||
|
pand %xmm5,%xmm1 |
||||
|
|
||||
|
movq %r8,%rbp |
||||
|
mulq %rbx |
||||
|
movq %rax,%r10 |
||||
|
movq (%rcx),%rax |
||||
|
|
||||
|
movq 96(%r12),%xmm3 |
||||
|
pand %xmm6,%xmm2 |
||||
|
por %xmm1,%xmm0 |
||||
|
pand %xmm7,%xmm3 |
||||
|
|
||||
|
# %rbp = m = %r8 * low64(a[0]*b[0]).
imulq %r10,%rbp |
||||
|
movq %rdx,%r11 |
||||
|
|
||||
|
por %xmm2,%xmm0 |
||||
|
leaq 256(%r12),%r12 |
||||
|
por %xmm3,%xmm0 |
||||
|
|
||||
|
# Add the low word of m times word 0 of the %rcx vector; only the carry
# (%rdx) is kept.
mulq %rbp |
||||
|
addq %rax,%r10 |
||||
|
movq 8(%rsi),%rax |
||||
|
adcq $0,%rdx |
||||
|
movq %rdx,%r13 |
||||
|
|
||||
|
leaq 1(%r15),%r15 |
||||
|
jmp L$1st_enter |
||||
|
|
||||
|
.p2align 4 |
||||
|
# First pass: tp[j-1] = low word of the running sum of a[j]*b[0] and m times
# word j of the %rcx vector; %r11/%r10 and %r13 carry the high words.
L$1st: |
||||
|
addq %rax,%r13 |
||||
|
movq (%rsi,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r11,%r13 |
||||
|
movq %r10,%r11 |
||||
|
adcq $0,%rdx |
||||
|
movq %r13,-16(%rsp,%r15,8) |
||||
|
movq %rdx,%r13 |
||||
|
|
||||
|
L$1st_enter: |
||||
|
mulq %rbx |
||||
|
addq %rax,%r11 |
||||
|
movq (%rcx,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
leaq 1(%r15),%r15 |
||||
|
movq %rdx,%r10 |
||||
|
|
||||
|
mulq %rbp |
||||
|
cmpq %r9,%r15 |
||||
|
jne L$1st |
||||
|
|
||||
|
# Hand-encoded movq %xmm0,%rbx: %rbx = next gathered word.
.byte 102,72,15,126,195 |
||||
|
|
||||
|
addq %rax,%r13 |
||||
|
movq (%rsi),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r11,%r13 |
||||
|
adcq $0,%rdx |
||||
|
movq %r13,-16(%rsp,%r15,8) |
||||
|
movq %rdx,%r13 |
||||
|
movq %r10,%r11 |
||||
|
|
||||
|
# Top of the first pass: tp[num-1] and the carry word tp[num].
xorq %rdx,%rdx |
||||
|
addq %r11,%r13 |
||||
|
adcq $0,%rdx |
||||
|
movq %r13,-8(%rsp,%r9,8) |
||||
|
movq %rdx,(%rsp,%r9,8) |
||||
|
|
||||
|
leaq 1(%r14),%r14 |
||||
|
jmp L$outer |
||||
|
.p2align 4 |
||||
|
# Outer loop, one pass per remaining gathered word in %rbx: same as the
# first pass but the previous tp[] is added in as well.
L$outer: |
||||
|
xorq %r15,%r15 |
||||
|
movq %r8,%rbp |
||||
|
movq (%rsp),%r10 |
||||
|
|
||||
|
# Gather of the following word, again interleaved with the arithmetic.
movq -96(%r12),%xmm0 |
||||
|
movq -32(%r12),%xmm1 |
||||
|
pand %xmm4,%xmm0 |
||||
|
movq 32(%r12),%xmm2 |
||||
|
pand %xmm5,%xmm1 |
||||
|
|
||||
|
mulq %rbx |
||||
|
addq %rax,%r10 |
||||
|
movq (%rcx),%rax |
||||
|
adcq $0,%rdx |
||||
|
|
||||
|
movq 96(%r12),%xmm3 |
||||
|
pand %xmm6,%xmm2 |
||||
|
por %xmm1,%xmm0 |
||||
|
pand %xmm7,%xmm3 |
||||
|
|
||||
|
# %rbp = m = %r8 * low64(tp[0] + a[0]*b).
imulq %r10,%rbp |
||||
|
movq %rdx,%r11 |
||||
|
|
||||
|
por %xmm2,%xmm0 |
||||
|
leaq 256(%r12),%r12 |
||||
|
por %xmm3,%xmm0 |
||||
|
|
||||
|
mulq %rbp |
||||
|
addq %rax,%r10 |
||||
|
movq 8(%rsi),%rax |
||||
|
adcq $0,%rdx |
||||
|
movq 8(%rsp),%r10 |
||||
|
movq %rdx,%r13 |
||||
|
|
||||
|
leaq 1(%r15),%r15 |
||||
|
jmp L$inner_enter |
||||
|
|
||||
|
.p2align 4 |
||||
|
# Inner loop: %r10 holds the old tp[j] being folded into the new sum.
L$inner: |
||||
|
addq %rax,%r13 |
||||
|
movq (%rsi,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r10,%r13 |
||||
|
movq (%rsp,%r15,8),%r10 |
||||
|
adcq $0,%rdx |
||||
|
movq %r13,-16(%rsp,%r15,8) |
||||
|
movq %rdx,%r13 |
||||
|
|
||||
|
L$inner_enter: |
||||
|
mulq %rbx |
||||
|
addq %rax,%r11 |
||||
|
movq (%rcx,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r11,%r10 |
||||
|
movq %rdx,%r11 |
||||
|
adcq $0,%r11 |
||||
|
leaq 1(%r15),%r15 |
||||
|
|
||||
|
mulq %rbp |
||||
|
cmpq %r9,%r15 |
||||
|
jne L$inner |
||||
|
|
||||
|
# Hand-encoded movq %xmm0,%rbx: %rbx = next gathered word.
.byte 102,72,15,126,195 |
||||
|
|
||||
|
addq %rax,%r13 |
||||
|
movq (%rsi),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r10,%r13 |
||||
|
movq (%rsp,%r15,8),%r10 |
||||
|
adcq $0,%rdx |
||||
|
movq %r13,-16(%rsp,%r15,8) |
||||
|
movq %rdx,%r13 |
||||
|
|
||||
|
# Top of the pass: fold in the old carry word tp[num] (%r10) and store the
# new tp[num-1] and tp[num].
xorq %rdx,%rdx |
||||
|
addq %r11,%r13 |
||||
|
adcq $0,%rdx |
||||
|
addq %r10,%r13 |
||||
|
adcq $0,%rdx |
||||
|
movq %r13,-8(%rsp,%r9,8) |
||||
|
movq %rdx,(%rsp,%r9,8) |
||||
|
|
||||
|
leaq 1(%r14),%r14 |
||||
|
cmpq %r9,%r14 |
||||
|
jl L$outer |
||||
|
|
||||
|
# Final subtraction.  The xorq clears CF; leaq, movq and decq inside L$sub
# leave CF alone, so the sbbq chain carries the borrow across iterations.
xorq %r14,%r14 |
||||
|
movq (%rsp),%rax |
||||
|
leaq (%rsp),%rsi |
||||
|
movq %r9,%r15 |
||||
|
jmp L$sub |
||||
|
.p2align 4 |
||||
|
# result[i] = tp[i] - (%rcx)[i] - borrow
L$sub: sbbq (%rcx,%r14,8),%rax |
||||
|
movq %rax,(%rdi,%r14,8) |
||||
|
movq 8(%rsi,%r14,8),%rax |
||||
|
leaq 1(%r14),%r14 |
||||
|
decq %r15 |
||||
|
jnz L$sub |
||||
|
|
||||
|
# %rax = tp[num] - borrow.  It is used as a select mask: all-ones keeps tp
# (the subtraction went negative), zero keeps the difference already stored
# at %rdi.  Presumably tp[num] is only ever 0 or 1 -- confirm.
# The choice is made with and/not/or, without a branch.
sbbq $0,%rax |
||||
|
xorq %r14,%r14 |
||||
|
andq %rax,%rsi |
||||
|
notq %rax |
||||
|
movq %rdi,%rcx |
||||
|
andq %rax,%rcx |
||||
|
movq %r9,%r15 |
||||
|
orq %rcx,%rsi |
||||
|
.p2align 4 |
||||
|
# Copy the selected vector to the result; each tp word is overwritten with
# the loop index as it is passed, so no intermediate value is left in tp[].
L$copy: |
||||
|
movq (%rsi,%r14,8),%rax |
||||
|
movq %r14,(%rsp,%r14,8) |
||||
|
movq %rax,(%rdi,%r14,8) |
||||
|
leaq 1(%r14),%r14 |
||||
|
subq $1,%r15 |
||||
|
jnz L$copy |
||||
|
|
||||
|
# Epilogue: reload the saved %rsp from tp[num+1], return 1, restore the
# callee-saved registers from where the pushes left them.
movq 8(%rsp,%r9,8),%rsi |
||||
|
movq $1,%rax |
||||
|
movq (%rsi),%r15 |
||||
|
movq 8(%rsi),%r14 |
||||
|
movq 16(%rsi),%r13 |
||||
|
movq 24(%rsi),%r12 |
||||
|
movq 32(%rsi),%rbp |
||||
|
movq 40(%rsi),%rbx |
||||
|
leaq 48(%rsi),%rsp |
||||
|
L$mul_epilogue: |
||||
|
# Hand-encoded "rep ret".
.byte 0xf3,0xc3 |
||||
|
|
||||
|
|
||||
|
.p2align 4 |
||||
|
# bn_mul4x_mont_gather5 -- 4x-unrolled variant, reached through L$mul4x_enter
# from the dispatcher when num is a multiple of 4 and at least 8.
# Register roles on entry are the same as for the dispatcher:
#   %rdi result, %rsi first operand, %rdx gather table, %rcx vector that is
#   multiplied by m and finally subtracted, %r8 pointer to the word seeding
#   m, %r9d word count, 8(%rsp) 32-bit table index.
# Inside the loops %rbx = current gathered word, %rbp = m, %r15 = inner word
# index (steps by 4), %r14 = pass counter; %rdi is reused as an accumulator,
# which is why the result pointer is parked on the stack.
# Returns 1 in %rax.
bn_mul4x_mont_gather5: |
||||
|
L$mul4x_enter: |
||||
|
# Zero-extend num, and read the table index before the pushes move %rsp.
movl %r9d,%r9d |
||||
|
movl 8(%rsp),%r10d |
||||
|
pushq %rbx |
||||
|
pushq %rbp |
||||
|
pushq %r12 |
||||
|
pushq %r13 |
||||
|
pushq %r14 |
||||
|
pushq %r15 |
||||
|
# Reserve num+4 qwords below the saved registers and align %rsp down to
# 1024 bytes.
movq %rsp,%rax |
||||
|
leaq 4(%r9),%r11 |
||||
|
negq %r11 |
||||
|
leaq (%rsp,%r11,8),%rsp |
||||
|
andq $-1024,%rsp |
||||
|
|
||||
|
# Slot num+1 = pre-allocation %rsp, slot num+2 = result pointer.
movq %rax,8(%rsp,%r9,8) |
||||
|
L$mul4x_body: |
||||
|
movq %rdi,16(%rsp,%r9,8) |
||||
|
# Gather setup: %r11 = index & 7 (qword within a 64-byte row),
# %r10 = ~(index >> 3) & 3 (offset into L$magic_masks for %xmm4-%xmm7).
movq %rdx,%r12 |
||||
|
movq %r10,%r11 |
||||
|
shrq $3,%r10 |
||||
|
andq $7,%r11 |
||||
|
notq %r10 |
||||
|
leaq L$magic_masks(%rip),%rax |
||||
|
andq $3,%r10 |
||||
|
leaq 96(%r12,%r11,8),%r12 |
||||
|
movq 0(%rax,%r10,8),%xmm4 |
||||
|
movq 8(%rax,%r10,8),%xmm5 |
||||
|
movq 16(%rax,%r10,8),%xmm6 |
||||
|
movq 24(%rax,%r10,8),%xmm7 |
||||
|
|
||||
|
# Gather the first word: load all four rows, mask, combine; then step
# %r12 by 256 bytes.
movq -96(%r12),%xmm0 |
||||
|
movq -32(%r12),%xmm1 |
||||
|
pand %xmm4,%xmm0 |
||||
|
movq 32(%r12),%xmm2 |
||||
|
pand %xmm5,%xmm1 |
||||
|
movq 96(%r12),%xmm3 |
||||
|
pand %xmm6,%xmm2 |
||||
|
por %xmm1,%xmm0 |
||||
|
pand %xmm7,%xmm3 |
||||
|
por %xmm2,%xmm0 |
||||
|
leaq 256(%r12),%r12 |
||||
|
por %xmm3,%xmm0 |
||||
|
|
||||
|
# Hand-encoded movq %xmm0,%rbx.
.byte 102,72,15,126,195 |
||||
|
movq (%r8),%r8 |
||||
|
movq (%rsi),%rax |
||||
|
|
||||
|
xorq %r14,%r14 |
||||
|
xorq %r15,%r15 |
||||
|
|
||||
|
# Gather of the next word, interleaved with the first multiplications.
movq -96(%r12),%xmm0 |
||||
|
movq -32(%r12),%xmm1 |
||||
|
pand %xmm4,%xmm0 |
||||
|
movq 32(%r12),%xmm2 |
||||
|
pand %xmm5,%xmm1 |
||||
|
|
||||
|
movq %r8,%rbp |
||||
|
mulq %rbx |
||||
|
movq %rax,%r10 |
||||
|
movq (%rcx),%rax |
||||
|
|
||||
|
movq 96(%r12),%xmm3 |
||||
|
pand %xmm6,%xmm2 |
||||
|
por %xmm1,%xmm0 |
||||
|
pand %xmm7,%xmm3 |
||||
|
|
||||
|
# %rbp = m = %r8 * low64(a[0]*b).
imulq %r10,%rbp |
||||
|
movq %rdx,%r11 |
||||
|
|
||||
|
por %xmm2,%xmm0 |
||||
|
leaq 256(%r12),%r12 |
||||
|
por %xmm3,%xmm0 |
||||
|
|
||||
|
mulq %rbp |
||||
|
addq %rax,%r10 |
||||
|
movq 8(%rsi),%rax |
||||
|
adcq $0,%rdx |
||||
|
movq %rdx,%rdi |
||||
|
|
||||
|
mulq %rbx |
||||
|
addq %rax,%r11 |
||||
|
movq 8(%rcx),%rax |
||||
|
adcq $0,%rdx |
||||
|
movq %rdx,%r10 |
||||
|
|
||||
|
mulq %rbp |
||||
|
addq %rax,%rdi |
||||
|
movq 16(%rsi),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r11,%rdi |
||||
|
leaq 4(%r15),%r15 |
||||
|
adcq $0,%rdx |
||||
|
movq %rdi,(%rsp) |
||||
|
movq %rdx,%r13 |
||||
|
jmp L$1st4x |
||||
|
.p2align 4 |
||||
|
# First pass, four words per iteration.  Two interleaved carry chains:
# %r10/%r11 for the products with %rbx, %r13/%rdi for the products with %rbp.
L$1st4x: |
||||
|
mulq %rbx |
||||
|
addq %rax,%r10 |
||||
|
movq -16(%rcx,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
movq %rdx,%r11 |
||||
|
|
||||
|
mulq %rbp |
||||
|
addq %rax,%r13 |
||||
|
movq -8(%rsi,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r10,%r13 |
||||
|
adcq $0,%rdx |
||||
|
movq %r13,-24(%rsp,%r15,8) |
||||
|
movq %rdx,%rdi |
||||
|
|
||||
|
mulq %rbx |
||||
|
addq %rax,%r11 |
||||
|
movq -8(%rcx,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
movq %rdx,%r10 |
||||
|
|
||||
|
mulq %rbp |
||||
|
addq %rax,%rdi |
||||
|
movq (%rsi,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r11,%rdi |
||||
|
adcq $0,%rdx |
||||
|
movq %rdi,-16(%rsp,%r15,8) |
||||
|
movq %rdx,%r13 |
||||
|
|
||||
|
mulq %rbx |
||||
|
addq %rax,%r10 |
||||
|
movq (%rcx,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
movq %rdx,%r11 |
||||
|
|
||||
|
mulq %rbp |
||||
|
addq %rax,%r13 |
||||
|
movq 8(%rsi,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r10,%r13 |
||||
|
adcq $0,%rdx |
||||
|
movq %r13,-8(%rsp,%r15,8) |
||||
|
movq %rdx,%rdi |
||||
|
|
||||
|
mulq %rbx |
||||
|
addq %rax,%r11 |
||||
|
movq 8(%rcx,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
leaq 4(%r15),%r15 |
||||
|
movq %rdx,%r10 |
||||
|
|
||||
|
mulq %rbp |
||||
|
addq %rax,%rdi |
||||
|
movq -16(%rsi,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r11,%rdi |
||||
|
adcq $0,%rdx |
||||
|
movq %rdi,-32(%rsp,%r15,8) |
||||
|
movq %rdx,%r13 |
||||
|
cmpq %r9,%r15 |
||||
|
jl L$1st4x |
||||
|
|
||||
|
# Tail of the first pass: the last two words, then reload a[0] for the
# next pass.
mulq %rbx |
||||
|
addq %rax,%r10 |
||||
|
movq -16(%rcx,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
movq %rdx,%r11 |
||||
|
|
||||
|
mulq %rbp |
||||
|
addq %rax,%r13 |
||||
|
movq -8(%rsi,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r10,%r13 |
||||
|
adcq $0,%rdx |
||||
|
movq %r13,-24(%rsp,%r15,8) |
||||
|
movq %rdx,%rdi |
||||
|
|
||||
|
mulq %rbx |
||||
|
addq %rax,%r11 |
||||
|
movq -8(%rcx,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
movq %rdx,%r10 |
||||
|
|
||||
|
mulq %rbp |
||||
|
addq %rax,%rdi |
||||
|
movq (%rsi),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r11,%rdi |
||||
|
adcq $0,%rdx |
||||
|
movq %rdi,-16(%rsp,%r15,8) |
||||
|
movq %rdx,%r13 |
||||
|
|
||||
|
# Hand-encoded movq %xmm0,%rbx: %rbx = next gathered word.
.byte 102,72,15,126,195 |
||||
|
|
||||
|
# Store tp[num-1] and the carry word tp[num].
xorq %rdi,%rdi |
||||
|
addq %r10,%r13 |
||||
|
adcq $0,%rdi |
||||
|
movq %r13,-8(%rsp,%r15,8) |
||||
|
movq %rdi,(%rsp,%r15,8) |
||||
|
|
||||
|
leaq 1(%r14),%r14 |
||||
|
.p2align 2 |
||||
|
# Outer loop: one pass per remaining gathered word; the previous tp[] is
# added into the new sums.
L$outer4x: |
||||
|
xorq %r15,%r15 |
||||
|
movq -96(%r12),%xmm0 |
||||
|
movq -32(%r12),%xmm1 |
||||
|
pand %xmm4,%xmm0 |
||||
|
movq 32(%r12),%xmm2 |
||||
|
pand %xmm5,%xmm1 |
||||
|
|
||||
|
movq (%rsp),%r10 |
||||
|
movq %r8,%rbp |
||||
|
mulq %rbx |
||||
|
addq %rax,%r10 |
||||
|
movq (%rcx),%rax |
||||
|
adcq $0,%rdx |
||||
|
|
||||
|
movq 96(%r12),%xmm3 |
||||
|
pand %xmm6,%xmm2 |
||||
|
por %xmm1,%xmm0 |
||||
|
pand %xmm7,%xmm3 |
||||
|
|
||||
|
# %rbp = m = %r8 * low64(tp[0] + a[0]*b).
imulq %r10,%rbp |
||||
|
movq %rdx,%r11 |
||||
|
|
||||
|
por %xmm2,%xmm0 |
||||
|
leaq 256(%r12),%r12 |
||||
|
por %xmm3,%xmm0 |
||||
|
|
||||
|
mulq %rbp |
||||
|
addq %rax,%r10 |
||||
|
movq 8(%rsi),%rax |
||||
|
adcq $0,%rdx |
||||
|
movq %rdx,%rdi |
||||
|
|
||||
|
mulq %rbx |
||||
|
addq %rax,%r11 |
||||
|
movq 8(%rcx),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq 8(%rsp),%r11 |
||||
|
adcq $0,%rdx |
||||
|
movq %rdx,%r10 |
||||
|
|
||||
|
mulq %rbp |
||||
|
addq %rax,%rdi |
||||
|
movq 16(%rsi),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r11,%rdi |
||||
|
leaq 4(%r15),%r15 |
||||
|
adcq $0,%rdx |
||||
|
movq %rdx,%r13 |
||||
|
jmp L$inner4x |
||||
|
.p2align 4 |
||||
|
# Inner loop, four words per iteration.  Results are stored one step late
# (%rdi/%r13 hold the word computed in the previous step), so the stores use
# offsets -32/-24/-16/-40 relative to the advancing %r15.
L$inner4x: |
||||
|
mulq %rbx |
||||
|
addq %rax,%r10 |
||||
|
movq -16(%rcx,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq -16(%rsp,%r15,8),%r10 |
||||
|
adcq $0,%rdx |
||||
|
movq %rdx,%r11 |
||||
|
|
||||
|
mulq %rbp |
||||
|
addq %rax,%r13 |
||||
|
movq -8(%rsi,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r10,%r13 |
||||
|
adcq $0,%rdx |
||||
|
movq %rdi,-32(%rsp,%r15,8) |
||||
|
movq %rdx,%rdi |
||||
|
|
||||
|
mulq %rbx |
||||
|
addq %rax,%r11 |
||||
|
movq -8(%rcx,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq -8(%rsp,%r15,8),%r11 |
||||
|
adcq $0,%rdx |
||||
|
movq %rdx,%r10 |
||||
|
|
||||
|
mulq %rbp |
||||
|
addq %rax,%rdi |
||||
|
movq (%rsi,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r11,%rdi |
||||
|
adcq $0,%rdx |
||||
|
movq %r13,-24(%rsp,%r15,8) |
||||
|
movq %rdx,%r13 |
||||
|
|
||||
|
mulq %rbx |
||||
|
addq %rax,%r10 |
||||
|
movq (%rcx,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq (%rsp,%r15,8),%r10 |
||||
|
adcq $0,%rdx |
||||
|
movq %rdx,%r11 |
||||
|
|
||||
|
mulq %rbp |
||||
|
addq %rax,%r13 |
||||
|
movq 8(%rsi,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r10,%r13 |
||||
|
adcq $0,%rdx |
||||
|
movq %rdi,-16(%rsp,%r15,8) |
||||
|
movq %rdx,%rdi |
||||
|
|
||||
|
mulq %rbx |
||||
|
addq %rax,%r11 |
||||
|
movq 8(%rcx,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq 8(%rsp,%r15,8),%r11 |
||||
|
adcq $0,%rdx |
||||
|
leaq 4(%r15),%r15 |
||||
|
movq %rdx,%r10 |
||||
|
|
||||
|
mulq %rbp |
||||
|
addq %rax,%rdi |
||||
|
movq -16(%rsi,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r11,%rdi |
||||
|
adcq $0,%rdx |
||||
|
movq %r13,-40(%rsp,%r15,8) |
||||
|
movq %rdx,%r13 |
||||
|
cmpq %r9,%r15 |
||||
|
jl L$inner4x |
||||
|
|
||||
|
# Tail of the pass: the last two words; the pass counter %r14 is bumped
# here and a[0] is reloaded for the next pass.
mulq %rbx |
||||
|
addq %rax,%r10 |
||||
|
movq -16(%rcx,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq -16(%rsp,%r15,8),%r10 |
||||
|
adcq $0,%rdx |
||||
|
movq %rdx,%r11 |
||||
|
|
||||
|
mulq %rbp |
||||
|
addq %rax,%r13 |
||||
|
movq -8(%rsi,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r10,%r13 |
||||
|
adcq $0,%rdx |
||||
|
movq %rdi,-32(%rsp,%r15,8) |
||||
|
movq %rdx,%rdi |
||||
|
|
||||
|
mulq %rbx |
||||
|
addq %rax,%r11 |
||||
|
movq -8(%rcx,%r15,8),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq -8(%rsp,%r15,8),%r11 |
||||
|
adcq $0,%rdx |
||||
|
leaq 1(%r14),%r14 |
||||
|
movq %rdx,%r10 |
||||
|
|
||||
|
mulq %rbp |
||||
|
addq %rax,%rdi |
||||
|
movq (%rsi),%rax |
||||
|
adcq $0,%rdx |
||||
|
addq %r11,%rdi |
||||
|
adcq $0,%rdx |
||||
|
movq %r13,-24(%rsp,%r15,8) |
||||
|
movq %rdx,%r13 |
||||
|
|
||||
|
# Hand-encoded movq %xmm0,%rbx: %rbx = next gathered word.
.byte 102,72,15,126,195 |
||||
|
movq %rdi,-16(%rsp,%r15,8) |
||||
|
|
||||
|
# Fold the old carry word tp[num] into the top and store the new
# tp[num-1] and tp[num].
xorq %rdi,%rdi |
||||
|
addq %r10,%r13 |
||||
|
adcq $0,%rdi |
||||
|
addq (%rsp,%r9,8),%r13 |
||||
|
adcq $0,%rdi |
||||
|
movq %r13,-8(%rsp,%r15,8) |
||||
|
movq %rdi,(%rsp,%r15,8) |
||||
|
|
||||
|
cmpq %r9,%r14 |
||||
|
jl L$outer4x |
||||
|
# Final subtraction, four words per iteration.  Reload the result pointer,
# zero %xmm0 (used later to wipe tp[]), and turn num into num/4.
movq 16(%rsp,%r9,8),%rdi |
||||
|
movq 0(%rsp),%rax |
||||
|
pxor %xmm0,%xmm0 |
||||
|
movq 8(%rsp),%rdx |
||||
|
shrq $2,%r9 |
||||
|
leaq (%rsp),%rsi |
||||
|
xorq %r14,%r14 |
||||
|
|
||||
|
# The subq starts the borrow chain; every later step uses sbbq, and the
# movq/leaq/decq in between do not touch CF.
subq 0(%rcx),%rax |
||||
|
movq 16(%rsi),%rbx |
||||
|
movq 24(%rsi),%rbp |
||||
|
sbbq 8(%rcx),%rdx |
||||
|
leaq -1(%r9),%r15 |
||||
|
jmp L$sub4x |
||||
|
.p2align 4 |
||||
|
L$sub4x: |
||||
|
movq %rax,0(%rdi,%r14,8) |
||||
|
movq %rdx,8(%rdi,%r14,8) |
||||
|
sbbq 16(%rcx,%r14,8),%rbx |
||||
|
movq 32(%rsi,%r14,8),%rax |
||||
|
movq 40(%rsi,%r14,8),%rdx |
||||
|
sbbq 24(%rcx,%r14,8),%rbp |
||||
|
movq %rbx,16(%rdi,%r14,8) |
||||
|
movq %rbp,24(%rdi,%r14,8) |
||||
|
sbbq 32(%rcx,%r14,8),%rax |
||||
|
movq 48(%rsi,%r14,8),%rbx |
||||
|
movq 56(%rsi,%r14,8),%rbp |
||||
|
sbbq 40(%rcx,%r14,8),%rdx |
||||
|
leaq 4(%r14),%r14 |
||||
|
decq %r15 |
||||
|
jnz L$sub4x |
||||
|
|
||||
|
# Last group of four; %rax is then reloaded with the carry word tp[num].
movq %rax,0(%rdi,%r14,8) |
||||
|
movq 32(%rsi,%r14,8),%rax |
||||
|
sbbq 16(%rcx,%r14,8),%rbx |
||||
|
movq %rdx,8(%rdi,%r14,8) |
||||
|
sbbq 24(%rcx,%r14,8),%rbp |
||||
|
movq %rbx,16(%rdi,%r14,8) |
||||
|
|
||||
|
# %rax = tp[num] - borrow, used as a select mask: all-ones picks tp, zero
# picks the difference already written to the result (branch-free select).
sbbq $0,%rax |
||||
|
movq %rbp,24(%rdi,%r14,8) |
||||
|
xorq %r14,%r14 |
||||
|
andq %rax,%rsi |
||||
|
notq %rax |
||||
|
movq %rdi,%rcx |
||||
|
andq %rax,%rcx |
||||
|
leaq -1(%r9),%r15 |
||||
|
orq %rcx,%rsi |
||||
|
|
||||
|
# Copy the selected vector to the result 16 bytes at a time while zeroing
# tp[] with %xmm0.  The aligned stores rely on %rsp being 1024-byte aligned.
# %r14 is a byte offset in this loop.
movdqu (%rsi),%xmm1 |
||||
|
movdqa %xmm0,(%rsp) |
||||
|
movdqu %xmm1,(%rdi) |
||||
|
jmp L$copy4x |
||||
|
.p2align 4 |
||||
|
L$copy4x: |
||||
|
movdqu 16(%rsi,%r14,1),%xmm2 |
||||
|
movdqu 32(%rsi,%r14,1),%xmm1 |
||||
|
movdqa %xmm0,16(%rsp,%r14,1) |
||||
|
movdqu %xmm2,16(%rdi,%r14,1) |
||||
|
movdqa %xmm0,32(%rsp,%r14,1) |
||||
|
movdqu %xmm1,32(%rdi,%r14,1) |
||||
|
leaq 32(%r14),%r14 |
||||
|
decq %r15 |
||||
|
jnz L$copy4x |
||||
|
|
||||
|
# Restore num (undo the shrq), copy the final 16 bytes, then run the
# epilogue: reload the saved %rsp, return 1, restore callee-saved registers.
shlq $2,%r9 |
||||
|
movdqu 16(%rsi,%r14,1),%xmm2 |
||||
|
movdqa %xmm0,16(%rsp,%r14,1) |
||||
|
movdqu %xmm2,16(%rdi,%r14,1) |
||||
|
movq 8(%rsp,%r9,8),%rsi |
||||
|
movq $1,%rax |
||||
|
movq (%rsi),%r15 |
||||
|
movq 8(%rsi),%r14 |
||||
|
movq 16(%rsi),%r13 |
||||
|
movq 24(%rsi),%r12 |
||||
|
movq 32(%rsi),%rbp |
||||
|
movq 40(%rsi),%rbx |
||||
|
leaq 48(%rsi),%rsp |
||||
|
L$mul4x_epilogue: |
||||
|
# Hand-encoded "rep ret".
.byte 0xf3,0xc3 |
||||
|
|
||||
|
.globl _bn_scatter5 |
||||
|
|
||||
|
.p2align 4 |
||||
|
# _bn_scatter5 -- store a vector of qwords into the interleaved table.
#   %rdi  source words
#   %rsi  number of words to store
#   %rdx  table base
#   %rcx  slot index
# Word i is written to table + index*8 + i*256.  Returns at once when the
# count is zero.  Clobbers %rax, %rdx, %rdi, %rsi and flags.
_bn_scatter5: |
||||
|
cmpq $0,%rsi |
||||
|
jz L$scatter_epilogue |
||||
|
# %rdx = address of this index's slot for word 0.
leaq (%rdx,%rcx,8),%rdx |
||||
|
L$scatter: |
||||
|
movq (%rdi),%rax |
||||
|
leaq 8(%rdi),%rdi |
||||
|
movq %rax,(%rdx) |
||||
|
# Consecutive words of one entry sit 256 bytes apart.
leaq 256(%rdx),%rdx |
||||
|
subq $1,%rsi |
||||
|
jnz L$scatter |
||||
|
L$scatter_epilogue: |
||||
|
# Hand-encoded "rep ret".
.byte 0xf3,0xc3 |
||||
|
|
||||
|
|
||||
|
.globl _bn_gather5 |
||||
|
|
||||
|
.p2align 4 |
||||
|
# _bn_gather5 -- read one entry back out of the interleaved table.
#   %rdi  destination words
#   %rsi  number of words to fetch
#   %rdx  table base
#   %rcx  slot index
# For every word all four 64-byte rows are loaded and combined with masks,
# so the addresses read depend only on index & 7, not on index >> 3.
# NOTE(review): unlike _bn_scatter5 there is no guard for a zero count; the
# loop body always runs at least once.
# Clobbers %rax, %rcx, %rdx, %rdi, %rsi, %r11, %xmm0-%xmm7 and flags.
_bn_gather5: |
||||
|
# %r11 = index & 7 (qword within a row); %rcx = ~(index >> 3) & 3 is the
# offset into L$magic_masks that makes exactly one of %xmm4-%xmm7 all-ones.
movq %rcx,%r11 |
||||
|
shrq $3,%rcx |
||||
|
andq $7,%r11 |
||||
|
notq %rcx |
||||
|
leaq L$magic_masks(%rip),%rax |
||||
|
andq $3,%rcx |
||||
|
# Bias by 96 so the four rows are addressed as -96, -32, +32, +96.
leaq 96(%rdx,%r11,8),%rdx |
||||
|
movq 0(%rax,%rcx,8),%xmm4 |
||||
|
movq 8(%rax,%rcx,8),%xmm5 |
||||
|
movq 16(%rax,%rcx,8),%xmm6 |
||||
|
movq 24(%rax,%rcx,8),%xmm7 |
||||
|
jmp L$gather |
||||
|
.p2align 4 |
||||
|
L$gather: |
||||
|
movq -96(%rdx),%xmm0 |
||||
|
movq -32(%rdx),%xmm1 |
||||
|
pand %xmm4,%xmm0 |
||||
|
movq 32(%rdx),%xmm2 |
||||
|
pand %xmm5,%xmm1 |
||||
|
movq 96(%rdx),%xmm3 |
||||
|
pand %xmm6,%xmm2 |
||||
|
por %xmm1,%xmm0 |
||||
|
pand %xmm7,%xmm3 |
||||
|
por %xmm2,%xmm0 |
||||
|
# Next word of the entry is 256 bytes further on.
leaq 256(%rdx),%rdx |
||||
|
por %xmm3,%xmm0 |
||||
|
|
||||
|
movq %xmm0,(%rdi) |
||||
|
leaq 8(%rdi),%rdi |
||||
|
subq $1,%rsi |
||||
|
jnz L$gather |
||||
|
# Hand-encoded "rep ret".
.byte 0xf3,0xc3 |
||||
|
L$SEH_end_bn_gather5: |
||||
|
|
||||
|
.p2align 6 |
||||
|
# Eight qwords, of which only the fourth (byte offset 24) is all-ones.
# The gather code loads four consecutive qwords starting at qword offset
# k = ~(index >> 3) & 3, so exactly one of the four mask registers ends up
# all-ones and the other three are zero.
L$magic_masks: |
||||
|
.long 0,0, 0,0, 0,0, -1,-1 |
||||
|
.long 0,0, 0,0, 0,0, 0,0 |
||||
|
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 |
@ -0,0 +1,404 @@ |
|||||
|
OPTION DOTNAME |
||||
|
.text$ SEGMENT ALIGN(64) 'CODE' |
||||
|
|
||||
|
|
||||
|
ALIGN 16 |
||||
|
; _mul_1x1 -- private helper: 64x64 -> 128-bit carry-less (XOR) multiply.
; Register-based private convention, not the Win64 ABI:
;   in:   rax = a, rbp = b, r8 = nibble mask (the caller loads 0fh)
;   out:  rdx:rax = product (rax = low 64 bits, rdx = high 64 bits)
;   clobbers rbx, rcx, rsi, rdi, rbp, r9-r14, xmm0, xmm1, flags;
;   r8 is left untouched.
; Uses 136 bytes of stack: a 16-entry table at [rsp]..[rsp+120].
_mul_1x1 PROC PRIVATE |
||||
|
sub rsp,128+8 |
||||
|
; r9 = a with its top three bits cleared (a1); r10 = a1*2, r11 = a1*4,
; r12 = a*8.  The three cleared bits are handled separately right here:
; each is spread into a full-width mask with sar, and'ed with b, and the
; result is shifted into place in rdx:rax.
mov r9,-1 |
||||
|
lea rsi,QWORD PTR[rax*1+rax] |
||||
|
shr r9,3 |
||||
|
lea rdi,QWORD PTR[rax*4] |
||||
|
and r9,rax |
||||
|
lea r12,QWORD PTR[rax*8] |
||||
|
sar rax,63 |
||||
|
lea r10,QWORD PTR[r9*1+r9] |
||||
|
sar rsi,63 |
||||
|
lea r11,QWORD PTR[r9*4] |
||||
|
and rax,rbp |
||||
|
sar rdi,63 |
||||
|
mov rdx,rax |
||||
|
shl rax,63 |
||||
|
and rsi,rbp |
||||
|
shr rdx,1 |
||||
|
mov rcx,rsi |
||||
|
shl rsi,62 |
||||
|
and rdi,rbp |
||||
|
shr rcx,2 |
||||
|
xor rax,rsi |
||||
|
mov rbx,rdi |
||||
|
shl rdi,61 |
||||
|
xor rdx,rcx |
||||
|
shr rbx,3 |
||||
|
xor rax,rdi |
||||
|
xor rdx,rbx |
||||
|
|
||||
|
; Build tab[i] at [rsp+8*i] for i = 0..15: the XOR of a1, a1*2, a1*4 and
; a*8 selected by the bits of i.
mov r13,r9 |
||||
|
mov QWORD PTR[rsp],0 |
||||
|
xor r13,r10 |
||||
|
mov QWORD PTR[8+rsp],r9 |
||||
|
mov r14,r11 |
||||
|
mov QWORD PTR[16+rsp],r10 |
||||
|
xor r14,r12 |
||||
|
mov QWORD PTR[24+rsp],r13 |
||||
|
|
||||
|
xor r9,r11 |
||||
|
mov QWORD PTR[32+rsp],r11 |
||||
|
xor r10,r11 |
||||
|
mov QWORD PTR[40+rsp],r9 |
||||
|
xor r13,r11 |
||||
|
mov QWORD PTR[48+rsp],r10 |
||||
|
xor r9,r14 |
||||
|
mov QWORD PTR[56+rsp],r13 |
||||
|
xor r10,r14 |
||||
|
|
||||
|
mov QWORD PTR[64+rsp],r12 |
||||
|
xor r13,r14 |
||||
|
mov QWORD PTR[72+rsp],r9 |
||||
|
xor r9,r11 |
||||
|
mov QWORD PTR[80+rsp],r10 |
||||
|
xor r10,r11 |
||||
|
mov QWORD PTR[88+rsp],r13 |
||||
|
|
||||
|
; The first two nibbles of b are extracted while the last table entries
; are still being stored.
xor r13,r11 |
||||
|
mov QWORD PTR[96+rsp],r14 |
||||
|
mov rsi,r8 |
||||
|
mov QWORD PTR[104+rsp],r9 |
||||
|
and rsi,rbp |
||||
|
mov QWORD PTR[112+rsp],r10 |
||||
|
shr rbp,4 |
||||
|
mov QWORD PTR[120+rsp],r13 |
||||
|
mov rdi,r8 |
||||
|
and rdi,rbp |
||||
|
shr rbp,4 |
||||
|
|
||||
|
; Walk b four bits at a time.  Nibbles at bit offsets that are multiples of
; eight go through xmm0/xmm1 (tab entry shifted left by whole bytes with
; pslldq); the nibbles in between go through rcx/rbx and are shifted by
; 4, 12, 20, ... bits straight into rax (low) and rdx (high).
movq xmm0,QWORD PTR[rsi*8+rsp] |
||||
|
mov rsi,r8 |
||||
|
and rsi,rbp |
||||
|
shr rbp,4 |
||||
|
mov rcx,QWORD PTR[rdi*8+rsp] |
||||
|
mov rdi,r8 |
||||
|
mov rbx,rcx |
||||
|
shl rcx,4 |
||||
|
and rdi,rbp |
||||
|
movq xmm1,QWORD PTR[rsi*8+rsp] |
||||
|
shr rbx,60 |
||||
|
xor rax,rcx |
||||
|
pslldq xmm1,1 |
||||
|
mov rsi,r8 |
||||
|
shr rbp,4 |
||||
|
xor rdx,rbx |
||||
|
and rsi,rbp |
||||
|
shr rbp,4 |
||||
|
pxor xmm0,xmm1 |
||||
|
mov rcx,QWORD PTR[rdi*8+rsp] |
||||
|
mov rdi,r8 |
||||
|
mov rbx,rcx |
||||
|
shl rcx,12 |
||||
|
and rdi,rbp |
||||
|
movq xmm1,QWORD PTR[rsi*8+rsp] |
||||
|
shr rbx,52 |
||||
|
xor rax,rcx |
||||
|
pslldq xmm1,2 |
||||
|
mov rsi,r8 |
||||
|
shr rbp,4 |
||||
|
xor rdx,rbx |
||||
|
and rsi,rbp |
||||
|
shr rbp,4 |
||||
|
pxor xmm0,xmm1 |
||||
|
mov rcx,QWORD PTR[rdi*8+rsp] |
||||
|
mov rdi,r8 |
||||
|
mov rbx,rcx |
||||
|
shl rcx,20 |
||||
|
and rdi,rbp |
||||
|
movq xmm1,QWORD PTR[rsi*8+rsp] |
||||
|
shr rbx,44 |
||||
|
xor rax,rcx |
||||
|
pslldq xmm1,3 |
||||
|
mov rsi,r8 |
||||
|
shr rbp,4 |
||||
|
xor rdx,rbx |
||||
|
and rsi,rbp |
||||
|
shr rbp,4 |
||||
|
pxor xmm0,xmm1 |
||||
|
mov rcx,QWORD PTR[rdi*8+rsp] |
||||
|
mov rdi,r8 |
||||
|
mov rbx,rcx |
||||
|
shl rcx,28 |
||||
|
and rdi,rbp |
||||
|
movq xmm1,QWORD PTR[rsi*8+rsp] |
||||
|
shr rbx,36 |
||||
|
xor rax,rcx |
||||
|
pslldq xmm1,4 |
||||
|
mov rsi,r8 |
||||
|
shr rbp,4 |
||||
|
xor rdx,rbx |
||||
|
and rsi,rbp |
||||
|
shr rbp,4 |
||||
|
pxor xmm0,xmm1 |
||||
|
mov rcx,QWORD PTR[rdi*8+rsp] |
||||
|
mov rdi,r8 |
||||
|
mov rbx,rcx |
||||
|
shl rcx,36 |
||||
|
and rdi,rbp |
||||
|
movq xmm1,QWORD PTR[rsi*8+rsp] |
||||
|
shr rbx,28 |
||||
|
xor rax,rcx |
||||
|
pslldq xmm1,5 |
||||
|
mov rsi,r8 |
||||
|
shr rbp,4 |
||||
|
xor rdx,rbx |
||||
|
and rsi,rbp |
||||
|
shr rbp,4 |
||||
|
pxor xmm0,xmm1 |
||||
|
mov rcx,QWORD PTR[rdi*8+rsp] |
||||
|
mov rdi,r8 |
||||
|
mov rbx,rcx |
||||
|
shl rcx,44 |
||||
|
and rdi,rbp |
||||
|
movq xmm1,QWORD PTR[rsi*8+rsp] |
||||
|
shr rbx,20 |
||||
|
xor rax,rcx |
||||
|
pslldq xmm1,6 |
||||
|
mov rsi,r8 |
||||
|
shr rbp,4 |
||||
|
xor rdx,rbx |
||||
|
and rsi,rbp |
||||
|
shr rbp,4 |
||||
|
pxor xmm0,xmm1 |
||||
|
mov rcx,QWORD PTR[rdi*8+rsp] |
||||
|
mov rdi,r8 |
||||
|
mov rbx,rcx |
||||
|
shl rcx,52 |
||||
|
and rdi,rbp |
||||
|
movq xmm1,QWORD PTR[rsi*8+rsp] |
||||
|
shr rbx,12 |
||||
|
xor rax,rcx |
||||
|
pslldq xmm1,7 |
||||
|
mov rsi,r8 |
||||
|
shr rbp,4 |
||||
|
xor rdx,rbx |
||||
|
and rsi,rbp |
||||
|
shr rbp,4 |
||||
|
pxor xmm0,xmm1 |
||||
|
; Last nibble, then fold the 128-bit xmm0 accumulator into rdx:rax.
mov rcx,QWORD PTR[rdi*8+rsp] |
||||
|
mov rbx,rcx |
||||
|
shl rcx,60 |
||||
|
; Hand-encoded movq rsi,xmm0 (low half of the accumulator).
DB 102,72,15,126,198 |
||||
|
shr rbx,4 |
||||
|
xor rax,rcx |
||||
|
psrldq xmm0,8 |
||||
|
xor rdx,rbx |
||||
|
; Hand-encoded movq rdi,xmm0 (high half, after the shift above).
DB 102,72,15,126,199 |
||||
|
xor rax,rsi |
||||
|
xor rdx,rdi |
||||
|
|
||||
|
add rsp,128+8 |
||||
|
DB 0F3h,0C3h ;repret |
||||
|
$L$end_mul_1x1:: |
||||
|
_mul_1x1 ENDP |
||||
|
EXTERN OPENSSL_ia32cap_P:NEAR |
||||
|
PUBLIC bn_GF2m_mul_2x2 |
||||
|
|
||||
|
ALIGN 16 |
||||
|
; bn_GF2m_mul_2x2 -- 128x128 -> 256-bit carry-less multiply (Win64 ABI).
;   rcx       pointer to the four-qword result
;   rdx, r8   high and low word of the first operand (named a1, a0 below)
;   r9        high word of the second operand (b1)
;   [40+rsp]  low word of the second operand (b0), fifth argument on the stack
; Both paths compute a1*b1, a0*b0 and (a1^a0)*(b1^b0) and combine them
; (Karatsuba), so only three 64x64 multiplications are needed.
bn_GF2m_mul_2x2 PROC PUBLIC |
||||
|
; Bit 33 of OPENSSL_ia32cap_P gates the path that uses the hand-encoded
; pclmulqdq instructions; without it fall back to the table-driven helper.
mov rax,QWORD PTR[OPENSSL_ia32cap_P] |
||||
|
bt rax,33 |
||||
|
jnc $L$vanilla_mul_2x2 |
||||
|
|
||||
|
; movq xmm0,rdx (a1)
DB 102,72,15,110,194 |
||||
|
; movq xmm1,r9 (b1)
DB 102,73,15,110,201 |
||||
|
; movq xmm2,r8 (a0)
DB 102,73,15,110,208 |
||||
|
movq xmm3,QWORD PTR[40+rsp] |
||||
|
movdqa xmm4,xmm0 |
||||
|
movdqa xmm5,xmm1 |
||||
|
; pclmulqdq xmm0,xmm1,0 : xmm0 = a1*b1
DB 102,15,58,68,193,0 |
||||
|
pxor xmm4,xmm2 |
||||
|
pxor xmm5,xmm3 |
||||
|
; pclmulqdq xmm2,xmm3,0 : xmm2 = a0*b0
DB 102,15,58,68,211,0 |
||||
|
; pclmulqdq xmm4,xmm5,0 : xmm4 = (a1^a0)*(b1^b0)
DB 102,15,58,68,229,0 |
||||
|
; Middle term = xmm4 ^ a1*b1 ^ a0*b0; its low half goes to the top of the
; low product, its high half to the bottom of the high product.
xorps xmm4,xmm0 |
||||
|
xorps xmm4,xmm2 |
||||
|
movdqa xmm5,xmm4 |
||||
|
pslldq xmm4,8 |
||||
|
psrldq xmm5,8 |
||||
|
pxor xmm2,xmm4 |
||||
|
pxor xmm0,xmm5 |
||||
|
movdqu XMMWORD PTR[rcx],xmm2 |
||||
|
movdqu XMMWORD PTR[16+rcx],xmm0 |
||||
|
DB 0F3h,0C3h ;repret |
||||
|
|
||||
|
ALIGN 16 |
||||
|
$L$vanilla_mul_2x2:: |
||||
|
; 136-byte frame: [0..24] partial products, [32..64] the five arguments,
; [80..128] saved r14, r13, r12, rbp, rbx, rdi, rsi (all clobbered by
; _mul_1x1 or by the combining code below).
lea rsp,QWORD PTR[((-136))+rsp] |
||||
|
; Fifth argument: 40 above the entry rsp = 176 above the new one.
mov r10,QWORD PTR[176+rsp] |
||||
|
mov QWORD PTR[120+rsp],rdi |
||||
|
mov QWORD PTR[128+rsp],rsi |
||||
|
mov QWORD PTR[80+rsp],r14 |
||||
|
mov QWORD PTR[88+rsp],r13 |
||||
|
mov QWORD PTR[96+rsp],r12 |
||||
|
mov QWORD PTR[104+rsp],rbp |
||||
|
mov QWORD PTR[112+rsp],rbx |
||||
|
$L$body_mul_2x2:: |
||||
|
mov QWORD PTR[32+rsp],rcx |
||||
|
mov QWORD PTR[40+rsp],rdx |
||||
|
mov QWORD PTR[48+rsp],r8 |
||||
|
mov QWORD PTR[56+rsp],r9 |
||||
|
mov QWORD PTR[64+rsp],r10 |
||||
|
|
||||
|
; r8 = nibble mask for _mul_1x1; the helper does not modify it, so it is
; set once for all three calls.  First product: a1*b1.
mov r8,0fh |
||||
|
mov rax,rdx |
||||
|
mov rbp,r9 |
||||
|
call _mul_1x1 |
||||
|
|
||||
|
mov QWORD PTR[16+rsp],rax |
||||
|
mov QWORD PTR[24+rsp],rdx |
||||
|
|
||||
|
; Second product: a0*b0.
mov rax,QWORD PTR[48+rsp] |
||||
|
mov rbp,QWORD PTR[64+rsp] |
||||
|
call _mul_1x1 |
||||
|
|
||||
|
mov QWORD PTR[rsp],rax |
||||
|
mov QWORD PTR[8+rsp],rdx |
||||
|
|
||||
|
; Third product: (a1^a0)*(b1^b0), left in rdx:rax.
mov rax,QWORD PTR[40+rsp] |
||||
|
mov rbp,QWORD PTR[56+rsp] |
||||
|
xor rax,QWORD PTR[48+rsp] |
||||
|
xor rbp,QWORD PTR[64+rsp] |
||||
|
call _mul_1x1 |
||||
|
|
||||
|
; rcx:rbx = a0*b0, rsi:rdi = a1*b1, rbp = result pointer.
mov rbx,QWORD PTR[rsp] |
||||
|
mov rcx,QWORD PTR[8+rsp] |
||||
|
mov rdi,QWORD PTR[16+rsp] |
||||
|
mov rsi,QWORD PTR[24+rsp] |
||||
|
mov rbp,QWORD PTR[32+rsp] |
||||
|
|
||||
|
; Combine: result[0] and result[3] are the outer words of the two plain
; products; result[1] and result[2] additionally take the middle term.
xor rax,rdx |
||||
|
xor rdx,rcx |
||||
|
xor rax,rbx |
||||
|
mov QWORD PTR[rbp],rbx |
||||
|
xor rdx,rdi |
||||
|
mov QWORD PTR[24+rbp],rsi |
||||
|
xor rax,rsi |
||||
|
xor rdx,rsi |
||||
|
xor rax,rdx |
||||
|
mov QWORD PTR[16+rbp],rdx |
||||
|
mov QWORD PTR[8+rbp],rax |
||||
|
|
||||
|
; Restore the saved registers and release the frame.
mov r14,QWORD PTR[80+rsp] |
||||
|
mov r13,QWORD PTR[88+rsp] |
||||
|
mov r12,QWORD PTR[96+rsp] |
||||
|
mov rbp,QWORD PTR[104+rsp] |
||||
|
mov rbx,QWORD PTR[112+rsp] |
||||
|
mov rdi,QWORD PTR[120+rsp] |
||||
|
mov rsi,QWORD PTR[128+rsp] |
||||
|
lea rsp,QWORD PTR[136+rsp] |
||||
|
DB 0F3h,0C3h ;repret |
||||
|
$L$end_mul_2x2:: |
||||
|
bn_GF2m_mul_2x2 ENDP |
||||
|
DB 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105 |
||||
|
DB 99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54 |
||||
|
DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 |
||||
|
DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 |
||||
|
DB 111,114,103,62,0 |
||||
|
ALIGN 16 |
||||
|
EXTERN __imp_RtlVirtualUnwind:NEAR |
||||
|
|
||||
|
|
||||
|
ALIGN 16 |
||||
|
; se_handler -- Win64 language-specific exception handler referenced from
; $L$SEH_info_2x2.  It rebuilds the caller's register state for the
; $L$vanilla_mul_2x2 frame and then hands over to RtlVirtualUnwind.
;   r8 = context record, r9 = dispatcher context (as used below; the numeric
;   offsets presumably follow the Win64 CONTEXT and DISPATCHER_CONTEXT
;   layouts -- confirm against the SDK headers).
; Returns 1 in eax.
se_handler PROC PRIVATE |
||||
|
push rsi |
||||
|
push rdi |
||||
|
push rbx |
||||
|
push rbp |
||||
|
push r12 |
||||
|
push r13 |
||||
|
push r14 |
||||
|
push r15 |
||||
|
pushfq |
||||
|
sub rsp,64 |
||||
|
|
||||
|
; rax = stack pointer, rbx = instruction pointer from the context record.
mov rax,QWORD PTR[152+r8] |
||||
|
mov rbx,QWORD PTR[248+r8] |
||||
|
|
||||
|
; Before $L$body_mul_2x2 the register saves may not all have executed, so
; skip reloading them from the frame.
lea r10,QWORD PTR[$L$body_mul_2x2] |
||||
|
cmp rbx,r10 |
||||
|
jb $L$in_prologue |
||||
|
|
||||
|
; Same frame offsets (80..128) that $L$vanilla_mul_2x2 saved them at.
mov r14,QWORD PTR[80+rax] |
||||
|
mov r13,QWORD PTR[88+rax] |
||||
|
mov r12,QWORD PTR[96+rax] |
||||
|
mov rbp,QWORD PTR[104+rax] |
||||
|
mov rbx,QWORD PTR[112+rax] |
||||
|
mov rdi,QWORD PTR[120+rax] |
||||
|
mov rsi,QWORD PTR[128+rax] |
||||
|
|
||||
|
; Write the recovered values back into the context record.
mov QWORD PTR[144+r8],rbx |
||||
|
mov QWORD PTR[160+r8],rbp |
||||
|
mov QWORD PTR[168+r8],rsi |
||||
|
mov QWORD PTR[176+r8],rdi |
||||
|
mov QWORD PTR[216+r8],r12 |
||||
|
mov QWORD PTR[224+r8],r13 |
||||
|
mov QWORD PTR[232+r8],r14 |
||||
|
|
||||
|
$L$in_prologue:: |
||||
|
; Pop the 136-byte frame in the recorded stack pointer.
lea rax,QWORD PTR[136+rax] |
||||
|
mov QWORD PTR[152+r8],rax |
||||
|
|
||||
|
; Copy 154 qwords from the context record (r8) to the buffer at [40+r9].
mov rdi,QWORD PTR[40+r9] |
||||
|
mov rsi,r8 |
||||
|
mov ecx,154 |
||||
|
; Bytes fc f3 48 a5 = cld ; rep movsq
DD 0a548f3fch |
||||
|
|
||||
|
|
||||
|
; Set up the RtlVirtualUnwind call: four register arguments (rcx = 0, rdx,
; r8, r9 from the dispatcher context) and four stack arguments at
; [32+rsp]..[56+rsp], the last of them zero.
mov rsi,r9 |
||||
|
xor rcx,rcx |
||||
|
mov rdx,QWORD PTR[8+rsi] |
||||
|
mov r8,QWORD PTR[rsi] |
||||
|
mov r9,QWORD PTR[16+rsi] |
||||
|
mov r10,QWORD PTR[40+rsi] |
||||
|
lea r11,QWORD PTR[56+rsi] |
||||
|
lea r12,QWORD PTR[24+rsi] |
||||
|
mov QWORD PTR[32+rsp],r10 |
||||
|
mov QWORD PTR[40+rsp],r11 |
||||
|
mov QWORD PTR[48+rsp],r12 |
||||
|
mov QWORD PTR[56+rsp],rcx |
||||
|
call QWORD PTR[__imp_RtlVirtualUnwind] |
||||
|
|
||||
|
; Return 1 (presumably ExceptionContinueSearch -- confirm against the SDK)
; and restore everything pushed on entry.
mov eax,1 |
||||
|
add rsp,64 |
||||
|
popfq |
||||
|
pop r15 |
||||
|
pop r14 |
||||
|
pop r13 |
||||
|
pop r12 |
||||
|
pop rbp |
||||
|
pop rbx |
||||
|
pop rdi |
||||
|
pop rsi |
||||
|
DB 0F3h,0C3h ;repret |
||||
|
se_handler ENDP |
||||
|
|
||||
|
.text$ ENDS |
||||
|
.pdata SEGMENT READONLY ALIGN(4) |
||||
|
ALIGN 4 |
||||
|
DD imagerel _mul_1x1 |
||||
|
DD imagerel $L$end_mul_1x1 |
||||
|
DD imagerel $L$SEH_info_1x1 |
||||
|
|
||||
|
DD imagerel $L$vanilla_mul_2x2 |
||||
|
DD imagerel $L$end_mul_2x2 |
||||
|
DD imagerel $L$SEH_info_2x2 |
||||
|
.pdata ENDS |
||||
|
.xdata SEGMENT READONLY ALIGN(8) |
||||
|
ALIGN 8 |
||||
|
$L$SEH_info_1x1:: |
||||
|
DB 001h,007h,002h,000h |
||||
|
DB 007h,001h,011h,000h |
||||
|
|
||||
|
$L$SEH_info_2x2:: |
||||
|
DB 9,0,0,0 |
||||
|
DD imagerel se_handler |
||||
|
|
||||
|
.xdata ENDS |
||||
|
END |
@ -0,0 +1,990 @@ |
|||||
|
OPTION DOTNAME |
||||
|
.text$ SEGMENT ALIGN(64) 'CODE' |
||||
|
|
||||
|
PUBLIC bn_mul_mont_gather5 |
||||
|
|
||||
|
ALIGN 64 |
||||
|
; bn_mul_mont_gather5 - Montgomery multiplication (see the ident string
; after $L$magic_masks) in which the second operand is gathered word by
; word from a strided table using AND/OR mask selection.
;
; Win64 entry registers / stack slots, and where the prologue moves them:
;   rcx      -> rdi  result pointer (written by $L$sub / $L$copy)
;   rdx      -> rsi  first operand words (read as [rsi+8*j])
;   r8       -> rdx  table base (copied to r12 in the body)
;   r9       -> rcx  second word array that is multiplied by the reduction
;                    factor and finally subtracted - presumably the modulus
;   [40+rsp] -> r8   pointer to one 64-bit constant (dereferenced in the
;                    body) - presumably the Montgomery n0 value
;   [48+rsp] -> r9   word count (only the low 32 bits are used)
;   [56+rsp] -> r10d table index of the entry to gather
; Returns rax = 1. rbx, rbp, r12-r15, rdi, rsi, xmm6 and xmm7 are saved
; and restored. Scratch space of (count+2) words is carved from the stack.
bn_mul_mont_gather5 PROC PUBLIC |
||||
|
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue |
||||
|
mov QWORD PTR[16+rsp],rsi |
||||
|
mov rax,rsp |
||||
|
$L$SEH_begin_bn_mul_mont_gather5:: |
||||
|
; Shuffle the Win64 arguments into the register layout used by the body.
mov rdi,rcx |
||||
|
mov rsi,rdx |
||||
|
mov rdx,r8 |
||||
|
mov rcx,r9 |
||||
|
mov r8,QWORD PTR[40+rsp] |
||||
|
mov r9,QWORD PTR[48+rsp] |
||||
|
|
||||
|
|
||||
|
; Dispatch: a word count that is a multiple of 4 and at least 8 goes to
; the 4x-unrolled body; everything else stays on the generic path below.
test r9d,3 |
||||
|
jnz $L$mul_enter |
||||
|
cmp r9d,8 |
||||
|
jb $L$mul_enter |
||||
|
jmp $L$mul4x_enter |
||||
|
|
||||
|
ALIGN 16 |
||||
|
$L$mul_enter:: |
||||
|
; Zero-extend the 32-bit count, then fetch the table index (7th argument)
; before the pushes move rsp.
mov r9d,r9d |
||||
|
mov r10d,DWORD PTR[56+rsp] |
||||
|
push rbx |
||||
|
push rbp |
||||
|
push r12 |
||||
|
push r13 |
||||
|
push r14 |
||||
|
push r15 |
||||
|
; 40 more bytes: save area for xmm6/xmm7, which the body uses as masks.
lea rsp,QWORD PTR[((-40))+rsp] |
||||
|
movaps XMMWORD PTR[rsp],xmm6 |
||||
|
movaps XMMWORD PTR[16+rsp],xmm7 |
||||
|
$L$mul_alloca:: |
||||
|
; Allocate (count+2) qwords of scratch below the save area and round rsp
; down to a 1024-byte boundary. rax keeps the pre-allocation rsp.
mov rax,rsp |
||||
|
lea r11,QWORD PTR[2+r9] |
||||
|
neg r11 |
||||
|
lea rsp,QWORD PTR[r11*8+rsp] |
||||
|
and rsp,-1024 |
||||
|
|
||||
|
; Scratch layout: [rsp+8*i] for i < count = running result words,
; [rsp+8*count] = top carry word, [rsp+8*count+8] = saved rsp (also read
; by mul_handler when unwinding).
mov QWORD PTR[8+r9*8+rsp],rax |
||||
|
$L$mul_body:: |
||||
|
; Split the index: r11 = index & 7 picks the qword inside a 64-byte row,
; r10 = ~(index >> 3) & 3 picks a window into $L$magic_masks such that
; exactly one of xmm4..xmm7 is all-ones and the other three are zero.
mov r12,rdx |
||||
|
mov r11,r10 |
||||
|
shr r10,3 |
||||
|
and r11,7 |
||||
|
not r10 |
||||
|
lea rax,QWORD PTR[$L$magic_masks] |
||||
|
and r10,3 |
||||
|
lea r12,QWORD PTR[96+r11*8+r12] |
||||
|
movq xmm4,QWORD PTR[r10*8+rax] |
||||
|
movq xmm5,QWORD PTR[8+r10*8+rax] |
||||
|
movq xmm6,QWORD PTR[16+r10*8+rax] |
||||
|
movq xmm7,QWORD PTR[24+r10*8+rax] |
||||
|
|
||||
|
; Gather word 0 of the table entry: all four candidates (64 bytes apart)
; are loaded and masked, so the addresses touched do not depend on which
; row is selected. r12 then advances by 256 bytes to the next word.
movq xmm0,QWORD PTR[((-96))+r12] |
||||
|
movq xmm1,QWORD PTR[((-32))+r12] |
||||
|
pand xmm0,xmm4 |
||||
|
movq xmm2,QWORD PTR[32+r12] |
||||
|
pand xmm1,xmm5 |
||||
|
movq xmm3,QWORD PTR[96+r12] |
||||
|
pand xmm2,xmm6 |
||||
|
por xmm0,xmm1 |
||||
|
pand xmm3,xmm7 |
||||
|
por xmm0,xmm2 |
||||
|
lea r12,QWORD PTR[256+r12] |
||||
|
por xmm0,xmm3 |
||||
|
|
||||
|
; Encoded bytes 66 48 0F 7E C3 = movq rbx,xmm0 (gathered word -> rbx).
DB 102,72,15,126,195 |
||||
|
|
||||
|
; r8 now holds the 64-bit constant itself; rax = first operand word 0.
mov r8,QWORD PTR[r8] |
||||
|
mov rax,QWORD PTR[rsi] |
||||
|
|
||||
|
; r14 = outer (multiplier word) counter, r15 = inner word counter.
xor r14,r14 |
||||
|
xor r15,r15 |
||||
|
|
||||
|
; The gather of the NEXT multiplier word is interleaved with the first
; multiplication pass; its result stays in xmm0 until the pass finishes.
movq xmm0,QWORD PTR[((-96))+r12] |
||||
|
movq xmm1,QWORD PTR[((-32))+r12] |
||||
|
pand xmm0,xmm4 |
||||
|
movq xmm2,QWORD PTR[32+r12] |
||||
|
pand xmm1,xmm5 |
||||
|
|
||||
|
mov rbp,r8 |
||||
|
mul rbx |
||||
|
mov r10,rax |
||||
|
mov rax,QWORD PTR[rcx] |
||||
|
|
||||
|
movq xmm3,QWORD PTR[96+r12] |
||||
|
pand xmm2,xmm6 |
||||
|
por xmm0,xmm1 |
||||
|
pand xmm3,xmm7 |
||||
|
|
||||
|
; rbp = (low product word) * constant: the per-pass reduction multiplier.
imul rbp,r10 |
||||
|
mov r11,rdx |
||||
|
|
||||
|
por xmm0,xmm2 |
||||
|
lea r12,QWORD PTR[256+r12] |
||||
|
por xmm0,xmm3 |
||||
|
|
||||
|
mul rbp |
||||
|
add r10,rax |
||||
|
mov rax,QWORD PTR[8+rsi] |
||||
|
adc rdx,0 |
||||
|
mov r13,rdx |
||||
|
|
||||
|
lea r15,QWORD PTR[1+r15] |
||||
|
jmp $L$1st_enter |
||||
|
|
||||
|
ALIGN 16 |
||||
|
; First pass: scratch = (operand * rbx + second array * rbp) shifted down
; one word. Carries travel in r11/r10 (rbx products) and r13 (rbp
; products); lea/mov between add and adc leave the flags untouched.
$L$1st:: |
||||
|
add r13,rax |
||||
|
mov rax,QWORD PTR[r15*8+rsi] |
||||
|
adc rdx,0 |
||||
|
add r13,r11 |
||||
|
mov r11,r10 |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-16))+r15*8+rsp],r13 |
||||
|
mov r13,rdx |
||||
|
|
||||
|
$L$1st_enter:: |
||||
|
mul rbx |
||||
|
add r11,rax |
||||
|
mov rax,QWORD PTR[r15*8+rcx] |
||||
|
adc rdx,0 |
||||
|
lea r15,QWORD PTR[1+r15] |
||||
|
mov r10,rdx |
||||
|
|
||||
|
mul rbp |
||||
|
cmp r15,r9 |
||||
|
jne $L$1st |
||||
|
|
||||
|
; movq rbx,xmm0 - switch to the multiplier word gathered during the pass.
DB 102,72,15,126,195 |
||||
|
|
||||
|
add r13,rax |
||||
|
mov rax,QWORD PTR[rsi] |
||||
|
adc rdx,0 |
||||
|
add r13,r11 |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-16))+r15*8+rsp],r13 |
||||
|
mov r13,rdx |
||||
|
mov r11,r10 |
||||
|
|
||||
|
; Fold the two final carries into the last result word and the top word.
xor rdx,rdx |
||||
|
add r13,r11 |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-8))+r9*8+rsp],r13 |
||||
|
mov QWORD PTR[r9*8+rsp],rdx |
||||
|
|
||||
|
lea r14,QWORD PTR[1+r14] |
||||
|
jmp $L$outer |
||||
|
ALIGN 16 |
||||
|
; Outer loop: one iteration per remaining multiplier word. Same structure
; as the first pass, but the previous scratch contents are accumulated.
$L$outer:: |
||||
|
xor r15,r15 |
||||
|
mov rbp,r8 |
||||
|
mov r10,QWORD PTR[rsp] |
||||
|
|
||||
|
movq xmm0,QWORD PTR[((-96))+r12] |
||||
|
movq xmm1,QWORD PTR[((-32))+r12] |
||||
|
pand xmm0,xmm4 |
||||
|
movq xmm2,QWORD PTR[32+r12] |
||||
|
pand xmm1,xmm5 |
||||
|
|
||||
|
mul rbx |
||||
|
add r10,rax |
||||
|
mov rax,QWORD PTR[rcx] |
||||
|
adc rdx,0 |
||||
|
|
||||
|
movq xmm3,QWORD PTR[96+r12] |
||||
|
pand xmm2,xmm6 |
||||
|
por xmm0,xmm1 |
||||
|
pand xmm3,xmm7 |
||||
|
|
||||
|
imul rbp,r10 |
||||
|
mov r11,rdx |
||||
|
|
||||
|
por xmm0,xmm2 |
||||
|
lea r12,QWORD PTR[256+r12] |
||||
|
por xmm0,xmm3 |
||||
|
|
||||
|
mul rbp |
||||
|
add r10,rax |
||||
|
mov rax,QWORD PTR[8+rsi] |
||||
|
adc rdx,0 |
||||
|
mov r10,QWORD PTR[8+rsp] |
||||
|
mov r13,rdx |
||||
|
|
||||
|
lea r15,QWORD PTR[1+r15] |
||||
|
jmp $L$inner_enter |
||||
|
|
||||
|
ALIGN 16 |
||||
|
$L$inner:: |
||||
|
add r13,rax |
||||
|
mov rax,QWORD PTR[r15*8+rsi] |
||||
|
adc rdx,0 |
||||
|
add r13,r10 |
||||
|
mov r10,QWORD PTR[r15*8+rsp] |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-16))+r15*8+rsp],r13 |
||||
|
mov r13,rdx |
||||
|
|
||||
|
$L$inner_enter:: |
||||
|
mul rbx |
||||
|
add r11,rax |
||||
|
mov rax,QWORD PTR[r15*8+rcx] |
||||
|
adc rdx,0 |
||||
|
add r10,r11 |
||||
|
mov r11,rdx |
||||
|
adc r11,0 |
||||
|
lea r15,QWORD PTR[1+r15] |
||||
|
|
||||
|
mul rbp |
||||
|
cmp r15,r9 |
||||
|
jne $L$inner |
||||
|
|
||||
|
; movq rbx,xmm0 - next multiplier word.
DB 102,72,15,126,195 |
||||
|
|
||||
|
add r13,rax |
||||
|
mov rax,QWORD PTR[rsi] |
||||
|
adc rdx,0 |
||||
|
add r13,r10 |
||||
|
mov r10,QWORD PTR[r15*8+rsp] |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-16))+r15*8+rsp],r13 |
||||
|
mov r13,rdx |
||||
|
|
||||
|
xor rdx,rdx |
||||
|
add r13,r11 |
||||
|
adc rdx,0 |
||||
|
add r13,r10 |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-8))+r9*8+rsp],r13 |
||||
|
mov QWORD PTR[r9*8+rsp],rdx |
||||
|
|
||||
|
lea r14,QWORD PTR[1+r14] |
||||
|
cmp r14,r9 |
||||
|
jl $L$outer |
||||
|
|
||||
|
; Final step: result = scratch - second array, written to [rdi]. The xor
; clears CF for the first sbb; dec/lea/mov inside the loop preserve CF so
; the borrow chains across iterations.
xor r14,r14 |
||||
|
mov rax,QWORD PTR[rsp] |
||||
|
lea rsi,QWORD PTR[rsp] |
||||
|
mov r15,r9 |
||||
|
jmp $L$sub |
||||
|
ALIGN 16 |
||||
|
$L$sub:: sbb rax,QWORD PTR[r14*8+rcx] |
||||
|
mov QWORD PTR[r14*8+rdi],rax |
||||
|
mov rax,QWORD PTR[8+r14*8+rsi] |
||||
|
lea r14,QWORD PTR[1+r14] |
||||
|
dec r15 |
||||
|
jnz $L$sub |
||||
|
|
||||
|
; rax = top word minus the final borrow, used as a select mask:
; rsi = (scratch & rax) | (result & ~rax), i.e. the copy source is chosen
; without a branch.
sbb rax,0 |
||||
|
xor r14,r14 |
||||
|
and rsi,rax |
||||
|
not rax |
||||
|
mov rcx,rdi |
||||
|
and rcx,rax |
||||
|
mov r15,r9 |
||||
|
or rsi,rcx |
||||
|
ALIGN 16 |
||||
|
; Copy the selected words to the result and overwrite each scratch word
; with the loop index so the intermediate value does not stay on the stack.
$L$copy:: |
||||
|
mov rax,QWORD PTR[r14*8+rsi] |
||||
|
mov QWORD PTR[r14*8+rsp],r14 |
||||
|
mov QWORD PTR[r14*8+rdi],rax |
||||
|
lea r14,QWORD PTR[1+r14] |
||||
|
sub r15,1 |
||||
|
jnz $L$copy |
||||
|
|
||||
|
; Epilogue: reload the pre-allocation rsp, restore xmm6/xmm7 and the
; pushed registers (stored in reverse push order), return 1.
mov rsi,QWORD PTR[8+r9*8+rsp] |
||||
|
mov rax,1 |
||||
|
movaps xmm6,XMMWORD PTR[rsi] |
||||
|
movaps xmm7,XMMWORD PTR[16+rsi] |
||||
|
lea rsi,QWORD PTR[40+rsi] |
||||
|
mov r15,QWORD PTR[rsi] |
||||
|
mov r14,QWORD PTR[8+rsi] |
||||
|
mov r13,QWORD PTR[16+rsi] |
||||
|
mov r12,QWORD PTR[24+rsi] |
||||
|
mov rbp,QWORD PTR[32+rsi] |
||||
|
mov rbx,QWORD PTR[40+rsi] |
||||
|
lea rsp,QWORD PTR[48+rsi] |
||||
|
$L$mul_epilogue:: |
||||
|
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue |
||||
|
mov rsi,QWORD PTR[16+rsp] |
||||
|
DB 0F3h,0C3h ;repret |
||||
|
$L$SEH_end_bn_mul_mont_gather5:: |
||||
|
bn_mul_mont_gather5 ENDP |
||||
|
|
||||
|
ALIGN 16 |
||||
|
; bn_mul4x_mont_gather5 - 4x-unrolled variant of bn_mul_mont_gather5.
; It has its own Win64 prologue, but in this file it is reached through
; the jmp to $L$mul4x_enter, which bn_mul_mont_gather5 takes when the word
; count is a multiple of 4 and at least 8. Register roles at
; $L$mul4x_enter are the same as at $L$mul_enter:
;   rdi = result, rsi = first operand, rdx = table base, rcx = second
;   word array (presumably the modulus), r8 = pointer to the 64-bit
;   constant, r9d = word count, [56+rsp] = table index.
; Returns rax = 1. Scratch space is (count+4) qwords on the stack.
bn_mul4x_mont_gather5 PROC PRIVATE |
||||
|
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue |
||||
|
mov QWORD PTR[16+rsp],rsi |
||||
|
mov rax,rsp |
||||
|
$L$SEH_begin_bn_mul4x_mont_gather5:: |
||||
|
mov rdi,rcx |
||||
|
mov rsi,rdx |
||||
|
mov rdx,r8 |
||||
|
mov rcx,r9 |
||||
|
mov r8,QWORD PTR[40+rsp] |
||||
|
mov r9,QWORD PTR[48+rsp] |
||||
|
|
||||
|
|
||||
|
$L$mul4x_enter:: |
||||
|
; Zero-extend the count and fetch the table index before rsp moves.
mov r9d,r9d |
||||
|
mov r10d,DWORD PTR[56+rsp] |
||||
|
push rbx |
||||
|
push rbp |
||||
|
push r12 |
||||
|
push r13 |
||||
|
push r14 |
||||
|
push r15 |
||||
|
; 40 bytes for the xmm6/xmm7 save area.
lea rsp,QWORD PTR[((-40))+rsp] |
||||
|
movaps XMMWORD PTR[rsp],xmm6 |
||||
|
movaps XMMWORD PTR[16+rsp],xmm7 |
||||
|
$L$mul4x_alloca:: |
||||
|
; Allocate (count+4) qwords of scratch and align rsp down to 1024 bytes.
mov rax,rsp |
||||
|
lea r11,QWORD PTR[4+r9] |
||||
|
neg r11 |
||||
|
lea rsp,QWORD PTR[r11*8+rsp] |
||||
|
and rsp,-1024 |
||||
|
|
||||
|
; [rsp+8*count+8] = saved rsp (same slot the generic path uses, so
; mul_handler can serve both).
mov QWORD PTR[8+r9*8+rsp],rax |
||||
|
$L$mul4x_body:: |
||||
|
; The result pointer is parked on the stack because rdi is reused as an
; accumulator in the multiplication loops.
mov QWORD PTR[16+r9*8+rsp],rdi |
||||
|
; Index split and mask setup, identical to the generic path: exactly one
; of xmm4..xmm7 ends up all-ones.
mov r12,rdx |
||||
|
mov r11,r10 |
||||
|
shr r10,3 |
||||
|
and r11,7 |
||||
|
not r10 |
||||
|
lea rax,QWORD PTR[$L$magic_masks] |
||||
|
and r10,3 |
||||
|
lea r12,QWORD PTR[96+r11*8+r12] |
||||
|
movq xmm4,QWORD PTR[r10*8+rax] |
||||
|
movq xmm5,QWORD PTR[8+r10*8+rax] |
||||
|
movq xmm6,QWORD PTR[16+r10*8+rax] |
||||
|
movq xmm7,QWORD PTR[24+r10*8+rax] |
||||
|
|
||||
|
; Gather multiplier word 0 (all four candidate rows are always read).
movq xmm0,QWORD PTR[((-96))+r12] |
||||
|
movq xmm1,QWORD PTR[((-32))+r12] |
||||
|
pand xmm0,xmm4 |
||||
|
movq xmm2,QWORD PTR[32+r12] |
||||
|
pand xmm1,xmm5 |
||||
|
movq xmm3,QWORD PTR[96+r12] |
||||
|
pand xmm2,xmm6 |
||||
|
por xmm0,xmm1 |
||||
|
pand xmm3,xmm7 |
||||
|
por xmm0,xmm2 |
||||
|
lea r12,QWORD PTR[256+r12] |
||||
|
por xmm0,xmm3 |
||||
|
|
||||
|
; Encoded bytes 66 48 0F 7E C3 = movq rbx,xmm0.
DB 102,72,15,126,195 |
||||
|
mov r8,QWORD PTR[r8] |
||||
|
mov rax,QWORD PTR[rsi] |
||||
|
|
||||
|
; r14 = outer counter, r15 = inner word counter (advances by 4).
xor r14,r14 |
||||
|
xor r15,r15 |
||||
|
|
||||
|
; Gather of the next multiplier word, interleaved with the first pass.
movq xmm0,QWORD PTR[((-96))+r12] |
||||
|
movq xmm1,QWORD PTR[((-32))+r12] |
||||
|
pand xmm0,xmm4 |
||||
|
movq xmm2,QWORD PTR[32+r12] |
||||
|
pand xmm1,xmm5 |
||||
|
|
||||
|
mov rbp,r8 |
||||
|
mul rbx |
||||
|
mov r10,rax |
||||
|
mov rax,QWORD PTR[rcx] |
||||
|
|
||||
|
movq xmm3,QWORD PTR[96+r12] |
||||
|
pand xmm2,xmm6 |
||||
|
por xmm0,xmm1 |
||||
|
pand xmm3,xmm7 |
||||
|
|
||||
|
; rbp = (low product word) * constant: reduction multiplier for this pass.
imul rbp,r10 |
||||
|
mov r11,rdx |
||||
|
|
||||
|
por xmm0,xmm2 |
||||
|
lea r12,QWORD PTR[256+r12] |
||||
|
por xmm0,xmm3 |
||||
|
|
||||
|
mul rbp |
||||
|
add r10,rax |
||||
|
mov rax,QWORD PTR[8+rsi] |
||||
|
adc rdx,0 |
||||
|
mov rdi,rdx |
||||
|
|
||||
|
mul rbx |
||||
|
add r11,rax |
||||
|
mov rax,QWORD PTR[8+rcx] |
||||
|
adc rdx,0 |
||||
|
mov r10,rdx |
||||
|
|
||||
|
mul rbp |
||||
|
add rdi,rax |
||||
|
mov rax,QWORD PTR[16+rsi] |
||||
|
adc rdx,0 |
||||
|
add rdi,r11 |
||||
|
lea r15,QWORD PTR[4+r15] |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[rsp],rdi |
||||
|
mov r13,rdx |
||||
|
jmp $L$1st4x |
||||
|
ALIGN 16 |
||||
|
; First pass, four words per iteration. r10/r11 alternate as the carry of
; the rbx products, r13/rdi alternate as the carry of the rbp products.
$L$1st4x:: |
||||
|
mul rbx |
||||
|
add r10,rax |
||||
|
mov rax,QWORD PTR[((-16))+r15*8+rcx] |
||||
|
adc rdx,0 |
||||
|
mov r11,rdx |
||||
|
|
||||
|
mul rbp |
||||
|
add r13,rax |
||||
|
mov rax,QWORD PTR[((-8))+r15*8+rsi] |
||||
|
adc rdx,0 |
||||
|
add r13,r10 |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-24))+r15*8+rsp],r13 |
||||
|
mov rdi,rdx |
||||
|
|
||||
|
mul rbx |
||||
|
add r11,rax |
||||
|
mov rax,QWORD PTR[((-8))+r15*8+rcx] |
||||
|
adc rdx,0 |
||||
|
mov r10,rdx |
||||
|
|
||||
|
mul rbp |
||||
|
add rdi,rax |
||||
|
mov rax,QWORD PTR[r15*8+rsi] |
||||
|
adc rdx,0 |
||||
|
add rdi,r11 |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-16))+r15*8+rsp],rdi |
||||
|
mov r13,rdx |
||||
|
|
||||
|
mul rbx |
||||
|
add r10,rax |
||||
|
mov rax,QWORD PTR[r15*8+rcx] |
||||
|
adc rdx,0 |
||||
|
mov r11,rdx |
||||
|
|
||||
|
mul rbp |
||||
|
add r13,rax |
||||
|
mov rax,QWORD PTR[8+r15*8+rsi] |
||||
|
adc rdx,0 |
||||
|
add r13,r10 |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-8))+r15*8+rsp],r13 |
||||
|
mov rdi,rdx |
||||
|
|
||||
|
mul rbx |
||||
|
add r11,rax |
||||
|
mov rax,QWORD PTR[8+r15*8+rcx] |
||||
|
adc rdx,0 |
||||
|
lea r15,QWORD PTR[4+r15] |
||||
|
mov r10,rdx |
||||
|
|
||||
|
mul rbp |
||||
|
add rdi,rax |
||||
|
mov rax,QWORD PTR[((-16))+r15*8+rsi] |
||||
|
adc rdx,0 |
||||
|
add rdi,r11 |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-32))+r15*8+rsp],rdi |
||||
|
mov r13,rdx |
||||
|
cmp r15,r9 |
||||
|
jl $L$1st4x |
||||
|
|
||||
|
; Tail of the first pass: the last two word positions.
mul rbx |
||||
|
add r10,rax |
||||
|
mov rax,QWORD PTR[((-16))+r15*8+rcx] |
||||
|
adc rdx,0 |
||||
|
mov r11,rdx |
||||
|
|
||||
|
mul rbp |
||||
|
add r13,rax |
||||
|
mov rax,QWORD PTR[((-8))+r15*8+rsi] |
||||
|
adc rdx,0 |
||||
|
add r13,r10 |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-24))+r15*8+rsp],r13 |
||||
|
mov rdi,rdx |
||||
|
|
||||
|
mul rbx |
||||
|
add r11,rax |
||||
|
mov rax,QWORD PTR[((-8))+r15*8+rcx] |
||||
|
adc rdx,0 |
||||
|
mov r10,rdx |
||||
|
|
||||
|
mul rbp |
||||
|
add rdi,rax |
||||
|
mov rax,QWORD PTR[rsi] |
||||
|
adc rdx,0 |
||||
|
add rdi,r11 |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-16))+r15*8+rsp],rdi |
||||
|
mov r13,rdx |
||||
|
|
||||
|
; movq rbx,xmm0 - multiplier word gathered during the pass.
DB 102,72,15,126,195 |
||||
|
|
||||
|
; Fold the last carries into the final word and the top carry word.
xor rdi,rdi |
||||
|
add r13,r10 |
||||
|
adc rdi,0 |
||||
|
mov QWORD PTR[((-8))+r15*8+rsp],r13 |
||||
|
mov QWORD PTR[r15*8+rsp],rdi |
||||
|
|
||||
|
lea r14,QWORD PTR[1+r14] |
||||
|
ALIGN 4 |
||||
|
; Outer loop: one iteration per remaining multiplier word; the previous
; scratch contents are added in as the products are accumulated.
$L$outer4x:: |
||||
|
xor r15,r15 |
||||
|
movq xmm0,QWORD PTR[((-96))+r12] |
||||
|
movq xmm1,QWORD PTR[((-32))+r12] |
||||
|
pand xmm0,xmm4 |
||||
|
movq xmm2,QWORD PTR[32+r12] |
||||
|
pand xmm1,xmm5 |
||||
|
|
||||
|
mov r10,QWORD PTR[rsp] |
||||
|
mov rbp,r8 |
||||
|
mul rbx |
||||
|
add r10,rax |
||||
|
mov rax,QWORD PTR[rcx] |
||||
|
adc rdx,0 |
||||
|
|
||||
|
movq xmm3,QWORD PTR[96+r12] |
||||
|
pand xmm2,xmm6 |
||||
|
por xmm0,xmm1 |
||||
|
pand xmm3,xmm7 |
||||
|
|
||||
|
imul rbp,r10 |
||||
|
mov r11,rdx |
||||
|
|
||||
|
por xmm0,xmm2 |
||||
|
lea r12,QWORD PTR[256+r12] |
||||
|
por xmm0,xmm3 |
||||
|
|
||||
|
mul rbp |
||||
|
add r10,rax |
||||
|
mov rax,QWORD PTR[8+rsi] |
||||
|
adc rdx,0 |
||||
|
mov rdi,rdx |
||||
|
|
||||
|
mul rbx |
||||
|
add r11,rax |
||||
|
mov rax,QWORD PTR[8+rcx] |
||||
|
adc rdx,0 |
||||
|
add r11,QWORD PTR[8+rsp] |
||||
|
adc rdx,0 |
||||
|
mov r10,rdx |
||||
|
|
||||
|
mul rbp |
||||
|
add rdi,rax |
||||
|
mov rax,QWORD PTR[16+rsi] |
||||
|
adc rdx,0 |
||||
|
add rdi,r11 |
||||
|
lea r15,QWORD PTR[4+r15] |
||||
|
adc rdx,0 |
||||
|
mov r13,rdx |
||||
|
jmp $L$inner4x |
||||
|
ALIGN 16 |
||||
|
; Inner loop, four words per iteration. Stores lag one step behind the
; computation (the word computed in the previous step is written here).
$L$inner4x:: |
||||
|
mul rbx |
||||
|
add r10,rax |
||||
|
mov rax,QWORD PTR[((-16))+r15*8+rcx] |
||||
|
adc rdx,0 |
||||
|
add r10,QWORD PTR[((-16))+r15*8+rsp] |
||||
|
adc rdx,0 |
||||
|
mov r11,rdx |
||||
|
|
||||
|
mul rbp |
||||
|
add r13,rax |
||||
|
mov rax,QWORD PTR[((-8))+r15*8+rsi] |
||||
|
adc rdx,0 |
||||
|
add r13,r10 |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-32))+r15*8+rsp],rdi |
||||
|
mov rdi,rdx |
||||
|
|
||||
|
mul rbx |
||||
|
add r11,rax |
||||
|
mov rax,QWORD PTR[((-8))+r15*8+rcx] |
||||
|
adc rdx,0 |
||||
|
add r11,QWORD PTR[((-8))+r15*8+rsp] |
||||
|
adc rdx,0 |
||||
|
mov r10,rdx |
||||
|
|
||||
|
mul rbp |
||||
|
add rdi,rax |
||||
|
mov rax,QWORD PTR[r15*8+rsi] |
||||
|
adc rdx,0 |
||||
|
add rdi,r11 |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-24))+r15*8+rsp],r13 |
||||
|
mov r13,rdx |
||||
|
|
||||
|
mul rbx |
||||
|
add r10,rax |
||||
|
mov rax,QWORD PTR[r15*8+rcx] |
||||
|
adc rdx,0 |
||||
|
add r10,QWORD PTR[r15*8+rsp] |
||||
|
adc rdx,0 |
||||
|
mov r11,rdx |
||||
|
|
||||
|
mul rbp |
||||
|
add r13,rax |
||||
|
mov rax,QWORD PTR[8+r15*8+rsi] |
||||
|
adc rdx,0 |
||||
|
add r13,r10 |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-16))+r15*8+rsp],rdi |
||||
|
mov rdi,rdx |
||||
|
|
||||
|
mul rbx |
||||
|
add r11,rax |
||||
|
mov rax,QWORD PTR[8+r15*8+rcx] |
||||
|
adc rdx,0 |
||||
|
add r11,QWORD PTR[8+r15*8+rsp] |
||||
|
adc rdx,0 |
||||
|
lea r15,QWORD PTR[4+r15] |
||||
|
mov r10,rdx |
||||
|
|
||||
|
mul rbp |
||||
|
add rdi,rax |
||||
|
mov rax,QWORD PTR[((-16))+r15*8+rsi] |
||||
|
adc rdx,0 |
||||
|
add rdi,r11 |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-40))+r15*8+rsp],r13 |
||||
|
mov r13,rdx |
||||
|
cmp r15,r9 |
||||
|
jl $L$inner4x |
||||
|
|
||||
|
; Tail of the inner loop: last two word positions; the outer counter is
; bumped here (lea leaves the flags alone between adc and mov).
mul rbx |
||||
|
add r10,rax |
||||
|
mov rax,QWORD PTR[((-16))+r15*8+rcx] |
||||
|
adc rdx,0 |
||||
|
add r10,QWORD PTR[((-16))+r15*8+rsp] |
||||
|
adc rdx,0 |
||||
|
mov r11,rdx |
||||
|
|
||||
|
mul rbp |
||||
|
add r13,rax |
||||
|
mov rax,QWORD PTR[((-8))+r15*8+rsi] |
||||
|
adc rdx,0 |
||||
|
add r13,r10 |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-32))+r15*8+rsp],rdi |
||||
|
mov rdi,rdx |
||||
|
|
||||
|
mul rbx |
||||
|
add r11,rax |
||||
|
mov rax,QWORD PTR[((-8))+r15*8+rcx] |
||||
|
adc rdx,0 |
||||
|
add r11,QWORD PTR[((-8))+r15*8+rsp] |
||||
|
adc rdx,0 |
||||
|
lea r14,QWORD PTR[1+r14] |
||||
|
mov r10,rdx |
||||
|
|
||||
|
mul rbp |
||||
|
add rdi,rax |
||||
|
mov rax,QWORD PTR[rsi] |
||||
|
adc rdx,0 |
||||
|
add rdi,r11 |
||||
|
adc rdx,0 |
||||
|
mov QWORD PTR[((-24))+r15*8+rsp],r13 |
||||
|
mov r13,rdx |
||||
|
|
||||
|
; movq rbx,xmm0 - next multiplier word.
DB 102,72,15,126,195 |
||||
|
mov QWORD PTR[((-16))+r15*8+rsp],rdi |
||||
|
|
||||
|
; Fold the final carries plus the previous top word into the new top.
xor rdi,rdi |
||||
|
add r13,r10 |
||||
|
adc rdi,0 |
||||
|
add r13,QWORD PTR[r9*8+rsp] |
||||
|
adc rdi,0 |
||||
|
mov QWORD PTR[((-8))+r15*8+rsp],r13 |
||||
|
mov QWORD PTR[r15*8+rsp],rdi |
||||
|
|
||||
|
cmp r14,r9 |
||||
|
jl $L$outer4x |
||||
|
; Final subtraction, four words per iteration. rdi = result pointer
; reloaded from the stack, xmm0 = 0 (used later to wipe the scratch),
; r9 = count / 4 for the duration of the sub/copy loops.
mov rdi,QWORD PTR[16+r9*8+rsp] |
||||
|
mov rax,QWORD PTR[rsp] |
||||
|
pxor xmm0,xmm0 |
||||
|
mov rdx,QWORD PTR[8+rsp] |
||||
|
shr r9,2 |
||||
|
lea rsi,QWORD PTR[rsp] |
||||
|
xor r14,r14 |
||||
|
|
||||
|
; sub starts the borrow chain; every later step uses sbb, and the
; mov/lea/dec instructions in between do not modify CF.
sub rax,QWORD PTR[rcx] |
||||
|
mov rbx,QWORD PTR[16+rsi] |
||||
|
mov rbp,QWORD PTR[24+rsi] |
||||
|
sbb rdx,QWORD PTR[8+rcx] |
||||
|
lea r15,QWORD PTR[((-1))+r9] |
||||
|
jmp $L$sub4x |
||||
|
ALIGN 16 |
||||
|
$L$sub4x:: |
||||
|
mov QWORD PTR[r14*8+rdi],rax |
||||
|
mov QWORD PTR[8+r14*8+rdi],rdx |
||||
|
sbb rbx,QWORD PTR[16+r14*8+rcx] |
||||
|
mov rax,QWORD PTR[32+r14*8+rsi] |
||||
|
mov rdx,QWORD PTR[40+r14*8+rsi] |
||||
|
sbb rbp,QWORD PTR[24+r14*8+rcx] |
||||
|
mov QWORD PTR[16+r14*8+rdi],rbx |
||||
|
mov QWORD PTR[24+r14*8+rdi],rbp |
||||
|
sbb rax,QWORD PTR[32+r14*8+rcx] |
||||
|
mov rbx,QWORD PTR[48+r14*8+rsi] |
||||
|
mov rbp,QWORD PTR[56+r14*8+rsi] |
||||
|
sbb rdx,QWORD PTR[40+r14*8+rcx] |
||||
|
lea r14,QWORD PTR[4+r14] |
||||
|
dec r15 |
||||
|
jnz $L$sub4x |
||||
|
|
||||
|
; Last group of four, then rax = top word minus the final borrow.
mov QWORD PTR[r14*8+rdi],rax |
||||
|
mov rax,QWORD PTR[32+r14*8+rsi] |
||||
|
sbb rbx,QWORD PTR[16+r14*8+rcx] |
||||
|
mov QWORD PTR[8+r14*8+rdi],rdx |
||||
|
sbb rbp,QWORD PTR[24+r14*8+rcx] |
||||
|
mov QWORD PTR[16+r14*8+rdi],rbx |
||||
|
|
||||
|
; Branch-free source select: rsi = (scratch & rax) | (result & ~rax).
sbb rax,0 |
||||
|
mov QWORD PTR[24+r14*8+rdi],rbp |
||||
|
xor r14,r14 |
||||
|
and rsi,rax |
||||
|
not rax |
||||
|
mov rcx,rdi |
||||
|
and rcx,rax |
||||
|
lea r15,QWORD PTR[((-1))+r9] |
||||
|
or rsi,rcx |
||||
|
|
||||
|
; Copy 16 bytes at a time from the selected source to the result while
; zeroing the scratch area with xmm0. r14 is a BYTE offset in this loop.
movdqu xmm1,XMMWORD PTR[rsi] |
||||
|
movdqa XMMWORD PTR[rsp],xmm0 |
||||
|
movdqu XMMWORD PTR[rdi],xmm1 |
||||
|
jmp $L$copy4x |
||||
|
ALIGN 16 |
||||
|
$L$copy4x:: |
||||
|
movdqu xmm2,XMMWORD PTR[16+r14*1+rsi] |
||||
|
movdqu xmm1,XMMWORD PTR[32+r14*1+rsi] |
||||
|
movdqa XMMWORD PTR[16+r14*1+rsp],xmm0 |
||||
|
movdqu XMMWORD PTR[16+r14*1+rdi],xmm2 |
||||
|
movdqa XMMWORD PTR[32+r14*1+rsp],xmm0 |
||||
|
movdqu XMMWORD PTR[32+r14*1+rdi],xmm1 |
||||
|
lea r14,QWORD PTR[32+r14] |
||||
|
dec r15 |
||||
|
jnz $L$copy4x |
||||
|
|
||||
|
; Restore r9 to the word count so the saved-rsp slot can be addressed,
; finish the last 16 bytes, then run the common epilogue.
shl r9,2 |
||||
|
movdqu xmm2,XMMWORD PTR[16+r14*1+rsi] |
||||
|
movdqa XMMWORD PTR[16+r14*1+rsp],xmm0 |
||||
|
movdqu XMMWORD PTR[16+r14*1+rdi],xmm2 |
||||
|
mov rsi,QWORD PTR[8+r9*8+rsp] |
||||
|
mov rax,1 |
||||
|
movaps xmm6,XMMWORD PTR[rsi] |
||||
|
movaps xmm7,XMMWORD PTR[16+rsi] |
||||
|
lea rsi,QWORD PTR[40+rsi] |
||||
|
mov r15,QWORD PTR[rsi] |
||||
|
mov r14,QWORD PTR[8+rsi] |
||||
|
mov r13,QWORD PTR[16+rsi] |
||||
|
mov r12,QWORD PTR[24+rsi] |
||||
|
mov rbp,QWORD PTR[32+rsi] |
||||
|
mov rbx,QWORD PTR[40+rsi] |
||||
|
lea rsp,QWORD PTR[48+rsi] |
||||
|
$L$mul4x_epilogue:: |
||||
|
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue |
||||
|
mov rsi,QWORD PTR[16+rsp] |
||||
|
DB 0F3h,0C3h ;repret |
||||
|
$L$SEH_end_bn_mul4x_mont_gather5:: |
||||
|
bn_mul4x_mont_gather5 ENDP |
||||
|
PUBLIC bn_scatter5 |
||||
|
|
||||
|
ALIGN 16 |
||||
|
;-----------------------------------------------------------------------
; bn_scatter5 - spread a word vector into the strided power table.
; ABI:  Microsoft x64, leaf function, no stack use.
; In:   rcx = source words
;       rdx = number of words to store (0 is accepted: nothing is written)
;       r8  = table base
;       r9  = entry index; word i of the entry lands at
;             table + 8*index + 256*i
; Out:  nothing meaningful in rax (it holds the last word copied)
; Clobbers: rax, rcx, rdx, r8, flags
;-----------------------------------------------------------------------
bn_scatter5	PROC PUBLIC
	test	rdx,rdx				; empty vector?
	jz	$L$scatter_done
	lea	r8,QWORD PTR[r9*8+r8]		; r8 = slot of word 0 for this index
$L$scatter_word::
	mov	rax,QWORD PTR[rcx]		; fetch next source word
	mov	QWORD PTR[r8],rax		; drop it into the table
	lea	rcx,QWORD PTR[8+rcx]		; source advances one word
	lea	r8,QWORD PTR[256+r8]		; table advances one 256-byte stride
	sub	rdx,1
	jnz	$L$scatter_word
$L$scatter_done::
	DB	0F3h,0C3h			;repret
bn_scatter5	ENDP
||||
|
|
||||
|
PUBLIC bn_gather5 |
||||
|
|
||||
|
ALIGN 16 |
||||
|
;-----------------------------------------------------------------------
; bn_gather5 - read one entry back out of the strided power table using
; mask selection, so the addresses touched do not depend on the index.
; ABI:  Microsoft x64; allocates 40 bytes of stack, unwind data in .xdata
;       ($L$SEH_info_bn_gather5).
; In:   rcx = destination words
;       rdx = number of words to gather (must be non-zero: the loop
;             decrements before it tests)
;       r8  = table base
;       r9  = entry index
; Out:  destination filled; rax is left pointing at $L$magic_masks
; Clobbers: rax, rcx, rdx, r8, r9, r11, xmm0-xmm5, flags
; Preserves: xmm6, xmm7 (callee-saved on Win64; saved in the prologue and
;       reloaded in the epilogue)
;
; Fix: the epilogue used to execute "movaps [rsp],xmm6 / movaps
; [16+rsp],xmm7", i.e. it stored the mask values over the save area
; instead of reloading the caller's registers, so xmm6/xmm7 came back
; clobbered.
;-----------------------------------------------------------------------
bn_gather5	PROC PUBLIC
$L$SEH_begin_bn_gather5::

; The prologue is emitted as raw bytes so that its instruction offsets
; (4, 8, 13) match the unwind codes exactly. Do not re-encode it.
DB	048h,083h,0ech,028h			; sub    rsp,28h
DB	00fh,029h,034h,024h			; movaps XMMWORD PTR[rsp],xmm6
DB	00fh,029h,07ch,024h,010h		; movaps XMMWORD PTR[16+rsp],xmm7

	; r11 = index & 7  : qword inside a 64-byte row
	; r9  = ~(index >> 3) & 3 : window into $L$magic_masks such that
	;       exactly one of xmm4..xmm7 becomes all-ones
	mov	r11,r9
	shr	r9,3
	and	r11,7
	not	r9
	lea	rax,QWORD PTR[$L$magic_masks]
	and	r9,3
	lea	r8,QWORD PTR[96+r11*8+r8]
	movq	xmm4,QWORD PTR[r9*8+rax]
	movq	xmm5,QWORD PTR[8+r9*8+rax]
	movq	xmm6,QWORD PTR[16+r9*8+rax]
	movq	xmm7,QWORD PTR[24+r9*8+rax]
	jmp	$L$gather
ALIGN	16
$L$gather::
	; Load all four candidate rows, keep the one whose mask is set.
	movq	xmm0,QWORD PTR[((-96))+r8]
	movq	xmm1,QWORD PTR[((-32))+r8]
	pand	xmm0,xmm4
	movq	xmm2,QWORD PTR[32+r8]
	pand	xmm1,xmm5
	movq	xmm3,QWORD PTR[96+r8]
	pand	xmm2,xmm6
	por	xmm0,xmm1
	pand	xmm3,xmm7
	por	xmm0,xmm2
	lea	r8,QWORD PTR[256+r8]		; next word: 256-byte stride
	por	xmm0,xmm3

	movq	QWORD PTR[rcx],xmm0
	lea	rcx,QWORD PTR[8+rcx]
	sub	rdx,1
	jnz	$L$gather

	; Epilogue: reload the caller's xmm6/xmm7 and release the frame.
	movaps	xmm6,XMMWORD PTR[rsp]
	movaps	xmm7,XMMWORD PTR[16+rsp]
	lea	rsp,QWORD PTR[40+rsp]
	DB	0F3h,0C3h			;repret
$L$SEH_end_bn_gather5::
bn_gather5	ENDP
||||
|
; Selection-mask table for the gather code. Viewed as qwords it is
; {0,0,0,-1,0,0,0,0}: a window of four consecutive qwords starting at
; index 0..3 therefore contains exactly one all-ones qword, and its
; position inside the window is 3 minus the start index.
ALIGN 64 |
||||
|
$L$magic_masks:: |
||||
|
DD 0,0,0,0,0,0,-1,-1 |
||||
|
DD 0,0,0,0,0,0,0,0 |
||||
|
; NUL-terminated ASCII ident string: "Montgomery Multiplication with
; scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>".
DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 |
||||
|
DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115 |
||||
|
DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111 |
||||
|
DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79 |
||||
|
DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111 |
||||
|
DB 112,101,110,115,115,108,46,111,114,103,62,0 |
||||
|
EXTERN __imp_RtlVirtualUnwind:NEAR |
||||
|
|
||||
|
ALIGN 16 |
||||
|
;-----------------------------------------------------------------------
; mul_handler - language-specific exception handler shared by
; bn_mul_mont_gather5 and bn_mul4x_mont_gather5.
; ABI:  Microsoft x64 exception-handler signature
; In:   r8 = CONTEXT record of the faulting frame
;       r9 = DISPATCHER_CONTEXT; its HandlerData (offset 56) points at
;            three image-relative labels: alloca, body, epilogue
;       (rcx / rdx are not read)
; Out:  eax = 1; the CONTEXT is patched so that it describes the caller's
;       frame, copied to the dispatcher's context record, and
;       RtlVirtualUnwind is invoked on it.
; The numeric displacements below are field offsets inside the Windows
; CONTEXT / DISPATCHER_CONTEXT structures.
;-----------------------------------------------------------------------
mul_handler	PROC PRIVATE
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64				; home space + 4 stack arguments

	mov	rax,QWORD PTR[120+r8]		; context->Rax
	mov	rbx,QWORD PTR[248+r8]		; context->Rip

	mov	rsi,QWORD PTR[8+r9]		; disp->ImageBase
	mov	r11,QWORD PTR[56+r9]		; disp->HandlerData

	; Rip before the "alloca" label: rax (context->Rax) is used as the
	; entry stack pointer as-is.
	mov	r10d,DWORD PTR[r11]
	add	r10,rsi
	cmp	rbx,r10
	jb	$L$mul_seh_tail

	; Rip before the "body" label: rax + 88 skips the 48 bytes of pushes
	; and the 40-byte xmm save area.
	lea	rax,QWORD PTR[88+rax]

	mov	r10d,DWORD PTR[4+r11]
	add	r10,rsi
	cmp	rbx,r10
	jb	$L$mul_seh_tail

	; Rip at or past the "epilogue" label: context->Rsp is already the
	; entry stack pointer.
	mov	rax,QWORD PTR[152+r8]		; context->Rsp

	mov	r10d,DWORD PTR[8+r11]
	add	r10,rsi
	cmp	rbx,r10
	jae	$L$mul_seh_tail

	; Inside the body: fetch the stack pointer the function parked at
	; [rsp + 8*count + 8], where count is context->R9.
	mov	r10,QWORD PTR[192+r8]		; context->R9
	mov	rax,QWORD PTR[8+r10*8+rax]

	movaps	xmm0,XMMWORD PTR[rax]		; saved xmm6
	movaps	xmm1,XMMWORD PTR[16+rax]	; saved xmm7
	lea	rax,QWORD PTR[88+rax]		; rax = entry stack pointer

	mov	rbx,QWORD PTR[((-8))+rax]
	mov	rbp,QWORD PTR[((-16))+rax]
	mov	r12,QWORD PTR[((-24))+rax]
	mov	r13,QWORD PTR[((-32))+rax]
	mov	r14,QWORD PTR[((-40))+rax]
	mov	r15,QWORD PTR[((-48))+rax]
	mov	QWORD PTR[144+r8],rbx		; context->Rbx
	mov	QWORD PTR[160+r8],rbp		; context->Rbp
	mov	QWORD PTR[216+r8],r12		; context->R12
	mov	QWORD PTR[224+r8],r13		; context->R13
	mov	QWORD PTR[232+r8],r14		; context->R14
	mov	QWORD PTR[240+r8],r15		; context->R15
	movups	XMMWORD PTR[512+r8],xmm0	; context->Xmm6
	movups	XMMWORD PTR[528+r8],xmm1	; context->Xmm7

$L$mul_seh_tail::
	; rdi/rsi were stored by the function prologue at [8+rsp]/[16+rsp]
	; relative to the entry stack pointer held in rax.
	mov	rdi,QWORD PTR[8+rax]
	mov	rsi,QWORD PTR[16+rax]
	mov	QWORD PTR[152+r8],rax		; context->Rsp
	mov	QWORD PTR[168+r8],rsi		; context->Rsi
	mov	QWORD PTR[176+r8],rdi		; context->Rdi

	; Copy the patched CONTEXT (154 qwords) into disp->ContextRecord.
	mov	rdi,QWORD PTR[40+r9]
	mov	rsi,r8
	mov	ecx,154
	cld
	rep movsq

	; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
	;                  ContextRecord, &HandlerData, &EstablisherFrame, 0)
	mov	rsi,r9
	xor	ecx,ecx
	mov	rdx,QWORD PTR[8+rsi]
	mov	r8,QWORD PTR[rsi]
	mov	r9,QWORD PTR[16+rsi]
	mov	r10,QWORD PTR[40+rsi]
	lea	r11,QWORD PTR[56+rsi]
	lea	r12,QWORD PTR[24+rsi]
	mov	QWORD PTR[32+rsp],r10
	mov	QWORD PTR[40+rsp],r11
	mov	QWORD PTR[48+rsp],r12
	mov	QWORD PTR[56+rsp],rcx
	call	QWORD PTR[__imp_RtlVirtualUnwind]

	mov	eax,1				; handler return value
	add	rsp,64
	popfq
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp
	pop	rbx
	pop	rdi
	pop	rsi
	DB	0F3h,0C3h			;repret
mul_handler	ENDP
||||
|
|
||||
|
.text$ ENDS |
||||
|
; Win64 function table: one (begin, end, unwind-info) triple of
; image-relative DWORDs per covered routine. bn_scatter5 has no entry.
.pdata SEGMENT READONLY ALIGN(4) |
||||
|
ALIGN 4 |
||||
|
; bn_mul_mont_gather5
DD imagerel $L$SEH_begin_bn_mul_mont_gather5 |
||||
|
DD imagerel $L$SEH_end_bn_mul_mont_gather5 |
||||
|
DD imagerel $L$SEH_info_bn_mul_mont_gather5 |
||||
|
|
||||
|
; bn_mul4x_mont_gather5
DD imagerel $L$SEH_begin_bn_mul4x_mont_gather5 |
||||
|
DD imagerel $L$SEH_end_bn_mul4x_mont_gather5 |
||||
|
DD imagerel $L$SEH_info_bn_mul4x_mont_gather5 |
||||
|
|
||||
|
; bn_gather5
DD imagerel $L$SEH_begin_bn_gather5 |
||||
|
DD imagerel $L$SEH_end_bn_gather5 |
||||
|
DD imagerel $L$SEH_info_bn_gather5 |
||||
|
|
||||
|
.pdata ENDS |
||||
|
; Unwind information records referenced from .pdata.
.xdata SEGMENT READONLY ALIGN(8) |
||||
|
ALIGN 8 |
||||
|
$L$SEH_info_bn_mul_mont_gather5:: |
||||
|
; Version 1 with the "has exception handler" flag and no prologue codes;
; unwinding is delegated to mul_handler.
DB 9,0,0,0 |
||||
|
DD imagerel mul_handler |
||||
|
; Handler data read by mul_handler at offsets 0/4/8: the alloca, body and
; epilogue labels that delimit the three unwinding cases.
DD imagerel $L$mul_alloca,imagerel $L$mul_body,imagerel $L$mul_epilogue |
||||
|
|
||||
|
ALIGN 8 |
||||
|
$L$SEH_info_bn_mul4x_mont_gather5:: |
||||
|
; Same layout as above, with the 4x routine's labels.
DB 9,0,0,0 |
||||
|
DD imagerel mul_handler |
||||
|
DD imagerel $L$mul4x_alloca,imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue |
||||
|
|
||||
|
ALIGN 8 |
||||
|
$L$SEH_info_bn_gather5:: |
||||
|
; Version 1, no handler; prologue size 13 bytes, 5 unwind-code slots
; (decoded per the x64 UNWIND_INFO layout).
DB 001h,00dh,005h,000h |
||||
|
; Prologue offset 13: xmm7 saved at [rsp+16] (slot value 1 is in 16-byte units).
DB 00dh,078h,001h,000h |
||||
|
|
||||
|
; Prologue offset 8: xmm6 saved at [rsp+0].
DB 008h,068h,000h,000h |
||||
|
|
||||
|
; Prologue offset 4: small stack allocation, info 4 -> 4*8+8 = 40 bytes;
; the trailing two zero bytes pad the code array to an even slot count.
DB 004h,042h,000h,000h |
||||
|
|
||||
|
ALIGN 8 |
||||
|
|
||||
|
.xdata ENDS |
||||
|
END |
Loading…
Reference in new issue