mirror of https://github.com/lukechilds/node.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
5396 lines
98 KiB
5396 lines
98 KiB
.text
|
|
|
|
|
|
# sha1_block_data_order -- entry point; selects an implementation at run time.
#
# The three dwords at OPENSSL_ia32cap_P+0/+4/+8 are read and tested:
#   word1 bit 9 clear (mask 512)                 -> .Lialu (plain integer code)
#   word2 bit 29 set (mask 536870912)            -> _shaext_shortcut
#   word2 bits 3,5,8 all set (mask 296)          -> _avx2_shortcut
#   word1 bit 28 and word0 bit 30 both set
#     (268435456 | 1073741824 = 1342177280)      -> _avx_shortcut
#   otherwise                                    -> _ssse3_shortcut
# NOTE(review): the meaning of each capability bit is not visible here; judging
# by the branch-target names they are the SSSE3 / SHA / AVX2 / AVX feature
# bits -- confirm against the OPENSSL_ia32cap documentation.
#
# %rdi, %rsi and %rdx are left untouched so every target sees the caller's
# original arguments.
.globl sha1_block_data_order
|
|
.type sha1_block_data_order,@function
|
|
.align 16
|
|
sha1_block_data_order:
|
|
movl OPENSSL_ia32cap_P+0(%rip),%r9d
|
|
movl OPENSSL_ia32cap_P+4(%rip),%r8d
|
|
movl OPENSSL_ia32cap_P+8(%rip),%r10d
|
|
# word1 bit 9 clear: none of the SIMD paths is considered
testl $512,%r8d
|
|
jz .Lialu
|
|
# word2 bit 29 set: take the SHA-extension path
testl $536870912,%r10d
|
|
jnz _shaext_shortcut
|
|
# all of word2 bits 3, 5 and 8 are required for the AVX2 path
andl $296,%r10d
|
|
cmpl $296,%r10d
|
|
je _avx2_shortcut
|
|
# AVX path needs word1 bit 28 AND word0 bit 30 together
andl $268435456,%r8d
|
|
andl $1073741824,%r9d
|
|
orl %r9d,%r8d
|
|
cmpl $1342177280,%r8d
|
|
je _avx_shortcut
|
|
jmp _ssse3_shortcut
|
|
|
|
# .Lialu -- integer-only path: frame setup and initial state load.
#
# Register roles established here and used by the loop that follows:
#   %r8  = copy of %rdi (pointer to the five 32-bit state words)
#   %r9  = copy of %rsi (input pointer, advanced by 64 per iteration)
#   %r10 = copy of %rdx (iteration counter, decremented once per iteration)
#   %esi,%edi,%r11d,%r12d,%r13d = the five state words from 0..16(%r8)
# Stack frame: 64-byte-aligned %rsp; 0..63(%rsp) is a 16-dword scratch buffer,
# 64(%rsp) holds the caller's original %rsp so the epilogue can find the
# pushed %rbx/%rbp/%r12/%r13/%r14 relative to it.
.align 16
|
|
.Lialu:
|
|
# remember the entry %rsp before anything is pushed
movq %rsp,%rax
|
|
pushq %rbx
|
|
pushq %rbp
|
|
pushq %r12
|
|
pushq %r13
|
|
pushq %r14
|
|
movq %rdi,%r8
|
|
# 64 bytes of scratch + 8 bytes for the saved %rsp, then align down to 64
subq $72,%rsp
|
|
movq %rsi,%r9
|
|
andq $-64,%rsp
|
|
movq %rdx,%r10
|
|
movq %rax,64(%rsp)
|
|
.Lprologue:
|
|
|
|
# load the current state into registers
movl 0(%r8),%esi
|
|
movl 4(%r8),%edi
|
|
movl 8(%r8),%r11d
|
|
movl 12(%r8),%r12d
|
|
movl 16(%r8),%r13d
|
|
jmp .Lloop
|
|
|
|
# .Lloop -- one iteration per 64-byte input block; fully unrolled.
# This span is rounds 0..14.
#
# Each round:
#   * loads the NEXT input dword from (%r9) into one of %edx/%ebp/%r14d
#     (used in rotation) and byte-swaps it,
#   * stores the CURRENT word into the scratch buffer at 4*i(%rsp),
#   * computes  f = ((c ^ d) & b) ^ d  in %eax,
#   * updates   e += 1518500249 + w + rol(a,5) + f   (1518500249 = 0x5A827999),
#   * rotates   b = rol(b,30).
# The five state registers %esi,%edi,%r11d,%r12d,%r13d are not moved between
# rounds; instead their a/b/c/d/e roles shift by one position every round.
# %ecx is scratch for rol(a,5).
.align 16
|
|
.Lloop:
|
|
movl 0(%r9),%edx
|
|
bswapl %edx
|
|
movl 4(%r9),%ebp
|
|
movl %r12d,%eax
|
|
movl %edx,0(%rsp)
|
|
movl %esi,%ecx
|
|
bswapl %ebp
|
|
xorl %r11d,%eax
|
|
roll $5,%ecx
|
|
andl %edi,%eax
|
|
leal 1518500249(%rdx,%r13,1),%r13d
|
|
addl %ecx,%r13d
|
|
xorl %r12d,%eax
|
|
roll $30,%edi
|
|
addl %eax,%r13d
|
|
movl 8(%r9),%r14d
|
|
movl %r11d,%eax
|
|
movl %ebp,4(%rsp)
|
|
movl %r13d,%ecx
|
|
bswapl %r14d
|
|
xorl %edi,%eax
|
|
roll $5,%ecx
|
|
andl %esi,%eax
|
|
leal 1518500249(%rbp,%r12,1),%r12d
|
|
addl %ecx,%r12d
|
|
xorl %r11d,%eax
|
|
roll $30,%esi
|
|
addl %eax,%r12d
|
|
movl 12(%r9),%edx
|
|
movl %edi,%eax
|
|
movl %r14d,8(%rsp)
|
|
movl %r12d,%ecx
|
|
bswapl %edx
|
|
xorl %esi,%eax
|
|
roll $5,%ecx
|
|
andl %r13d,%eax
|
|
leal 1518500249(%r14,%r11,1),%r11d
|
|
addl %ecx,%r11d
|
|
xorl %edi,%eax
|
|
roll $30,%r13d
|
|
addl %eax,%r11d
|
|
movl 16(%r9),%ebp
|
|
movl %esi,%eax
|
|
movl %edx,12(%rsp)
|
|
movl %r11d,%ecx
|
|
bswapl %ebp
|
|
xorl %r13d,%eax
|
|
roll $5,%ecx
|
|
andl %r12d,%eax
|
|
leal 1518500249(%rdx,%rdi,1),%edi
|
|
addl %ecx,%edi
|
|
xorl %esi,%eax
|
|
roll $30,%r12d
|
|
addl %eax,%edi
|
|
movl 20(%r9),%r14d
|
|
movl %r13d,%eax
|
|
movl %ebp,16(%rsp)
|
|
movl %edi,%ecx
|
|
bswapl %r14d
|
|
xorl %r12d,%eax
|
|
roll $5,%ecx
|
|
andl %r11d,%eax
|
|
leal 1518500249(%rbp,%rsi,1),%esi
|
|
addl %ecx,%esi
|
|
xorl %r13d,%eax
|
|
roll $30,%r11d
|
|
addl %eax,%esi
|
|
movl 24(%r9),%edx
|
|
movl %r12d,%eax
|
|
movl %r14d,20(%rsp)
|
|
movl %esi,%ecx
|
|
bswapl %edx
|
|
xorl %r11d,%eax
|
|
roll $5,%ecx
|
|
andl %edi,%eax
|
|
leal 1518500249(%r14,%r13,1),%r13d
|
|
addl %ecx,%r13d
|
|
xorl %r12d,%eax
|
|
roll $30,%edi
|
|
addl %eax,%r13d
|
|
movl 28(%r9),%ebp
|
|
movl %r11d,%eax
|
|
movl %edx,24(%rsp)
|
|
movl %r13d,%ecx
|
|
bswapl %ebp
|
|
xorl %edi,%eax
|
|
roll $5,%ecx
|
|
andl %esi,%eax
|
|
leal 1518500249(%rdx,%r12,1),%r12d
|
|
addl %ecx,%r12d
|
|
xorl %r11d,%eax
|
|
roll $30,%esi
|
|
addl %eax,%r12d
|
|
movl 32(%r9),%r14d
|
|
movl %edi,%eax
|
|
movl %ebp,28(%rsp)
|
|
movl %r12d,%ecx
|
|
bswapl %r14d
|
|
xorl %esi,%eax
|
|
roll $5,%ecx
|
|
andl %r13d,%eax
|
|
leal 1518500249(%rbp,%r11,1),%r11d
|
|
addl %ecx,%r11d
|
|
xorl %edi,%eax
|
|
roll $30,%r13d
|
|
addl %eax,%r11d
|
|
movl 36(%r9),%edx
|
|
movl %esi,%eax
|
|
movl %r14d,32(%rsp)
|
|
movl %r11d,%ecx
|
|
bswapl %edx
|
|
xorl %r13d,%eax
|
|
roll $5,%ecx
|
|
andl %r12d,%eax
|
|
leal 1518500249(%r14,%rdi,1),%edi
|
|
addl %ecx,%edi
|
|
xorl %esi,%eax
|
|
roll $30,%r12d
|
|
addl %eax,%edi
|
|
movl 40(%r9),%ebp
|
|
movl %r13d,%eax
|
|
movl %edx,36(%rsp)
|
|
movl %edi,%ecx
|
|
bswapl %ebp
|
|
xorl %r12d,%eax
|
|
roll $5,%ecx
|
|
andl %r11d,%eax
|
|
leal 1518500249(%rdx,%rsi,1),%esi
|
|
addl %ecx,%esi
|
|
xorl %r13d,%eax
|
|
roll $30,%r11d
|
|
addl %eax,%esi
|
|
movl 44(%r9),%r14d
|
|
movl %r12d,%eax
|
|
movl %ebp,40(%rsp)
|
|
movl %esi,%ecx
|
|
bswapl %r14d
|
|
xorl %r11d,%eax
|
|
roll $5,%ecx
|
|
andl %edi,%eax
|
|
leal 1518500249(%rbp,%r13,1),%r13d
|
|
addl %ecx,%r13d
|
|
xorl %r12d,%eax
|
|
roll $30,%edi
|
|
addl %eax,%r13d
|
|
movl 48(%r9),%edx
|
|
movl %r11d,%eax
|
|
movl %r14d,44(%rsp)
|
|
movl %r13d,%ecx
|
|
bswapl %edx
|
|
xorl %edi,%eax
|
|
roll $5,%ecx
|
|
andl %esi,%eax
|
|
leal 1518500249(%r14,%r12,1),%r12d
|
|
addl %ecx,%r12d
|
|
xorl %r11d,%eax
|
|
roll $30,%esi
|
|
addl %eax,%r12d
|
|
movl 52(%r9),%ebp
|
|
movl %edi,%eax
|
|
movl %edx,48(%rsp)
|
|
movl %r12d,%ecx
|
|
bswapl %ebp
|
|
xorl %esi,%eax
|
|
roll $5,%ecx
|
|
andl %r13d,%eax
|
|
leal 1518500249(%rdx,%r11,1),%r11d
|
|
addl %ecx,%r11d
|
|
xorl %edi,%eax
|
|
roll $30,%r13d
|
|
addl %eax,%r11d
|
|
movl 56(%r9),%r14d
|
|
movl %esi,%eax
|
|
movl %ebp,52(%rsp)
|
|
movl %r11d,%ecx
|
|
bswapl %r14d
|
|
xorl %r13d,%eax
|
|
roll $5,%ecx
|
|
andl %r12d,%eax
|
|
leal 1518500249(%rbp,%rdi,1),%edi
|
|
addl %ecx,%edi
|
|
xorl %esi,%eax
|
|
roll $30,%r12d
|
|
addl %eax,%edi
|
|
movl 60(%r9),%edx
|
|
movl %r13d,%eax
|
|
movl %r14d,56(%rsp)
|
|
movl %edi,%ecx
|
|
bswapl %edx
|
|
xorl %r12d,%eax
|
|
roll $5,%ecx
|
|
andl %r11d,%eax
|
|
leal 1518500249(%r14,%rsi,1),%esi
|
|
addl %ecx,%esi
|
|
xorl %r13d,%eax
|
|
roll $30,%r11d
|
|
addl %eax,%esi
|
|
# Rounds 15..19 of the unrolled loop.
#
# No more input is read.  From here on each round also derives the next
# schedule word in place:  w[i+1] = rol(w[i-2] ^ w[i-7] ^ w[i-13] ^ w[i-15], 1)
# -- the register still holding w[i-2] is XORed with three slots of the
# 16-dword ring buffer at 0..60(%rsp) and rotated left by one; the result
# overwrites the oldest slot during the following round.
# The round function is still  f = ((c ^ d) & b) ^ d  with constant
# 1518500249 (0x5A827999).
xorl 0(%rsp),%ebp
|
|
movl %r12d,%eax
|
|
movl %edx,60(%rsp)
|
|
movl %esi,%ecx
|
|
xorl 8(%rsp),%ebp
|
|
xorl %r11d,%eax
|
|
roll $5,%ecx
|
|
xorl 32(%rsp),%ebp
|
|
andl %edi,%eax
|
|
leal 1518500249(%rdx,%r13,1),%r13d
|
|
roll $30,%edi
|
|
xorl %r12d,%eax
|
|
addl %ecx,%r13d
|
|
roll $1,%ebp
|
|
addl %eax,%r13d
|
|
xorl 4(%rsp),%r14d
|
|
movl %r11d,%eax
|
|
movl %ebp,0(%rsp)
|
|
movl %r13d,%ecx
|
|
xorl 12(%rsp),%r14d
|
|
xorl %edi,%eax
|
|
roll $5,%ecx
|
|
xorl 36(%rsp),%r14d
|
|
andl %esi,%eax
|
|
leal 1518500249(%rbp,%r12,1),%r12d
|
|
roll $30,%esi
|
|
xorl %r11d,%eax
|
|
addl %ecx,%r12d
|
|
roll $1,%r14d
|
|
addl %eax,%r12d
|
|
xorl 8(%rsp),%edx
|
|
movl %edi,%eax
|
|
movl %r14d,4(%rsp)
|
|
movl %r12d,%ecx
|
|
xorl 16(%rsp),%edx
|
|
xorl %esi,%eax
|
|
roll $5,%ecx
|
|
xorl 40(%rsp),%edx
|
|
andl %r13d,%eax
|
|
leal 1518500249(%r14,%r11,1),%r11d
|
|
roll $30,%r13d
|
|
xorl %edi,%eax
|
|
addl %ecx,%r11d
|
|
roll $1,%edx
|
|
addl %eax,%r11d
|
|
xorl 12(%rsp),%ebp
|
|
movl %esi,%eax
|
|
movl %edx,8(%rsp)
|
|
movl %r11d,%ecx
|
|
xorl 20(%rsp),%ebp
|
|
xorl %r13d,%eax
|
|
roll $5,%ecx
|
|
xorl 44(%rsp),%ebp
|
|
andl %r12d,%eax
|
|
leal 1518500249(%rdx,%rdi,1),%edi
|
|
roll $30,%r12d
|
|
xorl %esi,%eax
|
|
addl %ecx,%edi
|
|
roll $1,%ebp
|
|
addl %eax,%edi
|
|
xorl 16(%rsp),%r14d
|
|
movl %r13d,%eax
|
|
movl %ebp,12(%rsp)
|
|
movl %edi,%ecx
|
|
xorl 24(%rsp),%r14d
|
|
xorl %r12d,%eax
|
|
roll $5,%ecx
|
|
xorl 48(%rsp),%r14d
|
|
andl %r11d,%eax
|
|
leal 1518500249(%rbp,%rsi,1),%esi
|
|
roll $30,%r11d
|
|
xorl %r13d,%eax
|
|
addl %ecx,%esi
|
|
roll $1,%r14d
|
|
addl %eax,%esi
|
|
# Rounds 20..39 of the unrolled loop.
#
# Round function:  f = b ^ c ^ d  (two XORs into %eax),
# constant 1859775393 (0x6ED9EBA1):
#   e += 1859775393 + w + rol(a,5) + f ;  b = rol(b,30)
# Interleaved with each round, the next schedule word is built in one of
# %edx/%ebp/%r14d by XORing three ring-buffer slots at 0..60(%rsp) into it
# and rotating left by one; the word just consumed is stored back to its slot.
xorl 20(%rsp),%edx
|
|
movl %edi,%eax
|
|
movl %r14d,16(%rsp)
|
|
movl %esi,%ecx
|
|
xorl 28(%rsp),%edx
|
|
xorl %r12d,%eax
|
|
roll $5,%ecx
|
|
xorl 52(%rsp),%edx
|
|
leal 1859775393(%r14,%r13,1),%r13d
|
|
xorl %r11d,%eax
|
|
addl %ecx,%r13d
|
|
roll $30,%edi
|
|
addl %eax,%r13d
|
|
roll $1,%edx
|
|
xorl 24(%rsp),%ebp
|
|
movl %esi,%eax
|
|
movl %edx,20(%rsp)
|
|
movl %r13d,%ecx
|
|
xorl 32(%rsp),%ebp
|
|
xorl %r11d,%eax
|
|
roll $5,%ecx
|
|
xorl 56(%rsp),%ebp
|
|
leal 1859775393(%rdx,%r12,1),%r12d
|
|
xorl %edi,%eax
|
|
addl %ecx,%r12d
|
|
roll $30,%esi
|
|
addl %eax,%r12d
|
|
roll $1,%ebp
|
|
xorl 28(%rsp),%r14d
|
|
movl %r13d,%eax
|
|
movl %ebp,24(%rsp)
|
|
movl %r12d,%ecx
|
|
xorl 36(%rsp),%r14d
|
|
xorl %edi,%eax
|
|
roll $5,%ecx
|
|
xorl 60(%rsp),%r14d
|
|
leal 1859775393(%rbp,%r11,1),%r11d
|
|
xorl %esi,%eax
|
|
addl %ecx,%r11d
|
|
roll $30,%r13d
|
|
addl %eax,%r11d
|
|
roll $1,%r14d
|
|
xorl 32(%rsp),%edx
|
|
movl %r12d,%eax
|
|
movl %r14d,28(%rsp)
|
|
movl %r11d,%ecx
|
|
xorl 40(%rsp),%edx
|
|
xorl %esi,%eax
|
|
roll $5,%ecx
|
|
xorl 0(%rsp),%edx
|
|
leal 1859775393(%r14,%rdi,1),%edi
|
|
xorl %r13d,%eax
|
|
addl %ecx,%edi
|
|
roll $30,%r12d
|
|
addl %eax,%edi
|
|
roll $1,%edx
|
|
xorl 36(%rsp),%ebp
|
|
movl %r11d,%eax
|
|
movl %edx,32(%rsp)
|
|
movl %edi,%ecx
|
|
xorl 44(%rsp),%ebp
|
|
xorl %r13d,%eax
|
|
roll $5,%ecx
|
|
xorl 4(%rsp),%ebp
|
|
leal 1859775393(%rdx,%rsi,1),%esi
|
|
xorl %r12d,%eax
|
|
addl %ecx,%esi
|
|
roll $30,%r11d
|
|
addl %eax,%esi
|
|
roll $1,%ebp
|
|
xorl 40(%rsp),%r14d
|
|
movl %edi,%eax
|
|
movl %ebp,36(%rsp)
|
|
movl %esi,%ecx
|
|
xorl 48(%rsp),%r14d
|
|
xorl %r12d,%eax
|
|
roll $5,%ecx
|
|
xorl 8(%rsp),%r14d
|
|
leal 1859775393(%rbp,%r13,1),%r13d
|
|
xorl %r11d,%eax
|
|
addl %ecx,%r13d
|
|
roll $30,%edi
|
|
addl %eax,%r13d
|
|
roll $1,%r14d
|
|
xorl 44(%rsp),%edx
|
|
movl %esi,%eax
|
|
movl %r14d,40(%rsp)
|
|
movl %r13d,%ecx
|
|
xorl 52(%rsp),%edx
|
|
xorl %r11d,%eax
|
|
roll $5,%ecx
|
|
xorl 12(%rsp),%edx
|
|
leal 1859775393(%r14,%r12,1),%r12d
|
|
xorl %edi,%eax
|
|
addl %ecx,%r12d
|
|
roll $30,%esi
|
|
addl %eax,%r12d
|
|
roll $1,%edx
|
|
xorl 48(%rsp),%ebp
|
|
movl %r13d,%eax
|
|
movl %edx,44(%rsp)
|
|
movl %r12d,%ecx
|
|
xorl 56(%rsp),%ebp
|
|
xorl %edi,%eax
|
|
roll $5,%ecx
|
|
xorl 16(%rsp),%ebp
|
|
leal 1859775393(%rdx,%r11,1),%r11d
|
|
xorl %esi,%eax
|
|
addl %ecx,%r11d
|
|
roll $30,%r13d
|
|
addl %eax,%r11d
|
|
roll $1,%ebp
|
|
xorl 52(%rsp),%r14d
|
|
movl %r12d,%eax
|
|
movl %ebp,48(%rsp)
|
|
movl %r11d,%ecx
|
|
xorl 60(%rsp),%r14d
|
|
xorl %esi,%eax
|
|
roll $5,%ecx
|
|
xorl 20(%rsp),%r14d
|
|
leal 1859775393(%rbp,%rdi,1),%edi
|
|
xorl %r13d,%eax
|
|
addl %ecx,%edi
|
|
roll $30,%r12d
|
|
addl %eax,%edi
|
|
roll $1,%r14d
|
|
xorl 56(%rsp),%edx
|
|
movl %r11d,%eax
|
|
movl %r14d,52(%rsp)
|
|
movl %edi,%ecx
|
|
xorl 0(%rsp),%edx
|
|
xorl %r13d,%eax
|
|
roll $5,%ecx
|
|
xorl 24(%rsp),%edx
|
|
leal 1859775393(%r14,%rsi,1),%esi
|
|
xorl %r12d,%eax
|
|
addl %ecx,%esi
|
|
roll $30,%r11d
|
|
addl %eax,%esi
|
|
roll $1,%edx
|
|
xorl 60(%rsp),%ebp
|
|
movl %edi,%eax
|
|
movl %edx,56(%rsp)
|
|
movl %esi,%ecx
|
|
xorl 4(%rsp),%ebp
|
|
xorl %r12d,%eax
|
|
roll $5,%ecx
|
|
xorl 28(%rsp),%ebp
|
|
leal 1859775393(%rdx,%r13,1),%r13d
|
|
xorl %r11d,%eax
|
|
addl %ecx,%r13d
|
|
roll $30,%edi
|
|
addl %eax,%r13d
|
|
roll $1,%ebp
|
|
xorl 0(%rsp),%r14d
|
|
movl %esi,%eax
|
|
movl %ebp,60(%rsp)
|
|
movl %r13d,%ecx
|
|
xorl 8(%rsp),%r14d
|
|
xorl %r11d,%eax
|
|
roll $5,%ecx
|
|
xorl 32(%rsp),%r14d
|
|
leal 1859775393(%rbp,%r12,1),%r12d
|
|
xorl %edi,%eax
|
|
addl %ecx,%r12d
|
|
roll $30,%esi
|
|
addl %eax,%r12d
|
|
roll $1,%r14d
|
|
xorl 4(%rsp),%edx
|
|
movl %r13d,%eax
|
|
movl %r14d,0(%rsp)
|
|
movl %r12d,%ecx
|
|
xorl 12(%rsp),%edx
|
|
xorl %edi,%eax
|
|
roll $5,%ecx
|
|
xorl 36(%rsp),%edx
|
|
leal 1859775393(%r14,%r11,1),%r11d
|
|
xorl %esi,%eax
|
|
addl %ecx,%r11d
|
|
roll $30,%r13d
|
|
addl %eax,%r11d
|
|
roll $1,%edx
|
|
xorl 8(%rsp),%ebp
|
|
movl %r12d,%eax
|
|
movl %edx,4(%rsp)
|
|
movl %r11d,%ecx
|
|
xorl 16(%rsp),%ebp
|
|
xorl %esi,%eax
|
|
roll $5,%ecx
|
|
xorl 40(%rsp),%ebp
|
|
leal 1859775393(%rdx,%rdi,1),%edi
|
|
xorl %r13d,%eax
|
|
addl %ecx,%edi
|
|
roll $30,%r12d
|
|
addl %eax,%edi
|
|
roll $1,%ebp
|
|
xorl 12(%rsp),%r14d
|
|
movl %r11d,%eax
|
|
movl %ebp,8(%rsp)
|
|
movl %edi,%ecx
|
|
xorl 20(%rsp),%r14d
|
|
xorl %r13d,%eax
|
|
roll $5,%ecx
|
|
xorl 44(%rsp),%r14d
|
|
leal 1859775393(%rbp,%rsi,1),%esi
|
|
xorl %r12d,%eax
|
|
addl %ecx,%esi
|
|
roll $30,%r11d
|
|
addl %eax,%esi
|
|
roll $1,%r14d
|
|
xorl 16(%rsp),%edx
|
|
movl %edi,%eax
|
|
movl %r14d,12(%rsp)
|
|
movl %esi,%ecx
|
|
xorl 24(%rsp),%edx
|
|
xorl %r12d,%eax
|
|
roll $5,%ecx
|
|
xorl 48(%rsp),%edx
|
|
leal 1859775393(%r14,%r13,1),%r13d
|
|
xorl %r11d,%eax
|
|
addl %ecx,%r13d
|
|
roll $30,%edi
|
|
addl %eax,%r13d
|
|
roll $1,%edx
|
|
xorl 20(%rsp),%ebp
|
|
movl %esi,%eax
|
|
movl %edx,16(%rsp)
|
|
movl %r13d,%ecx
|
|
xorl 28(%rsp),%ebp
|
|
xorl %r11d,%eax
|
|
roll $5,%ecx
|
|
xorl 52(%rsp),%ebp
|
|
leal 1859775393(%rdx,%r12,1),%r12d
|
|
xorl %edi,%eax
|
|
addl %ecx,%r12d
|
|
roll $30,%esi
|
|
addl %eax,%r12d
|
|
roll $1,%ebp
|
|
xorl 24(%rsp),%r14d
|
|
movl %r13d,%eax
|
|
movl %ebp,20(%rsp)
|
|
movl %r12d,%ecx
|
|
xorl 32(%rsp),%r14d
|
|
xorl %edi,%eax
|
|
roll $5,%ecx
|
|
xorl 56(%rsp),%r14d
|
|
leal 1859775393(%rbp,%r11,1),%r11d
|
|
xorl %esi,%eax
|
|
addl %ecx,%r11d
|
|
roll $30,%r13d
|
|
addl %eax,%r11d
|
|
roll $1,%r14d
|
|
xorl 28(%rsp),%edx
|
|
movl %r12d,%eax
|
|
movl %r14d,24(%rsp)
|
|
movl %r11d,%ecx
|
|
xorl 36(%rsp),%edx
|
|
xorl %esi,%eax
|
|
roll $5,%ecx
|
|
xorl 60(%rsp),%edx
|
|
leal 1859775393(%r14,%rdi,1),%edi
|
|
xorl %r13d,%eax
|
|
addl %ecx,%edi
|
|
roll $30,%r12d
|
|
addl %eax,%edi
|
|
roll $1,%edx
|
|
xorl 32(%rsp),%ebp
|
|
movl %r11d,%eax
|
|
movl %edx,28(%rsp)
|
|
movl %edi,%ecx
|
|
xorl 40(%rsp),%ebp
|
|
xorl %r13d,%eax
|
|
roll $5,%ecx
|
|
xorl 0(%rsp),%ebp
|
|
leal 1859775393(%rdx,%rsi,1),%esi
|
|
xorl %r12d,%eax
|
|
addl %ecx,%esi
|
|
roll $30,%r11d
|
|
addl %eax,%esi
|
|
roll $1,%ebp
|
|
# Rounds 40..59 of the unrolled loop.
#
# Round function is the bitwise majority of b, c, d, computed as two terms:
#   %eax = c & d            %ebx = (c ^ d) & b
# The terms never have a 1 in the same bit position, so adding both to e is
# the same as OR-ing them first.  Constant is -1894007588 (0x8F1BBCDC):
#   e += 0x8F1BBCDC + w + rol(a,5) + (c & d) + ((c ^ d) & b) ;  b = rol(b,30)
# %ebx is used as a second scratch register in this span (it was pushed on
# entry to the integer path).  The schedule update -- three XORs from the ring
# buffer at 0..60(%rsp) plus rol 1 -- is interleaved as in earlier rounds.
xorl 36(%rsp),%r14d
|
|
movl %r12d,%eax
|
|
movl %ebp,32(%rsp)
|
|
movl %r12d,%ebx
|
|
xorl 44(%rsp),%r14d
|
|
andl %r11d,%eax
|
|
movl %esi,%ecx
|
|
xorl 4(%rsp),%r14d
|
|
leal -1894007588(%rbp,%r13,1),%r13d
|
|
xorl %r11d,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%r13d
|
|
roll $1,%r14d
|
|
andl %edi,%ebx
|
|
addl %ecx,%r13d
|
|
roll $30,%edi
|
|
addl %ebx,%r13d
|
|
xorl 40(%rsp),%edx
|
|
movl %r11d,%eax
|
|
movl %r14d,36(%rsp)
|
|
movl %r11d,%ebx
|
|
xorl 48(%rsp),%edx
|
|
andl %edi,%eax
|
|
movl %r13d,%ecx
|
|
xorl 8(%rsp),%edx
|
|
leal -1894007588(%r14,%r12,1),%r12d
|
|
xorl %edi,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%r12d
|
|
roll $1,%edx
|
|
andl %esi,%ebx
|
|
addl %ecx,%r12d
|
|
roll $30,%esi
|
|
addl %ebx,%r12d
|
|
xorl 44(%rsp),%ebp
|
|
movl %edi,%eax
|
|
movl %edx,40(%rsp)
|
|
movl %edi,%ebx
|
|
xorl 52(%rsp),%ebp
|
|
andl %esi,%eax
|
|
movl %r12d,%ecx
|
|
xorl 12(%rsp),%ebp
|
|
leal -1894007588(%rdx,%r11,1),%r11d
|
|
xorl %esi,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%r11d
|
|
roll $1,%ebp
|
|
andl %r13d,%ebx
|
|
addl %ecx,%r11d
|
|
roll $30,%r13d
|
|
addl %ebx,%r11d
|
|
xorl 48(%rsp),%r14d
|
|
movl %esi,%eax
|
|
movl %ebp,44(%rsp)
|
|
movl %esi,%ebx
|
|
xorl 56(%rsp),%r14d
|
|
andl %r13d,%eax
|
|
movl %r11d,%ecx
|
|
xorl 16(%rsp),%r14d
|
|
leal -1894007588(%rbp,%rdi,1),%edi
|
|
xorl %r13d,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%edi
|
|
roll $1,%r14d
|
|
andl %r12d,%ebx
|
|
addl %ecx,%edi
|
|
roll $30,%r12d
|
|
addl %ebx,%edi
|
|
xorl 52(%rsp),%edx
|
|
movl %r13d,%eax
|
|
movl %r14d,48(%rsp)
|
|
movl %r13d,%ebx
|
|
xorl 60(%rsp),%edx
|
|
andl %r12d,%eax
|
|
movl %edi,%ecx
|
|
xorl 20(%rsp),%edx
|
|
leal -1894007588(%r14,%rsi,1),%esi
|
|
xorl %r12d,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%esi
|
|
roll $1,%edx
|
|
andl %r11d,%ebx
|
|
addl %ecx,%esi
|
|
roll $30,%r11d
|
|
addl %ebx,%esi
|
|
xorl 56(%rsp),%ebp
|
|
movl %r12d,%eax
|
|
movl %edx,52(%rsp)
|
|
movl %r12d,%ebx
|
|
xorl 0(%rsp),%ebp
|
|
andl %r11d,%eax
|
|
movl %esi,%ecx
|
|
xorl 24(%rsp),%ebp
|
|
leal -1894007588(%rdx,%r13,1),%r13d
|
|
xorl %r11d,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%r13d
|
|
roll $1,%ebp
|
|
andl %edi,%ebx
|
|
addl %ecx,%r13d
|
|
roll $30,%edi
|
|
addl %ebx,%r13d
|
|
xorl 60(%rsp),%r14d
|
|
movl %r11d,%eax
|
|
movl %ebp,56(%rsp)
|
|
movl %r11d,%ebx
|
|
xorl 4(%rsp),%r14d
|
|
andl %edi,%eax
|
|
movl %r13d,%ecx
|
|
xorl 28(%rsp),%r14d
|
|
leal -1894007588(%rbp,%r12,1),%r12d
|
|
xorl %edi,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%r12d
|
|
roll $1,%r14d
|
|
andl %esi,%ebx
|
|
addl %ecx,%r12d
|
|
roll $30,%esi
|
|
addl %ebx,%r12d
|
|
xorl 0(%rsp),%edx
|
|
movl %edi,%eax
|
|
movl %r14d,60(%rsp)
|
|
movl %edi,%ebx
|
|
xorl 8(%rsp),%edx
|
|
andl %esi,%eax
|
|
movl %r12d,%ecx
|
|
xorl 32(%rsp),%edx
|
|
leal -1894007588(%r14,%r11,1),%r11d
|
|
xorl %esi,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%r11d
|
|
roll $1,%edx
|
|
andl %r13d,%ebx
|
|
addl %ecx,%r11d
|
|
roll $30,%r13d
|
|
addl %ebx,%r11d
|
|
xorl 4(%rsp),%ebp
|
|
movl %esi,%eax
|
|
movl %edx,0(%rsp)
|
|
movl %esi,%ebx
|
|
xorl 12(%rsp),%ebp
|
|
andl %r13d,%eax
|
|
movl %r11d,%ecx
|
|
xorl 36(%rsp),%ebp
|
|
leal -1894007588(%rdx,%rdi,1),%edi
|
|
xorl %r13d,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%edi
|
|
roll $1,%ebp
|
|
andl %r12d,%ebx
|
|
addl %ecx,%edi
|
|
roll $30,%r12d
|
|
addl %ebx,%edi
|
|
xorl 8(%rsp),%r14d
|
|
movl %r13d,%eax
|
|
movl %ebp,4(%rsp)
|
|
movl %r13d,%ebx
|
|
xorl 16(%rsp),%r14d
|
|
andl %r12d,%eax
|
|
movl %edi,%ecx
|
|
xorl 40(%rsp),%r14d
|
|
leal -1894007588(%rbp,%rsi,1),%esi
|
|
xorl %r12d,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%esi
|
|
roll $1,%r14d
|
|
andl %r11d,%ebx
|
|
addl %ecx,%esi
|
|
roll $30,%r11d
|
|
addl %ebx,%esi
|
|
xorl 12(%rsp),%edx
|
|
movl %r12d,%eax
|
|
movl %r14d,8(%rsp)
|
|
movl %r12d,%ebx
|
|
xorl 20(%rsp),%edx
|
|
andl %r11d,%eax
|
|
movl %esi,%ecx
|
|
xorl 44(%rsp),%edx
|
|
leal -1894007588(%r14,%r13,1),%r13d
|
|
xorl %r11d,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%r13d
|
|
roll $1,%edx
|
|
andl %edi,%ebx
|
|
addl %ecx,%r13d
|
|
roll $30,%edi
|
|
addl %ebx,%r13d
|
|
xorl 16(%rsp),%ebp
|
|
movl %r11d,%eax
|
|
movl %edx,12(%rsp)
|
|
movl %r11d,%ebx
|
|
xorl 24(%rsp),%ebp
|
|
andl %edi,%eax
|
|
movl %r13d,%ecx
|
|
xorl 48(%rsp),%ebp
|
|
leal -1894007588(%rdx,%r12,1),%r12d
|
|
xorl %edi,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%r12d
|
|
roll $1,%ebp
|
|
andl %esi,%ebx
|
|
addl %ecx,%r12d
|
|
roll $30,%esi
|
|
addl %ebx,%r12d
|
|
xorl 20(%rsp),%r14d
|
|
movl %edi,%eax
|
|
movl %ebp,16(%rsp)
|
|
movl %edi,%ebx
|
|
xorl 28(%rsp),%r14d
|
|
andl %esi,%eax
|
|
movl %r12d,%ecx
|
|
xorl 52(%rsp),%r14d
|
|
leal -1894007588(%rbp,%r11,1),%r11d
|
|
xorl %esi,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%r11d
|
|
roll $1,%r14d
|
|
andl %r13d,%ebx
|
|
addl %ecx,%r11d
|
|
roll $30,%r13d
|
|
addl %ebx,%r11d
|
|
xorl 24(%rsp),%edx
|
|
movl %esi,%eax
|
|
movl %r14d,20(%rsp)
|
|
movl %esi,%ebx
|
|
xorl 32(%rsp),%edx
|
|
andl %r13d,%eax
|
|
movl %r11d,%ecx
|
|
xorl 56(%rsp),%edx
|
|
leal -1894007588(%r14,%rdi,1),%edi
|
|
xorl %r13d,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%edi
|
|
roll $1,%edx
|
|
andl %r12d,%ebx
|
|
addl %ecx,%edi
|
|
roll $30,%r12d
|
|
addl %ebx,%edi
|
|
xorl 28(%rsp),%ebp
|
|
movl %r13d,%eax
|
|
movl %edx,24(%rsp)
|
|
movl %r13d,%ebx
|
|
xorl 36(%rsp),%ebp
|
|
andl %r12d,%eax
|
|
movl %edi,%ecx
|
|
xorl 60(%rsp),%ebp
|
|
leal -1894007588(%rdx,%rsi,1),%esi
|
|
xorl %r12d,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%esi
|
|
roll $1,%ebp
|
|
andl %r11d,%ebx
|
|
addl %ecx,%esi
|
|
roll $30,%r11d
|
|
addl %ebx,%esi
|
|
xorl 32(%rsp),%r14d
|
|
movl %r12d,%eax
|
|
movl %ebp,28(%rsp)
|
|
movl %r12d,%ebx
|
|
xorl 40(%rsp),%r14d
|
|
andl %r11d,%eax
|
|
movl %esi,%ecx
|
|
xorl 0(%rsp),%r14d
|
|
leal -1894007588(%rbp,%r13,1),%r13d
|
|
xorl %r11d,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%r13d
|
|
roll $1,%r14d
|
|
andl %edi,%ebx
|
|
addl %ecx,%r13d
|
|
roll $30,%edi
|
|
addl %ebx,%r13d
|
|
xorl 36(%rsp),%edx
|
|
movl %r11d,%eax
|
|
movl %r14d,32(%rsp)
|
|
movl %r11d,%ebx
|
|
xorl 44(%rsp),%edx
|
|
andl %edi,%eax
|
|
movl %r13d,%ecx
|
|
xorl 4(%rsp),%edx
|
|
leal -1894007588(%r14,%r12,1),%r12d
|
|
xorl %edi,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%r12d
|
|
roll $1,%edx
|
|
andl %esi,%ebx
|
|
addl %ecx,%r12d
|
|
roll $30,%esi
|
|
addl %ebx,%r12d
|
|
xorl 40(%rsp),%ebp
|
|
movl %edi,%eax
|
|
movl %edx,36(%rsp)
|
|
movl %edi,%ebx
|
|
xorl 48(%rsp),%ebp
|
|
andl %esi,%eax
|
|
movl %r12d,%ecx
|
|
xorl 8(%rsp),%ebp
|
|
leal -1894007588(%rdx,%r11,1),%r11d
|
|
xorl %esi,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%r11d
|
|
roll $1,%ebp
|
|
andl %r13d,%ebx
|
|
addl %ecx,%r11d
|
|
roll $30,%r13d
|
|
addl %ebx,%r11d
|
|
xorl 44(%rsp),%r14d
|
|
movl %esi,%eax
|
|
movl %ebp,40(%rsp)
|
|
movl %esi,%ebx
|
|
xorl 52(%rsp),%r14d
|
|
andl %r13d,%eax
|
|
movl %r11d,%ecx
|
|
xorl 12(%rsp),%r14d
|
|
leal -1894007588(%rbp,%rdi,1),%edi
|
|
xorl %r13d,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%edi
|
|
roll $1,%r14d
|
|
andl %r12d,%ebx
|
|
addl %ecx,%edi
|
|
roll $30,%r12d
|
|
addl %ebx,%edi
|
|
xorl 48(%rsp),%edx
|
|
movl %r13d,%eax
|
|
movl %r14d,44(%rsp)
|
|
movl %r13d,%ebx
|
|
xorl 56(%rsp),%edx
|
|
andl %r12d,%eax
|
|
movl %edi,%ecx
|
|
xorl 16(%rsp),%edx
|
|
leal -1894007588(%r14,%rsi,1),%esi
|
|
xorl %r12d,%ebx
|
|
roll $5,%ecx
|
|
addl %eax,%esi
|
|
roll $1,%edx
|
|
andl %r11d,%ebx
|
|
addl %ecx,%esi
|
|
roll $30,%r11d
|
|
addl %ebx,%esi
|
|
# Rounds 60..79 of the unrolled loop.
#
# Round function is again  f = b ^ c ^ d,  now with constant
# -899497514 (0xCA62C1D6):
#   e += 0xCA62C1D6 + w + rol(a,5) + f ;  b = rol(b,30)
# The schedule is still extended with three XORs from 0..60(%rsp) plus rol 1,
# but from round 72 on the freshly used word is no longer stored back to the
# ring buffer (the store slot is simply empty in the instruction stream), and
# the final round computes no further schedule word at all.
xorl 52(%rsp),%ebp
|
|
movl %edi,%eax
|
|
movl %edx,48(%rsp)
|
|
movl %esi,%ecx
|
|
xorl 60(%rsp),%ebp
|
|
xorl %r12d,%eax
|
|
roll $5,%ecx
|
|
xorl 20(%rsp),%ebp
|
|
leal -899497514(%rdx,%r13,1),%r13d
|
|
xorl %r11d,%eax
|
|
addl %ecx,%r13d
|
|
roll $30,%edi
|
|
addl %eax,%r13d
|
|
roll $1,%ebp
|
|
xorl 56(%rsp),%r14d
|
|
movl %esi,%eax
|
|
movl %ebp,52(%rsp)
|
|
movl %r13d,%ecx
|
|
xorl 0(%rsp),%r14d
|
|
xorl %r11d,%eax
|
|
roll $5,%ecx
|
|
xorl 24(%rsp),%r14d
|
|
leal -899497514(%rbp,%r12,1),%r12d
|
|
xorl %edi,%eax
|
|
addl %ecx,%r12d
|
|
roll $30,%esi
|
|
addl %eax,%r12d
|
|
roll $1,%r14d
|
|
xorl 60(%rsp),%edx
|
|
movl %r13d,%eax
|
|
movl %r14d,56(%rsp)
|
|
movl %r12d,%ecx
|
|
xorl 4(%rsp),%edx
|
|
xorl %edi,%eax
|
|
roll $5,%ecx
|
|
xorl 28(%rsp),%edx
|
|
leal -899497514(%r14,%r11,1),%r11d
|
|
xorl %esi,%eax
|
|
addl %ecx,%r11d
|
|
roll $30,%r13d
|
|
addl %eax,%r11d
|
|
roll $1,%edx
|
|
xorl 0(%rsp),%ebp
|
|
movl %r12d,%eax
|
|
movl %edx,60(%rsp)
|
|
movl %r11d,%ecx
|
|
xorl 8(%rsp),%ebp
|
|
xorl %esi,%eax
|
|
roll $5,%ecx
|
|
xorl 32(%rsp),%ebp
|
|
leal -899497514(%rdx,%rdi,1),%edi
|
|
xorl %r13d,%eax
|
|
addl %ecx,%edi
|
|
roll $30,%r12d
|
|
addl %eax,%edi
|
|
roll $1,%ebp
|
|
xorl 4(%rsp),%r14d
|
|
movl %r11d,%eax
|
|
movl %ebp,0(%rsp)
|
|
movl %edi,%ecx
|
|
xorl 12(%rsp),%r14d
|
|
xorl %r13d,%eax
|
|
roll $5,%ecx
|
|
xorl 36(%rsp),%r14d
|
|
leal -899497514(%rbp,%rsi,1),%esi
|
|
xorl %r12d,%eax
|
|
addl %ecx,%esi
|
|
roll $30,%r11d
|
|
addl %eax,%esi
|
|
roll $1,%r14d
|
|
xorl 8(%rsp),%edx
|
|
movl %edi,%eax
|
|
movl %r14d,4(%rsp)
|
|
movl %esi,%ecx
|
|
xorl 16(%rsp),%edx
|
|
xorl %r12d,%eax
|
|
roll $5,%ecx
|
|
xorl 40(%rsp),%edx
|
|
leal -899497514(%r14,%r13,1),%r13d
|
|
xorl %r11d,%eax
|
|
addl %ecx,%r13d
|
|
roll $30,%edi
|
|
addl %eax,%r13d
|
|
roll $1,%edx
|
|
xorl 12(%rsp),%ebp
|
|
movl %esi,%eax
|
|
movl %edx,8(%rsp)
|
|
movl %r13d,%ecx
|
|
xorl 20(%rsp),%ebp
|
|
xorl %r11d,%eax
|
|
roll $5,%ecx
|
|
xorl 44(%rsp),%ebp
|
|
leal -899497514(%rdx,%r12,1),%r12d
|
|
xorl %edi,%eax
|
|
addl %ecx,%r12d
|
|
roll $30,%esi
|
|
addl %eax,%r12d
|
|
roll $1,%ebp
|
|
xorl 16(%rsp),%r14d
|
|
movl %r13d,%eax
|
|
movl %ebp,12(%rsp)
|
|
movl %r12d,%ecx
|
|
xorl 24(%rsp),%r14d
|
|
xorl %edi,%eax
|
|
roll $5,%ecx
|
|
xorl 48(%rsp),%r14d
|
|
leal -899497514(%rbp,%r11,1),%r11d
|
|
xorl %esi,%eax
|
|
addl %ecx,%r11d
|
|
roll $30,%r13d
|
|
addl %eax,%r11d
|
|
roll $1,%r14d
|
|
xorl 20(%rsp),%edx
|
|
movl %r12d,%eax
|
|
movl %r14d,16(%rsp)
|
|
movl %r11d,%ecx
|
|
xorl 28(%rsp),%edx
|
|
xorl %esi,%eax
|
|
roll $5,%ecx
|
|
xorl 52(%rsp),%edx
|
|
leal -899497514(%r14,%rdi,1),%edi
|
|
xorl %r13d,%eax
|
|
addl %ecx,%edi
|
|
roll $30,%r12d
|
|
addl %eax,%edi
|
|
roll $1,%edx
|
|
xorl 24(%rsp),%ebp
|
|
movl %r11d,%eax
|
|
movl %edx,20(%rsp)
|
|
movl %edi,%ecx
|
|
xorl 32(%rsp),%ebp
|
|
xorl %r13d,%eax
|
|
roll $5,%ecx
|
|
xorl 56(%rsp),%ebp
|
|
leal -899497514(%rdx,%rsi,1),%esi
|
|
xorl %r12d,%eax
|
|
addl %ecx,%esi
|
|
roll $30,%r11d
|
|
addl %eax,%esi
|
|
roll $1,%ebp
|
|
xorl 28(%rsp),%r14d
|
|
movl %edi,%eax
|
|
movl %ebp,24(%rsp)
|
|
movl %esi,%ecx
|
|
xorl 36(%rsp),%r14d
|
|
xorl %r12d,%eax
|
|
roll $5,%ecx
|
|
xorl 60(%rsp),%r14d
|
|
leal -899497514(%rbp,%r13,1),%r13d
|
|
xorl %r11d,%eax
|
|
addl %ecx,%r13d
|
|
roll $30,%edi
|
|
addl %eax,%r13d
|
|
roll $1,%r14d
|
|
xorl 32(%rsp),%edx
|
|
movl %esi,%eax
|
|
movl %r14d,28(%rsp)
|
|
movl %r13d,%ecx
|
|
xorl 40(%rsp),%edx
|
|
xorl %r11d,%eax
|
|
roll $5,%ecx
|
|
xorl 0(%rsp),%edx
|
|
leal -899497514(%r14,%r12,1),%r12d
|
|
xorl %edi,%eax
|
|
addl %ecx,%r12d
|
|
roll $30,%esi
|
|
addl %eax,%r12d
|
|
roll $1,%edx
|
|
xorl 36(%rsp),%ebp
|
|
movl %r13d,%eax
|
|
|
|
|
|
movl %r12d,%ecx
|
|
xorl 44(%rsp),%ebp
|
|
xorl %edi,%eax
|
|
roll $5,%ecx
|
|
xorl 4(%rsp),%ebp
|
|
leal -899497514(%rdx,%r11,1),%r11d
|
|
xorl %esi,%eax
|
|
addl %ecx,%r11d
|
|
roll $30,%r13d
|
|
addl %eax,%r11d
|
|
roll $1,%ebp
|
|
xorl 40(%rsp),%r14d
|
|
movl %r12d,%eax
|
|
|
|
|
|
movl %r11d,%ecx
|
|
xorl 48(%rsp),%r14d
|
|
xorl %esi,%eax
|
|
roll $5,%ecx
|
|
xorl 8(%rsp),%r14d
|
|
leal -899497514(%rbp,%rdi,1),%edi
|
|
xorl %r13d,%eax
|
|
addl %ecx,%edi
|
|
roll $30,%r12d
|
|
addl %eax,%edi
|
|
roll $1,%r14d
|
|
xorl 44(%rsp),%edx
|
|
movl %r11d,%eax
|
|
|
|
|
|
movl %edi,%ecx
|
|
xorl 52(%rsp),%edx
|
|
xorl %r13d,%eax
|
|
roll $5,%ecx
|
|
xorl 12(%rsp),%edx
|
|
leal -899497514(%r14,%rsi,1),%esi
|
|
xorl %r12d,%eax
|
|
addl %ecx,%esi
|
|
roll $30,%r11d
|
|
addl %eax,%esi
|
|
roll $1,%edx
|
|
xorl 48(%rsp),%ebp
|
|
movl %edi,%eax
|
|
|
|
|
|
movl %esi,%ecx
|
|
xorl 56(%rsp),%ebp
|
|
xorl %r12d,%eax
|
|
roll $5,%ecx
|
|
xorl 16(%rsp),%ebp
|
|
leal -899497514(%rdx,%r13,1),%r13d
|
|
xorl %r11d,%eax
|
|
addl %ecx,%r13d
|
|
roll $30,%edi
|
|
addl %eax,%r13d
|
|
roll $1,%ebp
|
|
xorl 52(%rsp),%r14d
|
|
movl %esi,%eax
|
|
|
|
|
|
movl %r13d,%ecx
|
|
xorl 60(%rsp),%r14d
|
|
xorl %r11d,%eax
|
|
roll $5,%ecx
|
|
xorl 20(%rsp),%r14d
|
|
leal -899497514(%rbp,%r12,1),%r12d
|
|
xorl %edi,%eax
|
|
addl %ecx,%r12d
|
|
roll $30,%esi
|
|
addl %eax,%r12d
|
|
roll $1,%r14d
|
|
xorl 56(%rsp),%edx
|
|
movl %r13d,%eax
|
|
|
|
|
|
movl %r12d,%ecx
|
|
xorl 0(%rsp),%edx
|
|
xorl %edi,%eax
|
|
roll $5,%ecx
|
|
xorl 24(%rsp),%edx
|
|
leal -899497514(%r14,%r11,1),%r11d
|
|
xorl %esi,%eax
|
|
addl %ecx,%r11d
|
|
roll $30,%r13d
|
|
addl %eax,%r11d
|
|
roll $1,%edx
|
|
xorl 60(%rsp),%ebp
|
|
movl %r12d,%eax
|
|
|
|
|
|
movl %r11d,%ecx
|
|
xorl 4(%rsp),%ebp
|
|
xorl %esi,%eax
|
|
roll $5,%ecx
|
|
xorl 28(%rsp),%ebp
|
|
leal -899497514(%rdx,%rdi,1),%edi
|
|
xorl %r13d,%eax
|
|
addl %ecx,%edi
|
|
roll $30,%r12d
|
|
addl %eax,%edi
|
|
roll $1,%ebp
|
|
movl %r11d,%eax
|
|
movl %edi,%ecx
|
|
xorl %r13d,%eax
|
|
leal -899497514(%rbp,%rsi,1),%esi
|
|
roll $5,%ecx
|
|
xorl %r12d,%eax
|
|
addl %ecx,%esi
|
|
roll $30,%r11d
|
|
addl %eax,%esi
|
|
# End of one loop iteration, then the integer-path epilogue.
#
# The five working registers are added to the state words at 0..16(%r8) and
# the sums are written back; they stay in the same registers as the starting
# state for the next iteration.
addl 0(%r8),%esi
|
|
addl 4(%r8),%edi
|
|
addl 8(%r8),%r11d
|
|
addl 12(%r8),%r12d
|
|
addl 16(%r8),%r13d
|
|
movl %esi,0(%r8)
|
|
movl %edi,4(%r8)
|
|
movl %r11d,8(%r8)
|
|
movl %r12d,12(%r8)
|
|
movl %r13d,16(%r8)
|
|
|
|
# Count down the remaining iterations.  The counter is tested only after the
# decrement, so a count of 0 on entry would not stop the loop here.
subq $1,%r10
|
|
# lea advances the input pointer by 64 without touching the flags from subq
leaq 64(%r9),%r9
|
|
jnz .Lloop
|
|
|
|
# Epilogue: %rsi = %rsp value at entry (saved at 64(%rsp) by the prologue).
# The five registers pushed on entry sit just below it, in push order
# %rbx (-8), %rbp (-16), %r12 (-24), %r13 (-32), %r14 (-40).
movq 64(%rsp),%rsi
|
|
movq -40(%rsi),%r14
|
|
movq -32(%rsi),%r13
|
|
movq -24(%rsi),%r12
|
|
movq -16(%rsi),%rbp
|
|
movq -8(%rsi),%rbx
|
|
leaq (%rsi),%rsp
|
|
.Lepilogue:
|
|
# bytes F3 C3 = "rep ret"
.byte 0xf3,0xc3
|
|
.size sha1_block_data_order,.-sha1_block_data_order
|
|
# sha1_block_data_order_shaext / _shaext_shortcut -- path reached from the
# dispatcher when OPENSSL_ia32cap_P word2 bit 29 is set.
#
# In:  %rdi -> state (16 bytes + 4 bytes, read here and written back at the end)
#      %rsi -> input, 64 bytes consumed per iteration
#      %rdx =  iteration count (decremented at the top of the loop; the loop
#              repeats while the result is non-zero)
# Uses %rax and %xmm0-%xmm9; no stack frame.
#
# The SHA and SSSE3 instructions are emitted as raw bytes.  Decoded (AT&T
# operand order):
#   .byte 102,15,56,0,<modrm>   = pshufb    (227/235/243/251: %xmm3 -> %xmm4/5/6/7)
#   .byte 15,56,201,<modrm>     = sha1msg1
#   .byte 15,56,202,<modrm>     = sha1msg2
#   .byte 15,56,200,<modrm>     = sha1nexte
#   .byte 15,58,204,<modrm>,<n> = sha1rnds4 $n  (n = 0..3 selects the round group)
#   .byte 65,15,56,200,201      = sha1nexte %xmm9,%xmm1 (REX prefix reaches %xmm9)
#
# Register roles: %xmm0 = four state words, %xmm1/%xmm2 = alternating fifth
# word / round input, %xmm3 = pshufb mask loaded from K_XX_XX+160,
# %xmm4-%xmm7 = message words, %xmm8/%xmm9 = state saved at the start of the
# block for the final feed-forward.
# NOTE(review): K_XX_XX is defined outside this view; the mask is presumably a
# byte-reversal pattern -- confirm.
.type sha1_block_data_order_shaext,@function
|
|
.align 32
|
|
sha1_block_data_order_shaext:
|
|
_shaext_shortcut:
|
|
movdqu (%rdi),%xmm0
|
|
movd 16(%rdi),%xmm1
|
|
movdqa K_XX_XX+160(%rip),%xmm3
|
|
|
|
movdqu (%rsi),%xmm4
|
|
# $27 = 0x1B reverses the order of the four dwords
pshufd $27,%xmm0,%xmm0
|
|
movdqu 16(%rsi),%xmm5
|
|
pshufd $27,%xmm1,%xmm1
|
|
movdqu 32(%rsi),%xmm6
|
|
.byte 102,15,56,0,227
|
|
movdqu 48(%rsi),%xmm7
|
|
.byte 102,15,56,0,235
|
|
.byte 102,15,56,0,243
|
|
movdqa %xmm1,%xmm9
|
|
.byte 102,15,56,0,251
|
|
jmp .Loop_shaext
|
|
|
|
.align 16
|
|
.Loop_shaext:
|
|
# ZF from this decrement is consumed by cmovne below and by the jnz at the
# bottom of the loop; nothing in between modifies the flags.
decq %rdx
|
|
leaq 64(%rsi),%rax
|
|
paddd %xmm4,%xmm1
|
|
# advance the input pointer only if another block follows, so the loads for
# the "next" block near the end of the loop never move past the last block
cmovneq %rax,%rsi
|
|
movdqa %xmm0,%xmm8
|
|
.byte 15,56,201,229
|
|
movdqa %xmm0,%xmm2
|
|
.byte 15,58,204,193,0
|
|
.byte 15,56,200,213
|
|
pxor %xmm6,%xmm4
|
|
.byte 15,56,201,238
|
|
.byte 15,56,202,231
|
|
|
|
movdqa %xmm0,%xmm1
|
|
.byte 15,58,204,194,0
|
|
.byte 15,56,200,206
|
|
pxor %xmm7,%xmm5
|
|
.byte 15,56,202,236
|
|
.byte 15,56,201,247
|
|
movdqa %xmm0,%xmm2
|
|
.byte 15,58,204,193,0
|
|
.byte 15,56,200,215
|
|
pxor %xmm4,%xmm6
|
|
.byte 15,56,201,252
|
|
.byte 15,56,202,245
|
|
|
|
movdqa %xmm0,%xmm1
|
|
.byte 15,58,204,194,0
|
|
.byte 15,56,200,204
|
|
pxor %xmm5,%xmm7
|
|
.byte 15,56,202,254
|
|
.byte 15,56,201,229
|
|
movdqa %xmm0,%xmm2
|
|
.byte 15,58,204,193,0
|
|
.byte 15,56,200,213
|
|
pxor %xmm6,%xmm4
|
|
.byte 15,56,201,238
|
|
.byte 15,56,202,231
|
|
|
|
movdqa %xmm0,%xmm1
|
|
.byte 15,58,204,194,1
|
|
.byte 15,56,200,206
|
|
pxor %xmm7,%xmm5
|
|
.byte 15,56,202,236
|
|
.byte 15,56,201,247
|
|
movdqa %xmm0,%xmm2
|
|
.byte 15,58,204,193,1
|
|
.byte 15,56,200,215
|
|
pxor %xmm4,%xmm6
|
|
.byte 15,56,201,252
|
|
.byte 15,56,202,245
|
|
|
|
movdqa %xmm0,%xmm1
|
|
.byte 15,58,204,194,1
|
|
.byte 15,56,200,204
|
|
pxor %xmm5,%xmm7
|
|
.byte 15,56,202,254
|
|
.byte 15,56,201,229
|
|
movdqa %xmm0,%xmm2
|
|
.byte 15,58,204,193,1
|
|
.byte 15,56,200,213
|
|
pxor %xmm6,%xmm4
|
|
.byte 15,56,201,238
|
|
.byte 15,56,202,231
|
|
|
|
movdqa %xmm0,%xmm1
|
|
.byte 15,58,204,194,1
|
|
.byte 15,56,200,206
|
|
pxor %xmm7,%xmm5
|
|
.byte 15,56,202,236
|
|
.byte 15,56,201,247
|
|
movdqa %xmm0,%xmm2
|
|
.byte 15,58,204,193,2
|
|
.byte 15,56,200,215
|
|
pxor %xmm4,%xmm6
|
|
.byte 15,56,201,252
|
|
.byte 15,56,202,245
|
|
|
|
movdqa %xmm0,%xmm1
|
|
.byte 15,58,204,194,2
|
|
.byte 15,56,200,204
|
|
pxor %xmm5,%xmm7
|
|
.byte 15,56,202,254
|
|
.byte 15,56,201,229
|
|
movdqa %xmm0,%xmm2
|
|
.byte 15,58,204,193,2
|
|
.byte 15,56,200,213
|
|
pxor %xmm6,%xmm4
|
|
.byte 15,56,201,238
|
|
.byte 15,56,202,231
|
|
|
|
movdqa %xmm0,%xmm1
|
|
.byte 15,58,204,194,2
|
|
.byte 15,56,200,206
|
|
pxor %xmm7,%xmm5
|
|
.byte 15,56,202,236
|
|
.byte 15,56,201,247
|
|
movdqa %xmm0,%xmm2
|
|
.byte 15,58,204,193,2
|
|
.byte 15,56,200,215
|
|
pxor %xmm4,%xmm6
|
|
.byte 15,56,201,252
|
|
.byte 15,56,202,245
|
|
|
|
movdqa %xmm0,%xmm1
|
|
.byte 15,58,204,194,3
|
|
.byte 15,56,200,204
|
|
pxor %xmm5,%xmm7
|
|
.byte 15,56,202,254
|
|
# from here the message registers are reloaded from (%rsi) for the next
# iteration while the last round groups of this block finish
movdqu (%rsi),%xmm4
|
|
movdqa %xmm0,%xmm2
|
|
.byte 15,58,204,193,3
|
|
.byte 15,56,200,213
|
|
movdqu 16(%rsi),%xmm5
|
|
.byte 102,15,56,0,227
|
|
|
|
movdqa %xmm0,%xmm1
|
|
.byte 15,58,204,194,3
|
|
.byte 15,56,200,206
|
|
movdqu 32(%rsi),%xmm6
|
|
.byte 102,15,56,0,235
|
|
|
|
movdqa %xmm0,%xmm2
|
|
.byte 15,58,204,193,3
|
|
.byte 15,56,200,215
|
|
movdqu 48(%rsi),%xmm7
|
|
.byte 102,15,56,0,243
|
|
|
|
movdqa %xmm0,%xmm1
|
|
.byte 15,58,204,194,3
|
|
.byte 65,15,56,200,201
|
|
.byte 102,15,56,0,251
|
|
|
|
# feed-forward: add the state saved at the top of the iteration
paddd %xmm8,%xmm0
|
|
movdqa %xmm1,%xmm9
|
|
|
|
jnz .Loop_shaext
|
|
|
|
# restore the in-memory dword order and write the state back
pshufd $27,%xmm0,%xmm0
|
|
pshufd $27,%xmm1,%xmm1
|
|
movdqu %xmm0,(%rdi)
|
|
movd %xmm1,16(%rdi)
|
|
# bytes F3 C3 = "rep ret"
.byte 0xf3,0xc3
|
|
.size sha1_block_data_order_shaext,.-sha1_block_data_order_shaext
|
|
.type sha1_block_data_order_ssse3,@function
|
|
.align 16
|
|
sha1_block_data_order_ssse3:
|
|
_ssse3_shortcut:
|
|
movq %rsp,%rax
|
|
pushq %rbx
|
|
pushq %rbp
|
|
pushq %r12
|
|
pushq %r13
|
|
pushq %r14
|
|
leaq -64(%rsp),%rsp
|
|
movq %rax,%r14
|
|
andq $-64,%rsp
|
|
movq %rdi,%r8
|
|
movq %rsi,%r9
|
|
movq %rdx,%r10
|
|
|
|
shlq $6,%r10
|
|
addq %r9,%r10
|
|
leaq K_XX_XX+64(%rip),%r11
|
|
|
|
movl 0(%r8),%eax
|
|
movl 4(%r8),%ebx
|
|
movl 8(%r8),%ecx
|
|
movl 12(%r8),%edx
|
|
movl %ebx,%esi
|
|
movl 16(%r8),%ebp
|
|
movl %ecx,%edi
|
|
xorl %edx,%edi
|
|
andl %edi,%esi
|
|
|
|
movdqa 64(%r11),%xmm6
|
|
movdqa -64(%r11),%xmm9
|
|
movdqu 0(%r9),%xmm0
|
|
movdqu 16(%r9),%xmm1
|
|
movdqu 32(%r9),%xmm2
|
|
movdqu 48(%r9),%xmm3
|
|
.byte 102,15,56,0,198
|
|
.byte 102,15,56,0,206
|
|
.byte 102,15,56,0,214
|
|
addq $64,%r9
|
|
paddd %xmm9,%xmm0
|
|
.byte 102,15,56,0,222
|
|
paddd %xmm9,%xmm1
|
|
paddd %xmm9,%xmm2
|
|
movdqa %xmm0,0(%rsp)
|
|
psubd %xmm9,%xmm0
|
|
movdqa %xmm1,16(%rsp)
|
|
psubd %xmm9,%xmm1
|
|
movdqa %xmm2,32(%rsp)
|
|
psubd %xmm9,%xmm2
|
|
jmp .Loop_ssse3
|
|
# .Loop_ssse3 -- one iteration processes one 64-byte input block.
#
# Two instruction streams are interleaved:
#   * scalar: 80 steps, each beginning with `addl off(%rsp),<reg>`, which adds
#     one precomputed dword from the scratch area into a working state word.
#     The five state words eax/ebx/ecx/edx/ebp rotate roles from step to step;
#     esi/edi alternate as the boolean-term scratch register.  (Judging by the
#     function name these are the 80 SHA-1 rounds.)
#   * vector: xmm0..xmm7 hold groups of four expanded words; xmm8..xmm10 are
#     temporaries and also carry the current constant loaded from the table at
#     r11.  Each finished group plus constant is stored back into a 16-byte
#     scratch slot shortly before the scalar steps that read it.
#
# On entry esi must already hold ebx & (ecx ^ edx); both the setup code and the
# end of this loop body compute that before jumping here.
# The instruction order is a deliberate schedule; do not reorder.
.align 16
|
|
.Loop_ssse3:
|
|
rorl $2,%ebx
|
|
pshufd $238,%xmm0,%xmm4
|
|
xorl %edx,%esi
|
|
movdqa %xmm3,%xmm8
|
|
paddd %xmm3,%xmm9
|
|
movl %eax,%edi
|
|
# steps 0-3 take their input words from 0..12(%rsp)
addl 0(%rsp),%ebp
|
|
punpcklqdq %xmm1,%xmm4
|
|
xorl %ecx,%ebx
|
|
roll $5,%eax
|
|
addl %esi,%ebp
|
|
psrldq $4,%xmm8
|
|
andl %ebx,%edi
|
|
xorl %ecx,%ebx
|
|
pxor %xmm0,%xmm4
|
|
addl %eax,%ebp
|
|
rorl $7,%eax
|
|
pxor %xmm2,%xmm8
|
|
xorl %ecx,%edi
|
|
movl %ebp,%esi
|
|
addl 4(%rsp),%edx
|
|
pxor %xmm8,%xmm4
|
|
xorl %ebx,%eax
|
|
roll $5,%ebp
|
|
# 48..60(%rsp) <- xmm3 + constant (read by steps 12-15)
movdqa %xmm9,48(%rsp)
|
|
addl %edi,%edx
|
|
andl %eax,%esi
|
|
movdqa %xmm4,%xmm10
|
|
xorl %ebx,%eax
|
|
addl %ebp,%edx
|
|
rorl $7,%ebp
|
|
movdqa %xmm4,%xmm8
|
|
xorl %ebx,%esi
|
|
pslldq $12,%xmm10
|
|
paddd %xmm4,%xmm4
|
|
movl %edx,%edi
|
|
addl 8(%rsp),%ecx
|
|
psrld $31,%xmm8
|
|
xorl %eax,%ebp
|
|
roll $5,%edx
|
|
addl %esi,%ecx
|
|
movdqa %xmm10,%xmm9
|
|
andl %ebp,%edi
|
|
xorl %eax,%ebp
|
|
psrld $30,%xmm10
|
|
addl %edx,%ecx
|
|
rorl $7,%edx
|
|
por %xmm8,%xmm4
|
|
xorl %eax,%edi
|
|
movl %ecx,%esi
|
|
addl 12(%rsp),%ebx
|
|
pslld $2,%xmm9
|
|
pxor %xmm10,%xmm4
|
|
xorl %ebp,%edx
|
|
# reload the constant at -64(%r11); it is added to xmm4 below
movdqa -64(%r11),%xmm10
|
|
roll $5,%ecx
|
|
addl %edi,%ebx
|
|
andl %edx,%esi
|
|
pxor %xmm9,%xmm4
|
|
xorl %ebp,%edx
|
|
addl %ecx,%ebx
|
|
rorl $7,%ecx
|
|
pshufd $238,%xmm1,%xmm5
|
|
xorl %ebp,%esi
|
|
movdqa %xmm4,%xmm9
|
|
paddd %xmm4,%xmm10
|
|
movl %ebx,%edi
|
|
# steps 4-7 take their input words from 16..28(%rsp)
addl 16(%rsp),%eax
|
|
punpcklqdq %xmm2,%xmm5
|
|
xorl %edx,%ecx
|
|
roll $5,%ebx
|
|
addl %esi,%eax
|
|
psrldq $4,%xmm9
|
|
andl %ecx,%edi
|
|
xorl %edx,%ecx
|
|
pxor %xmm1,%xmm5
|
|
addl %ebx,%eax
|
|
rorl $7,%ebx
|
|
pxor %xmm3,%xmm9
|
|
xorl %edx,%edi
|
|
movl %eax,%esi
|
|
addl 20(%rsp),%ebp
|
|
pxor %xmm9,%xmm5
|
|
xorl %ecx,%ebx
|
|
roll $5,%eax
|
|
# 0..12(%rsp) <- xmm4 + constant (read by steps 16-19)
movdqa %xmm10,0(%rsp)
|
|
addl %edi,%ebp
|
|
andl %ebx,%esi
|
|
movdqa %xmm5,%xmm8
|
|
xorl %ecx,%ebx
|
|
addl %eax,%ebp
|
|
rorl $7,%eax
|
|
movdqa %xmm5,%xmm9
|
|
xorl %ecx,%esi
|
|
pslldq $12,%xmm8
|
|
paddd %xmm5,%xmm5
|
|
movl %ebp,%edi
|
|
addl 24(%rsp),%edx
|
|
psrld $31,%xmm9
|
|
xorl %ebx,%eax
|
|
roll $5,%ebp
|
|
addl %esi,%edx
|
|
movdqa %xmm8,%xmm10
|
|
andl %eax,%edi
|
|
xorl %ebx,%eax
|
|
psrld $30,%xmm8
|
|
addl %ebp,%edx
|
|
rorl $7,%ebp
|
|
por %xmm9,%xmm5
|
|
xorl %ebx,%edi
|
|
movl %edx,%esi
|
|
addl 28(%rsp),%ecx
|
|
pslld $2,%xmm10
|
|
pxor %xmm8,%xmm5
|
|
xorl %eax,%ebp
|
|
# switch to the constant at -32(%r11); it is added to xmm5 below
movdqa -32(%r11),%xmm8
|
|
roll $5,%edx
|
|
addl %edi,%ecx
|
|
andl %ebp,%esi
|
|
pxor %xmm10,%xmm5
|
|
xorl %eax,%ebp
|
|
addl %edx,%ecx
|
|
rorl $7,%edx
|
|
pshufd $238,%xmm2,%xmm6
|
|
xorl %eax,%esi
|
|
movdqa %xmm5,%xmm10
|
|
paddd %xmm5,%xmm8
|
|
movl %ecx,%edi
|
|
# steps 8-11 take their input words from 32..44(%rsp)
addl 32(%rsp),%ebx
|
|
punpcklqdq %xmm3,%xmm6
|
|
xorl %ebp,%edx
|
|
roll $5,%ecx
|
|
addl %esi,%ebx
|
|
psrldq $4,%xmm10
|
|
andl %edx,%edi
|
|
xorl %ebp,%edx
|
|
pxor %xmm2,%xmm6
|
|
addl %ecx,%ebx
|
|
rorl $7,%ecx
|
|
pxor %xmm4,%xmm10
|
|
xorl %ebp,%edi
|
|
movl %ebx,%esi
|
|
addl 36(%rsp),%eax
|
|
pxor %xmm10,%xmm6
|
|
xorl %edx,%ecx
|
|
roll $5,%ebx
|
|
# 16..28(%rsp) <- xmm5 + constant (read by steps 20-23)
movdqa %xmm8,16(%rsp)
|
|
addl %edi,%eax
|
|
andl %ecx,%esi
|
|
movdqa %xmm6,%xmm9
|
|
xorl %edx,%ecx
|
|
addl %ebx,%eax
|
|
rorl $7,%ebx
|
|
movdqa %xmm6,%xmm10
|
|
xorl %edx,%esi
|
|
pslldq $12,%xmm9
|
|
paddd %xmm6,%xmm6
|
|
movl %eax,%edi
|
|
addl 40(%rsp),%ebp
|
|
psrld $31,%xmm10
|
|
xorl %ecx,%ebx
|
|
roll $5,%eax
|
|
addl %esi,%ebp
|
|
movdqa %xmm9,%xmm8
|
|
andl %ebx,%edi
|
|
xorl %ecx,%ebx
|
|
psrld $30,%xmm9
|
|
addl %eax,%ebp
|
|
rorl $7,%eax
|
|
por %xmm10,%xmm6
|
|
xorl %ecx,%edi
|
|
movl %ebp,%esi
|
|
addl 44(%rsp),%edx
|
|
pslld $2,%xmm8
|
|
pxor %xmm9,%xmm6
|
|
xorl %ebx,%eax
|
|
movdqa -32(%r11),%xmm9
|
|
roll $5,%ebp
|
|
addl %edi,%edx
|
|
andl %eax,%esi
|
|
pxor %xmm8,%xmm6
|
|
xorl %ebx,%eax
|
|
addl %ebp,%edx
|
|
rorl $7,%ebp
|
|
pshufd $238,%xmm3,%xmm7
|
|
xorl %ebx,%esi
|
|
movdqa %xmm6,%xmm8
|
|
paddd %xmm6,%xmm9
|
|
movl %edx,%edi
|
|
# steps 12-15 take their input words from 48..60(%rsp)
addl 48(%rsp),%ecx
|
|
punpcklqdq %xmm4,%xmm7
|
|
xorl %eax,%ebp
|
|
roll $5,%edx
|
|
addl %esi,%ecx
|
|
psrldq $4,%xmm8
|
|
andl %ebp,%edi
|
|
xorl %eax,%ebp
|
|
pxor %xmm3,%xmm7
|
|
addl %edx,%ecx
|
|
rorl $7,%edx
|
|
pxor %xmm5,%xmm8
|
|
xorl %eax,%edi
|
|
movl %ecx,%esi
|
|
addl 52(%rsp),%ebx
|
|
pxor %xmm8,%xmm7
|
|
xorl %ebp,%edx
|
|
roll $5,%ecx
|
|
# 32..44(%rsp) <- xmm6 + constant (read by steps 24-27)
movdqa %xmm9,32(%rsp)
|
|
addl %edi,%ebx
|
|
andl %edx,%esi
|
|
movdqa %xmm7,%xmm10
|
|
xorl %ebp,%edx
|
|
addl %ecx,%ebx
|
|
rorl $7,%ecx
|
|
movdqa %xmm7,%xmm8
|
|
xorl %ebp,%esi
|
|
pslldq $12,%xmm10
|
|
paddd %xmm7,%xmm7
|
|
movl %ebx,%edi
|
|
addl 56(%rsp),%eax
|
|
psrld $31,%xmm8
|
|
xorl %edx,%ecx
|
|
roll $5,%ebx
|
|
addl %esi,%eax
|
|
movdqa %xmm10,%xmm9
|
|
andl %ecx,%edi
|
|
xorl %edx,%ecx
|
|
psrld $30,%xmm10
|
|
addl %ebx,%eax
|
|
rorl $7,%ebx
|
|
por %xmm8,%xmm7
|
|
xorl %edx,%edi
|
|
movl %eax,%esi
|
|
addl 60(%rsp),%ebp
|
|
pslld $2,%xmm9
|
|
pxor %xmm10,%xmm7
|
|
xorl %ecx,%ebx
|
|
movdqa -32(%r11),%xmm10
|
|
roll $5,%eax
|
|
addl %edi,%ebp
|
|
andl %ebx,%esi
|
|
pxor %xmm9,%xmm7
|
|
pshufd $238,%xmm6,%xmm9
|
|
xorl %ecx,%ebx
|
|
addl %eax,%ebp
|
|
rorl $7,%eax
|
|
pxor %xmm4,%xmm0
|
|
xorl %ecx,%esi
|
|
movl %ebp,%edi
|
|
# steps 16-19 take their input words from 0..12(%rsp)
addl 0(%rsp),%edx
|
|
punpcklqdq %xmm7,%xmm9
|
|
xorl %ebx,%eax
|
|
roll $5,%ebp
|
|
pxor %xmm1,%xmm0
|
|
addl %esi,%edx
|
|
andl %eax,%edi
|
|
# keep a copy of the constant in xmm8 before xmm10 is consumed by the add
movdqa %xmm10,%xmm8
|
|
xorl %ebx,%eax
|
|
paddd %xmm7,%xmm10
|
|
addl %ebp,%edx
|
|
pxor %xmm9,%xmm0
|
|
rorl $7,%ebp
|
|
xorl %ebx,%edi
|
|
movl %edx,%esi
|
|
addl 4(%rsp),%ecx
|
|
movdqa %xmm0,%xmm9
|
|
xorl %eax,%ebp
|
|
roll $5,%edx
|
|
# 48..60(%rsp) <- xmm7 + constant (read by steps 28-31)
movdqa %xmm10,48(%rsp)
|
|
addl %edi,%ecx
|
|
andl %ebp,%esi
|
|
xorl %eax,%ebp
|
|
pslld $2,%xmm0
|
|
addl %edx,%ecx
|
|
rorl $7,%edx
|
|
psrld $30,%xmm9
|
|
xorl %eax,%esi
|
|
movl %ecx,%edi
|
|
addl 8(%rsp),%ebx
|
|
por %xmm9,%xmm0
|
|
xorl %ebp,%edx
|
|
roll $5,%ecx
|
|
pshufd $238,%xmm7,%xmm10
|
|
addl %esi,%ebx
|
|
andl %edx,%edi
|
|
xorl %ebp,%edx
|
|
addl %ecx,%ebx
|
|
addl 12(%rsp),%eax
|
|
xorl %ebp,%edi
|
|
movl %ebx,%esi
|
|
roll $5,%ebx
|
|
addl %edi,%eax
|
|
xorl %edx,%esi
|
|
rorl $7,%ecx
|
|
addl %ebx,%eax
|
|
pxor %xmm5,%xmm1
|
|
# steps 20-23 take their input words from 16..28(%rsp)
addl 16(%rsp),%ebp
|
|
xorl %ecx,%esi
|
|
punpcklqdq %xmm0,%xmm10
|
|
movl %eax,%edi
|
|
roll $5,%eax
|
|
pxor %xmm2,%xmm1
|
|
addl %esi,%ebp
|
|
xorl %ecx,%edi
|
|
movdqa %xmm8,%xmm9
|
|
rorl $7,%ebx
|
|
paddd %xmm0,%xmm8
|
|
addl %eax,%ebp
|
|
pxor %xmm10,%xmm1
|
|
addl 20(%rsp),%edx
|
|
xorl %ebx,%edi
|
|
movl %ebp,%esi
|
|
roll $5,%ebp
|
|
movdqa %xmm1,%xmm10
|
|
addl %edi,%edx
|
|
xorl %ebx,%esi
|
|
# 0..12(%rsp) <- xmm0 + constant (read by steps 32-35)
movdqa %xmm8,0(%rsp)
|
|
rorl $7,%eax
|
|
addl %ebp,%edx
|
|
addl 24(%rsp),%ecx
|
|
pslld $2,%xmm1
|
|
xorl %eax,%esi
|
|
movl %edx,%edi
|
|
psrld $30,%xmm10
|
|
roll $5,%edx
|
|
addl %esi,%ecx
|
|
xorl %eax,%edi
|
|
rorl $7,%ebp
|
|
por %xmm10,%xmm1
|
|
addl %edx,%ecx
|
|
addl 28(%rsp),%ebx
|
|
pshufd $238,%xmm0,%xmm8
|
|
xorl %ebp,%edi
|
|
movl %ecx,%esi
|
|
roll $5,%ecx
|
|
addl %edi,%ebx
|
|
xorl %ebp,%esi
|
|
rorl $7,%edx
|
|
addl %ecx,%ebx
|
|
pxor %xmm6,%xmm2
|
|
# steps 24-27 take their input words from 32..44(%rsp)
addl 32(%rsp),%eax
|
|
xorl %edx,%esi
|
|
punpcklqdq %xmm1,%xmm8
|
|
movl %ebx,%edi
|
|
roll $5,%ebx
|
|
pxor %xmm3,%xmm2
|
|
addl %esi,%eax
|
|
xorl %edx,%edi
|
|
# switch to the constant at 0(%r11); xmm9 still carries the previous one
movdqa 0(%r11),%xmm10
|
|
rorl $7,%ecx
|
|
paddd %xmm1,%xmm9
|
|
addl %ebx,%eax
|
|
pxor %xmm8,%xmm2
|
|
addl 36(%rsp),%ebp
|
|
xorl %ecx,%edi
|
|
movl %eax,%esi
|
|
roll $5,%eax
|
|
movdqa %xmm2,%xmm8
|
|
addl %edi,%ebp
|
|
xorl %ecx,%esi
|
|
# 16..28(%rsp) <- xmm1 + constant (read by steps 36-39)
movdqa %xmm9,16(%rsp)
|
|
rorl $7,%ebx
|
|
addl %eax,%ebp
|
|
addl 40(%rsp),%edx
|
|
pslld $2,%xmm2
|
|
xorl %ebx,%esi
|
|
movl %ebp,%edi
|
|
psrld $30,%xmm8
|
|
roll $5,%ebp
|
|
addl %esi,%edx
|
|
xorl %ebx,%edi
|
|
rorl $7,%eax
|
|
por %xmm8,%xmm2
|
|
addl %ebp,%edx
|
|
addl 44(%rsp),%ecx
|
|
pshufd $238,%xmm1,%xmm9
|
|
xorl %eax,%edi
|
|
movl %edx,%esi
|
|
roll $5,%edx
|
|
addl %edi,%ecx
|
|
xorl %eax,%esi
|
|
rorl $7,%ebp
|
|
addl %edx,%ecx
|
|
pxor %xmm7,%xmm3
|
|
# steps 28-31 take their input words from 48..60(%rsp)
addl 48(%rsp),%ebx
|
|
xorl %ebp,%esi
|
|
punpcklqdq %xmm2,%xmm9
|
|
movl %ecx,%edi
|
|
roll $5,%ecx
|
|
pxor %xmm4,%xmm3
|
|
addl %esi,%ebx
|
|
xorl %ebp,%edi
|
|
movdqa %xmm10,%xmm8
|
|
rorl $7,%edx
|
|
paddd %xmm2,%xmm10
|
|
addl %ecx,%ebx
|
|
pxor %xmm9,%xmm3
|
|
addl 52(%rsp),%eax
|
|
xorl %edx,%edi
|
|
movl %ebx,%esi
|
|
roll $5,%ebx
|
|
movdqa %xmm3,%xmm9
|
|
addl %edi,%eax
|
|
xorl %edx,%esi
|
|
# 32..44(%rsp) <- xmm2 + constant (read by steps 40-43)
movdqa %xmm10,32(%rsp)
|
|
rorl $7,%ecx
|
|
addl %ebx,%eax
|
|
addl 56(%rsp),%ebp
|
|
pslld $2,%xmm3
|
|
xorl %ecx,%esi
|
|
movl %eax,%edi
|
|
psrld $30,%xmm9
|
|
roll $5,%eax
|
|
addl %esi,%ebp
|
|
xorl %ecx,%edi
|
|
rorl $7,%ebx
|
|
por %xmm9,%xmm3
|
|
addl %eax,%ebp
|
|
addl 60(%rsp),%edx
|
|
pshufd $238,%xmm2,%xmm10
|
|
xorl %ebx,%edi
|
|
movl %ebp,%esi
|
|
roll $5,%ebp
|
|
addl %edi,%edx
|
|
xorl %ebx,%esi
|
|
rorl $7,%eax
|
|
addl %ebp,%edx
|
|
pxor %xmm0,%xmm4
|
|
# steps 32-35 take their input words from 0..12(%rsp)
addl 0(%rsp),%ecx
|
|
xorl %eax,%esi
|
|
punpcklqdq %xmm3,%xmm10
|
|
movl %edx,%edi
|
|
roll $5,%edx
|
|
pxor %xmm5,%xmm4
|
|
addl %esi,%ecx
|
|
xorl %eax,%edi
|
|
movdqa %xmm8,%xmm9
|
|
rorl $7,%ebp
|
|
paddd %xmm3,%xmm8
|
|
addl %edx,%ecx
|
|
pxor %xmm10,%xmm4
|
|
addl 4(%rsp),%ebx
|
|
xorl %ebp,%edi
|
|
movl %ecx,%esi
|
|
roll $5,%ecx
|
|
movdqa %xmm4,%xmm10
|
|
addl %edi,%ebx
|
|
xorl %ebp,%esi
|
|
# 48..60(%rsp) <- xmm3 + constant (read by steps 44-47)
movdqa %xmm8,48(%rsp)
|
|
rorl $7,%edx
|
|
addl %ecx,%ebx
|
|
addl 8(%rsp),%eax
|
|
pslld $2,%xmm4
|
|
xorl %edx,%esi
|
|
movl %ebx,%edi
|
|
psrld $30,%xmm10
|
|
roll $5,%ebx
|
|
addl %esi,%eax
|
|
xorl %edx,%edi
|
|
rorl $7,%ecx
|
|
por %xmm10,%xmm4
|
|
addl %ebx,%eax
|
|
addl 12(%rsp),%ebp
|
|
pshufd $238,%xmm3,%xmm8
|
|
xorl %ecx,%edi
|
|
movl %eax,%esi
|
|
roll $5,%eax
|
|
addl %edi,%ebp
|
|
xorl %ecx,%esi
|
|
rorl $7,%ebx
|
|
addl %eax,%ebp
|
|
pxor %xmm1,%xmm5
|
|
# steps 36-39 take their input words from 16..28(%rsp)
addl 16(%rsp),%edx
|
|
xorl %ebx,%esi
|
|
punpcklqdq %xmm4,%xmm8
|
|
movl %ebp,%edi
|
|
roll $5,%ebp
|
|
pxor %xmm6,%xmm5
|
|
addl %esi,%edx
|
|
xorl %ebx,%edi
|
|
movdqa %xmm9,%xmm10
|
|
rorl $7,%eax
|
|
paddd %xmm4,%xmm9
|
|
addl %ebp,%edx
|
|
pxor %xmm8,%xmm5
|
|
addl 20(%rsp),%ecx
|
|
xorl %eax,%edi
|
|
movl %edx,%esi
|
|
roll $5,%edx
|
|
movdqa %xmm5,%xmm8
|
|
addl %edi,%ecx
|
|
xorl %eax,%esi
|
|
# 0..12(%rsp) <- xmm4 + constant (read by steps 48-51)
movdqa %xmm9,0(%rsp)
|
|
rorl $7,%ebp
|
|
addl %edx,%ecx
|
|
addl 24(%rsp),%ebx
|
|
pslld $2,%xmm5
|
|
xorl %ebp,%esi
|
|
movl %ecx,%edi
|
|
psrld $30,%xmm8
|
|
roll $5,%ecx
|
|
addl %esi,%ebx
|
|
xorl %ebp,%edi
|
|
rorl $7,%edx
|
|
por %xmm8,%xmm5
|
|
addl %ecx,%ebx
|
|
addl 28(%rsp),%eax
|
|
pshufd $238,%xmm4,%xmm9
|
|
rorl $7,%ecx
|
|
movl %ebx,%esi
|
|
xorl %edx,%edi
|
|
roll $5,%ebx
|
|
addl %edi,%eax
|
|
xorl %ecx,%esi
|
|
xorl %edx,%ecx
|
|
addl %ebx,%eax
|
|
pxor %xmm2,%xmm6
|
|
# steps 40-43 take their input words from 32..44(%rsp)
addl 32(%rsp),%ebp
|
|
andl %ecx,%esi
|
|
xorl %edx,%ecx
|
|
rorl $7,%ebx
|
|
punpcklqdq %xmm5,%xmm9
|
|
movl %eax,%edi
|
|
xorl %ecx,%esi
|
|
pxor %xmm7,%xmm6
|
|
roll $5,%eax
|
|
addl %esi,%ebp
|
|
movdqa %xmm10,%xmm8
|
|
xorl %ebx,%edi
|
|
paddd %xmm5,%xmm10
|
|
xorl %ecx,%ebx
|
|
pxor %xmm9,%xmm6
|
|
addl %eax,%ebp
|
|
addl 36(%rsp),%edx
|
|
andl %ebx,%edi
|
|
xorl %ecx,%ebx
|
|
rorl $7,%eax
|
|
movdqa %xmm6,%xmm9
|
|
movl %ebp,%esi
|
|
xorl %ebx,%edi
|
|
# 16..28(%rsp) <- xmm5 + constant (read by steps 52-55)
movdqa %xmm10,16(%rsp)
|
|
roll $5,%ebp
|
|
addl %edi,%edx
|
|
xorl %eax,%esi
|
|
pslld $2,%xmm6
|
|
xorl %ebx,%eax
|
|
addl %ebp,%edx
|
|
psrld $30,%xmm9
|
|
addl 40(%rsp),%ecx
|
|
andl %eax,%esi
|
|
xorl %ebx,%eax
|
|
por %xmm9,%xmm6
|
|
rorl $7,%ebp
|
|
movl %edx,%edi
|
|
xorl %eax,%esi
|
|
roll $5,%edx
|
|
pshufd $238,%xmm5,%xmm10
|
|
addl %esi,%ecx
|
|
xorl %ebp,%edi
|
|
xorl %eax,%ebp
|
|
addl %edx,%ecx
|
|
addl 44(%rsp),%ebx
|
|
andl %ebp,%edi
|
|
xorl %eax,%ebp
|
|
rorl $7,%edx
|
|
movl %ecx,%esi
|
|
xorl %ebp,%edi
|
|
roll $5,%ecx
|
|
addl %edi,%ebx
|
|
xorl %edx,%esi
|
|
xorl %ebp,%edx
|
|
addl %ecx,%ebx
|
|
pxor %xmm3,%xmm7
|
|
# steps 44-47 take their input words from 48..60(%rsp)
addl 48(%rsp),%eax
|
|
andl %edx,%esi
|
|
xorl %ebp,%edx
|
|
rorl $7,%ecx
|
|
punpcklqdq %xmm6,%xmm10
|
|
movl %ebx,%edi
|
|
xorl %edx,%esi
|
|
pxor %xmm0,%xmm7
|
|
roll $5,%ebx
|
|
addl %esi,%eax
|
|
# switch to the constant at 32(%r11); xmm8 still carries the previous one
movdqa 32(%r11),%xmm9
|
|
xorl %ecx,%edi
|
|
paddd %xmm6,%xmm8
|
|
xorl %edx,%ecx
|
|
pxor %xmm10,%xmm7
|
|
addl %ebx,%eax
|
|
addl 52(%rsp),%ebp
|
|
andl %ecx,%edi
|
|
xorl %edx,%ecx
|
|
rorl $7,%ebx
|
|
movdqa %xmm7,%xmm10
|
|
movl %eax,%esi
|
|
xorl %ecx,%edi
|
|
# 32..44(%rsp) <- xmm6 + constant (read by steps 56-59)
movdqa %xmm8,32(%rsp)
|
|
roll $5,%eax
|
|
addl %edi,%ebp
|
|
xorl %ebx,%esi
|
|
pslld $2,%xmm7
|
|
xorl %ecx,%ebx
|
|
addl %eax,%ebp
|
|
psrld $30,%xmm10
|
|
addl 56(%rsp),%edx
|
|
andl %ebx,%esi
|
|
xorl %ecx,%ebx
|
|
por %xmm10,%xmm7
|
|
rorl $7,%eax
|
|
movl %ebp,%edi
|
|
xorl %ebx,%esi
|
|
roll $5,%ebp
|
|
pshufd $238,%xmm6,%xmm8
|
|
addl %esi,%edx
|
|
xorl %eax,%edi
|
|
xorl %ebx,%eax
|
|
addl %ebp,%edx
|
|
addl 60(%rsp),%ecx
|
|
andl %eax,%edi
|
|
xorl %ebx,%eax
|
|
rorl $7,%ebp
|
|
movl %edx,%esi
|
|
xorl %eax,%edi
|
|
roll $5,%edx
|
|
addl %edi,%ecx
|
|
xorl %ebp,%esi
|
|
xorl %eax,%ebp
|
|
addl %edx,%ecx
|
|
pxor %xmm4,%xmm0
|
|
# steps 48-51 take their input words from 0..12(%rsp)
addl 0(%rsp),%ebx
|
|
andl %ebp,%esi
|
|
xorl %eax,%ebp
|
|
rorl $7,%edx
|
|
punpcklqdq %xmm7,%xmm8
|
|
movl %ecx,%edi
|
|
xorl %ebp,%esi
|
|
pxor %xmm1,%xmm0
|
|
roll $5,%ecx
|
|
addl %esi,%ebx
|
|
movdqa %xmm9,%xmm10
|
|
xorl %edx,%edi
|
|
paddd %xmm7,%xmm9
|
|
xorl %ebp,%edx
|
|
pxor %xmm8,%xmm0
|
|
addl %ecx,%ebx
|
|
addl 4(%rsp),%eax
|
|
andl %edx,%edi
|
|
xorl %ebp,%edx
|
|
rorl $7,%ecx
|
|
movdqa %xmm0,%xmm8
|
|
movl %ebx,%esi
|
|
xorl %edx,%edi
|
|
# 48..60(%rsp) <- xmm7 + constant (read by steps 60-63)
movdqa %xmm9,48(%rsp)
|
|
roll $5,%ebx
|
|
addl %edi,%eax
|
|
xorl %ecx,%esi
|
|
pslld $2,%xmm0
|
|
xorl %edx,%ecx
|
|
addl %ebx,%eax
|
|
psrld $30,%xmm8
|
|
addl 8(%rsp),%ebp
|
|
andl %ecx,%esi
|
|
xorl %edx,%ecx
|
|
por %xmm8,%xmm0
|
|
rorl $7,%ebx
|
|
movl %eax,%edi
|
|
xorl %ecx,%esi
|
|
roll $5,%eax
|
|
pshufd $238,%xmm7,%xmm9
|
|
addl %esi,%ebp
|
|
xorl %ebx,%edi
|
|
xorl %ecx,%ebx
|
|
addl %eax,%ebp
|
|
addl 12(%rsp),%edx
|
|
andl %ebx,%edi
|
|
xorl %ecx,%ebx
|
|
rorl $7,%eax
|
|
movl %ebp,%esi
|
|
xorl %ebx,%edi
|
|
roll $5,%ebp
|
|
addl %edi,%edx
|
|
xorl %eax,%esi
|
|
xorl %ebx,%eax
|
|
addl %ebp,%edx
|
|
pxor %xmm5,%xmm1
|
|
# steps 52-55 take their input words from 16..28(%rsp)
addl 16(%rsp),%ecx
|
|
andl %eax,%esi
|
|
xorl %ebx,%eax
|
|
rorl $7,%ebp
|
|
punpcklqdq %xmm0,%xmm9
|
|
movl %edx,%edi
|
|
xorl %eax,%esi
|
|
pxor %xmm2,%xmm1
|
|
roll $5,%edx
|
|
addl %esi,%ecx
|
|
movdqa %xmm10,%xmm8
|
|
xorl %ebp,%edi
|
|
paddd %xmm0,%xmm10
|
|
xorl %eax,%ebp
|
|
pxor %xmm9,%xmm1
|
|
addl %edx,%ecx
|
|
addl 20(%rsp),%ebx
|
|
andl %ebp,%edi
|
|
xorl %eax,%ebp
|
|
rorl $7,%edx
|
|
movdqa %xmm1,%xmm9
|
|
movl %ecx,%esi
|
|
xorl %ebp,%edi
|
|
# 0..12(%rsp) <- xmm0 + constant (read by steps 64-67)
movdqa %xmm10,0(%rsp)
|
|
roll $5,%ecx
|
|
addl %edi,%ebx
|
|
xorl %edx,%esi
|
|
pslld $2,%xmm1
|
|
xorl %ebp,%edx
|
|
addl %ecx,%ebx
|
|
psrld $30,%xmm9
|
|
addl 24(%rsp),%eax
|
|
andl %edx,%esi
|
|
xorl %ebp,%edx
|
|
por %xmm9,%xmm1
|
|
rorl $7,%ecx
|
|
movl %ebx,%edi
|
|
xorl %edx,%esi
|
|
roll $5,%ebx
|
|
pshufd $238,%xmm0,%xmm10
|
|
addl %esi,%eax
|
|
xorl %ecx,%edi
|
|
xorl %edx,%ecx
|
|
addl %ebx,%eax
|
|
addl 28(%rsp),%ebp
|
|
andl %ecx,%edi
|
|
xorl %edx,%ecx
|
|
rorl $7,%ebx
|
|
movl %eax,%esi
|
|
xorl %ecx,%edi
|
|
roll $5,%eax
|
|
addl %edi,%ebp
|
|
xorl %ebx,%esi
|
|
xorl %ecx,%ebx
|
|
addl %eax,%ebp
|
|
pxor %xmm6,%xmm2
|
|
# steps 56-59 take their input words from 32..44(%rsp)
addl 32(%rsp),%edx
|
|
andl %ebx,%esi
|
|
xorl %ecx,%ebx
|
|
rorl $7,%eax
|
|
punpcklqdq %xmm1,%xmm10
|
|
movl %ebp,%edi
|
|
xorl %ebx,%esi
|
|
pxor %xmm3,%xmm2
|
|
roll $5,%ebp
|
|
addl %esi,%edx
|
|
movdqa %xmm8,%xmm9
|
|
xorl %eax,%edi
|
|
paddd %xmm1,%xmm8
|
|
xorl %ebx,%eax
|
|
pxor %xmm10,%xmm2
|
|
addl %ebp,%edx
|
|
addl 36(%rsp),%ecx
|
|
andl %eax,%edi
|
|
xorl %ebx,%eax
|
|
rorl $7,%ebp
|
|
movdqa %xmm2,%xmm10
|
|
movl %edx,%esi
|
|
xorl %eax,%edi
|
|
# 16..28(%rsp) <- xmm1 + constant (read by steps 68-71)
movdqa %xmm8,16(%rsp)
|
|
roll $5,%edx
|
|
addl %edi,%ecx
|
|
xorl %ebp,%esi
|
|
pslld $2,%xmm2
|
|
xorl %eax,%ebp
|
|
addl %edx,%ecx
|
|
psrld $30,%xmm10
|
|
addl 40(%rsp),%ebx
|
|
andl %ebp,%esi
|
|
xorl %eax,%ebp
|
|
por %xmm10,%xmm2
|
|
rorl $7,%edx
|
|
movl %ecx,%edi
|
|
xorl %ebp,%esi
|
|
roll $5,%ecx
|
|
pshufd $238,%xmm1,%xmm8
|
|
addl %esi,%ebx
|
|
xorl %edx,%edi
|
|
xorl %ebp,%edx
|
|
addl %ecx,%ebx
|
|
addl 44(%rsp),%eax
|
|
andl %edx,%edi
|
|
xorl %ebp,%edx
|
|
rorl $7,%ecx
|
|
movl %ebx,%esi
|
|
xorl %edx,%edi
|
|
roll $5,%ebx
|
|
addl %edi,%eax
|
|
xorl %edx,%esi
|
|
addl %ebx,%eax
|
|
pxor %xmm7,%xmm3
|
|
# steps 60-63 take their input words from 48..60(%rsp)
addl 48(%rsp),%ebp
|
|
xorl %ecx,%esi
|
|
punpcklqdq %xmm2,%xmm8
|
|
movl %eax,%edi
|
|
roll $5,%eax
|
|
pxor %xmm4,%xmm3
|
|
addl %esi,%ebp
|
|
xorl %ecx,%edi
|
|
movdqa %xmm9,%xmm10
|
|
rorl $7,%ebx
|
|
paddd %xmm2,%xmm9
|
|
addl %eax,%ebp
|
|
pxor %xmm8,%xmm3
|
|
addl 52(%rsp),%edx
|
|
xorl %ebx,%edi
|
|
movl %ebp,%esi
|
|
roll $5,%ebp
|
|
movdqa %xmm3,%xmm8
|
|
addl %edi,%edx
|
|
xorl %ebx,%esi
|
|
# 32..44(%rsp) <- xmm2 + constant (read by steps 72-75)
movdqa %xmm9,32(%rsp)
|
|
rorl $7,%eax
|
|
addl %ebp,%edx
|
|
addl 56(%rsp),%ecx
|
|
pslld $2,%xmm3
|
|
xorl %eax,%esi
|
|
movl %edx,%edi
|
|
psrld $30,%xmm8
|
|
roll $5,%edx
|
|
addl %esi,%ecx
|
|
xorl %eax,%edi
|
|
rorl $7,%ebp
|
|
por %xmm8,%xmm3
|
|
addl %edx,%ecx
|
|
addl 60(%rsp),%ebx
|
|
xorl %ebp,%edi
|
|
movl %ecx,%esi
|
|
roll $5,%ecx
|
|
addl %edi,%ebx
|
|
xorl %ebp,%esi
|
|
rorl $7,%edx
|
|
addl %ecx,%ebx
|
|
# steps 64-67 take their input words from 0..12(%rsp)
addl 0(%rsp),%eax
|
|
xorl %edx,%esi
|
|
movl %ebx,%edi
|
|
roll $5,%ebx
|
|
paddd %xmm3,%xmm10
|
|
addl %esi,%eax
|
|
xorl %edx,%edi
|
|
# 48..60(%rsp) <- xmm3 + constant (read by steps 76-79); last refill for
# the current block
movdqa %xmm10,48(%rsp)
|
|
rorl $7,%ecx
|
|
addl %ebx,%eax
|
|
addl 4(%rsp),%ebp
|
|
xorl %ecx,%edi
|
|
movl %eax,%esi
|
|
roll $5,%eax
|
|
addl %edi,%ebp
|
|
xorl %ecx,%esi
|
|
rorl $7,%ebx
|
|
addl %eax,%ebp
|
|
addl 8(%rsp),%edx
|
|
xorl %ebx,%esi
|
|
movl %ebp,%edi
|
|
roll $5,%ebp
|
|
addl %esi,%edx
|
|
xorl %ebx,%edi
|
|
rorl $7,%eax
|
|
addl %ebp,%edx
|
|
addl 12(%rsp),%ecx
|
|
xorl %eax,%edi
|
|
movl %edx,%esi
|
|
roll $5,%edx
|
|
addl %edi,%ecx
|
|
xorl %eax,%esi
|
|
rorl $7,%ebp
|
|
addl %edx,%ecx
|
|
# If the input pointer has reached the end pointer there is no next block to
# preload: finish the remaining steps in .Ldone_ssse3 instead.
cmpq %r10,%r9
|
|
je .Ldone_ssse3
|
|
# Otherwise load the next 64 input bytes now and interleave their shuffle and
# constant-add with the remaining steps (68-79) of the current block.  The
# .byte sequences are pshufb %xmm6,%xmm0..%xmm3 (66 0F 38 00 /r).
movdqa 64(%r11),%xmm6
|
|
movdqa -64(%r11),%xmm9
|
|
movdqu 0(%r9),%xmm0
|
|
movdqu 16(%r9),%xmm1
|
|
movdqu 32(%r9),%xmm2
|
|
movdqu 48(%r9),%xmm3
|
|
.byte 102,15,56,0,198
|
|
addq $64,%r9
|
|
# steps 68-71 take their input words from 16..28(%rsp)
addl 16(%rsp),%ebx
|
|
xorl %ebp,%esi
|
|
movl %ecx,%edi
|
|
.byte 102,15,56,0,206
|
|
roll $5,%ecx
|
|
addl %esi,%ebx
|
|
xorl %ebp,%edi
|
|
rorl $7,%edx
|
|
paddd %xmm9,%xmm0
|
|
addl %ecx,%ebx
|
|
addl 20(%rsp),%eax
|
|
xorl %edx,%edi
|
|
movl %ebx,%esi
|
|
# 0..12(%rsp) <- next block's xmm0 + constant; the following psubd restores
# the plain words in xmm0
movdqa %xmm0,0(%rsp)
|
|
roll $5,%ebx
|
|
addl %edi,%eax
|
|
xorl %edx,%esi
|
|
rorl $7,%ecx
|
|
psubd %xmm9,%xmm0
|
|
addl %ebx,%eax
|
|
addl 24(%rsp),%ebp
|
|
xorl %ecx,%esi
|
|
movl %eax,%edi
|
|
roll $5,%eax
|
|
addl %esi,%ebp
|
|
xorl %ecx,%edi
|
|
rorl $7,%ebx
|
|
addl %eax,%ebp
|
|
addl 28(%rsp),%edx
|
|
xorl %ebx,%edi
|
|
movl %ebp,%esi
|
|
roll $5,%ebp
|
|
addl %edi,%edx
|
|
xorl %ebx,%esi
|
|
rorl $7,%eax
|
|
addl %ebp,%edx
|
|
# steps 72-75 take their input words from 32..44(%rsp)
addl 32(%rsp),%ecx
|
|
xorl %eax,%esi
|
|
movl %edx,%edi
|
|
.byte 102,15,56,0,214
|
|
roll $5,%edx
|
|
addl %esi,%ecx
|
|
xorl %eax,%edi
|
|
rorl $7,%ebp
|
|
paddd %xmm9,%xmm1
|
|
addl %edx,%ecx
|
|
addl 36(%rsp),%ebx
|
|
xorl %ebp,%edi
|
|
movl %ecx,%esi
|
|
# 16..28(%rsp) <- next block's xmm1 + constant
movdqa %xmm1,16(%rsp)
|
|
roll $5,%ecx
|
|
addl %edi,%ebx
|
|
xorl %ebp,%esi
|
|
rorl $7,%edx
|
|
psubd %xmm9,%xmm1
|
|
addl %ecx,%ebx
|
|
addl 40(%rsp),%eax
|
|
xorl %edx,%esi
|
|
movl %ebx,%edi
|
|
roll $5,%ebx
|
|
addl %esi,%eax
|
|
xorl %edx,%edi
|
|
rorl $7,%ecx
|
|
addl %ebx,%eax
|
|
addl 44(%rsp),%ebp
|
|
xorl %ecx,%edi
|
|
movl %eax,%esi
|
|
roll $5,%eax
|
|
addl %edi,%ebp
|
|
xorl %ecx,%esi
|
|
rorl $7,%ebx
|
|
addl %eax,%ebp
|
|
# steps 76-79 take their input words from 48..60(%rsp)
addl 48(%rsp),%edx
|
|
xorl %ebx,%esi
|
|
movl %ebp,%edi
|
|
.byte 102,15,56,0,222
|
|
roll $5,%ebp
|
|
addl %esi,%edx
|
|
xorl %ebx,%edi
|
|
rorl $7,%eax
|
|
paddd %xmm9,%xmm2
|
|
addl %ebp,%edx
|
|
addl 52(%rsp),%ecx
|
|
xorl %eax,%edi
|
|
movl %edx,%esi
|
|
# 32..44(%rsp) <- next block's xmm2 + constant
movdqa %xmm2,32(%rsp)
|
|
roll $5,%edx
|
|
addl %edi,%ecx
|
|
xorl %eax,%esi
|
|
rorl $7,%ebp
|
|
psubd %xmm9,%xmm2
|
|
addl %edx,%ecx
|
|
addl 56(%rsp),%ebx
|
|
xorl %ebp,%esi
|
|
movl %ecx,%edi
|
|
roll $5,%ecx
|
|
addl %esi,%ebx
|
|
xorl %ebp,%edi
|
|
rorl $7,%edx
|
|
addl %ecx,%ebx
|
|
addl 60(%rsp),%eax
|
|
xorl %edx,%edi
|
|
movl %ebx,%esi
|
|
roll $5,%ebx
|
|
addl %edi,%eax
|
|
rorl $7,%ecx
|
|
addl %ebx,%eax
|
|
# Fold the working words into the stored state at (%r8), write it back, and
# rebuild the loop-entry invariants for the next block:
#   ebx = new second state word, esi = ebx & (ecx ^ edx).
# Note the second word lives in esi here (copied from ebx before its rotate).
addl 0(%r8),%eax
|
|
addl 4(%r8),%esi
|
|
addl 8(%r8),%ecx
|
|
addl 12(%r8),%edx
|
|
movl %eax,0(%r8)
|
|
addl 16(%r8),%ebp
|
|
movl %esi,4(%r8)
|
|
movl %esi,%ebx
|
|
movl %ecx,8(%r8)
|
|
movl %ecx,%edi
|
|
movl %edx,12(%r8)
|
|
xorl %edx,%edi
|
|
movl %ebp,16(%r8)
|
|
andl %edi,%esi
|
|
jmp .Loop_ssse3
|
|
|
|
# .Ldone_ssse3 -- tail for the last input block.
#
# Reached from .Loop_ssse3 when the input pointer equals the end pointer, i.e.
# after step 67 of the final block.  Runs the remaining scalar steps (68-79)
# without any vector work (there is no next block to prepare), folds the
# result into the state at (%r8), restores the saved registers and returns.
.align 16
|
|
.Ldone_ssse3:
|
|
# steps 68-71 take their input words from 16..28(%rsp)
addl 16(%rsp),%ebx
|
|
xorl %ebp,%esi
|
|
movl %ecx,%edi
|
|
roll $5,%ecx
|
|
addl %esi,%ebx
|
|
xorl %ebp,%edi
|
|
rorl $7,%edx
|
|
addl %ecx,%ebx
|
|
addl 20(%rsp),%eax
|
|
xorl %edx,%edi
|
|
movl %ebx,%esi
|
|
roll $5,%ebx
|
|
addl %edi,%eax
|
|
xorl %edx,%esi
|
|
rorl $7,%ecx
|
|
addl %ebx,%eax
|
|
addl 24(%rsp),%ebp
|
|
xorl %ecx,%esi
|
|
movl %eax,%edi
|
|
roll $5,%eax
|
|
addl %esi,%ebp
|
|
xorl %ecx,%edi
|
|
rorl $7,%ebx
|
|
addl %eax,%ebp
|
|
addl 28(%rsp),%edx
|
|
xorl %ebx,%edi
|
|
movl %ebp,%esi
|
|
roll $5,%ebp
|
|
addl %edi,%edx
|
|
xorl %ebx,%esi
|
|
rorl $7,%eax
|
|
addl %ebp,%edx
|
|
# steps 72-75 take their input words from 32..44(%rsp)
addl 32(%rsp),%ecx
|
|
xorl %eax,%esi
|
|
movl %edx,%edi
|
|
roll $5,%edx
|
|
addl %esi,%ecx
|
|
xorl %eax,%edi
|
|
rorl $7,%ebp
|
|
addl %edx,%ecx
|
|
addl 36(%rsp),%ebx
|
|
xorl %ebp,%edi
|
|
movl %ecx,%esi
|
|
roll $5,%ecx
|
|
addl %edi,%ebx
|
|
xorl %ebp,%esi
|
|
rorl $7,%edx
|
|
addl %ecx,%ebx
|
|
addl 40(%rsp),%eax
|
|
xorl %edx,%esi
|
|
movl %ebx,%edi
|
|
roll $5,%ebx
|
|
addl %esi,%eax
|
|
xorl %edx,%edi
|
|
rorl $7,%ecx
|
|
addl %ebx,%eax
|
|
addl 44(%rsp),%ebp
|
|
xorl %ecx,%edi
|
|
movl %eax,%esi
|
|
roll $5,%eax
|
|
addl %edi,%ebp
|
|
xorl %ecx,%esi
|
|
rorl $7,%ebx
|
|
addl %eax,%ebp
|
|
# steps 76-79 take their input words from 48..60(%rsp)
addl 48(%rsp),%edx
|
|
xorl %ebx,%esi
|
|
movl %ebp,%edi
|
|
roll $5,%ebp
|
|
addl %esi,%edx
|
|
xorl %ebx,%edi
|
|
rorl $7,%eax
|
|
addl %ebp,%edx
|
|
addl 52(%rsp),%ecx
|
|
xorl %eax,%edi
|
|
movl %edx,%esi
|
|
roll $5,%edx
|
|
addl %edi,%ecx
|
|
xorl %eax,%esi
|
|
rorl $7,%ebp
|
|
addl %edx,%ecx
|
|
addl 56(%rsp),%ebx
|
|
xorl %ebp,%esi
|
|
movl %ecx,%edi
|
|
roll $5,%ecx
|
|
addl %esi,%ebx
|
|
xorl %ebp,%edi
|
|
rorl $7,%edx
|
|
addl %ecx,%ebx
|
|
addl 60(%rsp),%eax
|
|
xorl %edx,%edi
|
|
movl %ebx,%esi
|
|
roll $5,%ebx
|
|
addl %edi,%eax
|
|
rorl $7,%ecx
|
|
addl %ebx,%eax
|
|
# Fold the working words into the stored state and write all five back.
# The second word is taken from esi (copied from ebx before its rotate).
addl 0(%r8),%eax
|
|
addl 4(%r8),%esi
|
|
addl 8(%r8),%ecx
|
|
movl %eax,0(%r8)
|
|
addl 12(%r8),%edx
|
|
movl %esi,4(%r8)
|
|
addl 16(%r8),%ebp
|
|
movl %ecx,8(%r8)
|
|
movl %edx,12(%r8)
|
|
movl %ebp,16(%r8)
|
|
# r14 holds the rsp value captured at function entry.  The five registers
# pushed there sit just below it: rbx at -8, rbp at -16, r12 at -24,
# r13 at -32, r14 at -40.  Reload them, then restore rsp itself.
leaq (%r14),%rsi
|
|
movq -40(%rsi),%r14
|
|
movq -32(%rsi),%r13
|
|
movq -24(%rsi),%r12
|
|
movq -16(%rsi),%rbp
|
|
movq -8(%rsi),%rbx
|
|
leaq (%rsi),%rsp
|
|
.Lepilogue_ssse3:
|
|
# 0xf3,0xc3 is the two-byte `rep ret` encoding of the return instruction.
.byte 0xf3,0xc3
|
|
.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
|
|
.type sha1_block_data_order_avx,@function
|
|
.align 16
|
|
sha1_block_data_order_avx:
|
|
_avx_shortcut:
|
|
movq %rsp,%rax
|
|
pushq %rbx
|
|
pushq %rbp
|
|
pushq %r12
|
|
pushq %r13
|
|
pushq %r14
|
|
leaq -64(%rsp),%rsp
|
|
vzeroupper
|
|
movq %rax,%r14
|
|
andq $-64,%rsp
|
|
movq %rdi,%r8
|
|
movq %rsi,%r9
|
|
movq %rdx,%r10
|
|
|
|
shlq $6,%r10
|
|
addq %r9,%r10
|
|
leaq K_XX_XX+64(%rip),%r11
|
|
|
|
movl 0(%r8),%eax
|
|
movl 4(%r8),%ebx
|
|
movl 8(%r8),%ecx
|
|
movl 12(%r8),%edx
|
|
movl %ebx,%esi
|
|
movl 16(%r8),%ebp
|
|
movl %ecx,%edi
|
|
xorl %edx,%edi
|
|
andl %edi,%esi
|
|
|
|
vmovdqa 64(%r11),%xmm6
|
|
vmovdqa -64(%r11),%xmm11
|
|
vmovdqu 0(%r9),%xmm0
|
|
vmovdqu 16(%r9),%xmm1
|
|
vmovdqu 32(%r9),%xmm2
|
|
vmovdqu 48(%r9),%xmm3
|
|
vpshufb %xmm6,%xmm0,%xmm0
|
|
addq $64,%r9
|
|
vpshufb %xmm6,%xmm1,%xmm1
|
|
vpshufb %xmm6,%xmm2,%xmm2
|
|
vpshufb %xmm6,%xmm3,%xmm3
|
|
vpaddd %xmm11,%xmm0,%xmm4
|
|
vpaddd %xmm11,%xmm1,%xmm5
|
|
vpaddd %xmm11,%xmm2,%xmm6
|
|
vmovdqa %xmm4,0(%rsp)
|
|
vmovdqa %xmm5,16(%rsp)
|
|
vmovdqa %xmm6,32(%rsp)
|
|
jmp .Loop_avx
|
|
.align 16
|
|
.Loop_avx:
|
|
shrdl $2,%ebx,%ebx
|
|
xorl %edx,%esi
|
|
vpalignr $8,%xmm0,%xmm1,%xmm4
|
|
movl %eax,%edi
|
|
addl 0(%rsp),%ebp
|
|
vpaddd %xmm3,%xmm11,%xmm9
|
|
xorl %ecx,%ebx
|
|
shldl $5,%eax,%eax
|
|
vpsrldq $4,%xmm3,%xmm8
|
|
addl %esi,%ebp
|
|
andl %ebx,%edi
|
|
vpxor %xmm0,%xmm4,%xmm4
|
|
xorl %ecx,%ebx
|
|
addl %eax,%ebp
|
|
vpxor %xmm2,%xmm8,%xmm8
|
|
shrdl $7,%eax,%eax
|
|
xorl %ecx,%edi
|
|
movl %ebp,%esi
|
|
addl 4(%rsp),%edx
|
|
vpxor %xmm8,%xmm4,%xmm4
|
|
xorl %ebx,%eax
|
|
shldl $5,%ebp,%ebp
|
|
vmovdqa %xmm9,48(%rsp)
|
|
addl %edi,%edx
|
|
andl %eax,%esi
|
|
vpsrld $31,%xmm4,%xmm8
|
|
xorl %ebx,%eax
|
|
addl %ebp,%edx
|
|
shrdl $7,%ebp,%ebp
|
|
xorl %ebx,%esi
|
|
vpslldq $12,%xmm4,%xmm10
|
|
vpaddd %xmm4,%xmm4,%xmm4
|
|
movl %edx,%edi
|
|
addl 8(%rsp),%ecx
|
|
xorl %eax,%ebp
|
|
shldl $5,%edx,%edx
|
|
vpsrld $30,%xmm10,%xmm9
|
|
vpor %xmm8,%xmm4,%xmm4
|
|
addl %esi,%ecx
|
|
andl %ebp,%edi
|
|
xorl %eax,%ebp
|
|
addl %edx,%ecx
|
|
vpslld $2,%xmm10,%xmm10
|
|
vpxor %xmm9,%xmm4,%xmm4
|
|
shrdl $7,%edx,%edx
|
|
xorl %eax,%edi
|
|
movl %ecx,%esi
|
|
addl 12(%rsp),%ebx
|
|
vpxor %xmm10,%xmm4,%xmm4
|
|
xorl %ebp,%edx
|
|
shldl $5,%ecx,%ecx
|
|
addl %edi,%ebx
|
|
andl %edx,%esi
|
|
xorl %ebp,%edx
|
|
addl %ecx,%ebx
|
|
shrdl $7,%ecx,%ecx
|
|
xorl %ebp,%esi
|
|
vpalignr $8,%xmm1,%xmm2,%xmm5
|
|
movl %ebx,%edi
|
|
addl 16(%rsp),%eax
|
|
vpaddd %xmm4,%xmm11,%xmm9
|
|
xorl %edx,%ecx
|
|
shldl $5,%ebx,%ebx
|
|
vpsrldq $4,%xmm4,%xmm8
|
|
addl %esi,%eax
|
|
andl %ecx,%edi
|
|
vpxor %xmm1,%xmm5,%xmm5
|
|
xorl %edx,%ecx
|
|
addl %ebx,%eax
|
|
vpxor %xmm3,%xmm8,%xmm8
|
|
shrdl $7,%ebx,%ebx
|
|
xorl %edx,%edi
|
|
movl %eax,%esi
|
|
addl 20(%rsp),%ebp
|
|
vpxor %xmm8,%xmm5,%xmm5
|
|
xorl %ecx,%ebx
|
|
shldl $5,%eax,%eax
|
|
vmovdqa %xmm9,0(%rsp)
|
|
addl %edi,%ebp
|
|
andl %ebx,%esi
|
|
vpsrld $31,%xmm5,%xmm8
|
|
xorl %ecx,%ebx
|
|
addl %eax,%ebp
|
|
shrdl $7,%eax,%eax
|
|
xorl %ecx,%esi
|
|
vpslldq $12,%xmm5,%xmm10
|
|
vpaddd %xmm5,%xmm5,%xmm5
|
|
movl %ebp,%edi
|
|
addl 24(%rsp),%edx
|
|
xorl %ebx,%eax
|
|
shldl $5,%ebp,%ebp
|
|
vpsrld $30,%xmm10,%xmm9
|
|
vpor %xmm8,%xmm5,%xmm5
|
|
addl %esi,%edx
|
|
andl %eax,%edi
|
|
xorl %ebx,%eax
|
|
addl %ebp,%edx
|
|
vpslld $2,%xmm10,%xmm10
|
|
vpxor %xmm9,%xmm5,%xmm5
|
|
shrdl $7,%ebp,%ebp
|
|
xorl %ebx,%edi
|
|
movl %edx,%esi
|
|
addl 28(%rsp),%ecx
|
|
vpxor %xmm10,%xmm5,%xmm5
|
|
xorl %eax,%ebp
|
|
shldl $5,%edx,%edx
|
|
vmovdqa -32(%r11),%xmm11
|
|
addl %edi,%ecx
|
|
andl %ebp,%esi
|
|
xorl %eax,%ebp
|
|
addl %edx,%ecx
|
|
shrdl $7,%edx,%edx
|
|
xorl %eax,%esi
|
|
vpalignr $8,%xmm2,%xmm3,%xmm6
|
|
movl %ecx,%edi
|
|
addl 32(%rsp),%ebx
|
|
vpaddd %xmm5,%xmm11,%xmm9
|
|
xorl %ebp,%edx
|
|
shldl $5,%ecx,%ecx
|
|
vpsrldq $4,%xmm5,%xmm8
|
|
addl %esi,%ebx
|
|
andl %edx,%edi
|
|
vpxor %xmm2,%xmm6,%xmm6
|
|
xorl %ebp,%edx
|
|
addl %ecx,%ebx
|
|
vpxor %xmm4,%xmm8,%xmm8
|
|
shrdl $7,%ecx,%ecx
|
|
xorl %ebp,%edi
|
|
movl %ebx,%esi
|
|
addl 36(%rsp),%eax
|
|
vpxor %xmm8,%xmm6,%xmm6
|
|
xorl %edx,%ecx
|
|
shldl $5,%ebx,%ebx
|
|
vmovdqa %xmm9,16(%rsp)
|
|
addl %edi,%eax
|
|
andl %ecx,%esi
|
|
vpsrld $31,%xmm6,%xmm8
|
|
xorl %edx,%ecx
|
|
addl %ebx,%eax
|
|
shrdl $7,%ebx,%ebx
|
|
xorl %edx,%esi
|
|
vpslldq $12,%xmm6,%xmm10
|
|
vpaddd %xmm6,%xmm6,%xmm6
|
|
movl %eax,%edi
|
|
addl 40(%rsp),%ebp
|
|
xorl %ecx,%ebx
|
|
shldl $5,%eax,%eax
|
|
vpsrld $30,%xmm10,%xmm9
|
|
vpor %xmm8,%xmm6,%xmm6
|
|
addl %esi,%ebp
|
|
andl %ebx,%edi
|
|
xorl %ecx,%ebx
|
|
addl %eax,%ebp
|
|
vpslld $2,%xmm10,%xmm10
|
|
vpxor %xmm9,%xmm6,%xmm6
|
|
shrdl $7,%eax,%eax
|
|
xorl %ecx,%edi
|
|
movl %ebp,%esi
|
|
addl 44(%rsp),%edx
|
|
vpxor %xmm10,%xmm6,%xmm6
|
|
xorl %ebx,%eax
|
|
shldl $5,%ebp,%ebp
|
|
addl %edi,%edx
|
|
andl %eax,%esi
|
|
xorl %ebx,%eax
|
|
addl %ebp,%edx
|
|
shrdl $7,%ebp,%ebp
|
|
xorl %ebx,%esi
|
|
vpalignr $8,%xmm3,%xmm4,%xmm7
|
|
movl %edx,%edi
|
|
addl 48(%rsp),%ecx
|
|
vpaddd %xmm6,%xmm11,%xmm9
|
|
xorl %eax,%ebp
|
|
shldl $5,%edx,%edx
|
|
vpsrldq $4,%xmm6,%xmm8
|
|
addl %esi,%ecx
|
|
andl %ebp,%edi
|
|
vpxor %xmm3,%xmm7,%xmm7
|
|
xorl %eax,%ebp
|
|
addl %edx,%ecx
|
|
vpxor %xmm5,%xmm8,%xmm8
|
|
shrdl $7,%edx,%edx
|
|
xorl %eax,%edi
|
|
movl %ecx,%esi
|
|
addl 52(%rsp),%ebx
|
|
vpxor %xmm8,%xmm7,%xmm7
|
|
xorl %ebp,%edx
|
|
shldl $5,%ecx,%ecx
|
|
vmovdqa %xmm9,32(%rsp)
|
|
addl %edi,%ebx
|
|
andl %edx,%esi
|
|
vpsrld $31,%xmm7,%xmm8
|
|
xorl %ebp,%edx
|
|
addl %ecx,%ebx
|
|
shrdl $7,%ecx,%ecx
|
|
xorl %ebp,%esi
|
|
vpslldq $12,%xmm7,%xmm10
|
|
vpaddd %xmm7,%xmm7,%xmm7
|
|
movl %ebx,%edi
|
|
addl 56(%rsp),%eax
|
|
xorl %edx,%ecx
|
|
shldl $5,%ebx,%ebx
|
|
vpsrld $30,%xmm10,%xmm9
|
|
vpor %xmm8,%xmm7,%xmm7
|
|
addl %esi,%eax
|
|
andl %ecx,%edi
|
|
xorl %edx,%ecx
|
|
addl %ebx,%eax
|
|
vpslld $2,%xmm10,%xmm10
|
|
vpxor %xmm9,%xmm7,%xmm7
|
|
shrdl $7,%ebx,%ebx
|
|
xorl %edx,%edi
|
|
movl %eax,%esi
|
|
addl 60(%rsp),%ebp
|
|
vpxor %xmm10,%xmm7,%xmm7
|
|
xorl %ecx,%ebx
|
|
shldl $5,%eax,%eax
|
|
addl %edi,%ebp
|
|
andl %ebx,%esi
|
|
xorl %ecx,%ebx
|
|
addl %eax,%ebp
|
|
vpalignr $8,%xmm6,%xmm7,%xmm8
|
|
vpxor %xmm4,%xmm0,%xmm0
|
|
shrdl $7,%eax,%eax
|
|
xorl %ecx,%esi
|
|
movl %ebp,%edi
|
|
addl 0(%rsp),%edx
|
|
vpxor %xmm1,%xmm0,%xmm0
|
|
xorl %ebx,%eax
|
|
shldl $5,%ebp,%ebp
|
|
vpaddd %xmm7,%xmm11,%xmm9
|
|
addl %esi,%edx
|
|
andl %eax,%edi
|
|
vpxor %xmm8,%xmm0,%xmm0
|
|
xorl %ebx,%eax
|
|
addl %ebp,%edx
|
|
shrdl $7,%ebp,%ebp
|
|
xorl %ebx,%edi
|
|
vpsrld $30,%xmm0,%xmm8
|
|
vmovdqa %xmm9,48(%rsp)
|
|
movl %edx,%esi
|
|
addl 4(%rsp),%ecx
|
|
xorl %eax,%ebp
|
|
shldl $5,%edx,%edx
|
|
vpslld $2,%xmm0,%xmm0
|
|
addl %edi,%ecx
|
|
andl %ebp,%esi
|
|
xorl %eax,%ebp
|
|
addl %edx,%ecx
|
|
shrdl $7,%edx,%edx
|
|
xorl %eax,%esi
|
|
movl %ecx,%edi
|
|
addl 8(%rsp),%ebx
|
|
vpor %xmm8,%xmm0,%xmm0
|
|
xorl %ebp,%edx
|
|
shldl $5,%ecx,%ecx
|
|
addl %esi,%ebx
|
|
andl %edx,%edi
|
|
xorl %ebp,%edx
|
|
addl %ecx,%ebx
|
|
addl 12(%rsp),%eax
|
|
xorl %ebp,%edi
|
|
movl %ebx,%esi
|
|
shldl $5,%ebx,%ebx
|
|
addl %edi,%eax
|
|
xorl %edx,%esi
|
|
shrdl $7,%ecx,%ecx
|
|
addl %ebx,%eax
|
|
vpalignr $8,%xmm7,%xmm0,%xmm8
|
|
vpxor %xmm5,%xmm1,%xmm1
|
|
addl 16(%rsp),%ebp
|
|
xorl %ecx,%esi
|
|
movl %eax,%edi
|
|
shldl $5,%eax,%eax
|
|
vpxor %xmm2,%xmm1,%xmm1
|
|
addl %esi,%ebp
|
|
xorl %ecx,%edi
|
|
vpaddd %xmm0,%xmm11,%xmm9
|
|
shrdl $7,%ebx,%ebx
|
|
addl %eax,%ebp
|
|
vpxor %xmm8,%xmm1,%xmm1
|
|
addl 20(%rsp),%edx
|
|
xorl %ebx,%edi
|
|
movl %ebp,%esi
|
|
shldl $5,%ebp,%ebp
|
|
vpsrld $30,%xmm1,%xmm8
|
|
vmovdqa %xmm9,0(%rsp)
|
|
addl %edi,%edx
|
|
xorl %ebx,%esi
|
|
shrdl $7,%eax,%eax
|
|
addl %ebp,%edx
|
|
vpslld $2,%xmm1,%xmm1
|
|
addl 24(%rsp),%ecx
|
|
xorl %eax,%esi
|
|
movl %edx,%edi
|
|
shldl $5,%edx,%edx
|
|
addl %esi,%ecx
|
|
xorl %eax,%edi
|
|
shrdl $7,%ebp,%ebp
|
|
addl %edx,%ecx
|
|
vpor %xmm8,%xmm1,%xmm1
|
|
addl 28(%rsp),%ebx
|
|
xorl %ebp,%edi
|
|
movl %ecx,%esi
|
|
shldl $5,%ecx,%ecx
|
|
addl %edi,%ebx
|
|
xorl %ebp,%esi
|
|
shrdl $7,%edx,%edx
|
|
addl %ecx,%ebx
|
|
vpalignr $8,%xmm0,%xmm1,%xmm8
|
|
vpxor %xmm6,%xmm2,%xmm2
|
|
addl 32(%rsp),%eax
|
|
xorl %edx,%esi
|
|
movl %ebx,%edi
|
|
shldl $5,%ebx,%ebx
|
|
vpxor %xmm3,%xmm2,%xmm2
|
|
addl %esi,%eax
|
|
xorl %edx,%edi
|
|
vpaddd %xmm1,%xmm11,%xmm9
|
|
vmovdqa 0(%r11),%xmm11
|
|
shrdl $7,%ecx,%ecx
|
|
addl %ebx,%eax
|
|
vpxor %xmm8,%xmm2,%xmm2
|
|
addl 36(%rsp),%ebp
|
|
xorl %ecx,%edi
|
|
movl %eax,%esi
|
|
shldl $5,%eax,%eax
|
|
vpsrld $30,%xmm2,%xmm8
|
|
vmovdqa %xmm9,16(%rsp)
|
|
addl %edi,%ebp
|
|
xorl %ecx,%esi
|
|
shrdl $7,%ebx,%ebx
|
|
addl %eax,%ebp
|
|
vpslld $2,%xmm2,%xmm2
|
|
addl 40(%rsp),%edx
|
|
xorl %ebx,%esi
|
|
movl %ebp,%edi
|
|
shldl $5,%ebp,%ebp
|
|
addl %esi,%edx
|
|
xorl %ebx,%edi
|
|
shrdl $7,%eax,%eax
|
|
addl %ebp,%edx
|
|
vpor %xmm8,%xmm2,%xmm2
|
|
addl 44(%rsp),%ecx
|
|
xorl %eax,%edi
|
|
movl %edx,%esi
|
|
shldl $5,%edx,%edx
|
|
addl %edi,%ecx
|
|
xorl %eax,%esi
|
|
shrdl $7,%ebp,%ebp
|
|
addl %edx,%ecx
|
|
vpalignr $8,%xmm1,%xmm2,%xmm8
|
|
vpxor %xmm7,%xmm3,%xmm3
|
|
addl 48(%rsp),%ebx
|
|
xorl %ebp,%esi
|
|
movl %ecx,%edi
|
|
shldl $5,%ecx,%ecx
|
|
vpxor %xmm4,%xmm3,%xmm3
|
|
addl %esi,%ebx
|
|
xorl %ebp,%edi
|
|
vpaddd %xmm2,%xmm11,%xmm9
|
|
shrdl $7,%edx,%edx
|
|
addl %ecx,%ebx
|
|
vpxor %xmm8,%xmm3,%xmm3
|
|
addl 52(%rsp),%eax
|
|
xorl %edx,%edi
|
|
movl %ebx,%esi
|
|
shldl $5,%ebx,%ebx
|
|
vpsrld $30,%xmm3,%xmm8
|
|
vmovdqa %xmm9,32(%rsp)
|
|
addl %edi,%eax
|
|
xorl %edx,%esi
|
|
shrdl $7,%ecx,%ecx
|
|
addl %ebx,%eax
|
|
vpslld $2,%xmm3,%xmm3
|
|
addl 56(%rsp),%ebp
|
|
xorl %ecx,%esi
|
|
movl %eax,%edi
|
|
shldl $5,%eax,%eax
|
|
addl %esi,%ebp
|
|
xorl %ecx,%edi
|
|
shrdl $7,%ebx,%ebx
|
|
addl %eax,%ebp
|
|
vpor %xmm8,%xmm3,%xmm3
|
|
addl 60(%rsp),%edx
|
|
xorl %ebx,%edi
|
|
movl %ebp,%esi
|
|
shldl $5,%ebp,%ebp
|
|
addl %edi,%edx
|
|
xorl %ebx,%esi
|
|
shrdl $7,%eax,%eax
|
|
addl %ebp,%edx
|
|
vpalignr $8,%xmm2,%xmm3,%xmm8
|
|
vpxor %xmm0,%xmm4,%xmm4
|
|
addl 0(%rsp),%ecx
|
|
xorl %eax,%esi
|
|
movl %edx,%edi
|
|
shldl $5,%edx,%edx
|
|
vpxor %xmm5,%xmm4,%xmm4
|
|
addl %esi,%ecx
|
|
xorl %eax,%edi
|
|
vpaddd %xmm3,%xmm11,%xmm9
|
|
shrdl $7,%ebp,%ebp
|
|
addl %edx,%ecx
|
|
vpxor %xmm8,%xmm4,%xmm4
|
|
addl 4(%rsp),%ebx
|
|
xorl %ebp,%edi
|
|
movl %ecx,%esi
|
|
shldl $5,%ecx,%ecx
|
|
vpsrld $30,%xmm4,%xmm8
|
|
vmovdqa %xmm9,48(%rsp)
|
|
addl %edi,%ebx
|
|
xorl %ebp,%esi
|
|
shrdl $7,%edx,%edx
|
|
addl %ecx,%ebx
|
|
vpslld $2,%xmm4,%xmm4
|
|
addl 8(%rsp),%eax
|
|
xorl %edx,%esi
|
|
movl %ebx,%edi
|
|
shldl $5,%ebx,%ebx
|
|
addl %esi,%eax
|
|
xorl %edx,%edi
|
|
shrdl $7,%ecx,%ecx
|
|
addl %ebx,%eax
|
|
vpor %xmm8,%xmm4,%xmm4
|
|
addl 12(%rsp),%ebp
|
|
xorl %ecx,%edi
|
|
movl %eax,%esi
|
|
shldl $5,%eax,%eax
|
|
addl %edi,%ebp
|
|
xorl %ecx,%esi
|
|
shrdl $7,%ebx,%ebx
|
|
addl %eax,%ebp
|
|
vpalignr $8,%xmm3,%xmm4,%xmm8
|
|
vpxor %xmm1,%xmm5,%xmm5
|
|
addl 16(%rsp),%edx
|
|
xorl %ebx,%esi
|
|
movl %ebp,%edi
|
|
shldl $5,%ebp,%ebp
|
|
vpxor %xmm6,%xmm5,%xmm5
|
|
addl %esi,%edx
|
|
xorl %ebx,%edi
|
|
vpaddd %xmm4,%xmm11,%xmm9
|
|
shrdl $7,%eax,%eax
|
|
addl %ebp,%edx
|
|
vpxor %xmm8,%xmm5,%xmm5
|
|
addl 20(%rsp),%ecx
|
|
xorl %eax,%edi
|
|
movl %edx,%esi
|
|
shldl $5,%edx,%edx
|
|
vpsrld $30,%xmm5,%xmm8
|
|
vmovdqa %xmm9,0(%rsp)
|
|
addl %edi,%ecx
|
|
xorl %eax,%esi
|
|
shrdl $7,%ebp,%ebp
|
|
addl %edx,%ecx
|
|
vpslld $2,%xmm5,%xmm5
|
|
addl 24(%rsp),%ebx
|
|
xorl %ebp,%esi
|
|
movl %ecx,%edi
|
|
shldl $5,%ecx,%ecx
|
|
addl %esi,%ebx
|
|
xorl %ebp,%edi
|
|
shrdl $7,%edx,%edx
|
|
addl %ecx,%ebx
|
|
vpor %xmm8,%xmm5,%xmm5
|
|
addl 28(%rsp),%eax
|
|
shrdl $7,%ecx,%ecx
|
|
movl %ebx,%esi
|
|
xorl %edx,%edi
|
|
shldl $5,%ebx,%ebx
|
|
addl %edi,%eax
|
|
xorl %ecx,%esi
|
|
xorl %edx,%ecx
|
|
addl %ebx,%eax
|
|
vpalignr $8,%xmm4,%xmm5,%xmm8
|
|
vpxor %xmm2,%xmm6,%xmm6
|
|
addl 32(%rsp),%ebp
|
|
andl %ecx,%esi
|
|
xorl %edx,%ecx
|
|
shrdl $7,%ebx,%ebx
|
|
vpxor %xmm7,%xmm6,%xmm6
|
|
movl %eax,%edi
|
|
xorl %ecx,%esi
|
|
vpaddd %xmm5,%xmm11,%xmm9
|
|
shldl $5,%eax,%eax
|
|
addl %esi,%ebp
|
|
vpxor %xmm8,%xmm6,%xmm6
|
|
xorl %ebx,%edi
|
|
xorl %ecx,%ebx
|
|
addl %eax,%ebp
|
|
addl 36(%rsp),%edx
|
|
vpsrld $30,%xmm6,%xmm8
|
|
vmovdqa %xmm9,16(%rsp)
|
|
andl %ebx,%edi
|
|
xorl %ecx,%ebx
|
|
shrdl $7,%eax,%eax
|
|
movl %ebp,%esi
|
|
vpslld $2,%xmm6,%xmm6
|
|
xorl %ebx,%edi
|
|
shldl $5,%ebp,%ebp
|
|
addl %edi,%edx
|
|
xorl %eax,%esi
|
|
xorl %ebx,%eax
|
|
addl %ebp,%edx
|
|
addl 40(%rsp),%ecx
|
|
andl %eax,%esi
|
|
vpor %xmm8,%xmm6,%xmm6
|
|
xorl %ebx,%eax
|
|
shrdl $7,%ebp,%ebp
|
|
movl %edx,%edi
|
|
xorl %eax,%esi
|
|
shldl $5,%edx,%edx
|
|
addl %esi,%ecx
|
|
xorl %ebp,%edi
|
|
xorl %eax,%ebp
|
|
addl %edx,%ecx
|
|
addl 44(%rsp),%ebx
|
|
andl %ebp,%edi
|
|
xorl %eax,%ebp
|
|
shrdl $7,%edx,%edx
|
|
movl %ecx,%esi
|
|
xorl %ebp,%edi
|
|
shldl $5,%ecx,%ecx
|
|
addl %edi,%ebx
|
|
xorl %edx,%esi
|
|
xorl %ebp,%edx
|
|
addl %ecx,%ebx
|
|
vpalignr $8,%xmm5,%xmm6,%xmm8
|
|
vpxor %xmm3,%xmm7,%xmm7
|
|
addl 48(%rsp),%eax
|
|
andl %edx,%esi
|
|
xorl %ebp,%edx
|
|
shrdl $7,%ecx,%ecx
|
|
vpxor %xmm0,%xmm7,%xmm7
|
|
movl %ebx,%edi
|
|
xorl %edx,%esi
|
|
vpaddd %xmm6,%xmm11,%xmm9
|
|
vmovdqa 32(%r11),%xmm11
|
|
shldl $5,%ebx,%ebx
|
|
addl %esi,%eax
|
|
vpxor %xmm8,%xmm7,%xmm7
|
|
xorl %ecx,%edi
|
|
xorl %edx,%ecx
|
|
addl %ebx,%eax
|
|
addl 52(%rsp),%ebp
|
|
vpsrld $30,%xmm7,%xmm8
|
|
vmovdqa %xmm9,32(%rsp)
|
|
andl %ecx,%edi
|
|
xorl %edx,%ecx
|
|
shrdl $7,%ebx,%ebx
|
|
movl %eax,%esi
|
|
vpslld $2,%xmm7,%xmm7
|
|
xorl %ecx,%edi
|
|
shldl $5,%eax,%eax
|
|
addl %edi,%ebp
|
|
xorl %ebx,%esi
|
|
xorl %ecx,%ebx
|
|
addl %eax,%ebp
|
|
addl 56(%rsp),%edx
|
|
andl %ebx,%esi
|
|
vpor %xmm8,%xmm7,%xmm7
|
|
xorl %ecx,%ebx
|
|
shrdl $7,%eax,%eax
|
|
movl %ebp,%edi
|
|
xorl %ebx,%esi
|
|
shldl $5,%ebp,%ebp
|
|
addl %esi,%edx
|
|
xorl %eax,%edi
|
|
xorl %ebx,%eax
|
|
addl %ebp,%edx
|
|
addl 60(%rsp),%ecx
|
|
andl %eax,%edi
|
|
xorl %ebx,%eax
|
|
shrdl $7,%ebp,%ebp
|
|
movl %edx,%esi
|
|
xorl %eax,%edi
|
|
shldl $5,%edx,%edx
|
|
addl %edi,%ecx
|
|
xorl %ebp,%esi
|
|
xorl %eax,%ebp
|
|
addl %edx,%ecx
|
|
vpalignr $8,%xmm6,%xmm7,%xmm8
|
|
vpxor %xmm4,%xmm0,%xmm0
|
|
addl 0(%rsp),%ebx
|
|
andl %ebp,%esi
|
|
xorl %eax,%ebp
|
|
shrdl $7,%edx,%edx
|
|
vpxor %xmm1,%xmm0,%xmm0
|
|
movl %ecx,%edi
|
|
xorl %ebp,%esi
|
|
vpaddd %xmm7,%xmm11,%xmm9
|
|
shldl $5,%ecx,%ecx
|
|
addl %esi,%ebx
|
|
vpxor %xmm8,%xmm0,%xmm0
|
|
xorl %edx,%edi
|
|
xorl %ebp,%edx
|
|
addl %ecx,%ebx
|
|
addl 4(%rsp),%eax
|
|
vpsrld $30,%xmm0,%xmm8
|
|
vmovdqa %xmm9,48(%rsp)
|
|
andl %edx,%edi
|
|
xorl %ebp,%edx
|
|
shrdl $7,%ecx,%ecx
|
|
movl %ebx,%esi
|
|
vpslld $2,%xmm0,%xmm0
|
|
xorl %edx,%edi
|
|
shldl $5,%ebx,%ebx
|
|
addl %edi,%eax
|
|
xorl %ecx,%esi
|
|
xorl %edx,%ecx
|
|
addl %ebx,%eax
|
|
addl 8(%rsp),%ebp
|
|
andl %ecx,%esi
|
|
vpor %xmm8,%xmm0,%xmm0
|
|
xorl %edx,%ecx
|
|
shrdl $7,%ebx,%ebx
|
|
movl %eax,%edi
|
|
xorl %ecx,%esi
|
|
shldl $5,%eax,%eax
|
|
addl %esi,%ebp
|
|
xorl %ebx,%edi
|
|
xorl %ecx,%ebx
|
|
addl %eax,%ebp
|
|
addl 12(%rsp),%edx
|
|
andl %ebx,%edi
|
|
xorl %ecx,%ebx
|
|
shrdl $7,%eax,%eax
|
|
movl %ebp,%esi
|
|
xorl %ebx,%edi
|
|
shldl $5,%ebp,%ebp
|
|
addl %edi,%edx
|
|
xorl %eax,%esi
|
|
xorl %ebx,%eax
|
|
addl %ebp,%edx
|
|
vpalignr $8,%xmm7,%xmm0,%xmm8
|
|
vpxor %xmm5,%xmm1,%xmm1
|
|
addl 16(%rsp),%ecx
|
|
andl %eax,%esi
|
|
xorl %ebx,%eax
|
|
shrdl $7,%ebp,%ebp
|
|
vpxor %xmm2,%xmm1,%xmm1
|
|
movl %edx,%edi
|
|
xorl %eax,%esi
|
|
vpaddd %xmm0,%xmm11,%xmm9
|
|
shldl $5,%edx,%edx
|
|
addl %esi,%ecx
|
|
vpxor %xmm8,%xmm1,%xmm1
|
|
xorl %ebp,%edi
|
|
xorl %eax,%ebp
|
|
addl %edx,%ecx
|
|
addl 20(%rsp),%ebx
|
|
vpsrld $30,%xmm1,%xmm8
|
|
vmovdqa %xmm9,0(%rsp)
|
|
andl %ebp,%edi
|
|
xorl %eax,%ebp
|
|
shrdl $7,%edx,%edx
|
|
movl %ecx,%esi
|
|
vpslld $2,%xmm1,%xmm1
|
|
xorl %ebp,%edi
|
|
shldl $5,%ecx,%ecx
|
|
addl %edi,%ebx
|
|
xorl %edx,%esi
|
|
xorl %ebp,%edx
|
|
addl %ecx,%ebx
|
|
addl 24(%rsp),%eax
|
|
andl %edx,%esi
|
|
vpor %xmm8,%xmm1,%xmm1
|
|
xorl %ebp,%edx
|
|
shrdl $7,%ecx,%ecx
|
|
movl %ebx,%edi
|
|
xorl %edx,%esi
|
|
shldl $5,%ebx,%ebx
|
|
addl %esi,%eax
|
|
xorl %ecx,%edi
|
|
xorl %edx,%ecx
|
|
addl %ebx,%eax
|
|
addl 28(%rsp),%ebp
|
|
andl %ecx,%edi
|
|
xorl %edx,%ecx
|
|
shrdl $7,%ebx,%ebx
|
|
movl %eax,%esi
|
|
xorl %ecx,%edi
|
|
shldl $5,%eax,%eax
|
|
addl %edi,%ebp
|
|
xorl %ebx,%esi
|
|
xorl %ecx,%ebx
|
|
addl %eax,%ebp
|
|
vpalignr $8,%xmm0,%xmm1,%xmm8
|
|
vpxor %xmm6,%xmm2,%xmm2
|
|
addl 32(%rsp),%edx
|
|
andl %ebx,%esi
|
|
xorl %ecx,%ebx
|
|
shrdl $7,%eax,%eax
|
|
vpxor %xmm3,%xmm2,%xmm2
|
|
movl %ebp,%edi
|
|
xorl %ebx,%esi
|
|
vpaddd %xmm1,%xmm11,%xmm9
|
|
shldl $5,%ebp,%ebp
|
|
addl %esi,%edx
|
|
vpxor %xmm8,%xmm2,%xmm2
|
|
xorl %eax,%edi
|
|
xorl %ebx,%eax
|
|
addl %ebp,%edx
|
|
addl 36(%rsp),%ecx
|
|
vpsrld $30,%xmm2,%xmm8
|
|
vmovdqa %xmm9,16(%rsp)
|
|
andl %eax,%edi
|
|
xorl %ebx,%eax
|
|
shrdl $7,%ebp,%ebp
|
|
movl %edx,%esi
|
|
vpslld $2,%xmm2,%xmm2
|
|
xorl %eax,%edi
|
|
shldl $5,%edx,%edx
|
|
addl %edi,%ecx
|
|
xorl %ebp,%esi
|
|
xorl %eax,%ebp
|
|
addl %edx,%ecx
|
|
addl 40(%rsp),%ebx
|
|
andl %ebp,%esi
|
|
vpor %xmm8,%xmm2,%xmm2
|
|
xorl %eax,%ebp
|
|
shrdl $7,%edx,%edx
|
|
movl %ecx,%edi
|
|
xorl %ebp,%esi
|
|
shldl $5,%ecx,%ecx
|
|
addl %esi,%ebx
|
|
xorl %edx,%edi
|
|
xorl %ebp,%edx
|
|
addl %ecx,%ebx
|
|
addl 44(%rsp),%eax
|
|
andl %edx,%edi
|
|
xorl %ebp,%edx
|
|
shrdl $7,%ecx,%ecx
|
|
movl %ebx,%esi
|
|
xorl %edx,%edi
|
|
shldl $5,%ebx,%ebx
|
|
addl %edi,%eax
|
|
xorl %edx,%esi
|
|
addl %ebx,%eax
|
|
vpalignr $8,%xmm1,%xmm2,%xmm8
|
|
vpxor %xmm7,%xmm3,%xmm3
|
|
addl 48(%rsp),%ebp
|
|
xorl %ecx,%esi
|
|
movl %eax,%edi
|
|
shldl $5,%eax,%eax
|
|
vpxor %xmm4,%xmm3,%xmm3
|
|
addl %esi,%ebp
|
|
xorl %ecx,%edi
|
|
vpaddd %xmm2,%xmm11,%xmm9
|
|
shrdl $7,%ebx,%ebx
|
|
addl %eax,%ebp
|
|
vpxor %xmm8,%xmm3,%xmm3
|
|
addl 52(%rsp),%edx
|
|
xorl %ebx,%edi
|
|
movl %ebp,%esi
|
|
shldl $5,%ebp,%ebp
|
|
vpsrld $30,%xmm3,%xmm8
|
|
vmovdqa %xmm9,32(%rsp)
|
|
addl %edi,%edx
|
|
xorl %ebx,%esi
|
|
shrdl $7,%eax,%eax
|
|
addl %ebp,%edx
|
|
vpslld $2,%xmm3,%xmm3
|
|
addl 56(%rsp),%ecx
|
|
xorl %eax,%esi
|
|
movl %edx,%edi
|
|
shldl $5,%edx,%edx
|
|
addl %esi,%ecx
|
|
xorl %eax,%edi
|
|
shrdl $7,%ebp,%ebp
|
|
addl %edx,%ecx
|
|
vpor %xmm8,%xmm3,%xmm3
|
|
addl 60(%rsp),%ebx
|
|
xorl %ebp,%edi
|
|
movl %ecx,%esi
|
|
shldl $5,%ecx,%ecx
|
|
addl %edi,%ebx
|
|
xorl %ebp,%esi
|
|
shrdl $7,%edx,%edx
|
|
addl %ecx,%ebx
|
|
addl 0(%rsp),%eax
|
|
vpaddd %xmm3,%xmm11,%xmm9
|
|
xorl %edx,%esi
|
|
movl %ebx,%edi
|
|
shldl $5,%ebx,%ebx
|
|
addl %esi,%eax
|
|
vmovdqa %xmm9,48(%rsp)
|
|
xorl %edx,%edi
|
|
shrdl $7,%ecx,%ecx
|
|
addl %ebx,%eax
|
|
addl 4(%rsp),%ebp
|
|
xorl %ecx,%edi
|
|
movl %eax,%esi
|
|
shldl $5,%eax,%eax
|
|
addl %edi,%ebp
|
|
xorl %ecx,%esi
|
|
shrdl $7,%ebx,%ebx
|
|
addl %eax,%ebp
|
|
addl 8(%rsp),%edx
|
|
xorl %ebx,%esi
|
|
movl %ebp,%edi
|
|
shldl $5,%ebp,%ebp
|
|
addl %esi,%edx
|
|
xorl %ebx,%edi
|
|
shrdl $7,%eax,%eax
|
|
addl %ebp,%edx
|
|
addl 12(%rsp),%ecx
|
|
xorl %eax,%edi
|
|
movl %edx,%esi
|
|
shldl $5,%edx,%edx
|
|
addl %edi,%ecx
|
|
xorl %eax,%esi
|
|
shrdl $7,%ebp,%ebp
|
|
addl %edx,%ecx
|
|
cmpq %r10,%r9
|
|
je .Ldone_avx
|
|
vmovdqa 64(%r11),%xmm6
|
|
vmovdqa -64(%r11),%xmm11
|
|
vmovdqu 0(%r9),%xmm0
|
|
vmovdqu 16(%r9),%xmm1
|
|
vmovdqu 32(%r9),%xmm2
|
|
vmovdqu 48(%r9),%xmm3
|
|
vpshufb %xmm6,%xmm0,%xmm0
|
|
addq $64,%r9
|
|
addl 16(%rsp),%ebx
|
|
xorl %ebp,%esi
|
|
vpshufb %xmm6,%xmm1,%xmm1
|
|
movl %ecx,%edi
|
|
shldl $5,%ecx,%ecx
|
|
vpaddd %xmm11,%xmm0,%xmm4
|
|
addl %esi,%ebx
|
|
xorl %ebp,%edi
|
|
shrdl $7,%edx,%edx
|
|
addl %ecx,%ebx
|
|
vmovdqa %xmm4,0(%rsp)
|
|
addl 20(%rsp),%eax
|
|
xorl %edx,%edi
|
|
movl %ebx,%esi
|
|
shldl $5,%ebx,%ebx
|
|
addl %edi,%eax
|
|
xorl %edx,%esi
|
|
shrdl $7,%ecx,%ecx
|
|
addl %ebx,%eax
|
|
addl 24(%rsp),%ebp
|
|
xorl %ecx,%esi
|
|
movl %eax,%edi
|
|
shldl $5,%eax,%eax
|
|
addl %esi,%ebp
|
|
xorl %ecx,%edi
|
|
shrdl $7,%ebx,%ebx
|
|
addl %eax,%ebp
|
|
addl 28(%rsp),%edx
|
|
xorl %ebx,%edi
|
|
movl %ebp,%esi
|
|
shldl $5,%ebp,%ebp
|
|
addl %edi,%edx
|
|
xorl %ebx,%esi
|
|
shrdl $7,%eax,%eax
|
|
addl %ebp,%edx
|
|
addl 32(%rsp),%ecx
|
|
xorl %eax,%esi
|
|
vpshufb %xmm6,%xmm2,%xmm2
|
|
movl %edx,%edi
|
|
shldl $5,%edx,%edx
|
|
vpaddd %xmm11,%xmm1,%xmm5
|
|
addl %esi,%ecx
|
|
xorl %eax,%edi
|
|
shrdl $7,%ebp,%ebp
|
|
addl %edx,%ecx
|
|
vmovdqa %xmm5,16(%rsp)
|
|
addl 36(%rsp),%ebx
|
|
xorl %ebp,%edi
|
|
movl %ecx,%esi
|
|
shldl $5,%ecx,%ecx
|
|
addl %edi,%ebx
|
|
xorl %ebp,%esi
|
|
shrdl $7,%edx,%edx
|
|
addl %ecx,%ebx
|
|
addl 40(%rsp),%eax
|
|
xorl %edx,%esi
|
|
movl %ebx,%edi
|
|
shldl $5,%ebx,%ebx
|
|
addl %esi,%eax
|
|
xorl %edx,%edi
|
|
shrdl $7,%ecx,%ecx
|
|
addl %ebx,%eax
|
|
addl 44(%rsp),%ebp
|
|
xorl %ecx,%edi
|
|
movl %eax,%esi
|
|
shldl $5,%eax,%eax
|
|
addl %edi,%ebp
|
|
xorl %ecx,%esi
|
|
shrdl $7,%ebx,%ebx
|
|
addl %eax,%ebp
|
|
addl 48(%rsp),%edx
|
|
xorl %ebx,%esi
|
|
vpshufb %xmm6,%xmm3,%xmm3
|
|
movl %ebp,%edi
|
|
shldl $5,%ebp,%ebp
|
|
vpaddd %xmm11,%xmm2,%xmm6
|
|
addl %esi,%edx
|
|
xorl %ebx,%edi
|
|
shrdl $7,%eax,%eax
|
|
addl %ebp,%edx
|
|
vmovdqa %xmm6,32(%rsp)
|
|
addl 52(%rsp),%ecx
|
|
xorl %eax,%edi
|
|
movl %edx,%esi
|
|
shldl $5,%edx,%edx
|
|
addl %edi,%ecx
|
|
xorl %eax,%esi
|
|
shrdl $7,%ebp,%ebp
|
|
addl %edx,%ecx
|
|
addl 56(%rsp),%ebx
|
|
xorl %ebp,%esi
|
|
movl %ecx,%edi
|
|
shldl $5,%ecx,%ecx
|
|
addl %esi,%ebx
|
|
xorl %ebp,%edi
|
|
shrdl $7,%edx,%edx
|
|
addl %ecx,%ebx
|
|
addl 60(%rsp),%eax
|
|
xorl %edx,%edi
|
|
movl %ebx,%esi
|
|
shldl $5,%ebx,%ebx
|
|
addl %edi,%eax
|
|
shrdl $7,%ecx,%ecx
|
|
addl %ebx,%eax
|
|
addl 0(%r8),%eax
|
|
addl 4(%r8),%esi
|
|
addl 8(%r8),%ecx
|
|
addl 12(%r8),%edx
|
|
movl %eax,0(%r8)
|
|
addl 16(%r8),%ebp
|
|
movl %esi,4(%r8)
|
|
movl %esi,%ebx
|
|
movl %ecx,8(%r8)
|
|
movl %ecx,%edi
|
|
movl %edx,12(%r8)
|
|
xorl %edx,%edi
|
|
movl %ebp,16(%r8)
|
|
andl %edi,%esi
|
|
jmp .Loop_avx
|
|
|
|
.align 16
|
|
# Exit tail of sha1_block_data_order_avx, entered by the "je .Ldone_avx"
# taken when %r9 equals %r10. It runs the scalar rounds that consume the
# stack words at 16(%rsp)..60(%rsp) without any vector instructions, adds
# the five working registers into the five 32-bit words at (%r8), restores
# the saved registers and returns.
# NOTE(review): the stack words are presumably the "schedule word + round
# constant" sums stored by the vpaddd/vmovdqa pairs earlier in the routine;
# confirm against the part of the function outside this chunk.
.Ldone_avx:
|
|
# Round shape used by every group below (registers rotate from group to group):
#   dest += stack word; dest += XOR term (%esi or %edi); dest += rotl(x,5).
# shldl with identical operands is a rotate-left, shrdl a rotate-right.
# A register rotated left by 5 in one group is rotated right by 7 in the
# next, so the net effect on it is a rotate-right by 2 (= rotate-left by 30).
addl 16(%rsp),%ebx
|
|
xorl %ebp,%esi
|
|
# Copy %ecx before it is rotated; the copy seeds the next group's XOR term.
movl %ecx,%edi
|
|
shldl $5,%ecx,%ecx
|
|
addl %esi,%ebx
|
|
xorl %ebp,%edi
|
|
shrdl $7,%edx,%edx
|
|
addl %ecx,%ebx
|
|
# Same round shape, stack word 20(%rsp), accumulating into %eax.
addl 20(%rsp),%eax
|
|
xorl %edx,%edi
|
|
movl %ebx,%esi
|
|
shldl $5,%ebx,%ebx
|
|
addl %edi,%eax
|
|
xorl %edx,%esi
|
|
shrdl $7,%ecx,%ecx
|
|
addl %ebx,%eax
|
|
# Same round shape, stack word 24(%rsp), accumulating into %ebp.
addl 24(%rsp),%ebp
|
|
xorl %ecx,%esi
|
|
movl %eax,%edi
|
|
shldl $5,%eax,%eax
|
|
addl %esi,%ebp
|
|
xorl %ecx,%edi
|
|
shrdl $7,%ebx,%ebx
|
|
addl %eax,%ebp
|
|
# Same round shape, stack word 28(%rsp), accumulating into %edx.
addl 28(%rsp),%edx
|
|
xorl %ebx,%edi
|
|
movl %ebp,%esi
|
|
shldl $5,%ebp,%ebp
|
|
addl %edi,%edx
|
|
xorl %ebx,%esi
|
|
shrdl $7,%eax,%eax
|
|
addl %ebp,%edx
|
|
# Same round shape, stack word 32(%rsp), accumulating into %ecx.
addl 32(%rsp),%ecx
|
|
xorl %eax,%esi
|
|
movl %edx,%edi
|
|
shldl $5,%edx,%edx
|
|
addl %esi,%ecx
|
|
xorl %eax,%edi
|
|
shrdl $7,%ebp,%ebp
|
|
addl %edx,%ecx
|
|
# Same round shape, stack word 36(%rsp), accumulating into %ebx.
addl 36(%rsp),%ebx
|
|
xorl %ebp,%edi
|
|
movl %ecx,%esi
|
|
shldl $5,%ecx,%ecx
|
|
addl %edi,%ebx
|
|
xorl %ebp,%esi
|
|
shrdl $7,%edx,%edx
|
|
addl %ecx,%ebx
|
|
# Same round shape, stack word 40(%rsp), accumulating into %eax.
addl 40(%rsp),%eax
|
|
xorl %edx,%esi
|
|
movl %ebx,%edi
|
|
shldl $5,%ebx,%ebx
|
|
addl %esi,%eax
|
|
xorl %edx,%edi
|
|
shrdl $7,%ecx,%ecx
|
|
addl %ebx,%eax
|
|
# Same round shape, stack word 44(%rsp), accumulating into %ebp.
addl 44(%rsp),%ebp
|
|
xorl %ecx,%edi
|
|
movl %eax,%esi
|
|
shldl $5,%eax,%eax
|
|
addl %edi,%ebp
|
|
xorl %ecx,%esi
|
|
shrdl $7,%ebx,%ebx
|
|
addl %eax,%ebp
|
|
# Same round shape, stack word 48(%rsp), accumulating into %edx.
addl 48(%rsp),%edx
|
|
xorl %ebx,%esi
|
|
movl %ebp,%edi
|
|
shldl $5,%ebp,%ebp
|
|
addl %esi,%edx
|
|
xorl %ebx,%edi
|
|
shrdl $7,%eax,%eax
|
|
addl %ebp,%edx
|
|
# Same round shape, stack word 52(%rsp), accumulating into %ecx.
addl 52(%rsp),%ecx
|
|
xorl %eax,%edi
|
|
movl %edx,%esi
|
|
shldl $5,%edx,%edx
|
|
addl %edi,%ecx
|
|
xorl %eax,%esi
|
|
shrdl $7,%ebp,%ebp
|
|
addl %edx,%ecx
|
|
# Same round shape, stack word 56(%rsp), accumulating into %ebx.
addl 56(%rsp),%ebx
|
|
xorl %ebp,%esi
|
|
movl %ecx,%edi
|
|
shldl $5,%ecx,%ecx
|
|
addl %esi,%ebx
|
|
xorl %ebp,%edi
|
|
shrdl $7,%edx,%edx
|
|
addl %ecx,%ebx
|
|
# Last group, stack word 60(%rsp), accumulating into %eax. There is no
# follow-up XOR because no further round consumes it.
addl 60(%rsp),%eax
|
|
xorl %edx,%edi
|
|
# %esi keeps the value of %ebx from before the rotate below; the state
# update further down adds %esi (not the rotated %ebx) into 4(%r8).
movl %ebx,%esi
|
|
shldl $5,%ebx,%ebx
|
|
addl %edi,%eax
|
|
shrdl $7,%ecx,%ecx
|
|
addl %ebx,%eax
|
|
# Clear the upper halves of the ymm registers before leaving AVX code.
vzeroupper
|
|
|
|
# Fold the working registers into the five 32-bit words at (%r8):
#   0(%r8) += %eax, 4(%r8) += %esi, 8(%r8) += %ecx,
#   12(%r8) += %edx, 16(%r8) += %ebp.
# Loads and stores are interleaved but each word is read before it is written.
addl 0(%r8),%eax
|
|
addl 4(%r8),%esi
|
|
addl 8(%r8),%ecx
|
|
movl %eax,0(%r8)
|
|
addl 12(%r8),%edx
|
|
movl %esi,4(%r8)
|
|
addl 16(%r8),%ebp
|
|
movl %ecx,8(%r8)
|
|
movl %edx,12(%r8)
|
|
movl %ebp,16(%r8)
|
|
# Epilogue: reload the saved registers from just below the address held in
# %r14, then make that address the stack pointer.
# NOTE(review): the offsets match a prologue that pushed rbx, rbp, r12, r13,
# r14 in that order and kept the entry %rsp in %r14; that prologue is outside
# this chunk, so confirm against it.
leaq (%r14),%rsi
|
|
movq -40(%rsi),%r14
|
|
movq -32(%rsi),%r13
|
|
movq -24(%rsi),%r12
|
|
movq -16(%rsi),%rbp
|
|
movq -8(%rsi),%rbx
|
|
leaq (%rsi),%rsp
|
|
.Lepilogue_avx:
|
|
# Bytes f3 c3 encode "rep ret" (REP prefix followed by RET).
.byte 0xf3,0xc3
|
|
.size sha1_block_data_order_avx,.-sha1_block_data_order_avx
|
|
# Entry and setup of the AVX2 SHA-1 block routine (the round loop it jumps to
# continues after this section).
# Arguments as used here: %rdi -> five 32-bit state words, %rsi -> input
# bytes, %rdx -> count that is multiplied by 64 to form the end pointer
# (presumably the number of 64-byte blocks; confirm against the caller).
# Register roles established below:
#   %r8  = state pointer        %r9  = current input pointer
#   %r10 = end-of-input pointer %r11 = K_XX_XX+64 (constant table base)
#   %r13 = source of the data placed in the high 128-bit lanes, later
#          reused as the base pointer into the stack scratch area
#   %r14 = value of %rsp on entry
#   %eax, %ebp, %ecx, %edx, %esi = the five state words (in that order)
.type sha1_block_data_order_avx2,@function
|
|
.align 16
|
|
sha1_block_data_order_avx2:
|
|
# Second entry label; the dispatcher in sha1_block_data_order jumps here.
_avx2_shortcut:
|
|
# Remember the entry stack pointer, then save the callee-saved registers
# this routine uses.
movq %rsp,%rax
|
|
pushq %rbx
|
|
pushq %rbp
|
|
pushq %r12
|
|
pushq %r13
|
|
pushq %r14
|
|
vzeroupper
|
|
movq %rax,%r14
|
|
movq %rdi,%r8
|
|
movq %rsi,%r9
|
|
movq %rdx,%r10
|
|
|
|
# Reserve 640 bytes of scratch space (the highest store in this routine is
# a 32-byte write at 608(%rsp)) and align it down to a 128-byte boundary.
leaq -640(%rsp),%rsp
|
|
# Scale the count by 64; after the addq below %r10 = input + 64*count.
shlq $6,%r10
|
|
# Tentative pointer to the block that follows the first one.
leaq 64(%r9),%r13
|
|
andq $-128,%rsp
|
|
addq %r9,%r10
|
|
leaq K_XX_XX+64(%rip),%r11
|
|
|
|
movl 0(%r8),%eax
|
|
# If input+64 is at or beyond the end pointer (unsigned compare), point %r13
# back at the first block so the high-lane loads below stay inside the input.
cmpq %r10,%r13
|
|
cmovaeq %r9,%r13
|
|
movl 4(%r8),%ebp
|
|
movl 8(%r8),%ecx
|
|
movl 12(%r8),%edx
|
|
movl 16(%r8),%esi
|
|
# Shuffle control for the vpshufb instructions below (presumably a byte-swap
# mask; the table contents are outside this chunk).
vmovdqu 64(%r11),%ymm6
|
|
|
|
# Low 128-bit lanes: 64 bytes from (%r9). High lanes: 64 bytes from (%r13).
# %r9 is advanced past the block it just supplied.
vmovdqu (%r9),%xmm0
|
|
vmovdqu 16(%r9),%xmm1
|
|
vmovdqu 32(%r9),%xmm2
|
|
vmovdqu 48(%r9),%xmm3
|
|
leaq 64(%r9),%r9
|
|
vinserti128 $1,(%r13),%ymm0,%ymm0
|
|
vinserti128 $1,16(%r13),%ymm1,%ymm1
|
|
vpshufb %ymm6,%ymm0,%ymm0
|
|
vinserti128 $1,32(%r13),%ymm2,%ymm2
|
|
vpshufb %ymm6,%ymm1,%ymm1
|
|
vinserti128 $1,48(%r13),%ymm3,%ymm3
|
|
vpshufb %ymm6,%ymm2,%ymm2
|
|
# Constant vector that is added to the schedule words stored below.
vmovdqu -64(%r11),%ymm11
|
|
vpshufb %ymm6,%ymm3,%ymm3
|
|
|
|
# Store (input words + constant) for the first four vectors at 0..96(%rsp).
# %ymm4..%ymm7 are only temporaries here and are recomputed below.
vpaddd %ymm11,%ymm0,%ymm4
|
|
vpaddd %ymm11,%ymm1,%ymm5
|
|
vmovdqu %ymm4,0(%rsp)
|
|
vpaddd %ymm11,%ymm2,%ymm6
|
|
vmovdqu %ymm5,32(%rsp)
|
|
vpaddd %ymm11,%ymm3,%ymm7
|
|
vmovdqu %ymm6,64(%rsp)
|
|
vmovdqu %ymm7,96(%rsp)
|
|
# Four near-identical groups follow; each derives one new vector (%ymm4,
# %ymm5, %ymm6, %ymm7 in turn) from the previous four vectors:
#   - vpalignr/vpsrldq/vpxor combine shifted copies of the older vectors;
#   - vpsrld by 31 OR-ed with the doubled value (vpaddd x,x) rotates each
#     dword left by 1;
#   - %ymm10 holds the pre-rotation low dword of each 128-bit lane moved to
#     the top dword (vpslldq by 12); XOR-ing in both its right shift by 30
#     and its left shift by 2 folds a rotate-left-by-2 of that dword into
#     the top element;
#   - the result plus the constant in %ymm11 is stored to the scratch area.
vpalignr $8,%ymm0,%ymm1,%ymm4
|
|
vpsrldq $4,%ymm3,%ymm8
|
|
vpxor %ymm0,%ymm4,%ymm4
|
|
vpxor %ymm2,%ymm8,%ymm8
|
|
vpxor %ymm8,%ymm4,%ymm4
|
|
vpsrld $31,%ymm4,%ymm8
|
|
vpslldq $12,%ymm4,%ymm10
|
|
vpaddd %ymm4,%ymm4,%ymm4
|
|
vpsrld $30,%ymm10,%ymm9
|
|
vpor %ymm8,%ymm4,%ymm4
|
|
vpslld $2,%ymm10,%ymm10
|
|
vpxor %ymm9,%ymm4,%ymm4
|
|
vpxor %ymm10,%ymm4,%ymm4
|
|
vpaddd %ymm11,%ymm4,%ymm9
|
|
vmovdqu %ymm9,128(%rsp)
|
|
# Second group: produces %ymm5, stored (plus constant) at 160(%rsp).
vpalignr $8,%ymm1,%ymm2,%ymm5
|
|
vpsrldq $4,%ymm4,%ymm8
|
|
vpxor %ymm1,%ymm5,%ymm5
|
|
vpxor %ymm3,%ymm8,%ymm8
|
|
vpxor %ymm8,%ymm5,%ymm5
|
|
vpsrld $31,%ymm5,%ymm8
|
|
# Switch to the next constant vector; it is used for this and the two
# following stores.
vmovdqu -32(%r11),%ymm11
|
|
vpslldq $12,%ymm5,%ymm10
|
|
vpaddd %ymm5,%ymm5,%ymm5
|
|
vpsrld $30,%ymm10,%ymm9
|
|
vpor %ymm8,%ymm5,%ymm5
|
|
vpslld $2,%ymm10,%ymm10
|
|
vpxor %ymm9,%ymm5,%ymm5
|
|
vpxor %ymm10,%ymm5,%ymm5
|
|
vpaddd %ymm11,%ymm5,%ymm9
|
|
vmovdqu %ymm9,160(%rsp)
|
|
# Third group: produces %ymm6, stored (plus constant) at 192(%rsp).
vpalignr $8,%ymm2,%ymm3,%ymm6
|
|
vpsrldq $4,%ymm5,%ymm8
|
|
vpxor %ymm2,%ymm6,%ymm6
|
|
vpxor %ymm4,%ymm8,%ymm8
|
|
vpxor %ymm8,%ymm6,%ymm6
|
|
vpsrld $31,%ymm6,%ymm8
|
|
vpslldq $12,%ymm6,%ymm10
|
|
vpaddd %ymm6,%ymm6,%ymm6
|
|
vpsrld $30,%ymm10,%ymm9
|
|
vpor %ymm8,%ymm6,%ymm6
|
|
vpslld $2,%ymm10,%ymm10
|
|
vpxor %ymm9,%ymm6,%ymm6
|
|
vpxor %ymm10,%ymm6,%ymm6
|
|
vpaddd %ymm11,%ymm6,%ymm9
|
|
vmovdqu %ymm9,192(%rsp)
|
|
# Fourth group: produces %ymm7, stored (plus constant) at 224(%rsp).
vpalignr $8,%ymm3,%ymm4,%ymm7
|
|
vpsrldq $4,%ymm6,%ymm8
|
|
vpxor %ymm3,%ymm7,%ymm7
|
|
vpxor %ymm5,%ymm8,%ymm8
|
|
vpxor %ymm8,%ymm7,%ymm7
|
|
vpsrld $31,%ymm7,%ymm8
|
|
vpslldq $12,%ymm7,%ymm10
|
|
vpaddd %ymm7,%ymm7,%ymm7
|
|
vpsrld $30,%ymm10,%ymm9
|
|
vpor %ymm8,%ymm7,%ymm7
|
|
vpslld $2,%ymm10,%ymm10
|
|
vpxor %ymm9,%ymm7,%ymm7
|
|
vpxor %ymm10,%ymm7,%ymm7
|
|
vpaddd %ymm11,%ymm7,%ymm9
|
|
vmovdqu %ymm9,224(%rsp)
|
|
# From here on %r13 is the base pointer into the scratch area; the round
# code addresses it with displacements starting at -128, i.e. 0(%rsp).
leaq 128(%rsp),%r13
|
|
jmp .Loop_avx2
|
|
.align 32
|
|
.Loop_avx2:
|
|
rorxl $2,%ebp,%ebx
|
|
andnl %edx,%ebp,%edi
|
|
andl %ecx,%ebp
|
|
xorl %edi,%ebp
|
|
jmp .Lalign32_1
|
|
.align 32
|
|
.Lalign32_1:
|
|
vpalignr $8,%ymm6,%ymm7,%ymm8
|
|
vpxor %ymm4,%ymm0,%ymm0
|
|
addl -128(%r13),%esi
|
|
andnl %ecx,%eax,%edi
|
|
vpxor %ymm1,%ymm0,%ymm0
|
|
addl %ebp,%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
vpxor %ymm8,%ymm0,%ymm0
|
|
andl %ebx,%eax
|
|
addl %r12d,%esi
|
|
xorl %edi,%eax
|
|
vpsrld $30,%ymm0,%ymm8
|
|
vpslld $2,%ymm0,%ymm0
|
|
addl -124(%r13),%edx
|
|
andnl %ebx,%esi,%edi
|
|
addl %eax,%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
andl %ebp,%esi
|
|
vpor %ymm8,%ymm0,%ymm0
|
|
addl %r12d,%edx
|
|
xorl %edi,%esi
|
|
addl -120(%r13),%ecx
|
|
andnl %ebp,%edx,%edi
|
|
vpaddd %ymm11,%ymm0,%ymm9
|
|
addl %esi,%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
andl %eax,%edx
|
|
vmovdqu %ymm9,256(%rsp)
|
|
addl %r12d,%ecx
|
|
xorl %edi,%edx
|
|
addl -116(%r13),%ebx
|
|
andnl %eax,%ecx,%edi
|
|
addl %edx,%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
andl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %edi,%ecx
|
|
addl -96(%r13),%ebp
|
|
andnl %esi,%ebx,%edi
|
|
addl %ecx,%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
andl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
xorl %edi,%ebx
|
|
vpalignr $8,%ymm7,%ymm0,%ymm8
|
|
vpxor %ymm5,%ymm1,%ymm1
|
|
addl -92(%r13),%eax
|
|
andnl %edx,%ebp,%edi
|
|
vpxor %ymm2,%ymm1,%ymm1
|
|
addl %ebx,%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
vpxor %ymm8,%ymm1,%ymm1
|
|
andl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
xorl %edi,%ebp
|
|
vpsrld $30,%ymm1,%ymm8
|
|
vpslld $2,%ymm1,%ymm1
|
|
addl -88(%r13),%esi
|
|
andnl %ecx,%eax,%edi
|
|
addl %ebp,%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
andl %ebx,%eax
|
|
vpor %ymm8,%ymm1,%ymm1
|
|
addl %r12d,%esi
|
|
xorl %edi,%eax
|
|
addl -84(%r13),%edx
|
|
andnl %ebx,%esi,%edi
|
|
vpaddd %ymm11,%ymm1,%ymm9
|
|
addl %eax,%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
andl %ebp,%esi
|
|
vmovdqu %ymm9,288(%rsp)
|
|
addl %r12d,%edx
|
|
xorl %edi,%esi
|
|
addl -64(%r13),%ecx
|
|
andnl %ebp,%edx,%edi
|
|
addl %esi,%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
andl %eax,%edx
|
|
addl %r12d,%ecx
|
|
xorl %edi,%edx
|
|
addl -60(%r13),%ebx
|
|
andnl %eax,%ecx,%edi
|
|
addl %edx,%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
andl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %edi,%ecx
|
|
vpalignr $8,%ymm0,%ymm1,%ymm8
|
|
vpxor %ymm6,%ymm2,%ymm2
|
|
addl -56(%r13),%ebp
|
|
andnl %esi,%ebx,%edi
|
|
vpxor %ymm3,%ymm2,%ymm2
|
|
vmovdqu 0(%r11),%ymm11
|
|
addl %ecx,%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
vpxor %ymm8,%ymm2,%ymm2
|
|
andl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
xorl %edi,%ebx
|
|
vpsrld $30,%ymm2,%ymm8
|
|
vpslld $2,%ymm2,%ymm2
|
|
addl -52(%r13),%eax
|
|
andnl %edx,%ebp,%edi
|
|
addl %ebx,%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
andl %ecx,%ebp
|
|
vpor %ymm8,%ymm2,%ymm2
|
|
addl %r12d,%eax
|
|
xorl %edi,%ebp
|
|
addl -32(%r13),%esi
|
|
andnl %ecx,%eax,%edi
|
|
vpaddd %ymm11,%ymm2,%ymm9
|
|
addl %ebp,%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
andl %ebx,%eax
|
|
vmovdqu %ymm9,320(%rsp)
|
|
addl %r12d,%esi
|
|
xorl %edi,%eax
|
|
addl -28(%r13),%edx
|
|
andnl %ebx,%esi,%edi
|
|
addl %eax,%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
andl %ebp,%esi
|
|
addl %r12d,%edx
|
|
xorl %edi,%esi
|
|
addl -24(%r13),%ecx
|
|
andnl %ebp,%edx,%edi
|
|
addl %esi,%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
andl %eax,%edx
|
|
addl %r12d,%ecx
|
|
xorl %edi,%edx
|
|
vpalignr $8,%ymm1,%ymm2,%ymm8
|
|
vpxor %ymm7,%ymm3,%ymm3
|
|
addl -20(%r13),%ebx
|
|
andnl %eax,%ecx,%edi
|
|
vpxor %ymm4,%ymm3,%ymm3
|
|
addl %edx,%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
vpxor %ymm8,%ymm3,%ymm3
|
|
andl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %edi,%ecx
|
|
vpsrld $30,%ymm3,%ymm8
|
|
vpslld $2,%ymm3,%ymm3
|
|
addl 0(%r13),%ebp
|
|
andnl %esi,%ebx,%edi
|
|
addl %ecx,%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
andl %edx,%ebx
|
|
vpor %ymm8,%ymm3,%ymm3
|
|
addl %r12d,%ebp
|
|
xorl %edi,%ebx
|
|
addl 4(%r13),%eax
|
|
andnl %edx,%ebp,%edi
|
|
vpaddd %ymm11,%ymm3,%ymm9
|
|
addl %ebx,%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
andl %ecx,%ebp
|
|
vmovdqu %ymm9,352(%rsp)
|
|
addl %r12d,%eax
|
|
xorl %edi,%ebp
|
|
addl 8(%r13),%esi
|
|
andnl %ecx,%eax,%edi
|
|
addl %ebp,%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
andl %ebx,%eax
|
|
addl %r12d,%esi
|
|
xorl %edi,%eax
|
|
addl 12(%r13),%edx
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
xorl %ebx,%esi
|
|
vpalignr $8,%ymm2,%ymm3,%ymm8
|
|
vpxor %ymm0,%ymm4,%ymm4
|
|
addl 32(%r13),%ecx
|
|
leal (%rcx,%rsi,1),%ecx
|
|
vpxor %ymm5,%ymm4,%ymm4
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
xorl %eax,%edx
|
|
vpxor %ymm8,%ymm4,%ymm4
|
|
addl %r12d,%ecx
|
|
xorl %ebp,%edx
|
|
addl 36(%r13),%ebx
|
|
vpsrld $30,%ymm4,%ymm8
|
|
vpslld $2,%ymm4,%ymm4
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %eax,%ecx
|
|
vpor %ymm8,%ymm4,%ymm4
|
|
addl 40(%r13),%ebp
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
vpaddd %ymm11,%ymm4,%ymm9
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
xorl %esi,%ebx
|
|
addl 44(%r13),%eax
|
|
vmovdqu %ymm9,384(%rsp)
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
xorl %edx,%ebp
|
|
addl 64(%r13),%esi
|
|
leal (%rsi,%rbp,1),%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
xorl %ecx,%eax
|
|
vpalignr $8,%ymm3,%ymm4,%ymm8
|
|
vpxor %ymm1,%ymm5,%ymm5
|
|
addl 68(%r13),%edx
|
|
leal (%rdx,%rax,1),%edx
|
|
vpxor %ymm6,%ymm5,%ymm5
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
xorl %ebp,%esi
|
|
vpxor %ymm8,%ymm5,%ymm5
|
|
addl %r12d,%edx
|
|
xorl %ebx,%esi
|
|
addl 72(%r13),%ecx
|
|
vpsrld $30,%ymm5,%ymm8
|
|
vpslld $2,%ymm5,%ymm5
|
|
leal (%rcx,%rsi,1),%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
xorl %eax,%edx
|
|
addl %r12d,%ecx
|
|
xorl %ebp,%edx
|
|
vpor %ymm8,%ymm5,%ymm5
|
|
addl 76(%r13),%ebx
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
vpaddd %ymm11,%ymm5,%ymm9
|
|
xorl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %eax,%ecx
|
|
addl 96(%r13),%ebp
|
|
vmovdqu %ymm9,416(%rsp)
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
xorl %esi,%ebx
|
|
addl 100(%r13),%eax
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
xorl %edx,%ebp
|
|
vpalignr $8,%ymm4,%ymm5,%ymm8
|
|
vpxor %ymm2,%ymm6,%ymm6
|
|
addl 104(%r13),%esi
|
|
leal (%rsi,%rbp,1),%esi
|
|
vpxor %ymm7,%ymm6,%ymm6
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
vpxor %ymm8,%ymm6,%ymm6
|
|
addl %r12d,%esi
|
|
xorl %ecx,%eax
|
|
addl 108(%r13),%edx
|
|
leaq 256(%r13),%r13
|
|
vpsrld $30,%ymm6,%ymm8
|
|
vpslld $2,%ymm6,%ymm6
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
xorl %ebx,%esi
|
|
vpor %ymm8,%ymm6,%ymm6
|
|
addl -128(%r13),%ecx
|
|
leal (%rcx,%rsi,1),%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
vpaddd %ymm11,%ymm6,%ymm9
|
|
xorl %eax,%edx
|
|
addl %r12d,%ecx
|
|
xorl %ebp,%edx
|
|
addl -124(%r13),%ebx
|
|
vmovdqu %ymm9,448(%rsp)
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %eax,%ecx
|
|
addl -120(%r13),%ebp
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
xorl %esi,%ebx
|
|
vpalignr $8,%ymm5,%ymm6,%ymm8
|
|
vpxor %ymm3,%ymm7,%ymm7
|
|
addl -116(%r13),%eax
|
|
leal (%rax,%rbx,1),%eax
|
|
vpxor %ymm0,%ymm7,%ymm7
|
|
vmovdqu 32(%r11),%ymm11
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
vpxor %ymm8,%ymm7,%ymm7
|
|
addl %r12d,%eax
|
|
xorl %edx,%ebp
|
|
addl -96(%r13),%esi
|
|
vpsrld $30,%ymm7,%ymm8
|
|
vpslld $2,%ymm7,%ymm7
|
|
leal (%rsi,%rbp,1),%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
xorl %ecx,%eax
|
|
vpor %ymm8,%ymm7,%ymm7
|
|
addl -92(%r13),%edx
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
vpaddd %ymm11,%ymm7,%ymm9
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
xorl %ebx,%esi
|
|
addl -88(%r13),%ecx
|
|
vmovdqu %ymm9,480(%rsp)
|
|
leal (%rcx,%rsi,1),%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
xorl %eax,%edx
|
|
addl %r12d,%ecx
|
|
xorl %ebp,%edx
|
|
addl -84(%r13),%ebx
|
|
movl %esi,%edi
|
|
xorl %eax,%edi
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
andl %edi,%ecx
|
|
jmp .Lalign32_2
|
|
.align 32
|
|
.Lalign32_2:
|
|
vpalignr $8,%ymm6,%ymm7,%ymm8
|
|
vpxor %ymm4,%ymm0,%ymm0
|
|
addl -64(%r13),%ebp
|
|
xorl %esi,%ecx
|
|
vpxor %ymm1,%ymm0,%ymm0
|
|
movl %edx,%edi
|
|
xorl %esi,%edi
|
|
leal (%rcx,%rbp,1),%ebp
|
|
vpxor %ymm8,%ymm0,%ymm0
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
vpsrld $30,%ymm0,%ymm8
|
|
vpslld $2,%ymm0,%ymm0
|
|
addl %r12d,%ebp
|
|
andl %edi,%ebx
|
|
addl -60(%r13),%eax
|
|
xorl %edx,%ebx
|
|
movl %ecx,%edi
|
|
xorl %edx,%edi
|
|
vpor %ymm8,%ymm0,%ymm0
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
vpaddd %ymm11,%ymm0,%ymm9
|
|
addl %r12d,%eax
|
|
andl %edi,%ebp
|
|
addl -56(%r13),%esi
|
|
xorl %ecx,%ebp
|
|
vmovdqu %ymm9,512(%rsp)
|
|
movl %ebx,%edi
|
|
xorl %ecx,%edi
|
|
leal (%rsi,%rbp,1),%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
andl %edi,%eax
|
|
addl -52(%r13),%edx
|
|
xorl %ebx,%eax
|
|
movl %ebp,%edi
|
|
xorl %ebx,%edi
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
andl %edi,%esi
|
|
addl -32(%r13),%ecx
|
|
xorl %ebp,%esi
|
|
movl %eax,%edi
|
|
xorl %ebp,%edi
|
|
leal (%rcx,%rsi,1),%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
xorl %eax,%edx
|
|
addl %r12d,%ecx
|
|
andl %edi,%edx
|
|
vpalignr $8,%ymm7,%ymm0,%ymm8
|
|
vpxor %ymm5,%ymm1,%ymm1
|
|
addl -28(%r13),%ebx
|
|
xorl %eax,%edx
|
|
vpxor %ymm2,%ymm1,%ymm1
|
|
movl %esi,%edi
|
|
xorl %eax,%edi
|
|
leal (%rbx,%rdx,1),%ebx
|
|
vpxor %ymm8,%ymm1,%ymm1
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
vpsrld $30,%ymm1,%ymm8
|
|
vpslld $2,%ymm1,%ymm1
|
|
addl %r12d,%ebx
|
|
andl %edi,%ecx
|
|
addl -24(%r13),%ebp
|
|
xorl %esi,%ecx
|
|
movl %edx,%edi
|
|
xorl %esi,%edi
|
|
vpor %ymm8,%ymm1,%ymm1
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
vpaddd %ymm11,%ymm1,%ymm9
|
|
addl %r12d,%ebp
|
|
andl %edi,%ebx
|
|
addl -20(%r13),%eax
|
|
xorl %edx,%ebx
|
|
vmovdqu %ymm9,544(%rsp)
|
|
movl %ecx,%edi
|
|
xorl %edx,%edi
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
andl %edi,%ebp
|
|
addl 0(%r13),%esi
|
|
xorl %ecx,%ebp
|
|
movl %ebx,%edi
|
|
xorl %ecx,%edi
|
|
leal (%rsi,%rbp,1),%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
andl %edi,%eax
|
|
addl 4(%r13),%edx
|
|
xorl %ebx,%eax
|
|
movl %ebp,%edi
|
|
xorl %ebx,%edi
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
andl %edi,%esi
|
|
vpalignr $8,%ymm0,%ymm1,%ymm8
|
|
vpxor %ymm6,%ymm2,%ymm2
|
|
addl 8(%r13),%ecx
|
|
xorl %ebp,%esi
|
|
vpxor %ymm3,%ymm2,%ymm2
|
|
movl %eax,%edi
|
|
xorl %ebp,%edi
|
|
leal (%rcx,%rsi,1),%ecx
|
|
vpxor %ymm8,%ymm2,%ymm2
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
xorl %eax,%edx
|
|
vpsrld $30,%ymm2,%ymm8
|
|
vpslld $2,%ymm2,%ymm2
|
|
addl %r12d,%ecx
|
|
andl %edi,%edx
|
|
addl 12(%r13),%ebx
|
|
xorl %eax,%edx
|
|
movl %esi,%edi
|
|
xorl %eax,%edi
|
|
vpor %ymm8,%ymm2,%ymm2
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
vpaddd %ymm11,%ymm2,%ymm9
|
|
addl %r12d,%ebx
|
|
andl %edi,%ecx
|
|
addl 32(%r13),%ebp
|
|
xorl %esi,%ecx
|
|
vmovdqu %ymm9,576(%rsp)
|
|
movl %edx,%edi
|
|
xorl %esi,%edi
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
andl %edi,%ebx
|
|
addl 36(%r13),%eax
|
|
xorl %edx,%ebx
|
|
movl %ecx,%edi
|
|
xorl %edx,%edi
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
andl %edi,%ebp
|
|
addl 40(%r13),%esi
|
|
xorl %ecx,%ebp
|
|
movl %ebx,%edi
|
|
xorl %ecx,%edi
|
|
leal (%rsi,%rbp,1),%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
andl %edi,%eax
|
|
vpalignr $8,%ymm1,%ymm2,%ymm8
|
|
vpxor %ymm7,%ymm3,%ymm3
|
|
addl 44(%r13),%edx
|
|
xorl %ebx,%eax
|
|
vpxor %ymm4,%ymm3,%ymm3
|
|
movl %ebp,%edi
|
|
xorl %ebx,%edi
|
|
leal (%rdx,%rax,1),%edx
|
|
vpxor %ymm8,%ymm3,%ymm3
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
xorl %ebp,%esi
|
|
vpsrld $30,%ymm3,%ymm8
|
|
vpslld $2,%ymm3,%ymm3
|
|
addl %r12d,%edx
|
|
andl %edi,%esi
|
|
addl 64(%r13),%ecx
|
|
xorl %ebp,%esi
|
|
movl %eax,%edi
|
|
xorl %ebp,%edi
|
|
vpor %ymm8,%ymm3,%ymm3
|
|
leal (%rcx,%rsi,1),%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
xorl %eax,%edx
|
|
vpaddd %ymm11,%ymm3,%ymm9
|
|
addl %r12d,%ecx
|
|
andl %edi,%edx
|
|
addl 68(%r13),%ebx
|
|
xorl %eax,%edx
|
|
vmovdqu %ymm9,608(%rsp)
|
|
movl %esi,%edi
|
|
xorl %eax,%edi
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
andl %edi,%ecx
|
|
addl 72(%r13),%ebp
|
|
xorl %esi,%ecx
|
|
movl %edx,%edi
|
|
xorl %esi,%edi
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
andl %edi,%ebx
|
|
addl 76(%r13),%eax
|
|
xorl %edx,%ebx
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
xorl %edx,%ebp
|
|
addl 96(%r13),%esi
|
|
leal (%rsi,%rbp,1),%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
xorl %ecx,%eax
|
|
addl 100(%r13),%edx
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
xorl %ebx,%esi
|
|
addl 104(%r13),%ecx
|
|
leal (%rcx,%rsi,1),%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
xorl %eax,%edx
|
|
addl %r12d,%ecx
|
|
xorl %ebp,%edx
|
|
addl 108(%r13),%ebx
|
|
leaq 256(%r13),%r13
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %eax,%ecx
|
|
addl -128(%r13),%ebp
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
xorl %esi,%ebx
|
|
addl -124(%r13),%eax
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
xorl %edx,%ebp
|
|
addl -120(%r13),%esi
|
|
leal (%rsi,%rbp,1),%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
xorl %ecx,%eax
|
|
addl -116(%r13),%edx
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
xorl %ebx,%esi
|
|
addl -96(%r13),%ecx
|
|
leal (%rcx,%rsi,1),%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
xorl %eax,%edx
|
|
addl %r12d,%ecx
|
|
xorl %ebp,%edx
|
|
addl -92(%r13),%ebx
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %eax,%ecx
|
|
addl -88(%r13),%ebp
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
xorl %esi,%ebx
|
|
addl -84(%r13),%eax
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
xorl %edx,%ebp
|
|
addl -64(%r13),%esi
|
|
leal (%rsi,%rbp,1),%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
xorl %ecx,%eax
|
|
addl -60(%r13),%edx
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
xorl %ebx,%esi
|
|
addl -56(%r13),%ecx
|
|
leal (%rcx,%rsi,1),%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
xorl %eax,%edx
|
|
addl %r12d,%ecx
|
|
xorl %ebp,%edx
|
|
addl -52(%r13),%ebx
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %eax,%ecx
|
|
addl -32(%r13),%ebp
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
xorl %esi,%ebx
|
|
addl -28(%r13),%eax
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
xorl %edx,%ebp
|
|
addl -24(%r13),%esi
|
|
leal (%rsi,%rbp,1),%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
xorl %ecx,%eax
|
|
addl -20(%r13),%edx
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
addl %r12d,%edx
|
|
leaq 128(%r9),%r13
|
|
leaq 128(%r9),%rdi
|
|
cmpq %r10,%r13
|
|
cmovaeq %r9,%r13
|
|
|
|
|
|
addl 0(%r8),%edx
|
|
addl 4(%r8),%esi
|
|
addl 8(%r8),%ebp
|
|
movl %edx,0(%r8)
|
|
addl 12(%r8),%ebx
|
|
movl %esi,4(%r8)
|
|
movl %edx,%eax
|
|
addl 16(%r8),%ecx
|
|
movl %ebp,%r12d
|
|
movl %ebp,8(%r8)
|
|
movl %ebx,%edx
|
|
|
|
movl %ebx,12(%r8)
|
|
movl %esi,%ebp
|
|
movl %ecx,16(%r8)
|
|
|
|
movl %ecx,%esi
|
|
movl %r12d,%ecx
|
|
|
|
|
|
cmpq %r10,%r9
|
|
je .Ldone_avx2
|
|
vmovdqu 64(%r11),%ymm6
|
|
cmpq %r10,%rdi
|
|
ja .Last_avx2
|
|
|
|
vmovdqu -64(%rdi),%xmm0
|
|
vmovdqu -48(%rdi),%xmm1
|
|
vmovdqu -32(%rdi),%xmm2
|
|
vmovdqu -16(%rdi),%xmm3
|
|
vinserti128 $1,0(%r13),%ymm0,%ymm0
|
|
vinserti128 $1,16(%r13),%ymm1,%ymm1
|
|
vinserti128 $1,32(%r13),%ymm2,%ymm2
|
|
vinserti128 $1,48(%r13),%ymm3,%ymm3
|
|
jmp .Last_avx2
|
|
|
|
.align 32
|
|
.Last_avx2:
|
|
leaq 128+16(%rsp),%r13
|
|
rorxl $2,%ebp,%ebx
|
|
andnl %edx,%ebp,%edi
|
|
andl %ecx,%ebp
|
|
xorl %edi,%ebp
|
|
subq $-128,%r9
|
|
addl -128(%r13),%esi
|
|
andnl %ecx,%eax,%edi
|
|
addl %ebp,%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
andl %ebx,%eax
|
|
addl %r12d,%esi
|
|
xorl %edi,%eax
|
|
addl -124(%r13),%edx
|
|
andnl %ebx,%esi,%edi
|
|
addl %eax,%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
andl %ebp,%esi
|
|
addl %r12d,%edx
|
|
xorl %edi,%esi
|
|
addl -120(%r13),%ecx
|
|
andnl %ebp,%edx,%edi
|
|
addl %esi,%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
andl %eax,%edx
|
|
addl %r12d,%ecx
|
|
xorl %edi,%edx
|
|
addl -116(%r13),%ebx
|
|
andnl %eax,%ecx,%edi
|
|
addl %edx,%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
andl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %edi,%ecx
|
|
addl -96(%r13),%ebp
|
|
andnl %esi,%ebx,%edi
|
|
addl %ecx,%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
andl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
xorl %edi,%ebx
|
|
addl -92(%r13),%eax
|
|
andnl %edx,%ebp,%edi
|
|
addl %ebx,%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
andl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
xorl %edi,%ebp
|
|
addl -88(%r13),%esi
|
|
andnl %ecx,%eax,%edi
|
|
addl %ebp,%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
andl %ebx,%eax
|
|
addl %r12d,%esi
|
|
xorl %edi,%eax
|
|
addl -84(%r13),%edx
|
|
andnl %ebx,%esi,%edi
|
|
addl %eax,%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
andl %ebp,%esi
|
|
addl %r12d,%edx
|
|
xorl %edi,%esi
|
|
addl -64(%r13),%ecx
|
|
andnl %ebp,%edx,%edi
|
|
addl %esi,%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
andl %eax,%edx
|
|
addl %r12d,%ecx
|
|
xorl %edi,%edx
|
|
addl -60(%r13),%ebx
|
|
andnl %eax,%ecx,%edi
|
|
addl %edx,%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
andl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %edi,%ecx
|
|
addl -56(%r13),%ebp
|
|
andnl %esi,%ebx,%edi
|
|
addl %ecx,%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
andl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
xorl %edi,%ebx
|
|
addl -52(%r13),%eax
|
|
andnl %edx,%ebp,%edi
|
|
addl %ebx,%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
andl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
xorl %edi,%ebp
|
|
addl -32(%r13),%esi
|
|
andnl %ecx,%eax,%edi
|
|
addl %ebp,%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
andl %ebx,%eax
|
|
addl %r12d,%esi
|
|
xorl %edi,%eax
|
|
addl -28(%r13),%edx
|
|
andnl %ebx,%esi,%edi
|
|
addl %eax,%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
andl %ebp,%esi
|
|
addl %r12d,%edx
|
|
xorl %edi,%esi
|
|
addl -24(%r13),%ecx
|
|
andnl %ebp,%edx,%edi
|
|
addl %esi,%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
andl %eax,%edx
|
|
addl %r12d,%ecx
|
|
xorl %edi,%edx
|
|
addl -20(%r13),%ebx
|
|
andnl %eax,%ecx,%edi
|
|
addl %edx,%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
andl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %edi,%ecx
|
|
addl 0(%r13),%ebp
|
|
andnl %esi,%ebx,%edi
|
|
addl %ecx,%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
andl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
xorl %edi,%ebx
|
|
addl 4(%r13),%eax
|
|
andnl %edx,%ebp,%edi
|
|
addl %ebx,%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
andl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
xorl %edi,%ebp
|
|
addl 8(%r13),%esi
|
|
andnl %ecx,%eax,%edi
|
|
addl %ebp,%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
andl %ebx,%eax
|
|
addl %r12d,%esi
|
|
xorl %edi,%eax
|
|
addl 12(%r13),%edx
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
xorl %ebx,%esi
|
|
addl 32(%r13),%ecx
|
|
leal (%rcx,%rsi,1),%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
xorl %eax,%edx
|
|
addl %r12d,%ecx
|
|
xorl %ebp,%edx
|
|
addl 36(%r13),%ebx
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %eax,%ecx
|
|
addl 40(%r13),%ebp
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
xorl %esi,%ebx
|
|
addl 44(%r13),%eax
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
xorl %edx,%ebp
|
|
addl 64(%r13),%esi
|
|
leal (%rsi,%rbp,1),%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
xorl %ecx,%eax
|
|
vmovdqu -64(%r11),%ymm11
|
|
vpshufb %ymm6,%ymm0,%ymm0
|
|
addl 68(%r13),%edx
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
xorl %ebx,%esi
|
|
addl 72(%r13),%ecx
|
|
leal (%rcx,%rsi,1),%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
xorl %eax,%edx
|
|
addl %r12d,%ecx
|
|
xorl %ebp,%edx
|
|
addl 76(%r13),%ebx
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %eax,%ecx
|
|
addl 96(%r13),%ebp
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
xorl %esi,%ebx
|
|
addl 100(%r13),%eax
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
xorl %edx,%ebp
|
|
vpshufb %ymm6,%ymm1,%ymm1
|
|
vpaddd %ymm11,%ymm0,%ymm8
|
|
addl 104(%r13),%esi
|
|
leal (%rsi,%rbp,1),%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
xorl %ecx,%eax
|
|
addl 108(%r13),%edx
|
|
leaq 256(%r13),%r13
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
xorl %ebx,%esi
|
|
addl -128(%r13),%ecx
|
|
leal (%rcx,%rsi,1),%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
xorl %eax,%edx
|
|
addl %r12d,%ecx
|
|
xorl %ebp,%edx
|
|
addl -124(%r13),%ebx
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %eax,%ecx
|
|
addl -120(%r13),%ebp
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
xorl %esi,%ebx
|
|
vmovdqu %ymm8,0(%rsp)
|
|
vpshufb %ymm6,%ymm2,%ymm2
|
|
vpaddd %ymm11,%ymm1,%ymm9
|
|
addl -116(%r13),%eax
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
xorl %edx,%ebp
|
|
addl -96(%r13),%esi
|
|
leal (%rsi,%rbp,1),%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
xorl %ecx,%eax
|
|
addl -92(%r13),%edx
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
xorl %ebx,%esi
|
|
addl -88(%r13),%ecx
|
|
leal (%rcx,%rsi,1),%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
xorl %eax,%edx
|
|
addl %r12d,%ecx
|
|
xorl %ebp,%edx
|
|
addl -84(%r13),%ebx
|
|
movl %esi,%edi
|
|
xorl %eax,%edi
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
andl %edi,%ecx
|
|
vmovdqu %ymm9,32(%rsp)
|
|
vpshufb %ymm6,%ymm3,%ymm3
|
|
vpaddd %ymm11,%ymm2,%ymm6
|
|
addl -64(%r13),%ebp
|
|
xorl %esi,%ecx
|
|
movl %edx,%edi
|
|
xorl %esi,%edi
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
andl %edi,%ebx
|
|
addl -60(%r13),%eax
|
|
xorl %edx,%ebx
|
|
movl %ecx,%edi
|
|
xorl %edx,%edi
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
andl %edi,%ebp
|
|
addl -56(%r13),%esi
|
|
xorl %ecx,%ebp
|
|
movl %ebx,%edi
|
|
xorl %ecx,%edi
|
|
leal (%rsi,%rbp,1),%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
andl %edi,%eax
|
|
addl -52(%r13),%edx
|
|
xorl %ebx,%eax
|
|
movl %ebp,%edi
|
|
xorl %ebx,%edi
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
andl %edi,%esi
|
|
addl -32(%r13),%ecx
|
|
xorl %ebp,%esi
|
|
movl %eax,%edi
|
|
xorl %ebp,%edi
|
|
leal (%rcx,%rsi,1),%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
xorl %eax,%edx
|
|
addl %r12d,%ecx
|
|
andl %edi,%edx
|
|
jmp .Lalign32_3
|
|
.align 32
|
|
.Lalign32_3:
|
|
vmovdqu %ymm6,64(%rsp)
|
|
vpaddd %ymm11,%ymm3,%ymm7
|
|
addl -28(%r13),%ebx
|
|
xorl %eax,%edx
|
|
movl %esi,%edi
|
|
xorl %eax,%edi
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
andl %edi,%ecx
|
|
addl -24(%r13),%ebp
|
|
xorl %esi,%ecx
|
|
movl %edx,%edi
|
|
xorl %esi,%edi
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
andl %edi,%ebx
|
|
addl -20(%r13),%eax
|
|
xorl %edx,%ebx
|
|
movl %ecx,%edi
|
|
xorl %edx,%edi
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
andl %edi,%ebp
|
|
addl 0(%r13),%esi
|
|
xorl %ecx,%ebp
|
|
movl %ebx,%edi
|
|
xorl %ecx,%edi
|
|
leal (%rsi,%rbp,1),%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
andl %edi,%eax
|
|
addl 4(%r13),%edx
|
|
xorl %ebx,%eax
|
|
movl %ebp,%edi
|
|
xorl %ebx,%edi
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
andl %edi,%esi
|
|
vmovdqu %ymm7,96(%rsp)
|
|
addl 8(%r13),%ecx
|
|
xorl %ebp,%esi
|
|
movl %eax,%edi
|
|
xorl %ebp,%edi
|
|
leal (%rcx,%rsi,1),%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
xorl %eax,%edx
|
|
addl %r12d,%ecx
|
|
andl %edi,%edx
|
|
addl 12(%r13),%ebx
|
|
xorl %eax,%edx
|
|
movl %esi,%edi
|
|
xorl %eax,%edi
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
andl %edi,%ecx
|
|
addl 32(%r13),%ebp
|
|
xorl %esi,%ecx
|
|
movl %edx,%edi
|
|
xorl %esi,%edi
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
andl %edi,%ebx
|
|
addl 36(%r13),%eax
|
|
xorl %edx,%ebx
|
|
movl %ecx,%edi
|
|
xorl %edx,%edi
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
andl %edi,%ebp
|
|
addl 40(%r13),%esi
|
|
xorl %ecx,%ebp
|
|
movl %ebx,%edi
|
|
xorl %ecx,%edi
|
|
leal (%rsi,%rbp,1),%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
andl %edi,%eax
|
|
vpalignr $8,%ymm0,%ymm1,%ymm4
|
|
addl 44(%r13),%edx
|
|
xorl %ebx,%eax
|
|
movl %ebp,%edi
|
|
xorl %ebx,%edi
|
|
vpsrldq $4,%ymm3,%ymm8
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
vpxor %ymm0,%ymm4,%ymm4
|
|
vpxor %ymm2,%ymm8,%ymm8
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
vpxor %ymm8,%ymm4,%ymm4
|
|
andl %edi,%esi
|
|
addl 64(%r13),%ecx
|
|
xorl %ebp,%esi
|
|
movl %eax,%edi
|
|
vpsrld $31,%ymm4,%ymm8
|
|
xorl %ebp,%edi
|
|
leal (%rcx,%rsi,1),%ecx
|
|
rorxl $27,%edx,%r12d
|
|
vpslldq $12,%ymm4,%ymm10
|
|
vpaddd %ymm4,%ymm4,%ymm4
|
|
rorxl $2,%edx,%esi
|
|
xorl %eax,%edx
|
|
vpsrld $30,%ymm10,%ymm9
|
|
vpor %ymm8,%ymm4,%ymm4
|
|
addl %r12d,%ecx
|
|
andl %edi,%edx
|
|
vpslld $2,%ymm10,%ymm10
|
|
vpxor %ymm9,%ymm4,%ymm4
|
|
addl 68(%r13),%ebx
|
|
xorl %eax,%edx
|
|
vpxor %ymm10,%ymm4,%ymm4
|
|
movl %esi,%edi
|
|
xorl %eax,%edi
|
|
leal (%rbx,%rdx,1),%ebx
|
|
vpaddd %ymm11,%ymm4,%ymm9
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
vmovdqu %ymm9,128(%rsp)
|
|
addl %r12d,%ebx
|
|
andl %edi,%ecx
|
|
addl 72(%r13),%ebp
|
|
xorl %esi,%ecx
|
|
movl %edx,%edi
|
|
xorl %esi,%edi
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
andl %edi,%ebx
|
|
addl 76(%r13),%eax
|
|
xorl %edx,%ebx
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
xorl %edx,%ebp
|
|
vpalignr $8,%ymm1,%ymm2,%ymm5
|
|
addl 96(%r13),%esi
|
|
leal (%rsi,%rbp,1),%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
vpsrldq $4,%ymm4,%ymm8
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
xorl %ecx,%eax
|
|
vpxor %ymm1,%ymm5,%ymm5
|
|
vpxor %ymm3,%ymm8,%ymm8
|
|
addl 100(%r13),%edx
|
|
leal (%rdx,%rax,1),%edx
|
|
vpxor %ymm8,%ymm5,%ymm5
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
vpsrld $31,%ymm5,%ymm8
|
|
vmovdqu -32(%r11),%ymm11
|
|
xorl %ebx,%esi
|
|
addl 104(%r13),%ecx
|
|
leal (%rcx,%rsi,1),%ecx
|
|
vpslldq $12,%ymm5,%ymm10
|
|
vpaddd %ymm5,%ymm5,%ymm5
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
vpsrld $30,%ymm10,%ymm9
|
|
vpor %ymm8,%ymm5,%ymm5
|
|
xorl %eax,%edx
|
|
addl %r12d,%ecx
|
|
vpslld $2,%ymm10,%ymm10
|
|
vpxor %ymm9,%ymm5,%ymm5
|
|
xorl %ebp,%edx
|
|
addl 108(%r13),%ebx
|
|
leaq 256(%r13),%r13
|
|
vpxor %ymm10,%ymm5,%ymm5
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
vpaddd %ymm11,%ymm5,%ymm9
|
|
xorl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %eax,%ecx
|
|
vmovdqu %ymm9,160(%rsp)
|
|
addl -128(%r13),%ebp
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
xorl %esi,%ebx
|
|
vpalignr $8,%ymm2,%ymm3,%ymm6
|
|
addl -124(%r13),%eax
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
vpsrldq $4,%ymm5,%ymm8
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
xorl %edx,%ebp
|
|
vpxor %ymm2,%ymm6,%ymm6
|
|
vpxor %ymm4,%ymm8,%ymm8
|
|
addl -120(%r13),%esi
|
|
leal (%rsi,%rbp,1),%esi
|
|
vpxor %ymm8,%ymm6,%ymm6
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
vpsrld $31,%ymm6,%ymm8
|
|
xorl %ecx,%eax
|
|
addl -116(%r13),%edx
|
|
leal (%rdx,%rax,1),%edx
|
|
vpslldq $12,%ymm6,%ymm10
|
|
vpaddd %ymm6,%ymm6,%ymm6
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
vpsrld $30,%ymm10,%ymm9
|
|
vpor %ymm8,%ymm6,%ymm6
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
vpslld $2,%ymm10,%ymm10
|
|
vpxor %ymm9,%ymm6,%ymm6
|
|
xorl %ebx,%esi
|
|
addl -96(%r13),%ecx
|
|
vpxor %ymm10,%ymm6,%ymm6
|
|
leal (%rcx,%rsi,1),%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
vpaddd %ymm11,%ymm6,%ymm9
|
|
xorl %eax,%edx
|
|
addl %r12d,%ecx
|
|
xorl %ebp,%edx
|
|
vmovdqu %ymm9,192(%rsp)
|
|
addl -92(%r13),%ebx
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %eax,%ecx
|
|
vpalignr $8,%ymm3,%ymm4,%ymm7
|
|
addl -88(%r13),%ebp
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
vpsrldq $4,%ymm6,%ymm8
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
xorl %esi,%ebx
|
|
vpxor %ymm3,%ymm7,%ymm7
|
|
vpxor %ymm5,%ymm8,%ymm8
|
|
addl -84(%r13),%eax
|
|
leal (%rax,%rbx,1),%eax
|
|
vpxor %ymm8,%ymm7,%ymm7
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
vpsrld $31,%ymm7,%ymm8
|
|
xorl %edx,%ebp
|
|
addl -64(%r13),%esi
|
|
leal (%rsi,%rbp,1),%esi
|
|
vpslldq $12,%ymm7,%ymm10
|
|
vpaddd %ymm7,%ymm7,%ymm7
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
vpsrld $30,%ymm10,%ymm9
|
|
vpor %ymm8,%ymm7,%ymm7
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
vpslld $2,%ymm10,%ymm10
|
|
vpxor %ymm9,%ymm7,%ymm7
|
|
xorl %ecx,%eax
|
|
addl -60(%r13),%edx
|
|
vpxor %ymm10,%ymm7,%ymm7
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
rorxl $2,%esi,%eax
|
|
vpaddd %ymm11,%ymm7,%ymm9
|
|
xorl %ebp,%esi
|
|
addl %r12d,%edx
|
|
xorl %ebx,%esi
|
|
vmovdqu %ymm9,224(%rsp)
|
|
addl -56(%r13),%ecx
|
|
leal (%rcx,%rsi,1),%ecx
|
|
rorxl $27,%edx,%r12d
|
|
rorxl $2,%edx,%esi
|
|
xorl %eax,%edx
|
|
addl %r12d,%ecx
|
|
xorl %ebp,%edx
|
|
addl -52(%r13),%ebx
|
|
leal (%rbx,%rdx,1),%ebx
|
|
rorxl $27,%ecx,%r12d
|
|
rorxl $2,%ecx,%edx
|
|
xorl %esi,%ecx
|
|
addl %r12d,%ebx
|
|
xorl %eax,%ecx
|
|
addl -32(%r13),%ebp
|
|
leal (%rcx,%rbp,1),%ebp
|
|
rorxl $27,%ebx,%r12d
|
|
rorxl $2,%ebx,%ecx
|
|
xorl %edx,%ebx
|
|
addl %r12d,%ebp
|
|
xorl %esi,%ebx
|
|
addl -28(%r13),%eax
|
|
leal (%rax,%rbx,1),%eax
|
|
rorxl $27,%ebp,%r12d
|
|
rorxl $2,%ebp,%ebx
|
|
xorl %ecx,%ebp
|
|
addl %r12d,%eax
|
|
xorl %edx,%ebp
|
|
addl -24(%r13),%esi
|
|
leal (%rsi,%rbp,1),%esi
|
|
rorxl $27,%eax,%r12d
|
|
rorxl $2,%eax,%ebp
|
|
xorl %ebx,%eax
|
|
addl %r12d,%esi
|
|
xorl %ecx,%eax
|
|
addl -20(%r13),%edx
|
|
leal (%rdx,%rax,1),%edx
|
|
rorxl $27,%esi,%r12d
|
|
addl %r12d,%edx
|
|
leaq 128(%rsp),%r13
|
|
|
|
|
|
addl 0(%r8),%edx
|
|
addl 4(%r8),%esi
|
|
addl 8(%r8),%ebp
|
|
movl %edx,0(%r8)
|
|
addl 12(%r8),%ebx
|
|
movl %esi,4(%r8)
|
|
movl %edx,%eax
|
|
addl 16(%r8),%ecx
|
|
movl %ebp,%r12d
|
|
movl %ebp,8(%r8)
|
|
movl %ebx,%edx
|
|
|
|
movl %ebx,12(%r8)
|
|
movl %esi,%ebp
|
|
movl %ecx,16(%r8)
|
|
|
|
movl %ecx,%esi
|
|
movl %r12d,%ecx
|
|
|
|
|
|
cmpq %r10,%r9
|
|
jbe .Loop_avx2
|
|
|
|
.Ldone_avx2:
|
|
vzeroupper
|
|
leaq (%r14),%rsi
|
|
movq -40(%rsi),%r14
|
|
movq -32(%rsi),%r13
|
|
movq -24(%rsi),%r12
|
|
movq -16(%rsi),%rbp
|
|
movq -8(%rsi),%rbx
|
|
leaq (%rsi),%rsp
|
|
.Lepilogue_avx2:
|
|
.byte 0xf3,0xc3
|
|
.size sha1_block_data_order_avx2,.-sha1_block_data_order_avx2
|
|
.align 64
|
|
# K_XX_XX: read-only constant pool for the SIMD SHA-1 code in this file.
# Layout, as byte offsets from the label (each pair of .long rows is 32 bytes,
# i.e. one full %ymm load):
#   bytes   0..127  four round constants, each replicated into eight dwords
#   bytes 128..159  shuffle mask, one identical 16-byte copy per 128-bit lane
#   bytes 160..175  16-byte shuffle mask that reverses a whole 128-bit lane
#   bytes 176..     NUL-terminated ASCII identification string
# The order and size of every row matters: the code addresses this table with
# fixed displacements from a base register.
# NOTE(review): the AVX2 rounds read -64(%r11), -32(%r11) and 64(%r11); that
# lines up with the rows below if %r11 = K_XX_XX+64. The instruction that sets
# %r11 is outside this section -- confirm against the function prologue.
K_XX_XX:
|
|
.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999	# +0: SHA-1 K for rounds 0..19 (FIPS 180-4); same value as the 1518500249 immediate in the scalar rounds
|
|
.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999	# +16: upper 128-bit lane copy of the row above
|
|
.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1	# +32: SHA-1 K for rounds 20..39
|
|
.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1	# +48: upper lane copy
|
|
.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc	# +64: SHA-1 K for rounds 40..59
|
|
.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc	# +80: upper lane copy
|
|
.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6	# +96: SHA-1 K for rounds 60..79
|
|
.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6	# +112: upper lane copy
|
|
.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f	# +128: stored little-endian this is bytes 3,2,1,0,7,6,5,4,...; as a vpshufb mask it reverses the bytes inside each 32-bit word
|
|
.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f	# +144: upper lane copy of the mask (vpshufb shuffles each 128-bit lane independently)
|
|
.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0	# +160: mask that reverses all 16 bytes of a lane; not referenced by the AVX2 code visible here -- presumably used by another code path, confirm
|
|
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	# +176: ASCII "SHA1 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>" followed by NUL
|
|
.align 64
|
|
|