mirror of https://github.com/lukechilds/node.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
8214 lines
154 KiB
8214 lines
154 KiB
; Module preamble: MASM directives that set up the code segment and the
; symbols shared with other modules before sha256_multi_block is defined.
OPTION DOTNAME				; accept identifiers with a leading dot (the segment below is named .text$)
|
|
.text$ SEGMENT ALIGN(256) 'CODE'	; open segment .text$, class 'CODE', aligned to 256 bytes
|
|
|
|
EXTERN OPENSSL_ia32cap_P:NEAR		; defined in another module; sha256_multi_block reads the qword at +4 and tests bits of it to pick _shaext_shortcut / _avx_shortcut
|
|
|
|
PUBLIC sha256_multi_block		; make the entry point visible to other modules
|
|
|
|
ALIGN 32				; align the location counter to 32 bytes before the PROC that follows
|
|
sha256_multi_block PROC PUBLIC
|
|
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
|
|
mov QWORD PTR[16+rsp],rsi
|
|
mov rax,rsp
|
|
$L$SEH_begin_sha256_multi_block::
|
|
mov rdi,rcx
|
|
mov rsi,rdx
|
|
mov rdx,r8
|
|
|
|
|
|
mov rcx,QWORD PTR[((OPENSSL_ia32cap_P+4))]
|
|
bt rcx,61
|
|
jc _shaext_shortcut
|
|
test ecx,268435456
|
|
jnz _avx_shortcut
|
|
mov rax,rsp
|
|
push rbx
|
|
push rbp
|
|
lea rsp,QWORD PTR[((-168))+rsp]
|
|
movaps XMMWORD PTR[rsp],xmm6
|
|
movaps XMMWORD PTR[16+rsp],xmm7
|
|
movaps XMMWORD PTR[32+rsp],xmm8
|
|
movaps XMMWORD PTR[48+rsp],xmm9
|
|
movaps XMMWORD PTR[(-120)+rax],xmm10
|
|
movaps XMMWORD PTR[(-104)+rax],xmm11
|
|
movaps XMMWORD PTR[(-88)+rax],xmm12
|
|
movaps XMMWORD PTR[(-72)+rax],xmm13
|
|
movaps XMMWORD PTR[(-56)+rax],xmm14
|
|
movaps XMMWORD PTR[(-40)+rax],xmm15
|
|
sub rsp,288
|
|
and rsp,-256
|
|
mov QWORD PTR[272+rsp],rax
|
|
$L$body::
|
|
lea rbp,QWORD PTR[((K256+128))]
|
|
lea rbx,QWORD PTR[256+rsp]
|
|
lea rdi,QWORD PTR[128+rdi]
|
|
|
|
$L$oop_grande::
|
|
mov DWORD PTR[280+rsp],edx
|
|
xor edx,edx
|
|
mov r8,QWORD PTR[rsi]
|
|
mov ecx,DWORD PTR[8+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[rbx],ecx
|
|
cmovle r8,rbp
|
|
mov r9,QWORD PTR[16+rsi]
|
|
mov ecx,DWORD PTR[24+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[4+rbx],ecx
|
|
cmovle r9,rbp
|
|
mov r10,QWORD PTR[32+rsi]
|
|
mov ecx,DWORD PTR[40+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[8+rbx],ecx
|
|
cmovle r10,rbp
|
|
mov r11,QWORD PTR[48+rsi]
|
|
mov ecx,DWORD PTR[56+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[12+rbx],ecx
|
|
cmovle r11,rbp
|
|
test edx,edx
|
|
jz $L$done
|
|
|
|
movdqu xmm8,XMMWORD PTR[((0-128))+rdi]
|
|
lea rax,QWORD PTR[128+rsp]
|
|
movdqu xmm9,XMMWORD PTR[((32-128))+rdi]
|
|
movdqu xmm10,XMMWORD PTR[((64-128))+rdi]
|
|
movdqu xmm11,XMMWORD PTR[((96-128))+rdi]
|
|
movdqu xmm12,XMMWORD PTR[((128-128))+rdi]
|
|
movdqu xmm13,XMMWORD PTR[((160-128))+rdi]
|
|
movdqu xmm14,XMMWORD PTR[((192-128))+rdi]
|
|
movdqu xmm15,XMMWORD PTR[((224-128))+rdi]
|
|
movdqu xmm6,XMMWORD PTR[$L$pbswap]
|
|
jmp $L$oop
|
|
|
|
ALIGN 32
|
|
$L$oop::
|
|
movdqa xmm4,xmm10
|
|
pxor xmm4,xmm9
|
|
movd xmm5,DWORD PTR[r8]
|
|
movd xmm0,DWORD PTR[r9]
|
|
movd xmm1,DWORD PTR[r10]
|
|
movd xmm2,DWORD PTR[r11]
|
|
punpckldq xmm5,xmm1
|
|
punpckldq xmm0,xmm2
|
|
punpckldq xmm5,xmm0
|
|
movdqa xmm7,xmm12
|
|
DB 102,15,56,0,238
|
|
movdqa xmm2,xmm12
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm12
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(0-128)+rax],xmm5
|
|
paddd xmm5,xmm15
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[((-128))+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm12
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm3,xmm12
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm14
|
|
pand xmm3,xmm13
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm8
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm8
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
movdqa xmm3,xmm9
|
|
movdqa xmm7,xmm8
|
|
pslld xmm2,10
|
|
pxor xmm3,xmm8
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm4,xmm3
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm15,xmm9
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm15,xmm4
|
|
paddd xmm11,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm15,xmm5
|
|
paddd xmm15,xmm7
|
|
movd xmm5,DWORD PTR[4+r8]
|
|
movd xmm0,DWORD PTR[4+r9]
|
|
movd xmm1,DWORD PTR[4+r10]
|
|
movd xmm2,DWORD PTR[4+r11]
|
|
punpckldq xmm5,xmm1
|
|
punpckldq xmm0,xmm2
|
|
punpckldq xmm5,xmm0
|
|
movdqa xmm7,xmm11
|
|
|
|
movdqa xmm2,xmm11
|
|
DB 102,15,56,0,238
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm11
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(16-128)+rax],xmm5
|
|
paddd xmm5,xmm14
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[((-96))+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm11
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm4,xmm11
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm13
|
|
pand xmm4,xmm12
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm15
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm15
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm4
|
|
movdqa xmm4,xmm8
|
|
movdqa xmm7,xmm15
|
|
pslld xmm2,10
|
|
pxor xmm4,xmm15
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm3,xmm4
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm14,xmm8
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm14,xmm3
|
|
paddd xmm10,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm14,xmm5
|
|
paddd xmm14,xmm7
|
|
movd xmm5,DWORD PTR[8+r8]
|
|
movd xmm0,DWORD PTR[8+r9]
|
|
movd xmm1,DWORD PTR[8+r10]
|
|
movd xmm2,DWORD PTR[8+r11]
|
|
punpckldq xmm5,xmm1
|
|
punpckldq xmm0,xmm2
|
|
punpckldq xmm5,xmm0
|
|
movdqa xmm7,xmm10
|
|
DB 102,15,56,0,238
|
|
movdqa xmm2,xmm10
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm10
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(32-128)+rax],xmm5
|
|
paddd xmm5,xmm13
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[((-64))+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm10
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm3,xmm10
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm12
|
|
pand xmm3,xmm11
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm14
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm14
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
movdqa xmm3,xmm15
|
|
movdqa xmm7,xmm14
|
|
pslld xmm2,10
|
|
pxor xmm3,xmm14
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm4,xmm3
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm13,xmm15
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm13,xmm4
|
|
paddd xmm9,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm13,xmm5
|
|
paddd xmm13,xmm7
|
|
movd xmm5,DWORD PTR[12+r8]
|
|
movd xmm0,DWORD PTR[12+r9]
|
|
movd xmm1,DWORD PTR[12+r10]
|
|
movd xmm2,DWORD PTR[12+r11]
|
|
punpckldq xmm5,xmm1
|
|
punpckldq xmm0,xmm2
|
|
punpckldq xmm5,xmm0
|
|
movdqa xmm7,xmm9
|
|
|
|
movdqa xmm2,xmm9
|
|
DB 102,15,56,0,238
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm9
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(48-128)+rax],xmm5
|
|
paddd xmm5,xmm12
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[((-32))+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm9
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm4,xmm9
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm11
|
|
pand xmm4,xmm10
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm13
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm13
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm4
|
|
movdqa xmm4,xmm14
|
|
movdqa xmm7,xmm13
|
|
pslld xmm2,10
|
|
pxor xmm4,xmm13
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm3,xmm4
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm12,xmm14
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm12,xmm3
|
|
paddd xmm8,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm12,xmm5
|
|
paddd xmm12,xmm7
|
|
movd xmm5,DWORD PTR[16+r8]
|
|
movd xmm0,DWORD PTR[16+r9]
|
|
movd xmm1,DWORD PTR[16+r10]
|
|
movd xmm2,DWORD PTR[16+r11]
|
|
punpckldq xmm5,xmm1
|
|
punpckldq xmm0,xmm2
|
|
punpckldq xmm5,xmm0
|
|
movdqa xmm7,xmm8
|
|
DB 102,15,56,0,238
|
|
movdqa xmm2,xmm8
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm8
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(64-128)+rax],xmm5
|
|
paddd xmm5,xmm11
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm8
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm3,xmm8
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm10
|
|
pand xmm3,xmm9
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm12
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm12
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
movdqa xmm3,xmm13
|
|
movdqa xmm7,xmm12
|
|
pslld xmm2,10
|
|
pxor xmm3,xmm12
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm4,xmm3
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm11,xmm13
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm11,xmm4
|
|
paddd xmm15,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm11,xmm5
|
|
paddd xmm11,xmm7
|
|
movd xmm5,DWORD PTR[20+r8]
|
|
movd xmm0,DWORD PTR[20+r9]
|
|
movd xmm1,DWORD PTR[20+r10]
|
|
movd xmm2,DWORD PTR[20+r11]
|
|
punpckldq xmm5,xmm1
|
|
punpckldq xmm0,xmm2
|
|
punpckldq xmm5,xmm0
|
|
movdqa xmm7,xmm15
|
|
|
|
movdqa xmm2,xmm15
|
|
DB 102,15,56,0,238
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm15
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(80-128)+rax],xmm5
|
|
paddd xmm5,xmm10
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[32+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm15
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm4,xmm15
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm9
|
|
pand xmm4,xmm8
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm11
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm11
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm4
|
|
movdqa xmm4,xmm12
|
|
movdqa xmm7,xmm11
|
|
pslld xmm2,10
|
|
pxor xmm4,xmm11
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm3,xmm4
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm10,xmm12
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm10,xmm3
|
|
paddd xmm14,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm10,xmm5
|
|
paddd xmm10,xmm7
|
|
movd xmm5,DWORD PTR[24+r8]
|
|
movd xmm0,DWORD PTR[24+r9]
|
|
movd xmm1,DWORD PTR[24+r10]
|
|
movd xmm2,DWORD PTR[24+r11]
|
|
punpckldq xmm5,xmm1
|
|
punpckldq xmm0,xmm2
|
|
punpckldq xmm5,xmm0
|
|
movdqa xmm7,xmm14
|
|
DB 102,15,56,0,238
|
|
movdqa xmm2,xmm14
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm14
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(96-128)+rax],xmm5
|
|
paddd xmm5,xmm9
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[64+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm14
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm3,xmm14
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm8
|
|
pand xmm3,xmm15
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm10
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm10
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
movdqa xmm3,xmm11
|
|
movdqa xmm7,xmm10
|
|
pslld xmm2,10
|
|
pxor xmm3,xmm10
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm4,xmm3
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm9,xmm11
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm9,xmm4
|
|
paddd xmm13,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm9,xmm5
|
|
paddd xmm9,xmm7
|
|
movd xmm5,DWORD PTR[28+r8]
|
|
movd xmm0,DWORD PTR[28+r9]
|
|
movd xmm1,DWORD PTR[28+r10]
|
|
movd xmm2,DWORD PTR[28+r11]
|
|
punpckldq xmm5,xmm1
|
|
punpckldq xmm0,xmm2
|
|
punpckldq xmm5,xmm0
|
|
movdqa xmm7,xmm13
|
|
|
|
movdqa xmm2,xmm13
|
|
DB 102,15,56,0,238
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm13
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(112-128)+rax],xmm5
|
|
paddd xmm5,xmm8
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[96+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm13
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm4,xmm13
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm15
|
|
pand xmm4,xmm14
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm9
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm9
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm4
|
|
movdqa xmm4,xmm10
|
|
movdqa xmm7,xmm9
|
|
pslld xmm2,10
|
|
pxor xmm4,xmm9
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm3,xmm4
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm8,xmm10
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm8,xmm3
|
|
paddd xmm12,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm8,xmm5
|
|
paddd xmm8,xmm7
|
|
lea rbp,QWORD PTR[256+rbp]
|
|
movd xmm5,DWORD PTR[32+r8]
|
|
movd xmm0,DWORD PTR[32+r9]
|
|
movd xmm1,DWORD PTR[32+r10]
|
|
movd xmm2,DWORD PTR[32+r11]
|
|
punpckldq xmm5,xmm1
|
|
punpckldq xmm0,xmm2
|
|
punpckldq xmm5,xmm0
|
|
movdqa xmm7,xmm12
|
|
DB 102,15,56,0,238
|
|
movdqa xmm2,xmm12
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm12
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(128-128)+rax],xmm5
|
|
paddd xmm5,xmm15
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[((-128))+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm12
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm3,xmm12
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm14
|
|
pand xmm3,xmm13
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm8
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm8
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
movdqa xmm3,xmm9
|
|
movdqa xmm7,xmm8
|
|
pslld xmm2,10
|
|
pxor xmm3,xmm8
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm4,xmm3
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm15,xmm9
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm15,xmm4
|
|
paddd xmm11,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm15,xmm5
|
|
paddd xmm15,xmm7
|
|
movd xmm5,DWORD PTR[36+r8]
|
|
movd xmm0,DWORD PTR[36+r9]
|
|
movd xmm1,DWORD PTR[36+r10]
|
|
movd xmm2,DWORD PTR[36+r11]
|
|
punpckldq xmm5,xmm1
|
|
punpckldq xmm0,xmm2
|
|
punpckldq xmm5,xmm0
|
|
movdqa xmm7,xmm11
|
|
|
|
movdqa xmm2,xmm11
|
|
DB 102,15,56,0,238
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm11
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(144-128)+rax],xmm5
|
|
paddd xmm5,xmm14
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[((-96))+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm11
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm4,xmm11
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm13
|
|
pand xmm4,xmm12
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm15
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm15
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm4
|
|
movdqa xmm4,xmm8
|
|
movdqa xmm7,xmm15
|
|
pslld xmm2,10
|
|
pxor xmm4,xmm15
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm3,xmm4
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm14,xmm8
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm14,xmm3
|
|
paddd xmm10,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm14,xmm5
|
|
paddd xmm14,xmm7
|
|
movd xmm5,DWORD PTR[40+r8]
|
|
movd xmm0,DWORD PTR[40+r9]
|
|
movd xmm1,DWORD PTR[40+r10]
|
|
movd xmm2,DWORD PTR[40+r11]
|
|
punpckldq xmm5,xmm1
|
|
punpckldq xmm0,xmm2
|
|
punpckldq xmm5,xmm0
|
|
movdqa xmm7,xmm10
|
|
DB 102,15,56,0,238
|
|
movdqa xmm2,xmm10
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm10
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(160-128)+rax],xmm5
|
|
paddd xmm5,xmm13
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[((-64))+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm10
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm3,xmm10
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm12
|
|
pand xmm3,xmm11
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm14
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm14
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
movdqa xmm3,xmm15
|
|
movdqa xmm7,xmm14
|
|
pslld xmm2,10
|
|
pxor xmm3,xmm14
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm4,xmm3
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm13,xmm15
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm13,xmm4
|
|
paddd xmm9,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm13,xmm5
|
|
paddd xmm13,xmm7
|
|
movd xmm5,DWORD PTR[44+r8]
|
|
movd xmm0,DWORD PTR[44+r9]
|
|
movd xmm1,DWORD PTR[44+r10]
|
|
movd xmm2,DWORD PTR[44+r11]
|
|
punpckldq xmm5,xmm1
|
|
punpckldq xmm0,xmm2
|
|
punpckldq xmm5,xmm0
|
|
movdqa xmm7,xmm9
|
|
|
|
movdqa xmm2,xmm9
|
|
DB 102,15,56,0,238
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm9
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(176-128)+rax],xmm5
|
|
paddd xmm5,xmm12
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[((-32))+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm9
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm4,xmm9
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm11
|
|
pand xmm4,xmm10
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm13
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm13
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm4
|
|
movdqa xmm4,xmm14
|
|
movdqa xmm7,xmm13
|
|
pslld xmm2,10
|
|
pxor xmm4,xmm13
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm3,xmm4
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm12,xmm14
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm12,xmm3
|
|
paddd xmm8,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm12,xmm5
|
|
paddd xmm12,xmm7
|
|
movd xmm5,DWORD PTR[48+r8]
|
|
movd xmm0,DWORD PTR[48+r9]
|
|
movd xmm1,DWORD PTR[48+r10]
|
|
movd xmm2,DWORD PTR[48+r11]
|
|
punpckldq xmm5,xmm1
|
|
punpckldq xmm0,xmm2
|
|
punpckldq xmm5,xmm0
|
|
movdqa xmm7,xmm8
|
|
DB 102,15,56,0,238
|
|
movdqa xmm2,xmm8
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm8
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(192-128)+rax],xmm5
|
|
paddd xmm5,xmm11
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm8
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm3,xmm8
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm10
|
|
pand xmm3,xmm9
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm12
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm12
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
movdqa xmm3,xmm13
|
|
movdqa xmm7,xmm12
|
|
pslld xmm2,10
|
|
pxor xmm3,xmm12
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm4,xmm3
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm11,xmm13
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm11,xmm4
|
|
paddd xmm15,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm11,xmm5
|
|
paddd xmm11,xmm7
|
|
movd xmm5,DWORD PTR[52+r8]
|
|
movd xmm0,DWORD PTR[52+r9]
|
|
movd xmm1,DWORD PTR[52+r10]
|
|
movd xmm2,DWORD PTR[52+r11]
|
|
punpckldq xmm5,xmm1
|
|
punpckldq xmm0,xmm2
|
|
punpckldq xmm5,xmm0
|
|
movdqa xmm7,xmm15
|
|
|
|
movdqa xmm2,xmm15
|
|
DB 102,15,56,0,238
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm15
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(208-128)+rax],xmm5
|
|
paddd xmm5,xmm10
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[32+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm15
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm4,xmm15
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm9
|
|
pand xmm4,xmm8
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm11
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm11
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm4
|
|
movdqa xmm4,xmm12
|
|
movdqa xmm7,xmm11
|
|
pslld xmm2,10
|
|
pxor xmm4,xmm11
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm3,xmm4
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm10,xmm12
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm10,xmm3
|
|
paddd xmm14,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm10,xmm5
|
|
paddd xmm10,xmm7
|
|
movd xmm5,DWORD PTR[56+r8]
|
|
movd xmm0,DWORD PTR[56+r9]
|
|
movd xmm1,DWORD PTR[56+r10]
|
|
movd xmm2,DWORD PTR[56+r11]
|
|
punpckldq xmm5,xmm1
|
|
punpckldq xmm0,xmm2
|
|
punpckldq xmm5,xmm0
|
|
movdqa xmm7,xmm14
|
|
DB 102,15,56,0,238
|
|
movdqa xmm2,xmm14
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm14
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(224-128)+rax],xmm5
|
|
paddd xmm5,xmm9
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[64+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm14
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm3,xmm14
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm8
|
|
pand xmm3,xmm15
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm10
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm10
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
movdqa xmm3,xmm11
|
|
movdqa xmm7,xmm10
|
|
pslld xmm2,10
|
|
pxor xmm3,xmm10
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm4,xmm3
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm9,xmm11
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm9,xmm4
|
|
paddd xmm13,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm9,xmm5
|
|
paddd xmm9,xmm7
|
|
movd xmm5,DWORD PTR[60+r8]
|
|
lea r8,QWORD PTR[64+r8]
|
|
movd xmm0,DWORD PTR[60+r9]
|
|
lea r9,QWORD PTR[64+r9]
|
|
movd xmm1,DWORD PTR[60+r10]
|
|
lea r10,QWORD PTR[64+r10]
|
|
movd xmm2,DWORD PTR[60+r11]
|
|
lea r11,QWORD PTR[64+r11]
|
|
punpckldq xmm5,xmm1
|
|
punpckldq xmm0,xmm2
|
|
punpckldq xmm5,xmm0
|
|
movdqa xmm7,xmm13
|
|
|
|
movdqa xmm2,xmm13
|
|
DB 102,15,56,0,238
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm13
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(240-128)+rax],xmm5
|
|
paddd xmm5,xmm8
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[96+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm13
|
|
prefetcht0 [63+r8]
|
|
pxor xmm7,xmm2
|
|
movdqa xmm4,xmm13
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm15
|
|
pand xmm4,xmm14
|
|
pxor xmm7,xmm1
|
|
|
|
prefetcht0 [63+r9]
|
|
movdqa xmm1,xmm9
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm9
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm4
|
|
movdqa xmm4,xmm10
|
|
movdqa xmm7,xmm9
|
|
pslld xmm2,10
|
|
pxor xmm4,xmm9
|
|
|
|
prefetcht0 [63+r10]
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm3,xmm4
|
|
pxor xmm1,xmm7
|
|
|
|
prefetcht0 [63+r11]
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm8,xmm10
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm8,xmm3
|
|
paddd xmm12,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm8,xmm5
|
|
paddd xmm8,xmm7
|
|
lea rbp,QWORD PTR[256+rbp]
|
|
movdqu xmm5,XMMWORD PTR[((0-128))+rax]
|
|
mov ecx,3
|
|
jmp $L$oop_16_xx
|
|
ALIGN 32
|
|
$L$oop_16_xx::
|
|
movdqa xmm6,XMMWORD PTR[((16-128))+rax]
|
|
paddd xmm5,XMMWORD PTR[((144-128))+rax]
|
|
|
|
movdqa xmm7,xmm6
|
|
movdqa xmm1,xmm6
|
|
psrld xmm7,3
|
|
movdqa xmm2,xmm6
|
|
|
|
psrld xmm1,7
|
|
movdqa xmm0,XMMWORD PTR[((224-128))+rax]
|
|
pslld xmm2,14
|
|
pxor xmm7,xmm1
|
|
psrld xmm1,18-7
|
|
movdqa xmm3,xmm0
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,25-14
|
|
pxor xmm7,xmm1
|
|
psrld xmm0,10
|
|
movdqa xmm1,xmm3
|
|
|
|
psrld xmm3,17
|
|
pxor xmm7,xmm2
|
|
pslld xmm1,13
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
psrld xmm3,19-17
|
|
pxor xmm0,xmm1
|
|
pslld xmm1,15-13
|
|
pxor xmm0,xmm3
|
|
pxor xmm0,xmm1
|
|
paddd xmm5,xmm0
|
|
movdqa xmm7,xmm12
|
|
|
|
movdqa xmm2,xmm12
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm12
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(0-128)+rax],xmm5
|
|
paddd xmm5,xmm15
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[((-128))+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm12
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm3,xmm12
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm14
|
|
pand xmm3,xmm13
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm8
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm8
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
movdqa xmm3,xmm9
|
|
movdqa xmm7,xmm8
|
|
pslld xmm2,10
|
|
pxor xmm3,xmm8
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm4,xmm3
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm15,xmm9
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm15,xmm4
|
|
paddd xmm11,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm15,xmm5
|
|
paddd xmm15,xmm7
|
|
movdqa xmm5,XMMWORD PTR[((32-128))+rax]
|
|
paddd xmm6,XMMWORD PTR[((160-128))+rax]
|
|
|
|
movdqa xmm7,xmm5
|
|
movdqa xmm1,xmm5
|
|
psrld xmm7,3
|
|
movdqa xmm2,xmm5
|
|
|
|
psrld xmm1,7
|
|
movdqa xmm0,XMMWORD PTR[((240-128))+rax]
|
|
pslld xmm2,14
|
|
pxor xmm7,xmm1
|
|
psrld xmm1,18-7
|
|
movdqa xmm4,xmm0
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,25-14
|
|
pxor xmm7,xmm1
|
|
psrld xmm0,10
|
|
movdqa xmm1,xmm4
|
|
|
|
psrld xmm4,17
|
|
pxor xmm7,xmm2
|
|
pslld xmm1,13
|
|
paddd xmm6,xmm7
|
|
pxor xmm0,xmm4
|
|
psrld xmm4,19-17
|
|
pxor xmm0,xmm1
|
|
pslld xmm1,15-13
|
|
pxor xmm0,xmm4
|
|
pxor xmm0,xmm1
|
|
paddd xmm6,xmm0
|
|
movdqa xmm7,xmm11
|
|
|
|
movdqa xmm2,xmm11
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm11
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(16-128)+rax],xmm6
|
|
paddd xmm6,xmm14
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm6,XMMWORD PTR[((-96))+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm11
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm4,xmm11
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm13
|
|
pand xmm4,xmm12
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm15
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm15
|
|
psrld xmm1,2
|
|
paddd xmm6,xmm7
|
|
pxor xmm0,xmm4
|
|
movdqa xmm4,xmm8
|
|
movdqa xmm7,xmm15
|
|
pslld xmm2,10
|
|
pxor xmm4,xmm15
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm6,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm3,xmm4
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm14,xmm8
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm14,xmm3
|
|
paddd xmm10,xmm6
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm14,xmm6
|
|
paddd xmm14,xmm7
|
|
movdqa xmm6,XMMWORD PTR[((48-128))+rax]
|
|
paddd xmm5,XMMWORD PTR[((176-128))+rax]
|
|
|
|
movdqa xmm7,xmm6
|
|
movdqa xmm1,xmm6
|
|
psrld xmm7,3
|
|
movdqa xmm2,xmm6
|
|
|
|
psrld xmm1,7
|
|
movdqa xmm0,XMMWORD PTR[((0-128))+rax]
|
|
pslld xmm2,14
|
|
pxor xmm7,xmm1
|
|
psrld xmm1,18-7
|
|
movdqa xmm3,xmm0
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,25-14
|
|
pxor xmm7,xmm1
|
|
psrld xmm0,10
|
|
movdqa xmm1,xmm3
|
|
|
|
psrld xmm3,17
|
|
pxor xmm7,xmm2
|
|
pslld xmm1,13
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
psrld xmm3,19-17
|
|
pxor xmm0,xmm1
|
|
pslld xmm1,15-13
|
|
pxor xmm0,xmm3
|
|
pxor xmm0,xmm1
|
|
paddd xmm5,xmm0
|
|
movdqa xmm7,xmm10
|
|
|
|
movdqa xmm2,xmm10
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm10
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(32-128)+rax],xmm5
|
|
paddd xmm5,xmm13
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[((-64))+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm10
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm3,xmm10
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm12
|
|
pand xmm3,xmm11
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm14
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm14
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
movdqa xmm3,xmm15
|
|
movdqa xmm7,xmm14
|
|
pslld xmm2,10
|
|
pxor xmm3,xmm14
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm4,xmm3
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm13,xmm15
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm13,xmm4
|
|
paddd xmm9,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm13,xmm5
|
|
paddd xmm13,xmm7
|
|
movdqa xmm5,XMMWORD PTR[((64-128))+rax]
|
|
paddd xmm6,XMMWORD PTR[((192-128))+rax]
|
|
|
|
movdqa xmm7,xmm5
|
|
movdqa xmm1,xmm5
|
|
psrld xmm7,3
|
|
movdqa xmm2,xmm5
|
|
|
|
psrld xmm1,7
|
|
movdqa xmm0,XMMWORD PTR[((16-128))+rax]
|
|
pslld xmm2,14
|
|
pxor xmm7,xmm1
|
|
psrld xmm1,18-7
|
|
movdqa xmm4,xmm0
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,25-14
|
|
pxor xmm7,xmm1
|
|
psrld xmm0,10
|
|
movdqa xmm1,xmm4
|
|
|
|
psrld xmm4,17
|
|
pxor xmm7,xmm2
|
|
pslld xmm1,13
|
|
paddd xmm6,xmm7
|
|
pxor xmm0,xmm4
|
|
psrld xmm4,19-17
|
|
pxor xmm0,xmm1
|
|
pslld xmm1,15-13
|
|
pxor xmm0,xmm4
|
|
pxor xmm0,xmm1
|
|
paddd xmm6,xmm0
|
|
movdqa xmm7,xmm9
|
|
|
|
movdqa xmm2,xmm9
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm9
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(48-128)+rax],xmm6
|
|
paddd xmm6,xmm12
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm6,XMMWORD PTR[((-32))+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm9
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm4,xmm9
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm11
|
|
pand xmm4,xmm10
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm13
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm13
|
|
psrld xmm1,2
|
|
paddd xmm6,xmm7
|
|
pxor xmm0,xmm4
|
|
movdqa xmm4,xmm14
|
|
movdqa xmm7,xmm13
|
|
pslld xmm2,10
|
|
pxor xmm4,xmm13
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm6,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm3,xmm4
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm12,xmm14
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm12,xmm3
|
|
paddd xmm8,xmm6
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm12,xmm6
|
|
paddd xmm12,xmm7
|
|
movdqa xmm6,XMMWORD PTR[((80-128))+rax]
|
|
paddd xmm5,XMMWORD PTR[((208-128))+rax]
|
|
|
|
movdqa xmm7,xmm6
|
|
movdqa xmm1,xmm6
|
|
psrld xmm7,3
|
|
movdqa xmm2,xmm6
|
|
|
|
psrld xmm1,7
|
|
movdqa xmm0,XMMWORD PTR[((32-128))+rax]
|
|
pslld xmm2,14
|
|
pxor xmm7,xmm1
|
|
psrld xmm1,18-7
|
|
movdqa xmm3,xmm0
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,25-14
|
|
pxor xmm7,xmm1
|
|
psrld xmm0,10
|
|
movdqa xmm1,xmm3
|
|
|
|
psrld xmm3,17
|
|
pxor xmm7,xmm2
|
|
pslld xmm1,13
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
psrld xmm3,19-17
|
|
pxor xmm0,xmm1
|
|
pslld xmm1,15-13
|
|
pxor xmm0,xmm3
|
|
pxor xmm0,xmm1
|
|
paddd xmm5,xmm0
|
|
movdqa xmm7,xmm8
|
|
|
|
movdqa xmm2,xmm8
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm8
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(64-128)+rax],xmm5
|
|
paddd xmm5,xmm11
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm8
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm3,xmm8
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm10
|
|
pand xmm3,xmm9
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm12
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm12
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
movdqa xmm3,xmm13
|
|
movdqa xmm7,xmm12
|
|
pslld xmm2,10
|
|
pxor xmm3,xmm12
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm4,xmm3
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm11,xmm13
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm11,xmm4
|
|
paddd xmm15,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm11,xmm5
|
|
paddd xmm11,xmm7
|
|
movdqa xmm5,XMMWORD PTR[((96-128))+rax]
|
|
paddd xmm6,XMMWORD PTR[((224-128))+rax]
|
|
|
|
movdqa xmm7,xmm5
|
|
movdqa xmm1,xmm5
|
|
psrld xmm7,3
|
|
movdqa xmm2,xmm5
|
|
|
|
psrld xmm1,7
|
|
movdqa xmm0,XMMWORD PTR[((48-128))+rax]
|
|
pslld xmm2,14
|
|
pxor xmm7,xmm1
|
|
psrld xmm1,18-7
|
|
movdqa xmm4,xmm0
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,25-14
|
|
pxor xmm7,xmm1
|
|
psrld xmm0,10
|
|
movdqa xmm1,xmm4
|
|
|
|
psrld xmm4,17
|
|
pxor xmm7,xmm2
|
|
pslld xmm1,13
|
|
paddd xmm6,xmm7
|
|
pxor xmm0,xmm4
|
|
psrld xmm4,19-17
|
|
pxor xmm0,xmm1
|
|
pslld xmm1,15-13
|
|
pxor xmm0,xmm4
|
|
pxor xmm0,xmm1
|
|
paddd xmm6,xmm0
|
|
movdqa xmm7,xmm15
|
|
|
|
movdqa xmm2,xmm15
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm15
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(80-128)+rax],xmm6
|
|
paddd xmm6,xmm10
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm6,XMMWORD PTR[32+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm15
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm4,xmm15
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm9
|
|
pand xmm4,xmm8
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm11
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm11
|
|
psrld xmm1,2
|
|
paddd xmm6,xmm7
|
|
pxor xmm0,xmm4
|
|
movdqa xmm4,xmm12
|
|
movdqa xmm7,xmm11
|
|
pslld xmm2,10
|
|
pxor xmm4,xmm11
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm6,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm3,xmm4
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm10,xmm12
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm10,xmm3
|
|
paddd xmm14,xmm6
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm10,xmm6
|
|
paddd xmm10,xmm7
|
|
movdqa xmm6,XMMWORD PTR[((112-128))+rax]
|
|
paddd xmm5,XMMWORD PTR[((240-128))+rax]
|
|
|
|
movdqa xmm7,xmm6
|
|
movdqa xmm1,xmm6
|
|
psrld xmm7,3
|
|
movdqa xmm2,xmm6
|
|
|
|
psrld xmm1,7
|
|
movdqa xmm0,XMMWORD PTR[((64-128))+rax]
|
|
pslld xmm2,14
|
|
pxor xmm7,xmm1
|
|
psrld xmm1,18-7
|
|
movdqa xmm3,xmm0
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,25-14
|
|
pxor xmm7,xmm1
|
|
psrld xmm0,10
|
|
movdqa xmm1,xmm3
|
|
|
|
psrld xmm3,17
|
|
pxor xmm7,xmm2
|
|
pslld xmm1,13
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
psrld xmm3,19-17
|
|
pxor xmm0,xmm1
|
|
pslld xmm1,15-13
|
|
pxor xmm0,xmm3
|
|
pxor xmm0,xmm1
|
|
paddd xmm5,xmm0
|
|
movdqa xmm7,xmm14
|
|
|
|
movdqa xmm2,xmm14
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm14
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(96-128)+rax],xmm5
|
|
paddd xmm5,xmm9
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[64+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm14
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm3,xmm14
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm8
|
|
pand xmm3,xmm15
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm10
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm10
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
movdqa xmm3,xmm11
|
|
movdqa xmm7,xmm10
|
|
pslld xmm2,10
|
|
pxor xmm3,xmm10
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm4,xmm3
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm9,xmm11
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm9,xmm4
|
|
paddd xmm13,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm9,xmm5
|
|
paddd xmm9,xmm7
|
|
movdqa xmm5,XMMWORD PTR[((128-128))+rax]
|
|
paddd xmm6,XMMWORD PTR[((0-128))+rax]
|
|
|
|
movdqa xmm7,xmm5
|
|
movdqa xmm1,xmm5
|
|
psrld xmm7,3
|
|
movdqa xmm2,xmm5
|
|
|
|
psrld xmm1,7
|
|
movdqa xmm0,XMMWORD PTR[((80-128))+rax]
|
|
pslld xmm2,14
|
|
pxor xmm7,xmm1
|
|
psrld xmm1,18-7
|
|
movdqa xmm4,xmm0
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,25-14
|
|
pxor xmm7,xmm1
|
|
psrld xmm0,10
|
|
movdqa xmm1,xmm4
|
|
|
|
psrld xmm4,17
|
|
pxor xmm7,xmm2
|
|
pslld xmm1,13
|
|
paddd xmm6,xmm7
|
|
pxor xmm0,xmm4
|
|
psrld xmm4,19-17
|
|
pxor xmm0,xmm1
|
|
pslld xmm1,15-13
|
|
pxor xmm0,xmm4
|
|
pxor xmm0,xmm1
|
|
paddd xmm6,xmm0
|
|
movdqa xmm7,xmm13
|
|
|
|
movdqa xmm2,xmm13
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm13
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(112-128)+rax],xmm6
|
|
paddd xmm6,xmm8
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm6,XMMWORD PTR[96+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm13
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm4,xmm13
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm15
|
|
pand xmm4,xmm14
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm9
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm9
|
|
psrld xmm1,2
|
|
paddd xmm6,xmm7
|
|
pxor xmm0,xmm4
|
|
movdqa xmm4,xmm10
|
|
movdqa xmm7,xmm9
|
|
pslld xmm2,10
|
|
pxor xmm4,xmm9
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm6,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm3,xmm4
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm8,xmm10
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm8,xmm3
|
|
paddd xmm12,xmm6
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm8,xmm6
|
|
paddd xmm8,xmm7
|
|
lea rbp,QWORD PTR[256+rbp]
|
|
movdqa xmm6,XMMWORD PTR[((144-128))+rax]
|
|
paddd xmm5,XMMWORD PTR[((16-128))+rax]
|
|
|
|
movdqa xmm7,xmm6
|
|
movdqa xmm1,xmm6
|
|
psrld xmm7,3
|
|
movdqa xmm2,xmm6
|
|
|
|
psrld xmm1,7
|
|
movdqa xmm0,XMMWORD PTR[((96-128))+rax]
|
|
pslld xmm2,14
|
|
pxor xmm7,xmm1
|
|
psrld xmm1,18-7
|
|
movdqa xmm3,xmm0
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,25-14
|
|
pxor xmm7,xmm1
|
|
psrld xmm0,10
|
|
movdqa xmm1,xmm3
|
|
|
|
psrld xmm3,17
|
|
pxor xmm7,xmm2
|
|
pslld xmm1,13
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
psrld xmm3,19-17
|
|
pxor xmm0,xmm1
|
|
pslld xmm1,15-13
|
|
pxor xmm0,xmm3
|
|
pxor xmm0,xmm1
|
|
paddd xmm5,xmm0
|
|
movdqa xmm7,xmm12
|
|
|
|
movdqa xmm2,xmm12
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm12
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(128-128)+rax],xmm5
|
|
paddd xmm5,xmm15
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[((-128))+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm12
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm3,xmm12
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm14
|
|
pand xmm3,xmm13
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm8
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm8
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
movdqa xmm3,xmm9
|
|
movdqa xmm7,xmm8
|
|
pslld xmm2,10
|
|
pxor xmm3,xmm8
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm4,xmm3
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm15,xmm9
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm15,xmm4
|
|
paddd xmm11,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm15,xmm5
|
|
paddd xmm15,xmm7
|
|
movdqa xmm5,XMMWORD PTR[((160-128))+rax]
|
|
paddd xmm6,XMMWORD PTR[((32-128))+rax]
|
|
|
|
movdqa xmm7,xmm5
|
|
movdqa xmm1,xmm5
|
|
psrld xmm7,3
|
|
movdqa xmm2,xmm5
|
|
|
|
psrld xmm1,7
|
|
movdqa xmm0,XMMWORD PTR[((112-128))+rax]
|
|
pslld xmm2,14
|
|
pxor xmm7,xmm1
|
|
psrld xmm1,18-7
|
|
movdqa xmm4,xmm0
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,25-14
|
|
pxor xmm7,xmm1
|
|
psrld xmm0,10
|
|
movdqa xmm1,xmm4
|
|
|
|
psrld xmm4,17
|
|
pxor xmm7,xmm2
|
|
pslld xmm1,13
|
|
paddd xmm6,xmm7
|
|
pxor xmm0,xmm4
|
|
psrld xmm4,19-17
|
|
pxor xmm0,xmm1
|
|
pslld xmm1,15-13
|
|
pxor xmm0,xmm4
|
|
pxor xmm0,xmm1
|
|
paddd xmm6,xmm0
|
|
movdqa xmm7,xmm11
|
|
|
|
movdqa xmm2,xmm11
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm11
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(144-128)+rax],xmm6
|
|
paddd xmm6,xmm14
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm6,XMMWORD PTR[((-96))+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm11
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm4,xmm11
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm13
|
|
pand xmm4,xmm12
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm15
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm15
|
|
psrld xmm1,2
|
|
paddd xmm6,xmm7
|
|
pxor xmm0,xmm4
|
|
movdqa xmm4,xmm8
|
|
movdqa xmm7,xmm15
|
|
pslld xmm2,10
|
|
pxor xmm4,xmm15
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm6,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm3,xmm4
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm14,xmm8
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm14,xmm3
|
|
paddd xmm10,xmm6
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm14,xmm6
|
|
paddd xmm14,xmm7
|
|
movdqa xmm6,XMMWORD PTR[((176-128))+rax]
|
|
paddd xmm5,XMMWORD PTR[((48-128))+rax]
|
|
|
|
movdqa xmm7,xmm6
|
|
movdqa xmm1,xmm6
|
|
psrld xmm7,3
|
|
movdqa xmm2,xmm6
|
|
|
|
psrld xmm1,7
|
|
movdqa xmm0,XMMWORD PTR[((128-128))+rax]
|
|
pslld xmm2,14
|
|
pxor xmm7,xmm1
|
|
psrld xmm1,18-7
|
|
movdqa xmm3,xmm0
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,25-14
|
|
pxor xmm7,xmm1
|
|
psrld xmm0,10
|
|
movdqa xmm1,xmm3
|
|
|
|
psrld xmm3,17
|
|
pxor xmm7,xmm2
|
|
pslld xmm1,13
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
psrld xmm3,19-17
|
|
pxor xmm0,xmm1
|
|
pslld xmm1,15-13
|
|
pxor xmm0,xmm3
|
|
pxor xmm0,xmm1
|
|
paddd xmm5,xmm0
|
|
movdqa xmm7,xmm10
|
|
|
|
movdqa xmm2,xmm10
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm10
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(160-128)+rax],xmm5
|
|
paddd xmm5,xmm13
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[((-64))+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm10
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm3,xmm10
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm12
|
|
pand xmm3,xmm11
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm14
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm14
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
movdqa xmm3,xmm15
|
|
movdqa xmm7,xmm14
|
|
pslld xmm2,10
|
|
pxor xmm3,xmm14
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm4,xmm3
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm13,xmm15
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm13,xmm4
|
|
paddd xmm9,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm13,xmm5
|
|
paddd xmm13,xmm7
|
|
movdqa xmm5,XMMWORD PTR[((192-128))+rax]
|
|
paddd xmm6,XMMWORD PTR[((64-128))+rax]
|
|
|
|
movdqa xmm7,xmm5
|
|
movdqa xmm1,xmm5
|
|
psrld xmm7,3
|
|
movdqa xmm2,xmm5
|
|
|
|
psrld xmm1,7
|
|
movdqa xmm0,XMMWORD PTR[((144-128))+rax]
|
|
pslld xmm2,14
|
|
pxor xmm7,xmm1
|
|
psrld xmm1,18-7
|
|
movdqa xmm4,xmm0
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,25-14
|
|
pxor xmm7,xmm1
|
|
psrld xmm0,10
|
|
movdqa xmm1,xmm4
|
|
|
|
psrld xmm4,17
|
|
pxor xmm7,xmm2
|
|
pslld xmm1,13
|
|
paddd xmm6,xmm7
|
|
pxor xmm0,xmm4
|
|
psrld xmm4,19-17
|
|
pxor xmm0,xmm1
|
|
pslld xmm1,15-13
|
|
pxor xmm0,xmm4
|
|
pxor xmm0,xmm1
|
|
paddd xmm6,xmm0
|
|
movdqa xmm7,xmm9
|
|
|
|
movdqa xmm2,xmm9
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm9
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(176-128)+rax],xmm6
|
|
paddd xmm6,xmm12
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm6,XMMWORD PTR[((-32))+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm9
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm4,xmm9
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm11
|
|
pand xmm4,xmm10
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm13
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm13
|
|
psrld xmm1,2
|
|
paddd xmm6,xmm7
|
|
pxor xmm0,xmm4
|
|
movdqa xmm4,xmm14
|
|
movdqa xmm7,xmm13
|
|
pslld xmm2,10
|
|
pxor xmm4,xmm13
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm6,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm3,xmm4
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm12,xmm14
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm12,xmm3
|
|
paddd xmm8,xmm6
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm12,xmm6
|
|
paddd xmm12,xmm7
|
|
movdqa xmm6,XMMWORD PTR[((208-128))+rax]
|
|
paddd xmm5,XMMWORD PTR[((80-128))+rax]
|
|
|
|
movdqa xmm7,xmm6
|
|
movdqa xmm1,xmm6
|
|
psrld xmm7,3
|
|
movdqa xmm2,xmm6
|
|
|
|
psrld xmm1,7
|
|
movdqa xmm0,XMMWORD PTR[((160-128))+rax]
|
|
pslld xmm2,14
|
|
pxor xmm7,xmm1
|
|
psrld xmm1,18-7
|
|
movdqa xmm3,xmm0
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,25-14
|
|
pxor xmm7,xmm1
|
|
psrld xmm0,10
|
|
movdqa xmm1,xmm3
|
|
|
|
psrld xmm3,17
|
|
pxor xmm7,xmm2
|
|
pslld xmm1,13
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
psrld xmm3,19-17
|
|
pxor xmm0,xmm1
|
|
pslld xmm1,15-13
|
|
pxor xmm0,xmm3
|
|
pxor xmm0,xmm1
|
|
paddd xmm5,xmm0
|
|
movdqa xmm7,xmm8
|
|
|
|
movdqa xmm2,xmm8
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm8
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(192-128)+rax],xmm5
|
|
paddd xmm5,xmm11
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm8
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm3,xmm8
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm10
|
|
pand xmm3,xmm9
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm12
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm12
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
movdqa xmm3,xmm13
|
|
movdqa xmm7,xmm12
|
|
pslld xmm2,10
|
|
pxor xmm3,xmm12
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm4,xmm3
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm11,xmm13
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm11,xmm4
|
|
paddd xmm15,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm11,xmm5
|
|
paddd xmm11,xmm7
|
|
movdqa xmm5,XMMWORD PTR[((224-128))+rax]
|
|
paddd xmm6,XMMWORD PTR[((96-128))+rax]
|
|
|
|
movdqa xmm7,xmm5
|
|
movdqa xmm1,xmm5
|
|
psrld xmm7,3
|
|
movdqa xmm2,xmm5
|
|
|
|
psrld xmm1,7
|
|
movdqa xmm0,XMMWORD PTR[((176-128))+rax]
|
|
pslld xmm2,14
|
|
pxor xmm7,xmm1
|
|
psrld xmm1,18-7
|
|
movdqa xmm4,xmm0
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,25-14
|
|
pxor xmm7,xmm1
|
|
psrld xmm0,10
|
|
movdqa xmm1,xmm4
|
|
|
|
psrld xmm4,17
|
|
pxor xmm7,xmm2
|
|
pslld xmm1,13
|
|
paddd xmm6,xmm7
|
|
pxor xmm0,xmm4
|
|
psrld xmm4,19-17
|
|
pxor xmm0,xmm1
|
|
pslld xmm1,15-13
|
|
pxor xmm0,xmm4
|
|
pxor xmm0,xmm1
|
|
paddd xmm6,xmm0
|
|
movdqa xmm7,xmm15
|
|
|
|
movdqa xmm2,xmm15
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm15
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(208-128)+rax],xmm6
|
|
paddd xmm6,xmm10
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm6,XMMWORD PTR[32+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm15
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm4,xmm15
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm9
|
|
pand xmm4,xmm8
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm11
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm11
|
|
psrld xmm1,2
|
|
paddd xmm6,xmm7
|
|
pxor xmm0,xmm4
|
|
movdqa xmm4,xmm12
|
|
movdqa xmm7,xmm11
|
|
pslld xmm2,10
|
|
pxor xmm4,xmm11
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm6,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm3,xmm4
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm10,xmm12
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm10,xmm3
|
|
paddd xmm14,xmm6
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm10,xmm6
|
|
paddd xmm10,xmm7
|
|
movdqa xmm6,XMMWORD PTR[((240-128))+rax]
|
|
paddd xmm5,XMMWORD PTR[((112-128))+rax]
|
|
|
|
movdqa xmm7,xmm6
|
|
movdqa xmm1,xmm6
|
|
psrld xmm7,3
|
|
movdqa xmm2,xmm6
|
|
|
|
psrld xmm1,7
|
|
movdqa xmm0,XMMWORD PTR[((192-128))+rax]
|
|
pslld xmm2,14
|
|
pxor xmm7,xmm1
|
|
psrld xmm1,18-7
|
|
movdqa xmm3,xmm0
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,25-14
|
|
pxor xmm7,xmm1
|
|
psrld xmm0,10
|
|
movdqa xmm1,xmm3
|
|
|
|
psrld xmm3,17
|
|
pxor xmm7,xmm2
|
|
pslld xmm1,13
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
psrld xmm3,19-17
|
|
pxor xmm0,xmm1
|
|
pslld xmm1,15-13
|
|
pxor xmm0,xmm3
|
|
pxor xmm0,xmm1
|
|
paddd xmm5,xmm0
|
|
movdqa xmm7,xmm14
|
|
|
|
movdqa xmm2,xmm14
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm14
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(224-128)+rax],xmm5
|
|
paddd xmm5,xmm9
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm5,XMMWORD PTR[64+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm14
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm3,xmm14
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm8
|
|
pand xmm3,xmm15
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm10
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm10
|
|
psrld xmm1,2
|
|
paddd xmm5,xmm7
|
|
pxor xmm0,xmm3
|
|
movdqa xmm3,xmm11
|
|
movdqa xmm7,xmm10
|
|
pslld xmm2,10
|
|
pxor xmm3,xmm10
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm5,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm4,xmm3
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm9,xmm11
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm9,xmm4
|
|
paddd xmm13,xmm5
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm9,xmm5
|
|
paddd xmm9,xmm7
|
|
movdqa xmm5,XMMWORD PTR[((0-128))+rax]
|
|
paddd xmm6,XMMWORD PTR[((128-128))+rax]
|
|
|
|
movdqa xmm7,xmm5
|
|
movdqa xmm1,xmm5
|
|
psrld xmm7,3
|
|
movdqa xmm2,xmm5
|
|
|
|
psrld xmm1,7
|
|
movdqa xmm0,XMMWORD PTR[((208-128))+rax]
|
|
pslld xmm2,14
|
|
pxor xmm7,xmm1
|
|
psrld xmm1,18-7
|
|
movdqa xmm4,xmm0
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,25-14
|
|
pxor xmm7,xmm1
|
|
psrld xmm0,10
|
|
movdqa xmm1,xmm4
|
|
|
|
psrld xmm4,17
|
|
pxor xmm7,xmm2
|
|
pslld xmm1,13
|
|
paddd xmm6,xmm7
|
|
pxor xmm0,xmm4
|
|
psrld xmm4,19-17
|
|
pxor xmm0,xmm1
|
|
pslld xmm1,15-13
|
|
pxor xmm0,xmm4
|
|
pxor xmm0,xmm1
|
|
paddd xmm6,xmm0
|
|
movdqa xmm7,xmm13
|
|
|
|
movdqa xmm2,xmm13
|
|
|
|
psrld xmm7,6
|
|
movdqa xmm1,xmm13
|
|
pslld xmm2,7
|
|
movdqa XMMWORD PTR[(240-128)+rax],xmm6
|
|
paddd xmm6,xmm8
|
|
|
|
psrld xmm1,11
|
|
pxor xmm7,xmm2
|
|
pslld xmm2,21-7
|
|
paddd xmm6,XMMWORD PTR[96+rbp]
|
|
pxor xmm7,xmm1
|
|
|
|
psrld xmm1,25-11
|
|
movdqa xmm0,xmm13
|
|
|
|
pxor xmm7,xmm2
|
|
movdqa xmm4,xmm13
|
|
pslld xmm2,26-21
|
|
pandn xmm0,xmm15
|
|
pand xmm4,xmm14
|
|
pxor xmm7,xmm1
|
|
|
|
|
|
movdqa xmm1,xmm9
|
|
pxor xmm7,xmm2
|
|
movdqa xmm2,xmm9
|
|
psrld xmm1,2
|
|
paddd xmm6,xmm7
|
|
pxor xmm0,xmm4
|
|
movdqa xmm4,xmm10
|
|
movdqa xmm7,xmm9
|
|
pslld xmm2,10
|
|
pxor xmm4,xmm9
|
|
|
|
|
|
psrld xmm7,13
|
|
pxor xmm1,xmm2
|
|
paddd xmm6,xmm0
|
|
pslld xmm2,19-10
|
|
pand xmm3,xmm4
|
|
pxor xmm1,xmm7
|
|
|
|
|
|
psrld xmm7,22-13
|
|
pxor xmm1,xmm2
|
|
movdqa xmm8,xmm10
|
|
pslld xmm2,30-19
|
|
pxor xmm7,xmm1
|
|
pxor xmm8,xmm3
|
|
paddd xmm12,xmm6
|
|
pxor xmm7,xmm2
|
|
|
|
paddd xmm8,xmm6
|
|
paddd xmm8,xmm7
|
|
lea rbp,QWORD PTR[256+rbp]
|
|
dec ecx
|
|
jnz $L$oop_16_xx
|
|
|
|
mov ecx,1
|
|
lea rbp,QWORD PTR[((K256+128))]
|
|
|
|
movdqa xmm7,XMMWORD PTR[rbx]
|
|
cmp ecx,DWORD PTR[rbx]
|
|
pxor xmm0,xmm0
|
|
cmovge r8,rbp
|
|
cmp ecx,DWORD PTR[4+rbx]
|
|
movdqa xmm6,xmm7
|
|
cmovge r9,rbp
|
|
cmp ecx,DWORD PTR[8+rbx]
|
|
pcmpgtd xmm6,xmm0
|
|
cmovge r10,rbp
|
|
cmp ecx,DWORD PTR[12+rbx]
|
|
paddd xmm7,xmm6
|
|
cmovge r11,rbp
|
|
|
|
movdqu xmm0,XMMWORD PTR[((0-128))+rdi]
|
|
pand xmm8,xmm6
|
|
movdqu xmm1,XMMWORD PTR[((32-128))+rdi]
|
|
pand xmm9,xmm6
|
|
movdqu xmm2,XMMWORD PTR[((64-128))+rdi]
|
|
pand xmm10,xmm6
|
|
movdqu xmm5,XMMWORD PTR[((96-128))+rdi]
|
|
pand xmm11,xmm6
|
|
paddd xmm8,xmm0
|
|
movdqu xmm0,XMMWORD PTR[((128-128))+rdi]
|
|
pand xmm12,xmm6
|
|
paddd xmm9,xmm1
|
|
movdqu xmm1,XMMWORD PTR[((160-128))+rdi]
|
|
pand xmm13,xmm6
|
|
paddd xmm10,xmm2
|
|
movdqu xmm2,XMMWORD PTR[((192-128))+rdi]
|
|
pand xmm14,xmm6
|
|
paddd xmm11,xmm5
|
|
movdqu xmm5,XMMWORD PTR[((224-128))+rdi]
|
|
pand xmm15,xmm6
|
|
paddd xmm12,xmm0
|
|
paddd xmm13,xmm1
|
|
movdqu XMMWORD PTR[(0-128)+rdi],xmm8
|
|
paddd xmm14,xmm2
|
|
movdqu XMMWORD PTR[(32-128)+rdi],xmm9
|
|
paddd xmm15,xmm5
|
|
movdqu XMMWORD PTR[(64-128)+rdi],xmm10
|
|
movdqu XMMWORD PTR[(96-128)+rdi],xmm11
|
|
movdqu XMMWORD PTR[(128-128)+rdi],xmm12
|
|
movdqu XMMWORD PTR[(160-128)+rdi],xmm13
|
|
movdqu XMMWORD PTR[(192-128)+rdi],xmm14
|
|
movdqu XMMWORD PTR[(224-128)+rdi],xmm15
|
|
|
|
movdqa XMMWORD PTR[rbx],xmm7
|
|
movdqa xmm6,XMMWORD PTR[$L$pbswap]
|
|
dec edx
|
|
jnz $L$oop
|
|
|
|
mov edx,DWORD PTR[280+rsp]
|
|
lea rdi,QWORD PTR[16+rdi]
|
|
lea rsi,QWORD PTR[64+rsi]
|
|
dec edx
|
|
jnz $L$oop_grande
|
|
|
|
$L$done::
|
|
mov rax,QWORD PTR[272+rsp]
|
|
movaps xmm6,XMMWORD PTR[((-184))+rax]
|
|
movaps xmm7,XMMWORD PTR[((-168))+rax]
|
|
movaps xmm8,XMMWORD PTR[((-152))+rax]
|
|
movaps xmm9,XMMWORD PTR[((-136))+rax]
|
|
movaps xmm10,XMMWORD PTR[((-120))+rax]
|
|
movaps xmm11,XMMWORD PTR[((-104))+rax]
|
|
movaps xmm12,XMMWORD PTR[((-88))+rax]
|
|
movaps xmm13,XMMWORD PTR[((-72))+rax]
|
|
movaps xmm14,XMMWORD PTR[((-56))+rax]
|
|
movaps xmm15,XMMWORD PTR[((-40))+rax]
|
|
mov rbp,QWORD PTR[((-16))+rax]
|
|
mov rbx,QWORD PTR[((-8))+rax]
|
|
lea rsp,QWORD PTR[rax]
|
|
$L$epilogue::
|
|
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD PTR[16+rsp]
|
|
DB 0F3h,0C3h ;repret
|
|
$L$SEH_end_sha256_multi_block::
|
|
sha256_multi_block ENDP
|
|
|
|
ALIGN 32
|
|
;-----------------------------------------------------------------------
; sha256_multi_block_shaext
;
; Two-lane SHA-256 multi-block routine using the SHA extensions.  The
; SHA/SSSE3 instructions are emitted as raw DB bytes; they decode as:
;   DB 102,[68,]15,56,0,modrm       pshufb      xmmN,xmm3
;   DB [69,]15,56,203,modrm         sha256rnds2 xmmA,xmmB  (xmm0 implicit)
;   DB [69,]15,56,204,modrm         sha256msg1  xmmA,xmmB
;   DB [69,]15,56,205,modrm         sha256msg2  xmmA,xmmB
;   DB 102,[65,]15,58,15,modrm,4    palignr     xmmA,xmmB,4
;
; Entry points:
;   * PROC entry: Win64 arguments rcx, rdx, r8 are copied into
;     rdi, rsi, rdx after rdi/rsi are spilled to the caller's home area.
;   * _shaext_shortcut: reached by `jc _shaext_shortcut` in
;     sha256_multi_block, which has already done that same argument
;     shuffle.
;
; Register roles after the prologue (as used by the code below):
;   rdi  state area, biased by +128; 8-byte items are read/written at
;        32-byte strides (unbiased offsets 0,32,...,224) and rdi steps
;        by 8 per outer iteration
;   rsi  descriptor array: qword pointer at +0, dword count at +8,
;        16 bytes per descriptor, two descriptors per outer iteration
;        NOTE(review): presumably {input pointer, block count} -- confirm
;        against the caller's structure definition
;   edx  outer-loop count (doubled on entry), reused as the inner-loop
;        count (largest of the two lane counters)
;   rbx  -> [256+rsp], two dword per-lane remaining counters
;   rbp  -> K256_shaext+128 (constant table, biased)
;   rax  entry rsp; never written again, used by the epilogue
;   r8/r9  input pointers of the two lanes
;   xmm12/xmm13  state of the r8 lane, xmm14/xmm15 state of the r9 lane
;   xmm4-xmm7    message words of the r8 lane, xmm8-xmm11 of the r9 lane
;
; Frame (rsp is 256-byte aligned after the prologue):
;   [64..127+rsp]  per-block copy of xmm12..xmm15
;   [272+rsp]      entry rsp
;   [280+rsp]      saved outer-loop counter
; Callee-saved rbx, rbp and xmm6-xmm15 are saved below the entry rsp
; and restored relative to rax.
;-----------------------------------------------------------------------
sha256_multi_block_shaext PROC PRIVATE
|
|
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
|
|
mov QWORD PTR[16+rsp],rsi
|
|
mov rax,rsp
|
|
$L$SEH_begin_sha256_multi_block_shaext::
|
|
mov rdi,rcx
|
|
mov rsi,rdx
|
|
mov rdx,r8
|
|
|
|
|
|
; Shared entry: sha256_multi_block jumps here after its own prologue.
_shaext_shortcut::
|
|
; rax = rsp at this point; all restores in the epilogue are rax-relative.
mov rax,rsp
|
|
push rbx
|
|
push rbp
|
|
; 168 bytes below the two pushes hold xmm6..xmm15 (rsp == rax-184 here,
; so [rsp] and [(-184)+rax] name the same slot).
lea rsp,QWORD PTR[((-168))+rsp]
|
|
movaps XMMWORD PTR[rsp],xmm6
|
|
movaps XMMWORD PTR[16+rsp],xmm7
|
|
movaps XMMWORD PTR[32+rsp],xmm8
|
|
movaps XMMWORD PTR[48+rsp],xmm9
|
|
movaps XMMWORD PTR[(-120)+rax],xmm10
|
|
movaps XMMWORD PTR[(-104)+rax],xmm11
|
|
movaps XMMWORD PTR[(-88)+rax],xmm12
|
|
movaps XMMWORD PTR[(-72)+rax],xmm13
|
|
movaps XMMWORD PTR[(-56)+rax],xmm14
|
|
movaps XMMWORD PTR[(-40)+rax],xmm15
|
|
; Reserve the working frame and align it down to 256 bytes.
sub rsp,288
|
|
; Double the outer count: each outer iteration below consumes two
; descriptors (rsi += 32), whereas the loop in sha256_multi_block
; advances rsi by 64 per iteration.
shl edx,1
|
|
and rsp,-256
|
|
; Bias rdi by +128 so the state rows are addressed as (N-128)+rdi.
lea rdi,QWORD PTR[128+rdi]
|
|
mov QWORD PTR[272+rsp],rax
|
|
$L$body_shaext::
|
|
lea rbx,QWORD PTR[256+rsp]
|
|
lea rbp,QWORD PTR[((K256_shaext+128))]
|
|
|
|
; ---- Outer loop: one pair of descriptors / one pair of state lanes ----
$L$oop_grande_shaext::
|
|
; Park the outer counter; edx becomes max(count0, count1, 0).
mov DWORD PTR[280+rsp],edx
|
|
xor edx,edx
|
|
; Lane 0: pointer -> r8, count -> [rbx].
mov r8,QWORD PTR[rsi]
|
|
mov ecx,DWORD PTR[8+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[rbx],ecx
|
|
; A lane whose count is <= 0 (signed) reads from the stack instead of
; its descriptor pointer.
cmovle r8,rsp
|
|
; Lane 1: pointer -> r9, count -> [4+rbx].
mov r9,QWORD PTR[16+rsi]
|
|
mov ecx,DWORD PTR[24+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[4+rbx],ecx
|
|
cmovle r9,rsp
|
|
; Nothing to do for this pair: leave through the epilogue.
test edx,edx
|
|
jz $L$done_shaext
|
|
|
|
; Load 8 bytes (two adjacent dwords) from each of the eight state rows.
movq xmm12,QWORD PTR[((0-128))+rdi]
|
|
movq xmm4,QWORD PTR[((32-128))+rdi]
|
|
movq xmm13,QWORD PTR[((64-128))+rdi]
|
|
movq xmm5,QWORD PTR[((96-128))+rdi]
|
|
movq xmm8,QWORD PTR[((128-128))+rdi]
|
|
movq xmm9,QWORD PTR[((160-128))+rdi]
|
|
movq xmm10,QWORD PTR[((192-128))+rdi]
|
|
movq xmm11,QWORD PTR[((224-128))+rdi]
|
|
|
|
; Interleave the rows and split them by lane into xmm12..xmm15.
punpckldq xmm12,xmm4
|
|
punpckldq xmm13,xmm5
|
|
punpckldq xmm8,xmm9
|
|
punpckldq xmm10,xmm11
|
|
; xmm3 = 16-byte pshufb control stored just before K256_shaext.
; NOTE(review): presumably a byte-swap mask -- the table itself is
; outside this view.
movdqa xmm3,XMMWORD PTR[((K256_shaext-16))]
|
|
|
|
movdqa xmm14,xmm12
|
|
movdqa xmm15,xmm13
|
|
punpcklqdq xmm12,xmm8
|
|
punpcklqdq xmm13,xmm10
|
|
punpckhqdq xmm14,xmm8
|
|
punpckhqdq xmm15,xmm10
|
|
|
|
; Reverse dword order (27 = 00 01 10 11b) in each state register; the
; same shuffle is applied again before the state is written back.
pshufd xmm12,xmm12,27
|
|
pshufd xmm13,xmm13,27
|
|
pshufd xmm14,xmm14,27
|
|
pshufd xmm15,xmm15,27
|
|
jmp $L$oop_shaext
|
|
|
|
ALIGN 32
|
|
; ---- Inner loop: one 64-byte block per lane per iteration ----
; The two lanes are interleaved instruction by instruction.  xmm0 is the
; implicit round-input operand of sha256rnds2, so the order of the
; `movdqa xmm0,...` / `pshufd xmm0,...,00eh` / DB lines must not change.
$L$oop_shaext::
|
|
movdqu xmm4,XMMWORD PTR[r8]
|
|
movdqu xmm8,XMMWORD PTR[r9]
|
|
movdqu xmm5,XMMWORD PTR[16+r8]
|
|
movdqu xmm9,XMMWORD PTR[16+r9]
|
|
movdqu xmm6,XMMWORD PTR[32+r8]
|
|
; pshufb xmm4,xmm3
DB 102,15,56,0,227
|
|
movdqu xmm10,XMMWORD PTR[32+r9]
|
|
; pshufb xmm8,xmm3
DB 102,68,15,56,0,195
|
|
movdqu xmm7,XMMWORD PTR[48+r8]
|
|
lea r8,QWORD PTR[64+r8]
|
|
movdqu xmm11,XMMWORD PTR[48+r9]
|
|
lea r9,QWORD PTR[64+r9]
|
|
|
|
; K row 0 (K256_shaext+0): xmm0/xmm1 = K + W for the r8 lane,
; xmm2 = K + W for the r9 lane.
movdqa xmm0,XMMWORD PTR[((0-128))+rbp]
|
|
DB 102,15,56,0,235
|
|
paddd xmm0,xmm4
|
|
; xmm4 is XORed with xmm12 here and again a few lines below, before
; xmm12 is modified, so its value is unchanged afterwards (likewise
; xmm8/xmm14).  NOTE(review): purpose not evident from this file.
pxor xmm4,xmm12
|
|
movdqa xmm1,xmm0
|
|
movdqa xmm2,XMMWORD PTR[((0-128))+rbp]
|
|
DB 102,68,15,56,0,203
|
|
paddd xmm2,xmm8
|
|
; Keep a copy of the incoming state in [64..127+rsp]; it is added back
; after the last round.
movdqa XMMWORD PTR[80+rsp],xmm13
|
|
; sha256rnds2 xmm13,xmm12
DB 69,15,56,203,236
|
|
pxor xmm8,xmm14
|
|
movdqa xmm0,xmm2
|
|
movdqa XMMWORD PTR[112+rsp],xmm15
|
|
; sha256rnds2 xmm15,xmm14
DB 69,15,56,203,254
|
|
; Move the upper two K+W dwords into the low half of xmm0 for the next
; sha256rnds2.
pshufd xmm0,xmm1,00eh
|
|
pxor xmm4,xmm12
|
|
movdqa XMMWORD PTR[64+rsp],xmm12
|
|
; sha256rnds2 xmm12,xmm13
DB 69,15,56,203,229
|
|
pshufd xmm0,xmm2,00eh
|
|
pxor xmm8,xmm14
|
|
movdqa XMMWORD PTR[96+rsp],xmm14
|
|
; K row 1 (K256_shaext+16)
movdqa xmm1,XMMWORD PTR[((16-128))+rbp]
|
|
paddd xmm1,xmm5
|
|
DB 102,15,56,0,243
|
|
; sha256rnds2 xmm14,xmm15
DB 69,15,56,203,247
|
|
|
|
movdqa xmm0,xmm1
|
|
movdqa xmm2,XMMWORD PTR[((16-128))+rbp]
|
|
paddd xmm2,xmm9
|
|
DB 69,15,56,203,236
|
|
movdqa xmm0,xmm2
|
|
prefetcht0 [127+r8]
|
|
DB 102,15,56,0,251
|
|
DB 102,68,15,56,0,211
|
|
prefetcht0 [127+r9]
|
|
DB 69,15,56,203,254
|
|
pshufd xmm0,xmm1,00eh
|
|
DB 102,68,15,56,0,219
|
|
; sha256msg1 xmm4,xmm5
DB 15,56,204,229
|
|
DB 69,15,56,203,229
|
|
pshufd xmm0,xmm2,00eh
|
|
; K row 2 (K256_shaext+32)
movdqa xmm1,XMMWORD PTR[((32-128))+rbp]
|
|
paddd xmm1,xmm6
|
|
DB 69,15,56,203,247
|
|
|
|
movdqa xmm0,xmm1
|
|
movdqa xmm2,XMMWORD PTR[((32-128))+rbp]
|
|
paddd xmm2,xmm10
|
|
DB 69,15,56,203,236
|
|
; sha256msg1 xmm8,xmm9
DB 69,15,56,204,193
|
|
movdqa xmm0,xmm2
|
|
; From here on xmm3 is reused as a scratch register; the pshufb control
; is reloaded near the end of the loop body.
movdqa xmm3,xmm7
|
|
DB 69,15,56,203,254
|
|
pshufd xmm0,xmm1,00eh
|
|
; palignr xmm3,xmm6,4
DB 102,15,58,15,222,4
|
|
paddd xmm4,xmm3
|
|
movdqa xmm3,xmm11
|
|
; palignr xmm3,xmm10,4
DB 102,65,15,58,15,218,4
|
|
DB 15,56,204,238
|
|
DB 69,15,56,203,229
|
|
pshufd xmm0,xmm2,00eh
|
|
; K row 3 (K256_shaext+48)
movdqa xmm1,XMMWORD PTR[((48-128))+rbp]
|
|
paddd xmm1,xmm7
|
|
DB 69,15,56,203,247
|
|
DB 69,15,56,204,202
|
|
|
|
movdqa xmm0,xmm1
|
|
movdqa xmm2,XMMWORD PTR[((48-128))+rbp]
|
|
paddd xmm8,xmm3
|
|
paddd xmm2,xmm11
|
|
; sha256msg2 xmm4,xmm7
DB 15,56,205,231
|
|
DB 69,15,56,203,236
|
|
movdqa xmm0,xmm2
|
|
movdqa xmm3,xmm4
|
|
DB 102,15,58,15,223,4
|
|
DB 69,15,56,203,254
|
|
; sha256msg2 xmm8,xmm11
DB 69,15,56,205,195
|
|
pshufd xmm0,xmm1,00eh
|
|
paddd xmm5,xmm3
|
|
movdqa xmm3,xmm8
|
|
DB 102,65,15,58,15,219,4
|
|
DB 15,56,204,247
|
|
DB 69,15,56,203,229
|
|
pshufd xmm0,xmm2,00eh
|
|
; K row 4 (K256_shaext+64)
movdqa xmm1,XMMWORD PTR[((64-128))+rbp]
|
|
paddd xmm1,xmm4
|
|
DB 69,15,56,203,247
|
|
DB 69,15,56,204,211
|
|
movdqa xmm0,xmm1
|
|
movdqa xmm2,XMMWORD PTR[((64-128))+rbp]
|
|
paddd xmm9,xmm3
|
|
paddd xmm2,xmm8
|
|
DB 15,56,205,236
|
|
DB 69,15,56,203,236
|
|
movdqa xmm0,xmm2
|
|
movdqa xmm3,xmm5
|
|
DB 102,15,58,15,220,4
|
|
DB 69,15,56,203,254
|
|
DB 69,15,56,205,200
|
|
pshufd xmm0,xmm1,00eh
|
|
paddd xmm6,xmm3
|
|
movdqa xmm3,xmm9
|
|
DB 102,65,15,58,15,216,4
|
|
DB 15,56,204,252
|
|
DB 69,15,56,203,229
|
|
pshufd xmm0,xmm2,00eh
|
|
; K row 5 (K256_shaext+80)
movdqa xmm1,XMMWORD PTR[((80-128))+rbp]
|
|
paddd xmm1,xmm5
|
|
DB 69,15,56,203,247
|
|
DB 69,15,56,204,216
|
|
movdqa xmm0,xmm1
|
|
movdqa xmm2,XMMWORD PTR[((80-128))+rbp]
|
|
paddd xmm10,xmm3
|
|
paddd xmm2,xmm9
|
|
DB 15,56,205,245
|
|
DB 69,15,56,203,236
|
|
movdqa xmm0,xmm2
|
|
movdqa xmm3,xmm6
|
|
DB 102,15,58,15,221,4
|
|
DB 69,15,56,203,254
|
|
DB 69,15,56,205,209
|
|
pshufd xmm0,xmm1,00eh
|
|
paddd xmm7,xmm3
|
|
movdqa xmm3,xmm10
|
|
DB 102,65,15,58,15,217,4
|
|
DB 15,56,204,229
|
|
DB 69,15,56,203,229
|
|
pshufd xmm0,xmm2,00eh
|
|
; K row 6 (K256_shaext+96)
movdqa xmm1,XMMWORD PTR[((96-128))+rbp]
|
|
paddd xmm1,xmm6
|
|
DB 69,15,56,203,247
|
|
DB 69,15,56,204,193
|
|
movdqa xmm0,xmm1
|
|
movdqa xmm2,XMMWORD PTR[((96-128))+rbp]
|
|
paddd xmm11,xmm3
|
|
paddd xmm2,xmm10
|
|
DB 15,56,205,254
|
|
DB 69,15,56,203,236
|
|
movdqa xmm0,xmm2
|
|
movdqa xmm3,xmm7
|
|
DB 102,15,58,15,222,4
|
|
DB 69,15,56,203,254
|
|
DB 69,15,56,205,218
|
|
pshufd xmm0,xmm1,00eh
|
|
paddd xmm4,xmm3
|
|
movdqa xmm3,xmm11
|
|
DB 102,65,15,58,15,218,4
|
|
DB 15,56,204,238
|
|
DB 69,15,56,203,229
|
|
pshufd xmm0,xmm2,00eh
|
|
; K row 7 (K256_shaext+112)
movdqa xmm1,XMMWORD PTR[((112-128))+rbp]
|
|
paddd xmm1,xmm7
|
|
DB 69,15,56,203,247
|
|
DB 69,15,56,204,202
|
|
movdqa xmm0,xmm1
|
|
movdqa xmm2,XMMWORD PTR[((112-128))+rbp]
|
|
paddd xmm8,xmm3
|
|
paddd xmm2,xmm11
|
|
DB 15,56,205,231
|
|
DB 69,15,56,203,236
|
|
movdqa xmm0,xmm2
|
|
movdqa xmm3,xmm4
|
|
DB 102,15,58,15,223,4
|
|
DB 69,15,56,203,254
|
|
DB 69,15,56,205,195
|
|
pshufd xmm0,xmm1,00eh
|
|
paddd xmm5,xmm3
|
|
movdqa xmm3,xmm8
|
|
DB 102,65,15,58,15,219,4
|
|
DB 15,56,204,247
|
|
DB 69,15,56,203,229
|
|
pshufd xmm0,xmm2,00eh
|
|
; K row 8 (K256_shaext+128)
movdqa xmm1,XMMWORD PTR[((128-128))+rbp]
|
|
paddd xmm1,xmm4
|
|
DB 69,15,56,203,247
|
|
DB 69,15,56,204,211
|
|
movdqa xmm0,xmm1
|
|
movdqa xmm2,XMMWORD PTR[((128-128))+rbp]
|
|
paddd xmm9,xmm3
|
|
paddd xmm2,xmm8
|
|
DB 15,56,205,236
|
|
DB 69,15,56,203,236
|
|
movdqa xmm0,xmm2
|
|
movdqa xmm3,xmm5
|
|
DB 102,15,58,15,220,4
|
|
DB 69,15,56,203,254
|
|
DB 69,15,56,205,200
|
|
pshufd xmm0,xmm1,00eh
|
|
paddd xmm6,xmm3
|
|
movdqa xmm3,xmm9
|
|
DB 102,65,15,58,15,216,4
|
|
DB 15,56,204,252
|
|
DB 69,15,56,203,229
|
|
pshufd xmm0,xmm2,00eh
|
|
; K row 9 (K256_shaext+144)
movdqa xmm1,XMMWORD PTR[((144-128))+rbp]
|
|
paddd xmm1,xmm5
|
|
DB 69,15,56,203,247
|
|
DB 69,15,56,204,216
|
|
movdqa xmm0,xmm1
|
|
movdqa xmm2,XMMWORD PTR[((144-128))+rbp]
|
|
paddd xmm10,xmm3
|
|
paddd xmm2,xmm9
|
|
DB 15,56,205,245
|
|
DB 69,15,56,203,236
|
|
movdqa xmm0,xmm2
|
|
movdqa xmm3,xmm6
|
|
DB 102,15,58,15,221,4
|
|
DB 69,15,56,203,254
|
|
DB 69,15,56,205,209
|
|
pshufd xmm0,xmm1,00eh
|
|
paddd xmm7,xmm3
|
|
movdqa xmm3,xmm10
|
|
DB 102,65,15,58,15,217,4
|
|
DB 15,56,204,229
|
|
DB 69,15,56,203,229
|
|
pshufd xmm0,xmm2,00eh
|
|
; K row 10 (K256_shaext+160)
movdqa xmm1,XMMWORD PTR[((160-128))+rbp]
|
|
paddd xmm1,xmm6
|
|
DB 69,15,56,203,247
|
|
DB 69,15,56,204,193
|
|
movdqa xmm0,xmm1
|
|
movdqa xmm2,XMMWORD PTR[((160-128))+rbp]
|
|
paddd xmm11,xmm3
|
|
paddd xmm2,xmm10
|
|
DB 15,56,205,254
|
|
DB 69,15,56,203,236
|
|
movdqa xmm0,xmm2
|
|
movdqa xmm3,xmm7
|
|
DB 102,15,58,15,222,4
|
|
DB 69,15,56,203,254
|
|
DB 69,15,56,205,218
|
|
pshufd xmm0,xmm1,00eh
|
|
paddd xmm4,xmm3
|
|
movdqa xmm3,xmm11
|
|
DB 102,65,15,58,15,218,4
|
|
DB 15,56,204,238
|
|
DB 69,15,56,203,229
|
|
pshufd xmm0,xmm2,00eh
|
|
; K row 11 (K256_shaext+176)
movdqa xmm1,XMMWORD PTR[((176-128))+rbp]
|
|
paddd xmm1,xmm7
|
|
DB 69,15,56,203,247
|
|
DB 69,15,56,204,202
|
|
movdqa xmm0,xmm1
|
|
movdqa xmm2,XMMWORD PTR[((176-128))+rbp]
|
|
paddd xmm8,xmm3
|
|
paddd xmm2,xmm11
|
|
DB 15,56,205,231
|
|
DB 69,15,56,203,236
|
|
movdqa xmm0,xmm2
|
|
movdqa xmm3,xmm4
|
|
DB 102,15,58,15,223,4
|
|
DB 69,15,56,203,254
|
|
DB 69,15,56,205,195
|
|
pshufd xmm0,xmm1,00eh
|
|
paddd xmm5,xmm3
|
|
movdqa xmm3,xmm8
|
|
DB 102,65,15,58,15,219,4
|
|
DB 15,56,204,247
|
|
DB 69,15,56,203,229
|
|
pshufd xmm0,xmm2,00eh
|
|
; K row 12 (K256_shaext+192)
movdqa xmm1,XMMWORD PTR[((192-128))+rbp]
|
|
paddd xmm1,xmm4
|
|
DB 69,15,56,203,247
|
|
DB 69,15,56,204,211
|
|
movdqa xmm0,xmm1
|
|
movdqa xmm2,XMMWORD PTR[((192-128))+rbp]
|
|
paddd xmm9,xmm3
|
|
paddd xmm2,xmm8
|
|
DB 15,56,205,236
|
|
DB 69,15,56,203,236
|
|
movdqa xmm0,xmm2
|
|
movdqa xmm3,xmm5
|
|
DB 102,15,58,15,220,4
|
|
DB 69,15,56,203,254
|
|
DB 69,15,56,205,200
|
|
pshufd xmm0,xmm1,00eh
|
|
paddd xmm6,xmm3
|
|
movdqa xmm3,xmm9
|
|
DB 102,65,15,58,15,216,4
|
|
DB 15,56,204,252
|
|
DB 69,15,56,203,229
|
|
pshufd xmm0,xmm2,00eh
|
|
; K row 13 (K256_shaext+208)
movdqa xmm1,XMMWORD PTR[((208-128))+rbp]
|
|
paddd xmm1,xmm5
|
|
DB 69,15,56,203,247
|
|
DB 69,15,56,204,216
|
|
movdqa xmm0,xmm1
|
|
movdqa xmm2,XMMWORD PTR[((208-128))+rbp]
|
|
paddd xmm10,xmm3
|
|
paddd xmm2,xmm9
|
|
DB 15,56,205,245
|
|
DB 69,15,56,203,236
|
|
movdqa xmm0,xmm2
|
|
movdqa xmm3,xmm6
|
|
DB 102,15,58,15,221,4
|
|
DB 69,15,56,203,254
|
|
DB 69,15,56,205,209
|
|
pshufd xmm0,xmm1,00eh
|
|
paddd xmm7,xmm3
|
|
movdqa xmm3,xmm10
|
|
DB 102,65,15,58,15,217,4
|
|
nop
|
|
DB 69,15,56,203,229
|
|
pshufd xmm0,xmm2,00eh
|
|
; K row 14 (K256_shaext+224)
movdqa xmm1,XMMWORD PTR[((224-128))+rbp]
|
|
paddd xmm1,xmm6
|
|
DB 69,15,56,203,247
|
|
|
|
movdqa xmm0,xmm1
|
|
movdqa xmm2,XMMWORD PTR[((224-128))+rbp]
|
|
paddd xmm11,xmm3
|
|
paddd xmm2,xmm10
|
|
DB 15,56,205,254
|
|
nop
|
|
DB 69,15,56,203,236
|
|
movdqa xmm0,xmm2
|
|
; Bookkeeping for the end of the block is interleaved with the last
; rounds: ecx = 1 for the pointer checks, xmm6 = 0 for the mask compares.
mov ecx,1
|
|
pxor xmm6,xmm6
|
|
DB 69,15,56,203,254
|
|
DB 69,15,56,205,218
|
|
pshufd xmm0,xmm1,00eh
|
|
; K row 15 (K256_shaext+240)
movdqa xmm1,XMMWORD PTR[((240-128))+rbp]
|
|
paddd xmm1,xmm7
|
|
; xmm7 = the two per-lane counters from [rbx].
movq xmm7,QWORD PTR[rbx]
|
|
nop
|
|
DB 69,15,56,203,229
|
|
pshufd xmm0,xmm2,00eh
|
|
movdqa xmm2,XMMWORD PTR[((240-128))+rbp]
|
|
paddd xmm2,xmm11
|
|
DB 69,15,56,203,247
|
|
|
|
movdqa xmm0,xmm1
|
|
; If a lane's counter is <= 1 (this was its last block, or it is
; already idle), point its input at the stack for later iterations.
cmp ecx,DWORD PTR[rbx]
|
|
cmovge r8,rsp
|
|
cmp ecx,DWORD PTR[4+rbx]
|
|
cmovge r9,rsp
|
|
; xmm9 / xmm10 = lane-0 / lane-1 counter broadcast to all four dwords.
pshufd xmm9,xmm7,000h
|
|
DB 69,15,56,203,236
|
|
movdqa xmm0,xmm2
|
|
pshufd xmm10,xmm7,055h
|
|
movdqa xmm11,xmm7
|
|
DB 69,15,56,203,254
|
|
pshufd xmm0,xmm1,00eh
|
|
; Masks: all-ones where the counter is > 0, zero otherwise.
pcmpgtd xmm9,xmm6
|
|
pcmpgtd xmm10,xmm6
|
|
DB 69,15,56,203,229
|
|
pshufd xmm0,xmm2,00eh
|
|
pcmpgtd xmm11,xmm6
|
|
; Reload the pshufb control that was overwritten while xmm3 served as
; scratch.
movdqa xmm3,XMMWORD PTR[((K256_shaext-16))]
|
|
DB 69,15,56,203,247
|
|
|
|
; Zero the freshly computed state of lanes whose counter was not
; positive, so that adding the saved copy below leaves them unchanged.
pand xmm13,xmm9
|
|
pand xmm15,xmm10
|
|
pand xmm12,xmm9
|
|
pand xmm14,xmm10
|
|
; counters += mask, i.e. decrement every counter that is still > 0.
paddd xmm11,xmm7
|
|
|
|
; Add the state saved at the top of this block.
paddd xmm13,XMMWORD PTR[80+rsp]
|
|
paddd xmm15,XMMWORD PTR[112+rsp]
|
|
paddd xmm12,XMMWORD PTR[64+rsp]
|
|
paddd xmm14,XMMWORD PTR[96+rsp]
|
|
|
|
movq QWORD PTR[rbx],xmm11
|
|
dec edx
|
|
jnz $L$oop_shaext
|
|
|
|
; ---- Both lanes finished: write the state back ----
mov edx,DWORD PTR[280+rsp]
|
|
|
|
; Same dword reversal as applied after loading the state.
pshufd xmm12,xmm12,27
|
|
pshufd xmm13,xmm13,27
|
|
pshufd xmm14,xmm14,27
|
|
pshufd xmm15,xmm15,27
|
|
|
|
; Re-interleave the two lanes so each 8-byte store carries one dword
; per lane.
movdqa xmm5,xmm12
|
|
movdqa xmm6,xmm13
|
|
punpckldq xmm12,xmm14
|
|
punpckhdq xmm5,xmm14
|
|
punpckldq xmm13,xmm15
|
|
punpckhdq xmm6,xmm15
|
|
|
|
movq QWORD PTR[(0-128)+rdi],xmm12
|
|
psrldq xmm12,8
|
|
movq QWORD PTR[(128-128)+rdi],xmm5
|
|
psrldq xmm5,8
|
|
movq QWORD PTR[(32-128)+rdi],xmm12
|
|
movq QWORD PTR[(160-128)+rdi],xmm5
|
|
|
|
movq QWORD PTR[(64-128)+rdi],xmm13
|
|
psrldq xmm13,8
|
|
movq QWORD PTR[(192-128)+rdi],xmm6
|
|
psrldq xmm6,8
|
|
movq QWORD PTR[(96-128)+rdi],xmm13
|
|
movq QWORD PTR[(224-128)+rdi],xmm6
|
|
|
|
; Advance to the next pair of state lanes (8 bytes) and the next pair
; of descriptors (2 x 16 bytes).
lea rdi,QWORD PTR[8+rdi]
|
|
lea rsi,QWORD PTR[32+rsi]
|
|
dec edx
|
|
jnz $L$oop_grande_shaext
|
|
|
|
$L$done_shaext::
|
|
|
|
; rax still holds the entry rsp (it is not written anywhere after the
; prologue), so the copy at [272+rsp] is not reloaded here.
movaps xmm6,XMMWORD PTR[((-184))+rax]
|
|
movaps xmm7,XMMWORD PTR[((-168))+rax]
|
|
movaps xmm8,XMMWORD PTR[((-152))+rax]
|
|
movaps xmm9,XMMWORD PTR[((-136))+rax]
|
|
movaps xmm10,XMMWORD PTR[((-120))+rax]
|
|
movaps xmm11,XMMWORD PTR[((-104))+rax]
|
|
movaps xmm12,XMMWORD PTR[((-88))+rax]
|
|
movaps xmm13,XMMWORD PTR[((-72))+rax]
|
|
movaps xmm14,XMMWORD PTR[((-56))+rax]
|
|
movaps xmm15,XMMWORD PTR[((-40))+rax]
|
|
mov rbp,QWORD PTR[((-16))+rax]
|
|
mov rbx,QWORD PTR[((-8))+rax]
|
|
; Drop the whole frame in one step.
lea rsp,QWORD PTR[rax]
|
|
$L$epilogue_shaext::
|
|
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD PTR[16+rsp]
|
|
DB 0F3h,0C3h ;repret
|
|
$L$SEH_end_sha256_multi_block_shaext::
|
|
sha256_multi_block_shaext ENDP
|
|
|
|
ALIGN 32
|
|
sha256_multi_block_avx PROC PRIVATE
|
|
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
|
|
mov QWORD PTR[16+rsp],rsi
|
|
mov rax,rsp
|
|
$L$SEH_begin_sha256_multi_block_avx::
|
|
mov rdi,rcx
|
|
mov rsi,rdx
|
|
mov rdx,r8
|
|
|
|
|
|
_avx_shortcut::
|
|
shr rcx,32
|
|
cmp edx,2
|
|
jb $L$avx
|
|
test ecx,32
|
|
jnz _avx2_shortcut
|
|
jmp $L$avx
|
|
ALIGN 32
|
|
$L$avx::
|
|
mov rax,rsp
|
|
push rbx
|
|
push rbp
|
|
lea rsp,QWORD PTR[((-168))+rsp]
|
|
movaps XMMWORD PTR[rsp],xmm6
|
|
movaps XMMWORD PTR[16+rsp],xmm7
|
|
movaps XMMWORD PTR[32+rsp],xmm8
|
|
movaps XMMWORD PTR[48+rsp],xmm9
|
|
movaps XMMWORD PTR[(-120)+rax],xmm10
|
|
movaps XMMWORD PTR[(-104)+rax],xmm11
|
|
movaps XMMWORD PTR[(-88)+rax],xmm12
|
|
movaps XMMWORD PTR[(-72)+rax],xmm13
|
|
movaps XMMWORD PTR[(-56)+rax],xmm14
|
|
movaps XMMWORD PTR[(-40)+rax],xmm15
|
|
sub rsp,288
|
|
and rsp,-256
|
|
mov QWORD PTR[272+rsp],rax
|
|
$L$body_avx::
|
|
lea rbp,QWORD PTR[((K256+128))]
|
|
lea rbx,QWORD PTR[256+rsp]
|
|
lea rdi,QWORD PTR[128+rdi]
|
|
|
|
$L$oop_grande_avx::
|
|
mov DWORD PTR[280+rsp],edx
|
|
xor edx,edx
|
|
mov r8,QWORD PTR[rsi]
|
|
mov ecx,DWORD PTR[8+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[rbx],ecx
|
|
cmovle r8,rbp
|
|
mov r9,QWORD PTR[16+rsi]
|
|
mov ecx,DWORD PTR[24+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[4+rbx],ecx
|
|
cmovle r9,rbp
|
|
mov r10,QWORD PTR[32+rsi]
|
|
mov ecx,DWORD PTR[40+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[8+rbx],ecx
|
|
cmovle r10,rbp
|
|
mov r11,QWORD PTR[48+rsi]
|
|
mov ecx,DWORD PTR[56+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[12+rbx],ecx
|
|
cmovle r11,rbp
|
|
test edx,edx
|
|
jz $L$done_avx
|
|
|
|
vmovdqu xmm8,XMMWORD PTR[((0-128))+rdi]
|
|
lea rax,QWORD PTR[128+rsp]
|
|
vmovdqu xmm9,XMMWORD PTR[((32-128))+rdi]
|
|
vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi]
|
|
vmovdqu xmm11,XMMWORD PTR[((96-128))+rdi]
|
|
vmovdqu xmm12,XMMWORD PTR[((128-128))+rdi]
|
|
vmovdqu xmm13,XMMWORD PTR[((160-128))+rdi]
|
|
vmovdqu xmm14,XMMWORD PTR[((192-128))+rdi]
|
|
vmovdqu xmm15,XMMWORD PTR[((224-128))+rdi]
|
|
vmovdqu xmm6,XMMWORD PTR[$L$pbswap]
|
|
jmp $L$oop_avx
|
|
|
|
ALIGN 32
|
|
$L$oop_avx::
|
|
vpxor xmm4,xmm10,xmm9
|
|
vmovd xmm5,DWORD PTR[r8]
|
|
vmovd xmm0,DWORD PTR[r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[r10],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[r11],1
|
|
vpunpckldq xmm5,xmm5,xmm0
|
|
vpshufb xmm5,xmm5,xmm6
|
|
vpsrld xmm7,xmm12,6
|
|
vpslld xmm2,xmm12,26
|
|
vmovdqu XMMWORD PTR[(0-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm15
|
|
|
|
vpsrld xmm1,xmm12,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm12,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((-128))+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm12,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm12,7
|
|
vpandn xmm0,xmm12,xmm14
|
|
vpand xmm3,xmm12,xmm13
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm15,xmm8,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm8,30
|
|
vpxor xmm0,xmm0,xmm3
|
|
vpxor xmm3,xmm9,xmm8
|
|
|
|
vpxor xmm15,xmm15,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm8,13
|
|
|
|
vpslld xmm2,xmm8,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm4,xmm4,xmm3
|
|
|
|
vpxor xmm7,xmm15,xmm1
|
|
|
|
vpsrld xmm1,xmm8,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm8,10
|
|
vpxor xmm15,xmm9,xmm4
|
|
vpaddd xmm11,xmm11,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm15,xmm15,xmm5
|
|
vpaddd xmm15,xmm15,xmm7
|
|
vmovd xmm5,DWORD PTR[4+r8]
|
|
vmovd xmm0,DWORD PTR[4+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[4+r10],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[4+r11],1
|
|
vpunpckldq xmm5,xmm5,xmm0
|
|
vpshufb xmm5,xmm5,xmm6
|
|
vpsrld xmm7,xmm11,6
|
|
vpslld xmm2,xmm11,26
|
|
vmovdqu XMMWORD PTR[(16-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm14
|
|
|
|
vpsrld xmm1,xmm11,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm11,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((-96))+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm11,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm11,7
|
|
vpandn xmm0,xmm11,xmm13
|
|
vpand xmm4,xmm11,xmm12
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm14,xmm15,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm15,30
|
|
vpxor xmm0,xmm0,xmm4
|
|
vpxor xmm4,xmm8,xmm15
|
|
|
|
vpxor xmm14,xmm14,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm15,13
|
|
|
|
vpslld xmm2,xmm15,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm3,xmm3,xmm4
|
|
|
|
vpxor xmm7,xmm14,xmm1
|
|
|
|
vpsrld xmm1,xmm15,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm15,10
|
|
vpxor xmm14,xmm8,xmm3
|
|
vpaddd xmm10,xmm10,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm14,xmm14,xmm5
|
|
vpaddd xmm14,xmm14,xmm7
|
|
vmovd xmm5,DWORD PTR[8+r8]
|
|
vmovd xmm0,DWORD PTR[8+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[8+r10],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[8+r11],1
|
|
vpunpckldq xmm5,xmm5,xmm0
|
|
vpshufb xmm5,xmm5,xmm6
|
|
vpsrld xmm7,xmm10,6
|
|
vpslld xmm2,xmm10,26
|
|
vmovdqu XMMWORD PTR[(32-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm13
|
|
|
|
vpsrld xmm1,xmm10,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm10,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((-64))+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm10,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm10,7
|
|
vpandn xmm0,xmm10,xmm12
|
|
vpand xmm3,xmm10,xmm11
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm13,xmm14,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm14,30
|
|
vpxor xmm0,xmm0,xmm3
|
|
vpxor xmm3,xmm15,xmm14
|
|
|
|
vpxor xmm13,xmm13,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm14,13
|
|
|
|
vpslld xmm2,xmm14,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm4,xmm4,xmm3
|
|
|
|
vpxor xmm7,xmm13,xmm1
|
|
|
|
vpsrld xmm1,xmm14,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm14,10
|
|
vpxor xmm13,xmm15,xmm4
|
|
vpaddd xmm9,xmm9,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm13,xmm13,xmm5
|
|
vpaddd xmm13,xmm13,xmm7
|
|
vmovd xmm5,DWORD PTR[12+r8]
|
|
vmovd xmm0,DWORD PTR[12+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[12+r10],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[12+r11],1
|
|
vpunpckldq xmm5,xmm5,xmm0
|
|
vpshufb xmm5,xmm5,xmm6
|
|
vpsrld xmm7,xmm9,6
|
|
vpslld xmm2,xmm9,26
|
|
vmovdqu XMMWORD PTR[(48-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm12
|
|
|
|
vpsrld xmm1,xmm9,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm9,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((-32))+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm9,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm9,7
|
|
vpandn xmm0,xmm9,xmm11
|
|
vpand xmm4,xmm9,xmm10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm12,xmm13,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm13,30
|
|
vpxor xmm0,xmm0,xmm4
|
|
vpxor xmm4,xmm14,xmm13
|
|
|
|
vpxor xmm12,xmm12,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm13,13
|
|
|
|
vpslld xmm2,xmm13,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm3,xmm3,xmm4
|
|
|
|
vpxor xmm7,xmm12,xmm1
|
|
|
|
vpsrld xmm1,xmm13,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm13,10
|
|
vpxor xmm12,xmm14,xmm3
|
|
vpaddd xmm8,xmm8,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm12,xmm12,xmm5
|
|
vpaddd xmm12,xmm12,xmm7
|
|
vmovd xmm5,DWORD PTR[16+r8]
|
|
vmovd xmm0,DWORD PTR[16+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[16+r10],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[16+r11],1
|
|
vpunpckldq xmm5,xmm5,xmm0
|
|
vpshufb xmm5,xmm5,xmm6
|
|
vpsrld xmm7,xmm8,6
|
|
vpslld xmm2,xmm8,26
|
|
vmovdqu XMMWORD PTR[(64-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm11
|
|
|
|
vpsrld xmm1,xmm8,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm8,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm8,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm8,7
|
|
vpandn xmm0,xmm8,xmm10
|
|
vpand xmm3,xmm8,xmm9
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm11,xmm12,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm12,30
|
|
vpxor xmm0,xmm0,xmm3
|
|
vpxor xmm3,xmm13,xmm12
|
|
|
|
vpxor xmm11,xmm11,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm12,13
|
|
|
|
vpslld xmm2,xmm12,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm4,xmm4,xmm3
|
|
|
|
vpxor xmm7,xmm11,xmm1
|
|
|
|
vpsrld xmm1,xmm12,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm12,10
|
|
vpxor xmm11,xmm13,xmm4
|
|
vpaddd xmm15,xmm15,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm11,xmm11,xmm5
|
|
vpaddd xmm11,xmm11,xmm7
|
|
vmovd xmm5,DWORD PTR[20+r8]
|
|
vmovd xmm0,DWORD PTR[20+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[20+r10],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[20+r11],1
|
|
vpunpckldq xmm5,xmm5,xmm0
|
|
vpshufb xmm5,xmm5,xmm6
|
|
vpsrld xmm7,xmm15,6
|
|
vpslld xmm2,xmm15,26
|
|
vmovdqu XMMWORD PTR[(80-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm10
|
|
|
|
vpsrld xmm1,xmm15,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm15,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[32+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm15,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm15,7
|
|
vpandn xmm0,xmm15,xmm9
|
|
vpand xmm4,xmm15,xmm8
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm10,xmm11,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm11,30
|
|
vpxor xmm0,xmm0,xmm4
|
|
vpxor xmm4,xmm12,xmm11
|
|
|
|
vpxor xmm10,xmm10,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm11,13
|
|
|
|
vpslld xmm2,xmm11,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm3,xmm3,xmm4
|
|
|
|
vpxor xmm7,xmm10,xmm1
|
|
|
|
vpsrld xmm1,xmm11,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm11,10
|
|
vpxor xmm10,xmm12,xmm3
|
|
vpaddd xmm14,xmm14,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm10,xmm10,xmm5
|
|
vpaddd xmm10,xmm10,xmm7
|
|
vmovd xmm5,DWORD PTR[24+r8]
|
|
vmovd xmm0,DWORD PTR[24+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[24+r10],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[24+r11],1
|
|
vpunpckldq xmm5,xmm5,xmm0
|
|
vpshufb xmm5,xmm5,xmm6
|
|
vpsrld xmm7,xmm14,6
|
|
vpslld xmm2,xmm14,26
|
|
vmovdqu XMMWORD PTR[(96-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm9
|
|
|
|
vpsrld xmm1,xmm14,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm14,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[64+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm14,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm14,7
|
|
vpandn xmm0,xmm14,xmm8
|
|
vpand xmm3,xmm14,xmm15
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm9,xmm10,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm10,30
|
|
vpxor xmm0,xmm0,xmm3
|
|
vpxor xmm3,xmm11,xmm10
|
|
|
|
vpxor xmm9,xmm9,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm10,13
|
|
|
|
vpslld xmm2,xmm10,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm4,xmm4,xmm3
|
|
|
|
vpxor xmm7,xmm9,xmm1
|
|
|
|
vpsrld xmm1,xmm10,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm10,10
|
|
vpxor xmm9,xmm11,xmm4
|
|
vpaddd xmm13,xmm13,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm9,xmm9,xmm5
|
|
vpaddd xmm9,xmm9,xmm7
|
|
vmovd xmm5,DWORD PTR[28+r8]
|
|
vmovd xmm0,DWORD PTR[28+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[28+r10],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[28+r11],1
|
|
vpunpckldq xmm5,xmm5,xmm0
|
|
vpshufb xmm5,xmm5,xmm6
|
|
vpsrld xmm7,xmm13,6
|
|
vpslld xmm2,xmm13,26
|
|
vmovdqu XMMWORD PTR[(112-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm8
|
|
|
|
vpsrld xmm1,xmm13,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm13,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[96+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm13,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm13,7
|
|
vpandn xmm0,xmm13,xmm15
|
|
vpand xmm4,xmm13,xmm14
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm8,xmm9,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm9,30
|
|
vpxor xmm0,xmm0,xmm4
|
|
vpxor xmm4,xmm10,xmm9
|
|
|
|
vpxor xmm8,xmm8,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm9,13
|
|
|
|
vpslld xmm2,xmm9,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm3,xmm3,xmm4
|
|
|
|
vpxor xmm7,xmm8,xmm1
|
|
|
|
vpsrld xmm1,xmm9,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm9,10
|
|
vpxor xmm8,xmm10,xmm3
|
|
vpaddd xmm12,xmm12,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm8,xmm8,xmm5
|
|
vpaddd xmm8,xmm8,xmm7
|
|
add rbp,256
|
|
vmovd xmm5,DWORD PTR[32+r8]
|
|
vmovd xmm0,DWORD PTR[32+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[32+r10],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[32+r11],1
|
|
vpunpckldq xmm5,xmm5,xmm0
|
|
vpshufb xmm5,xmm5,xmm6
|
|
vpsrld xmm7,xmm12,6
|
|
vpslld xmm2,xmm12,26
|
|
vmovdqu XMMWORD PTR[(128-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm15
|
|
|
|
vpsrld xmm1,xmm12,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm12,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((-128))+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm12,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm12,7
|
|
vpandn xmm0,xmm12,xmm14
|
|
vpand xmm3,xmm12,xmm13
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm15,xmm8,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm8,30
|
|
vpxor xmm0,xmm0,xmm3
|
|
vpxor xmm3,xmm9,xmm8
|
|
|
|
vpxor xmm15,xmm15,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm8,13
|
|
|
|
vpslld xmm2,xmm8,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm4,xmm4,xmm3
|
|
|
|
vpxor xmm7,xmm15,xmm1
|
|
|
|
vpsrld xmm1,xmm8,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm8,10
|
|
vpxor xmm15,xmm9,xmm4
|
|
vpaddd xmm11,xmm11,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm15,xmm15,xmm5
|
|
vpaddd xmm15,xmm15,xmm7
|
|
vmovd xmm5,DWORD PTR[36+r8]
|
|
vmovd xmm0,DWORD PTR[36+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[36+r10],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[36+r11],1
|
|
vpunpckldq xmm5,xmm5,xmm0
|
|
vpshufb xmm5,xmm5,xmm6
|
|
vpsrld xmm7,xmm11,6
|
|
vpslld xmm2,xmm11,26
|
|
vmovdqu XMMWORD PTR[(144-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm14
|
|
|
|
vpsrld xmm1,xmm11,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm11,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((-96))+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm11,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm11,7
|
|
vpandn xmm0,xmm11,xmm13
|
|
vpand xmm4,xmm11,xmm12
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm14,xmm15,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm15,30
|
|
vpxor xmm0,xmm0,xmm4
|
|
vpxor xmm4,xmm8,xmm15
|
|
|
|
vpxor xmm14,xmm14,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm15,13
|
|
|
|
vpslld xmm2,xmm15,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm3,xmm3,xmm4
|
|
|
|
vpxor xmm7,xmm14,xmm1
|
|
|
|
vpsrld xmm1,xmm15,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm15,10
|
|
vpxor xmm14,xmm8,xmm3
|
|
vpaddd xmm10,xmm10,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm14,xmm14,xmm5
|
|
vpaddd xmm14,xmm14,xmm7
|
|
vmovd xmm5,DWORD PTR[40+r8]
|
|
vmovd xmm0,DWORD PTR[40+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[40+r10],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[40+r11],1
|
|
vpunpckldq xmm5,xmm5,xmm0
|
|
vpshufb xmm5,xmm5,xmm6
|
|
vpsrld xmm7,xmm10,6
|
|
vpslld xmm2,xmm10,26
|
|
vmovdqu XMMWORD PTR[(160-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm13
|
|
|
|
vpsrld xmm1,xmm10,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm10,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((-64))+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm10,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm10,7
|
|
vpandn xmm0,xmm10,xmm12
|
|
vpand xmm3,xmm10,xmm11
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm13,xmm14,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm14,30
|
|
vpxor xmm0,xmm0,xmm3
|
|
vpxor xmm3,xmm15,xmm14
|
|
|
|
vpxor xmm13,xmm13,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm14,13
|
|
|
|
vpslld xmm2,xmm14,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm4,xmm4,xmm3
|
|
|
|
vpxor xmm7,xmm13,xmm1
|
|
|
|
vpsrld xmm1,xmm14,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm14,10
|
|
vpxor xmm13,xmm15,xmm4
|
|
vpaddd xmm9,xmm9,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm13,xmm13,xmm5
|
|
vpaddd xmm13,xmm13,xmm7
|
|
vmovd xmm5,DWORD PTR[44+r8]
|
|
vmovd xmm0,DWORD PTR[44+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[44+r10],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[44+r11],1
|
|
vpunpckldq xmm5,xmm5,xmm0
|
|
vpshufb xmm5,xmm5,xmm6
|
|
vpsrld xmm7,xmm9,6
|
|
vpslld xmm2,xmm9,26
|
|
vmovdqu XMMWORD PTR[(176-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm12
|
|
|
|
vpsrld xmm1,xmm9,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm9,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((-32))+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm9,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm9,7
|
|
vpandn xmm0,xmm9,xmm11
|
|
vpand xmm4,xmm9,xmm10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm12,xmm13,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm13,30
|
|
vpxor xmm0,xmm0,xmm4
|
|
vpxor xmm4,xmm14,xmm13
|
|
|
|
vpxor xmm12,xmm12,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm13,13
|
|
|
|
vpslld xmm2,xmm13,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm3,xmm3,xmm4
|
|
|
|
vpxor xmm7,xmm12,xmm1
|
|
|
|
vpsrld xmm1,xmm13,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm13,10
|
|
vpxor xmm12,xmm14,xmm3
|
|
vpaddd xmm8,xmm8,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm12,xmm12,xmm5
|
|
vpaddd xmm12,xmm12,xmm7
|
|
vmovd xmm5,DWORD PTR[48+r8]
|
|
vmovd xmm0,DWORD PTR[48+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[48+r10],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[48+r11],1
|
|
vpunpckldq xmm5,xmm5,xmm0
|
|
vpshufb xmm5,xmm5,xmm6
|
|
vpsrld xmm7,xmm8,6
|
|
vpslld xmm2,xmm8,26
|
|
vmovdqu XMMWORD PTR[(192-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm11
|
|
|
|
vpsrld xmm1,xmm8,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm8,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm8,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm8,7
|
|
vpandn xmm0,xmm8,xmm10
|
|
vpand xmm3,xmm8,xmm9
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm11,xmm12,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm12,30
|
|
vpxor xmm0,xmm0,xmm3
|
|
vpxor xmm3,xmm13,xmm12
|
|
|
|
vpxor xmm11,xmm11,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm12,13
|
|
|
|
vpslld xmm2,xmm12,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm4,xmm4,xmm3
|
|
|
|
vpxor xmm7,xmm11,xmm1
|
|
|
|
vpsrld xmm1,xmm12,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm12,10
|
|
vpxor xmm11,xmm13,xmm4
|
|
vpaddd xmm15,xmm15,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm11,xmm11,xmm5
|
|
vpaddd xmm11,xmm11,xmm7
|
|
vmovd xmm5,DWORD PTR[52+r8]
|
|
vmovd xmm0,DWORD PTR[52+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[52+r10],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[52+r11],1
|
|
vpunpckldq xmm5,xmm5,xmm0
|
|
vpshufb xmm5,xmm5,xmm6
|
|
vpsrld xmm7,xmm15,6
|
|
vpslld xmm2,xmm15,26
|
|
vmovdqu XMMWORD PTR[(208-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm10
|
|
|
|
vpsrld xmm1,xmm15,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm15,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[32+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm15,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm15,7
|
|
vpandn xmm0,xmm15,xmm9
|
|
vpand xmm4,xmm15,xmm8
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm10,xmm11,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm11,30
|
|
vpxor xmm0,xmm0,xmm4
|
|
vpxor xmm4,xmm12,xmm11
|
|
|
|
vpxor xmm10,xmm10,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm11,13
|
|
|
|
vpslld xmm2,xmm11,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm3,xmm3,xmm4
|
|
|
|
vpxor xmm7,xmm10,xmm1
|
|
|
|
vpsrld xmm1,xmm11,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm11,10
|
|
vpxor xmm10,xmm12,xmm3
|
|
vpaddd xmm14,xmm14,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm10,xmm10,xmm5
|
|
vpaddd xmm10,xmm10,xmm7
|
|
vmovd xmm5,DWORD PTR[56+r8]
|
|
vmovd xmm0,DWORD PTR[56+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[56+r10],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[56+r11],1
|
|
vpunpckldq xmm5,xmm5,xmm0
|
|
vpshufb xmm5,xmm5,xmm6
|
|
vpsrld xmm7,xmm14,6
|
|
vpslld xmm2,xmm14,26
|
|
vmovdqu XMMWORD PTR[(224-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm9
|
|
|
|
vpsrld xmm1,xmm14,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm14,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[64+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm14,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm14,7
|
|
vpandn xmm0,xmm14,xmm8
|
|
vpand xmm3,xmm14,xmm15
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm9,xmm10,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm10,30
|
|
vpxor xmm0,xmm0,xmm3
|
|
vpxor xmm3,xmm11,xmm10
|
|
|
|
vpxor xmm9,xmm9,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm10,13
|
|
|
|
vpslld xmm2,xmm10,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm4,xmm4,xmm3
|
|
|
|
vpxor xmm7,xmm9,xmm1
|
|
|
|
vpsrld xmm1,xmm10,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm10,10
|
|
vpxor xmm9,xmm11,xmm4
|
|
vpaddd xmm13,xmm13,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm9,xmm9,xmm5
|
|
vpaddd xmm9,xmm9,xmm7
|
|
vmovd xmm5,DWORD PTR[60+r8]
|
|
lea r8,QWORD PTR[64+r8]
|
|
vmovd xmm0,DWORD PTR[60+r9]
|
|
lea r9,QWORD PTR[64+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[60+r10],1
|
|
lea r10,QWORD PTR[64+r10]
|
|
vpinsrd xmm0,xmm0,DWORD PTR[60+r11],1
|
|
lea r11,QWORD PTR[64+r11]
|
|
vpunpckldq xmm5,xmm5,xmm0
|
|
vpshufb xmm5,xmm5,xmm6
|
|
vpsrld xmm7,xmm13,6
|
|
vpslld xmm2,xmm13,26
|
|
vmovdqu XMMWORD PTR[(240-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm8
|
|
|
|
vpsrld xmm1,xmm13,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm13,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[96+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm13,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
prefetcht0 [63+r8]
|
|
vpslld xmm2,xmm13,7
|
|
vpandn xmm0,xmm13,xmm15
|
|
vpand xmm4,xmm13,xmm14
|
|
prefetcht0 [63+r9]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm8,xmm9,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
prefetcht0 [63+r10]
|
|
vpslld xmm1,xmm9,30
|
|
vpxor xmm0,xmm0,xmm4
|
|
vpxor xmm4,xmm10,xmm9
|
|
prefetcht0 [63+r11]
|
|
vpxor xmm8,xmm8,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm9,13
|
|
|
|
vpslld xmm2,xmm9,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm3,xmm3,xmm4
|
|
|
|
vpxor xmm7,xmm8,xmm1
|
|
|
|
vpsrld xmm1,xmm9,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm9,10
|
|
vpxor xmm8,xmm10,xmm3
|
|
vpaddd xmm12,xmm12,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm8,xmm8,xmm5
|
|
vpaddd xmm8,xmm8,xmm7
|
|
add rbp,256
|
|
vmovdqu xmm5,XMMWORD PTR[((0-128))+rax]
|
|
mov ecx,3
|
|
jmp $L$oop_16_xx_avx
|
|
ALIGN 32
|
|
$L$oop_16_xx_avx::
|
|
vmovdqu xmm6,XMMWORD PTR[((16-128))+rax]
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((144-128))+rax]
|
|
|
|
vpsrld xmm7,xmm6,3
|
|
vpsrld xmm1,xmm6,7
|
|
vpslld xmm2,xmm6,25
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm6,18
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm6,14
|
|
vmovdqu xmm0,XMMWORD PTR[((224-128))+rax]
|
|
vpsrld xmm3,xmm0,10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm0,17
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,15
|
|
vpaddd xmm5,xmm5,xmm7
|
|
vpxor xmm7,xmm3,xmm1
|
|
vpsrld xmm1,xmm0,19
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,13
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpaddd xmm5,xmm5,xmm7
|
|
vpsrld xmm7,xmm12,6
|
|
vpslld xmm2,xmm12,26
|
|
vmovdqu XMMWORD PTR[(0-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm15
|
|
|
|
vpsrld xmm1,xmm12,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm12,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((-128))+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm12,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm12,7
|
|
vpandn xmm0,xmm12,xmm14
|
|
vpand xmm3,xmm12,xmm13
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm15,xmm8,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm8,30
|
|
vpxor xmm0,xmm0,xmm3
|
|
vpxor xmm3,xmm9,xmm8
|
|
|
|
vpxor xmm15,xmm15,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm8,13
|
|
|
|
vpslld xmm2,xmm8,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm4,xmm4,xmm3
|
|
|
|
vpxor xmm7,xmm15,xmm1
|
|
|
|
vpsrld xmm1,xmm8,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm8,10
|
|
vpxor xmm15,xmm9,xmm4
|
|
vpaddd xmm11,xmm11,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm15,xmm15,xmm5
|
|
vpaddd xmm15,xmm15,xmm7
|
|
vmovdqu xmm5,XMMWORD PTR[((32-128))+rax]
|
|
vpaddd xmm6,xmm6,XMMWORD PTR[((160-128))+rax]
|
|
|
|
vpsrld xmm7,xmm5,3
|
|
vpsrld xmm1,xmm5,7
|
|
vpslld xmm2,xmm5,25
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm5,18
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm5,14
|
|
vmovdqu xmm0,XMMWORD PTR[((240-128))+rax]
|
|
vpsrld xmm4,xmm0,10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm0,17
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,15
|
|
vpaddd xmm6,xmm6,xmm7
|
|
vpxor xmm7,xmm4,xmm1
|
|
vpsrld xmm1,xmm0,19
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,13
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpaddd xmm6,xmm6,xmm7
|
|
vpsrld xmm7,xmm11,6
|
|
vpslld xmm2,xmm11,26
|
|
vmovdqu XMMWORD PTR[(16-128)+rax],xmm6
|
|
vpaddd xmm6,xmm6,xmm14
|
|
|
|
vpsrld xmm1,xmm11,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm11,21
|
|
vpaddd xmm6,xmm6,XMMWORD PTR[((-96))+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm11,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm11,7
|
|
vpandn xmm0,xmm11,xmm13
|
|
vpand xmm4,xmm11,xmm12
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm14,xmm15,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm15,30
|
|
vpxor xmm0,xmm0,xmm4
|
|
vpxor xmm4,xmm8,xmm15
|
|
|
|
vpxor xmm14,xmm14,xmm1
|
|
vpaddd xmm6,xmm6,xmm7
|
|
|
|
vpsrld xmm1,xmm15,13
|
|
|
|
vpslld xmm2,xmm15,19
|
|
vpaddd xmm6,xmm6,xmm0
|
|
vpand xmm3,xmm3,xmm4
|
|
|
|
vpxor xmm7,xmm14,xmm1
|
|
|
|
vpsrld xmm1,xmm15,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm15,10
|
|
vpxor xmm14,xmm8,xmm3
|
|
vpaddd xmm10,xmm10,xmm6
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm14,xmm14,xmm6
|
|
vpaddd xmm14,xmm14,xmm7
|
|
vmovdqu xmm6,XMMWORD PTR[((48-128))+rax]
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((176-128))+rax]
|
|
|
|
vpsrld xmm7,xmm6,3
|
|
vpsrld xmm1,xmm6,7
|
|
vpslld xmm2,xmm6,25
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm6,18
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm6,14
|
|
vmovdqu xmm0,XMMWORD PTR[((0-128))+rax]
|
|
vpsrld xmm3,xmm0,10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm0,17
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,15
|
|
vpaddd xmm5,xmm5,xmm7
|
|
vpxor xmm7,xmm3,xmm1
|
|
vpsrld xmm1,xmm0,19
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,13
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpaddd xmm5,xmm5,xmm7
|
|
vpsrld xmm7,xmm10,6
|
|
vpslld xmm2,xmm10,26
|
|
vmovdqu XMMWORD PTR[(32-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm13
|
|
|
|
vpsrld xmm1,xmm10,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm10,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((-64))+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm10,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm10,7
|
|
vpandn xmm0,xmm10,xmm12
|
|
vpand xmm3,xmm10,xmm11
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm13,xmm14,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm14,30
|
|
vpxor xmm0,xmm0,xmm3
|
|
vpxor xmm3,xmm15,xmm14
|
|
|
|
vpxor xmm13,xmm13,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm14,13
|
|
|
|
vpslld xmm2,xmm14,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm4,xmm4,xmm3
|
|
|
|
vpxor xmm7,xmm13,xmm1
|
|
|
|
vpsrld xmm1,xmm14,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm14,10
|
|
vpxor xmm13,xmm15,xmm4
|
|
vpaddd xmm9,xmm9,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm13,xmm13,xmm5
|
|
vpaddd xmm13,xmm13,xmm7
|
|
vmovdqu xmm5,XMMWORD PTR[((64-128))+rax]
|
|
vpaddd xmm6,xmm6,XMMWORD PTR[((192-128))+rax]
|
|
|
|
vpsrld xmm7,xmm5,3
|
|
vpsrld xmm1,xmm5,7
|
|
vpslld xmm2,xmm5,25
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm5,18
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm5,14
|
|
vmovdqu xmm0,XMMWORD PTR[((16-128))+rax]
|
|
vpsrld xmm4,xmm0,10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm0,17
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,15
|
|
vpaddd xmm6,xmm6,xmm7
|
|
vpxor xmm7,xmm4,xmm1
|
|
vpsrld xmm1,xmm0,19
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,13
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpaddd xmm6,xmm6,xmm7
|
|
vpsrld xmm7,xmm9,6
|
|
vpslld xmm2,xmm9,26
|
|
vmovdqu XMMWORD PTR[(48-128)+rax],xmm6
|
|
vpaddd xmm6,xmm6,xmm12
|
|
|
|
vpsrld xmm1,xmm9,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm9,21
|
|
vpaddd xmm6,xmm6,XMMWORD PTR[((-32))+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm9,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm9,7
|
|
vpandn xmm0,xmm9,xmm11
|
|
vpand xmm4,xmm9,xmm10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm12,xmm13,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm13,30
|
|
vpxor xmm0,xmm0,xmm4
|
|
vpxor xmm4,xmm14,xmm13
|
|
|
|
vpxor xmm12,xmm12,xmm1
|
|
vpaddd xmm6,xmm6,xmm7
|
|
|
|
vpsrld xmm1,xmm13,13
|
|
|
|
vpslld xmm2,xmm13,19
|
|
vpaddd xmm6,xmm6,xmm0
|
|
vpand xmm3,xmm3,xmm4
|
|
|
|
vpxor xmm7,xmm12,xmm1
|
|
|
|
vpsrld xmm1,xmm13,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm13,10
|
|
vpxor xmm12,xmm14,xmm3
|
|
vpaddd xmm8,xmm8,xmm6
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm12,xmm12,xmm6
|
|
vpaddd xmm12,xmm12,xmm7
|
|
vmovdqu xmm6,XMMWORD PTR[((80-128))+rax]
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((208-128))+rax]
|
|
|
|
vpsrld xmm7,xmm6,3
|
|
vpsrld xmm1,xmm6,7
|
|
vpslld xmm2,xmm6,25
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm6,18
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm6,14
|
|
vmovdqu xmm0,XMMWORD PTR[((32-128))+rax]
|
|
vpsrld xmm3,xmm0,10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm0,17
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,15
|
|
vpaddd xmm5,xmm5,xmm7
|
|
vpxor xmm7,xmm3,xmm1
|
|
vpsrld xmm1,xmm0,19
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,13
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpaddd xmm5,xmm5,xmm7
|
|
vpsrld xmm7,xmm8,6
|
|
vpslld xmm2,xmm8,26
|
|
vmovdqu XMMWORD PTR[(64-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm11
|
|
|
|
vpsrld xmm1,xmm8,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm8,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm8,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm8,7
|
|
vpandn xmm0,xmm8,xmm10
|
|
vpand xmm3,xmm8,xmm9
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm11,xmm12,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm12,30
|
|
vpxor xmm0,xmm0,xmm3
|
|
vpxor xmm3,xmm13,xmm12
|
|
|
|
vpxor xmm11,xmm11,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm12,13
|
|
|
|
vpslld xmm2,xmm12,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm4,xmm4,xmm3
|
|
|
|
vpxor xmm7,xmm11,xmm1
|
|
|
|
vpsrld xmm1,xmm12,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm12,10
|
|
vpxor xmm11,xmm13,xmm4
|
|
vpaddd xmm15,xmm15,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm11,xmm11,xmm5
|
|
vpaddd xmm11,xmm11,xmm7
|
|
vmovdqu xmm5,XMMWORD PTR[((96-128))+rax]
|
|
vpaddd xmm6,xmm6,XMMWORD PTR[((224-128))+rax]
|
|
|
|
vpsrld xmm7,xmm5,3
|
|
vpsrld xmm1,xmm5,7
|
|
vpslld xmm2,xmm5,25
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm5,18
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm5,14
|
|
vmovdqu xmm0,XMMWORD PTR[((48-128))+rax]
|
|
vpsrld xmm4,xmm0,10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm0,17
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,15
|
|
vpaddd xmm6,xmm6,xmm7
|
|
vpxor xmm7,xmm4,xmm1
|
|
vpsrld xmm1,xmm0,19
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,13
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpaddd xmm6,xmm6,xmm7
|
|
vpsrld xmm7,xmm15,6
|
|
vpslld xmm2,xmm15,26
|
|
vmovdqu XMMWORD PTR[(80-128)+rax],xmm6
|
|
vpaddd xmm6,xmm6,xmm10
|
|
|
|
vpsrld xmm1,xmm15,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm15,21
|
|
vpaddd xmm6,xmm6,XMMWORD PTR[32+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm15,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm15,7
|
|
vpandn xmm0,xmm15,xmm9
|
|
vpand xmm4,xmm15,xmm8
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm10,xmm11,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm11,30
|
|
vpxor xmm0,xmm0,xmm4
|
|
vpxor xmm4,xmm12,xmm11
|
|
|
|
vpxor xmm10,xmm10,xmm1
|
|
vpaddd xmm6,xmm6,xmm7
|
|
|
|
vpsrld xmm1,xmm11,13
|
|
|
|
vpslld xmm2,xmm11,19
|
|
vpaddd xmm6,xmm6,xmm0
|
|
vpand xmm3,xmm3,xmm4
|
|
|
|
vpxor xmm7,xmm10,xmm1
|
|
|
|
vpsrld xmm1,xmm11,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm11,10
|
|
vpxor xmm10,xmm12,xmm3
|
|
vpaddd xmm14,xmm14,xmm6
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm10,xmm10,xmm6
|
|
vpaddd xmm10,xmm10,xmm7
|
|
vmovdqu xmm6,XMMWORD PTR[((112-128))+rax]
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((240-128))+rax]
|
|
|
|
vpsrld xmm7,xmm6,3
|
|
vpsrld xmm1,xmm6,7
|
|
vpslld xmm2,xmm6,25
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm6,18
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm6,14
|
|
vmovdqu xmm0,XMMWORD PTR[((64-128))+rax]
|
|
vpsrld xmm3,xmm0,10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm0,17
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,15
|
|
vpaddd xmm5,xmm5,xmm7
|
|
vpxor xmm7,xmm3,xmm1
|
|
vpsrld xmm1,xmm0,19
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,13
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpaddd xmm5,xmm5,xmm7
|
|
vpsrld xmm7,xmm14,6
|
|
vpslld xmm2,xmm14,26
|
|
vmovdqu XMMWORD PTR[(96-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm9
|
|
|
|
vpsrld xmm1,xmm14,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm14,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[64+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm14,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm14,7
|
|
vpandn xmm0,xmm14,xmm8
|
|
vpand xmm3,xmm14,xmm15
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm9,xmm10,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm10,30
|
|
vpxor xmm0,xmm0,xmm3
|
|
vpxor xmm3,xmm11,xmm10
|
|
|
|
vpxor xmm9,xmm9,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm10,13
|
|
|
|
vpslld xmm2,xmm10,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm4,xmm4,xmm3
|
|
|
|
vpxor xmm7,xmm9,xmm1
|
|
|
|
vpsrld xmm1,xmm10,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm10,10
|
|
vpxor xmm9,xmm11,xmm4
|
|
vpaddd xmm13,xmm13,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm9,xmm9,xmm5
|
|
vpaddd xmm9,xmm9,xmm7
|
|
vmovdqu xmm5,XMMWORD PTR[((128-128))+rax]
|
|
vpaddd xmm6,xmm6,XMMWORD PTR[((0-128))+rax]
|
|
|
|
vpsrld xmm7,xmm5,3
|
|
vpsrld xmm1,xmm5,7
|
|
vpslld xmm2,xmm5,25
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm5,18
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm5,14
|
|
vmovdqu xmm0,XMMWORD PTR[((80-128))+rax]
|
|
vpsrld xmm4,xmm0,10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm0,17
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,15
|
|
vpaddd xmm6,xmm6,xmm7
|
|
vpxor xmm7,xmm4,xmm1
|
|
vpsrld xmm1,xmm0,19
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,13
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpaddd xmm6,xmm6,xmm7
|
|
vpsrld xmm7,xmm13,6
|
|
vpslld xmm2,xmm13,26
|
|
vmovdqu XMMWORD PTR[(112-128)+rax],xmm6
|
|
vpaddd xmm6,xmm6,xmm8
|
|
|
|
vpsrld xmm1,xmm13,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm13,21
|
|
vpaddd xmm6,xmm6,XMMWORD PTR[96+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm13,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm13,7
|
|
vpandn xmm0,xmm13,xmm15
|
|
vpand xmm4,xmm13,xmm14
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm8,xmm9,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm9,30
|
|
vpxor xmm0,xmm0,xmm4
|
|
vpxor xmm4,xmm10,xmm9
|
|
|
|
vpxor xmm8,xmm8,xmm1
|
|
vpaddd xmm6,xmm6,xmm7
|
|
|
|
vpsrld xmm1,xmm9,13
|
|
|
|
vpslld xmm2,xmm9,19
|
|
vpaddd xmm6,xmm6,xmm0
|
|
vpand xmm3,xmm3,xmm4
|
|
|
|
vpxor xmm7,xmm8,xmm1
|
|
|
|
vpsrld xmm1,xmm9,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm9,10
|
|
vpxor xmm8,xmm10,xmm3
|
|
vpaddd xmm12,xmm12,xmm6
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm8,xmm8,xmm6
|
|
vpaddd xmm8,xmm8,xmm7
|
|
add rbp,256
|
|
vmovdqu xmm6,XMMWORD PTR[((144-128))+rax]
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((16-128))+rax]
|
|
|
|
vpsrld xmm7,xmm6,3
|
|
vpsrld xmm1,xmm6,7
|
|
vpslld xmm2,xmm6,25
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm6,18
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm6,14
|
|
vmovdqu xmm0,XMMWORD PTR[((96-128))+rax]
|
|
vpsrld xmm3,xmm0,10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm0,17
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,15
|
|
vpaddd xmm5,xmm5,xmm7
|
|
vpxor xmm7,xmm3,xmm1
|
|
vpsrld xmm1,xmm0,19
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,13
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpaddd xmm5,xmm5,xmm7
|
|
vpsrld xmm7,xmm12,6
|
|
vpslld xmm2,xmm12,26
|
|
vmovdqu XMMWORD PTR[(128-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm15
|
|
|
|
vpsrld xmm1,xmm12,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm12,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((-128))+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm12,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm12,7
|
|
vpandn xmm0,xmm12,xmm14
|
|
vpand xmm3,xmm12,xmm13
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm15,xmm8,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm8,30
|
|
vpxor xmm0,xmm0,xmm3
|
|
vpxor xmm3,xmm9,xmm8
|
|
|
|
vpxor xmm15,xmm15,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm8,13
|
|
|
|
vpslld xmm2,xmm8,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm4,xmm4,xmm3
|
|
|
|
vpxor xmm7,xmm15,xmm1
|
|
|
|
vpsrld xmm1,xmm8,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm8,10
|
|
vpxor xmm15,xmm9,xmm4
|
|
vpaddd xmm11,xmm11,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm15,xmm15,xmm5
|
|
vpaddd xmm15,xmm15,xmm7
|
|
vmovdqu xmm5,XMMWORD PTR[((160-128))+rax]
|
|
vpaddd xmm6,xmm6,XMMWORD PTR[((32-128))+rax]
|
|
|
|
vpsrld xmm7,xmm5,3
|
|
vpsrld xmm1,xmm5,7
|
|
vpslld xmm2,xmm5,25
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm5,18
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm5,14
|
|
vmovdqu xmm0,XMMWORD PTR[((112-128))+rax]
|
|
vpsrld xmm4,xmm0,10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm0,17
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,15
|
|
vpaddd xmm6,xmm6,xmm7
|
|
vpxor xmm7,xmm4,xmm1
|
|
vpsrld xmm1,xmm0,19
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,13
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpaddd xmm6,xmm6,xmm7
|
|
vpsrld xmm7,xmm11,6
|
|
vpslld xmm2,xmm11,26
|
|
vmovdqu XMMWORD PTR[(144-128)+rax],xmm6
|
|
vpaddd xmm6,xmm6,xmm14
|
|
|
|
vpsrld xmm1,xmm11,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm11,21
|
|
vpaddd xmm6,xmm6,XMMWORD PTR[((-96))+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm11,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm11,7
|
|
vpandn xmm0,xmm11,xmm13
|
|
vpand xmm4,xmm11,xmm12
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm14,xmm15,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm15,30
|
|
vpxor xmm0,xmm0,xmm4
|
|
vpxor xmm4,xmm8,xmm15
|
|
|
|
vpxor xmm14,xmm14,xmm1
|
|
vpaddd xmm6,xmm6,xmm7
|
|
|
|
vpsrld xmm1,xmm15,13
|
|
|
|
vpslld xmm2,xmm15,19
|
|
vpaddd xmm6,xmm6,xmm0
|
|
vpand xmm3,xmm3,xmm4
|
|
|
|
vpxor xmm7,xmm14,xmm1
|
|
|
|
vpsrld xmm1,xmm15,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm15,10
|
|
vpxor xmm14,xmm8,xmm3
|
|
vpaddd xmm10,xmm10,xmm6
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm14,xmm14,xmm6
|
|
vpaddd xmm14,xmm14,xmm7
|
|
vmovdqu xmm6,XMMWORD PTR[((176-128))+rax]
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((48-128))+rax]
|
|
|
|
vpsrld xmm7,xmm6,3
|
|
vpsrld xmm1,xmm6,7
|
|
vpslld xmm2,xmm6,25
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm6,18
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm6,14
|
|
vmovdqu xmm0,XMMWORD PTR[((128-128))+rax]
|
|
vpsrld xmm3,xmm0,10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm0,17
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,15
|
|
vpaddd xmm5,xmm5,xmm7
|
|
vpxor xmm7,xmm3,xmm1
|
|
vpsrld xmm1,xmm0,19
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,13
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpaddd xmm5,xmm5,xmm7
|
|
vpsrld xmm7,xmm10,6
|
|
vpslld xmm2,xmm10,26
|
|
vmovdqu XMMWORD PTR[(160-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm13
|
|
|
|
vpsrld xmm1,xmm10,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm10,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((-64))+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm10,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm10,7
|
|
vpandn xmm0,xmm10,xmm12
|
|
vpand xmm3,xmm10,xmm11
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm13,xmm14,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm14,30
|
|
vpxor xmm0,xmm0,xmm3
|
|
vpxor xmm3,xmm15,xmm14
|
|
|
|
vpxor xmm13,xmm13,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm14,13
|
|
|
|
vpslld xmm2,xmm14,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm4,xmm4,xmm3
|
|
|
|
vpxor xmm7,xmm13,xmm1
|
|
|
|
vpsrld xmm1,xmm14,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm14,10
|
|
vpxor xmm13,xmm15,xmm4
|
|
vpaddd xmm9,xmm9,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm13,xmm13,xmm5
|
|
vpaddd xmm13,xmm13,xmm7
|
|
vmovdqu xmm5,XMMWORD PTR[((192-128))+rax]
|
|
vpaddd xmm6,xmm6,XMMWORD PTR[((64-128))+rax]
|
|
|
|
vpsrld xmm7,xmm5,3
|
|
vpsrld xmm1,xmm5,7
|
|
vpslld xmm2,xmm5,25
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm5,18
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm5,14
|
|
vmovdqu xmm0,XMMWORD PTR[((144-128))+rax]
|
|
vpsrld xmm4,xmm0,10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm0,17
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,15
|
|
vpaddd xmm6,xmm6,xmm7
|
|
vpxor xmm7,xmm4,xmm1
|
|
vpsrld xmm1,xmm0,19
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,13
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpaddd xmm6,xmm6,xmm7
|
|
vpsrld xmm7,xmm9,6
|
|
vpslld xmm2,xmm9,26
|
|
vmovdqu XMMWORD PTR[(176-128)+rax],xmm6
|
|
vpaddd xmm6,xmm6,xmm12
|
|
|
|
vpsrld xmm1,xmm9,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm9,21
|
|
vpaddd xmm6,xmm6,XMMWORD PTR[((-32))+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm9,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm9,7
|
|
vpandn xmm0,xmm9,xmm11
|
|
vpand xmm4,xmm9,xmm10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm12,xmm13,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm13,30
|
|
vpxor xmm0,xmm0,xmm4
|
|
vpxor xmm4,xmm14,xmm13
|
|
|
|
vpxor xmm12,xmm12,xmm1
|
|
vpaddd xmm6,xmm6,xmm7
|
|
|
|
vpsrld xmm1,xmm13,13
|
|
|
|
vpslld xmm2,xmm13,19
|
|
vpaddd xmm6,xmm6,xmm0
|
|
vpand xmm3,xmm3,xmm4
|
|
|
|
vpxor xmm7,xmm12,xmm1
|
|
|
|
vpsrld xmm1,xmm13,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm13,10
|
|
vpxor xmm12,xmm14,xmm3
|
|
vpaddd xmm8,xmm8,xmm6
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm12,xmm12,xmm6
|
|
vpaddd xmm12,xmm12,xmm7
|
|
vmovdqu xmm6,XMMWORD PTR[((208-128))+rax]
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((80-128))+rax]
|
|
|
|
vpsrld xmm7,xmm6,3
|
|
vpsrld xmm1,xmm6,7
|
|
vpslld xmm2,xmm6,25
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm6,18
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm6,14
|
|
vmovdqu xmm0,XMMWORD PTR[((160-128))+rax]
|
|
vpsrld xmm3,xmm0,10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm0,17
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,15
|
|
vpaddd xmm5,xmm5,xmm7
|
|
vpxor xmm7,xmm3,xmm1
|
|
vpsrld xmm1,xmm0,19
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,13
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpaddd xmm5,xmm5,xmm7
|
|
vpsrld xmm7,xmm8,6
|
|
vpslld xmm2,xmm8,26
|
|
vmovdqu XMMWORD PTR[(192-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm11
|
|
|
|
vpsrld xmm1,xmm8,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm8,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm8,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm8,7
|
|
vpandn xmm0,xmm8,xmm10
|
|
vpand xmm3,xmm8,xmm9
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm11,xmm12,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm12,30
|
|
vpxor xmm0,xmm0,xmm3
|
|
vpxor xmm3,xmm13,xmm12
|
|
|
|
vpxor xmm11,xmm11,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm12,13
|
|
|
|
vpslld xmm2,xmm12,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm4,xmm4,xmm3
|
|
|
|
vpxor xmm7,xmm11,xmm1
|
|
|
|
vpsrld xmm1,xmm12,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm12,10
|
|
vpxor xmm11,xmm13,xmm4
|
|
vpaddd xmm15,xmm15,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm11,xmm11,xmm5
|
|
vpaddd xmm11,xmm11,xmm7
|
|
vmovdqu xmm5,XMMWORD PTR[((224-128))+rax]
|
|
vpaddd xmm6,xmm6,XMMWORD PTR[((96-128))+rax]
|
|
|
|
vpsrld xmm7,xmm5,3
|
|
vpsrld xmm1,xmm5,7
|
|
vpslld xmm2,xmm5,25
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm5,18
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm5,14
|
|
vmovdqu xmm0,XMMWORD PTR[((176-128))+rax]
|
|
vpsrld xmm4,xmm0,10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm0,17
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,15
|
|
vpaddd xmm6,xmm6,xmm7
|
|
vpxor xmm7,xmm4,xmm1
|
|
vpsrld xmm1,xmm0,19
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,13
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpaddd xmm6,xmm6,xmm7
|
|
vpsrld xmm7,xmm15,6
|
|
vpslld xmm2,xmm15,26
|
|
vmovdqu XMMWORD PTR[(208-128)+rax],xmm6
|
|
vpaddd xmm6,xmm6,xmm10
|
|
|
|
vpsrld xmm1,xmm15,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm15,21
|
|
vpaddd xmm6,xmm6,XMMWORD PTR[32+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm15,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm15,7
|
|
vpandn xmm0,xmm15,xmm9
|
|
vpand xmm4,xmm15,xmm8
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm10,xmm11,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm11,30
|
|
vpxor xmm0,xmm0,xmm4
|
|
vpxor xmm4,xmm12,xmm11
|
|
|
|
vpxor xmm10,xmm10,xmm1
|
|
vpaddd xmm6,xmm6,xmm7
|
|
|
|
vpsrld xmm1,xmm11,13
|
|
|
|
vpslld xmm2,xmm11,19
|
|
vpaddd xmm6,xmm6,xmm0
|
|
vpand xmm3,xmm3,xmm4
|
|
|
|
vpxor xmm7,xmm10,xmm1
|
|
|
|
vpsrld xmm1,xmm11,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm11,10
|
|
vpxor xmm10,xmm12,xmm3
|
|
vpaddd xmm14,xmm14,xmm6
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm10,xmm10,xmm6
|
|
vpaddd xmm10,xmm10,xmm7
|
|
vmovdqu xmm6,XMMWORD PTR[((240-128))+rax]
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[((112-128))+rax]
|
|
|
|
vpsrld xmm7,xmm6,3
|
|
vpsrld xmm1,xmm6,7
|
|
vpslld xmm2,xmm6,25
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm6,18
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm6,14
|
|
vmovdqu xmm0,XMMWORD PTR[((192-128))+rax]
|
|
vpsrld xmm3,xmm0,10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm0,17
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,15
|
|
vpaddd xmm5,xmm5,xmm7
|
|
vpxor xmm7,xmm3,xmm1
|
|
vpsrld xmm1,xmm0,19
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,13
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpaddd xmm5,xmm5,xmm7
|
|
vpsrld xmm7,xmm14,6
|
|
vpslld xmm2,xmm14,26
|
|
vmovdqu XMMWORD PTR[(224-128)+rax],xmm5
|
|
vpaddd xmm5,xmm5,xmm9
|
|
|
|
vpsrld xmm1,xmm14,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm14,21
|
|
vpaddd xmm5,xmm5,XMMWORD PTR[64+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm14,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm14,7
|
|
vpandn xmm0,xmm14,xmm8
|
|
vpand xmm3,xmm14,xmm15
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm9,xmm10,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm10,30
|
|
vpxor xmm0,xmm0,xmm3
|
|
vpxor xmm3,xmm11,xmm10
|
|
|
|
vpxor xmm9,xmm9,xmm1
|
|
vpaddd xmm5,xmm5,xmm7
|
|
|
|
vpsrld xmm1,xmm10,13
|
|
|
|
vpslld xmm2,xmm10,19
|
|
vpaddd xmm5,xmm5,xmm0
|
|
vpand xmm4,xmm4,xmm3
|
|
|
|
vpxor xmm7,xmm9,xmm1
|
|
|
|
vpsrld xmm1,xmm10,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm10,10
|
|
vpxor xmm9,xmm11,xmm4
|
|
vpaddd xmm13,xmm13,xmm5
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm9,xmm9,xmm5
|
|
vpaddd xmm9,xmm9,xmm7
|
|
vmovdqu xmm5,XMMWORD PTR[((0-128))+rax]
|
|
vpaddd xmm6,xmm6,XMMWORD PTR[((128-128))+rax]
|
|
|
|
vpsrld xmm7,xmm5,3
|
|
vpsrld xmm1,xmm5,7
|
|
vpslld xmm2,xmm5,25
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm5,18
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm5,14
|
|
vmovdqu xmm0,XMMWORD PTR[((208-128))+rax]
|
|
vpsrld xmm4,xmm0,10
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpsrld xmm1,xmm0,17
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,15
|
|
vpaddd xmm6,xmm6,xmm7
|
|
vpxor xmm7,xmm4,xmm1
|
|
vpsrld xmm1,xmm0,19
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm0,13
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpaddd xmm6,xmm6,xmm7
|
|
vpsrld xmm7,xmm13,6
|
|
vpslld xmm2,xmm13,26
|
|
vmovdqu XMMWORD PTR[(240-128)+rax],xmm6
|
|
vpaddd xmm6,xmm6,xmm8
|
|
|
|
vpsrld xmm1,xmm13,11
|
|
vpxor xmm7,xmm7,xmm2
|
|
vpslld xmm2,xmm13,21
|
|
vpaddd xmm6,xmm6,XMMWORD PTR[96+rbp]
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm1,xmm13,25
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm13,7
|
|
vpandn xmm0,xmm13,xmm15
|
|
vpand xmm4,xmm13,xmm14
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
|
|
vpsrld xmm8,xmm9,2
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm1,xmm9,30
|
|
vpxor xmm0,xmm0,xmm4
|
|
vpxor xmm4,xmm10,xmm9
|
|
|
|
vpxor xmm8,xmm8,xmm1
|
|
vpaddd xmm6,xmm6,xmm7
|
|
|
|
vpsrld xmm1,xmm9,13
|
|
|
|
vpslld xmm2,xmm9,19
|
|
vpaddd xmm6,xmm6,xmm0
|
|
vpand xmm3,xmm3,xmm4
|
|
|
|
vpxor xmm7,xmm8,xmm1
|
|
|
|
vpsrld xmm1,xmm9,22
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpslld xmm2,xmm9,10
|
|
vpxor xmm8,xmm10,xmm3
|
|
vpaddd xmm12,xmm12,xmm6
|
|
|
|
vpxor xmm7,xmm7,xmm1
|
|
vpxor xmm7,xmm7,xmm2
|
|
|
|
vpaddd xmm8,xmm8,xmm6
|
|
vpaddd xmm8,xmm8,xmm7
|
|
add rbp,256
|
|
dec ecx
|
|
jnz $L$oop_16_xx_avx
|
|
|
|
mov ecx,1
|
|
lea rbp,QWORD PTR[((K256+128))]
|
|
cmp ecx,DWORD PTR[rbx]
|
|
cmovge r8,rbp
|
|
cmp ecx,DWORD PTR[4+rbx]
|
|
cmovge r9,rbp
|
|
cmp ecx,DWORD PTR[8+rbx]
|
|
cmovge r10,rbp
|
|
cmp ecx,DWORD PTR[12+rbx]
|
|
cmovge r11,rbp
|
|
vmovdqa xmm7,XMMWORD PTR[rbx]
|
|
vpxor xmm0,xmm0,xmm0
|
|
vmovdqa xmm6,xmm7
|
|
vpcmpgtd xmm6,xmm6,xmm0
|
|
vpaddd xmm7,xmm7,xmm6
|
|
|
|
vmovdqu xmm0,XMMWORD PTR[((0-128))+rdi]
|
|
vpand xmm8,xmm8,xmm6
|
|
vmovdqu xmm1,XMMWORD PTR[((32-128))+rdi]
|
|
vpand xmm9,xmm9,xmm6
|
|
vmovdqu xmm2,XMMWORD PTR[((64-128))+rdi]
|
|
vpand xmm10,xmm10,xmm6
|
|
vmovdqu xmm5,XMMWORD PTR[((96-128))+rdi]
|
|
vpand xmm11,xmm11,xmm6
|
|
vpaddd xmm8,xmm8,xmm0
|
|
vmovdqu xmm0,XMMWORD PTR[((128-128))+rdi]
|
|
vpand xmm12,xmm12,xmm6
|
|
vpaddd xmm9,xmm9,xmm1
|
|
vmovdqu xmm1,XMMWORD PTR[((160-128))+rdi]
|
|
vpand xmm13,xmm13,xmm6
|
|
vpaddd xmm10,xmm10,xmm2
|
|
vmovdqu xmm2,XMMWORD PTR[((192-128))+rdi]
|
|
vpand xmm14,xmm14,xmm6
|
|
vpaddd xmm11,xmm11,xmm5
|
|
vmovdqu xmm5,XMMWORD PTR[((224-128))+rdi]
|
|
vpand xmm15,xmm15,xmm6
|
|
vpaddd xmm12,xmm12,xmm0
|
|
vpaddd xmm13,xmm13,xmm1
|
|
vmovdqu XMMWORD PTR[(0-128)+rdi],xmm8
|
|
vpaddd xmm14,xmm14,xmm2
|
|
vmovdqu XMMWORD PTR[(32-128)+rdi],xmm9
|
|
vpaddd xmm15,xmm15,xmm5
|
|
vmovdqu XMMWORD PTR[(64-128)+rdi],xmm10
|
|
vmovdqu XMMWORD PTR[(96-128)+rdi],xmm11
|
|
vmovdqu XMMWORD PTR[(128-128)+rdi],xmm12
|
|
vmovdqu XMMWORD PTR[(160-128)+rdi],xmm13
|
|
vmovdqu XMMWORD PTR[(192-128)+rdi],xmm14
|
|
vmovdqu XMMWORD PTR[(224-128)+rdi],xmm15
|
|
|
|
vmovdqu XMMWORD PTR[rbx],xmm7
|
|
vmovdqu xmm6,XMMWORD PTR[$L$pbswap]
|
|
dec edx
|
|
jnz $L$oop_avx
|
|
|
|
mov edx,DWORD PTR[280+rsp]
|
|
lea rdi,QWORD PTR[16+rdi]
|
|
lea rsi,QWORD PTR[64+rsi]
|
|
dec edx
|
|
jnz $L$oop_grande_avx
|
|
|
|
$L$done_avx::
|
|
mov rax,QWORD PTR[272+rsp]
|
|
vzeroupper
|
|
movaps xmm6,XMMWORD PTR[((-184))+rax]
|
|
movaps xmm7,XMMWORD PTR[((-168))+rax]
|
|
movaps xmm8,XMMWORD PTR[((-152))+rax]
|
|
movaps xmm9,XMMWORD PTR[((-136))+rax]
|
|
movaps xmm10,XMMWORD PTR[((-120))+rax]
|
|
movaps xmm11,XMMWORD PTR[((-104))+rax]
|
|
movaps xmm12,XMMWORD PTR[((-88))+rax]
|
|
movaps xmm13,XMMWORD PTR[((-72))+rax]
|
|
movaps xmm14,XMMWORD PTR[((-56))+rax]
|
|
movaps xmm15,XMMWORD PTR[((-40))+rax]
|
|
mov rbp,QWORD PTR[((-16))+rax]
|
|
mov rbx,QWORD PTR[((-8))+rax]
|
|
lea rsp,QWORD PTR[rax]
|
|
$L$epilogue_avx::
|
|
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD PTR[16+rsp]
|
|
DB 0F3h,0C3h ;repret
|
|
$L$SEH_end_sha256_multi_block_avx::
|
|
sha256_multi_block_avx ENDP
|
|
|
|
ALIGN 32
|
|
sha256_multi_block_avx2 PROC PRIVATE
|
|
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
|
|
mov QWORD PTR[16+rsp],rsi
|
|
mov rax,rsp
|
|
$L$SEH_begin_sha256_multi_block_avx2::
|
|
mov rdi,rcx
|
|
mov rsi,rdx
|
|
mov rdx,r8
|
|
|
|
|
|
_avx2_shortcut::
|
|
mov rax,rsp
|
|
push rbx
|
|
push rbp
|
|
push r12
|
|
push r13
|
|
push r14
|
|
push r15
|
|
lea rsp,QWORD PTR[((-168))+rsp]
|
|
movaps XMMWORD PTR[rsp],xmm6
|
|
movaps XMMWORD PTR[16+rsp],xmm7
|
|
movaps XMMWORD PTR[32+rsp],xmm8
|
|
movaps XMMWORD PTR[48+rsp],xmm9
|
|
movaps XMMWORD PTR[64+rsp],xmm10
|
|
movaps XMMWORD PTR[80+rsp],xmm11
|
|
movaps XMMWORD PTR[(-120)+rax],xmm12
|
|
movaps XMMWORD PTR[(-104)+rax],xmm13
|
|
movaps XMMWORD PTR[(-88)+rax],xmm14
|
|
movaps XMMWORD PTR[(-72)+rax],xmm15
|
|
sub rsp,576
|
|
and rsp,-256
|
|
mov QWORD PTR[544+rsp],rax
|
|
$L$body_avx2::
|
|
lea rbp,QWORD PTR[((K256+128))]
|
|
lea rdi,QWORD PTR[128+rdi]
|
|
|
|
$L$oop_grande_avx2::
|
|
mov DWORD PTR[552+rsp],edx
|
|
xor edx,edx
|
|
lea rbx,QWORD PTR[512+rsp]
|
|
mov r12,QWORD PTR[rsi]
|
|
mov ecx,DWORD PTR[8+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[rbx],ecx
|
|
cmovle r12,rbp
|
|
mov r13,QWORD PTR[16+rsi]
|
|
mov ecx,DWORD PTR[24+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[4+rbx],ecx
|
|
cmovle r13,rbp
|
|
mov r14,QWORD PTR[32+rsi]
|
|
mov ecx,DWORD PTR[40+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[8+rbx],ecx
|
|
cmovle r14,rbp
|
|
mov r15,QWORD PTR[48+rsi]
|
|
mov ecx,DWORD PTR[56+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[12+rbx],ecx
|
|
cmovle r15,rbp
|
|
mov r8,QWORD PTR[64+rsi]
|
|
mov ecx,DWORD PTR[72+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[16+rbx],ecx
|
|
cmovle r8,rbp
|
|
mov r9,QWORD PTR[80+rsi]
|
|
mov ecx,DWORD PTR[88+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[20+rbx],ecx
|
|
cmovle r9,rbp
|
|
mov r10,QWORD PTR[96+rsi]
|
|
mov ecx,DWORD PTR[104+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[24+rbx],ecx
|
|
cmovle r10,rbp
|
|
mov r11,QWORD PTR[112+rsi]
|
|
mov ecx,DWORD PTR[120+rsi]
|
|
cmp ecx,edx
|
|
cmovg edx,ecx
|
|
test ecx,ecx
|
|
mov DWORD PTR[28+rbx],ecx
|
|
cmovle r11,rbp
|
|
vmovdqu ymm8,YMMWORD PTR[((0-128))+rdi]
|
|
lea rax,QWORD PTR[128+rsp]
|
|
vmovdqu ymm9,YMMWORD PTR[((32-128))+rdi]
|
|
lea rbx,QWORD PTR[((256+128))+rsp]
|
|
vmovdqu ymm10,YMMWORD PTR[((64-128))+rdi]
|
|
vmovdqu ymm11,YMMWORD PTR[((96-128))+rdi]
|
|
vmovdqu ymm12,YMMWORD PTR[((128-128))+rdi]
|
|
vmovdqu ymm13,YMMWORD PTR[((160-128))+rdi]
|
|
vmovdqu ymm14,YMMWORD PTR[((192-128))+rdi]
|
|
vmovdqu ymm15,YMMWORD PTR[((224-128))+rdi]
|
|
vmovdqu ymm6,YMMWORD PTR[$L$pbswap]
|
|
jmp $L$oop_avx2
|
|
|
|
ALIGN 32
|
|
$L$oop_avx2::
|
|
vpxor ymm4,ymm10,ymm9
|
|
vmovd xmm5,DWORD PTR[r12]
|
|
vmovd xmm0,DWORD PTR[r8]
|
|
vmovd xmm1,DWORD PTR[r13]
|
|
vmovd xmm2,DWORD PTR[r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[r14],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[r10],1
|
|
vpinsrd xmm1,xmm1,DWORD PTR[r15],1
|
|
vpunpckldq ymm5,ymm5,ymm1
|
|
vpinsrd xmm2,xmm2,DWORD PTR[r11],1
|
|
vpunpckldq ymm0,ymm0,ymm2
|
|
vinserti128 ymm5,ymm5,xmm0,1
|
|
vpshufb ymm5,ymm5,ymm6
|
|
vpsrld ymm7,ymm12,6
|
|
vpslld ymm2,ymm12,26
|
|
vmovdqu YMMWORD PTR[(0-128)+rax],ymm5
|
|
vpaddd ymm5,ymm5,ymm15
|
|
|
|
vpsrld ymm1,ymm12,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm12,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((-128))+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm12,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm12,7
|
|
vpandn ymm0,ymm12,ymm14
|
|
vpand ymm3,ymm12,ymm13
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm15,ymm8,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm8,30
|
|
vpxor ymm0,ymm0,ymm3
|
|
vpxor ymm3,ymm9,ymm8
|
|
|
|
vpxor ymm15,ymm15,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm8,13
|
|
|
|
vpslld ymm2,ymm8,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm4,ymm4,ymm3
|
|
|
|
vpxor ymm7,ymm15,ymm1
|
|
|
|
vpsrld ymm1,ymm8,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm8,10
|
|
vpxor ymm15,ymm9,ymm4
|
|
vpaddd ymm11,ymm11,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm15,ymm15,ymm5
|
|
vpaddd ymm15,ymm15,ymm7
|
|
vmovd xmm5,DWORD PTR[4+r12]
|
|
vmovd xmm0,DWORD PTR[4+r8]
|
|
vmovd xmm1,DWORD PTR[4+r13]
|
|
vmovd xmm2,DWORD PTR[4+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[4+r14],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[4+r10],1
|
|
vpinsrd xmm1,xmm1,DWORD PTR[4+r15],1
|
|
vpunpckldq ymm5,ymm5,ymm1
|
|
vpinsrd xmm2,xmm2,DWORD PTR[4+r11],1
|
|
vpunpckldq ymm0,ymm0,ymm2
|
|
vinserti128 ymm5,ymm5,xmm0,1
|
|
vpshufb ymm5,ymm5,ymm6
|
|
vpsrld ymm7,ymm11,6
|
|
vpslld ymm2,ymm11,26
|
|
vmovdqu YMMWORD PTR[(32-128)+rax],ymm5
|
|
vpaddd ymm5,ymm5,ymm14
|
|
|
|
vpsrld ymm1,ymm11,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm11,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((-96))+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm11,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm11,7
|
|
vpandn ymm0,ymm11,ymm13
|
|
vpand ymm4,ymm11,ymm12
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm14,ymm15,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm15,30
|
|
vpxor ymm0,ymm0,ymm4
|
|
vpxor ymm4,ymm8,ymm15
|
|
|
|
vpxor ymm14,ymm14,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm15,13
|
|
|
|
vpslld ymm2,ymm15,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm3,ymm3,ymm4
|
|
|
|
vpxor ymm7,ymm14,ymm1
|
|
|
|
vpsrld ymm1,ymm15,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm15,10
|
|
vpxor ymm14,ymm8,ymm3
|
|
vpaddd ymm10,ymm10,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm14,ymm14,ymm5
|
|
vpaddd ymm14,ymm14,ymm7
|
|
vmovd xmm5,DWORD PTR[8+r12]
|
|
vmovd xmm0,DWORD PTR[8+r8]
|
|
vmovd xmm1,DWORD PTR[8+r13]
|
|
vmovd xmm2,DWORD PTR[8+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[8+r14],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[8+r10],1
|
|
vpinsrd xmm1,xmm1,DWORD PTR[8+r15],1
|
|
vpunpckldq ymm5,ymm5,ymm1
|
|
vpinsrd xmm2,xmm2,DWORD PTR[8+r11],1
|
|
vpunpckldq ymm0,ymm0,ymm2
|
|
vinserti128 ymm5,ymm5,xmm0,1
|
|
vpshufb ymm5,ymm5,ymm6
|
|
vpsrld ymm7,ymm10,6
|
|
vpslld ymm2,ymm10,26
|
|
vmovdqu YMMWORD PTR[(64-128)+rax],ymm5
|
|
vpaddd ymm5,ymm5,ymm13
|
|
|
|
vpsrld ymm1,ymm10,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm10,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((-64))+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm10,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm10,7
|
|
vpandn ymm0,ymm10,ymm12
|
|
vpand ymm3,ymm10,ymm11
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm13,ymm14,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm14,30
|
|
vpxor ymm0,ymm0,ymm3
|
|
vpxor ymm3,ymm15,ymm14
|
|
|
|
vpxor ymm13,ymm13,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm14,13
|
|
|
|
vpslld ymm2,ymm14,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm4,ymm4,ymm3
|
|
|
|
vpxor ymm7,ymm13,ymm1
|
|
|
|
vpsrld ymm1,ymm14,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm14,10
|
|
vpxor ymm13,ymm15,ymm4
|
|
vpaddd ymm9,ymm9,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm13,ymm13,ymm5
|
|
vpaddd ymm13,ymm13,ymm7
|
|
vmovd xmm5,DWORD PTR[12+r12]
|
|
vmovd xmm0,DWORD PTR[12+r8]
|
|
vmovd xmm1,DWORD PTR[12+r13]
|
|
vmovd xmm2,DWORD PTR[12+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[12+r14],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[12+r10],1
|
|
vpinsrd xmm1,xmm1,DWORD PTR[12+r15],1
|
|
vpunpckldq ymm5,ymm5,ymm1
|
|
vpinsrd xmm2,xmm2,DWORD PTR[12+r11],1
|
|
vpunpckldq ymm0,ymm0,ymm2
|
|
vinserti128 ymm5,ymm5,xmm0,1
|
|
vpshufb ymm5,ymm5,ymm6
|
|
vpsrld ymm7,ymm9,6
|
|
vpslld ymm2,ymm9,26
|
|
vmovdqu YMMWORD PTR[(96-128)+rax],ymm5
|
|
vpaddd ymm5,ymm5,ymm12
|
|
|
|
vpsrld ymm1,ymm9,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm9,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((-32))+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm9,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm9,7
|
|
vpandn ymm0,ymm9,ymm11
|
|
vpand ymm4,ymm9,ymm10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm12,ymm13,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm13,30
|
|
vpxor ymm0,ymm0,ymm4
|
|
vpxor ymm4,ymm14,ymm13
|
|
|
|
vpxor ymm12,ymm12,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm13,13
|
|
|
|
vpslld ymm2,ymm13,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm3,ymm3,ymm4
|
|
|
|
vpxor ymm7,ymm12,ymm1
|
|
|
|
vpsrld ymm1,ymm13,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm13,10
|
|
vpxor ymm12,ymm14,ymm3
|
|
vpaddd ymm8,ymm8,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm12,ymm12,ymm5
|
|
vpaddd ymm12,ymm12,ymm7
|
|
vmovd xmm5,DWORD PTR[16+r12]
|
|
vmovd xmm0,DWORD PTR[16+r8]
|
|
vmovd xmm1,DWORD PTR[16+r13]
|
|
vmovd xmm2,DWORD PTR[16+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[16+r14],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[16+r10],1
|
|
vpinsrd xmm1,xmm1,DWORD PTR[16+r15],1
|
|
vpunpckldq ymm5,ymm5,ymm1
|
|
vpinsrd xmm2,xmm2,DWORD PTR[16+r11],1
|
|
vpunpckldq ymm0,ymm0,ymm2
|
|
vinserti128 ymm5,ymm5,xmm0,1
|
|
vpshufb ymm5,ymm5,ymm6
|
|
vpsrld ymm7,ymm8,6
|
|
vpslld ymm2,ymm8,26
|
|
vmovdqu YMMWORD PTR[(128-128)+rax],ymm5
|
|
vpaddd ymm5,ymm5,ymm11
|
|
|
|
vpsrld ymm1,ymm8,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm8,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm8,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm8,7
|
|
vpandn ymm0,ymm8,ymm10
|
|
vpand ymm3,ymm8,ymm9
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm11,ymm12,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm12,30
|
|
vpxor ymm0,ymm0,ymm3
|
|
vpxor ymm3,ymm13,ymm12
|
|
|
|
vpxor ymm11,ymm11,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm12,13
|
|
|
|
vpslld ymm2,ymm12,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm4,ymm4,ymm3
|
|
|
|
vpxor ymm7,ymm11,ymm1
|
|
|
|
vpsrld ymm1,ymm12,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm12,10
|
|
vpxor ymm11,ymm13,ymm4
|
|
vpaddd ymm15,ymm15,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm11,ymm11,ymm5
|
|
vpaddd ymm11,ymm11,ymm7
|
|
vmovd xmm5,DWORD PTR[20+r12]
|
|
vmovd xmm0,DWORD PTR[20+r8]
|
|
vmovd xmm1,DWORD PTR[20+r13]
|
|
vmovd xmm2,DWORD PTR[20+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[20+r14],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[20+r10],1
|
|
vpinsrd xmm1,xmm1,DWORD PTR[20+r15],1
|
|
vpunpckldq ymm5,ymm5,ymm1
|
|
vpinsrd xmm2,xmm2,DWORD PTR[20+r11],1
|
|
vpunpckldq ymm0,ymm0,ymm2
|
|
vinserti128 ymm5,ymm5,xmm0,1
|
|
vpshufb ymm5,ymm5,ymm6
|
|
vpsrld ymm7,ymm15,6
|
|
vpslld ymm2,ymm15,26
|
|
vmovdqu YMMWORD PTR[(160-128)+rax],ymm5
|
|
vpaddd ymm5,ymm5,ymm10
|
|
|
|
vpsrld ymm1,ymm15,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm15,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[32+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm15,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm15,7
|
|
vpandn ymm0,ymm15,ymm9
|
|
vpand ymm4,ymm15,ymm8
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm10,ymm11,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm11,30
|
|
vpxor ymm0,ymm0,ymm4
|
|
vpxor ymm4,ymm12,ymm11
|
|
|
|
vpxor ymm10,ymm10,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm11,13
|
|
|
|
vpslld ymm2,ymm11,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm3,ymm3,ymm4
|
|
|
|
vpxor ymm7,ymm10,ymm1
|
|
|
|
vpsrld ymm1,ymm11,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm11,10
|
|
vpxor ymm10,ymm12,ymm3
|
|
vpaddd ymm14,ymm14,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm10,ymm10,ymm5
|
|
vpaddd ymm10,ymm10,ymm7
|
|
vmovd xmm5,DWORD PTR[24+r12]
|
|
vmovd xmm0,DWORD PTR[24+r8]
|
|
vmovd xmm1,DWORD PTR[24+r13]
|
|
vmovd xmm2,DWORD PTR[24+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[24+r14],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[24+r10],1
|
|
vpinsrd xmm1,xmm1,DWORD PTR[24+r15],1
|
|
vpunpckldq ymm5,ymm5,ymm1
|
|
vpinsrd xmm2,xmm2,DWORD PTR[24+r11],1
|
|
vpunpckldq ymm0,ymm0,ymm2
|
|
vinserti128 ymm5,ymm5,xmm0,1
|
|
vpshufb ymm5,ymm5,ymm6
|
|
vpsrld ymm7,ymm14,6
|
|
vpslld ymm2,ymm14,26
|
|
vmovdqu YMMWORD PTR[(192-128)+rax],ymm5
|
|
vpaddd ymm5,ymm5,ymm9
|
|
|
|
vpsrld ymm1,ymm14,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm14,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[64+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm14,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm14,7
|
|
vpandn ymm0,ymm14,ymm8
|
|
vpand ymm3,ymm14,ymm15
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm9,ymm10,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm10,30
|
|
vpxor ymm0,ymm0,ymm3
|
|
vpxor ymm3,ymm11,ymm10
|
|
|
|
vpxor ymm9,ymm9,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm10,13
|
|
|
|
vpslld ymm2,ymm10,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm4,ymm4,ymm3
|
|
|
|
vpxor ymm7,ymm9,ymm1
|
|
|
|
vpsrld ymm1,ymm10,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm10,10
|
|
vpxor ymm9,ymm11,ymm4
|
|
vpaddd ymm13,ymm13,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm9,ymm9,ymm5
|
|
vpaddd ymm9,ymm9,ymm7
|
|
vmovd xmm5,DWORD PTR[28+r12]
|
|
vmovd xmm0,DWORD PTR[28+r8]
|
|
vmovd xmm1,DWORD PTR[28+r13]
|
|
vmovd xmm2,DWORD PTR[28+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[28+r14],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[28+r10],1
|
|
vpinsrd xmm1,xmm1,DWORD PTR[28+r15],1
|
|
vpunpckldq ymm5,ymm5,ymm1
|
|
vpinsrd xmm2,xmm2,DWORD PTR[28+r11],1
|
|
vpunpckldq ymm0,ymm0,ymm2
|
|
vinserti128 ymm5,ymm5,xmm0,1
|
|
vpshufb ymm5,ymm5,ymm6
|
|
vpsrld ymm7,ymm13,6
|
|
vpslld ymm2,ymm13,26
|
|
vmovdqu YMMWORD PTR[(224-128)+rax],ymm5
|
|
vpaddd ymm5,ymm5,ymm8
|
|
|
|
vpsrld ymm1,ymm13,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm13,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[96+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm13,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm13,7
|
|
vpandn ymm0,ymm13,ymm15
|
|
vpand ymm4,ymm13,ymm14
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm8,ymm9,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm9,30
|
|
vpxor ymm0,ymm0,ymm4
|
|
vpxor ymm4,ymm10,ymm9
|
|
|
|
vpxor ymm8,ymm8,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm9,13
|
|
|
|
vpslld ymm2,ymm9,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm3,ymm3,ymm4
|
|
|
|
vpxor ymm7,ymm8,ymm1
|
|
|
|
vpsrld ymm1,ymm9,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm9,10
|
|
vpxor ymm8,ymm10,ymm3
|
|
vpaddd ymm12,ymm12,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm8,ymm8,ymm5
|
|
vpaddd ymm8,ymm8,ymm7
|
|
add rbp,256
|
|
vmovd xmm5,DWORD PTR[32+r12]
|
|
vmovd xmm0,DWORD PTR[32+r8]
|
|
vmovd xmm1,DWORD PTR[32+r13]
|
|
vmovd xmm2,DWORD PTR[32+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[32+r14],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[32+r10],1
|
|
vpinsrd xmm1,xmm1,DWORD PTR[32+r15],1
|
|
vpunpckldq ymm5,ymm5,ymm1
|
|
vpinsrd xmm2,xmm2,DWORD PTR[32+r11],1
|
|
vpunpckldq ymm0,ymm0,ymm2
|
|
vinserti128 ymm5,ymm5,xmm0,1
|
|
vpshufb ymm5,ymm5,ymm6
|
|
vpsrld ymm7,ymm12,6
|
|
vpslld ymm2,ymm12,26
|
|
vmovdqu YMMWORD PTR[(256-256-128)+rbx],ymm5
|
|
vpaddd ymm5,ymm5,ymm15
|
|
|
|
vpsrld ymm1,ymm12,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm12,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((-128))+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm12,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm12,7
|
|
vpandn ymm0,ymm12,ymm14
|
|
vpand ymm3,ymm12,ymm13
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm15,ymm8,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm8,30
|
|
vpxor ymm0,ymm0,ymm3
|
|
vpxor ymm3,ymm9,ymm8
|
|
|
|
vpxor ymm15,ymm15,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm8,13
|
|
|
|
vpslld ymm2,ymm8,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm4,ymm4,ymm3
|
|
|
|
vpxor ymm7,ymm15,ymm1
|
|
|
|
vpsrld ymm1,ymm8,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm8,10
|
|
vpxor ymm15,ymm9,ymm4
|
|
vpaddd ymm11,ymm11,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm15,ymm15,ymm5
|
|
vpaddd ymm15,ymm15,ymm7
|
|
vmovd xmm5,DWORD PTR[36+r12]
|
|
vmovd xmm0,DWORD PTR[36+r8]
|
|
vmovd xmm1,DWORD PTR[36+r13]
|
|
vmovd xmm2,DWORD PTR[36+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[36+r14],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[36+r10],1
|
|
vpinsrd xmm1,xmm1,DWORD PTR[36+r15],1
|
|
vpunpckldq ymm5,ymm5,ymm1
|
|
vpinsrd xmm2,xmm2,DWORD PTR[36+r11],1
|
|
vpunpckldq ymm0,ymm0,ymm2
|
|
vinserti128 ymm5,ymm5,xmm0,1
|
|
vpshufb ymm5,ymm5,ymm6
|
|
vpsrld ymm7,ymm11,6
|
|
vpslld ymm2,ymm11,26
|
|
vmovdqu YMMWORD PTR[(288-256-128)+rbx],ymm5
|
|
vpaddd ymm5,ymm5,ymm14
|
|
|
|
vpsrld ymm1,ymm11,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm11,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((-96))+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm11,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm11,7
|
|
vpandn ymm0,ymm11,ymm13
|
|
vpand ymm4,ymm11,ymm12
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm14,ymm15,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm15,30
|
|
vpxor ymm0,ymm0,ymm4
|
|
vpxor ymm4,ymm8,ymm15
|
|
|
|
vpxor ymm14,ymm14,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm15,13
|
|
|
|
vpslld ymm2,ymm15,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm3,ymm3,ymm4
|
|
|
|
vpxor ymm7,ymm14,ymm1
|
|
|
|
vpsrld ymm1,ymm15,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm15,10
|
|
vpxor ymm14,ymm8,ymm3
|
|
vpaddd ymm10,ymm10,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm14,ymm14,ymm5
|
|
vpaddd ymm14,ymm14,ymm7
|
|
vmovd xmm5,DWORD PTR[40+r12]
|
|
vmovd xmm0,DWORD PTR[40+r8]
|
|
vmovd xmm1,DWORD PTR[40+r13]
|
|
vmovd xmm2,DWORD PTR[40+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[40+r14],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[40+r10],1
|
|
vpinsrd xmm1,xmm1,DWORD PTR[40+r15],1
|
|
vpunpckldq ymm5,ymm5,ymm1
|
|
vpinsrd xmm2,xmm2,DWORD PTR[40+r11],1
|
|
vpunpckldq ymm0,ymm0,ymm2
|
|
vinserti128 ymm5,ymm5,xmm0,1
|
|
vpshufb ymm5,ymm5,ymm6
|
|
vpsrld ymm7,ymm10,6
|
|
vpslld ymm2,ymm10,26
|
|
vmovdqu YMMWORD PTR[(320-256-128)+rbx],ymm5
|
|
vpaddd ymm5,ymm5,ymm13
|
|
|
|
vpsrld ymm1,ymm10,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm10,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((-64))+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm10,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm10,7
|
|
vpandn ymm0,ymm10,ymm12
|
|
vpand ymm3,ymm10,ymm11
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm13,ymm14,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm14,30
|
|
vpxor ymm0,ymm0,ymm3
|
|
vpxor ymm3,ymm15,ymm14
|
|
|
|
vpxor ymm13,ymm13,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm14,13
|
|
|
|
vpslld ymm2,ymm14,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm4,ymm4,ymm3
|
|
|
|
vpxor ymm7,ymm13,ymm1
|
|
|
|
vpsrld ymm1,ymm14,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm14,10
|
|
vpxor ymm13,ymm15,ymm4
|
|
vpaddd ymm9,ymm9,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm13,ymm13,ymm5
|
|
vpaddd ymm13,ymm13,ymm7
|
|
vmovd xmm5,DWORD PTR[44+r12]
|
|
vmovd xmm0,DWORD PTR[44+r8]
|
|
vmovd xmm1,DWORD PTR[44+r13]
|
|
vmovd xmm2,DWORD PTR[44+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[44+r14],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[44+r10],1
|
|
vpinsrd xmm1,xmm1,DWORD PTR[44+r15],1
|
|
vpunpckldq ymm5,ymm5,ymm1
|
|
vpinsrd xmm2,xmm2,DWORD PTR[44+r11],1
|
|
vpunpckldq ymm0,ymm0,ymm2
|
|
vinserti128 ymm5,ymm5,xmm0,1
|
|
vpshufb ymm5,ymm5,ymm6
|
|
vpsrld ymm7,ymm9,6
|
|
vpslld ymm2,ymm9,26
|
|
vmovdqu YMMWORD PTR[(352-256-128)+rbx],ymm5
|
|
vpaddd ymm5,ymm5,ymm12
|
|
|
|
vpsrld ymm1,ymm9,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm9,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((-32))+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm9,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm9,7
|
|
vpandn ymm0,ymm9,ymm11
|
|
vpand ymm4,ymm9,ymm10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm12,ymm13,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm13,30
|
|
vpxor ymm0,ymm0,ymm4
|
|
vpxor ymm4,ymm14,ymm13
|
|
|
|
vpxor ymm12,ymm12,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm13,13
|
|
|
|
vpslld ymm2,ymm13,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm3,ymm3,ymm4
|
|
|
|
vpxor ymm7,ymm12,ymm1
|
|
|
|
vpsrld ymm1,ymm13,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm13,10
|
|
vpxor ymm12,ymm14,ymm3
|
|
vpaddd ymm8,ymm8,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm12,ymm12,ymm5
|
|
vpaddd ymm12,ymm12,ymm7
|
|
vmovd xmm5,DWORD PTR[48+r12]
|
|
vmovd xmm0,DWORD PTR[48+r8]
|
|
vmovd xmm1,DWORD PTR[48+r13]
|
|
vmovd xmm2,DWORD PTR[48+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[48+r14],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[48+r10],1
|
|
vpinsrd xmm1,xmm1,DWORD PTR[48+r15],1
|
|
vpunpckldq ymm5,ymm5,ymm1
|
|
vpinsrd xmm2,xmm2,DWORD PTR[48+r11],1
|
|
vpunpckldq ymm0,ymm0,ymm2
|
|
vinserti128 ymm5,ymm5,xmm0,1
|
|
vpshufb ymm5,ymm5,ymm6
|
|
vpsrld ymm7,ymm8,6
|
|
vpslld ymm2,ymm8,26
|
|
vmovdqu YMMWORD PTR[(384-256-128)+rbx],ymm5
|
|
vpaddd ymm5,ymm5,ymm11
|
|
|
|
vpsrld ymm1,ymm8,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm8,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm8,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm8,7
|
|
vpandn ymm0,ymm8,ymm10
|
|
vpand ymm3,ymm8,ymm9
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm11,ymm12,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm12,30
|
|
vpxor ymm0,ymm0,ymm3
|
|
vpxor ymm3,ymm13,ymm12
|
|
|
|
vpxor ymm11,ymm11,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm12,13
|
|
|
|
vpslld ymm2,ymm12,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm4,ymm4,ymm3
|
|
|
|
vpxor ymm7,ymm11,ymm1
|
|
|
|
vpsrld ymm1,ymm12,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm12,10
|
|
vpxor ymm11,ymm13,ymm4
|
|
vpaddd ymm15,ymm15,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm11,ymm11,ymm5
|
|
vpaddd ymm11,ymm11,ymm7
|
|
vmovd xmm5,DWORD PTR[52+r12]
|
|
vmovd xmm0,DWORD PTR[52+r8]
|
|
vmovd xmm1,DWORD PTR[52+r13]
|
|
vmovd xmm2,DWORD PTR[52+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[52+r14],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[52+r10],1
|
|
vpinsrd xmm1,xmm1,DWORD PTR[52+r15],1
|
|
vpunpckldq ymm5,ymm5,ymm1
|
|
vpinsrd xmm2,xmm2,DWORD PTR[52+r11],1
|
|
vpunpckldq ymm0,ymm0,ymm2
|
|
vinserti128 ymm5,ymm5,xmm0,1
|
|
vpshufb ymm5,ymm5,ymm6
|
|
vpsrld ymm7,ymm15,6
|
|
vpslld ymm2,ymm15,26
|
|
vmovdqu YMMWORD PTR[(416-256-128)+rbx],ymm5
|
|
vpaddd ymm5,ymm5,ymm10
|
|
|
|
vpsrld ymm1,ymm15,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm15,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[32+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm15,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm15,7
|
|
vpandn ymm0,ymm15,ymm9
|
|
vpand ymm4,ymm15,ymm8
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm10,ymm11,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm11,30
|
|
vpxor ymm0,ymm0,ymm4
|
|
vpxor ymm4,ymm12,ymm11
|
|
|
|
vpxor ymm10,ymm10,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm11,13
|
|
|
|
vpslld ymm2,ymm11,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm3,ymm3,ymm4
|
|
|
|
vpxor ymm7,ymm10,ymm1
|
|
|
|
vpsrld ymm1,ymm11,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm11,10
|
|
vpxor ymm10,ymm12,ymm3
|
|
vpaddd ymm14,ymm14,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm10,ymm10,ymm5
|
|
vpaddd ymm10,ymm10,ymm7
|
|
vmovd xmm5,DWORD PTR[56+r12]
|
|
vmovd xmm0,DWORD PTR[56+r8]
|
|
vmovd xmm1,DWORD PTR[56+r13]
|
|
vmovd xmm2,DWORD PTR[56+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[56+r14],1
|
|
vpinsrd xmm0,xmm0,DWORD PTR[56+r10],1
|
|
vpinsrd xmm1,xmm1,DWORD PTR[56+r15],1
|
|
vpunpckldq ymm5,ymm5,ymm1
|
|
vpinsrd xmm2,xmm2,DWORD PTR[56+r11],1
|
|
vpunpckldq ymm0,ymm0,ymm2
|
|
vinserti128 ymm5,ymm5,xmm0,1
|
|
vpshufb ymm5,ymm5,ymm6
|
|
vpsrld ymm7,ymm14,6
|
|
vpslld ymm2,ymm14,26
|
|
vmovdqu YMMWORD PTR[(448-256-128)+rbx],ymm5
|
|
vpaddd ymm5,ymm5,ymm9
|
|
|
|
vpsrld ymm1,ymm14,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm14,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[64+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm14,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm14,7
|
|
vpandn ymm0,ymm14,ymm8
|
|
vpand ymm3,ymm14,ymm15
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm9,ymm10,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm10,30
|
|
vpxor ymm0,ymm0,ymm3
|
|
vpxor ymm3,ymm11,ymm10
|
|
|
|
vpxor ymm9,ymm9,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm10,13
|
|
|
|
vpslld ymm2,ymm10,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm4,ymm4,ymm3
|
|
|
|
vpxor ymm7,ymm9,ymm1
|
|
|
|
vpsrld ymm1,ymm10,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm10,10
|
|
vpxor ymm9,ymm11,ymm4
|
|
vpaddd ymm13,ymm13,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm9,ymm9,ymm5
|
|
vpaddd ymm9,ymm9,ymm7
|
|
; Gather the dword at byte offset 60 from each of the eight input pointers
; (r12,r13,r14,r15,r8,r9,r10,r11) into the eight 32-bit lanes of ymm5, and
; advance every pointer by 64 bytes right after its last read.
; Resulting lane order, low to high: r12,r13,r14,r15 | r8,r9,r10,r11.
; NOTE(review): ymm6 is set up outside this view; presumably a byte-swap
; shuffle mask - confirm against the code before the first round.
vmovd xmm5,DWORD PTR[60+r12] ; lane 0 source
|
|
lea r12,QWORD PTR[64+r12] ; r12 += 64 (lea leaves flags untouched)
|
|
vmovd xmm0,DWORD PTR[60+r8] ; lane 4 source
|
|
lea r8,QWORD PTR[64+r8]
|
|
vmovd xmm1,DWORD PTR[60+r13] ; lane 1 source
|
|
lea r13,QWORD PTR[64+r13]
|
|
vmovd xmm2,DWORD PTR[60+r9] ; lane 5 source
|
|
lea r9,QWORD PTR[64+r9]
|
|
vpinsrd xmm5,xmm5,DWORD PTR[60+r14],1 ; xmm5 = {r12, r14}
|
|
lea r14,QWORD PTR[64+r14]
|
|
vpinsrd xmm0,xmm0,DWORD PTR[60+r10],1 ; xmm0 = {r8, r10}
|
|
lea r10,QWORD PTR[64+r10]
|
|
vpinsrd xmm1,xmm1,DWORD PTR[60+r15],1 ; xmm1 = {r13, r15}
|
|
lea r15,QWORD PTR[64+r15]
|
|
vpunpckldq ymm5,ymm5,ymm1 ; interleave -> {r12, r13, r14, r15}
|
|
vpinsrd xmm2,xmm2,DWORD PTR[60+r11],1 ; xmm2 = {r9, r11}
|
|
lea r11,QWORD PTR[64+r11]
|
|
vpunpckldq ymm0,ymm0,ymm2 ; interleave -> {r8, r9, r10, r11}
|
|
vinserti128 ymm5,ymm5,xmm0,1 ; upper 128 bits of ymm5 = xmm0
|
|
vpshufb ymm5,ymm5,ymm6 ; byte shuffle by mask in ymm6 (see NOTE above)
|
|
vpsrld ymm7,ymm13,6 ; interleaved start of the following round:
|
|
vpslld ymm2,ymm13,26 ; shift pair (6 + 26 = 32) forms a rotate of ymm13
|
|
vmovdqu YMMWORD PTR[(480-256-128)+rbx],ymm5 ; save gathered word in slot 480/32 = 15 (rbx-addressed half)
|
|
vpaddd ymm5,ymm5,ymm8
|
|
|
|
vpsrld ymm1,ymm13,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm13,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[96+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm13,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
prefetcht0 [63+r12]
|
|
vpslld ymm2,ymm13,7
|
|
vpandn ymm0,ymm13,ymm15
|
|
vpand ymm4,ymm13,ymm14
|
|
prefetcht0 [63+r13]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm8,ymm9,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
prefetcht0 [63+r14]
|
|
vpslld ymm1,ymm9,30
|
|
vpxor ymm0,ymm0,ymm4
|
|
vpxor ymm4,ymm10,ymm9
|
|
prefetcht0 [63+r15]
|
|
vpxor ymm8,ymm8,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm9,13
|
|
prefetcht0 [63+r8]
|
|
vpslld ymm2,ymm9,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm3,ymm3,ymm4
|
|
prefetcht0 [63+r9]
|
|
vpxor ymm7,ymm8,ymm1
|
|
|
|
vpsrld ymm1,ymm9,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
prefetcht0 [63+r10]
|
|
vpslld ymm2,ymm9,10
|
|
vpxor ymm8,ymm10,ymm3
|
|
vpaddd ymm12,ymm12,ymm5
|
|
prefetcht0 [63+r11]
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm8,ymm8,ymm5
|
|
vpaddd ymm8,ymm8,ymm7
|
|
; Hand-off from the unrolled rounds above into $L$oop_16_xx_avx2.
add rbp,256 ; step rbp past the eight 32-byte entries ([rbp-128]..[rbp+96]) consumed by the last eight rounds
|
|
vmovdqu ymm5,YMMWORD PTR[((0-128))+rax] ; preload slot 0 of the rax-addressed buffer; the loop body adds to it first
|
|
mov ecx,3 ; NOTE(review): presumably the loop trip count; the decrement/branch is outside this view - confirm
|
|
jmp $L$oop_16_xx_avx2 ; jump over the alignment padding emitted by ALIGN below
|
|
ALIGN 32
|
|
$L$oop_16_xx_avx2::
|
|
; Message-schedule update for slot 0, computed independently in each 32-bit lane:
;   X[0] = X[0] + X[9] + s0(X[1]) + s1(X[14])
;   s0(x) = (x ror 7)  xor (x ror 18) xor (x shr 3)
;   s1(x) = (x ror 17) xor (x ror 19) xor (x shr 10)
; AVX2 has no dword rotate, so each "ror n" is a vpsrld n / vpslld (32-n)
; pair whose results are XORed together.
; ymm5 holds X[0] when the label is first reached (loaded just before the jmp).
; ymm1, ymm2, ymm3 and ymm7 are scratch in this step.
vmovdqu ymm6,YMMWORD PTR[((32-128))+rax] ; ymm6 = X[1]
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((288-256-128))+rbx] ; ymm5 = X[0] + X[9]  (288/32 = slot 9)
|
|
|
|
vpsrld ymm7,ymm6,3 ; s0: x shr 3
|
|
vpsrld ymm1,ymm6,7 ; s0: ror 7, right half
|
|
vpslld ymm2,ymm6,25 ; s0: ror 7, left half
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm6,18 ; s0: ror 18, right half
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm6,14 ; s0: ror 18, left half
|
|
vmovdqu ymm0,YMMWORD PTR[((448-256-128))+rbx] ; ymm0 = X[14]  (448/32 = slot 14)
|
|
vpsrld ymm3,ymm0,10 ; s1: x shr 10 (ymm3 used as scratch)
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm0,17 ; s1: ror 17, right half
|
|
vpxor ymm7,ymm7,ymm2 ; ymm7 = s0(X[1]) complete
|
|
vpslld ymm2,ymm0,15 ; s1: ror 17, left half
|
|
vpaddd ymm5,ymm5,ymm7 ; ymm5 += s0(X[1])
|
|
vpxor ymm7,ymm3,ymm1
|
|
vpsrld ymm1,ymm0,19 ; s1: ror 19, right half
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,13 ; s1: ror 19, left half
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2 ; ymm7 = s1(X[14]) complete
|
|
vpaddd ymm5,ymm5,ymm7 ; ymm5 = new X[0]
|
|
; One compression round on eight 32-bit lanes, using the schedule word in ymm5.
; Register roles in this round (they rotate by one position every round):
;   a=ymm8  b=ymm9  c=ymm10  d=ymm11  e=ymm12  f=ymm13  g=ymm14  h=ymm15
; Computation as written below:
;   T1  = W + h + [rbp-128] + S1(e) + Ch(e,f,g)
;   S1(e) = (e ror 6) xor (e ror 11) xor (e ror 25)
;   Ch    = (not e and g) xor (e and f)
;   S0(a) = (a ror 2) xor (a ror 13) xor (a ror 22)
;   Maj   = b xor ((a xor b) and (b xor c))
;   d += T1 ;  ymm15 (old h) = Maj + T1 + S0(a), i.e. the next round's a
; ymm4 enters holding (b xor c), left by the previous round; ymm3 leaves holding
; (a xor b) for the next round. Each rotate is a vpsrld/vpslld pair summing to 32.
; NOTE(review): [rbp-128] is presumably the round constant for this round - the
; table rbp points into is set up outside this view.
vpsrld ymm7,ymm12,6 ; S1: ror 6, right half
|
|
vpslld ymm2,ymm12,26 ; S1: ror 6, left half
|
|
vmovdqu YMMWORD PTR[(0-128)+rax],ymm5 ; write the schedule word back to slot 0
|
|
vpaddd ymm5,ymm5,ymm15 ; T1 = W + h
|
|
|
|
vpsrld ymm1,ymm12,11 ; S1: ror 11, right half
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm12,21 ; S1: ror 11, left half
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((-128))+rbp] ; T1 += table entry at [rbp-128]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm12,25 ; S1: ror 25, right half
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm12,7 ; S1: ror 25, left half
|
|
vpandn ymm0,ymm12,ymm14 ; Ch: (not e) and g
|
|
vpand ymm3,ymm12,ymm13 ; Ch: e and f
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm15,ymm8,2 ; S0: ror 2, right half (h already consumed; ymm15 reused)
|
|
vpxor ymm7,ymm7,ymm2 ; ymm7 = S1(e) complete
|
|
|
|
vpslld ymm1,ymm8,30 ; S0: ror 2, left half
|
|
vpxor ymm0,ymm0,ymm3 ; ymm0 = Ch(e,f,g)
|
|
vpxor ymm3,ymm9,ymm8 ; ymm3 = a xor b (kept for the next round)
|
|
|
|
vpxor ymm15,ymm15,ymm1 ; ymm15 = a ror 2
|
|
vpaddd ymm5,ymm5,ymm7 ; T1 += S1(e)
|
|
|
|
vpsrld ymm1,ymm8,13 ; S0: ror 13, right half
|
|
|
|
vpslld ymm2,ymm8,19 ; S0: ror 13, left half
|
|
vpaddd ymm5,ymm5,ymm0 ; T1 += Ch
|
|
vpand ymm4,ymm4,ymm3 ; (b xor c) and (a xor b)
|
|
|
|
vpxor ymm7,ymm15,ymm1
|
|
|
|
vpsrld ymm1,ymm8,22 ; S0: ror 22, right half
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm8,10 ; S0: ror 22, left half
|
|
vpxor ymm15,ymm9,ymm4 ; ymm15 = Maj(a,b,c)
|
|
vpaddd ymm11,ymm11,ymm5 ; d += T1
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2 ; ymm7 = S0(a) complete
|
|
|
|
vpaddd ymm15,ymm15,ymm5 ; Maj + T1
|
|
vpaddd ymm15,ymm15,ymm7 ; + S0(a) -> value that serves as a in the next round
|
|
vmovdqu ymm5,YMMWORD PTR[((64-128))+rax]
|
|
vpaddd ymm6,ymm6,YMMWORD PTR[((320-256-128))+rbx]
|
|
|
|
vpsrld ymm7,ymm5,3
|
|
vpsrld ymm1,ymm5,7
|
|
vpslld ymm2,ymm5,25
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm5,18
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm5,14
|
|
vmovdqu ymm0,YMMWORD PTR[((480-256-128))+rbx]
|
|
vpsrld ymm4,ymm0,10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm0,17
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,15
|
|
vpaddd ymm6,ymm6,ymm7
|
|
vpxor ymm7,ymm4,ymm1
|
|
vpsrld ymm1,ymm0,19
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,13
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpaddd ymm6,ymm6,ymm7
|
|
vpsrld ymm7,ymm11,6
|
|
vpslld ymm2,ymm11,26
|
|
vmovdqu YMMWORD PTR[(32-128)+rax],ymm6
|
|
vpaddd ymm6,ymm6,ymm14
|
|
|
|
vpsrld ymm1,ymm11,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm11,21
|
|
vpaddd ymm6,ymm6,YMMWORD PTR[((-96))+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm11,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm11,7
|
|
vpandn ymm0,ymm11,ymm13
|
|
vpand ymm4,ymm11,ymm12
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm14,ymm15,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm15,30
|
|
vpxor ymm0,ymm0,ymm4
|
|
vpxor ymm4,ymm8,ymm15
|
|
|
|
vpxor ymm14,ymm14,ymm1
|
|
vpaddd ymm6,ymm6,ymm7
|
|
|
|
vpsrld ymm1,ymm15,13
|
|
|
|
vpslld ymm2,ymm15,19
|
|
vpaddd ymm6,ymm6,ymm0
|
|
vpand ymm3,ymm3,ymm4
|
|
|
|
vpxor ymm7,ymm14,ymm1
|
|
|
|
vpsrld ymm1,ymm15,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm15,10
|
|
vpxor ymm14,ymm8,ymm3
|
|
vpaddd ymm10,ymm10,ymm6
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm14,ymm14,ymm6
|
|
vpaddd ymm14,ymm14,ymm7
|
|
vmovdqu ymm6,YMMWORD PTR[((96-128))+rax]
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((352-256-128))+rbx]
|
|
|
|
vpsrld ymm7,ymm6,3
|
|
vpsrld ymm1,ymm6,7
|
|
vpslld ymm2,ymm6,25
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm6,18
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm6,14
|
|
vmovdqu ymm0,YMMWORD PTR[((0-128))+rax]
|
|
vpsrld ymm3,ymm0,10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm0,17
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,15
|
|
vpaddd ymm5,ymm5,ymm7
|
|
vpxor ymm7,ymm3,ymm1
|
|
vpsrld ymm1,ymm0,19
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,13
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpaddd ymm5,ymm5,ymm7
|
|
vpsrld ymm7,ymm10,6
|
|
vpslld ymm2,ymm10,26
|
|
vmovdqu YMMWORD PTR[(64-128)+rax],ymm5
|
|
vpaddd ymm5,ymm5,ymm13
|
|
|
|
vpsrld ymm1,ymm10,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm10,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((-64))+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm10,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm10,7
|
|
vpandn ymm0,ymm10,ymm12
|
|
vpand ymm3,ymm10,ymm11
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm13,ymm14,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm14,30
|
|
vpxor ymm0,ymm0,ymm3
|
|
vpxor ymm3,ymm15,ymm14
|
|
|
|
vpxor ymm13,ymm13,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm14,13
|
|
|
|
vpslld ymm2,ymm14,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm4,ymm4,ymm3
|
|
|
|
vpxor ymm7,ymm13,ymm1
|
|
|
|
vpsrld ymm1,ymm14,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm14,10
|
|
vpxor ymm13,ymm15,ymm4
|
|
vpaddd ymm9,ymm9,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm13,ymm13,ymm5
|
|
vpaddd ymm13,ymm13,ymm7
|
|
vmovdqu ymm5,YMMWORD PTR[((128-128))+rax]
|
|
vpaddd ymm6,ymm6,YMMWORD PTR[((384-256-128))+rbx]
|
|
|
|
vpsrld ymm7,ymm5,3
|
|
vpsrld ymm1,ymm5,7
|
|
vpslld ymm2,ymm5,25
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm5,18
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm5,14
|
|
vmovdqu ymm0,YMMWORD PTR[((32-128))+rax]
|
|
vpsrld ymm4,ymm0,10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm0,17
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,15
|
|
vpaddd ymm6,ymm6,ymm7
|
|
vpxor ymm7,ymm4,ymm1
|
|
vpsrld ymm1,ymm0,19
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,13
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpaddd ymm6,ymm6,ymm7
|
|
vpsrld ymm7,ymm9,6
|
|
vpslld ymm2,ymm9,26
|
|
vmovdqu YMMWORD PTR[(96-128)+rax],ymm6
|
|
vpaddd ymm6,ymm6,ymm12
|
|
|
|
vpsrld ymm1,ymm9,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm9,21
|
|
vpaddd ymm6,ymm6,YMMWORD PTR[((-32))+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm9,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm9,7
|
|
vpandn ymm0,ymm9,ymm11
|
|
vpand ymm4,ymm9,ymm10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm12,ymm13,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm13,30
|
|
vpxor ymm0,ymm0,ymm4
|
|
vpxor ymm4,ymm14,ymm13
|
|
|
|
vpxor ymm12,ymm12,ymm1
|
|
vpaddd ymm6,ymm6,ymm7
|
|
|
|
vpsrld ymm1,ymm13,13
|
|
|
|
vpslld ymm2,ymm13,19
|
|
vpaddd ymm6,ymm6,ymm0
|
|
vpand ymm3,ymm3,ymm4
|
|
|
|
vpxor ymm7,ymm12,ymm1
|
|
|
|
vpsrld ymm1,ymm13,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm13,10
|
|
vpxor ymm12,ymm14,ymm3
|
|
vpaddd ymm8,ymm8,ymm6
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm12,ymm12,ymm6
|
|
vpaddd ymm12,ymm12,ymm7
|
|
vmovdqu ymm6,YMMWORD PTR[((160-128))+rax]
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((416-256-128))+rbx]
|
|
|
|
vpsrld ymm7,ymm6,3
|
|
vpsrld ymm1,ymm6,7
|
|
vpslld ymm2,ymm6,25
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm6,18
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm6,14
|
|
vmovdqu ymm0,YMMWORD PTR[((64-128))+rax]
|
|
vpsrld ymm3,ymm0,10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm0,17
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,15
|
|
vpaddd ymm5,ymm5,ymm7
|
|
vpxor ymm7,ymm3,ymm1
|
|
vpsrld ymm1,ymm0,19
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,13
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpaddd ymm5,ymm5,ymm7
|
|
vpsrld ymm7,ymm8,6
|
|
vpslld ymm2,ymm8,26
|
|
vmovdqu YMMWORD PTR[(128-128)+rax],ymm5
|
|
vpaddd ymm5,ymm5,ymm11
|
|
|
|
vpsrld ymm1,ymm8,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm8,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm8,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm8,7
|
|
vpandn ymm0,ymm8,ymm10
|
|
vpand ymm3,ymm8,ymm9
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm11,ymm12,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm12,30
|
|
vpxor ymm0,ymm0,ymm3
|
|
vpxor ymm3,ymm13,ymm12
|
|
|
|
vpxor ymm11,ymm11,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm12,13
|
|
|
|
vpslld ymm2,ymm12,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm4,ymm4,ymm3
|
|
|
|
vpxor ymm7,ymm11,ymm1
|
|
|
|
vpsrld ymm1,ymm12,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm12,10
|
|
vpxor ymm11,ymm13,ymm4
|
|
vpaddd ymm15,ymm15,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm11,ymm11,ymm5
|
|
vpaddd ymm11,ymm11,ymm7
|
|
vmovdqu ymm5,YMMWORD PTR[((192-128))+rax]
|
|
vpaddd ymm6,ymm6,YMMWORD PTR[((448-256-128))+rbx]
|
|
|
|
vpsrld ymm7,ymm5,3
|
|
vpsrld ymm1,ymm5,7
|
|
vpslld ymm2,ymm5,25
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm5,18
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm5,14
|
|
vmovdqu ymm0,YMMWORD PTR[((96-128))+rax]
|
|
vpsrld ymm4,ymm0,10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm0,17
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,15
|
|
vpaddd ymm6,ymm6,ymm7
|
|
vpxor ymm7,ymm4,ymm1
|
|
vpsrld ymm1,ymm0,19
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,13
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpaddd ymm6,ymm6,ymm7
|
|
vpsrld ymm7,ymm15,6
|
|
vpslld ymm2,ymm15,26
|
|
vmovdqu YMMWORD PTR[(160-128)+rax],ymm6
|
|
vpaddd ymm6,ymm6,ymm10
|
|
|
|
vpsrld ymm1,ymm15,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm15,21
|
|
vpaddd ymm6,ymm6,YMMWORD PTR[32+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm15,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm15,7
|
|
vpandn ymm0,ymm15,ymm9
|
|
vpand ymm4,ymm15,ymm8
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm10,ymm11,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm11,30
|
|
vpxor ymm0,ymm0,ymm4
|
|
vpxor ymm4,ymm12,ymm11
|
|
|
|
vpxor ymm10,ymm10,ymm1
|
|
vpaddd ymm6,ymm6,ymm7
|
|
|
|
vpsrld ymm1,ymm11,13
|
|
|
|
vpslld ymm2,ymm11,19
|
|
vpaddd ymm6,ymm6,ymm0
|
|
vpand ymm3,ymm3,ymm4
|
|
|
|
vpxor ymm7,ymm10,ymm1
|
|
|
|
vpsrld ymm1,ymm11,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm11,10
|
|
vpxor ymm10,ymm12,ymm3
|
|
vpaddd ymm14,ymm14,ymm6
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm10,ymm10,ymm6
|
|
vpaddd ymm10,ymm10,ymm7
|
|
vmovdqu ymm6,YMMWORD PTR[((224-128))+rax]
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((480-256-128))+rbx]
|
|
|
|
vpsrld ymm7,ymm6,3
|
|
vpsrld ymm1,ymm6,7
|
|
vpslld ymm2,ymm6,25
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm6,18
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm6,14
|
|
vmovdqu ymm0,YMMWORD PTR[((128-128))+rax]
|
|
vpsrld ymm3,ymm0,10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm0,17
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,15
|
|
vpaddd ymm5,ymm5,ymm7
|
|
vpxor ymm7,ymm3,ymm1
|
|
vpsrld ymm1,ymm0,19
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,13
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpaddd ymm5,ymm5,ymm7
|
|
vpsrld ymm7,ymm14,6
|
|
vpslld ymm2,ymm14,26
|
|
vmovdqu YMMWORD PTR[(192-128)+rax],ymm5
|
|
vpaddd ymm5,ymm5,ymm9
|
|
|
|
vpsrld ymm1,ymm14,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm14,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[64+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm14,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm14,7
|
|
vpandn ymm0,ymm14,ymm8
|
|
vpand ymm3,ymm14,ymm15
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm9,ymm10,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm10,30
|
|
vpxor ymm0,ymm0,ymm3
|
|
vpxor ymm3,ymm11,ymm10
|
|
|
|
vpxor ymm9,ymm9,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm10,13
|
|
|
|
vpslld ymm2,ymm10,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm4,ymm4,ymm3
|
|
|
|
vpxor ymm7,ymm9,ymm1
|
|
|
|
vpsrld ymm1,ymm10,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm10,10
|
|
vpxor ymm9,ymm11,ymm4
|
|
vpaddd ymm13,ymm13,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm9,ymm9,ymm5
|
|
vpaddd ymm9,ymm9,ymm7
|
|
vmovdqu ymm5,YMMWORD PTR[((256-256-128))+rbx]
|
|
vpaddd ymm6,ymm6,YMMWORD PTR[((0-128))+rax]
|
|
|
|
vpsrld ymm7,ymm5,3
|
|
vpsrld ymm1,ymm5,7
|
|
vpslld ymm2,ymm5,25
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm5,18
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm5,14
|
|
vmovdqu ymm0,YMMWORD PTR[((160-128))+rax]
|
|
vpsrld ymm4,ymm0,10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm0,17
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,15
|
|
vpaddd ymm6,ymm6,ymm7
|
|
vpxor ymm7,ymm4,ymm1
|
|
vpsrld ymm1,ymm0,19
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,13
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpaddd ymm6,ymm6,ymm7
|
|
vpsrld ymm7,ymm13,6
|
|
vpslld ymm2,ymm13,26
|
|
vmovdqu YMMWORD PTR[(224-128)+rax],ymm6
|
|
vpaddd ymm6,ymm6,ymm8
|
|
|
|
vpsrld ymm1,ymm13,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm13,21
|
|
vpaddd ymm6,ymm6,YMMWORD PTR[96+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm13,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm13,7
|
|
vpandn ymm0,ymm13,ymm15
|
|
vpand ymm4,ymm13,ymm14
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm8,ymm9,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm9,30
|
|
vpxor ymm0,ymm0,ymm4
|
|
vpxor ymm4,ymm10,ymm9
|
|
|
|
vpxor ymm8,ymm8,ymm1
|
|
vpaddd ymm6,ymm6,ymm7
|
|
|
|
vpsrld ymm1,ymm9,13
|
|
|
|
vpslld ymm2,ymm9,19
|
|
vpaddd ymm6,ymm6,ymm0
|
|
vpand ymm3,ymm3,ymm4
|
|
|
|
vpxor ymm7,ymm8,ymm1
|
|
|
|
vpsrld ymm1,ymm9,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm9,10
|
|
vpxor ymm8,ymm10,ymm3
|
|
vpaddd ymm12,ymm12,ymm6
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm8,ymm8,ymm6
|
|
vpaddd ymm8,ymm8,ymm7
|
|
add rbp,256
|
|
vmovdqu ymm6,YMMWORD PTR[((288-256-128))+rbx]
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((32-128))+rax]
|
|
|
|
vpsrld ymm7,ymm6,3
|
|
vpsrld ymm1,ymm6,7
|
|
vpslld ymm2,ymm6,25
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm6,18
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm6,14
|
|
vmovdqu ymm0,YMMWORD PTR[((192-128))+rax]
|
|
vpsrld ymm3,ymm0,10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm0,17
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,15
|
|
vpaddd ymm5,ymm5,ymm7
|
|
vpxor ymm7,ymm3,ymm1
|
|
vpsrld ymm1,ymm0,19
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,13
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpaddd ymm5,ymm5,ymm7
|
|
vpsrld ymm7,ymm12,6
|
|
vpslld ymm2,ymm12,26
|
|
vmovdqu YMMWORD PTR[(256-256-128)+rbx],ymm5
|
|
vpaddd ymm5,ymm5,ymm15
|
|
|
|
vpsrld ymm1,ymm12,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm12,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((-128))+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm12,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm12,7
|
|
vpandn ymm0,ymm12,ymm14
|
|
vpand ymm3,ymm12,ymm13
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm15,ymm8,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm8,30
|
|
vpxor ymm0,ymm0,ymm3
|
|
vpxor ymm3,ymm9,ymm8
|
|
|
|
vpxor ymm15,ymm15,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm8,13
|
|
|
|
vpslld ymm2,ymm8,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm4,ymm4,ymm3
|
|
|
|
vpxor ymm7,ymm15,ymm1
|
|
|
|
vpsrld ymm1,ymm8,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm8,10
|
|
vpxor ymm15,ymm9,ymm4
|
|
vpaddd ymm11,ymm11,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm15,ymm15,ymm5
|
|
vpaddd ymm15,ymm15,ymm7
|
|
vmovdqu ymm5,YMMWORD PTR[((320-256-128))+rbx]
|
|
vpaddd ymm6,ymm6,YMMWORD PTR[((64-128))+rax]
|
|
|
|
vpsrld ymm7,ymm5,3
|
|
vpsrld ymm1,ymm5,7
|
|
vpslld ymm2,ymm5,25
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm5,18
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm5,14
|
|
vmovdqu ymm0,YMMWORD PTR[((224-128))+rax]
|
|
vpsrld ymm4,ymm0,10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm0,17
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,15
|
|
vpaddd ymm6,ymm6,ymm7
|
|
vpxor ymm7,ymm4,ymm1
|
|
vpsrld ymm1,ymm0,19
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,13
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpaddd ymm6,ymm6,ymm7
|
|
vpsrld ymm7,ymm11,6
|
|
vpslld ymm2,ymm11,26
|
|
vmovdqu YMMWORD PTR[(288-256-128)+rbx],ymm6
|
|
vpaddd ymm6,ymm6,ymm14
|
|
|
|
vpsrld ymm1,ymm11,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm11,21
|
|
vpaddd ymm6,ymm6,YMMWORD PTR[((-96))+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm11,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm11,7
|
|
vpandn ymm0,ymm11,ymm13
|
|
vpand ymm4,ymm11,ymm12
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm14,ymm15,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm15,30
|
|
vpxor ymm0,ymm0,ymm4
|
|
vpxor ymm4,ymm8,ymm15
|
|
|
|
vpxor ymm14,ymm14,ymm1
|
|
vpaddd ymm6,ymm6,ymm7
|
|
|
|
vpsrld ymm1,ymm15,13
|
|
|
|
vpslld ymm2,ymm15,19
|
|
vpaddd ymm6,ymm6,ymm0
|
|
vpand ymm3,ymm3,ymm4
|
|
|
|
vpxor ymm7,ymm14,ymm1
|
|
|
|
vpsrld ymm1,ymm15,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm15,10
|
|
vpxor ymm14,ymm8,ymm3
|
|
vpaddd ymm10,ymm10,ymm6
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm14,ymm14,ymm6
|
|
vpaddd ymm14,ymm14,ymm7
|
|
vmovdqu ymm6,YMMWORD PTR[((352-256-128))+rbx]
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((96-128))+rax]
|
|
|
|
vpsrld ymm7,ymm6,3
|
|
vpsrld ymm1,ymm6,7
|
|
vpslld ymm2,ymm6,25
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm6,18
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm6,14
|
|
vmovdqu ymm0,YMMWORD PTR[((256-256-128))+rbx]
|
|
vpsrld ymm3,ymm0,10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm0,17
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,15
|
|
vpaddd ymm5,ymm5,ymm7
|
|
vpxor ymm7,ymm3,ymm1
|
|
vpsrld ymm1,ymm0,19
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,13
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpaddd ymm5,ymm5,ymm7
|
|
vpsrld ymm7,ymm10,6
|
|
vpslld ymm2,ymm10,26
|
|
vmovdqu YMMWORD PTR[(320-256-128)+rbx],ymm5
|
|
vpaddd ymm5,ymm5,ymm13
|
|
|
|
vpsrld ymm1,ymm10,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm10,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((-64))+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm10,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm10,7
|
|
vpandn ymm0,ymm10,ymm12
|
|
vpand ymm3,ymm10,ymm11
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm13,ymm14,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm14,30
|
|
vpxor ymm0,ymm0,ymm3
|
|
vpxor ymm3,ymm15,ymm14
|
|
|
|
vpxor ymm13,ymm13,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm14,13
|
|
|
|
vpslld ymm2,ymm14,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm4,ymm4,ymm3
|
|
|
|
vpxor ymm7,ymm13,ymm1
|
|
|
|
vpsrld ymm1,ymm14,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm14,10
|
|
vpxor ymm13,ymm15,ymm4
|
|
vpaddd ymm9,ymm9,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm13,ymm13,ymm5
|
|
vpaddd ymm13,ymm13,ymm7
|
|
vmovdqu ymm5,YMMWORD PTR[((384-256-128))+rbx]
|
|
vpaddd ymm6,ymm6,YMMWORD PTR[((128-128))+rax]
|
|
|
|
vpsrld ymm7,ymm5,3
|
|
vpsrld ymm1,ymm5,7
|
|
vpslld ymm2,ymm5,25
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm5,18
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm5,14
|
|
vmovdqu ymm0,YMMWORD PTR[((288-256-128))+rbx]
|
|
vpsrld ymm4,ymm0,10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm0,17
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,15
|
|
vpaddd ymm6,ymm6,ymm7
|
|
vpxor ymm7,ymm4,ymm1
|
|
vpsrld ymm1,ymm0,19
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,13
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpaddd ymm6,ymm6,ymm7
|
|
vpsrld ymm7,ymm9,6
|
|
vpslld ymm2,ymm9,26
|
|
vmovdqu YMMWORD PTR[(352-256-128)+rbx],ymm6
|
|
vpaddd ymm6,ymm6,ymm12
|
|
|
|
vpsrld ymm1,ymm9,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm9,21
|
|
vpaddd ymm6,ymm6,YMMWORD PTR[((-32))+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm9,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm9,7
|
|
vpandn ymm0,ymm9,ymm11
|
|
vpand ymm4,ymm9,ymm10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm12,ymm13,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm13,30
|
|
vpxor ymm0,ymm0,ymm4
|
|
vpxor ymm4,ymm14,ymm13
|
|
|
|
vpxor ymm12,ymm12,ymm1
|
|
vpaddd ymm6,ymm6,ymm7
|
|
|
|
vpsrld ymm1,ymm13,13
|
|
|
|
vpslld ymm2,ymm13,19
|
|
vpaddd ymm6,ymm6,ymm0
|
|
vpand ymm3,ymm3,ymm4
|
|
|
|
vpxor ymm7,ymm12,ymm1
|
|
|
|
vpsrld ymm1,ymm13,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm13,10
|
|
vpxor ymm12,ymm14,ymm3
|
|
vpaddd ymm8,ymm8,ymm6
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm12,ymm12,ymm6
|
|
vpaddd ymm12,ymm12,ymm7
|
|
vmovdqu ymm6,YMMWORD PTR[((416-256-128))+rbx]
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((160-128))+rax]
|
|
|
|
vpsrld ymm7,ymm6,3
|
|
vpsrld ymm1,ymm6,7
|
|
vpslld ymm2,ymm6,25
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm6,18
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm6,14
|
|
vmovdqu ymm0,YMMWORD PTR[((320-256-128))+rbx]
|
|
vpsrld ymm3,ymm0,10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm0,17
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,15
|
|
vpaddd ymm5,ymm5,ymm7
|
|
vpxor ymm7,ymm3,ymm1
|
|
vpsrld ymm1,ymm0,19
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,13
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpaddd ymm5,ymm5,ymm7
|
|
vpsrld ymm7,ymm8,6
|
|
vpslld ymm2,ymm8,26
|
|
vmovdqu YMMWORD PTR[(384-256-128)+rbx],ymm5
|
|
vpaddd ymm5,ymm5,ymm11
|
|
|
|
vpsrld ymm1,ymm8,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm8,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm8,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm8,7
|
|
vpandn ymm0,ymm8,ymm10
|
|
vpand ymm3,ymm8,ymm9
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm11,ymm12,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm12,30
|
|
vpxor ymm0,ymm0,ymm3
|
|
vpxor ymm3,ymm13,ymm12
|
|
|
|
vpxor ymm11,ymm11,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm12,13
|
|
|
|
vpslld ymm2,ymm12,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm4,ymm4,ymm3
|
|
|
|
vpxor ymm7,ymm11,ymm1
|
|
|
|
vpsrld ymm1,ymm12,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm12,10
|
|
vpxor ymm11,ymm13,ymm4
|
|
vpaddd ymm15,ymm15,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm11,ymm11,ymm5
|
|
vpaddd ymm11,ymm11,ymm7
|
|
vmovdqu ymm5,YMMWORD PTR[((448-256-128))+rbx]
|
|
vpaddd ymm6,ymm6,YMMWORD PTR[((192-128))+rax]
|
|
|
|
vpsrld ymm7,ymm5,3
|
|
vpsrld ymm1,ymm5,7
|
|
vpslld ymm2,ymm5,25
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm5,18
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm5,14
|
|
vmovdqu ymm0,YMMWORD PTR[((352-256-128))+rbx]
|
|
vpsrld ymm4,ymm0,10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm0,17
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,15
|
|
vpaddd ymm6,ymm6,ymm7
|
|
vpxor ymm7,ymm4,ymm1
|
|
vpsrld ymm1,ymm0,19
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,13
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpaddd ymm6,ymm6,ymm7
|
|
vpsrld ymm7,ymm15,6
|
|
vpslld ymm2,ymm15,26
|
|
vmovdqu YMMWORD PTR[(416-256-128)+rbx],ymm6
|
|
vpaddd ymm6,ymm6,ymm10
|
|
|
|
vpsrld ymm1,ymm15,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm15,21
|
|
vpaddd ymm6,ymm6,YMMWORD PTR[32+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm15,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm15,7
|
|
vpandn ymm0,ymm15,ymm9
|
|
vpand ymm4,ymm15,ymm8
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm10,ymm11,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm11,30
|
|
vpxor ymm0,ymm0,ymm4
|
|
vpxor ymm4,ymm12,ymm11
|
|
|
|
vpxor ymm10,ymm10,ymm1
|
|
vpaddd ymm6,ymm6,ymm7
|
|
|
|
vpsrld ymm1,ymm11,13
|
|
|
|
vpslld ymm2,ymm11,19
|
|
vpaddd ymm6,ymm6,ymm0
|
|
vpand ymm3,ymm3,ymm4
|
|
|
|
vpxor ymm7,ymm10,ymm1
|
|
|
|
vpsrld ymm1,ymm11,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm11,10
|
|
vpxor ymm10,ymm12,ymm3
|
|
vpaddd ymm14,ymm14,ymm6
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm10,ymm10,ymm6
|
|
vpaddd ymm10,ymm10,ymm7
|
|
vmovdqu ymm6,YMMWORD PTR[((480-256-128))+rbx]
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[((224-128))+rax]
|
|
|
|
vpsrld ymm7,ymm6,3
|
|
vpsrld ymm1,ymm6,7
|
|
vpslld ymm2,ymm6,25
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm6,18
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm6,14
|
|
vmovdqu ymm0,YMMWORD PTR[((384-256-128))+rbx]
|
|
vpsrld ymm3,ymm0,10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm0,17
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,15
|
|
vpaddd ymm5,ymm5,ymm7
|
|
vpxor ymm7,ymm3,ymm1
|
|
vpsrld ymm1,ymm0,19
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,13
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpaddd ymm5,ymm5,ymm7
|
|
vpsrld ymm7,ymm14,6
|
|
vpslld ymm2,ymm14,26
|
|
vmovdqu YMMWORD PTR[(448-256-128)+rbx],ymm5
|
|
vpaddd ymm5,ymm5,ymm9
|
|
|
|
vpsrld ymm1,ymm14,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm14,21
|
|
vpaddd ymm5,ymm5,YMMWORD PTR[64+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm14,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm14,7
|
|
vpandn ymm0,ymm14,ymm8
|
|
vpand ymm3,ymm14,ymm15
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm9,ymm10,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm10,30
|
|
vpxor ymm0,ymm0,ymm3
|
|
vpxor ymm3,ymm11,ymm10
|
|
|
|
vpxor ymm9,ymm9,ymm1
|
|
vpaddd ymm5,ymm5,ymm7
|
|
|
|
vpsrld ymm1,ymm10,13
|
|
|
|
vpslld ymm2,ymm10,19
|
|
vpaddd ymm5,ymm5,ymm0
|
|
vpand ymm4,ymm4,ymm3
|
|
|
|
vpxor ymm7,ymm9,ymm1
|
|
|
|
vpsrld ymm1,ymm10,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm10,10
|
|
vpxor ymm9,ymm11,ymm4
|
|
vpaddd ymm13,ymm13,ymm5
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm9,ymm9,ymm5
|
|
vpaddd ymm9,ymm9,ymm7
|
|
vmovdqu ymm5,YMMWORD PTR[((0-128))+rax]
|
|
vpaddd ymm6,ymm6,YMMWORD PTR[((256-256-128))+rbx]
|
|
|
|
vpsrld ymm7,ymm5,3
|
|
vpsrld ymm1,ymm5,7
|
|
vpslld ymm2,ymm5,25
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm5,18
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm5,14
|
|
vmovdqu ymm0,YMMWORD PTR[((416-256-128))+rbx]
|
|
vpsrld ymm4,ymm0,10
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpsrld ymm1,ymm0,17
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,15
|
|
vpaddd ymm6,ymm6,ymm7
|
|
vpxor ymm7,ymm4,ymm1
|
|
vpsrld ymm1,ymm0,19
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm0,13
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpaddd ymm6,ymm6,ymm7
|
|
vpsrld ymm7,ymm13,6
|
|
vpslld ymm2,ymm13,26
|
|
vmovdqu YMMWORD PTR[(480-256-128)+rbx],ymm6
|
|
vpaddd ymm6,ymm6,ymm8
|
|
|
|
vpsrld ymm1,ymm13,11
|
|
vpxor ymm7,ymm7,ymm2
|
|
vpslld ymm2,ymm13,21
|
|
vpaddd ymm6,ymm6,YMMWORD PTR[96+rbp]
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm1,ymm13,25
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm13,7
|
|
vpandn ymm0,ymm13,ymm15
|
|
vpand ymm4,ymm13,ymm14
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
|
|
vpsrld ymm8,ymm9,2
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm1,ymm9,30
|
|
vpxor ymm0,ymm0,ymm4
|
|
vpxor ymm4,ymm10,ymm9
|
|
|
|
vpxor ymm8,ymm8,ymm1
|
|
vpaddd ymm6,ymm6,ymm7
|
|
|
|
vpsrld ymm1,ymm9,13
|
|
|
|
vpslld ymm2,ymm9,19
|
|
vpaddd ymm6,ymm6,ymm0
|
|
vpand ymm3,ymm3,ymm4
|
|
|
|
vpxor ymm7,ymm8,ymm1
|
|
|
|
vpsrld ymm1,ymm9,22
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpslld ymm2,ymm9,10
|
|
vpxor ymm8,ymm10,ymm3
|
|
vpaddd ymm12,ymm12,ymm6
|
|
|
|
vpxor ymm7,ymm7,ymm1
|
|
vpxor ymm7,ymm7,ymm2
|
|
|
|
vpaddd ymm8,ymm8,ymm6
|
|
vpaddd ymm8,ymm8,ymm7
|
|
add rbp,256
|
|
dec ecx
|
|
jnz $L$oop_16_xx_avx2
|
|
|
|
mov ecx,1
|
|
lea rbx,QWORD PTR[512+rsp]
|
|
lea rbp,QWORD PTR[((K256+128))]
|
|
cmp ecx,DWORD PTR[rbx]
|
|
cmovge r12,rbp
|
|
cmp ecx,DWORD PTR[4+rbx]
|
|
cmovge r13,rbp
|
|
cmp ecx,DWORD PTR[8+rbx]
|
|
cmovge r14,rbp
|
|
cmp ecx,DWORD PTR[12+rbx]
|
|
cmovge r15,rbp
|
|
cmp ecx,DWORD PTR[16+rbx]
|
|
cmovge r8,rbp
|
|
cmp ecx,DWORD PTR[20+rbx]
|
|
cmovge r9,rbp
|
|
cmp ecx,DWORD PTR[24+rbx]
|
|
cmovge r10,rbp
|
|
cmp ecx,DWORD PTR[28+rbx]
|
|
cmovge r11,rbp
|
|
vmovdqa ymm7,YMMWORD PTR[rbx]
|
|
vpxor ymm0,ymm0,ymm0
|
|
vmovdqa ymm6,ymm7
|
|
vpcmpgtd ymm6,ymm6,ymm0
|
|
vpaddd ymm7,ymm7,ymm6
|
|
|
|
vmovdqu ymm0,YMMWORD PTR[((0-128))+rdi]
|
|
vpand ymm8,ymm8,ymm6
|
|
vmovdqu ymm1,YMMWORD PTR[((32-128))+rdi]
|
|
vpand ymm9,ymm9,ymm6
|
|
vmovdqu ymm2,YMMWORD PTR[((64-128))+rdi]
|
|
vpand ymm10,ymm10,ymm6
|
|
vmovdqu ymm5,YMMWORD PTR[((96-128))+rdi]
|
|
vpand ymm11,ymm11,ymm6
|
|
vpaddd ymm8,ymm8,ymm0
|
|
vmovdqu ymm0,YMMWORD PTR[((128-128))+rdi]
|
|
vpand ymm12,ymm12,ymm6
|
|
vpaddd ymm9,ymm9,ymm1
|
|
vmovdqu ymm1,YMMWORD PTR[((160-128))+rdi]
|
|
vpand ymm13,ymm13,ymm6
|
|
vpaddd ymm10,ymm10,ymm2
|
|
vmovdqu ymm2,YMMWORD PTR[((192-128))+rdi]
|
|
vpand ymm14,ymm14,ymm6
|
|
vpaddd ymm11,ymm11,ymm5
|
|
vmovdqu ymm5,YMMWORD PTR[((224-128))+rdi]
|
|
vpand ymm15,ymm15,ymm6
|
|
vpaddd ymm12,ymm12,ymm0
|
|
vpaddd ymm13,ymm13,ymm1
|
|
vmovdqu YMMWORD PTR[(0-128)+rdi],ymm8
|
|
vpaddd ymm14,ymm14,ymm2
|
|
vmovdqu YMMWORD PTR[(32-128)+rdi],ymm9
|
|
vpaddd ymm15,ymm15,ymm5
|
|
vmovdqu YMMWORD PTR[(64-128)+rdi],ymm10
|
|
vmovdqu YMMWORD PTR[(96-128)+rdi],ymm11
|
|
vmovdqu YMMWORD PTR[(128-128)+rdi],ymm12
|
|
vmovdqu YMMWORD PTR[(160-128)+rdi],ymm13
|
|
vmovdqu YMMWORD PTR[(192-128)+rdi],ymm14
|
|
vmovdqu YMMWORD PTR[(224-128)+rdi],ymm15
|
|
|
|
vmovdqu YMMWORD PTR[rbx],ymm7
|
|
lea rbx,QWORD PTR[((256+128))+rsp]
|
|
vmovdqu ymm6,YMMWORD PTR[$L$pbswap]
|
|
dec edx
|
|
jnz $L$oop_avx2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
$L$done_avx2::
|
|
mov rax,QWORD PTR[544+rsp]
|
|
vzeroupper
|
|
movaps xmm6,XMMWORD PTR[((-216))+rax]
|
|
movaps xmm7,XMMWORD PTR[((-200))+rax]
|
|
movaps xmm8,XMMWORD PTR[((-184))+rax]
|
|
movaps xmm9,XMMWORD PTR[((-168))+rax]
|
|
movaps xmm10,XMMWORD PTR[((-152))+rax]
|
|
movaps xmm11,XMMWORD PTR[((-136))+rax]
|
|
movaps xmm12,XMMWORD PTR[((-120))+rax]
|
|
movaps xmm13,XMMWORD PTR[((-104))+rax]
|
|
movaps xmm14,XMMWORD PTR[((-88))+rax]
|
|
movaps xmm15,XMMWORD PTR[((-72))+rax]
|
|
mov r15,QWORD PTR[((-48))+rax]
|
|
mov r14,QWORD PTR[((-40))+rax]
|
|
mov r13,QWORD PTR[((-32))+rax]
|
|
mov r12,QWORD PTR[((-24))+rax]
|
|
mov rbp,QWORD PTR[((-16))+rax]
|
|
mov rbx,QWORD PTR[((-8))+rax]
|
|
lea rsp,QWORD PTR[rax]
|
|
$L$epilogue_avx2::
|
|
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD PTR[16+rsp]
|
|
DB 0F3h,0C3h ;repret
|
|
$L$SEH_end_sha256_multi_block_avx2::
|
|
sha256_multi_block_avx2 ENDP
|
|
; K256 -- SHA-256 round constants laid out for the multi-lane code paths.
; Each 32-bit constant is stored eight times (two DD rows of four = 32 bytes
; per round), so one YMMWORD memory operand such as
; "vpaddd ymm5,ymm5,YMMWORD PTR[rbp]" adds the same constant to every
; 32-bit lane. The code addresses the table through a pointer biased by 128
; (lea rbp,[K256+128]) and advances it by 256 per eight rounds.
; K256+128 is also substituted via cmovge into r8-r15 once a lane's counter
; is exhausted -- NOTE(review): those registers look like the per-lane input
; pointers, which would make the table double as a harmless dummy input;
; confirm against the loop head, which is outside this view.
ALIGN 256
|
|
K256::
|
|
DD 1116352408,1116352408,1116352408,1116352408
|
|
DD 1116352408,1116352408,1116352408,1116352408
|
|
DD 1899447441,1899447441,1899447441,1899447441
|
|
DD 1899447441,1899447441,1899447441,1899447441
|
|
DD 3049323471,3049323471,3049323471,3049323471
|
|
DD 3049323471,3049323471,3049323471,3049323471
|
|
DD 3921009573,3921009573,3921009573,3921009573
|
|
DD 3921009573,3921009573,3921009573,3921009573
|
|
DD 961987163,961987163,961987163,961987163
|
|
DD 961987163,961987163,961987163,961987163
|
|
DD 1508970993,1508970993,1508970993,1508970993
|
|
DD 1508970993,1508970993,1508970993,1508970993
|
|
DD 2453635748,2453635748,2453635748,2453635748
|
|
DD 2453635748,2453635748,2453635748,2453635748
|
|
DD 2870763221,2870763221,2870763221,2870763221
|
|
DD 2870763221,2870763221,2870763221,2870763221
|
|
DD 3624381080,3624381080,3624381080,3624381080
|
|
DD 3624381080,3624381080,3624381080,3624381080
|
|
DD 310598401,310598401,310598401,310598401
|
|
DD 310598401,310598401,310598401,310598401
|
|
DD 607225278,607225278,607225278,607225278
|
|
DD 607225278,607225278,607225278,607225278
|
|
DD 1426881987,1426881987,1426881987,1426881987
|
|
DD 1426881987,1426881987,1426881987,1426881987
|
|
DD 1925078388,1925078388,1925078388,1925078388
|
|
DD 1925078388,1925078388,1925078388,1925078388
|
|
DD 2162078206,2162078206,2162078206,2162078206
|
|
DD 2162078206,2162078206,2162078206,2162078206
|
|
DD 2614888103,2614888103,2614888103,2614888103
|
|
DD 2614888103,2614888103,2614888103,2614888103
|
|
DD 3248222580,3248222580,3248222580,3248222580
|
|
DD 3248222580,3248222580,3248222580,3248222580
|
|
DD 3835390401,3835390401,3835390401,3835390401
|
|
DD 3835390401,3835390401,3835390401,3835390401
|
|
DD 4022224774,4022224774,4022224774,4022224774
|
|
DD 4022224774,4022224774,4022224774,4022224774
|
|
DD 264347078,264347078,264347078,264347078
|
|
DD 264347078,264347078,264347078,264347078
|
|
DD 604807628,604807628,604807628,604807628
|
|
DD 604807628,604807628,604807628,604807628
|
|
DD 770255983,770255983,770255983,770255983
|
|
DD 770255983,770255983,770255983,770255983
|
|
DD 1249150122,1249150122,1249150122,1249150122
|
|
DD 1249150122,1249150122,1249150122,1249150122
|
|
DD 1555081692,1555081692,1555081692,1555081692
|
|
DD 1555081692,1555081692,1555081692,1555081692
|
|
DD 1996064986,1996064986,1996064986,1996064986
|
|
DD 1996064986,1996064986,1996064986,1996064986
|
|
DD 2554220882,2554220882,2554220882,2554220882
|
|
DD 2554220882,2554220882,2554220882,2554220882
|
|
DD 2821834349,2821834349,2821834349,2821834349
|
|
DD 2821834349,2821834349,2821834349,2821834349
|
|
DD 2952996808,2952996808,2952996808,2952996808
|
|
DD 2952996808,2952996808,2952996808,2952996808
|
|
DD 3210313671,3210313671,3210313671,3210313671
|
|
DD 3210313671,3210313671,3210313671,3210313671
|
|
DD 3336571891,3336571891,3336571891,3336571891
|
|
DD 3336571891,3336571891,3336571891,3336571891
|
|
DD 3584528711,3584528711,3584528711,3584528711
|
|
DD 3584528711,3584528711,3584528711,3584528711
|
|
DD 113926993,113926993,113926993,113926993
|
|
DD 113926993,113926993,113926993,113926993
|
|
DD 338241895,338241895,338241895,338241895
|
|
DD 338241895,338241895,338241895,338241895
|
|
DD 666307205,666307205,666307205,666307205
|
|
DD 666307205,666307205,666307205,666307205
|
|
DD 773529912,773529912,773529912,773529912
|
|
DD 773529912,773529912,773529912,773529912
|
|
DD 1294757372,1294757372,1294757372,1294757372
|
|
DD 1294757372,1294757372,1294757372,1294757372
|
|
DD 1396182291,1396182291,1396182291,1396182291
|
|
DD 1396182291,1396182291,1396182291,1396182291
|
|
DD 1695183700,1695183700,1695183700,1695183700
|
|
DD 1695183700,1695183700,1695183700,1695183700
|
|
DD 1986661051,1986661051,1986661051,1986661051
|
|
DD 1986661051,1986661051,1986661051,1986661051
|
|
DD 2177026350,2177026350,2177026350,2177026350
|
|
DD 2177026350,2177026350,2177026350,2177026350
|
|
DD 2456956037,2456956037,2456956037,2456956037
|
|
DD 2456956037,2456956037,2456956037,2456956037
|
|
DD 2730485921,2730485921,2730485921,2730485921
|
|
DD 2730485921,2730485921,2730485921,2730485921
|
|
DD 2820302411,2820302411,2820302411,2820302411
|
|
DD 2820302411,2820302411,2820302411,2820302411
|
|
DD 3259730800,3259730800,3259730800,3259730800
|
|
DD 3259730800,3259730800,3259730800,3259730800
|
|
DD 3345764771,3345764771,3345764771,3345764771
|
|
DD 3345764771,3345764771,3345764771,3345764771
|
|
DD 3516065817,3516065817,3516065817,3516065817
|
|
DD 3516065817,3516065817,3516065817,3516065817
|
|
DD 3600352804,3600352804,3600352804,3600352804
|
|
DD 3600352804,3600352804,3600352804,3600352804
|
|
DD 4094571909,4094571909,4094571909,4094571909
|
|
DD 4094571909,4094571909,4094571909,4094571909
|
|
DD 275423344,275423344,275423344,275423344
|
|
DD 275423344,275423344,275423344,275423344
|
|
DD 430227734,430227734,430227734,430227734
|
|
DD 430227734,430227734,430227734,430227734
|
|
DD 506948616,506948616,506948616,506948616
|
|
DD 506948616,506948616,506948616,506948616
|
|
DD 659060556,659060556,659060556,659060556
|
|
DD 659060556,659060556,659060556,659060556
|
|
DD 883997877,883997877,883997877,883997877
|
|
DD 883997877,883997877,883997877,883997877
|
|
DD 958139571,958139571,958139571,958139571
|
|
DD 958139571,958139571,958139571,958139571
|
|
DD 1322822218,1322822218,1322822218,1322822218
|
|
DD 1322822218,1322822218,1322822218,1322822218
|
|
DD 1537002063,1537002063,1537002063,1537002063
|
|
DD 1537002063,1537002063,1537002063,1537002063
|
|
DD 1747873779,1747873779,1747873779,1747873779
|
|
DD 1747873779,1747873779,1747873779,1747873779
|
|
DD 1955562222,1955562222,1955562222,1955562222
|
|
DD 1955562222,1955562222,1955562222,1955562222
|
|
DD 2024104815,2024104815,2024104815,2024104815
|
|
DD 2024104815,2024104815,2024104815,2024104815
|
|
DD 2227730452,2227730452,2227730452,2227730452
|
|
DD 2227730452,2227730452,2227730452,2227730452
|
|
DD 2361852424,2361852424,2361852424,2361852424
|
|
DD 2361852424,2361852424,2361852424,2361852424
|
|
DD 2428436474,2428436474,2428436474,2428436474
|
|
DD 2428436474,2428436474,2428436474,2428436474
|
|
DD 2756734187,2756734187,2756734187,2756734187
|
|
DD 2756734187,2756734187,2756734187,2756734187
|
|
DD 3204031479,3204031479,3204031479,3204031479
|
|
DD 3204031479,3204031479,3204031479,3204031479
|
|
DD 3329325298,3329325298,3329325298,3329325298
|
|
DD 3329325298,3329325298,3329325298,3329325298
|
|
; $L$pbswap -- 32-byte constant (two identical 16-byte rows) that the AVX2
; path loads into ymm6 with "vmovdqu ymm6,YMMWORD PTR[$L$pbswap]" before
; looping back to $L$oop_avx2.
; Stored little-endian, each dword's bytes read 3,2,1,0 / 7,6,5,4 / ...
; NOTE(review): presumably a vpshufb mask that byte-swaps every 32-bit word
; of the (big-endian) message -- the consuming instruction is outside this
; view, confirm there.
$L$pbswap::
|
|
DD 000010203h,004050607h,008090a0bh,00c0d0e0fh
|
|
DD 000010203h,004050607h,008090a0bh,00c0d0e0fh
|
|
; K256_shaext -- the same 64 SHA-256 round constants as K256, but stored
; once each (4 per row, 16 rows = 256 bytes) and written in hex; e.g.
; 0428a2f98h == 1116352408, the first K256 entry.
; NOTE(review): by its name this is the table for the SHA-extension code
; path; that code is outside this view -- confirm the consumer there.
K256_shaext::
|
|
DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h	; K[0..3]
|
|
DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h	; K[4..7]
|
|
DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h	; K[8..11]
|
|
DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h	; K[12..15]
|
|
DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch	; K[16..19]
|
|
DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah	; K[20..23]
|
|
DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h	; K[24..27]
|
|
DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h	; K[28..31]
|
|
DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h	; K[32..35]
|
|
DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h	; K[36..39]
|
|
DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h	; K[40..43]
|
|
DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h	; K[44..47]
|
|
DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h	; K[48..51]
|
|
DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h	; K[52..55]
|
|
DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h	; K[56..59]
|
|
DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h	; K[60..63]
|
|
; NUL-terminated ASCII identification string embedded in the code segment:
; "SHA256 multi-block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>"
; It is not referenced by any label in this view; it only tags the object.
DB 83,72,65,50,53,54,32,109,117,108,116,105,45,98,108,111	; "SHA256 multi-blo"
|
|
DB 99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114	; "ck transform for"
|
|
DB 32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71	; " x86_64, CRYPTOG"
|
|
DB 65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112	; "AMS by <appro@op"
|
|
DB 101,110,115,115,108,46,111,114,103,62,0	; "enssl.org>" + NUL
|
|
EXTERN __imp_RtlVirtualUnwind:NEAR
|
|
|
|
ALIGN 16
|
|
;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler named by the
; .xdata records of sha256_multi_block, its _shaext and its _avx variants.
;
; In (Win64 handler convention):
;   r8 = CONTEXT record of the interrupted frame
;   r9 = DISPATCHER_CONTEXT
;   (rcx/rdx are overwritten without being read)
; HandlerData (DISPATCHER_CONTEXT+56) points at two image-relative
; offsets taken from .xdata: [0] = end-of-prologue label, [1] = epilogue
; label.
;
; Behaviour:
;   * Rip <  HandlerData[0]: the frame is not set up yet; context->Rax is
;     used as the entry stack pointer (the functions do "mov rax,rsp"
;     first thing).
;   * Rip >= HandlerData[1]: the frame is already torn down;
;     context->Rsp is used as the entry stack pointer.
;   * otherwise: the entry stack pointer is fetched from the slot at
;     [272+context->Rsp]; rbx, rbp and the 160 bytes of saved
;     xmm6..xmm15 below it are copied back into the CONTEXT.
;   In every case rdi/rsi are reloaded from the entry frame's home slots
;   ([8+rsp]/[16+rsp]), the CONTEXT is copied to disp->ContextRecord and
;   RtlVirtualUnwind is called to continue unwinding.
; Out: eax = 1 (ExceptionContinueSearch).
; Offsets into CONTEXT / DISPATCHER_CONTEXT follow the Win64 layouts.
;-----------------------------------------------------------------------
se_handler PROC PRIVATE
|
|
push rsi	; save every callee-saved GPR this handler touches
|
|
push rdi
|
|
push rbx
|
|
push rbp
|
|
push r12
|
|
push r13
|
|
push r14
|
|
push r15
|
|
pushfq	; flags are saved too: the raw "cld" below changes DF
|
|
sub rsp,64	; 32 bytes shadow space + 4 stack arguments for RtlVirtualUnwind
|
|
|
|
mov rax,QWORD PTR[120+r8]	; rax = context->Rax
|
|
mov rbx,QWORD PTR[248+r8]	; rbx = context->Rip
|
|
|
|
mov rsi,QWORD PTR[8+r9]	; rsi = disp->ImageBase
|
|
mov r11,QWORD PTR[56+r9]	; r11 = disp->HandlerData
|
|
|
|
mov r10d,DWORD PTR[r11]	; HandlerData[0] (image-relative)
|
|
lea r10,QWORD PTR[r10*1+rsi]	; absolute address of the end-of-prologue label
|
|
cmp rbx,r10	; context->Rip < body label?
|
|
jb $L$in_prologue	; yes: frame not built, rax (entry rsp) is still valid
|
|
|
|
mov rax,QWORD PTR[152+r8]	; rax = context->Rsp
|
|
|
|
mov r10d,DWORD PTR[4+r11]	; HandlerData[1] (image-relative)
|
|
lea r10,QWORD PTR[r10*1+rsi]	; absolute address of the epilogue label
|
|
cmp rbx,r10	; context->Rip >= epilogue label?
|
|
jae $L$in_prologue	; yes: frame already released, Rsp is the entry rsp
|
|
|
|
mov rax,QWORD PTR[272+rax]	; pull the saved entry stack pointer out of the frame
|
|
|
|
mov rbx,QWORD PTR[((-8))+rax]	; rbx as pushed by the prologue
|
|
mov rbp,QWORD PTR[((-16))+rax]	; rbp as pushed by the prologue
|
|
mov QWORD PTR[144+r8],rbx	; restore context->Rbx
|
|
mov QWORD PTR[160+r8],rbp	; restore context->Rbp
|
|
|
|
lea rsi,QWORD PTR[((-24-160))+rax]	; source: saved xmm6..xmm15 (10*16 bytes)
|
|
lea rdi,QWORD PTR[512+r8]	; destination: &context->Xmm6
|
|
mov ecx,20	; 20 qwords = 160 bytes
|
|
DD 0a548f3fch	; raw bytes FC F3 48 A5 = cld; rep movsq
|
|
|
|
$L$in_prologue::	; rax = stack pointer at function entry (also entered from avx2_handler)
|
|
mov rdi,QWORD PTR[8+rax]	; rdi/rsi were parked in the home slots by the WIN64 prologue
|
|
mov rsi,QWORD PTR[16+rax]
|
|
mov QWORD PTR[152+r8],rax	; restore context->Rsp
|
|
mov QWORD PTR[168+r8],rsi	; restore context->Rsi
|
|
mov QWORD PTR[176+r8],rdi	; restore context->Rdi
|
|
|
|
mov rdi,QWORD PTR[40+r9]	; destination: disp->ContextRecord
|
|
mov rsi,r8	; source: the CONTEXT we just patched
|
|
mov ecx,154	; 154 qwords = 1232 bytes, the size of CONTEXT
|
|
DD 0a548f3fch	; raw bytes FC F3 48 A5 = cld; rep movsq
|
|
|
|
mov rsi,r9	; keep disp in a register that survives the argument set-up
|
|
xor rcx,rcx	; arg1: HandlerType = 0 (UNW_FLAG_NHANDLER)
|
|
mov rdx,QWORD PTR[8+rsi]	; arg2: disp->ImageBase
|
|
mov r8,QWORD PTR[rsi]	; arg3: disp->ControlPc
|
|
mov r9,QWORD PTR[16+rsi]	; arg4: disp->FunctionEntry
|
|
mov r10,QWORD PTR[40+rsi]	; disp->ContextRecord
|
|
lea r11,QWORD PTR[56+rsi]	; &disp->HandlerData
|
|
lea r12,QWORD PTR[24+rsi]	; &disp->EstablisherFrame
|
|
mov QWORD PTR[32+rsp],r10	; arg5
|
|
mov QWORD PTR[40+rsp],r11	; arg6
|
|
mov QWORD PTR[48+rsp],r12	; arg7
|
|
mov QWORD PTR[56+rsp],rcx	; arg8 = NULL (rcx is still zero)
|
|
call QWORD PTR[__imp_RtlVirtualUnwind]
|
|
|
|
mov eax,1	; return ExceptionContinueSearch
|
|
add rsp,64	; drop shadow space + stack arguments
|
|
popfq
|
|
pop r15
|
|
pop r14
|
|
pop r13
|
|
pop r12
|
|
pop rbp
|
|
pop rbx
|
|
pop rdi
|
|
pop rsi
|
|
DB 0F3h,0C3h ;repret (raw encoding of "rep ret")
|
|
se_handler ENDP
|
|
|
|
ALIGN 16
|
|
;-----------------------------------------------------------------------
; avx2_handler -- Win64 language-specific exception handler for
; sha256_multi_block_avx2 (named by $L$SEH_info_sha256_multi_block_avx2).
;
; In (Win64 handler convention):
;   r8 = CONTEXT record of the interrupted frame
;   r9 = DISPATCHER_CONTEXT; HandlerData[0]/[1] are the image-relative
;        offsets of $L$body_avx2 / $L$epilogue_avx2
; Behaviour:
;   * Rip outside [body, epilogue): nothing to recover here; control
;     joins $L$in_prologue (inside se_handler) with rax = context->Rax
;     (before the body) or context->Rsp (at/after the epilogue).
;   * Rip inside the body: the entry stack pointer is read from the
;     frame slot [544+context->Rsp] -- the same slot the function's own
;     epilogue reloads with "mov rax,QWORD PTR[544+rsp]" -- then
;     rbx, rbp, r12-r15 and the 160 bytes of saved xmm6..xmm15 are copied
;     back into the CONTEXT before joining $L$in_prologue.
; Out: does not return by itself; $L$in_prologue finishes the unwind,
;      pops the registers pushed below and returns eax = 1.
;
; The push sequence and the 64-byte stack reservation must stay identical
; to se_handler's, because that code performs the matching pops.
;-----------------------------------------------------------------------
avx2_handler PROC PRIVATE
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq                                  ; DF is changed by the raw "cld" below
        sub     rsp,64                          ; shadow space + stack args used at $L$in_prologue

        mov     rax,QWORD PTR[120+r8]           ; rax = context->Rax
        mov     rbx,QWORD PTR[248+r8]           ; rbx = context->Rip

        mov     rsi,QWORD PTR[8+r9]             ; rsi = disp->ImageBase
        mov     r11,QWORD PTR[56+r9]            ; r11 = disp->HandlerData

        mov     r10d,DWORD PTR[r11]             ; HandlerData[0]
        lea     r10,QWORD PTR[r10*1+rsi]        ; absolute address of $L$body_avx2
        cmp     rbx,r10                         ; context->Rip < body?
        jb      $L$in_prologue                  ; frame not built yet

        mov     rax,QWORD PTR[152+r8]           ; rax = context->Rsp

        mov     r10d,DWORD PTR[4+r11]           ; HandlerData[1]
        lea     r10,QWORD PTR[r10*1+rsi]        ; absolute address of $L$epilogue_avx2
        cmp     rbx,r10                         ; context->Rip >= epilogue?
        jae     $L$in_prologue                  ; frame already released

        ; Pull the saved entry stack pointer out of the interrupted frame.
        ; It must be addressed through rax (= context->Rsp); offset 544 of
        ; the CONTEXT record itself is the Xmm8 slot, not a stack pointer.
        mov     rax,QWORD PTR[544+rax]

        mov     rbx,QWORD PTR[((-8))+rax]       ; GPRs as pushed by the prologue
        mov     rbp,QWORD PTR[((-16))+rax]
        mov     r12,QWORD PTR[((-24))+rax]
        mov     r13,QWORD PTR[((-32))+rax]
        mov     r14,QWORD PTR[((-40))+rax]
        mov     r15,QWORD PTR[((-48))+rax]
        mov     QWORD PTR[144+r8],rbx           ; restore context->Rbx
        mov     QWORD PTR[160+r8],rbp           ; restore context->Rbp
        mov     QWORD PTR[216+r8],r12           ; restore context->R12
        mov     QWORD PTR[224+r8],r13           ; restore context->R13
        mov     QWORD PTR[232+r8],r14           ; restore context->R14
        mov     QWORD PTR[240+r8],r15           ; restore context->R15

        lea     rsi,QWORD PTR[((-56-160))+rax]  ; source: saved xmm6..xmm15 (10*16 bytes)
        lea     rdi,QWORD PTR[512+r8]           ; destination: &context->Xmm6
        mov     ecx,20                          ; 20 qwords = 160 bytes
        DD      0a548f3fch                      ; raw bytes FC F3 48 A5 = cld; rep movsq

        jmp     $L$in_prologue                  ; shared tail: fix Rsp/Rsi/Rdi, RtlVirtualUnwind, return
avx2_handler ENDP
|
|
.text$ ENDS
|
|
; .pdata -- Win64 function table. One 12-byte record per entry point:
;   DD begin RVA, DD end RVA, DD RVA of the unwind-info record in .xdata.
; The four records cover the generic, SHA-extension, AVX and AVX2 entry
; points of sha256_multi_block.
.pdata SEGMENT READONLY ALIGN(4)
|
|
ALIGN 4
|
|
DD imagerel $L$SEH_begin_sha256_multi_block	; generic path: start
|
|
DD imagerel $L$SEH_end_sha256_multi_block	;               end
|
|
DD imagerel $L$SEH_info_sha256_multi_block	;               unwind info
|
|
DD imagerel $L$SEH_begin_sha256_multi_block_shaext	; shaext path: start
|
|
DD imagerel $L$SEH_end_sha256_multi_block_shaext	;              end
|
|
DD imagerel $L$SEH_info_sha256_multi_block_shaext	;              unwind info
|
|
DD imagerel $L$SEH_begin_sha256_multi_block_avx	; avx path: start
|
|
DD imagerel $L$SEH_end_sha256_multi_block_avx	;           end
|
|
DD imagerel $L$SEH_info_sha256_multi_block_avx	;           unwind info
|
|
DD imagerel $L$SEH_begin_sha256_multi_block_avx2	; avx2 path: start
|
|
DD imagerel $L$SEH_end_sha256_multi_block_avx2	;            end
|
|
DD imagerel $L$SEH_info_sha256_multi_block_avx2	;            unwind info
|
|
.pdata ENDS
|
|
; .xdata -- unwind-info records referenced from .pdata. Each record is:
;   DB 9,0,0,0   header: version 1 with the exception-handler flag
;                (UNW_FLAG_EHANDLER) set; prologue size, unwind-code count
;                and frame register all zero, i.e. no unwind codes --
;                unwinding is done entirely by the handler
;   DD handler   RVA of the language-specific handler
;   DD body,epi  handler data: the two RVAs the handler reads as
;                HandlerData[0] (end of prologue) and HandlerData[1]
;                (start of epilogue)
.xdata SEGMENT READONLY ALIGN(8)
|
|
ALIGN 8
|
|
$L$SEH_info_sha256_multi_block::
|
|
DB 9,0,0,0	; version 1 | UNW_FLAG_EHANDLER, no unwind codes
|
|
DD imagerel se_handler
|
|
DD imagerel $L$body,imagerel $L$epilogue	; HandlerData[0], HandlerData[1]
|
|
$L$SEH_info_sha256_multi_block_shaext::
|
|
DB 9,0,0,0	; version 1 | UNW_FLAG_EHANDLER, no unwind codes
|
|
DD imagerel se_handler
|
|
DD imagerel $L$body_shaext,imagerel $L$epilogue_shaext	; HandlerData[0], HandlerData[1]
|
|
$L$SEH_info_sha256_multi_block_avx::
|
|
DB 9,0,0,0	; version 1 | UNW_FLAG_EHANDLER, no unwind codes
|
|
DD imagerel se_handler
|
|
DD imagerel $L$body_avx,imagerel $L$epilogue_avx	; HandlerData[0], HandlerData[1]
|
|
$L$SEH_info_sha256_multi_block_avx2::
|
|
DB 9,0,0,0	; version 1 | UNW_FLAG_EHANDLER, no unwind codes
|
|
DD imagerel avx2_handler	; AVX2 frame also saves r12-r15, hence its own handler
|
|
DD imagerel $L$body_avx2,imagerel $L$epilogue_avx2	; HandlerData[0], HandlerData[1]
|
|
|
|
.xdata ENDS
|
|
END
|
|
|