You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

8215 lines
154 KiB

OPTION DOTNAME
.text$ SEGMENT ALIGN(256) 'CODE'
EXTERN OPENSSL_ia32cap_P:NEAR
PUBLIC sha256_multi_block
ALIGN 32
sha256_multi_block PROC PUBLIC
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
mov QWORD PTR[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_sha256_multi_block::
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,QWORD PTR[((OPENSSL_ia32cap_P+4))]
bt rcx,61
jc _shaext_shortcut
test ecx,268435456
jnz _avx_shortcut
mov rax,rsp
push rbx
push rbp
lea rsp,QWORD PTR[((-168))+rsp]
movaps XMMWORD PTR[rsp],xmm6
movaps XMMWORD PTR[16+rsp],xmm7
movaps XMMWORD PTR[32+rsp],xmm8
movaps XMMWORD PTR[48+rsp],xmm9
movaps XMMWORD PTR[(-120)+rax],xmm10
movaps XMMWORD PTR[(-104)+rax],xmm11
movaps XMMWORD PTR[(-88)+rax],xmm12
movaps XMMWORD PTR[(-72)+rax],xmm13
movaps XMMWORD PTR[(-56)+rax],xmm14
movaps XMMWORD PTR[(-40)+rax],xmm15
sub rsp,288
and rsp,-256
mov QWORD PTR[272+rsp],rax
$L$body::
lea rbp,QWORD PTR[((K256+128))]
lea rbx,QWORD PTR[256+rsp]
lea rdi,QWORD PTR[128+rdi]
$L$oop_grande::
mov DWORD PTR[280+rsp],edx
xor edx,edx
mov r8,QWORD PTR[rsi]
mov ecx,DWORD PTR[8+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[rbx],ecx
cmovle r8,rbp
mov r9,QWORD PTR[16+rsi]
mov ecx,DWORD PTR[24+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[4+rbx],ecx
cmovle r9,rbp
mov r10,QWORD PTR[32+rsi]
mov ecx,DWORD PTR[40+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[8+rbx],ecx
cmovle r10,rbp
mov r11,QWORD PTR[48+rsi]
mov ecx,DWORD PTR[56+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[12+rbx],ecx
cmovle r11,rbp
test edx,edx
jz $L$done
movdqu xmm8,XMMWORD PTR[((0-128))+rdi]
lea rax,QWORD PTR[128+rsp]
movdqu xmm9,XMMWORD PTR[((32-128))+rdi]
movdqu xmm10,XMMWORD PTR[((64-128))+rdi]
movdqu xmm11,XMMWORD PTR[((96-128))+rdi]
movdqu xmm12,XMMWORD PTR[((128-128))+rdi]
movdqu xmm13,XMMWORD PTR[((160-128))+rdi]
movdqu xmm14,XMMWORD PTR[((192-128))+rdi]
movdqu xmm15,XMMWORD PTR[((224-128))+rdi]
movdqu xmm6,XMMWORD PTR[$L$pbswap]
jmp $L$oop
ALIGN 32
$L$oop::
movdqa xmm4,xmm10
pxor xmm4,xmm9
movd xmm5,DWORD PTR[r8]
movd xmm0,DWORD PTR[r9]
movd xmm1,DWORD PTR[r10]
movd xmm2,DWORD PTR[r11]
punpckldq xmm5,xmm1
punpckldq xmm0,xmm2
punpckldq xmm5,xmm0
movdqa xmm7,xmm12
DB 102,15,56,0,238
movdqa xmm2,xmm12
psrld xmm7,6
movdqa xmm1,xmm12
pslld xmm2,7
movdqa XMMWORD PTR[(0-128)+rax],xmm5
paddd xmm5,xmm15
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[((-128))+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm12
pxor xmm7,xmm2
movdqa xmm3,xmm12
pslld xmm2,26-21
pandn xmm0,xmm14
pand xmm3,xmm13
pxor xmm7,xmm1
movdqa xmm1,xmm8
pxor xmm7,xmm2
movdqa xmm2,xmm8
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm3
movdqa xmm3,xmm9
movdqa xmm7,xmm8
pslld xmm2,10
pxor xmm3,xmm8
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm4,xmm3
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm15,xmm9
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm15,xmm4
paddd xmm11,xmm5
pxor xmm7,xmm2
paddd xmm15,xmm5
paddd xmm15,xmm7
movd xmm5,DWORD PTR[4+r8]
movd xmm0,DWORD PTR[4+r9]
movd xmm1,DWORD PTR[4+r10]
movd xmm2,DWORD PTR[4+r11]
punpckldq xmm5,xmm1
punpckldq xmm0,xmm2
punpckldq xmm5,xmm0
movdqa xmm7,xmm11
movdqa xmm2,xmm11
DB 102,15,56,0,238
psrld xmm7,6
movdqa xmm1,xmm11
pslld xmm2,7
movdqa XMMWORD PTR[(16-128)+rax],xmm5
paddd xmm5,xmm14
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[((-96))+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm11
pxor xmm7,xmm2
movdqa xmm4,xmm11
pslld xmm2,26-21
pandn xmm0,xmm13
pand xmm4,xmm12
pxor xmm7,xmm1
movdqa xmm1,xmm15
pxor xmm7,xmm2
movdqa xmm2,xmm15
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm4
movdqa xmm4,xmm8
movdqa xmm7,xmm15
pslld xmm2,10
pxor xmm4,xmm15
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm3,xmm4
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm14,xmm8
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm14,xmm3
paddd xmm10,xmm5
pxor xmm7,xmm2
paddd xmm14,xmm5
paddd xmm14,xmm7
movd xmm5,DWORD PTR[8+r8]
movd xmm0,DWORD PTR[8+r9]
movd xmm1,DWORD PTR[8+r10]
movd xmm2,DWORD PTR[8+r11]
punpckldq xmm5,xmm1
punpckldq xmm0,xmm2
punpckldq xmm5,xmm0
movdqa xmm7,xmm10
DB 102,15,56,0,238
movdqa xmm2,xmm10
psrld xmm7,6
movdqa xmm1,xmm10
pslld xmm2,7
movdqa XMMWORD PTR[(32-128)+rax],xmm5
paddd xmm5,xmm13
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[((-64))+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm10
pxor xmm7,xmm2
movdqa xmm3,xmm10
pslld xmm2,26-21
pandn xmm0,xmm12
pand xmm3,xmm11
pxor xmm7,xmm1
movdqa xmm1,xmm14
pxor xmm7,xmm2
movdqa xmm2,xmm14
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm3
movdqa xmm3,xmm15
movdqa xmm7,xmm14
pslld xmm2,10
pxor xmm3,xmm14
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm4,xmm3
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm13,xmm15
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm13,xmm4
paddd xmm9,xmm5
pxor xmm7,xmm2
paddd xmm13,xmm5
paddd xmm13,xmm7
movd xmm5,DWORD PTR[12+r8]
movd xmm0,DWORD PTR[12+r9]
movd xmm1,DWORD PTR[12+r10]
movd xmm2,DWORD PTR[12+r11]
punpckldq xmm5,xmm1
punpckldq xmm0,xmm2
punpckldq xmm5,xmm0
movdqa xmm7,xmm9
movdqa xmm2,xmm9
DB 102,15,56,0,238
psrld xmm7,6
movdqa xmm1,xmm9
pslld xmm2,7
movdqa XMMWORD PTR[(48-128)+rax],xmm5
paddd xmm5,xmm12
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[((-32))+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm9
pxor xmm7,xmm2
movdqa xmm4,xmm9
pslld xmm2,26-21
pandn xmm0,xmm11
pand xmm4,xmm10
pxor xmm7,xmm1
movdqa xmm1,xmm13
pxor xmm7,xmm2
movdqa xmm2,xmm13
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm4
movdqa xmm4,xmm14
movdqa xmm7,xmm13
pslld xmm2,10
pxor xmm4,xmm13
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm3,xmm4
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm12,xmm14
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm12,xmm3
paddd xmm8,xmm5
pxor xmm7,xmm2
paddd xmm12,xmm5
paddd xmm12,xmm7
movd xmm5,DWORD PTR[16+r8]
movd xmm0,DWORD PTR[16+r9]
movd xmm1,DWORD PTR[16+r10]
movd xmm2,DWORD PTR[16+r11]
punpckldq xmm5,xmm1
punpckldq xmm0,xmm2
punpckldq xmm5,xmm0
movdqa xmm7,xmm8
DB 102,15,56,0,238
movdqa xmm2,xmm8
psrld xmm7,6
movdqa xmm1,xmm8
pslld xmm2,7
movdqa XMMWORD PTR[(64-128)+rax],xmm5
paddd xmm5,xmm11
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm8
pxor xmm7,xmm2
movdqa xmm3,xmm8
pslld xmm2,26-21
pandn xmm0,xmm10
pand xmm3,xmm9
pxor xmm7,xmm1
movdqa xmm1,xmm12
pxor xmm7,xmm2
movdqa xmm2,xmm12
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm3
movdqa xmm3,xmm13
movdqa xmm7,xmm12
pslld xmm2,10
pxor xmm3,xmm12
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm4,xmm3
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm11,xmm13
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm11,xmm4
paddd xmm15,xmm5
pxor xmm7,xmm2
paddd xmm11,xmm5
paddd xmm11,xmm7
movd xmm5,DWORD PTR[20+r8]
movd xmm0,DWORD PTR[20+r9]
movd xmm1,DWORD PTR[20+r10]
movd xmm2,DWORD PTR[20+r11]
punpckldq xmm5,xmm1
punpckldq xmm0,xmm2
punpckldq xmm5,xmm0
movdqa xmm7,xmm15
movdqa xmm2,xmm15
DB 102,15,56,0,238
psrld xmm7,6
movdqa xmm1,xmm15
pslld xmm2,7
movdqa XMMWORD PTR[(80-128)+rax],xmm5
paddd xmm5,xmm10
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[32+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm15
pxor xmm7,xmm2
movdqa xmm4,xmm15
pslld xmm2,26-21
pandn xmm0,xmm9
pand xmm4,xmm8
pxor xmm7,xmm1
movdqa xmm1,xmm11
pxor xmm7,xmm2
movdqa xmm2,xmm11
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm4
movdqa xmm4,xmm12
movdqa xmm7,xmm11
pslld xmm2,10
pxor xmm4,xmm11
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm3,xmm4
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm10,xmm12
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm10,xmm3
paddd xmm14,xmm5
pxor xmm7,xmm2
paddd xmm10,xmm5
paddd xmm10,xmm7
movd xmm5,DWORD PTR[24+r8]
movd xmm0,DWORD PTR[24+r9]
movd xmm1,DWORD PTR[24+r10]
movd xmm2,DWORD PTR[24+r11]
punpckldq xmm5,xmm1
punpckldq xmm0,xmm2
punpckldq xmm5,xmm0
movdqa xmm7,xmm14
DB 102,15,56,0,238
movdqa xmm2,xmm14
psrld xmm7,6
movdqa xmm1,xmm14
pslld xmm2,7
movdqa XMMWORD PTR[(96-128)+rax],xmm5
paddd xmm5,xmm9
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[64+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm14
pxor xmm7,xmm2
movdqa xmm3,xmm14
pslld xmm2,26-21
pandn xmm0,xmm8
pand xmm3,xmm15
pxor xmm7,xmm1
movdqa xmm1,xmm10
pxor xmm7,xmm2
movdqa xmm2,xmm10
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm3
movdqa xmm3,xmm11
movdqa xmm7,xmm10
pslld xmm2,10
pxor xmm3,xmm10
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm4,xmm3
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm9,xmm11
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm9,xmm4
paddd xmm13,xmm5
pxor xmm7,xmm2
paddd xmm9,xmm5
paddd xmm9,xmm7
movd xmm5,DWORD PTR[28+r8]
movd xmm0,DWORD PTR[28+r9]
movd xmm1,DWORD PTR[28+r10]
movd xmm2,DWORD PTR[28+r11]
punpckldq xmm5,xmm1
punpckldq xmm0,xmm2
punpckldq xmm5,xmm0
movdqa xmm7,xmm13
movdqa xmm2,xmm13
DB 102,15,56,0,238
psrld xmm7,6
movdqa xmm1,xmm13
pslld xmm2,7
movdqa XMMWORD PTR[(112-128)+rax],xmm5
paddd xmm5,xmm8
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[96+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm13
pxor xmm7,xmm2
movdqa xmm4,xmm13
pslld xmm2,26-21
pandn xmm0,xmm15
pand xmm4,xmm14
pxor xmm7,xmm1
movdqa xmm1,xmm9
pxor xmm7,xmm2
movdqa xmm2,xmm9
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm4
movdqa xmm4,xmm10
movdqa xmm7,xmm9
pslld xmm2,10
pxor xmm4,xmm9
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm3,xmm4
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm8,xmm10
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm8,xmm3
paddd xmm12,xmm5
pxor xmm7,xmm2
paddd xmm8,xmm5
paddd xmm8,xmm7
lea rbp,QWORD PTR[256+rbp]
movd xmm5,DWORD PTR[32+r8]
movd xmm0,DWORD PTR[32+r9]
movd xmm1,DWORD PTR[32+r10]
movd xmm2,DWORD PTR[32+r11]
punpckldq xmm5,xmm1
punpckldq xmm0,xmm2
punpckldq xmm5,xmm0
movdqa xmm7,xmm12
DB 102,15,56,0,238
movdqa xmm2,xmm12
psrld xmm7,6
movdqa xmm1,xmm12
pslld xmm2,7
movdqa XMMWORD PTR[(128-128)+rax],xmm5
paddd xmm5,xmm15
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[((-128))+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm12
pxor xmm7,xmm2
movdqa xmm3,xmm12
pslld xmm2,26-21
pandn xmm0,xmm14
pand xmm3,xmm13
pxor xmm7,xmm1
movdqa xmm1,xmm8
pxor xmm7,xmm2
movdqa xmm2,xmm8
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm3
movdqa xmm3,xmm9
movdqa xmm7,xmm8
pslld xmm2,10
pxor xmm3,xmm8
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm4,xmm3
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm15,xmm9
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm15,xmm4
paddd xmm11,xmm5
pxor xmm7,xmm2
paddd xmm15,xmm5
paddd xmm15,xmm7
movd xmm5,DWORD PTR[36+r8]
movd xmm0,DWORD PTR[36+r9]
movd xmm1,DWORD PTR[36+r10]
movd xmm2,DWORD PTR[36+r11]
punpckldq xmm5,xmm1
punpckldq xmm0,xmm2
punpckldq xmm5,xmm0
movdqa xmm7,xmm11
movdqa xmm2,xmm11
DB 102,15,56,0,238
psrld xmm7,6
movdqa xmm1,xmm11
pslld xmm2,7
movdqa XMMWORD PTR[(144-128)+rax],xmm5
paddd xmm5,xmm14
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[((-96))+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm11
pxor xmm7,xmm2
movdqa xmm4,xmm11
pslld xmm2,26-21
pandn xmm0,xmm13
pand xmm4,xmm12
pxor xmm7,xmm1
movdqa xmm1,xmm15
pxor xmm7,xmm2
movdqa xmm2,xmm15
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm4
movdqa xmm4,xmm8
movdqa xmm7,xmm15
pslld xmm2,10
pxor xmm4,xmm15
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm3,xmm4
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm14,xmm8
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm14,xmm3
paddd xmm10,xmm5
pxor xmm7,xmm2
paddd xmm14,xmm5
paddd xmm14,xmm7
movd xmm5,DWORD PTR[40+r8]
movd xmm0,DWORD PTR[40+r9]
movd xmm1,DWORD PTR[40+r10]
movd xmm2,DWORD PTR[40+r11]
punpckldq xmm5,xmm1
punpckldq xmm0,xmm2
punpckldq xmm5,xmm0
movdqa xmm7,xmm10
DB 102,15,56,0,238
movdqa xmm2,xmm10
psrld xmm7,6
movdqa xmm1,xmm10
pslld xmm2,7
movdqa XMMWORD PTR[(160-128)+rax],xmm5
paddd xmm5,xmm13
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[((-64))+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm10
pxor xmm7,xmm2
movdqa xmm3,xmm10
pslld xmm2,26-21
pandn xmm0,xmm12
pand xmm3,xmm11
pxor xmm7,xmm1
movdqa xmm1,xmm14
pxor xmm7,xmm2
movdqa xmm2,xmm14
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm3
movdqa xmm3,xmm15
movdqa xmm7,xmm14
pslld xmm2,10
pxor xmm3,xmm14
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm4,xmm3
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm13,xmm15
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm13,xmm4
paddd xmm9,xmm5
pxor xmm7,xmm2
paddd xmm13,xmm5
paddd xmm13,xmm7
movd xmm5,DWORD PTR[44+r8]
movd xmm0,DWORD PTR[44+r9]
movd xmm1,DWORD PTR[44+r10]
movd xmm2,DWORD PTR[44+r11]
punpckldq xmm5,xmm1
punpckldq xmm0,xmm2
punpckldq xmm5,xmm0
movdqa xmm7,xmm9
movdqa xmm2,xmm9
DB 102,15,56,0,238
psrld xmm7,6
movdqa xmm1,xmm9
pslld xmm2,7
movdqa XMMWORD PTR[(176-128)+rax],xmm5
paddd xmm5,xmm12
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[((-32))+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm9
pxor xmm7,xmm2
movdqa xmm4,xmm9
pslld xmm2,26-21
pandn xmm0,xmm11
pand xmm4,xmm10
pxor xmm7,xmm1
movdqa xmm1,xmm13
pxor xmm7,xmm2
movdqa xmm2,xmm13
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm4
movdqa xmm4,xmm14
movdqa xmm7,xmm13
pslld xmm2,10
pxor xmm4,xmm13
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm3,xmm4
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm12,xmm14
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm12,xmm3
paddd xmm8,xmm5
pxor xmm7,xmm2
paddd xmm12,xmm5
paddd xmm12,xmm7
movd xmm5,DWORD PTR[48+r8]
movd xmm0,DWORD PTR[48+r9]
movd xmm1,DWORD PTR[48+r10]
movd xmm2,DWORD PTR[48+r11]
punpckldq xmm5,xmm1
punpckldq xmm0,xmm2
punpckldq xmm5,xmm0
movdqa xmm7,xmm8
DB 102,15,56,0,238
movdqa xmm2,xmm8
psrld xmm7,6
movdqa xmm1,xmm8
pslld xmm2,7
movdqa XMMWORD PTR[(192-128)+rax],xmm5
paddd xmm5,xmm11
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm8
pxor xmm7,xmm2
movdqa xmm3,xmm8
pslld xmm2,26-21
pandn xmm0,xmm10
pand xmm3,xmm9
pxor xmm7,xmm1
movdqa xmm1,xmm12
pxor xmm7,xmm2
movdqa xmm2,xmm12
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm3
movdqa xmm3,xmm13
movdqa xmm7,xmm12
pslld xmm2,10
pxor xmm3,xmm12
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm4,xmm3
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm11,xmm13
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm11,xmm4
paddd xmm15,xmm5
pxor xmm7,xmm2
paddd xmm11,xmm5
paddd xmm11,xmm7
movd xmm5,DWORD PTR[52+r8]
movd xmm0,DWORD PTR[52+r9]
movd xmm1,DWORD PTR[52+r10]
movd xmm2,DWORD PTR[52+r11]
punpckldq xmm5,xmm1
punpckldq xmm0,xmm2
punpckldq xmm5,xmm0
movdqa xmm7,xmm15
movdqa xmm2,xmm15
DB 102,15,56,0,238
psrld xmm7,6
movdqa xmm1,xmm15
pslld xmm2,7
movdqa XMMWORD PTR[(208-128)+rax],xmm5
paddd xmm5,xmm10
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[32+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm15
pxor xmm7,xmm2
movdqa xmm4,xmm15
pslld xmm2,26-21
pandn xmm0,xmm9
pand xmm4,xmm8
pxor xmm7,xmm1
movdqa xmm1,xmm11
pxor xmm7,xmm2
movdqa xmm2,xmm11
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm4
movdqa xmm4,xmm12
movdqa xmm7,xmm11
pslld xmm2,10
pxor xmm4,xmm11
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm3,xmm4
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm10,xmm12
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm10,xmm3
paddd xmm14,xmm5
pxor xmm7,xmm2
paddd xmm10,xmm5
paddd xmm10,xmm7
movd xmm5,DWORD PTR[56+r8]
movd xmm0,DWORD PTR[56+r9]
movd xmm1,DWORD PTR[56+r10]
movd xmm2,DWORD PTR[56+r11]
punpckldq xmm5,xmm1
punpckldq xmm0,xmm2
punpckldq xmm5,xmm0
movdqa xmm7,xmm14
DB 102,15,56,0,238
movdqa xmm2,xmm14
psrld xmm7,6
movdqa xmm1,xmm14
pslld xmm2,7
movdqa XMMWORD PTR[(224-128)+rax],xmm5
paddd xmm5,xmm9
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[64+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm14
pxor xmm7,xmm2
movdqa xmm3,xmm14
pslld xmm2,26-21
pandn xmm0,xmm8
pand xmm3,xmm15
pxor xmm7,xmm1
movdqa xmm1,xmm10
pxor xmm7,xmm2
movdqa xmm2,xmm10
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm3
movdqa xmm3,xmm11
movdqa xmm7,xmm10
pslld xmm2,10
pxor xmm3,xmm10
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm4,xmm3
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm9,xmm11
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm9,xmm4
paddd xmm13,xmm5
pxor xmm7,xmm2
paddd xmm9,xmm5
paddd xmm9,xmm7
movd xmm5,DWORD PTR[60+r8]
lea r8,QWORD PTR[64+r8]
movd xmm0,DWORD PTR[60+r9]
lea r9,QWORD PTR[64+r9]
movd xmm1,DWORD PTR[60+r10]
lea r10,QWORD PTR[64+r10]
movd xmm2,DWORD PTR[60+r11]
lea r11,QWORD PTR[64+r11]
punpckldq xmm5,xmm1
punpckldq xmm0,xmm2
punpckldq xmm5,xmm0
movdqa xmm7,xmm13
movdqa xmm2,xmm13
DB 102,15,56,0,238
psrld xmm7,6
movdqa xmm1,xmm13
pslld xmm2,7
movdqa XMMWORD PTR[(240-128)+rax],xmm5
paddd xmm5,xmm8
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[96+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm13
prefetcht0 [63+r8]
pxor xmm7,xmm2
movdqa xmm4,xmm13
pslld xmm2,26-21
pandn xmm0,xmm15
pand xmm4,xmm14
pxor xmm7,xmm1
prefetcht0 [63+r9]
movdqa xmm1,xmm9
pxor xmm7,xmm2
movdqa xmm2,xmm9
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm4
movdqa xmm4,xmm10
movdqa xmm7,xmm9
pslld xmm2,10
pxor xmm4,xmm9
prefetcht0 [63+r10]
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm3,xmm4
pxor xmm1,xmm7
prefetcht0 [63+r11]
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm8,xmm10
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm8,xmm3
paddd xmm12,xmm5
pxor xmm7,xmm2
paddd xmm8,xmm5
paddd xmm8,xmm7
lea rbp,QWORD PTR[256+rbp]
movdqu xmm5,XMMWORD PTR[((0-128))+rax]
mov ecx,3
jmp $L$oop_16_xx
ALIGN 32
$L$oop_16_xx::
movdqa xmm6,XMMWORD PTR[((16-128))+rax]
paddd xmm5,XMMWORD PTR[((144-128))+rax]
movdqa xmm7,xmm6
movdqa xmm1,xmm6
psrld xmm7,3
movdqa xmm2,xmm6
psrld xmm1,7
movdqa xmm0,XMMWORD PTR[((224-128))+rax]
pslld xmm2,14
pxor xmm7,xmm1
psrld xmm1,18-7
movdqa xmm3,xmm0
pxor xmm7,xmm2
pslld xmm2,25-14
pxor xmm7,xmm1
psrld xmm0,10
movdqa xmm1,xmm3
psrld xmm3,17
pxor xmm7,xmm2
pslld xmm1,13
paddd xmm5,xmm7
pxor xmm0,xmm3
psrld xmm3,19-17
pxor xmm0,xmm1
pslld xmm1,15-13
pxor xmm0,xmm3
pxor xmm0,xmm1
paddd xmm5,xmm0
movdqa xmm7,xmm12
movdqa xmm2,xmm12
psrld xmm7,6
movdqa xmm1,xmm12
pslld xmm2,7
movdqa XMMWORD PTR[(0-128)+rax],xmm5
paddd xmm5,xmm15
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[((-128))+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm12
pxor xmm7,xmm2
movdqa xmm3,xmm12
pslld xmm2,26-21
pandn xmm0,xmm14
pand xmm3,xmm13
pxor xmm7,xmm1
movdqa xmm1,xmm8
pxor xmm7,xmm2
movdqa xmm2,xmm8
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm3
movdqa xmm3,xmm9
movdqa xmm7,xmm8
pslld xmm2,10
pxor xmm3,xmm8
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm4,xmm3
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm15,xmm9
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm15,xmm4
paddd xmm11,xmm5
pxor xmm7,xmm2
paddd xmm15,xmm5
paddd xmm15,xmm7
movdqa xmm5,XMMWORD PTR[((32-128))+rax]
paddd xmm6,XMMWORD PTR[((160-128))+rax]
movdqa xmm7,xmm5
movdqa xmm1,xmm5
psrld xmm7,3
movdqa xmm2,xmm5
psrld xmm1,7
movdqa xmm0,XMMWORD PTR[((240-128))+rax]
pslld xmm2,14
pxor xmm7,xmm1
psrld xmm1,18-7
movdqa xmm4,xmm0
pxor xmm7,xmm2
pslld xmm2,25-14
pxor xmm7,xmm1
psrld xmm0,10
movdqa xmm1,xmm4
psrld xmm4,17
pxor xmm7,xmm2
pslld xmm1,13
paddd xmm6,xmm7
pxor xmm0,xmm4
psrld xmm4,19-17
pxor xmm0,xmm1
pslld xmm1,15-13
pxor xmm0,xmm4
pxor xmm0,xmm1
paddd xmm6,xmm0
movdqa xmm7,xmm11
movdqa xmm2,xmm11
psrld xmm7,6
movdqa xmm1,xmm11
pslld xmm2,7
movdqa XMMWORD PTR[(16-128)+rax],xmm6
paddd xmm6,xmm14
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm6,XMMWORD PTR[((-96))+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm11
pxor xmm7,xmm2
movdqa xmm4,xmm11
pslld xmm2,26-21
pandn xmm0,xmm13
pand xmm4,xmm12
pxor xmm7,xmm1
movdqa xmm1,xmm15
pxor xmm7,xmm2
movdqa xmm2,xmm15
psrld xmm1,2
paddd xmm6,xmm7
pxor xmm0,xmm4
movdqa xmm4,xmm8
movdqa xmm7,xmm15
pslld xmm2,10
pxor xmm4,xmm15
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm6,xmm0
pslld xmm2,19-10
pand xmm3,xmm4
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm14,xmm8
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm14,xmm3
paddd xmm10,xmm6
pxor xmm7,xmm2
paddd xmm14,xmm6
paddd xmm14,xmm7
movdqa xmm6,XMMWORD PTR[((48-128))+rax]
paddd xmm5,XMMWORD PTR[((176-128))+rax]
movdqa xmm7,xmm6
movdqa xmm1,xmm6
psrld xmm7,3
movdqa xmm2,xmm6
psrld xmm1,7
movdqa xmm0,XMMWORD PTR[((0-128))+rax]
pslld xmm2,14
pxor xmm7,xmm1
psrld xmm1,18-7
movdqa xmm3,xmm0
pxor xmm7,xmm2
pslld xmm2,25-14
pxor xmm7,xmm1
psrld xmm0,10
movdqa xmm1,xmm3
psrld xmm3,17
pxor xmm7,xmm2
pslld xmm1,13
paddd xmm5,xmm7
pxor xmm0,xmm3
psrld xmm3,19-17
pxor xmm0,xmm1
pslld xmm1,15-13
pxor xmm0,xmm3
pxor xmm0,xmm1
paddd xmm5,xmm0
movdqa xmm7,xmm10
movdqa xmm2,xmm10
psrld xmm7,6
movdqa xmm1,xmm10
pslld xmm2,7
movdqa XMMWORD PTR[(32-128)+rax],xmm5
paddd xmm5,xmm13
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[((-64))+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm10
pxor xmm7,xmm2
movdqa xmm3,xmm10
pslld xmm2,26-21
pandn xmm0,xmm12
pand xmm3,xmm11
pxor xmm7,xmm1
movdqa xmm1,xmm14
pxor xmm7,xmm2
movdqa xmm2,xmm14
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm3
movdqa xmm3,xmm15
movdqa xmm7,xmm14
pslld xmm2,10
pxor xmm3,xmm14
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm4,xmm3
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm13,xmm15
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm13,xmm4
paddd xmm9,xmm5
pxor xmm7,xmm2
paddd xmm13,xmm5
paddd xmm13,xmm7
movdqa xmm5,XMMWORD PTR[((64-128))+rax]
paddd xmm6,XMMWORD PTR[((192-128))+rax]
movdqa xmm7,xmm5
movdqa xmm1,xmm5
psrld xmm7,3
movdqa xmm2,xmm5
psrld xmm1,7
movdqa xmm0,XMMWORD PTR[((16-128))+rax]
pslld xmm2,14
pxor xmm7,xmm1
psrld xmm1,18-7
movdqa xmm4,xmm0
pxor xmm7,xmm2
pslld xmm2,25-14
pxor xmm7,xmm1
psrld xmm0,10
movdqa xmm1,xmm4
psrld xmm4,17
pxor xmm7,xmm2
pslld xmm1,13
paddd xmm6,xmm7
pxor xmm0,xmm4
psrld xmm4,19-17
pxor xmm0,xmm1
pslld xmm1,15-13
pxor xmm0,xmm4
pxor xmm0,xmm1
paddd xmm6,xmm0
movdqa xmm7,xmm9
movdqa xmm2,xmm9
psrld xmm7,6
movdqa xmm1,xmm9
pslld xmm2,7
movdqa XMMWORD PTR[(48-128)+rax],xmm6
paddd xmm6,xmm12
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm6,XMMWORD PTR[((-32))+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm9
pxor xmm7,xmm2
movdqa xmm4,xmm9
pslld xmm2,26-21
pandn xmm0,xmm11
pand xmm4,xmm10
pxor xmm7,xmm1
movdqa xmm1,xmm13
pxor xmm7,xmm2
movdqa xmm2,xmm13
psrld xmm1,2
paddd xmm6,xmm7
pxor xmm0,xmm4
movdqa xmm4,xmm14
movdqa xmm7,xmm13
pslld xmm2,10
pxor xmm4,xmm13
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm6,xmm0
pslld xmm2,19-10
pand xmm3,xmm4
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm12,xmm14
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm12,xmm3
paddd xmm8,xmm6
pxor xmm7,xmm2
paddd xmm12,xmm6
paddd xmm12,xmm7
movdqa xmm6,XMMWORD PTR[((80-128))+rax]
paddd xmm5,XMMWORD PTR[((208-128))+rax]
movdqa xmm7,xmm6
movdqa xmm1,xmm6
psrld xmm7,3
movdqa xmm2,xmm6
psrld xmm1,7
movdqa xmm0,XMMWORD PTR[((32-128))+rax]
pslld xmm2,14
pxor xmm7,xmm1
psrld xmm1,18-7
movdqa xmm3,xmm0
pxor xmm7,xmm2
pslld xmm2,25-14
pxor xmm7,xmm1
psrld xmm0,10
movdqa xmm1,xmm3
psrld xmm3,17
pxor xmm7,xmm2
pslld xmm1,13
paddd xmm5,xmm7
pxor xmm0,xmm3
psrld xmm3,19-17
pxor xmm0,xmm1
pslld xmm1,15-13
pxor xmm0,xmm3
pxor xmm0,xmm1
paddd xmm5,xmm0
movdqa xmm7,xmm8
movdqa xmm2,xmm8
psrld xmm7,6
movdqa xmm1,xmm8
pslld xmm2,7
movdqa XMMWORD PTR[(64-128)+rax],xmm5
paddd xmm5,xmm11
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm8
pxor xmm7,xmm2
movdqa xmm3,xmm8
pslld xmm2,26-21
pandn xmm0,xmm10
pand xmm3,xmm9
pxor xmm7,xmm1
movdqa xmm1,xmm12
pxor xmm7,xmm2
movdqa xmm2,xmm12
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm3
movdqa xmm3,xmm13
movdqa xmm7,xmm12
pslld xmm2,10
pxor xmm3,xmm12
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm4,xmm3
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm11,xmm13
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm11,xmm4
paddd xmm15,xmm5
pxor xmm7,xmm2
paddd xmm11,xmm5
paddd xmm11,xmm7
movdqa xmm5,XMMWORD PTR[((96-128))+rax]
paddd xmm6,XMMWORD PTR[((224-128))+rax]
movdqa xmm7,xmm5
movdqa xmm1,xmm5
psrld xmm7,3
movdqa xmm2,xmm5
psrld xmm1,7
movdqa xmm0,XMMWORD PTR[((48-128))+rax]
pslld xmm2,14
pxor xmm7,xmm1
psrld xmm1,18-7
movdqa xmm4,xmm0
pxor xmm7,xmm2
pslld xmm2,25-14
pxor xmm7,xmm1
psrld xmm0,10
movdqa xmm1,xmm4
psrld xmm4,17
pxor xmm7,xmm2
pslld xmm1,13
paddd xmm6,xmm7
pxor xmm0,xmm4
psrld xmm4,19-17
pxor xmm0,xmm1
pslld xmm1,15-13
pxor xmm0,xmm4
pxor xmm0,xmm1
paddd xmm6,xmm0
movdqa xmm7,xmm15
movdqa xmm2,xmm15
psrld xmm7,6
movdqa xmm1,xmm15
pslld xmm2,7
movdqa XMMWORD PTR[(80-128)+rax],xmm6
paddd xmm6,xmm10
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm6,XMMWORD PTR[32+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm15
pxor xmm7,xmm2
movdqa xmm4,xmm15
pslld xmm2,26-21
pandn xmm0,xmm9
pand xmm4,xmm8
pxor xmm7,xmm1
movdqa xmm1,xmm11
pxor xmm7,xmm2
movdqa xmm2,xmm11
psrld xmm1,2
paddd xmm6,xmm7
pxor xmm0,xmm4
movdqa xmm4,xmm12
movdqa xmm7,xmm11
pslld xmm2,10
pxor xmm4,xmm11
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm6,xmm0
pslld xmm2,19-10
pand xmm3,xmm4
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm10,xmm12
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm10,xmm3
paddd xmm14,xmm6
pxor xmm7,xmm2
paddd xmm10,xmm6
paddd xmm10,xmm7
movdqa xmm6,XMMWORD PTR[((112-128))+rax]
paddd xmm5,XMMWORD PTR[((240-128))+rax]
movdqa xmm7,xmm6
movdqa xmm1,xmm6
psrld xmm7,3
movdqa xmm2,xmm6
psrld xmm1,7
movdqa xmm0,XMMWORD PTR[((64-128))+rax]
pslld xmm2,14
pxor xmm7,xmm1
psrld xmm1,18-7
movdqa xmm3,xmm0
pxor xmm7,xmm2
pslld xmm2,25-14
pxor xmm7,xmm1
psrld xmm0,10
movdqa xmm1,xmm3
psrld xmm3,17
pxor xmm7,xmm2
pslld xmm1,13
paddd xmm5,xmm7
pxor xmm0,xmm3
psrld xmm3,19-17
pxor xmm0,xmm1
pslld xmm1,15-13
pxor xmm0,xmm3
pxor xmm0,xmm1
paddd xmm5,xmm0
movdqa xmm7,xmm14
movdqa xmm2,xmm14
psrld xmm7,6
movdqa xmm1,xmm14
pslld xmm2,7
movdqa XMMWORD PTR[(96-128)+rax],xmm5
paddd xmm5,xmm9
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[64+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm14
pxor xmm7,xmm2
movdqa xmm3,xmm14
pslld xmm2,26-21
pandn xmm0,xmm8
pand xmm3,xmm15
pxor xmm7,xmm1
movdqa xmm1,xmm10
pxor xmm7,xmm2
movdqa xmm2,xmm10
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm3
movdqa xmm3,xmm11
movdqa xmm7,xmm10
pslld xmm2,10
pxor xmm3,xmm10
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm4,xmm3
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm9,xmm11
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm9,xmm4
paddd xmm13,xmm5
pxor xmm7,xmm2
paddd xmm9,xmm5
paddd xmm9,xmm7
movdqa xmm5,XMMWORD PTR[((128-128))+rax]
paddd xmm6,XMMWORD PTR[((0-128))+rax]
movdqa xmm7,xmm5
movdqa xmm1,xmm5
psrld xmm7,3
movdqa xmm2,xmm5
psrld xmm1,7
movdqa xmm0,XMMWORD PTR[((80-128))+rax]
pslld xmm2,14
pxor xmm7,xmm1
psrld xmm1,18-7
movdqa xmm4,xmm0
pxor xmm7,xmm2
pslld xmm2,25-14
pxor xmm7,xmm1
psrld xmm0,10
movdqa xmm1,xmm4
psrld xmm4,17
pxor xmm7,xmm2
pslld xmm1,13
paddd xmm6,xmm7
pxor xmm0,xmm4
psrld xmm4,19-17
pxor xmm0,xmm1
pslld xmm1,15-13
pxor xmm0,xmm4
pxor xmm0,xmm1
paddd xmm6,xmm0
movdqa xmm7,xmm13
movdqa xmm2,xmm13
psrld xmm7,6
movdqa xmm1,xmm13
pslld xmm2,7
movdqa XMMWORD PTR[(112-128)+rax],xmm6
paddd xmm6,xmm8
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm6,XMMWORD PTR[96+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm13
pxor xmm7,xmm2
movdqa xmm4,xmm13
pslld xmm2,26-21
pandn xmm0,xmm15
pand xmm4,xmm14
pxor xmm7,xmm1
movdqa xmm1,xmm9
pxor xmm7,xmm2
movdqa xmm2,xmm9
psrld xmm1,2
paddd xmm6,xmm7
pxor xmm0,xmm4
movdqa xmm4,xmm10
movdqa xmm7,xmm9
pslld xmm2,10
pxor xmm4,xmm9
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm6,xmm0
pslld xmm2,19-10
pand xmm3,xmm4
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm8,xmm10
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm8,xmm3
paddd xmm12,xmm6
pxor xmm7,xmm2
paddd xmm8,xmm6
paddd xmm8,xmm7
lea rbp,QWORD PTR[256+rbp]
movdqa xmm6,XMMWORD PTR[((144-128))+rax]
paddd xmm5,XMMWORD PTR[((16-128))+rax]
movdqa xmm7,xmm6
movdqa xmm1,xmm6
psrld xmm7,3
movdqa xmm2,xmm6
psrld xmm1,7
movdqa xmm0,XMMWORD PTR[((96-128))+rax]
pslld xmm2,14
pxor xmm7,xmm1
psrld xmm1,18-7
movdqa xmm3,xmm0
pxor xmm7,xmm2
pslld xmm2,25-14
pxor xmm7,xmm1
psrld xmm0,10
movdqa xmm1,xmm3
psrld xmm3,17
pxor xmm7,xmm2
pslld xmm1,13
paddd xmm5,xmm7
pxor xmm0,xmm3
psrld xmm3,19-17
pxor xmm0,xmm1
pslld xmm1,15-13
pxor xmm0,xmm3
pxor xmm0,xmm1
paddd xmm5,xmm0
movdqa xmm7,xmm12
movdqa xmm2,xmm12
psrld xmm7,6
movdqa xmm1,xmm12
pslld xmm2,7
movdqa XMMWORD PTR[(128-128)+rax],xmm5
paddd xmm5,xmm15
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[((-128))+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm12
pxor xmm7,xmm2
movdqa xmm3,xmm12
pslld xmm2,26-21
pandn xmm0,xmm14
pand xmm3,xmm13
pxor xmm7,xmm1
movdqa xmm1,xmm8
pxor xmm7,xmm2
movdqa xmm2,xmm8
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm3
movdqa xmm3,xmm9
movdqa xmm7,xmm8
pslld xmm2,10
pxor xmm3,xmm8
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm4,xmm3
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm15,xmm9
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm15,xmm4
paddd xmm11,xmm5
pxor xmm7,xmm2
paddd xmm15,xmm5
paddd xmm15,xmm7
movdqa xmm5,XMMWORD PTR[((160-128))+rax]
paddd xmm6,XMMWORD PTR[((32-128))+rax]
movdqa xmm7,xmm5
movdqa xmm1,xmm5
psrld xmm7,3
movdqa xmm2,xmm5
psrld xmm1,7
movdqa xmm0,XMMWORD PTR[((112-128))+rax]
pslld xmm2,14
pxor xmm7,xmm1
psrld xmm1,18-7
movdqa xmm4,xmm0
pxor xmm7,xmm2
pslld xmm2,25-14
pxor xmm7,xmm1
psrld xmm0,10
movdqa xmm1,xmm4
psrld xmm4,17
pxor xmm7,xmm2
pslld xmm1,13
paddd xmm6,xmm7
pxor xmm0,xmm4
psrld xmm4,19-17
pxor xmm0,xmm1
pslld xmm1,15-13
pxor xmm0,xmm4
pxor xmm0,xmm1
paddd xmm6,xmm0
movdqa xmm7,xmm11
movdqa xmm2,xmm11
psrld xmm7,6
movdqa xmm1,xmm11
pslld xmm2,7
movdqa XMMWORD PTR[(144-128)+rax],xmm6
paddd xmm6,xmm14
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm6,XMMWORD PTR[((-96))+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm11
pxor xmm7,xmm2
movdqa xmm4,xmm11
pslld xmm2,26-21
pandn xmm0,xmm13
pand xmm4,xmm12
pxor xmm7,xmm1
movdqa xmm1,xmm15
pxor xmm7,xmm2
movdqa xmm2,xmm15
psrld xmm1,2
paddd xmm6,xmm7
pxor xmm0,xmm4
movdqa xmm4,xmm8
movdqa xmm7,xmm15
pslld xmm2,10
pxor xmm4,xmm15
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm6,xmm0
pslld xmm2,19-10
pand xmm3,xmm4
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm14,xmm8
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm14,xmm3
paddd xmm10,xmm6
pxor xmm7,xmm2
paddd xmm14,xmm6
paddd xmm14,xmm7
movdqa xmm6,XMMWORD PTR[((176-128))+rax]
paddd xmm5,XMMWORD PTR[((48-128))+rax]
movdqa xmm7,xmm6
movdqa xmm1,xmm6
psrld xmm7,3
movdqa xmm2,xmm6
psrld xmm1,7
movdqa xmm0,XMMWORD PTR[((128-128))+rax]
pslld xmm2,14
pxor xmm7,xmm1
psrld xmm1,18-7
movdqa xmm3,xmm0
pxor xmm7,xmm2
pslld xmm2,25-14
pxor xmm7,xmm1
psrld xmm0,10
movdqa xmm1,xmm3
psrld xmm3,17
pxor xmm7,xmm2
pslld xmm1,13
paddd xmm5,xmm7
pxor xmm0,xmm3
psrld xmm3,19-17
pxor xmm0,xmm1
pslld xmm1,15-13
pxor xmm0,xmm3
pxor xmm0,xmm1
paddd xmm5,xmm0
movdqa xmm7,xmm10
movdqa xmm2,xmm10
psrld xmm7,6
movdqa xmm1,xmm10
pslld xmm2,7
movdqa XMMWORD PTR[(160-128)+rax],xmm5
paddd xmm5,xmm13
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[((-64))+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm10
pxor xmm7,xmm2
movdqa xmm3,xmm10
pslld xmm2,26-21
pandn xmm0,xmm12
pand xmm3,xmm11
pxor xmm7,xmm1
movdqa xmm1,xmm14
pxor xmm7,xmm2
movdqa xmm2,xmm14
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm3
movdqa xmm3,xmm15
movdqa xmm7,xmm14
pslld xmm2,10
pxor xmm3,xmm14
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm4,xmm3
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm13,xmm15
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm13,xmm4
paddd xmm9,xmm5
pxor xmm7,xmm2
paddd xmm13,xmm5
paddd xmm13,xmm7
movdqa xmm5,XMMWORD PTR[((192-128))+rax]
paddd xmm6,XMMWORD PTR[((64-128))+rax]
movdqa xmm7,xmm5
movdqa xmm1,xmm5
psrld xmm7,3
movdqa xmm2,xmm5
psrld xmm1,7
movdqa xmm0,XMMWORD PTR[((144-128))+rax]
pslld xmm2,14
pxor xmm7,xmm1
psrld xmm1,18-7
movdqa xmm4,xmm0
pxor xmm7,xmm2
pslld xmm2,25-14
pxor xmm7,xmm1
psrld xmm0,10
movdqa xmm1,xmm4
psrld xmm4,17
pxor xmm7,xmm2
pslld xmm1,13
paddd xmm6,xmm7
pxor xmm0,xmm4
psrld xmm4,19-17
pxor xmm0,xmm1
pslld xmm1,15-13
pxor xmm0,xmm4
pxor xmm0,xmm1
paddd xmm6,xmm0
movdqa xmm7,xmm9
movdqa xmm2,xmm9
psrld xmm7,6
movdqa xmm1,xmm9
pslld xmm2,7
movdqa XMMWORD PTR[(176-128)+rax],xmm6
paddd xmm6,xmm12
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm6,XMMWORD PTR[((-32))+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm9
pxor xmm7,xmm2
movdqa xmm4,xmm9
pslld xmm2,26-21
pandn xmm0,xmm11
pand xmm4,xmm10
pxor xmm7,xmm1
movdqa xmm1,xmm13
pxor xmm7,xmm2
movdqa xmm2,xmm13
psrld xmm1,2
paddd xmm6,xmm7
pxor xmm0,xmm4
movdqa xmm4,xmm14
movdqa xmm7,xmm13
pslld xmm2,10
pxor xmm4,xmm13
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm6,xmm0
pslld xmm2,19-10
pand xmm3,xmm4
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm12,xmm14
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm12,xmm3
paddd xmm8,xmm6
pxor xmm7,xmm2
paddd xmm12,xmm6
paddd xmm12,xmm7
movdqa xmm6,XMMWORD PTR[((208-128))+rax]
paddd xmm5,XMMWORD PTR[((80-128))+rax]
movdqa xmm7,xmm6
movdqa xmm1,xmm6
psrld xmm7,3
movdqa xmm2,xmm6
psrld xmm1,7
movdqa xmm0,XMMWORD PTR[((160-128))+rax]
pslld xmm2,14
pxor xmm7,xmm1
psrld xmm1,18-7
movdqa xmm3,xmm0
pxor xmm7,xmm2
pslld xmm2,25-14
pxor xmm7,xmm1
psrld xmm0,10
movdqa xmm1,xmm3
psrld xmm3,17
pxor xmm7,xmm2
pslld xmm1,13
paddd xmm5,xmm7
pxor xmm0,xmm3
psrld xmm3,19-17
pxor xmm0,xmm1
pslld xmm1,15-13
pxor xmm0,xmm3
pxor xmm0,xmm1
paddd xmm5,xmm0
movdqa xmm7,xmm8
movdqa xmm2,xmm8
psrld xmm7,6
movdqa xmm1,xmm8
pslld xmm2,7
movdqa XMMWORD PTR[(192-128)+rax],xmm5
paddd xmm5,xmm11
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm8
pxor xmm7,xmm2
movdqa xmm3,xmm8
pslld xmm2,26-21
pandn xmm0,xmm10
pand xmm3,xmm9
pxor xmm7,xmm1
movdqa xmm1,xmm12
pxor xmm7,xmm2
movdqa xmm2,xmm12
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm3
movdqa xmm3,xmm13
movdqa xmm7,xmm12
pslld xmm2,10
pxor xmm3,xmm12
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm4,xmm3
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm11,xmm13
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm11,xmm4
paddd xmm15,xmm5
pxor xmm7,xmm2
paddd xmm11,xmm5
paddd xmm11,xmm7
movdqa xmm5,XMMWORD PTR[((224-128))+rax]
paddd xmm6,XMMWORD PTR[((96-128))+rax]
movdqa xmm7,xmm5
movdqa xmm1,xmm5
psrld xmm7,3
movdqa xmm2,xmm5
psrld xmm1,7
movdqa xmm0,XMMWORD PTR[((176-128))+rax]
pslld xmm2,14
pxor xmm7,xmm1
psrld xmm1,18-7
movdqa xmm4,xmm0
pxor xmm7,xmm2
pslld xmm2,25-14
pxor xmm7,xmm1
psrld xmm0,10
movdqa xmm1,xmm4
psrld xmm4,17
pxor xmm7,xmm2
pslld xmm1,13
paddd xmm6,xmm7
pxor xmm0,xmm4
psrld xmm4,19-17
pxor xmm0,xmm1
pslld xmm1,15-13
pxor xmm0,xmm4
pxor xmm0,xmm1
paddd xmm6,xmm0
movdqa xmm7,xmm15
movdqa xmm2,xmm15
psrld xmm7,6
movdqa xmm1,xmm15
pslld xmm2,7
movdqa XMMWORD PTR[(208-128)+rax],xmm6
paddd xmm6,xmm10
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm6,XMMWORD PTR[32+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm15
pxor xmm7,xmm2
movdqa xmm4,xmm15
pslld xmm2,26-21
pandn xmm0,xmm9
pand xmm4,xmm8
pxor xmm7,xmm1
movdqa xmm1,xmm11
pxor xmm7,xmm2
movdqa xmm2,xmm11
psrld xmm1,2
paddd xmm6,xmm7
pxor xmm0,xmm4
movdqa xmm4,xmm12
movdqa xmm7,xmm11
pslld xmm2,10
pxor xmm4,xmm11
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm6,xmm0
pslld xmm2,19-10
pand xmm3,xmm4
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm10,xmm12
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm10,xmm3
paddd xmm14,xmm6
pxor xmm7,xmm2
paddd xmm10,xmm6
paddd xmm10,xmm7
movdqa xmm6,XMMWORD PTR[((240-128))+rax]
paddd xmm5,XMMWORD PTR[((112-128))+rax]
movdqa xmm7,xmm6
movdqa xmm1,xmm6
psrld xmm7,3
movdqa xmm2,xmm6
psrld xmm1,7
movdqa xmm0,XMMWORD PTR[((192-128))+rax]
pslld xmm2,14
pxor xmm7,xmm1
psrld xmm1,18-7
movdqa xmm3,xmm0
pxor xmm7,xmm2
pslld xmm2,25-14
pxor xmm7,xmm1
psrld xmm0,10
movdqa xmm1,xmm3
psrld xmm3,17
pxor xmm7,xmm2
pslld xmm1,13
paddd xmm5,xmm7
pxor xmm0,xmm3
psrld xmm3,19-17
pxor xmm0,xmm1
pslld xmm1,15-13
pxor xmm0,xmm3
pxor xmm0,xmm1
paddd xmm5,xmm0
movdqa xmm7,xmm14
movdqa xmm2,xmm14
psrld xmm7,6
movdqa xmm1,xmm14
pslld xmm2,7
movdqa XMMWORD PTR[(224-128)+rax],xmm5
paddd xmm5,xmm9
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm5,XMMWORD PTR[64+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm14
pxor xmm7,xmm2
movdqa xmm3,xmm14
pslld xmm2,26-21
pandn xmm0,xmm8
pand xmm3,xmm15
pxor xmm7,xmm1
movdqa xmm1,xmm10
pxor xmm7,xmm2
movdqa xmm2,xmm10
psrld xmm1,2
paddd xmm5,xmm7
pxor xmm0,xmm3
movdqa xmm3,xmm11
movdqa xmm7,xmm10
pslld xmm2,10
pxor xmm3,xmm10
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm5,xmm0
pslld xmm2,19-10
pand xmm4,xmm3
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm9,xmm11
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm9,xmm4
paddd xmm13,xmm5
pxor xmm7,xmm2
paddd xmm9,xmm5
paddd xmm9,xmm7
movdqa xmm5,XMMWORD PTR[((0-128))+rax]
paddd xmm6,XMMWORD PTR[((128-128))+rax]
movdqa xmm7,xmm5
movdqa xmm1,xmm5
psrld xmm7,3
movdqa xmm2,xmm5
psrld xmm1,7
movdqa xmm0,XMMWORD PTR[((208-128))+rax]
pslld xmm2,14
pxor xmm7,xmm1
psrld xmm1,18-7
movdqa xmm4,xmm0
pxor xmm7,xmm2
pslld xmm2,25-14
pxor xmm7,xmm1
psrld xmm0,10
movdqa xmm1,xmm4
psrld xmm4,17
pxor xmm7,xmm2
pslld xmm1,13
paddd xmm6,xmm7
pxor xmm0,xmm4
psrld xmm4,19-17
pxor xmm0,xmm1
pslld xmm1,15-13
pxor xmm0,xmm4
pxor xmm0,xmm1
paddd xmm6,xmm0
movdqa xmm7,xmm13
movdqa xmm2,xmm13
psrld xmm7,6
movdqa xmm1,xmm13
pslld xmm2,7
movdqa XMMWORD PTR[(240-128)+rax],xmm6
paddd xmm6,xmm8
psrld xmm1,11
pxor xmm7,xmm2
pslld xmm2,21-7
paddd xmm6,XMMWORD PTR[96+rbp]
pxor xmm7,xmm1
psrld xmm1,25-11
movdqa xmm0,xmm13
pxor xmm7,xmm2
movdqa xmm4,xmm13
pslld xmm2,26-21
pandn xmm0,xmm15
pand xmm4,xmm14
pxor xmm7,xmm1
movdqa xmm1,xmm9
pxor xmm7,xmm2
movdqa xmm2,xmm9
psrld xmm1,2
paddd xmm6,xmm7
pxor xmm0,xmm4
movdqa xmm4,xmm10
movdqa xmm7,xmm9
pslld xmm2,10
pxor xmm4,xmm9
psrld xmm7,13
pxor xmm1,xmm2
paddd xmm6,xmm0
pslld xmm2,19-10
pand xmm3,xmm4
pxor xmm1,xmm7
psrld xmm7,22-13
pxor xmm1,xmm2
movdqa xmm8,xmm10
pslld xmm2,30-19
pxor xmm7,xmm1
pxor xmm8,xmm3
paddd xmm12,xmm6
pxor xmm7,xmm2
paddd xmm8,xmm6
paddd xmm8,xmm7
lea rbp,QWORD PTR[256+rbp]
dec ecx
jnz $L$oop_16_xx
mov ecx,1
lea rbp,QWORD PTR[((K256+128))]
movdqa xmm7,XMMWORD PTR[rbx]
cmp ecx,DWORD PTR[rbx]
pxor xmm0,xmm0
cmovge r8,rbp
cmp ecx,DWORD PTR[4+rbx]
movdqa xmm6,xmm7
cmovge r9,rbp
cmp ecx,DWORD PTR[8+rbx]
pcmpgtd xmm6,xmm0
cmovge r10,rbp
cmp ecx,DWORD PTR[12+rbx]
paddd xmm7,xmm6
cmovge r11,rbp
movdqu xmm0,XMMWORD PTR[((0-128))+rdi]
pand xmm8,xmm6
movdqu xmm1,XMMWORD PTR[((32-128))+rdi]
pand xmm9,xmm6
movdqu xmm2,XMMWORD PTR[((64-128))+rdi]
pand xmm10,xmm6
movdqu xmm5,XMMWORD PTR[((96-128))+rdi]
pand xmm11,xmm6
paddd xmm8,xmm0
movdqu xmm0,XMMWORD PTR[((128-128))+rdi]
pand xmm12,xmm6
paddd xmm9,xmm1
movdqu xmm1,XMMWORD PTR[((160-128))+rdi]
pand xmm13,xmm6
paddd xmm10,xmm2
movdqu xmm2,XMMWORD PTR[((192-128))+rdi]
pand xmm14,xmm6
paddd xmm11,xmm5
movdqu xmm5,XMMWORD PTR[((224-128))+rdi]
pand xmm15,xmm6
paddd xmm12,xmm0
paddd xmm13,xmm1
movdqu XMMWORD PTR[(0-128)+rdi],xmm8
paddd xmm14,xmm2
movdqu XMMWORD PTR[(32-128)+rdi],xmm9
paddd xmm15,xmm5
movdqu XMMWORD PTR[(64-128)+rdi],xmm10
movdqu XMMWORD PTR[(96-128)+rdi],xmm11
movdqu XMMWORD PTR[(128-128)+rdi],xmm12
movdqu XMMWORD PTR[(160-128)+rdi],xmm13
movdqu XMMWORD PTR[(192-128)+rdi],xmm14
movdqu XMMWORD PTR[(224-128)+rdi],xmm15
movdqa XMMWORD PTR[rbx],xmm7
movdqa xmm6,XMMWORD PTR[$L$pbswap]
dec edx
jnz $L$oop
mov edx,DWORD PTR[280+rsp]
lea rdi,QWORD PTR[16+rdi]
lea rsi,QWORD PTR[64+rsi]
dec edx
jnz $L$oop_grande
$L$done::
mov rax,QWORD PTR[272+rsp]
movaps xmm6,XMMWORD PTR[((-184))+rax]
movaps xmm7,XMMWORD PTR[((-168))+rax]
movaps xmm8,XMMWORD PTR[((-152))+rax]
movaps xmm9,XMMWORD PTR[((-136))+rax]
movaps xmm10,XMMWORD PTR[((-120))+rax]
movaps xmm11,XMMWORD PTR[((-104))+rax]
movaps xmm12,XMMWORD PTR[((-88))+rax]
movaps xmm13,XMMWORD PTR[((-72))+rax]
movaps xmm14,XMMWORD PTR[((-56))+rax]
movaps xmm15,XMMWORD PTR[((-40))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov rbx,QWORD PTR[((-8))+rax]
lea rsp,QWORD PTR[rax]
$L$epilogue::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_sha256_multi_block::
sha256_multi_block ENDP
ALIGN 32
sha256_multi_block_shaext PROC PRIVATE
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
mov QWORD PTR[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_sha256_multi_block_shaext::
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
_shaext_shortcut::
mov rax,rsp
push rbx
push rbp
lea rsp,QWORD PTR[((-168))+rsp]
movaps XMMWORD PTR[rsp],xmm6
movaps XMMWORD PTR[16+rsp],xmm7
movaps XMMWORD PTR[32+rsp],xmm8
movaps XMMWORD PTR[48+rsp],xmm9
movaps XMMWORD PTR[(-120)+rax],xmm10
movaps XMMWORD PTR[(-104)+rax],xmm11
movaps XMMWORD PTR[(-88)+rax],xmm12
movaps XMMWORD PTR[(-72)+rax],xmm13
movaps XMMWORD PTR[(-56)+rax],xmm14
movaps XMMWORD PTR[(-40)+rax],xmm15
sub rsp,288
shl edx,1
and rsp,-256
lea rdi,QWORD PTR[128+rdi]
mov QWORD PTR[272+rsp],rax
$L$body_shaext::
lea rbx,QWORD PTR[256+rsp]
lea rbp,QWORD PTR[((K256_shaext+128))]
$L$oop_grande_shaext::
mov DWORD PTR[280+rsp],edx
xor edx,edx
mov r8,QWORD PTR[rsi]
mov ecx,DWORD PTR[8+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[rbx],ecx
cmovle r8,rsp
mov r9,QWORD PTR[16+rsi]
mov ecx,DWORD PTR[24+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[4+rbx],ecx
cmovle r9,rsp
test edx,edx
jz $L$done_shaext
movq xmm12,QWORD PTR[((0-128))+rdi]
movq xmm4,QWORD PTR[((32-128))+rdi]
movq xmm13,QWORD PTR[((64-128))+rdi]
movq xmm5,QWORD PTR[((96-128))+rdi]
movq xmm8,QWORD PTR[((128-128))+rdi]
movq xmm9,QWORD PTR[((160-128))+rdi]
movq xmm10,QWORD PTR[((192-128))+rdi]
movq xmm11,QWORD PTR[((224-128))+rdi]
punpckldq xmm12,xmm4
punpckldq xmm13,xmm5
punpckldq xmm8,xmm9
punpckldq xmm10,xmm11
movdqa xmm3,XMMWORD PTR[((K256_shaext-16))]
movdqa xmm14,xmm12
movdqa xmm15,xmm13
punpcklqdq xmm12,xmm8
punpcklqdq xmm13,xmm10
punpckhqdq xmm14,xmm8
punpckhqdq xmm15,xmm10
pshufd xmm12,xmm12,27
pshufd xmm13,xmm13,27
pshufd xmm14,xmm14,27
pshufd xmm15,xmm15,27
jmp $L$oop_shaext
ALIGN 32
$L$oop_shaext::
movdqu xmm4,XMMWORD PTR[r8]
movdqu xmm8,XMMWORD PTR[r9]
movdqu xmm5,XMMWORD PTR[16+r8]
movdqu xmm9,XMMWORD PTR[16+r9]
movdqu xmm6,XMMWORD PTR[32+r8]
DB 102,15,56,0,227
movdqu xmm10,XMMWORD PTR[32+r9]
DB 102,68,15,56,0,195
movdqu xmm7,XMMWORD PTR[48+r8]
lea r8,QWORD PTR[64+r8]
movdqu xmm11,XMMWORD PTR[48+r9]
lea r9,QWORD PTR[64+r9]
movdqa xmm0,XMMWORD PTR[((0-128))+rbp]
DB 102,15,56,0,235
paddd xmm0,xmm4
pxor xmm4,xmm12
movdqa xmm1,xmm0
movdqa xmm2,XMMWORD PTR[((0-128))+rbp]
DB 102,68,15,56,0,203
paddd xmm2,xmm8
movdqa XMMWORD PTR[80+rsp],xmm13
DB 69,15,56,203,236
pxor xmm8,xmm14
movdqa xmm0,xmm2
movdqa XMMWORD PTR[112+rsp],xmm15
DB 69,15,56,203,254
pshufd xmm0,xmm1,00eh
pxor xmm4,xmm12
movdqa XMMWORD PTR[64+rsp],xmm12
DB 69,15,56,203,229
pshufd xmm0,xmm2,00eh
pxor xmm8,xmm14
movdqa XMMWORD PTR[96+rsp],xmm14
movdqa xmm1,XMMWORD PTR[((16-128))+rbp]
paddd xmm1,xmm5
DB 102,15,56,0,243
DB 69,15,56,203,247
movdqa xmm0,xmm1
movdqa xmm2,XMMWORD PTR[((16-128))+rbp]
paddd xmm2,xmm9
DB 69,15,56,203,236
movdqa xmm0,xmm2
prefetcht0 [127+r8]
DB 102,15,56,0,251
DB 102,68,15,56,0,211
prefetcht0 [127+r9]
DB 69,15,56,203,254
pshufd xmm0,xmm1,00eh
DB 102,68,15,56,0,219
DB 15,56,204,229
DB 69,15,56,203,229
pshufd xmm0,xmm2,00eh
movdqa xmm1,XMMWORD PTR[((32-128))+rbp]
paddd xmm1,xmm6
DB 69,15,56,203,247
movdqa xmm0,xmm1
movdqa xmm2,XMMWORD PTR[((32-128))+rbp]
paddd xmm2,xmm10
DB 69,15,56,203,236
DB 69,15,56,204,193
movdqa xmm0,xmm2
movdqa xmm3,xmm7
DB 69,15,56,203,254
pshufd xmm0,xmm1,00eh
DB 102,15,58,15,222,4
paddd xmm4,xmm3
movdqa xmm3,xmm11
DB 102,65,15,58,15,218,4
DB 15,56,204,238
DB 69,15,56,203,229
pshufd xmm0,xmm2,00eh
movdqa xmm1,XMMWORD PTR[((48-128))+rbp]
paddd xmm1,xmm7
DB 69,15,56,203,247
DB 69,15,56,204,202
movdqa xmm0,xmm1
movdqa xmm2,XMMWORD PTR[((48-128))+rbp]
paddd xmm8,xmm3
paddd xmm2,xmm11
DB 15,56,205,231
DB 69,15,56,203,236
movdqa xmm0,xmm2
movdqa xmm3,xmm4
DB 102,15,58,15,223,4
DB 69,15,56,203,254
DB 69,15,56,205,195
pshufd xmm0,xmm1,00eh
paddd xmm5,xmm3
movdqa xmm3,xmm8
DB 102,65,15,58,15,219,4
DB 15,56,204,247
DB 69,15,56,203,229
pshufd xmm0,xmm2,00eh
movdqa xmm1,XMMWORD PTR[((64-128))+rbp]
paddd xmm1,xmm4
DB 69,15,56,203,247
DB 69,15,56,204,211
movdqa xmm0,xmm1
movdqa xmm2,XMMWORD PTR[((64-128))+rbp]
paddd xmm9,xmm3
paddd xmm2,xmm8
DB 15,56,205,236
DB 69,15,56,203,236
movdqa xmm0,xmm2
movdqa xmm3,xmm5
DB 102,15,58,15,220,4
DB 69,15,56,203,254
DB 69,15,56,205,200
pshufd xmm0,xmm1,00eh
paddd xmm6,xmm3
movdqa xmm3,xmm9
DB 102,65,15,58,15,216,4
DB 15,56,204,252
DB 69,15,56,203,229
pshufd xmm0,xmm2,00eh
movdqa xmm1,XMMWORD PTR[((80-128))+rbp]
paddd xmm1,xmm5
DB 69,15,56,203,247
DB 69,15,56,204,216
movdqa xmm0,xmm1
movdqa xmm2,XMMWORD PTR[((80-128))+rbp]
paddd xmm10,xmm3
paddd xmm2,xmm9
DB 15,56,205,245
DB 69,15,56,203,236
movdqa xmm0,xmm2
movdqa xmm3,xmm6
DB 102,15,58,15,221,4
DB 69,15,56,203,254
DB 69,15,56,205,209
pshufd xmm0,xmm1,00eh
paddd xmm7,xmm3
movdqa xmm3,xmm10
DB 102,65,15,58,15,217,4
DB 15,56,204,229
DB 69,15,56,203,229
pshufd xmm0,xmm2,00eh
movdqa xmm1,XMMWORD PTR[((96-128))+rbp]
paddd xmm1,xmm6
DB 69,15,56,203,247
DB 69,15,56,204,193
movdqa xmm0,xmm1
movdqa xmm2,XMMWORD PTR[((96-128))+rbp]
paddd xmm11,xmm3
paddd xmm2,xmm10
DB 15,56,205,254
DB 69,15,56,203,236
movdqa xmm0,xmm2
movdqa xmm3,xmm7
DB 102,15,58,15,222,4
DB 69,15,56,203,254
DB 69,15,56,205,218
pshufd xmm0,xmm1,00eh
paddd xmm4,xmm3
movdqa xmm3,xmm11
DB 102,65,15,58,15,218,4
DB 15,56,204,238
DB 69,15,56,203,229
pshufd xmm0,xmm2,00eh
movdqa xmm1,XMMWORD PTR[((112-128))+rbp]
paddd xmm1,xmm7
DB 69,15,56,203,247
DB 69,15,56,204,202
movdqa xmm0,xmm1
movdqa xmm2,XMMWORD PTR[((112-128))+rbp]
paddd xmm8,xmm3
paddd xmm2,xmm11
DB 15,56,205,231
DB 69,15,56,203,236
movdqa xmm0,xmm2
movdqa xmm3,xmm4
DB 102,15,58,15,223,4
DB 69,15,56,203,254
DB 69,15,56,205,195
pshufd xmm0,xmm1,00eh
paddd xmm5,xmm3
movdqa xmm3,xmm8
DB 102,65,15,58,15,219,4
DB 15,56,204,247
DB 69,15,56,203,229
pshufd xmm0,xmm2,00eh
movdqa xmm1,XMMWORD PTR[((128-128))+rbp]
paddd xmm1,xmm4
DB 69,15,56,203,247
DB 69,15,56,204,211
movdqa xmm0,xmm1
movdqa xmm2,XMMWORD PTR[((128-128))+rbp]
paddd xmm9,xmm3
paddd xmm2,xmm8
DB 15,56,205,236
DB 69,15,56,203,236
movdqa xmm0,xmm2
movdqa xmm3,xmm5
DB 102,15,58,15,220,4
DB 69,15,56,203,254
DB 69,15,56,205,200
pshufd xmm0,xmm1,00eh
paddd xmm6,xmm3
movdqa xmm3,xmm9
DB 102,65,15,58,15,216,4
DB 15,56,204,252
DB 69,15,56,203,229
pshufd xmm0,xmm2,00eh
movdqa xmm1,XMMWORD PTR[((144-128))+rbp]
paddd xmm1,xmm5
DB 69,15,56,203,247
DB 69,15,56,204,216
movdqa xmm0,xmm1
movdqa xmm2,XMMWORD PTR[((144-128))+rbp]
paddd xmm10,xmm3
paddd xmm2,xmm9
DB 15,56,205,245
DB 69,15,56,203,236
movdqa xmm0,xmm2
movdqa xmm3,xmm6
DB 102,15,58,15,221,4
DB 69,15,56,203,254
DB 69,15,56,205,209
pshufd xmm0,xmm1,00eh
paddd xmm7,xmm3
movdqa xmm3,xmm10
DB 102,65,15,58,15,217,4
DB 15,56,204,229
DB 69,15,56,203,229
pshufd xmm0,xmm2,00eh
movdqa xmm1,XMMWORD PTR[((160-128))+rbp]
paddd xmm1,xmm6
DB 69,15,56,203,247
DB 69,15,56,204,193
movdqa xmm0,xmm1
movdqa xmm2,XMMWORD PTR[((160-128))+rbp]
paddd xmm11,xmm3
paddd xmm2,xmm10
DB 15,56,205,254
DB 69,15,56,203,236
movdqa xmm0,xmm2
movdqa xmm3,xmm7
DB 102,15,58,15,222,4
DB 69,15,56,203,254
DB 69,15,56,205,218
pshufd xmm0,xmm1,00eh
paddd xmm4,xmm3
movdqa xmm3,xmm11
DB 102,65,15,58,15,218,4
DB 15,56,204,238
DB 69,15,56,203,229
pshufd xmm0,xmm2,00eh
movdqa xmm1,XMMWORD PTR[((176-128))+rbp]
paddd xmm1,xmm7
DB 69,15,56,203,247
DB 69,15,56,204,202
movdqa xmm0,xmm1
movdqa xmm2,XMMWORD PTR[((176-128))+rbp]
paddd xmm8,xmm3
paddd xmm2,xmm11
DB 15,56,205,231
DB 69,15,56,203,236
movdqa xmm0,xmm2
movdqa xmm3,xmm4
DB 102,15,58,15,223,4
DB 69,15,56,203,254
DB 69,15,56,205,195
pshufd xmm0,xmm1,00eh
paddd xmm5,xmm3
movdqa xmm3,xmm8
DB 102,65,15,58,15,219,4
DB 15,56,204,247
DB 69,15,56,203,229
pshufd xmm0,xmm2,00eh
movdqa xmm1,XMMWORD PTR[((192-128))+rbp]
paddd xmm1,xmm4
DB 69,15,56,203,247
DB 69,15,56,204,211
movdqa xmm0,xmm1
movdqa xmm2,XMMWORD PTR[((192-128))+rbp]
paddd xmm9,xmm3
paddd xmm2,xmm8
DB 15,56,205,236
DB 69,15,56,203,236
movdqa xmm0,xmm2
movdqa xmm3,xmm5
DB 102,15,58,15,220,4
DB 69,15,56,203,254
DB 69,15,56,205,200
pshufd xmm0,xmm1,00eh
paddd xmm6,xmm3
movdqa xmm3,xmm9
DB 102,65,15,58,15,216,4
DB 15,56,204,252
DB 69,15,56,203,229
pshufd xmm0,xmm2,00eh
movdqa xmm1,XMMWORD PTR[((208-128))+rbp]
paddd xmm1,xmm5
DB 69,15,56,203,247
DB 69,15,56,204,216
movdqa xmm0,xmm1
movdqa xmm2,XMMWORD PTR[((208-128))+rbp]
paddd xmm10,xmm3
paddd xmm2,xmm9
DB 15,56,205,245
DB 69,15,56,203,236
movdqa xmm0,xmm2
movdqa xmm3,xmm6
DB 102,15,58,15,221,4
DB 69,15,56,203,254
DB 69,15,56,205,209
pshufd xmm0,xmm1,00eh
paddd xmm7,xmm3
movdqa xmm3,xmm10
DB 102,65,15,58,15,217,4
nop
DB 69,15,56,203,229
pshufd xmm0,xmm2,00eh
movdqa xmm1,XMMWORD PTR[((224-128))+rbp]
paddd xmm1,xmm6
DB 69,15,56,203,247
movdqa xmm0,xmm1
movdqa xmm2,XMMWORD PTR[((224-128))+rbp]
paddd xmm11,xmm3
paddd xmm2,xmm10
DB 15,56,205,254
nop
DB 69,15,56,203,236
movdqa xmm0,xmm2
mov ecx,1
pxor xmm6,xmm6
DB 69,15,56,203,254
DB 69,15,56,205,218
pshufd xmm0,xmm1,00eh
movdqa xmm1,XMMWORD PTR[((240-128))+rbp]
paddd xmm1,xmm7
movq xmm7,QWORD PTR[rbx]
nop
DB 69,15,56,203,229
pshufd xmm0,xmm2,00eh
movdqa xmm2,XMMWORD PTR[((240-128))+rbp]
paddd xmm2,xmm11
DB 69,15,56,203,247
movdqa xmm0,xmm1
cmp ecx,DWORD PTR[rbx]
cmovge r8,rsp
cmp ecx,DWORD PTR[4+rbx]
cmovge r9,rsp
pshufd xmm9,xmm7,000h
DB 69,15,56,203,236
movdqa xmm0,xmm2
pshufd xmm10,xmm7,055h
movdqa xmm11,xmm7
DB 69,15,56,203,254
pshufd xmm0,xmm1,00eh
pcmpgtd xmm9,xmm6
pcmpgtd xmm10,xmm6
DB 69,15,56,203,229
pshufd xmm0,xmm2,00eh
pcmpgtd xmm11,xmm6
movdqa xmm3,XMMWORD PTR[((K256_shaext-16))]
DB 69,15,56,203,247
pand xmm13,xmm9
pand xmm15,xmm10
pand xmm12,xmm9
pand xmm14,xmm10
paddd xmm11,xmm7
paddd xmm13,XMMWORD PTR[80+rsp]
paddd xmm15,XMMWORD PTR[112+rsp]
paddd xmm12,XMMWORD PTR[64+rsp]
paddd xmm14,XMMWORD PTR[96+rsp]
movq QWORD PTR[rbx],xmm11
dec edx
jnz $L$oop_shaext
mov edx,DWORD PTR[280+rsp]
pshufd xmm12,xmm12,27
pshufd xmm13,xmm13,27
pshufd xmm14,xmm14,27
pshufd xmm15,xmm15,27
movdqa xmm5,xmm12
movdqa xmm6,xmm13
punpckldq xmm12,xmm14
punpckhdq xmm5,xmm14
punpckldq xmm13,xmm15
punpckhdq xmm6,xmm15
movq QWORD PTR[(0-128)+rdi],xmm12
psrldq xmm12,8
movq QWORD PTR[(128-128)+rdi],xmm5
psrldq xmm5,8
movq QWORD PTR[(32-128)+rdi],xmm12
movq QWORD PTR[(160-128)+rdi],xmm5
movq QWORD PTR[(64-128)+rdi],xmm13
psrldq xmm13,8
movq QWORD PTR[(192-128)+rdi],xmm6
psrldq xmm6,8
movq QWORD PTR[(96-128)+rdi],xmm13
movq QWORD PTR[(224-128)+rdi],xmm6
lea rdi,QWORD PTR[8+rdi]
lea rsi,QWORD PTR[32+rsi]
dec edx
jnz $L$oop_grande_shaext
$L$done_shaext::
movaps xmm6,XMMWORD PTR[((-184))+rax]
movaps xmm7,XMMWORD PTR[((-168))+rax]
movaps xmm8,XMMWORD PTR[((-152))+rax]
movaps xmm9,XMMWORD PTR[((-136))+rax]
movaps xmm10,XMMWORD PTR[((-120))+rax]
movaps xmm11,XMMWORD PTR[((-104))+rax]
movaps xmm12,XMMWORD PTR[((-88))+rax]
movaps xmm13,XMMWORD PTR[((-72))+rax]
movaps xmm14,XMMWORD PTR[((-56))+rax]
movaps xmm15,XMMWORD PTR[((-40))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov rbx,QWORD PTR[((-8))+rax]
lea rsp,QWORD PTR[rax]
$L$epilogue_shaext::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_sha256_multi_block_shaext::
sha256_multi_block_shaext ENDP
ALIGN 32
sha256_multi_block_avx PROC PRIVATE
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
mov QWORD PTR[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_sha256_multi_block_avx::
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
_avx_shortcut::
shr rcx,32
cmp edx,2
jb $L$avx
test ecx,32
jnz _avx2_shortcut
jmp $L$avx
ALIGN 32
$L$avx::
mov rax,rsp
push rbx
push rbp
lea rsp,QWORD PTR[((-168))+rsp]
movaps XMMWORD PTR[rsp],xmm6
movaps XMMWORD PTR[16+rsp],xmm7
movaps XMMWORD PTR[32+rsp],xmm8
movaps XMMWORD PTR[48+rsp],xmm9
movaps XMMWORD PTR[(-120)+rax],xmm10
movaps XMMWORD PTR[(-104)+rax],xmm11
movaps XMMWORD PTR[(-88)+rax],xmm12
movaps XMMWORD PTR[(-72)+rax],xmm13
movaps XMMWORD PTR[(-56)+rax],xmm14
movaps XMMWORD PTR[(-40)+rax],xmm15
sub rsp,288
and rsp,-256
mov QWORD PTR[272+rsp],rax
$L$body_avx::
lea rbp,QWORD PTR[((K256+128))]
lea rbx,QWORD PTR[256+rsp]
lea rdi,QWORD PTR[128+rdi]
$L$oop_grande_avx::
mov DWORD PTR[280+rsp],edx
xor edx,edx
mov r8,QWORD PTR[rsi]
mov ecx,DWORD PTR[8+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[rbx],ecx
cmovle r8,rbp
mov r9,QWORD PTR[16+rsi]
mov ecx,DWORD PTR[24+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[4+rbx],ecx
cmovle r9,rbp
mov r10,QWORD PTR[32+rsi]
mov ecx,DWORD PTR[40+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[8+rbx],ecx
cmovle r10,rbp
mov r11,QWORD PTR[48+rsi]
mov ecx,DWORD PTR[56+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[12+rbx],ecx
cmovle r11,rbp
test edx,edx
jz $L$done_avx
vmovdqu xmm8,XMMWORD PTR[((0-128))+rdi]
lea rax,QWORD PTR[128+rsp]
vmovdqu xmm9,XMMWORD PTR[((32-128))+rdi]
vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi]
vmovdqu xmm11,XMMWORD PTR[((96-128))+rdi]
vmovdqu xmm12,XMMWORD PTR[((128-128))+rdi]
vmovdqu xmm13,XMMWORD PTR[((160-128))+rdi]
vmovdqu xmm14,XMMWORD PTR[((192-128))+rdi]
vmovdqu xmm15,XMMWORD PTR[((224-128))+rdi]
vmovdqu xmm6,XMMWORD PTR[$L$pbswap]
jmp $L$oop_avx
ALIGN 32
$L$oop_avx::
vpxor xmm4,xmm10,xmm9
vmovd xmm5,DWORD PTR[r8]
vmovd xmm0,DWORD PTR[r9]
vpinsrd xmm5,xmm5,DWORD PTR[r10],1
vpinsrd xmm0,xmm0,DWORD PTR[r11],1
vpunpckldq xmm5,xmm5,xmm0
vpshufb xmm5,xmm5,xmm6
vpsrld xmm7,xmm12,6
vpslld xmm2,xmm12,26
vmovdqu XMMWORD PTR[(0-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm15
vpsrld xmm1,xmm12,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm12,21
vpaddd xmm5,xmm5,XMMWORD PTR[((-128))+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm12,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm12,7
vpandn xmm0,xmm12,xmm14
vpand xmm3,xmm12,xmm13
vpxor xmm7,xmm7,xmm1
vpsrld xmm15,xmm8,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm8,30
vpxor xmm0,xmm0,xmm3
vpxor xmm3,xmm9,xmm8
vpxor xmm15,xmm15,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm8,13
vpslld xmm2,xmm8,19
vpaddd xmm5,xmm5,xmm0
vpand xmm4,xmm4,xmm3
vpxor xmm7,xmm15,xmm1
vpsrld xmm1,xmm8,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm8,10
vpxor xmm15,xmm9,xmm4
vpaddd xmm11,xmm11,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm15,xmm15,xmm5
vpaddd xmm15,xmm15,xmm7
vmovd xmm5,DWORD PTR[4+r8]
vmovd xmm0,DWORD PTR[4+r9]
vpinsrd xmm5,xmm5,DWORD PTR[4+r10],1
vpinsrd xmm0,xmm0,DWORD PTR[4+r11],1
vpunpckldq xmm5,xmm5,xmm0
vpshufb xmm5,xmm5,xmm6
vpsrld xmm7,xmm11,6
vpslld xmm2,xmm11,26
vmovdqu XMMWORD PTR[(16-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm14
vpsrld xmm1,xmm11,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm11,21
vpaddd xmm5,xmm5,XMMWORD PTR[((-96))+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm11,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm11,7
vpandn xmm0,xmm11,xmm13
vpand xmm4,xmm11,xmm12
vpxor xmm7,xmm7,xmm1
vpsrld xmm14,xmm15,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm15,30
vpxor xmm0,xmm0,xmm4
vpxor xmm4,xmm8,xmm15
vpxor xmm14,xmm14,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm15,13
vpslld xmm2,xmm15,19
vpaddd xmm5,xmm5,xmm0
vpand xmm3,xmm3,xmm4
vpxor xmm7,xmm14,xmm1
vpsrld xmm1,xmm15,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm15,10
vpxor xmm14,xmm8,xmm3
vpaddd xmm10,xmm10,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm14,xmm14,xmm5
vpaddd xmm14,xmm14,xmm7
vmovd xmm5,DWORD PTR[8+r8]
vmovd xmm0,DWORD PTR[8+r9]
vpinsrd xmm5,xmm5,DWORD PTR[8+r10],1
vpinsrd xmm0,xmm0,DWORD PTR[8+r11],1
vpunpckldq xmm5,xmm5,xmm0
vpshufb xmm5,xmm5,xmm6
vpsrld xmm7,xmm10,6
vpslld xmm2,xmm10,26
vmovdqu XMMWORD PTR[(32-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm13
vpsrld xmm1,xmm10,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm10,21
vpaddd xmm5,xmm5,XMMWORD PTR[((-64))+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm10,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm10,7
vpandn xmm0,xmm10,xmm12
vpand xmm3,xmm10,xmm11
vpxor xmm7,xmm7,xmm1
vpsrld xmm13,xmm14,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm14,30
vpxor xmm0,xmm0,xmm3
vpxor xmm3,xmm15,xmm14
vpxor xmm13,xmm13,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm14,13
vpslld xmm2,xmm14,19
vpaddd xmm5,xmm5,xmm0
vpand xmm4,xmm4,xmm3
vpxor xmm7,xmm13,xmm1
vpsrld xmm1,xmm14,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm14,10
vpxor xmm13,xmm15,xmm4
vpaddd xmm9,xmm9,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm13,xmm13,xmm5
vpaddd xmm13,xmm13,xmm7
vmovd xmm5,DWORD PTR[12+r8]
vmovd xmm0,DWORD PTR[12+r9]
vpinsrd xmm5,xmm5,DWORD PTR[12+r10],1
vpinsrd xmm0,xmm0,DWORD PTR[12+r11],1
vpunpckldq xmm5,xmm5,xmm0
vpshufb xmm5,xmm5,xmm6
vpsrld xmm7,xmm9,6
vpslld xmm2,xmm9,26
vmovdqu XMMWORD PTR[(48-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm12
vpsrld xmm1,xmm9,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm9,21
vpaddd xmm5,xmm5,XMMWORD PTR[((-32))+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm9,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm9,7
vpandn xmm0,xmm9,xmm11
vpand xmm4,xmm9,xmm10
vpxor xmm7,xmm7,xmm1
vpsrld xmm12,xmm13,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm13,30
vpxor xmm0,xmm0,xmm4
vpxor xmm4,xmm14,xmm13
vpxor xmm12,xmm12,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm13,13
vpslld xmm2,xmm13,19
vpaddd xmm5,xmm5,xmm0
vpand xmm3,xmm3,xmm4
vpxor xmm7,xmm12,xmm1
vpsrld xmm1,xmm13,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm13,10
vpxor xmm12,xmm14,xmm3
vpaddd xmm8,xmm8,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm12,xmm12,xmm5
vpaddd xmm12,xmm12,xmm7
vmovd xmm5,DWORD PTR[16+r8]
vmovd xmm0,DWORD PTR[16+r9]
vpinsrd xmm5,xmm5,DWORD PTR[16+r10],1
vpinsrd xmm0,xmm0,DWORD PTR[16+r11],1
vpunpckldq xmm5,xmm5,xmm0
vpshufb xmm5,xmm5,xmm6
vpsrld xmm7,xmm8,6
vpslld xmm2,xmm8,26
vmovdqu XMMWORD PTR[(64-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm11
vpsrld xmm1,xmm8,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm8,21
vpaddd xmm5,xmm5,XMMWORD PTR[rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm8,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm8,7
vpandn xmm0,xmm8,xmm10
vpand xmm3,xmm8,xmm9
vpxor xmm7,xmm7,xmm1
vpsrld xmm11,xmm12,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm12,30
vpxor xmm0,xmm0,xmm3
vpxor xmm3,xmm13,xmm12
vpxor xmm11,xmm11,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm12,13
vpslld xmm2,xmm12,19
vpaddd xmm5,xmm5,xmm0
vpand xmm4,xmm4,xmm3
vpxor xmm7,xmm11,xmm1
vpsrld xmm1,xmm12,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm12,10
vpxor xmm11,xmm13,xmm4
vpaddd xmm15,xmm15,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm11,xmm11,xmm5
vpaddd xmm11,xmm11,xmm7
vmovd xmm5,DWORD PTR[20+r8]
vmovd xmm0,DWORD PTR[20+r9]
vpinsrd xmm5,xmm5,DWORD PTR[20+r10],1
vpinsrd xmm0,xmm0,DWORD PTR[20+r11],1
vpunpckldq xmm5,xmm5,xmm0
vpshufb xmm5,xmm5,xmm6
vpsrld xmm7,xmm15,6
vpslld xmm2,xmm15,26
vmovdqu XMMWORD PTR[(80-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm10
vpsrld xmm1,xmm15,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm15,21
vpaddd xmm5,xmm5,XMMWORD PTR[32+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm15,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm15,7
vpandn xmm0,xmm15,xmm9
vpand xmm4,xmm15,xmm8
vpxor xmm7,xmm7,xmm1
vpsrld xmm10,xmm11,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm11,30
vpxor xmm0,xmm0,xmm4
vpxor xmm4,xmm12,xmm11
vpxor xmm10,xmm10,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm11,13
vpslld xmm2,xmm11,19
vpaddd xmm5,xmm5,xmm0
vpand xmm3,xmm3,xmm4
vpxor xmm7,xmm10,xmm1
vpsrld xmm1,xmm11,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm11,10
vpxor xmm10,xmm12,xmm3
vpaddd xmm14,xmm14,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm10,xmm10,xmm5
vpaddd xmm10,xmm10,xmm7
vmovd xmm5,DWORD PTR[24+r8]
vmovd xmm0,DWORD PTR[24+r9]
vpinsrd xmm5,xmm5,DWORD PTR[24+r10],1
vpinsrd xmm0,xmm0,DWORD PTR[24+r11],1
vpunpckldq xmm5,xmm5,xmm0
vpshufb xmm5,xmm5,xmm6
vpsrld xmm7,xmm14,6
vpslld xmm2,xmm14,26
vmovdqu XMMWORD PTR[(96-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm9
vpsrld xmm1,xmm14,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm14,21
vpaddd xmm5,xmm5,XMMWORD PTR[64+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm14,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm14,7
vpandn xmm0,xmm14,xmm8
vpand xmm3,xmm14,xmm15
vpxor xmm7,xmm7,xmm1
vpsrld xmm9,xmm10,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm10,30
vpxor xmm0,xmm0,xmm3
vpxor xmm3,xmm11,xmm10
vpxor xmm9,xmm9,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm10,13
vpslld xmm2,xmm10,19
vpaddd xmm5,xmm5,xmm0
vpand xmm4,xmm4,xmm3
vpxor xmm7,xmm9,xmm1
vpsrld xmm1,xmm10,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm10,10
vpxor xmm9,xmm11,xmm4
vpaddd xmm13,xmm13,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm9,xmm9,xmm5
vpaddd xmm9,xmm9,xmm7
vmovd xmm5,DWORD PTR[28+r8]
vmovd xmm0,DWORD PTR[28+r9]
vpinsrd xmm5,xmm5,DWORD PTR[28+r10],1
vpinsrd xmm0,xmm0,DWORD PTR[28+r11],1
vpunpckldq xmm5,xmm5,xmm0
vpshufb xmm5,xmm5,xmm6
vpsrld xmm7,xmm13,6
vpslld xmm2,xmm13,26
vmovdqu XMMWORD PTR[(112-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm8
vpsrld xmm1,xmm13,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm13,21
vpaddd xmm5,xmm5,XMMWORD PTR[96+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm13,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm13,7
vpandn xmm0,xmm13,xmm15
vpand xmm4,xmm13,xmm14
vpxor xmm7,xmm7,xmm1
vpsrld xmm8,xmm9,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm9,30
vpxor xmm0,xmm0,xmm4
vpxor xmm4,xmm10,xmm9
vpxor xmm8,xmm8,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm9,13
vpslld xmm2,xmm9,19
vpaddd xmm5,xmm5,xmm0
vpand xmm3,xmm3,xmm4
vpxor xmm7,xmm8,xmm1
vpsrld xmm1,xmm9,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm9,10
vpxor xmm8,xmm10,xmm3
vpaddd xmm12,xmm12,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm8,xmm8,xmm5
vpaddd xmm8,xmm8,xmm7
add rbp,256
vmovd xmm5,DWORD PTR[32+r8]
vmovd xmm0,DWORD PTR[32+r9]
vpinsrd xmm5,xmm5,DWORD PTR[32+r10],1
vpinsrd xmm0,xmm0,DWORD PTR[32+r11],1
vpunpckldq xmm5,xmm5,xmm0
vpshufb xmm5,xmm5,xmm6
vpsrld xmm7,xmm12,6
vpslld xmm2,xmm12,26
vmovdqu XMMWORD PTR[(128-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm15
vpsrld xmm1,xmm12,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm12,21
vpaddd xmm5,xmm5,XMMWORD PTR[((-128))+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm12,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm12,7
vpandn xmm0,xmm12,xmm14
vpand xmm3,xmm12,xmm13
vpxor xmm7,xmm7,xmm1
vpsrld xmm15,xmm8,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm8,30
vpxor xmm0,xmm0,xmm3
vpxor xmm3,xmm9,xmm8
vpxor xmm15,xmm15,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm8,13
vpslld xmm2,xmm8,19
vpaddd xmm5,xmm5,xmm0
vpand xmm4,xmm4,xmm3
vpxor xmm7,xmm15,xmm1
vpsrld xmm1,xmm8,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm8,10
vpxor xmm15,xmm9,xmm4
vpaddd xmm11,xmm11,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm15,xmm15,xmm5
vpaddd xmm15,xmm15,xmm7
vmovd xmm5,DWORD PTR[36+r8]
vmovd xmm0,DWORD PTR[36+r9]
vpinsrd xmm5,xmm5,DWORD PTR[36+r10],1
vpinsrd xmm0,xmm0,DWORD PTR[36+r11],1
vpunpckldq xmm5,xmm5,xmm0
vpshufb xmm5,xmm5,xmm6
vpsrld xmm7,xmm11,6
vpslld xmm2,xmm11,26
vmovdqu XMMWORD PTR[(144-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm14
vpsrld xmm1,xmm11,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm11,21
vpaddd xmm5,xmm5,XMMWORD PTR[((-96))+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm11,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm11,7
vpandn xmm0,xmm11,xmm13
vpand xmm4,xmm11,xmm12
vpxor xmm7,xmm7,xmm1
vpsrld xmm14,xmm15,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm15,30
vpxor xmm0,xmm0,xmm4
vpxor xmm4,xmm8,xmm15
vpxor xmm14,xmm14,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm15,13
vpslld xmm2,xmm15,19
vpaddd xmm5,xmm5,xmm0
vpand xmm3,xmm3,xmm4
vpxor xmm7,xmm14,xmm1
vpsrld xmm1,xmm15,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm15,10
vpxor xmm14,xmm8,xmm3
vpaddd xmm10,xmm10,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm14,xmm14,xmm5
vpaddd xmm14,xmm14,xmm7
vmovd xmm5,DWORD PTR[40+r8]
vmovd xmm0,DWORD PTR[40+r9]
vpinsrd xmm5,xmm5,DWORD PTR[40+r10],1
vpinsrd xmm0,xmm0,DWORD PTR[40+r11],1
vpunpckldq xmm5,xmm5,xmm0
vpshufb xmm5,xmm5,xmm6
vpsrld xmm7,xmm10,6
vpslld xmm2,xmm10,26
vmovdqu XMMWORD PTR[(160-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm13
vpsrld xmm1,xmm10,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm10,21
vpaddd xmm5,xmm5,XMMWORD PTR[((-64))+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm10,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm10,7
vpandn xmm0,xmm10,xmm12
vpand xmm3,xmm10,xmm11
vpxor xmm7,xmm7,xmm1
vpsrld xmm13,xmm14,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm14,30
vpxor xmm0,xmm0,xmm3
vpxor xmm3,xmm15,xmm14
vpxor xmm13,xmm13,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm14,13
vpslld xmm2,xmm14,19
vpaddd xmm5,xmm5,xmm0
vpand xmm4,xmm4,xmm3
vpxor xmm7,xmm13,xmm1
vpsrld xmm1,xmm14,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm14,10
vpxor xmm13,xmm15,xmm4
vpaddd xmm9,xmm9,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm13,xmm13,xmm5
vpaddd xmm13,xmm13,xmm7
vmovd xmm5,DWORD PTR[44+r8]
vmovd xmm0,DWORD PTR[44+r9]
vpinsrd xmm5,xmm5,DWORD PTR[44+r10],1
vpinsrd xmm0,xmm0,DWORD PTR[44+r11],1
vpunpckldq xmm5,xmm5,xmm0
vpshufb xmm5,xmm5,xmm6
vpsrld xmm7,xmm9,6
vpslld xmm2,xmm9,26
vmovdqu XMMWORD PTR[(176-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm12
vpsrld xmm1,xmm9,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm9,21
vpaddd xmm5,xmm5,XMMWORD PTR[((-32))+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm9,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm9,7
vpandn xmm0,xmm9,xmm11
vpand xmm4,xmm9,xmm10
vpxor xmm7,xmm7,xmm1
vpsrld xmm12,xmm13,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm13,30
vpxor xmm0,xmm0,xmm4
vpxor xmm4,xmm14,xmm13
vpxor xmm12,xmm12,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm13,13
vpslld xmm2,xmm13,19
vpaddd xmm5,xmm5,xmm0
vpand xmm3,xmm3,xmm4
vpxor xmm7,xmm12,xmm1
vpsrld xmm1,xmm13,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm13,10
vpxor xmm12,xmm14,xmm3
vpaddd xmm8,xmm8,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm12,xmm12,xmm5
vpaddd xmm12,xmm12,xmm7
vmovd xmm5,DWORD PTR[48+r8]
vmovd xmm0,DWORD PTR[48+r9]
vpinsrd xmm5,xmm5,DWORD PTR[48+r10],1
vpinsrd xmm0,xmm0,DWORD PTR[48+r11],1
vpunpckldq xmm5,xmm5,xmm0
vpshufb xmm5,xmm5,xmm6
vpsrld xmm7,xmm8,6
vpslld xmm2,xmm8,26
vmovdqu XMMWORD PTR[(192-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm11
vpsrld xmm1,xmm8,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm8,21
vpaddd xmm5,xmm5,XMMWORD PTR[rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm8,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm8,7
vpandn xmm0,xmm8,xmm10
vpand xmm3,xmm8,xmm9
vpxor xmm7,xmm7,xmm1
vpsrld xmm11,xmm12,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm12,30
vpxor xmm0,xmm0,xmm3
vpxor xmm3,xmm13,xmm12
vpxor xmm11,xmm11,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm12,13
vpslld xmm2,xmm12,19
vpaddd xmm5,xmm5,xmm0
vpand xmm4,xmm4,xmm3
vpxor xmm7,xmm11,xmm1
vpsrld xmm1,xmm12,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm12,10
vpxor xmm11,xmm13,xmm4
vpaddd xmm15,xmm15,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm11,xmm11,xmm5
vpaddd xmm11,xmm11,xmm7
vmovd xmm5,DWORD PTR[52+r8]
vmovd xmm0,DWORD PTR[52+r9]
vpinsrd xmm5,xmm5,DWORD PTR[52+r10],1
vpinsrd xmm0,xmm0,DWORD PTR[52+r11],1
vpunpckldq xmm5,xmm5,xmm0
vpshufb xmm5,xmm5,xmm6
vpsrld xmm7,xmm15,6
vpslld xmm2,xmm15,26
vmovdqu XMMWORD PTR[(208-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm10
vpsrld xmm1,xmm15,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm15,21
vpaddd xmm5,xmm5,XMMWORD PTR[32+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm15,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm15,7
vpandn xmm0,xmm15,xmm9
vpand xmm4,xmm15,xmm8
vpxor xmm7,xmm7,xmm1
vpsrld xmm10,xmm11,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm11,30
vpxor xmm0,xmm0,xmm4
vpxor xmm4,xmm12,xmm11
vpxor xmm10,xmm10,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm11,13
vpslld xmm2,xmm11,19
vpaddd xmm5,xmm5,xmm0
vpand xmm3,xmm3,xmm4
vpxor xmm7,xmm10,xmm1
vpsrld xmm1,xmm11,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm11,10
vpxor xmm10,xmm12,xmm3
vpaddd xmm14,xmm14,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm10,xmm10,xmm5
vpaddd xmm10,xmm10,xmm7
vmovd xmm5,DWORD PTR[56+r8]
vmovd xmm0,DWORD PTR[56+r9]
vpinsrd xmm5,xmm5,DWORD PTR[56+r10],1
vpinsrd xmm0,xmm0,DWORD PTR[56+r11],1
vpunpckldq xmm5,xmm5,xmm0
vpshufb xmm5,xmm5,xmm6
vpsrld xmm7,xmm14,6
vpslld xmm2,xmm14,26
vmovdqu XMMWORD PTR[(224-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm9
vpsrld xmm1,xmm14,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm14,21
vpaddd xmm5,xmm5,XMMWORD PTR[64+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm14,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm14,7
vpandn xmm0,xmm14,xmm8
vpand xmm3,xmm14,xmm15
vpxor xmm7,xmm7,xmm1
vpsrld xmm9,xmm10,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm10,30
vpxor xmm0,xmm0,xmm3
vpxor xmm3,xmm11,xmm10
vpxor xmm9,xmm9,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm10,13
vpslld xmm2,xmm10,19
vpaddd xmm5,xmm5,xmm0
vpand xmm4,xmm4,xmm3
vpxor xmm7,xmm9,xmm1
vpsrld xmm1,xmm10,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm10,10
vpxor xmm9,xmm11,xmm4
vpaddd xmm13,xmm13,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm9,xmm9,xmm5
vpaddd xmm9,xmm9,xmm7
vmovd xmm5,DWORD PTR[60+r8]
lea r8,QWORD PTR[64+r8]
vmovd xmm0,DWORD PTR[60+r9]
lea r9,QWORD PTR[64+r9]
vpinsrd xmm5,xmm5,DWORD PTR[60+r10],1
lea r10,QWORD PTR[64+r10]
vpinsrd xmm0,xmm0,DWORD PTR[60+r11],1
lea r11,QWORD PTR[64+r11]
vpunpckldq xmm5,xmm5,xmm0
vpshufb xmm5,xmm5,xmm6
vpsrld xmm7,xmm13,6
vpslld xmm2,xmm13,26
vmovdqu XMMWORD PTR[(240-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm8
vpsrld xmm1,xmm13,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm13,21
vpaddd xmm5,xmm5,XMMWORD PTR[96+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm13,25
vpxor xmm7,xmm7,xmm2
prefetcht0 [63+r8]
vpslld xmm2,xmm13,7
vpandn xmm0,xmm13,xmm15
vpand xmm4,xmm13,xmm14
prefetcht0 [63+r9]
vpxor xmm7,xmm7,xmm1
vpsrld xmm8,xmm9,2
vpxor xmm7,xmm7,xmm2
prefetcht0 [63+r10]
vpslld xmm1,xmm9,30
vpxor xmm0,xmm0,xmm4
vpxor xmm4,xmm10,xmm9
prefetcht0 [63+r11]
vpxor xmm8,xmm8,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm9,13
vpslld xmm2,xmm9,19
vpaddd xmm5,xmm5,xmm0
vpand xmm3,xmm3,xmm4
vpxor xmm7,xmm8,xmm1
vpsrld xmm1,xmm9,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm9,10
vpxor xmm8,xmm10,xmm3
vpaddd xmm12,xmm12,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm8,xmm8,xmm5
vpaddd xmm8,xmm8,xmm7
add rbp,256
vmovdqu xmm5,XMMWORD PTR[((0-128))+rax]
mov ecx,3
jmp $L$oop_16_xx_avx
ALIGN 32
$L$oop_16_xx_avx::
vmovdqu xmm6,XMMWORD PTR[((16-128))+rax]
vpaddd xmm5,xmm5,XMMWORD PTR[((144-128))+rax]
vpsrld xmm7,xmm6,3
vpsrld xmm1,xmm6,7
vpslld xmm2,xmm6,25
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm6,18
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm6,14
vmovdqu xmm0,XMMWORD PTR[((224-128))+rax]
vpsrld xmm3,xmm0,10
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm0,17
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,15
vpaddd xmm5,xmm5,xmm7
vpxor xmm7,xmm3,xmm1
vpsrld xmm1,xmm0,19
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,13
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm5,xmm5,xmm7
vpsrld xmm7,xmm12,6
vpslld xmm2,xmm12,26
vmovdqu XMMWORD PTR[(0-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm15
vpsrld xmm1,xmm12,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm12,21
vpaddd xmm5,xmm5,XMMWORD PTR[((-128))+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm12,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm12,7
vpandn xmm0,xmm12,xmm14
vpand xmm3,xmm12,xmm13
vpxor xmm7,xmm7,xmm1
vpsrld xmm15,xmm8,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm8,30
vpxor xmm0,xmm0,xmm3
vpxor xmm3,xmm9,xmm8
vpxor xmm15,xmm15,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm8,13
vpslld xmm2,xmm8,19
vpaddd xmm5,xmm5,xmm0
vpand xmm4,xmm4,xmm3
vpxor xmm7,xmm15,xmm1
vpsrld xmm1,xmm8,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm8,10
vpxor xmm15,xmm9,xmm4
vpaddd xmm11,xmm11,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm15,xmm15,xmm5
vpaddd xmm15,xmm15,xmm7
vmovdqu xmm5,XMMWORD PTR[((32-128))+rax]
vpaddd xmm6,xmm6,XMMWORD PTR[((160-128))+rax]
vpsrld xmm7,xmm5,3
vpsrld xmm1,xmm5,7
vpslld xmm2,xmm5,25
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm5,18
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm5,14
vmovdqu xmm0,XMMWORD PTR[((240-128))+rax]
vpsrld xmm4,xmm0,10
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm0,17
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,15
vpaddd xmm6,xmm6,xmm7
vpxor xmm7,xmm4,xmm1
vpsrld xmm1,xmm0,19
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,13
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm6,xmm6,xmm7
vpsrld xmm7,xmm11,6
vpslld xmm2,xmm11,26
vmovdqu XMMWORD PTR[(16-128)+rax],xmm6
vpaddd xmm6,xmm6,xmm14
vpsrld xmm1,xmm11,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm11,21
vpaddd xmm6,xmm6,XMMWORD PTR[((-96))+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm11,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm11,7
vpandn xmm0,xmm11,xmm13
vpand xmm4,xmm11,xmm12
vpxor xmm7,xmm7,xmm1
vpsrld xmm14,xmm15,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm15,30
vpxor xmm0,xmm0,xmm4
vpxor xmm4,xmm8,xmm15
vpxor xmm14,xmm14,xmm1
vpaddd xmm6,xmm6,xmm7
vpsrld xmm1,xmm15,13
vpslld xmm2,xmm15,19
vpaddd xmm6,xmm6,xmm0
vpand xmm3,xmm3,xmm4
vpxor xmm7,xmm14,xmm1
vpsrld xmm1,xmm15,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm15,10
vpxor xmm14,xmm8,xmm3
vpaddd xmm10,xmm10,xmm6
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm14,xmm14,xmm6
vpaddd xmm14,xmm14,xmm7
vmovdqu xmm6,XMMWORD PTR[((48-128))+rax]
vpaddd xmm5,xmm5,XMMWORD PTR[((176-128))+rax]
vpsrld xmm7,xmm6,3
vpsrld xmm1,xmm6,7
vpslld xmm2,xmm6,25
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm6,18
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm6,14
vmovdqu xmm0,XMMWORD PTR[((0-128))+rax]
vpsrld xmm3,xmm0,10
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm0,17
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,15
vpaddd xmm5,xmm5,xmm7
vpxor xmm7,xmm3,xmm1
vpsrld xmm1,xmm0,19
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,13
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm5,xmm5,xmm7
vpsrld xmm7,xmm10,6
vpslld xmm2,xmm10,26
vmovdqu XMMWORD PTR[(32-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm13
vpsrld xmm1,xmm10,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm10,21
vpaddd xmm5,xmm5,XMMWORD PTR[((-64))+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm10,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm10,7
vpandn xmm0,xmm10,xmm12
vpand xmm3,xmm10,xmm11
vpxor xmm7,xmm7,xmm1
vpsrld xmm13,xmm14,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm14,30
vpxor xmm0,xmm0,xmm3
vpxor xmm3,xmm15,xmm14
vpxor xmm13,xmm13,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm14,13
vpslld xmm2,xmm14,19
vpaddd xmm5,xmm5,xmm0
vpand xmm4,xmm4,xmm3
vpxor xmm7,xmm13,xmm1
vpsrld xmm1,xmm14,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm14,10
vpxor xmm13,xmm15,xmm4
vpaddd xmm9,xmm9,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm13,xmm13,xmm5
vpaddd xmm13,xmm13,xmm7
vmovdqu xmm5,XMMWORD PTR[((64-128))+rax]
vpaddd xmm6,xmm6,XMMWORD PTR[((192-128))+rax]
vpsrld xmm7,xmm5,3
vpsrld xmm1,xmm5,7
vpslld xmm2,xmm5,25
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm5,18
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm5,14
vmovdqu xmm0,XMMWORD PTR[((16-128))+rax]
vpsrld xmm4,xmm0,10
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm0,17
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,15
vpaddd xmm6,xmm6,xmm7
vpxor xmm7,xmm4,xmm1
vpsrld xmm1,xmm0,19
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,13
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm6,xmm6,xmm7
vpsrld xmm7,xmm9,6
vpslld xmm2,xmm9,26
vmovdqu XMMWORD PTR[(48-128)+rax],xmm6
vpaddd xmm6,xmm6,xmm12
vpsrld xmm1,xmm9,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm9,21
vpaddd xmm6,xmm6,XMMWORD PTR[((-32))+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm9,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm9,7
vpandn xmm0,xmm9,xmm11
vpand xmm4,xmm9,xmm10
vpxor xmm7,xmm7,xmm1
vpsrld xmm12,xmm13,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm13,30
vpxor xmm0,xmm0,xmm4
vpxor xmm4,xmm14,xmm13
vpxor xmm12,xmm12,xmm1
vpaddd xmm6,xmm6,xmm7
vpsrld xmm1,xmm13,13
vpslld xmm2,xmm13,19
vpaddd xmm6,xmm6,xmm0
vpand xmm3,xmm3,xmm4
vpxor xmm7,xmm12,xmm1
vpsrld xmm1,xmm13,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm13,10
vpxor xmm12,xmm14,xmm3
vpaddd xmm8,xmm8,xmm6
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm12,xmm12,xmm6
vpaddd xmm12,xmm12,xmm7
vmovdqu xmm6,XMMWORD PTR[((80-128))+rax]
vpaddd xmm5,xmm5,XMMWORD PTR[((208-128))+rax]
vpsrld xmm7,xmm6,3
vpsrld xmm1,xmm6,7
vpslld xmm2,xmm6,25
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm6,18
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm6,14
vmovdqu xmm0,XMMWORD PTR[((32-128))+rax]
vpsrld xmm3,xmm0,10
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm0,17
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,15
vpaddd xmm5,xmm5,xmm7
vpxor xmm7,xmm3,xmm1
vpsrld xmm1,xmm0,19
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,13
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm5,xmm5,xmm7
vpsrld xmm7,xmm8,6
vpslld xmm2,xmm8,26
vmovdqu XMMWORD PTR[(64-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm11
vpsrld xmm1,xmm8,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm8,21
vpaddd xmm5,xmm5,XMMWORD PTR[rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm8,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm8,7
vpandn xmm0,xmm8,xmm10
vpand xmm3,xmm8,xmm9
vpxor xmm7,xmm7,xmm1
vpsrld xmm11,xmm12,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm12,30
vpxor xmm0,xmm0,xmm3
vpxor xmm3,xmm13,xmm12
vpxor xmm11,xmm11,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm12,13
vpslld xmm2,xmm12,19
vpaddd xmm5,xmm5,xmm0
vpand xmm4,xmm4,xmm3
vpxor xmm7,xmm11,xmm1
vpsrld xmm1,xmm12,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm12,10
vpxor xmm11,xmm13,xmm4
vpaddd xmm15,xmm15,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm11,xmm11,xmm5
vpaddd xmm11,xmm11,xmm7
vmovdqu xmm5,XMMWORD PTR[((96-128))+rax]
vpaddd xmm6,xmm6,XMMWORD PTR[((224-128))+rax]
vpsrld xmm7,xmm5,3
vpsrld xmm1,xmm5,7
vpslld xmm2,xmm5,25
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm5,18
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm5,14
vmovdqu xmm0,XMMWORD PTR[((48-128))+rax]
vpsrld xmm4,xmm0,10
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm0,17
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,15
vpaddd xmm6,xmm6,xmm7
vpxor xmm7,xmm4,xmm1
vpsrld xmm1,xmm0,19
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,13
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm6,xmm6,xmm7
vpsrld xmm7,xmm15,6
vpslld xmm2,xmm15,26
vmovdqu XMMWORD PTR[(80-128)+rax],xmm6
vpaddd xmm6,xmm6,xmm10
vpsrld xmm1,xmm15,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm15,21
vpaddd xmm6,xmm6,XMMWORD PTR[32+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm15,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm15,7
vpandn xmm0,xmm15,xmm9
vpand xmm4,xmm15,xmm8
vpxor xmm7,xmm7,xmm1
vpsrld xmm10,xmm11,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm11,30
vpxor xmm0,xmm0,xmm4
vpxor xmm4,xmm12,xmm11
vpxor xmm10,xmm10,xmm1
vpaddd xmm6,xmm6,xmm7
vpsrld xmm1,xmm11,13
vpslld xmm2,xmm11,19
vpaddd xmm6,xmm6,xmm0
vpand xmm3,xmm3,xmm4
vpxor xmm7,xmm10,xmm1
vpsrld xmm1,xmm11,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm11,10
vpxor xmm10,xmm12,xmm3
vpaddd xmm14,xmm14,xmm6
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm10,xmm10,xmm6
vpaddd xmm10,xmm10,xmm7
vmovdqu xmm6,XMMWORD PTR[((112-128))+rax]
vpaddd xmm5,xmm5,XMMWORD PTR[((240-128))+rax]
vpsrld xmm7,xmm6,3
vpsrld xmm1,xmm6,7
vpslld xmm2,xmm6,25
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm6,18
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm6,14
vmovdqu xmm0,XMMWORD PTR[((64-128))+rax]
vpsrld xmm3,xmm0,10
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm0,17
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,15
vpaddd xmm5,xmm5,xmm7
vpxor xmm7,xmm3,xmm1
vpsrld xmm1,xmm0,19
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,13
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm5,xmm5,xmm7
vpsrld xmm7,xmm14,6
vpslld xmm2,xmm14,26
vmovdqu XMMWORD PTR[(96-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm9
vpsrld xmm1,xmm14,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm14,21
vpaddd xmm5,xmm5,XMMWORD PTR[64+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm14,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm14,7
vpandn xmm0,xmm14,xmm8
vpand xmm3,xmm14,xmm15
vpxor xmm7,xmm7,xmm1
vpsrld xmm9,xmm10,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm10,30
vpxor xmm0,xmm0,xmm3
vpxor xmm3,xmm11,xmm10
vpxor xmm9,xmm9,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm10,13
vpslld xmm2,xmm10,19
vpaddd xmm5,xmm5,xmm0
vpand xmm4,xmm4,xmm3
vpxor xmm7,xmm9,xmm1
vpsrld xmm1,xmm10,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm10,10
vpxor xmm9,xmm11,xmm4
vpaddd xmm13,xmm13,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm9,xmm9,xmm5
vpaddd xmm9,xmm9,xmm7
vmovdqu xmm5,XMMWORD PTR[((128-128))+rax]
vpaddd xmm6,xmm6,XMMWORD PTR[((0-128))+rax]
vpsrld xmm7,xmm5,3
vpsrld xmm1,xmm5,7
vpslld xmm2,xmm5,25
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm5,18
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm5,14
vmovdqu xmm0,XMMWORD PTR[((80-128))+rax]
vpsrld xmm4,xmm0,10
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm0,17
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,15
vpaddd xmm6,xmm6,xmm7
vpxor xmm7,xmm4,xmm1
vpsrld xmm1,xmm0,19
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,13
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm6,xmm6,xmm7
vpsrld xmm7,xmm13,6
vpslld xmm2,xmm13,26
vmovdqu XMMWORD PTR[(112-128)+rax],xmm6
vpaddd xmm6,xmm6,xmm8
vpsrld xmm1,xmm13,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm13,21
vpaddd xmm6,xmm6,XMMWORD PTR[96+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm13,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm13,7
vpandn xmm0,xmm13,xmm15
vpand xmm4,xmm13,xmm14
vpxor xmm7,xmm7,xmm1
vpsrld xmm8,xmm9,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm9,30
vpxor xmm0,xmm0,xmm4
vpxor xmm4,xmm10,xmm9
vpxor xmm8,xmm8,xmm1
vpaddd xmm6,xmm6,xmm7
vpsrld xmm1,xmm9,13
vpslld xmm2,xmm9,19
vpaddd xmm6,xmm6,xmm0
vpand xmm3,xmm3,xmm4
vpxor xmm7,xmm8,xmm1
vpsrld xmm1,xmm9,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm9,10
vpxor xmm8,xmm10,xmm3
vpaddd xmm12,xmm12,xmm6
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm8,xmm8,xmm6
vpaddd xmm8,xmm8,xmm7
add rbp,256
vmovdqu xmm6,XMMWORD PTR[((144-128))+rax]
vpaddd xmm5,xmm5,XMMWORD PTR[((16-128))+rax]
vpsrld xmm7,xmm6,3
vpsrld xmm1,xmm6,7
vpslld xmm2,xmm6,25
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm6,18
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm6,14
vmovdqu xmm0,XMMWORD PTR[((96-128))+rax]
vpsrld xmm3,xmm0,10
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm0,17
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,15
vpaddd xmm5,xmm5,xmm7
vpxor xmm7,xmm3,xmm1
vpsrld xmm1,xmm0,19
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,13
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm5,xmm5,xmm7
vpsrld xmm7,xmm12,6
vpslld xmm2,xmm12,26
vmovdqu XMMWORD PTR[(128-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm15
vpsrld xmm1,xmm12,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm12,21
vpaddd xmm5,xmm5,XMMWORD PTR[((-128))+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm12,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm12,7
vpandn xmm0,xmm12,xmm14
vpand xmm3,xmm12,xmm13
vpxor xmm7,xmm7,xmm1
vpsrld xmm15,xmm8,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm8,30
vpxor xmm0,xmm0,xmm3
vpxor xmm3,xmm9,xmm8
vpxor xmm15,xmm15,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm8,13
vpslld xmm2,xmm8,19
vpaddd xmm5,xmm5,xmm0
vpand xmm4,xmm4,xmm3
vpxor xmm7,xmm15,xmm1
vpsrld xmm1,xmm8,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm8,10
vpxor xmm15,xmm9,xmm4
vpaddd xmm11,xmm11,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm15,xmm15,xmm5
vpaddd xmm15,xmm15,xmm7
vmovdqu xmm5,XMMWORD PTR[((160-128))+rax]
vpaddd xmm6,xmm6,XMMWORD PTR[((32-128))+rax]
vpsrld xmm7,xmm5,3
vpsrld xmm1,xmm5,7
vpslld xmm2,xmm5,25
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm5,18
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm5,14
vmovdqu xmm0,XMMWORD PTR[((112-128))+rax]
vpsrld xmm4,xmm0,10
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm0,17
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,15
vpaddd xmm6,xmm6,xmm7
vpxor xmm7,xmm4,xmm1
vpsrld xmm1,xmm0,19
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,13
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm6,xmm6,xmm7
vpsrld xmm7,xmm11,6
vpslld xmm2,xmm11,26
vmovdqu XMMWORD PTR[(144-128)+rax],xmm6
vpaddd xmm6,xmm6,xmm14
vpsrld xmm1,xmm11,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm11,21
vpaddd xmm6,xmm6,XMMWORD PTR[((-96))+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm11,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm11,7
vpandn xmm0,xmm11,xmm13
vpand xmm4,xmm11,xmm12
vpxor xmm7,xmm7,xmm1
vpsrld xmm14,xmm15,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm15,30
vpxor xmm0,xmm0,xmm4
vpxor xmm4,xmm8,xmm15
vpxor xmm14,xmm14,xmm1
vpaddd xmm6,xmm6,xmm7
vpsrld xmm1,xmm15,13
vpslld xmm2,xmm15,19
vpaddd xmm6,xmm6,xmm0
vpand xmm3,xmm3,xmm4
vpxor xmm7,xmm14,xmm1
vpsrld xmm1,xmm15,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm15,10
vpxor xmm14,xmm8,xmm3
vpaddd xmm10,xmm10,xmm6
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm14,xmm14,xmm6
vpaddd xmm14,xmm14,xmm7
vmovdqu xmm6,XMMWORD PTR[((176-128))+rax]
vpaddd xmm5,xmm5,XMMWORD PTR[((48-128))+rax]
vpsrld xmm7,xmm6,3
vpsrld xmm1,xmm6,7
vpslld xmm2,xmm6,25
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm6,18
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm6,14
vmovdqu xmm0,XMMWORD PTR[((128-128))+rax]
vpsrld xmm3,xmm0,10
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm0,17
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,15
vpaddd xmm5,xmm5,xmm7
vpxor xmm7,xmm3,xmm1
vpsrld xmm1,xmm0,19
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,13
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm5,xmm5,xmm7
vpsrld xmm7,xmm10,6
vpslld xmm2,xmm10,26
vmovdqu XMMWORD PTR[(160-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm13
vpsrld xmm1,xmm10,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm10,21
vpaddd xmm5,xmm5,XMMWORD PTR[((-64))+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm10,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm10,7
vpandn xmm0,xmm10,xmm12
vpand xmm3,xmm10,xmm11
vpxor xmm7,xmm7,xmm1
vpsrld xmm13,xmm14,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm14,30
vpxor xmm0,xmm0,xmm3
vpxor xmm3,xmm15,xmm14
vpxor xmm13,xmm13,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm14,13
vpslld xmm2,xmm14,19
vpaddd xmm5,xmm5,xmm0
vpand xmm4,xmm4,xmm3
vpxor xmm7,xmm13,xmm1
vpsrld xmm1,xmm14,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm14,10
vpxor xmm13,xmm15,xmm4
vpaddd xmm9,xmm9,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm13,xmm13,xmm5
vpaddd xmm13,xmm13,xmm7
vmovdqu xmm5,XMMWORD PTR[((192-128))+rax]
vpaddd xmm6,xmm6,XMMWORD PTR[((64-128))+rax]
vpsrld xmm7,xmm5,3
vpsrld xmm1,xmm5,7
vpslld xmm2,xmm5,25
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm5,18
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm5,14
vmovdqu xmm0,XMMWORD PTR[((144-128))+rax]
vpsrld xmm4,xmm0,10
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm0,17
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,15
vpaddd xmm6,xmm6,xmm7
vpxor xmm7,xmm4,xmm1
vpsrld xmm1,xmm0,19
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,13
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm6,xmm6,xmm7
vpsrld xmm7,xmm9,6
vpslld xmm2,xmm9,26
vmovdqu XMMWORD PTR[(176-128)+rax],xmm6
vpaddd xmm6,xmm6,xmm12
vpsrld xmm1,xmm9,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm9,21
vpaddd xmm6,xmm6,XMMWORD PTR[((-32))+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm9,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm9,7
vpandn xmm0,xmm9,xmm11
vpand xmm4,xmm9,xmm10
vpxor xmm7,xmm7,xmm1
vpsrld xmm12,xmm13,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm13,30
vpxor xmm0,xmm0,xmm4
vpxor xmm4,xmm14,xmm13
vpxor xmm12,xmm12,xmm1
vpaddd xmm6,xmm6,xmm7
vpsrld xmm1,xmm13,13
vpslld xmm2,xmm13,19
vpaddd xmm6,xmm6,xmm0
vpand xmm3,xmm3,xmm4
vpxor xmm7,xmm12,xmm1
vpsrld xmm1,xmm13,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm13,10
vpxor xmm12,xmm14,xmm3
vpaddd xmm8,xmm8,xmm6
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm12,xmm12,xmm6
vpaddd xmm12,xmm12,xmm7
vmovdqu xmm6,XMMWORD PTR[((208-128))+rax]
vpaddd xmm5,xmm5,XMMWORD PTR[((80-128))+rax]
vpsrld xmm7,xmm6,3
vpsrld xmm1,xmm6,7
vpslld xmm2,xmm6,25
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm6,18
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm6,14
vmovdqu xmm0,XMMWORD PTR[((160-128))+rax]
vpsrld xmm3,xmm0,10
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm0,17
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,15
vpaddd xmm5,xmm5,xmm7
vpxor xmm7,xmm3,xmm1
vpsrld xmm1,xmm0,19
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,13
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm5,xmm5,xmm7
vpsrld xmm7,xmm8,6
vpslld xmm2,xmm8,26
vmovdqu XMMWORD PTR[(192-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm11
vpsrld xmm1,xmm8,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm8,21
vpaddd xmm5,xmm5,XMMWORD PTR[rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm8,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm8,7
vpandn xmm0,xmm8,xmm10
vpand xmm3,xmm8,xmm9
vpxor xmm7,xmm7,xmm1
vpsrld xmm11,xmm12,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm12,30
vpxor xmm0,xmm0,xmm3
vpxor xmm3,xmm13,xmm12
vpxor xmm11,xmm11,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm12,13
vpslld xmm2,xmm12,19
vpaddd xmm5,xmm5,xmm0
vpand xmm4,xmm4,xmm3
vpxor xmm7,xmm11,xmm1
vpsrld xmm1,xmm12,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm12,10
vpxor xmm11,xmm13,xmm4
vpaddd xmm15,xmm15,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm11,xmm11,xmm5
vpaddd xmm11,xmm11,xmm7
vmovdqu xmm5,XMMWORD PTR[((224-128))+rax]
vpaddd xmm6,xmm6,XMMWORD PTR[((96-128))+rax]
vpsrld xmm7,xmm5,3
vpsrld xmm1,xmm5,7
vpslld xmm2,xmm5,25
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm5,18
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm5,14
vmovdqu xmm0,XMMWORD PTR[((176-128))+rax]
vpsrld xmm4,xmm0,10
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm0,17
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,15
vpaddd xmm6,xmm6,xmm7
vpxor xmm7,xmm4,xmm1
vpsrld xmm1,xmm0,19
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,13
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm6,xmm6,xmm7
vpsrld xmm7,xmm15,6
vpslld xmm2,xmm15,26
vmovdqu XMMWORD PTR[(208-128)+rax],xmm6
vpaddd xmm6,xmm6,xmm10
vpsrld xmm1,xmm15,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm15,21
vpaddd xmm6,xmm6,XMMWORD PTR[32+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm15,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm15,7
vpandn xmm0,xmm15,xmm9
vpand xmm4,xmm15,xmm8
vpxor xmm7,xmm7,xmm1
vpsrld xmm10,xmm11,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm11,30
vpxor xmm0,xmm0,xmm4
vpxor xmm4,xmm12,xmm11
vpxor xmm10,xmm10,xmm1
vpaddd xmm6,xmm6,xmm7
vpsrld xmm1,xmm11,13
vpslld xmm2,xmm11,19
vpaddd xmm6,xmm6,xmm0
vpand xmm3,xmm3,xmm4
vpxor xmm7,xmm10,xmm1
vpsrld xmm1,xmm11,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm11,10
vpxor xmm10,xmm12,xmm3
vpaddd xmm14,xmm14,xmm6
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm10,xmm10,xmm6
vpaddd xmm10,xmm10,xmm7
vmovdqu xmm6,XMMWORD PTR[((240-128))+rax]
vpaddd xmm5,xmm5,XMMWORD PTR[((112-128))+rax]
vpsrld xmm7,xmm6,3
vpsrld xmm1,xmm6,7
vpslld xmm2,xmm6,25
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm6,18
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm6,14
vmovdqu xmm0,XMMWORD PTR[((192-128))+rax]
vpsrld xmm3,xmm0,10
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm0,17
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,15
vpaddd xmm5,xmm5,xmm7
vpxor xmm7,xmm3,xmm1
vpsrld xmm1,xmm0,19
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,13
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm5,xmm5,xmm7
vpsrld xmm7,xmm14,6
vpslld xmm2,xmm14,26
vmovdqu XMMWORD PTR[(224-128)+rax],xmm5
vpaddd xmm5,xmm5,xmm9
vpsrld xmm1,xmm14,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm14,21
vpaddd xmm5,xmm5,XMMWORD PTR[64+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm14,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm14,7
vpandn xmm0,xmm14,xmm8
vpand xmm3,xmm14,xmm15
vpxor xmm7,xmm7,xmm1
vpsrld xmm9,xmm10,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm10,30
vpxor xmm0,xmm0,xmm3
vpxor xmm3,xmm11,xmm10
vpxor xmm9,xmm9,xmm1
vpaddd xmm5,xmm5,xmm7
vpsrld xmm1,xmm10,13
vpslld xmm2,xmm10,19
vpaddd xmm5,xmm5,xmm0
vpand xmm4,xmm4,xmm3
vpxor xmm7,xmm9,xmm1
vpsrld xmm1,xmm10,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm10,10
vpxor xmm9,xmm11,xmm4
vpaddd xmm13,xmm13,xmm5
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm9,xmm9,xmm5
vpaddd xmm9,xmm9,xmm7
vmovdqu xmm5,XMMWORD PTR[((0-128))+rax]
vpaddd xmm6,xmm6,XMMWORD PTR[((128-128))+rax]
vpsrld xmm7,xmm5,3
vpsrld xmm1,xmm5,7
vpslld xmm2,xmm5,25
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm5,18
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm5,14
vmovdqu xmm0,XMMWORD PTR[((208-128))+rax]
vpsrld xmm4,xmm0,10
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm0,17
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,15
vpaddd xmm6,xmm6,xmm7
vpxor xmm7,xmm4,xmm1
vpsrld xmm1,xmm0,19
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm0,13
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm6,xmm6,xmm7
vpsrld xmm7,xmm13,6
vpslld xmm2,xmm13,26
vmovdqu XMMWORD PTR[(240-128)+rax],xmm6
vpaddd xmm6,xmm6,xmm8
vpsrld xmm1,xmm13,11
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm13,21
vpaddd xmm6,xmm6,XMMWORD PTR[96+rbp]
vpxor xmm7,xmm7,xmm1
vpsrld xmm1,xmm13,25
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm13,7
vpandn xmm0,xmm13,xmm15
vpand xmm4,xmm13,xmm14
vpxor xmm7,xmm7,xmm1
vpsrld xmm8,xmm9,2
vpxor xmm7,xmm7,xmm2
vpslld xmm1,xmm9,30
vpxor xmm0,xmm0,xmm4
vpxor xmm4,xmm10,xmm9
vpxor xmm8,xmm8,xmm1
vpaddd xmm6,xmm6,xmm7
vpsrld xmm1,xmm9,13
vpslld xmm2,xmm9,19
vpaddd xmm6,xmm6,xmm0
vpand xmm3,xmm3,xmm4
vpxor xmm7,xmm8,xmm1
vpsrld xmm1,xmm9,22
vpxor xmm7,xmm7,xmm2
vpslld xmm2,xmm9,10
vpxor xmm8,xmm10,xmm3
vpaddd xmm12,xmm12,xmm6
vpxor xmm7,xmm7,xmm1
vpxor xmm7,xmm7,xmm2
vpaddd xmm8,xmm8,xmm6
vpaddd xmm8,xmm8,xmm7
add rbp,256
dec ecx
jnz $L$oop_16_xx_avx
mov ecx,1
lea rbp,QWORD PTR[((K256+128))]
cmp ecx,DWORD PTR[rbx]
cmovge r8,rbp
cmp ecx,DWORD PTR[4+rbx]
cmovge r9,rbp
cmp ecx,DWORD PTR[8+rbx]
cmovge r10,rbp
cmp ecx,DWORD PTR[12+rbx]
cmovge r11,rbp
vmovdqa xmm7,XMMWORD PTR[rbx]
vpxor xmm0,xmm0,xmm0
vmovdqa xmm6,xmm7
vpcmpgtd xmm6,xmm6,xmm0
vpaddd xmm7,xmm7,xmm6
vmovdqu xmm0,XMMWORD PTR[((0-128))+rdi]
vpand xmm8,xmm8,xmm6
vmovdqu xmm1,XMMWORD PTR[((32-128))+rdi]
vpand xmm9,xmm9,xmm6
vmovdqu xmm2,XMMWORD PTR[((64-128))+rdi]
vpand xmm10,xmm10,xmm6
vmovdqu xmm5,XMMWORD PTR[((96-128))+rdi]
vpand xmm11,xmm11,xmm6
vpaddd xmm8,xmm8,xmm0
vmovdqu xmm0,XMMWORD PTR[((128-128))+rdi]
vpand xmm12,xmm12,xmm6
vpaddd xmm9,xmm9,xmm1
vmovdqu xmm1,XMMWORD PTR[((160-128))+rdi]
vpand xmm13,xmm13,xmm6
vpaddd xmm10,xmm10,xmm2
vmovdqu xmm2,XMMWORD PTR[((192-128))+rdi]
vpand xmm14,xmm14,xmm6
vpaddd xmm11,xmm11,xmm5
vmovdqu xmm5,XMMWORD PTR[((224-128))+rdi]
vpand xmm15,xmm15,xmm6
vpaddd xmm12,xmm12,xmm0
vpaddd xmm13,xmm13,xmm1
vmovdqu XMMWORD PTR[(0-128)+rdi],xmm8
vpaddd xmm14,xmm14,xmm2
vmovdqu XMMWORD PTR[(32-128)+rdi],xmm9
vpaddd xmm15,xmm15,xmm5
vmovdqu XMMWORD PTR[(64-128)+rdi],xmm10
vmovdqu XMMWORD PTR[(96-128)+rdi],xmm11
vmovdqu XMMWORD PTR[(128-128)+rdi],xmm12
vmovdqu XMMWORD PTR[(160-128)+rdi],xmm13
vmovdqu XMMWORD PTR[(192-128)+rdi],xmm14
vmovdqu XMMWORD PTR[(224-128)+rdi],xmm15
vmovdqu XMMWORD PTR[rbx],xmm7
vmovdqu xmm6,XMMWORD PTR[$L$pbswap]
dec edx
jnz $L$oop_avx
mov edx,DWORD PTR[280+rsp]
lea rdi,QWORD PTR[16+rdi]
lea rsi,QWORD PTR[64+rsi]
dec edx
jnz $L$oop_grande_avx
$L$done_avx::
mov rax,QWORD PTR[272+rsp]
vzeroupper
movaps xmm6,XMMWORD PTR[((-184))+rax]
movaps xmm7,XMMWORD PTR[((-168))+rax]
movaps xmm8,XMMWORD PTR[((-152))+rax]
movaps xmm9,XMMWORD PTR[((-136))+rax]
movaps xmm10,XMMWORD PTR[((-120))+rax]
movaps xmm11,XMMWORD PTR[((-104))+rax]
movaps xmm12,XMMWORD PTR[((-88))+rax]
movaps xmm13,XMMWORD PTR[((-72))+rax]
movaps xmm14,XMMWORD PTR[((-56))+rax]
movaps xmm15,XMMWORD PTR[((-40))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov rbx,QWORD PTR[((-8))+rax]
lea rsp,QWORD PTR[rax]
$L$epilogue_avx::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_sha256_multi_block_avx::
sha256_multi_block_avx ENDP
ALIGN 32
sha256_multi_block_avx2 PROC PRIVATE
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
mov QWORD PTR[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_sha256_multi_block_avx2::
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
_avx2_shortcut::
mov rax,rsp
push rbx
push rbp
push r12
push r13
push r14
push r15
lea rsp,QWORD PTR[((-168))+rsp]
movaps XMMWORD PTR[rsp],xmm6
movaps XMMWORD PTR[16+rsp],xmm7
movaps XMMWORD PTR[32+rsp],xmm8
movaps XMMWORD PTR[48+rsp],xmm9
movaps XMMWORD PTR[64+rsp],xmm10
movaps XMMWORD PTR[80+rsp],xmm11
movaps XMMWORD PTR[(-120)+rax],xmm12
movaps XMMWORD PTR[(-104)+rax],xmm13
movaps XMMWORD PTR[(-88)+rax],xmm14
movaps XMMWORD PTR[(-72)+rax],xmm15
sub rsp,576
and rsp,-256
mov QWORD PTR[544+rsp],rax
$L$body_avx2::
lea rbp,QWORD PTR[((K256+128))]
lea rdi,QWORD PTR[128+rdi]
$L$oop_grande_avx2::
mov DWORD PTR[552+rsp],edx
xor edx,edx
lea rbx,QWORD PTR[512+rsp]
mov r12,QWORD PTR[rsi]
mov ecx,DWORD PTR[8+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[rbx],ecx
cmovle r12,rbp
mov r13,QWORD PTR[16+rsi]
mov ecx,DWORD PTR[24+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[4+rbx],ecx
cmovle r13,rbp
mov r14,QWORD PTR[32+rsi]
mov ecx,DWORD PTR[40+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[8+rbx],ecx
cmovle r14,rbp
mov r15,QWORD PTR[48+rsi]
mov ecx,DWORD PTR[56+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[12+rbx],ecx
cmovle r15,rbp
mov r8,QWORD PTR[64+rsi]
mov ecx,DWORD PTR[72+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[16+rbx],ecx
cmovle r8,rbp
mov r9,QWORD PTR[80+rsi]
mov ecx,DWORD PTR[88+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[20+rbx],ecx
cmovle r9,rbp
mov r10,QWORD PTR[96+rsi]
mov ecx,DWORD PTR[104+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[24+rbx],ecx
cmovle r10,rbp
mov r11,QWORD PTR[112+rsi]
mov ecx,DWORD PTR[120+rsi]
cmp ecx,edx
cmovg edx,ecx
test ecx,ecx
mov DWORD PTR[28+rbx],ecx
cmovle r11,rbp
vmovdqu ymm8,YMMWORD PTR[((0-128))+rdi]
lea rax,QWORD PTR[128+rsp]
vmovdqu ymm9,YMMWORD PTR[((32-128))+rdi]
lea rbx,QWORD PTR[((256+128))+rsp]
vmovdqu ymm10,YMMWORD PTR[((64-128))+rdi]
vmovdqu ymm11,YMMWORD PTR[((96-128))+rdi]
vmovdqu ymm12,YMMWORD PTR[((128-128))+rdi]
vmovdqu ymm13,YMMWORD PTR[((160-128))+rdi]
vmovdqu ymm14,YMMWORD PTR[((192-128))+rdi]
vmovdqu ymm15,YMMWORD PTR[((224-128))+rdi]
vmovdqu ymm6,YMMWORD PTR[$L$pbswap]
jmp $L$oop_avx2
ALIGN 32
$L$oop_avx2::
vpxor ymm4,ymm10,ymm9
vmovd xmm5,DWORD PTR[r12]
vmovd xmm0,DWORD PTR[r8]
vmovd xmm1,DWORD PTR[r13]
vmovd xmm2,DWORD PTR[r9]
vpinsrd xmm5,xmm5,DWORD PTR[r14],1
vpinsrd xmm0,xmm0,DWORD PTR[r10],1
vpinsrd xmm1,xmm1,DWORD PTR[r15],1
vpunpckldq ymm5,ymm5,ymm1
vpinsrd xmm2,xmm2,DWORD PTR[r11],1
vpunpckldq ymm0,ymm0,ymm2
vinserti128 ymm5,ymm5,xmm0,1
vpshufb ymm5,ymm5,ymm6
vpsrld ymm7,ymm12,6
vpslld ymm2,ymm12,26
vmovdqu YMMWORD PTR[(0-128)+rax],ymm5
vpaddd ymm5,ymm5,ymm15
vpsrld ymm1,ymm12,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm12,21
vpaddd ymm5,ymm5,YMMWORD PTR[((-128))+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm12,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm12,7
vpandn ymm0,ymm12,ymm14
vpand ymm3,ymm12,ymm13
vpxor ymm7,ymm7,ymm1
vpsrld ymm15,ymm8,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm8,30
vpxor ymm0,ymm0,ymm3
vpxor ymm3,ymm9,ymm8
vpxor ymm15,ymm15,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm8,13
vpslld ymm2,ymm8,19
vpaddd ymm5,ymm5,ymm0
vpand ymm4,ymm4,ymm3
vpxor ymm7,ymm15,ymm1
vpsrld ymm1,ymm8,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm8,10
vpxor ymm15,ymm9,ymm4
vpaddd ymm11,ymm11,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm15,ymm15,ymm5
vpaddd ymm15,ymm15,ymm7
vmovd xmm5,DWORD PTR[4+r12]
vmovd xmm0,DWORD PTR[4+r8]
vmovd xmm1,DWORD PTR[4+r13]
vmovd xmm2,DWORD PTR[4+r9]
vpinsrd xmm5,xmm5,DWORD PTR[4+r14],1
vpinsrd xmm0,xmm0,DWORD PTR[4+r10],1
vpinsrd xmm1,xmm1,DWORD PTR[4+r15],1
vpunpckldq ymm5,ymm5,ymm1
vpinsrd xmm2,xmm2,DWORD PTR[4+r11],1
vpunpckldq ymm0,ymm0,ymm2
vinserti128 ymm5,ymm5,xmm0,1
vpshufb ymm5,ymm5,ymm6
vpsrld ymm7,ymm11,6
vpslld ymm2,ymm11,26
vmovdqu YMMWORD PTR[(32-128)+rax],ymm5
vpaddd ymm5,ymm5,ymm14
vpsrld ymm1,ymm11,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm11,21
vpaddd ymm5,ymm5,YMMWORD PTR[((-96))+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm11,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm11,7
vpandn ymm0,ymm11,ymm13
vpand ymm4,ymm11,ymm12
vpxor ymm7,ymm7,ymm1
vpsrld ymm14,ymm15,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm15,30
vpxor ymm0,ymm0,ymm4
vpxor ymm4,ymm8,ymm15
vpxor ymm14,ymm14,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm15,13
vpslld ymm2,ymm15,19
vpaddd ymm5,ymm5,ymm0
vpand ymm3,ymm3,ymm4
vpxor ymm7,ymm14,ymm1
vpsrld ymm1,ymm15,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm15,10
vpxor ymm14,ymm8,ymm3
vpaddd ymm10,ymm10,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm14,ymm14,ymm5
vpaddd ymm14,ymm14,ymm7
vmovd xmm5,DWORD PTR[8+r12]
vmovd xmm0,DWORD PTR[8+r8]
vmovd xmm1,DWORD PTR[8+r13]
vmovd xmm2,DWORD PTR[8+r9]
vpinsrd xmm5,xmm5,DWORD PTR[8+r14],1
vpinsrd xmm0,xmm0,DWORD PTR[8+r10],1
vpinsrd xmm1,xmm1,DWORD PTR[8+r15],1
vpunpckldq ymm5,ymm5,ymm1
vpinsrd xmm2,xmm2,DWORD PTR[8+r11],1
vpunpckldq ymm0,ymm0,ymm2
vinserti128 ymm5,ymm5,xmm0,1
vpshufb ymm5,ymm5,ymm6
vpsrld ymm7,ymm10,6
vpslld ymm2,ymm10,26
vmovdqu YMMWORD PTR[(64-128)+rax],ymm5
vpaddd ymm5,ymm5,ymm13
vpsrld ymm1,ymm10,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm10,21
vpaddd ymm5,ymm5,YMMWORD PTR[((-64))+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm10,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm10,7
vpandn ymm0,ymm10,ymm12
vpand ymm3,ymm10,ymm11
vpxor ymm7,ymm7,ymm1
vpsrld ymm13,ymm14,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm14,30
vpxor ymm0,ymm0,ymm3
vpxor ymm3,ymm15,ymm14
vpxor ymm13,ymm13,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm14,13
vpslld ymm2,ymm14,19
vpaddd ymm5,ymm5,ymm0
vpand ymm4,ymm4,ymm3
vpxor ymm7,ymm13,ymm1
vpsrld ymm1,ymm14,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm14,10
vpxor ymm13,ymm15,ymm4
vpaddd ymm9,ymm9,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm13,ymm13,ymm5
vpaddd ymm13,ymm13,ymm7
vmovd xmm5,DWORD PTR[12+r12]
vmovd xmm0,DWORD PTR[12+r8]
vmovd xmm1,DWORD PTR[12+r13]
vmovd xmm2,DWORD PTR[12+r9]
vpinsrd xmm5,xmm5,DWORD PTR[12+r14],1
vpinsrd xmm0,xmm0,DWORD PTR[12+r10],1
vpinsrd xmm1,xmm1,DWORD PTR[12+r15],1
vpunpckldq ymm5,ymm5,ymm1
vpinsrd xmm2,xmm2,DWORD PTR[12+r11],1
vpunpckldq ymm0,ymm0,ymm2
vinserti128 ymm5,ymm5,xmm0,1
vpshufb ymm5,ymm5,ymm6
vpsrld ymm7,ymm9,6
vpslld ymm2,ymm9,26
vmovdqu YMMWORD PTR[(96-128)+rax],ymm5
vpaddd ymm5,ymm5,ymm12
vpsrld ymm1,ymm9,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm9,21
vpaddd ymm5,ymm5,YMMWORD PTR[((-32))+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm9,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm9,7
vpandn ymm0,ymm9,ymm11
vpand ymm4,ymm9,ymm10
vpxor ymm7,ymm7,ymm1
vpsrld ymm12,ymm13,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm13,30
vpxor ymm0,ymm0,ymm4
vpxor ymm4,ymm14,ymm13
vpxor ymm12,ymm12,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm13,13
vpslld ymm2,ymm13,19
vpaddd ymm5,ymm5,ymm0
vpand ymm3,ymm3,ymm4
vpxor ymm7,ymm12,ymm1
vpsrld ymm1,ymm13,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm13,10
vpxor ymm12,ymm14,ymm3
vpaddd ymm8,ymm8,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm12,ymm12,ymm5
vpaddd ymm12,ymm12,ymm7
vmovd xmm5,DWORD PTR[16+r12]
vmovd xmm0,DWORD PTR[16+r8]
vmovd xmm1,DWORD PTR[16+r13]
vmovd xmm2,DWORD PTR[16+r9]
vpinsrd xmm5,xmm5,DWORD PTR[16+r14],1
vpinsrd xmm0,xmm0,DWORD PTR[16+r10],1
vpinsrd xmm1,xmm1,DWORD PTR[16+r15],1
vpunpckldq ymm5,ymm5,ymm1
vpinsrd xmm2,xmm2,DWORD PTR[16+r11],1
vpunpckldq ymm0,ymm0,ymm2
vinserti128 ymm5,ymm5,xmm0,1
vpshufb ymm5,ymm5,ymm6
vpsrld ymm7,ymm8,6
vpslld ymm2,ymm8,26
vmovdqu YMMWORD PTR[(128-128)+rax],ymm5
vpaddd ymm5,ymm5,ymm11
vpsrld ymm1,ymm8,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm8,21
vpaddd ymm5,ymm5,YMMWORD PTR[rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm8,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm8,7
vpandn ymm0,ymm8,ymm10
vpand ymm3,ymm8,ymm9
vpxor ymm7,ymm7,ymm1
vpsrld ymm11,ymm12,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm12,30
vpxor ymm0,ymm0,ymm3
vpxor ymm3,ymm13,ymm12
vpxor ymm11,ymm11,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm12,13
vpslld ymm2,ymm12,19
vpaddd ymm5,ymm5,ymm0
vpand ymm4,ymm4,ymm3
vpxor ymm7,ymm11,ymm1
vpsrld ymm1,ymm12,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm12,10
vpxor ymm11,ymm13,ymm4
vpaddd ymm15,ymm15,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm11,ymm11,ymm5
vpaddd ymm11,ymm11,ymm7
vmovd xmm5,DWORD PTR[20+r12]
vmovd xmm0,DWORD PTR[20+r8]
vmovd xmm1,DWORD PTR[20+r13]
vmovd xmm2,DWORD PTR[20+r9]
vpinsrd xmm5,xmm5,DWORD PTR[20+r14],1
vpinsrd xmm0,xmm0,DWORD PTR[20+r10],1
vpinsrd xmm1,xmm1,DWORD PTR[20+r15],1
vpunpckldq ymm5,ymm5,ymm1
vpinsrd xmm2,xmm2,DWORD PTR[20+r11],1
vpunpckldq ymm0,ymm0,ymm2
vinserti128 ymm5,ymm5,xmm0,1
vpshufb ymm5,ymm5,ymm6
vpsrld ymm7,ymm15,6
vpslld ymm2,ymm15,26
vmovdqu YMMWORD PTR[(160-128)+rax],ymm5
vpaddd ymm5,ymm5,ymm10
vpsrld ymm1,ymm15,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm15,21
vpaddd ymm5,ymm5,YMMWORD PTR[32+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm15,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm15,7
vpandn ymm0,ymm15,ymm9
vpand ymm4,ymm15,ymm8
vpxor ymm7,ymm7,ymm1
vpsrld ymm10,ymm11,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm11,30
vpxor ymm0,ymm0,ymm4
vpxor ymm4,ymm12,ymm11
vpxor ymm10,ymm10,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm11,13
vpslld ymm2,ymm11,19
vpaddd ymm5,ymm5,ymm0
vpand ymm3,ymm3,ymm4
vpxor ymm7,ymm10,ymm1
vpsrld ymm1,ymm11,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm11,10
vpxor ymm10,ymm12,ymm3
vpaddd ymm14,ymm14,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm10,ymm10,ymm5
vpaddd ymm10,ymm10,ymm7
vmovd xmm5,DWORD PTR[24+r12]
vmovd xmm0,DWORD PTR[24+r8]
vmovd xmm1,DWORD PTR[24+r13]
vmovd xmm2,DWORD PTR[24+r9]
vpinsrd xmm5,xmm5,DWORD PTR[24+r14],1
vpinsrd xmm0,xmm0,DWORD PTR[24+r10],1
vpinsrd xmm1,xmm1,DWORD PTR[24+r15],1
vpunpckldq ymm5,ymm5,ymm1
vpinsrd xmm2,xmm2,DWORD PTR[24+r11],1
vpunpckldq ymm0,ymm0,ymm2
vinserti128 ymm5,ymm5,xmm0,1
vpshufb ymm5,ymm5,ymm6
vpsrld ymm7,ymm14,6
vpslld ymm2,ymm14,26
vmovdqu YMMWORD PTR[(192-128)+rax],ymm5
vpaddd ymm5,ymm5,ymm9
vpsrld ymm1,ymm14,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm14,21
vpaddd ymm5,ymm5,YMMWORD PTR[64+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm14,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm14,7
vpandn ymm0,ymm14,ymm8
vpand ymm3,ymm14,ymm15
vpxor ymm7,ymm7,ymm1
vpsrld ymm9,ymm10,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm10,30
vpxor ymm0,ymm0,ymm3
vpxor ymm3,ymm11,ymm10
vpxor ymm9,ymm9,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm10,13
vpslld ymm2,ymm10,19
vpaddd ymm5,ymm5,ymm0
vpand ymm4,ymm4,ymm3
vpxor ymm7,ymm9,ymm1
vpsrld ymm1,ymm10,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm10,10
vpxor ymm9,ymm11,ymm4
vpaddd ymm13,ymm13,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm9,ymm9,ymm5
vpaddd ymm9,ymm9,ymm7
vmovd xmm5,DWORD PTR[28+r12]
vmovd xmm0,DWORD PTR[28+r8]
vmovd xmm1,DWORD PTR[28+r13]
vmovd xmm2,DWORD PTR[28+r9]
vpinsrd xmm5,xmm5,DWORD PTR[28+r14],1
vpinsrd xmm0,xmm0,DWORD PTR[28+r10],1
vpinsrd xmm1,xmm1,DWORD PTR[28+r15],1
vpunpckldq ymm5,ymm5,ymm1
vpinsrd xmm2,xmm2,DWORD PTR[28+r11],1
vpunpckldq ymm0,ymm0,ymm2
vinserti128 ymm5,ymm5,xmm0,1
vpshufb ymm5,ymm5,ymm6
vpsrld ymm7,ymm13,6
vpslld ymm2,ymm13,26
vmovdqu YMMWORD PTR[(224-128)+rax],ymm5
vpaddd ymm5,ymm5,ymm8
vpsrld ymm1,ymm13,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm13,21
vpaddd ymm5,ymm5,YMMWORD PTR[96+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm13,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm13,7
vpandn ymm0,ymm13,ymm15
vpand ymm4,ymm13,ymm14
vpxor ymm7,ymm7,ymm1
vpsrld ymm8,ymm9,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm9,30
vpxor ymm0,ymm0,ymm4
vpxor ymm4,ymm10,ymm9
vpxor ymm8,ymm8,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm9,13
vpslld ymm2,ymm9,19
vpaddd ymm5,ymm5,ymm0
vpand ymm3,ymm3,ymm4
vpxor ymm7,ymm8,ymm1
vpsrld ymm1,ymm9,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm9,10
vpxor ymm8,ymm10,ymm3
vpaddd ymm12,ymm12,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm8,ymm8,ymm5
vpaddd ymm8,ymm8,ymm7
add rbp,256
vmovd xmm5,DWORD PTR[32+r12]
vmovd xmm0,DWORD PTR[32+r8]
vmovd xmm1,DWORD PTR[32+r13]
vmovd xmm2,DWORD PTR[32+r9]
vpinsrd xmm5,xmm5,DWORD PTR[32+r14],1
vpinsrd xmm0,xmm0,DWORD PTR[32+r10],1
vpinsrd xmm1,xmm1,DWORD PTR[32+r15],1
vpunpckldq ymm5,ymm5,ymm1
vpinsrd xmm2,xmm2,DWORD PTR[32+r11],1
vpunpckldq ymm0,ymm0,ymm2
vinserti128 ymm5,ymm5,xmm0,1
vpshufb ymm5,ymm5,ymm6
vpsrld ymm7,ymm12,6
vpslld ymm2,ymm12,26
vmovdqu YMMWORD PTR[(256-256-128)+rbx],ymm5
vpaddd ymm5,ymm5,ymm15
vpsrld ymm1,ymm12,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm12,21
vpaddd ymm5,ymm5,YMMWORD PTR[((-128))+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm12,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm12,7
vpandn ymm0,ymm12,ymm14
vpand ymm3,ymm12,ymm13
vpxor ymm7,ymm7,ymm1
vpsrld ymm15,ymm8,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm8,30
vpxor ymm0,ymm0,ymm3
vpxor ymm3,ymm9,ymm8
vpxor ymm15,ymm15,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm8,13
vpslld ymm2,ymm8,19
vpaddd ymm5,ymm5,ymm0
vpand ymm4,ymm4,ymm3
vpxor ymm7,ymm15,ymm1
vpsrld ymm1,ymm8,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm8,10
vpxor ymm15,ymm9,ymm4
vpaddd ymm11,ymm11,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm15,ymm15,ymm5
vpaddd ymm15,ymm15,ymm7
vmovd xmm5,DWORD PTR[36+r12]
vmovd xmm0,DWORD PTR[36+r8]
vmovd xmm1,DWORD PTR[36+r13]
vmovd xmm2,DWORD PTR[36+r9]
vpinsrd xmm5,xmm5,DWORD PTR[36+r14],1
vpinsrd xmm0,xmm0,DWORD PTR[36+r10],1
vpinsrd xmm1,xmm1,DWORD PTR[36+r15],1
vpunpckldq ymm5,ymm5,ymm1
vpinsrd xmm2,xmm2,DWORD PTR[36+r11],1
vpunpckldq ymm0,ymm0,ymm2
vinserti128 ymm5,ymm5,xmm0,1
vpshufb ymm5,ymm5,ymm6
vpsrld ymm7,ymm11,6
vpslld ymm2,ymm11,26
vmovdqu YMMWORD PTR[(288-256-128)+rbx],ymm5
vpaddd ymm5,ymm5,ymm14
vpsrld ymm1,ymm11,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm11,21
vpaddd ymm5,ymm5,YMMWORD PTR[((-96))+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm11,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm11,7
vpandn ymm0,ymm11,ymm13
vpand ymm4,ymm11,ymm12
vpxor ymm7,ymm7,ymm1
vpsrld ymm14,ymm15,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm15,30
vpxor ymm0,ymm0,ymm4
vpxor ymm4,ymm8,ymm15
vpxor ymm14,ymm14,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm15,13
vpslld ymm2,ymm15,19
vpaddd ymm5,ymm5,ymm0
vpand ymm3,ymm3,ymm4
vpxor ymm7,ymm14,ymm1
vpsrld ymm1,ymm15,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm15,10
vpxor ymm14,ymm8,ymm3
vpaddd ymm10,ymm10,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm14,ymm14,ymm5
vpaddd ymm14,ymm14,ymm7
vmovd xmm5,DWORD PTR[40+r12]
vmovd xmm0,DWORD PTR[40+r8]
vmovd xmm1,DWORD PTR[40+r13]
vmovd xmm2,DWORD PTR[40+r9]
vpinsrd xmm5,xmm5,DWORD PTR[40+r14],1
vpinsrd xmm0,xmm0,DWORD PTR[40+r10],1
vpinsrd xmm1,xmm1,DWORD PTR[40+r15],1
vpunpckldq ymm5,ymm5,ymm1
vpinsrd xmm2,xmm2,DWORD PTR[40+r11],1
vpunpckldq ymm0,ymm0,ymm2
vinserti128 ymm5,ymm5,xmm0,1
vpshufb ymm5,ymm5,ymm6
vpsrld ymm7,ymm10,6
vpslld ymm2,ymm10,26
vmovdqu YMMWORD PTR[(320-256-128)+rbx],ymm5
vpaddd ymm5,ymm5,ymm13
vpsrld ymm1,ymm10,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm10,21
vpaddd ymm5,ymm5,YMMWORD PTR[((-64))+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm10,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm10,7
vpandn ymm0,ymm10,ymm12
vpand ymm3,ymm10,ymm11
vpxor ymm7,ymm7,ymm1
vpsrld ymm13,ymm14,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm14,30
vpxor ymm0,ymm0,ymm3
vpxor ymm3,ymm15,ymm14
vpxor ymm13,ymm13,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm14,13
vpslld ymm2,ymm14,19
vpaddd ymm5,ymm5,ymm0
vpand ymm4,ymm4,ymm3
vpxor ymm7,ymm13,ymm1
vpsrld ymm1,ymm14,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm14,10
vpxor ymm13,ymm15,ymm4
vpaddd ymm9,ymm9,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm13,ymm13,ymm5
vpaddd ymm13,ymm13,ymm7
vmovd xmm5,DWORD PTR[44+r12]
vmovd xmm0,DWORD PTR[44+r8]
vmovd xmm1,DWORD PTR[44+r13]
vmovd xmm2,DWORD PTR[44+r9]
vpinsrd xmm5,xmm5,DWORD PTR[44+r14],1
vpinsrd xmm0,xmm0,DWORD PTR[44+r10],1
vpinsrd xmm1,xmm1,DWORD PTR[44+r15],1
vpunpckldq ymm5,ymm5,ymm1
vpinsrd xmm2,xmm2,DWORD PTR[44+r11],1
vpunpckldq ymm0,ymm0,ymm2
vinserti128 ymm5,ymm5,xmm0,1
vpshufb ymm5,ymm5,ymm6
vpsrld ymm7,ymm9,6
vpslld ymm2,ymm9,26
vmovdqu YMMWORD PTR[(352-256-128)+rbx],ymm5
vpaddd ymm5,ymm5,ymm12
vpsrld ymm1,ymm9,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm9,21
vpaddd ymm5,ymm5,YMMWORD PTR[((-32))+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm9,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm9,7
vpandn ymm0,ymm9,ymm11
vpand ymm4,ymm9,ymm10
vpxor ymm7,ymm7,ymm1
vpsrld ymm12,ymm13,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm13,30
vpxor ymm0,ymm0,ymm4
vpxor ymm4,ymm14,ymm13
vpxor ymm12,ymm12,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm13,13
vpslld ymm2,ymm13,19
vpaddd ymm5,ymm5,ymm0
vpand ymm3,ymm3,ymm4
vpxor ymm7,ymm12,ymm1
vpsrld ymm1,ymm13,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm13,10
vpxor ymm12,ymm14,ymm3
vpaddd ymm8,ymm8,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm12,ymm12,ymm5
vpaddd ymm12,ymm12,ymm7
vmovd xmm5,DWORD PTR[48+r12]
vmovd xmm0,DWORD PTR[48+r8]
vmovd xmm1,DWORD PTR[48+r13]
vmovd xmm2,DWORD PTR[48+r9]
vpinsrd xmm5,xmm5,DWORD PTR[48+r14],1
vpinsrd xmm0,xmm0,DWORD PTR[48+r10],1
vpinsrd xmm1,xmm1,DWORD PTR[48+r15],1
vpunpckldq ymm5,ymm5,ymm1
vpinsrd xmm2,xmm2,DWORD PTR[48+r11],1
vpunpckldq ymm0,ymm0,ymm2
vinserti128 ymm5,ymm5,xmm0,1
vpshufb ymm5,ymm5,ymm6
vpsrld ymm7,ymm8,6
vpslld ymm2,ymm8,26
vmovdqu YMMWORD PTR[(384-256-128)+rbx],ymm5
vpaddd ymm5,ymm5,ymm11
vpsrld ymm1,ymm8,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm8,21
vpaddd ymm5,ymm5,YMMWORD PTR[rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm8,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm8,7
vpandn ymm0,ymm8,ymm10
vpand ymm3,ymm8,ymm9
vpxor ymm7,ymm7,ymm1
vpsrld ymm11,ymm12,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm12,30
vpxor ymm0,ymm0,ymm3
vpxor ymm3,ymm13,ymm12
vpxor ymm11,ymm11,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm12,13
vpslld ymm2,ymm12,19
vpaddd ymm5,ymm5,ymm0
vpand ymm4,ymm4,ymm3
vpxor ymm7,ymm11,ymm1
vpsrld ymm1,ymm12,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm12,10
vpxor ymm11,ymm13,ymm4
vpaddd ymm15,ymm15,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm11,ymm11,ymm5
vpaddd ymm11,ymm11,ymm7
vmovd xmm5,DWORD PTR[52+r12]
vmovd xmm0,DWORD PTR[52+r8]
vmovd xmm1,DWORD PTR[52+r13]
vmovd xmm2,DWORD PTR[52+r9]
vpinsrd xmm5,xmm5,DWORD PTR[52+r14],1
vpinsrd xmm0,xmm0,DWORD PTR[52+r10],1
vpinsrd xmm1,xmm1,DWORD PTR[52+r15],1
vpunpckldq ymm5,ymm5,ymm1
vpinsrd xmm2,xmm2,DWORD PTR[52+r11],1
vpunpckldq ymm0,ymm0,ymm2
vinserti128 ymm5,ymm5,xmm0,1
vpshufb ymm5,ymm5,ymm6
vpsrld ymm7,ymm15,6
vpslld ymm2,ymm15,26
vmovdqu YMMWORD PTR[(416-256-128)+rbx],ymm5
vpaddd ymm5,ymm5,ymm10
vpsrld ymm1,ymm15,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm15,21
vpaddd ymm5,ymm5,YMMWORD PTR[32+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm15,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm15,7
vpandn ymm0,ymm15,ymm9
vpand ymm4,ymm15,ymm8
vpxor ymm7,ymm7,ymm1
vpsrld ymm10,ymm11,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm11,30
vpxor ymm0,ymm0,ymm4
vpxor ymm4,ymm12,ymm11
vpxor ymm10,ymm10,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm11,13
vpslld ymm2,ymm11,19
vpaddd ymm5,ymm5,ymm0
vpand ymm3,ymm3,ymm4
vpxor ymm7,ymm10,ymm1
vpsrld ymm1,ymm11,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm11,10
vpxor ymm10,ymm12,ymm3
vpaddd ymm14,ymm14,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm10,ymm10,ymm5
vpaddd ymm10,ymm10,ymm7
vmovd xmm5,DWORD PTR[56+r12]
vmovd xmm0,DWORD PTR[56+r8]
vmovd xmm1,DWORD PTR[56+r13]
vmovd xmm2,DWORD PTR[56+r9]
vpinsrd xmm5,xmm5,DWORD PTR[56+r14],1
vpinsrd xmm0,xmm0,DWORD PTR[56+r10],1
vpinsrd xmm1,xmm1,DWORD PTR[56+r15],1
vpunpckldq ymm5,ymm5,ymm1
vpinsrd xmm2,xmm2,DWORD PTR[56+r11],1
vpunpckldq ymm0,ymm0,ymm2
vinserti128 ymm5,ymm5,xmm0,1
vpshufb ymm5,ymm5,ymm6
vpsrld ymm7,ymm14,6
vpslld ymm2,ymm14,26
vmovdqu YMMWORD PTR[(448-256-128)+rbx],ymm5
vpaddd ymm5,ymm5,ymm9
vpsrld ymm1,ymm14,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm14,21
vpaddd ymm5,ymm5,YMMWORD PTR[64+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm14,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm14,7
vpandn ymm0,ymm14,ymm8
vpand ymm3,ymm14,ymm15
vpxor ymm7,ymm7,ymm1
vpsrld ymm9,ymm10,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm10,30
vpxor ymm0,ymm0,ymm3
vpxor ymm3,ymm11,ymm10
vpxor ymm9,ymm9,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm10,13
vpslld ymm2,ymm10,19
vpaddd ymm5,ymm5,ymm0
vpand ymm4,ymm4,ymm3
vpxor ymm7,ymm9,ymm1
vpsrld ymm1,ymm10,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm10,10
vpxor ymm9,ymm11,ymm4
vpaddd ymm13,ymm13,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm9,ymm9,ymm5
vpaddd ymm9,ymm9,ymm7
vmovd xmm5,DWORD PTR[60+r12]
lea r12,QWORD PTR[64+r12]
vmovd xmm0,DWORD PTR[60+r8]
lea r8,QWORD PTR[64+r8]
vmovd xmm1,DWORD PTR[60+r13]
lea r13,QWORD PTR[64+r13]
vmovd xmm2,DWORD PTR[60+r9]
lea r9,QWORD PTR[64+r9]
vpinsrd xmm5,xmm5,DWORD PTR[60+r14],1
lea r14,QWORD PTR[64+r14]
vpinsrd xmm0,xmm0,DWORD PTR[60+r10],1
lea r10,QWORD PTR[64+r10]
vpinsrd xmm1,xmm1,DWORD PTR[60+r15],1
lea r15,QWORD PTR[64+r15]
vpunpckldq ymm5,ymm5,ymm1
vpinsrd xmm2,xmm2,DWORD PTR[60+r11],1
lea r11,QWORD PTR[64+r11]
vpunpckldq ymm0,ymm0,ymm2
vinserti128 ymm5,ymm5,xmm0,1
vpshufb ymm5,ymm5,ymm6
vpsrld ymm7,ymm13,6
vpslld ymm2,ymm13,26
vmovdqu YMMWORD PTR[(480-256-128)+rbx],ymm5
vpaddd ymm5,ymm5,ymm8
vpsrld ymm1,ymm13,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm13,21
vpaddd ymm5,ymm5,YMMWORD PTR[96+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm13,25
vpxor ymm7,ymm7,ymm2
prefetcht0 [63+r12]
vpslld ymm2,ymm13,7
vpandn ymm0,ymm13,ymm15
vpand ymm4,ymm13,ymm14
prefetcht0 [63+r13]
vpxor ymm7,ymm7,ymm1
vpsrld ymm8,ymm9,2
vpxor ymm7,ymm7,ymm2
prefetcht0 [63+r14]
vpslld ymm1,ymm9,30
vpxor ymm0,ymm0,ymm4
vpxor ymm4,ymm10,ymm9
prefetcht0 [63+r15]
vpxor ymm8,ymm8,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm9,13
prefetcht0 [63+r8]
vpslld ymm2,ymm9,19
vpaddd ymm5,ymm5,ymm0
vpand ymm3,ymm3,ymm4
prefetcht0 [63+r9]
vpxor ymm7,ymm8,ymm1
vpsrld ymm1,ymm9,22
vpxor ymm7,ymm7,ymm2
prefetcht0 [63+r10]
vpslld ymm2,ymm9,10
vpxor ymm8,ymm10,ymm3
vpaddd ymm12,ymm12,ymm5
prefetcht0 [63+r11]
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm8,ymm8,ymm5
vpaddd ymm8,ymm8,ymm7
add rbp,256
vmovdqu ymm5,YMMWORD PTR[((0-128))+rax]
mov ecx,3
jmp $L$oop_16_xx_avx2
ALIGN 32
$L$oop_16_xx_avx2::
vmovdqu ymm6,YMMWORD PTR[((32-128))+rax]
vpaddd ymm5,ymm5,YMMWORD PTR[((288-256-128))+rbx]
vpsrld ymm7,ymm6,3
vpsrld ymm1,ymm6,7
vpslld ymm2,ymm6,25
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm6,18
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm6,14
vmovdqu ymm0,YMMWORD PTR[((448-256-128))+rbx]
vpsrld ymm3,ymm0,10
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm0,17
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,15
vpaddd ymm5,ymm5,ymm7
vpxor ymm7,ymm3,ymm1
vpsrld ymm1,ymm0,19
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,13
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm5,ymm5,ymm7
vpsrld ymm7,ymm12,6
vpslld ymm2,ymm12,26
vmovdqu YMMWORD PTR[(0-128)+rax],ymm5
vpaddd ymm5,ymm5,ymm15
vpsrld ymm1,ymm12,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm12,21
vpaddd ymm5,ymm5,YMMWORD PTR[((-128))+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm12,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm12,7
vpandn ymm0,ymm12,ymm14
vpand ymm3,ymm12,ymm13
vpxor ymm7,ymm7,ymm1
vpsrld ymm15,ymm8,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm8,30
vpxor ymm0,ymm0,ymm3
vpxor ymm3,ymm9,ymm8
vpxor ymm15,ymm15,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm8,13
vpslld ymm2,ymm8,19
vpaddd ymm5,ymm5,ymm0
vpand ymm4,ymm4,ymm3
vpxor ymm7,ymm15,ymm1
vpsrld ymm1,ymm8,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm8,10
vpxor ymm15,ymm9,ymm4
vpaddd ymm11,ymm11,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm15,ymm15,ymm5
vpaddd ymm15,ymm15,ymm7
vmovdqu ymm5,YMMWORD PTR[((64-128))+rax]
vpaddd ymm6,ymm6,YMMWORD PTR[((320-256-128))+rbx]
vpsrld ymm7,ymm5,3
vpsrld ymm1,ymm5,7
vpslld ymm2,ymm5,25
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm5,18
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm5,14
vmovdqu ymm0,YMMWORD PTR[((480-256-128))+rbx]
vpsrld ymm4,ymm0,10
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm0,17
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,15
vpaddd ymm6,ymm6,ymm7
vpxor ymm7,ymm4,ymm1
vpsrld ymm1,ymm0,19
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,13
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm6,ymm6,ymm7
vpsrld ymm7,ymm11,6
vpslld ymm2,ymm11,26
vmovdqu YMMWORD PTR[(32-128)+rax],ymm6
vpaddd ymm6,ymm6,ymm14
vpsrld ymm1,ymm11,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm11,21
vpaddd ymm6,ymm6,YMMWORD PTR[((-96))+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm11,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm11,7
vpandn ymm0,ymm11,ymm13
vpand ymm4,ymm11,ymm12
vpxor ymm7,ymm7,ymm1
vpsrld ymm14,ymm15,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm15,30
vpxor ymm0,ymm0,ymm4
vpxor ymm4,ymm8,ymm15
vpxor ymm14,ymm14,ymm1
vpaddd ymm6,ymm6,ymm7
vpsrld ymm1,ymm15,13
vpslld ymm2,ymm15,19
vpaddd ymm6,ymm6,ymm0
vpand ymm3,ymm3,ymm4
vpxor ymm7,ymm14,ymm1
vpsrld ymm1,ymm15,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm15,10
vpxor ymm14,ymm8,ymm3
vpaddd ymm10,ymm10,ymm6
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm14,ymm14,ymm6
vpaddd ymm14,ymm14,ymm7
vmovdqu ymm6,YMMWORD PTR[((96-128))+rax]
vpaddd ymm5,ymm5,YMMWORD PTR[((352-256-128))+rbx]
vpsrld ymm7,ymm6,3
vpsrld ymm1,ymm6,7
vpslld ymm2,ymm6,25
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm6,18
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm6,14
vmovdqu ymm0,YMMWORD PTR[((0-128))+rax]
vpsrld ymm3,ymm0,10
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm0,17
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,15
vpaddd ymm5,ymm5,ymm7
vpxor ymm7,ymm3,ymm1
vpsrld ymm1,ymm0,19
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,13
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm5,ymm5,ymm7
vpsrld ymm7,ymm10,6
vpslld ymm2,ymm10,26
vmovdqu YMMWORD PTR[(64-128)+rax],ymm5
vpaddd ymm5,ymm5,ymm13
vpsrld ymm1,ymm10,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm10,21
vpaddd ymm5,ymm5,YMMWORD PTR[((-64))+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm10,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm10,7
vpandn ymm0,ymm10,ymm12
vpand ymm3,ymm10,ymm11
vpxor ymm7,ymm7,ymm1
vpsrld ymm13,ymm14,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm14,30
vpxor ymm0,ymm0,ymm3
vpxor ymm3,ymm15,ymm14
vpxor ymm13,ymm13,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm14,13
vpslld ymm2,ymm14,19
vpaddd ymm5,ymm5,ymm0
vpand ymm4,ymm4,ymm3
vpxor ymm7,ymm13,ymm1
vpsrld ymm1,ymm14,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm14,10
vpxor ymm13,ymm15,ymm4
vpaddd ymm9,ymm9,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm13,ymm13,ymm5
vpaddd ymm13,ymm13,ymm7
vmovdqu ymm5,YMMWORD PTR[((128-128))+rax]
vpaddd ymm6,ymm6,YMMWORD PTR[((384-256-128))+rbx]
vpsrld ymm7,ymm5,3
vpsrld ymm1,ymm5,7
vpslld ymm2,ymm5,25
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm5,18
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm5,14
vmovdqu ymm0,YMMWORD PTR[((32-128))+rax]
vpsrld ymm4,ymm0,10
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm0,17
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,15
vpaddd ymm6,ymm6,ymm7
vpxor ymm7,ymm4,ymm1
vpsrld ymm1,ymm0,19
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,13
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm6,ymm6,ymm7
vpsrld ymm7,ymm9,6
vpslld ymm2,ymm9,26
vmovdqu YMMWORD PTR[(96-128)+rax],ymm6
vpaddd ymm6,ymm6,ymm12
vpsrld ymm1,ymm9,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm9,21
vpaddd ymm6,ymm6,YMMWORD PTR[((-32))+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm9,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm9,7
vpandn ymm0,ymm9,ymm11
vpand ymm4,ymm9,ymm10
vpxor ymm7,ymm7,ymm1
vpsrld ymm12,ymm13,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm13,30
vpxor ymm0,ymm0,ymm4
vpxor ymm4,ymm14,ymm13
vpxor ymm12,ymm12,ymm1
vpaddd ymm6,ymm6,ymm7
vpsrld ymm1,ymm13,13
vpslld ymm2,ymm13,19
vpaddd ymm6,ymm6,ymm0
vpand ymm3,ymm3,ymm4
vpxor ymm7,ymm12,ymm1
vpsrld ymm1,ymm13,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm13,10
vpxor ymm12,ymm14,ymm3
vpaddd ymm8,ymm8,ymm6
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm12,ymm12,ymm6
vpaddd ymm12,ymm12,ymm7
vmovdqu ymm6,YMMWORD PTR[((160-128))+rax]
vpaddd ymm5,ymm5,YMMWORD PTR[((416-256-128))+rbx]
vpsrld ymm7,ymm6,3
vpsrld ymm1,ymm6,7
vpslld ymm2,ymm6,25
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm6,18
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm6,14
vmovdqu ymm0,YMMWORD PTR[((64-128))+rax]
vpsrld ymm3,ymm0,10
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm0,17
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,15
vpaddd ymm5,ymm5,ymm7
vpxor ymm7,ymm3,ymm1
vpsrld ymm1,ymm0,19
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,13
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm5,ymm5,ymm7
vpsrld ymm7,ymm8,6
vpslld ymm2,ymm8,26
vmovdqu YMMWORD PTR[(128-128)+rax],ymm5
vpaddd ymm5,ymm5,ymm11
vpsrld ymm1,ymm8,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm8,21
vpaddd ymm5,ymm5,YMMWORD PTR[rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm8,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm8,7
vpandn ymm0,ymm8,ymm10
vpand ymm3,ymm8,ymm9
vpxor ymm7,ymm7,ymm1
vpsrld ymm11,ymm12,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm12,30
vpxor ymm0,ymm0,ymm3
vpxor ymm3,ymm13,ymm12
vpxor ymm11,ymm11,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm12,13
vpslld ymm2,ymm12,19
vpaddd ymm5,ymm5,ymm0
vpand ymm4,ymm4,ymm3
vpxor ymm7,ymm11,ymm1
vpsrld ymm1,ymm12,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm12,10
vpxor ymm11,ymm13,ymm4
vpaddd ymm15,ymm15,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm11,ymm11,ymm5
vpaddd ymm11,ymm11,ymm7
vmovdqu ymm5,YMMWORD PTR[((192-128))+rax]
vpaddd ymm6,ymm6,YMMWORD PTR[((448-256-128))+rbx]
vpsrld ymm7,ymm5,3
vpsrld ymm1,ymm5,7
vpslld ymm2,ymm5,25
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm5,18
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm5,14
vmovdqu ymm0,YMMWORD PTR[((96-128))+rax]
vpsrld ymm4,ymm0,10
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm0,17
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,15
vpaddd ymm6,ymm6,ymm7
vpxor ymm7,ymm4,ymm1
vpsrld ymm1,ymm0,19
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,13
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm6,ymm6,ymm7
vpsrld ymm7,ymm15,6
vpslld ymm2,ymm15,26
vmovdqu YMMWORD PTR[(160-128)+rax],ymm6
vpaddd ymm6,ymm6,ymm10
vpsrld ymm1,ymm15,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm15,21
vpaddd ymm6,ymm6,YMMWORD PTR[32+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm15,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm15,7
vpandn ymm0,ymm15,ymm9
vpand ymm4,ymm15,ymm8
vpxor ymm7,ymm7,ymm1
vpsrld ymm10,ymm11,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm11,30
vpxor ymm0,ymm0,ymm4
vpxor ymm4,ymm12,ymm11
vpxor ymm10,ymm10,ymm1
vpaddd ymm6,ymm6,ymm7
vpsrld ymm1,ymm11,13
vpslld ymm2,ymm11,19
vpaddd ymm6,ymm6,ymm0
vpand ymm3,ymm3,ymm4
vpxor ymm7,ymm10,ymm1
vpsrld ymm1,ymm11,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm11,10
vpxor ymm10,ymm12,ymm3
vpaddd ymm14,ymm14,ymm6
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm10,ymm10,ymm6
vpaddd ymm10,ymm10,ymm7
vmovdqu ymm6,YMMWORD PTR[((224-128))+rax]
vpaddd ymm5,ymm5,YMMWORD PTR[((480-256-128))+rbx]
vpsrld ymm7,ymm6,3
vpsrld ymm1,ymm6,7
vpslld ymm2,ymm6,25
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm6,18
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm6,14
vmovdqu ymm0,YMMWORD PTR[((128-128))+rax]
vpsrld ymm3,ymm0,10
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm0,17
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,15
vpaddd ymm5,ymm5,ymm7
vpxor ymm7,ymm3,ymm1
vpsrld ymm1,ymm0,19
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,13
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm5,ymm5,ymm7
vpsrld ymm7,ymm14,6
vpslld ymm2,ymm14,26
vmovdqu YMMWORD PTR[(192-128)+rax],ymm5
vpaddd ymm5,ymm5,ymm9
vpsrld ymm1,ymm14,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm14,21
vpaddd ymm5,ymm5,YMMWORD PTR[64+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm14,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm14,7
vpandn ymm0,ymm14,ymm8
vpand ymm3,ymm14,ymm15
vpxor ymm7,ymm7,ymm1
vpsrld ymm9,ymm10,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm10,30
vpxor ymm0,ymm0,ymm3
vpxor ymm3,ymm11,ymm10
vpxor ymm9,ymm9,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm10,13
vpslld ymm2,ymm10,19
vpaddd ymm5,ymm5,ymm0
vpand ymm4,ymm4,ymm3
vpxor ymm7,ymm9,ymm1
vpsrld ymm1,ymm10,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm10,10
vpxor ymm9,ymm11,ymm4
vpaddd ymm13,ymm13,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm9,ymm9,ymm5
vpaddd ymm9,ymm9,ymm7
vmovdqu ymm5,YMMWORD PTR[((256-256-128))+rbx]
vpaddd ymm6,ymm6,YMMWORD PTR[((0-128))+rax]
vpsrld ymm7,ymm5,3
vpsrld ymm1,ymm5,7
vpslld ymm2,ymm5,25
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm5,18
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm5,14
vmovdqu ymm0,YMMWORD PTR[((160-128))+rax]
vpsrld ymm4,ymm0,10
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm0,17
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,15
vpaddd ymm6,ymm6,ymm7
vpxor ymm7,ymm4,ymm1
vpsrld ymm1,ymm0,19
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,13
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm6,ymm6,ymm7
vpsrld ymm7,ymm13,6
vpslld ymm2,ymm13,26
vmovdqu YMMWORD PTR[(224-128)+rax],ymm6
vpaddd ymm6,ymm6,ymm8
vpsrld ymm1,ymm13,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm13,21
vpaddd ymm6,ymm6,YMMWORD PTR[96+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm13,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm13,7
vpandn ymm0,ymm13,ymm15
vpand ymm4,ymm13,ymm14
vpxor ymm7,ymm7,ymm1
vpsrld ymm8,ymm9,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm9,30
vpxor ymm0,ymm0,ymm4
vpxor ymm4,ymm10,ymm9
vpxor ymm8,ymm8,ymm1
vpaddd ymm6,ymm6,ymm7
vpsrld ymm1,ymm9,13
vpslld ymm2,ymm9,19
vpaddd ymm6,ymm6,ymm0
vpand ymm3,ymm3,ymm4
vpxor ymm7,ymm8,ymm1
vpsrld ymm1,ymm9,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm9,10
vpxor ymm8,ymm10,ymm3
vpaddd ymm12,ymm12,ymm6
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm8,ymm8,ymm6
vpaddd ymm8,ymm8,ymm7
add rbp,256
vmovdqu ymm6,YMMWORD PTR[((288-256-128))+rbx]
vpaddd ymm5,ymm5,YMMWORD PTR[((32-128))+rax]
vpsrld ymm7,ymm6,3
vpsrld ymm1,ymm6,7
vpslld ymm2,ymm6,25
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm6,18
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm6,14
vmovdqu ymm0,YMMWORD PTR[((192-128))+rax]
vpsrld ymm3,ymm0,10
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm0,17
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,15
vpaddd ymm5,ymm5,ymm7
vpxor ymm7,ymm3,ymm1
vpsrld ymm1,ymm0,19
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,13
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm5,ymm5,ymm7
vpsrld ymm7,ymm12,6
vpslld ymm2,ymm12,26
vmovdqu YMMWORD PTR[(256-256-128)+rbx],ymm5
vpaddd ymm5,ymm5,ymm15
vpsrld ymm1,ymm12,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm12,21
vpaddd ymm5,ymm5,YMMWORD PTR[((-128))+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm12,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm12,7
vpandn ymm0,ymm12,ymm14
vpand ymm3,ymm12,ymm13
vpxor ymm7,ymm7,ymm1
vpsrld ymm15,ymm8,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm8,30
vpxor ymm0,ymm0,ymm3
vpxor ymm3,ymm9,ymm8
vpxor ymm15,ymm15,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm8,13
vpslld ymm2,ymm8,19
vpaddd ymm5,ymm5,ymm0
vpand ymm4,ymm4,ymm3
vpxor ymm7,ymm15,ymm1
vpsrld ymm1,ymm8,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm8,10
vpxor ymm15,ymm9,ymm4
vpaddd ymm11,ymm11,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm15,ymm15,ymm5
vpaddd ymm15,ymm15,ymm7
vmovdqu ymm5,YMMWORD PTR[((320-256-128))+rbx]
vpaddd ymm6,ymm6,YMMWORD PTR[((64-128))+rax]
vpsrld ymm7,ymm5,3
vpsrld ymm1,ymm5,7
vpslld ymm2,ymm5,25
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm5,18
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm5,14
vmovdqu ymm0,YMMWORD PTR[((224-128))+rax]
vpsrld ymm4,ymm0,10
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm0,17
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,15
vpaddd ymm6,ymm6,ymm7
vpxor ymm7,ymm4,ymm1
vpsrld ymm1,ymm0,19
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,13
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm6,ymm6,ymm7
vpsrld ymm7,ymm11,6
vpslld ymm2,ymm11,26
vmovdqu YMMWORD PTR[(288-256-128)+rbx],ymm6
vpaddd ymm6,ymm6,ymm14
vpsrld ymm1,ymm11,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm11,21
vpaddd ymm6,ymm6,YMMWORD PTR[((-96))+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm11,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm11,7
vpandn ymm0,ymm11,ymm13
vpand ymm4,ymm11,ymm12
vpxor ymm7,ymm7,ymm1
vpsrld ymm14,ymm15,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm15,30
vpxor ymm0,ymm0,ymm4
vpxor ymm4,ymm8,ymm15
vpxor ymm14,ymm14,ymm1
vpaddd ymm6,ymm6,ymm7
vpsrld ymm1,ymm15,13
vpslld ymm2,ymm15,19
vpaddd ymm6,ymm6,ymm0
vpand ymm3,ymm3,ymm4
vpxor ymm7,ymm14,ymm1
vpsrld ymm1,ymm15,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm15,10
vpxor ymm14,ymm8,ymm3
vpaddd ymm10,ymm10,ymm6
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm14,ymm14,ymm6
vpaddd ymm14,ymm14,ymm7
vmovdqu ymm6,YMMWORD PTR[((352-256-128))+rbx]
vpaddd ymm5,ymm5,YMMWORD PTR[((96-128))+rax]
vpsrld ymm7,ymm6,3
vpsrld ymm1,ymm6,7
vpslld ymm2,ymm6,25
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm6,18
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm6,14
vmovdqu ymm0,YMMWORD PTR[((256-256-128))+rbx]
vpsrld ymm3,ymm0,10
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm0,17
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,15
vpaddd ymm5,ymm5,ymm7
vpxor ymm7,ymm3,ymm1
vpsrld ymm1,ymm0,19
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,13
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm5,ymm5,ymm7
vpsrld ymm7,ymm10,6
vpslld ymm2,ymm10,26
vmovdqu YMMWORD PTR[(320-256-128)+rbx],ymm5
vpaddd ymm5,ymm5,ymm13
vpsrld ymm1,ymm10,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm10,21
vpaddd ymm5,ymm5,YMMWORD PTR[((-64))+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm10,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm10,7
vpandn ymm0,ymm10,ymm12
vpand ymm3,ymm10,ymm11
vpxor ymm7,ymm7,ymm1
vpsrld ymm13,ymm14,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm14,30
vpxor ymm0,ymm0,ymm3
vpxor ymm3,ymm15,ymm14
vpxor ymm13,ymm13,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm14,13
vpslld ymm2,ymm14,19
vpaddd ymm5,ymm5,ymm0
vpand ymm4,ymm4,ymm3
vpxor ymm7,ymm13,ymm1
vpsrld ymm1,ymm14,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm14,10
vpxor ymm13,ymm15,ymm4
vpaddd ymm9,ymm9,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm13,ymm13,ymm5
vpaddd ymm13,ymm13,ymm7
vmovdqu ymm5,YMMWORD PTR[((384-256-128))+rbx]
vpaddd ymm6,ymm6,YMMWORD PTR[((128-128))+rax]
vpsrld ymm7,ymm5,3
vpsrld ymm1,ymm5,7
vpslld ymm2,ymm5,25
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm5,18
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm5,14
vmovdqu ymm0,YMMWORD PTR[((288-256-128))+rbx]
vpsrld ymm4,ymm0,10
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm0,17
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,15
vpaddd ymm6,ymm6,ymm7
vpxor ymm7,ymm4,ymm1
vpsrld ymm1,ymm0,19
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,13
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm6,ymm6,ymm7
vpsrld ymm7,ymm9,6
vpslld ymm2,ymm9,26
vmovdqu YMMWORD PTR[(352-256-128)+rbx],ymm6
vpaddd ymm6,ymm6,ymm12
vpsrld ymm1,ymm9,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm9,21
vpaddd ymm6,ymm6,YMMWORD PTR[((-32))+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm9,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm9,7
vpandn ymm0,ymm9,ymm11
vpand ymm4,ymm9,ymm10
vpxor ymm7,ymm7,ymm1
vpsrld ymm12,ymm13,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm13,30
vpxor ymm0,ymm0,ymm4
vpxor ymm4,ymm14,ymm13
vpxor ymm12,ymm12,ymm1
vpaddd ymm6,ymm6,ymm7
vpsrld ymm1,ymm13,13
vpslld ymm2,ymm13,19
vpaddd ymm6,ymm6,ymm0
vpand ymm3,ymm3,ymm4
vpxor ymm7,ymm12,ymm1
vpsrld ymm1,ymm13,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm13,10
vpxor ymm12,ymm14,ymm3
vpaddd ymm8,ymm8,ymm6
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm12,ymm12,ymm6
vpaddd ymm12,ymm12,ymm7
vmovdqu ymm6,YMMWORD PTR[((416-256-128))+rbx]
vpaddd ymm5,ymm5,YMMWORD PTR[((160-128))+rax]
vpsrld ymm7,ymm6,3
vpsrld ymm1,ymm6,7
vpslld ymm2,ymm6,25
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm6,18
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm6,14
vmovdqu ymm0,YMMWORD PTR[((320-256-128))+rbx]
vpsrld ymm3,ymm0,10
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm0,17
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,15
vpaddd ymm5,ymm5,ymm7
vpxor ymm7,ymm3,ymm1
vpsrld ymm1,ymm0,19
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,13
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm5,ymm5,ymm7
vpsrld ymm7,ymm8,6
vpslld ymm2,ymm8,26
vmovdqu YMMWORD PTR[(384-256-128)+rbx],ymm5
vpaddd ymm5,ymm5,ymm11
vpsrld ymm1,ymm8,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm8,21
vpaddd ymm5,ymm5,YMMWORD PTR[rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm8,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm8,7
vpandn ymm0,ymm8,ymm10
vpand ymm3,ymm8,ymm9
vpxor ymm7,ymm7,ymm1
vpsrld ymm11,ymm12,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm12,30
vpxor ymm0,ymm0,ymm3
vpxor ymm3,ymm13,ymm12
vpxor ymm11,ymm11,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm12,13
vpslld ymm2,ymm12,19
vpaddd ymm5,ymm5,ymm0
vpand ymm4,ymm4,ymm3
vpxor ymm7,ymm11,ymm1
vpsrld ymm1,ymm12,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm12,10
vpxor ymm11,ymm13,ymm4
vpaddd ymm15,ymm15,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm11,ymm11,ymm5
vpaddd ymm11,ymm11,ymm7
vmovdqu ymm5,YMMWORD PTR[((448-256-128))+rbx]
vpaddd ymm6,ymm6,YMMWORD PTR[((192-128))+rax]
vpsrld ymm7,ymm5,3
vpsrld ymm1,ymm5,7
vpslld ymm2,ymm5,25
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm5,18
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm5,14
vmovdqu ymm0,YMMWORD PTR[((352-256-128))+rbx]
vpsrld ymm4,ymm0,10
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm0,17
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,15
vpaddd ymm6,ymm6,ymm7
vpxor ymm7,ymm4,ymm1
vpsrld ymm1,ymm0,19
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,13
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm6,ymm6,ymm7
vpsrld ymm7,ymm15,6
vpslld ymm2,ymm15,26
vmovdqu YMMWORD PTR[(416-256-128)+rbx],ymm6
vpaddd ymm6,ymm6,ymm10
vpsrld ymm1,ymm15,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm15,21
vpaddd ymm6,ymm6,YMMWORD PTR[32+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm15,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm15,7
vpandn ymm0,ymm15,ymm9
vpand ymm4,ymm15,ymm8
vpxor ymm7,ymm7,ymm1
vpsrld ymm10,ymm11,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm11,30
vpxor ymm0,ymm0,ymm4
vpxor ymm4,ymm12,ymm11
vpxor ymm10,ymm10,ymm1
vpaddd ymm6,ymm6,ymm7
vpsrld ymm1,ymm11,13
vpslld ymm2,ymm11,19
vpaddd ymm6,ymm6,ymm0
vpand ymm3,ymm3,ymm4
vpxor ymm7,ymm10,ymm1
vpsrld ymm1,ymm11,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm11,10
vpxor ymm10,ymm12,ymm3
vpaddd ymm14,ymm14,ymm6
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm10,ymm10,ymm6
vpaddd ymm10,ymm10,ymm7
vmovdqu ymm6,YMMWORD PTR[((480-256-128))+rbx]
vpaddd ymm5,ymm5,YMMWORD PTR[((224-128))+rax]
vpsrld ymm7,ymm6,3
vpsrld ymm1,ymm6,7
vpslld ymm2,ymm6,25
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm6,18
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm6,14
vmovdqu ymm0,YMMWORD PTR[((384-256-128))+rbx]
vpsrld ymm3,ymm0,10
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm0,17
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,15
vpaddd ymm5,ymm5,ymm7
vpxor ymm7,ymm3,ymm1
vpsrld ymm1,ymm0,19
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,13
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm5,ymm5,ymm7
vpsrld ymm7,ymm14,6
vpslld ymm2,ymm14,26
vmovdqu YMMWORD PTR[(448-256-128)+rbx],ymm5
vpaddd ymm5,ymm5,ymm9
vpsrld ymm1,ymm14,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm14,21
vpaddd ymm5,ymm5,YMMWORD PTR[64+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm14,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm14,7
vpandn ymm0,ymm14,ymm8
vpand ymm3,ymm14,ymm15
vpxor ymm7,ymm7,ymm1
vpsrld ymm9,ymm10,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm10,30
vpxor ymm0,ymm0,ymm3
vpxor ymm3,ymm11,ymm10
vpxor ymm9,ymm9,ymm1
vpaddd ymm5,ymm5,ymm7
vpsrld ymm1,ymm10,13
vpslld ymm2,ymm10,19
vpaddd ymm5,ymm5,ymm0
vpand ymm4,ymm4,ymm3
vpxor ymm7,ymm9,ymm1
vpsrld ymm1,ymm10,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm10,10
vpxor ymm9,ymm11,ymm4
vpaddd ymm13,ymm13,ymm5
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm9,ymm9,ymm5
vpaddd ymm9,ymm9,ymm7
vmovdqu ymm5,YMMWORD PTR[((0-128))+rax]
vpaddd ymm6,ymm6,YMMWORD PTR[((256-256-128))+rbx]
vpsrld ymm7,ymm5,3
vpsrld ymm1,ymm5,7
vpslld ymm2,ymm5,25
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm5,18
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm5,14
vmovdqu ymm0,YMMWORD PTR[((416-256-128))+rbx]
vpsrld ymm4,ymm0,10
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm0,17
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,15
vpaddd ymm6,ymm6,ymm7
vpxor ymm7,ymm4,ymm1
vpsrld ymm1,ymm0,19
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm0,13
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm6,ymm6,ymm7
vpsrld ymm7,ymm13,6
vpslld ymm2,ymm13,26
vmovdqu YMMWORD PTR[(480-256-128)+rbx],ymm6
vpaddd ymm6,ymm6,ymm8
vpsrld ymm1,ymm13,11
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm13,21
vpaddd ymm6,ymm6,YMMWORD PTR[96+rbp]
vpxor ymm7,ymm7,ymm1
vpsrld ymm1,ymm13,25
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm13,7
vpandn ymm0,ymm13,ymm15
vpand ymm4,ymm13,ymm14
vpxor ymm7,ymm7,ymm1
vpsrld ymm8,ymm9,2
vpxor ymm7,ymm7,ymm2
vpslld ymm1,ymm9,30
vpxor ymm0,ymm0,ymm4
vpxor ymm4,ymm10,ymm9
vpxor ymm8,ymm8,ymm1
vpaddd ymm6,ymm6,ymm7
vpsrld ymm1,ymm9,13
vpslld ymm2,ymm9,19
vpaddd ymm6,ymm6,ymm0
vpand ymm3,ymm3,ymm4
vpxor ymm7,ymm8,ymm1
vpsrld ymm1,ymm9,22
vpxor ymm7,ymm7,ymm2
vpslld ymm2,ymm9,10
vpxor ymm8,ymm10,ymm3
vpaddd ymm12,ymm12,ymm6
vpxor ymm7,ymm7,ymm1
vpxor ymm7,ymm7,ymm2
vpaddd ymm8,ymm8,ymm6
vpaddd ymm8,ymm8,ymm7
add rbp,256
dec ecx
jnz $L$oop_16_xx_avx2
mov ecx,1
lea rbx,QWORD PTR[512+rsp]
lea rbp,QWORD PTR[((K256+128))]
cmp ecx,DWORD PTR[rbx]
cmovge r12,rbp
cmp ecx,DWORD PTR[4+rbx]
cmovge r13,rbp
cmp ecx,DWORD PTR[8+rbx]
cmovge r14,rbp
cmp ecx,DWORD PTR[12+rbx]
cmovge r15,rbp
cmp ecx,DWORD PTR[16+rbx]
cmovge r8,rbp
cmp ecx,DWORD PTR[20+rbx]
cmovge r9,rbp
cmp ecx,DWORD PTR[24+rbx]
cmovge r10,rbp
cmp ecx,DWORD PTR[28+rbx]
cmovge r11,rbp
vmovdqa ymm7,YMMWORD PTR[rbx]
vpxor ymm0,ymm0,ymm0
vmovdqa ymm6,ymm7
vpcmpgtd ymm6,ymm6,ymm0
vpaddd ymm7,ymm7,ymm6
vmovdqu ymm0,YMMWORD PTR[((0-128))+rdi]
vpand ymm8,ymm8,ymm6
vmovdqu ymm1,YMMWORD PTR[((32-128))+rdi]
vpand ymm9,ymm9,ymm6
vmovdqu ymm2,YMMWORD PTR[((64-128))+rdi]
vpand ymm10,ymm10,ymm6
vmovdqu ymm5,YMMWORD PTR[((96-128))+rdi]
vpand ymm11,ymm11,ymm6
vpaddd ymm8,ymm8,ymm0
vmovdqu ymm0,YMMWORD PTR[((128-128))+rdi]
vpand ymm12,ymm12,ymm6
vpaddd ymm9,ymm9,ymm1
vmovdqu ymm1,YMMWORD PTR[((160-128))+rdi]
vpand ymm13,ymm13,ymm6
vpaddd ymm10,ymm10,ymm2
vmovdqu ymm2,YMMWORD PTR[((192-128))+rdi]
vpand ymm14,ymm14,ymm6
vpaddd ymm11,ymm11,ymm5
vmovdqu ymm5,YMMWORD PTR[((224-128))+rdi]
vpand ymm15,ymm15,ymm6
vpaddd ymm12,ymm12,ymm0
vpaddd ymm13,ymm13,ymm1
vmovdqu YMMWORD PTR[(0-128)+rdi],ymm8
vpaddd ymm14,ymm14,ymm2
vmovdqu YMMWORD PTR[(32-128)+rdi],ymm9
vpaddd ymm15,ymm15,ymm5
vmovdqu YMMWORD PTR[(64-128)+rdi],ymm10
vmovdqu YMMWORD PTR[(96-128)+rdi],ymm11
vmovdqu YMMWORD PTR[(128-128)+rdi],ymm12
vmovdqu YMMWORD PTR[(160-128)+rdi],ymm13
vmovdqu YMMWORD PTR[(192-128)+rdi],ymm14
vmovdqu YMMWORD PTR[(224-128)+rdi],ymm15
vmovdqu YMMWORD PTR[rbx],ymm7
lea rbx,QWORD PTR[((256+128))+rsp]
vmovdqu ymm6,YMMWORD PTR[$L$pbswap]
dec edx
jnz $L$oop_avx2
$L$done_avx2::
mov rax,QWORD PTR[544+rsp]
vzeroupper
movaps xmm6,XMMWORD PTR[((-216))+rax]
movaps xmm7,XMMWORD PTR[((-200))+rax]
movaps xmm8,XMMWORD PTR[((-184))+rax]
movaps xmm9,XMMWORD PTR[((-168))+rax]
movaps xmm10,XMMWORD PTR[((-152))+rax]
movaps xmm11,XMMWORD PTR[((-136))+rax]
movaps xmm12,XMMWORD PTR[((-120))+rax]
movaps xmm13,XMMWORD PTR[((-104))+rax]
movaps xmm14,XMMWORD PTR[((-88))+rax]
movaps xmm15,XMMWORD PTR[((-72))+rax]
mov r15,QWORD PTR[((-48))+rax]
mov r14,QWORD PTR[((-40))+rax]
mov r13,QWORD PTR[((-32))+rax]
mov r12,QWORD PTR[((-24))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov rbx,QWORD PTR[((-8))+rax]
lea rsp,QWORD PTR[rax]
$L$epilogue_avx2::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_sha256_multi_block_avx2::
sha256_multi_block_avx2 ENDP
ALIGN 256
K256::
DD 1116352408,1116352408,1116352408,1116352408
DD 1116352408,1116352408,1116352408,1116352408
DD 1899447441,1899447441,1899447441,1899447441
DD 1899447441,1899447441,1899447441,1899447441
DD 3049323471,3049323471,3049323471,3049323471
DD 3049323471,3049323471,3049323471,3049323471
DD 3921009573,3921009573,3921009573,3921009573
DD 3921009573,3921009573,3921009573,3921009573
DD 961987163,961987163,961987163,961987163
DD 961987163,961987163,961987163,961987163
DD 1508970993,1508970993,1508970993,1508970993
DD 1508970993,1508970993,1508970993,1508970993
DD 2453635748,2453635748,2453635748,2453635748
DD 2453635748,2453635748,2453635748,2453635748
DD 2870763221,2870763221,2870763221,2870763221
DD 2870763221,2870763221,2870763221,2870763221
DD 3624381080,3624381080,3624381080,3624381080
DD 3624381080,3624381080,3624381080,3624381080
DD 310598401,310598401,310598401,310598401
DD 310598401,310598401,310598401,310598401
DD 607225278,607225278,607225278,607225278
DD 607225278,607225278,607225278,607225278
DD 1426881987,1426881987,1426881987,1426881987
DD 1426881987,1426881987,1426881987,1426881987
DD 1925078388,1925078388,1925078388,1925078388
DD 1925078388,1925078388,1925078388,1925078388
DD 2162078206,2162078206,2162078206,2162078206
DD 2162078206,2162078206,2162078206,2162078206
DD 2614888103,2614888103,2614888103,2614888103
DD 2614888103,2614888103,2614888103,2614888103
DD 3248222580,3248222580,3248222580,3248222580
DD 3248222580,3248222580,3248222580,3248222580
DD 3835390401,3835390401,3835390401,3835390401
DD 3835390401,3835390401,3835390401,3835390401
DD 4022224774,4022224774,4022224774,4022224774
DD 4022224774,4022224774,4022224774,4022224774
DD 264347078,264347078,264347078,264347078
DD 264347078,264347078,264347078,264347078
DD 604807628,604807628,604807628,604807628
DD 604807628,604807628,604807628,604807628
DD 770255983,770255983,770255983,770255983
DD 770255983,770255983,770255983,770255983
DD 1249150122,1249150122,1249150122,1249150122
DD 1249150122,1249150122,1249150122,1249150122
DD 1555081692,1555081692,1555081692,1555081692
DD 1555081692,1555081692,1555081692,1555081692
DD 1996064986,1996064986,1996064986,1996064986
DD 1996064986,1996064986,1996064986,1996064986
DD 2554220882,2554220882,2554220882,2554220882
DD 2554220882,2554220882,2554220882,2554220882
DD 2821834349,2821834349,2821834349,2821834349
DD 2821834349,2821834349,2821834349,2821834349
DD 2952996808,2952996808,2952996808,2952996808
DD 2952996808,2952996808,2952996808,2952996808
DD 3210313671,3210313671,3210313671,3210313671
DD 3210313671,3210313671,3210313671,3210313671
DD 3336571891,3336571891,3336571891,3336571891
DD 3336571891,3336571891,3336571891,3336571891
DD 3584528711,3584528711,3584528711,3584528711
DD 3584528711,3584528711,3584528711,3584528711
DD 113926993,113926993,113926993,113926993
DD 113926993,113926993,113926993,113926993
DD 338241895,338241895,338241895,338241895
DD 338241895,338241895,338241895,338241895
DD 666307205,666307205,666307205,666307205
DD 666307205,666307205,666307205,666307205
DD 773529912,773529912,773529912,773529912
DD 773529912,773529912,773529912,773529912
DD 1294757372,1294757372,1294757372,1294757372
DD 1294757372,1294757372,1294757372,1294757372
DD 1396182291,1396182291,1396182291,1396182291
DD 1396182291,1396182291,1396182291,1396182291
DD 1695183700,1695183700,1695183700,1695183700
DD 1695183700,1695183700,1695183700,1695183700
DD 1986661051,1986661051,1986661051,1986661051
DD 1986661051,1986661051,1986661051,1986661051
DD 2177026350,2177026350,2177026350,2177026350
DD 2177026350,2177026350,2177026350,2177026350
DD 2456956037,2456956037,2456956037,2456956037
DD 2456956037,2456956037,2456956037,2456956037
DD 2730485921,2730485921,2730485921,2730485921
DD 2730485921,2730485921,2730485921,2730485921
DD 2820302411,2820302411,2820302411,2820302411
DD 2820302411,2820302411,2820302411,2820302411
DD 3259730800,3259730800,3259730800,3259730800
DD 3259730800,3259730800,3259730800,3259730800
DD 3345764771,3345764771,3345764771,3345764771
DD 3345764771,3345764771,3345764771,3345764771
DD 3516065817,3516065817,3516065817,3516065817
DD 3516065817,3516065817,3516065817,3516065817
DD 3600352804,3600352804,3600352804,3600352804
DD 3600352804,3600352804,3600352804,3600352804
DD 4094571909,4094571909,4094571909,4094571909
DD 4094571909,4094571909,4094571909,4094571909
DD 275423344,275423344,275423344,275423344
DD 275423344,275423344,275423344,275423344
DD 430227734,430227734,430227734,430227734
DD 430227734,430227734,430227734,430227734
DD 506948616,506948616,506948616,506948616
DD 506948616,506948616,506948616,506948616
DD 659060556,659060556,659060556,659060556
DD 659060556,659060556,659060556,659060556
DD 883997877,883997877,883997877,883997877
DD 883997877,883997877,883997877,883997877
DD 958139571,958139571,958139571,958139571
DD 958139571,958139571,958139571,958139571
DD 1322822218,1322822218,1322822218,1322822218
DD 1322822218,1322822218,1322822218,1322822218
DD 1537002063,1537002063,1537002063,1537002063
DD 1537002063,1537002063,1537002063,1537002063
DD 1747873779,1747873779,1747873779,1747873779
DD 1747873779,1747873779,1747873779,1747873779
DD 1955562222,1955562222,1955562222,1955562222
DD 1955562222,1955562222,1955562222,1955562222
DD 2024104815,2024104815,2024104815,2024104815
DD 2024104815,2024104815,2024104815,2024104815
DD 2227730452,2227730452,2227730452,2227730452
DD 2227730452,2227730452,2227730452,2227730452
DD 2361852424,2361852424,2361852424,2361852424
DD 2361852424,2361852424,2361852424,2361852424
DD 2428436474,2428436474,2428436474,2428436474
DD 2428436474,2428436474,2428436474,2428436474
DD 2756734187,2756734187,2756734187,2756734187
DD 2756734187,2756734187,2756734187,2756734187
DD 3204031479,3204031479,3204031479,3204031479
DD 3204031479,3204031479,3204031479,3204031479
DD 3329325298,3329325298,3329325298,3329325298
DD 3329325298,3329325298,3329325298,3329325298
$L$pbswap::
DD 000010203h,004050607h,008090a0bh,00c0d0e0fh
DD 000010203h,004050607h,008090a0bh,00c0d0e0fh
K256_shaext::
DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h
DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h
DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h
DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h
DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch
DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah
DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h
DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h
DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h
DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h
DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h
DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h
DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h
DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h
DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h
DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h
DB 83,72,65,50,53,54,32,109,117,108,116,105,45,98,108,111
DB 99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114
DB 32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71
DB 65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112
DB 101,110,115,115,108,46,111,114,103,62,0
EXTERN __imp_RtlVirtualUnwind:NEAR
ALIGN 16
se_handler PROC PRIVATE
push rsi
push rdi
push rbx
push rbp
push r12
push r13
push r14
push r15
pushfq
sub rsp,64
mov rax,QWORD PTR[120+r8]
mov rbx,QWORD PTR[248+r8]
mov rsi,QWORD PTR[8+r9]
mov r11,QWORD PTR[56+r9]
mov r10d,DWORD PTR[r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jb $L$in_prologue
mov rax,QWORD PTR[152+r8]
mov r10d,DWORD PTR[4+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jae $L$in_prologue
mov rax,QWORD PTR[272+rax]
mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov QWORD PTR[144+r8],rbx
mov QWORD PTR[160+r8],rbp
lea rsi,QWORD PTR[((-24-160))+rax]
lea rdi,QWORD PTR[512+r8]
mov ecx,20
DD 0a548f3fch
$L$in_prologue::
mov rdi,QWORD PTR[8+rax]
mov rsi,QWORD PTR[16+rax]
mov QWORD PTR[152+r8],rax
mov QWORD PTR[168+r8],rsi
mov QWORD PTR[176+r8],rdi
mov rdi,QWORD PTR[40+r9]
mov rsi,r8
mov ecx,154
DD 0a548f3fch
mov rsi,r9
xor rcx,rcx
mov rdx,QWORD PTR[8+rsi]
mov r8,QWORD PTR[rsi]
mov r9,QWORD PTR[16+rsi]
mov r10,QWORD PTR[40+rsi]
lea r11,QWORD PTR[56+rsi]
lea r12,QWORD PTR[24+rsi]
mov QWORD PTR[32+rsp],r10
mov QWORD PTR[40+rsp],r11
mov QWORD PTR[48+rsp],r12
mov QWORD PTR[56+rsp],rcx
call QWORD PTR[__imp_RtlVirtualUnwind]
mov eax,1
add rsp,64
popfq
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
pop rdi
pop rsi
DB 0F3h,0C3h ;repret
se_handler ENDP
ALIGN 16
avx2_handler PROC PRIVATE
push rsi
push rdi
push rbx
push rbp
push r12
push r13
push r14
push r15
pushfq
sub rsp,64
mov rax,QWORD PTR[120+r8]
mov rbx,QWORD PTR[248+r8]
mov rsi,QWORD PTR[8+r9]
mov r11,QWORD PTR[56+r9]
mov r10d,DWORD PTR[r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jb $L$in_prologue
mov rax,QWORD PTR[152+r8]
mov r10d,DWORD PTR[4+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jae $L$in_prologue
mov rax,QWORD PTR[544+r8]
mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax]
mov r13,QWORD PTR[((-32))+rax]
mov r14,QWORD PTR[((-40))+rax]
mov r15,QWORD PTR[((-48))+rax]
mov QWORD PTR[144+r8],rbx
mov QWORD PTR[160+r8],rbp
mov QWORD PTR[216+r8],r12
mov QWORD PTR[224+r8],r13
mov QWORD PTR[232+r8],r14
mov QWORD PTR[240+r8],r15
lea rsi,QWORD PTR[((-56-160))+rax]
lea rdi,QWORD PTR[512+r8]
mov ecx,20
DD 0a548f3fch
jmp $L$in_prologue
avx2_handler ENDP
.text$ ENDS
.pdata SEGMENT READONLY ALIGN(4)
ALIGN 4
DD imagerel $L$SEH_begin_sha256_multi_block
DD imagerel $L$SEH_end_sha256_multi_block
DD imagerel $L$SEH_info_sha256_multi_block
DD imagerel $L$SEH_begin_sha256_multi_block_shaext
DD imagerel $L$SEH_end_sha256_multi_block_shaext
DD imagerel $L$SEH_info_sha256_multi_block_shaext
DD imagerel $L$SEH_begin_sha256_multi_block_avx
DD imagerel $L$SEH_end_sha256_multi_block_avx
DD imagerel $L$SEH_info_sha256_multi_block_avx
DD imagerel $L$SEH_begin_sha256_multi_block_avx2
DD imagerel $L$SEH_end_sha256_multi_block_avx2
DD imagerel $L$SEH_info_sha256_multi_block_avx2
.pdata ENDS
.xdata SEGMENT READONLY ALIGN(8)
ALIGN 8
$L$SEH_info_sha256_multi_block::
DB 9,0,0,0
DD imagerel se_handler
DD imagerel $L$body,imagerel $L$epilogue
$L$SEH_info_sha256_multi_block_shaext::
DB 9,0,0,0
DD imagerel se_handler
DD imagerel $L$body_shaext,imagerel $L$epilogue_shaext
$L$SEH_info_sha256_multi_block_avx::
DB 9,0,0,0
DD imagerel se_handler
DD imagerel $L$body_avx,imagerel $L$epilogue_avx
$L$SEH_info_sha256_multi_block_avx2::
DB 9,0,0,0
DD imagerel avx2_handler
DD imagerel $L$body_avx2,imagerel $L$epilogue_avx2
.xdata ENDS
END