OPTION DOTNAME .text$ SEGMENT ALIGN(256) 'CODE' EXTERN OPENSSL_ia32cap_P:NEAR PUBLIC aesni_cbc_sha256_enc ALIGN 16 aesni_cbc_sha256_enc PROC PUBLIC lea r11,QWORD PTR[OPENSSL_ia32cap_P] mov eax,1 cmp rcx,0 je $L$probe mov eax,DWORD PTR[r11] mov r10,QWORD PTR[4+r11] bt r10,61 jc aesni_cbc_sha256_enc_shaext mov r11,r10 shr r11,32 test r10d,2048 jnz aesni_cbc_sha256_enc_xop and r11d,296 cmp r11d,296 je aesni_cbc_sha256_enc_avx2 and r10d,268435456 jnz aesni_cbc_sha256_enc_avx ud2 xor eax,eax cmp rcx,0 je $L$probe ud2 $L$probe:: DB 0F3h,0C3h ;repret aesni_cbc_sha256_enc ENDP ALIGN 64 K256:: DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h DD 000010203h,004050607h,008090a0bh,00c0d0e0fh DD 000010203h,004050607h,008090a0bh,00c0d0e0fh DD 0,0,0,0,0,0,0,0,-1,-1,-1,-1 DD 0,0,0,0,0,0,0,0 DB 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54 DB 32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95 DB 54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98 DB 121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108 DB 46,111,114,103,62,0 ALIGN 64 ALIGN 64 aesni_cbc_sha256_enc_xop PROC PRIVATE mov QWORD PTR[8+rsp],rdi ;WIN64 prologue mov QWORD PTR[16+rsp],rsi mov rax,rsp $L$SEH_begin_aesni_cbc_sha256_enc_xop:: mov rdi,rcx mov rsi,rdx mov rdx,r8 mov rcx,r9 mov r8,QWORD PTR[40+rsp] mov r9,QWORD PTR[48+rsp] $L$xop_shortcut:: mov r10,QWORD PTR[56+rsp] push rbx push rbp push r12 push r13 push r14 push r15 mov r11,rsp sub rsp,288 and rsp,-64 shl rdx,6 sub rsi,rdi sub r10,rdi add rdx,rdi mov QWORD PTR[((64+8))+rsp],rsi mov QWORD PTR[((64+16))+rsp],rdx mov QWORD PTR[((64+32))+rsp],r8 mov QWORD PTR[((64+40))+rsp],r9 mov QWORD PTR[((64+48))+rsp],r10 mov QWORD PTR[((64+56))+rsp],r11 movaps XMMWORD PTR[128+rsp],xmm6 movaps XMMWORD PTR[144+rsp],xmm7 movaps XMMWORD PTR[160+rsp],xmm8 movaps XMMWORD PTR[176+rsp],xmm9 movaps XMMWORD PTR[192+rsp],xmm10 movaps XMMWORD PTR[208+rsp],xmm11 movaps XMMWORD PTR[224+rsp],xmm12 movaps XMMWORD PTR[240+rsp],xmm13 movaps XMMWORD PTR[256+rsp],xmm14 movaps XMMWORD PTR[272+rsp],xmm15 $L$prologue_xop:: vzeroall mov r12,rdi lea rdi,QWORD PTR[128+rcx] lea r13,QWORD PTR[((K256+544))] mov r14d,DWORD PTR[((240-128))+rdi] mov r15,r9 mov rsi,r10 vmovdqu xmm8,XMMWORD PTR[r8] sub r14,9 mov eax,DWORD PTR[r15] mov ebx,DWORD PTR[4+r15] mov ecx,DWORD PTR[8+r15] mov edx,DWORD PTR[12+r15] mov r8d,DWORD PTR[16+r15] mov r9d,DWORD PTR[20+r15] mov r10d,DWORD PTR[24+r15] mov r11d,DWORD PTR[28+r15] vmovdqa xmm14,XMMWORD PTR[r14*8+r13] vmovdqa xmm13,XMMWORD PTR[16+r14*8+r13] vmovdqa xmm12,XMMWORD PTR[32+r14*8+r13] vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] jmp $L$loop_xop ALIGN 16 $L$loop_xop:: vmovdqa xmm7,XMMWORD PTR[((K256+512))] vmovdqu xmm0,XMMWORD PTR[r12*1+rsi] vmovdqu xmm1,XMMWORD PTR[16+r12*1+rsi] vmovdqu xmm2,XMMWORD PTR[32+r12*1+rsi] vmovdqu xmm3,XMMWORD PTR[48+r12*1+rsi] vpshufb xmm0,xmm0,xmm7 lea rbp,QWORD PTR[K256] vpshufb xmm1,xmm1,xmm7 vpshufb xmm2,xmm2,xmm7 vpaddd xmm4,xmm0,XMMWORD PTR[rbp] vpshufb xmm3,xmm3,xmm7 vpaddd xmm5,xmm1,XMMWORD PTR[32+rbp] vpaddd xmm6,xmm2,XMMWORD PTR[64+rbp] vpaddd xmm7,xmm3,XMMWORD PTR[96+rbp] vmovdqa XMMWORD PTR[rsp],xmm4 mov r14d,eax vmovdqa XMMWORD PTR[16+rsp],xmm5 mov esi,ebx vmovdqa XMMWORD PTR[32+rsp],xmm6 xor esi,ecx vmovdqa XMMWORD PTR[48+rsp],xmm7 mov r13d,r8d jmp $L$xop_00_47 ALIGN 16 $L$xop_00_47:: sub rbp,-16*2*4 vmovdqu xmm9,XMMWORD PTR[r12] mov QWORD PTR[((64+0))+rsp],r12 vpalignr xmm4,xmm1,xmm0,4 ror r13d,14 mov eax,r14d vpalignr xmm7,xmm3,xmm2,4 mov r12d,r9d xor r13d,r8d DB 143,232,120,194,236,14 ror r14d,9 xor r12d,r10d vpsrld xmm4,xmm4,3 ror r13d,5 xor r14d,eax vpaddd xmm0,xmm0,xmm7 and r12d,r8d vpxor xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi] xor r13d,r8d add r11d,DWORD PTR[rsp] mov r15d,eax DB 143,232,120,194,245,11 ror r14d,11 xor r12d,r10d vpxor xmm4,xmm4,xmm5 xor r15d,ebx ror r13d,6 add r11d,r12d and esi,r15d DB 143,232,120,194,251,13 xor r14d,eax add r11d,r13d vpxor xmm4,xmm4,xmm6 xor esi,ebx add edx,r11d vpsrld xmm6,xmm3,10 ror r14d,2 add r11d,esi vpaddd xmm0,xmm0,xmm4 mov r13d,edx add r14d,r11d DB 143,232,120,194,239,2 ror r13d,14 mov r11d,r14d vpxor xmm7,xmm7,xmm6 mov r12d,r8d xor r13d,edx ror r14d,9 xor r12d,r9d vpxor xmm7,xmm7,xmm5 ror r13d,5 xor r14d,r11d and r12d,edx vpxor xmm9,xmm9,xmm8 xor r13d,edx vpsrldq xmm7,xmm7,8 add r10d,DWORD PTR[4+rsp] mov esi,r11d ror r14d,11 xor r12d,r9d vpaddd xmm0,xmm0,xmm7 xor esi,eax ror r13d,6 add r10d,r12d and r15d,esi DB 143,232,120,194,248,13 xor r14d,r11d add r10d,r13d vpsrld xmm6,xmm0,10 xor r15d,eax add ecx,r10d DB 143,232,120,194,239,2 ror r14d,2 add r10d,r15d vpxor xmm7,xmm7,xmm6 mov r13d,ecx add r14d,r10d ror r13d,14 mov r10d,r14d vpxor xmm7,xmm7,xmm5 mov r12d,edx xor r13d,ecx ror r14d,9 xor r12d,r8d vpslldq xmm7,xmm7,8 ror r13d,5 xor r14d,r10d and r12d,ecx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi] xor r13d,ecx vpaddd xmm0,xmm0,xmm7 add r9d,DWORD PTR[8+rsp] mov r15d,r10d ror r14d,11 xor r12d,r8d vpaddd xmm6,xmm0,XMMWORD PTR[rbp] xor r15d,r11d ror r13d,6 add r9d,r12d and esi,r15d xor r14d,r10d add r9d,r13d xor esi,r11d add ebx,r9d ror r14d,2 add r9d,esi mov r13d,ebx add r14d,r9d ror r13d,14 mov r9d,r14d mov r12d,ecx xor r13d,ebx ror r14d,9 xor r12d,edx ror r13d,5 xor r14d,r9d and r12d,ebx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi] xor r13d,ebx add r8d,DWORD PTR[12+rsp] mov esi,r9d ror r14d,11 xor r12d,edx xor esi,r10d ror r13d,6 add r8d,r12d and r15d,esi xor r14d,r9d add r8d,r13d xor r15d,r10d add eax,r8d ror r14d,2 add r8d,r15d mov r13d,eax add r14d,r8d vmovdqa XMMWORD PTR[rsp],xmm6 vpalignr xmm4,xmm2,xmm1,4 ror r13d,14 mov r8d,r14d vpalignr xmm7,xmm0,xmm3,4 mov r12d,ebx xor r13d,eax DB 143,232,120,194,236,14 ror r14d,9 xor r12d,ecx vpsrld xmm4,xmm4,3 ror r13d,5 xor r14d,r8d vpaddd xmm1,xmm1,xmm7 and r12d,eax vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi] xor r13d,eax add edx,DWORD PTR[16+rsp] mov r15d,r8d DB 143,232,120,194,245,11 ror r14d,11 xor r12d,ecx vpxor xmm4,xmm4,xmm5 xor r15d,r9d ror r13d,6 add edx,r12d and esi,r15d DB 143,232,120,194,248,13 xor r14d,r8d add edx,r13d vpxor xmm4,xmm4,xmm6 xor esi,r9d add r11d,edx vpsrld xmm6,xmm0,10 ror r14d,2 add edx,esi vpaddd xmm1,xmm1,xmm4 mov r13d,r11d add r14d,edx DB 143,232,120,194,239,2 ror r13d,14 mov edx,r14d vpxor xmm7,xmm7,xmm6 mov r12d,eax xor r13d,r11d ror r14d,9 xor r12d,ebx vpxor xmm7,xmm7,xmm5 ror r13d,5 xor r14d,edx and r12d,r11d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi] xor r13d,r11d vpsrldq xmm7,xmm7,8 add ecx,DWORD PTR[20+rsp] mov esi,edx ror r14d,11 xor r12d,ebx vpaddd xmm1,xmm1,xmm7 xor esi,r8d ror r13d,6 add ecx,r12d and r15d,esi DB 143,232,120,194,249,13 xor r14d,edx add ecx,r13d vpsrld xmm6,xmm1,10 xor r15d,r8d add r10d,ecx DB 143,232,120,194,239,2 ror r14d,2 add ecx,r15d vpxor xmm7,xmm7,xmm6 mov r13d,r10d add r14d,ecx ror r13d,14 mov ecx,r14d vpxor xmm7,xmm7,xmm5 mov r12d,r11d xor r13d,r10d ror r14d,9 xor r12d,eax vpslldq xmm7,xmm7,8 ror r13d,5 xor r14d,ecx and r12d,r10d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi] xor r13d,r10d vpaddd xmm1,xmm1,xmm7 add ebx,DWORD PTR[24+rsp] mov r15d,ecx ror r14d,11 xor r12d,eax vpaddd xmm6,xmm1,XMMWORD PTR[32+rbp] xor r15d,edx ror r13d,6 add ebx,r12d and esi,r15d xor r14d,ecx add ebx,r13d xor esi,edx add r9d,ebx ror r14d,2 add ebx,esi mov r13d,r9d add r14d,ebx ror r13d,14 mov ebx,r14d mov r12d,r10d xor r13d,r9d ror r14d,9 xor r12d,r11d ror r13d,5 xor r14d,ebx and r12d,r9d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi] xor r13d,r9d add eax,DWORD PTR[28+rsp] mov esi,ebx ror r14d,11 xor r12d,r11d xor esi,ecx ror r13d,6 add eax,r12d and r15d,esi xor r14d,ebx add eax,r13d xor r15d,ecx add r8d,eax ror r14d,2 add eax,r15d mov r13d,r8d add r14d,eax vmovdqa XMMWORD PTR[16+rsp],xmm6 vpalignr xmm4,xmm3,xmm2,4 ror r13d,14 mov eax,r14d vpalignr xmm7,xmm1,xmm0,4 mov r12d,r9d xor r13d,r8d DB 143,232,120,194,236,14 ror r14d,9 xor r12d,r10d vpsrld xmm4,xmm4,3 ror r13d,5 xor r14d,eax vpaddd xmm2,xmm2,xmm7 and r12d,r8d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi] xor r13d,r8d add r11d,DWORD PTR[32+rsp] mov r15d,eax DB 143,232,120,194,245,11 ror r14d,11 xor r12d,r10d vpxor xmm4,xmm4,xmm5 xor r15d,ebx ror r13d,6 add r11d,r12d and esi,r15d DB 143,232,120,194,249,13 xor r14d,eax add r11d,r13d vpxor xmm4,xmm4,xmm6 xor esi,ebx add edx,r11d vpsrld xmm6,xmm1,10 ror r14d,2 add r11d,esi vpaddd xmm2,xmm2,xmm4 mov r13d,edx add r14d,r11d DB 143,232,120,194,239,2 ror r13d,14 mov r11d,r14d vpxor xmm7,xmm7,xmm6 mov r12d,r8d xor r13d,edx ror r14d,9 xor r12d,r9d vpxor xmm7,xmm7,xmm5 ror r13d,5 xor r14d,r11d and r12d,edx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi] xor r13d,edx vpsrldq xmm7,xmm7,8 add r10d,DWORD PTR[36+rsp] mov esi,r11d ror r14d,11 xor r12d,r9d vpaddd xmm2,xmm2,xmm7 xor esi,eax ror r13d,6 add r10d,r12d and r15d,esi DB 143,232,120,194,250,13 xor r14d,r11d add r10d,r13d vpsrld xmm6,xmm2,10 xor r15d,eax add ecx,r10d DB 143,232,120,194,239,2 ror r14d,2 add r10d,r15d vpxor xmm7,xmm7,xmm6 mov r13d,ecx add r14d,r10d ror r13d,14 mov r10d,r14d vpxor xmm7,xmm7,xmm5 mov r12d,edx xor r13d,ecx ror r14d,9 xor r12d,r8d vpslldq xmm7,xmm7,8 ror r13d,5 xor r14d,r10d and r12d,ecx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi] xor r13d,ecx vpaddd xmm2,xmm2,xmm7 add r9d,DWORD PTR[40+rsp] mov r15d,r10d ror r14d,11 xor r12d,r8d vpaddd xmm6,xmm2,XMMWORD PTR[64+rbp] xor r15d,r11d ror r13d,6 add r9d,r12d and esi,r15d xor r14d,r10d add r9d,r13d xor esi,r11d add ebx,r9d ror r14d,2 add r9d,esi mov r13d,ebx add r14d,r9d ror r13d,14 mov r9d,r14d mov r12d,ecx xor r13d,ebx ror r14d,9 xor r12d,edx ror r13d,5 xor r14d,r9d and r12d,ebx vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi] xor r13d,ebx add r8d,DWORD PTR[44+rsp] mov esi,r9d ror r14d,11 xor r12d,edx xor esi,r10d ror r13d,6 add r8d,r12d and r15d,esi xor r14d,r9d add r8d,r13d xor r15d,r10d add eax,r8d ror r14d,2 add r8d,r15d mov r13d,eax add r14d,r8d vmovdqa XMMWORD PTR[32+rsp],xmm6 vpalignr xmm4,xmm0,xmm3,4 ror r13d,14 mov r8d,r14d vpalignr xmm7,xmm2,xmm1,4 mov r12d,ebx xor r13d,eax DB 143,232,120,194,236,14 ror r14d,9 xor r12d,ecx vpsrld xmm4,xmm4,3 ror r13d,5 xor r14d,r8d vpaddd xmm3,xmm3,xmm7 and r12d,eax vpand xmm8,xmm11,xmm12 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi] xor r13d,eax add edx,DWORD PTR[48+rsp] mov r15d,r8d DB 143,232,120,194,245,11 ror r14d,11 xor r12d,ecx vpxor xmm4,xmm4,xmm5 xor r15d,r9d ror r13d,6 add edx,r12d and esi,r15d DB 143,232,120,194,250,13 xor r14d,r8d add edx,r13d vpxor xmm4,xmm4,xmm6 xor esi,r9d add r11d,edx vpsrld xmm6,xmm2,10 ror r14d,2 add edx,esi vpaddd xmm3,xmm3,xmm4 mov r13d,r11d add r14d,edx DB 143,232,120,194,239,2 ror r13d,14 mov edx,r14d vpxor xmm7,xmm7,xmm6 mov r12d,eax xor r13d,r11d ror r14d,9 xor r12d,ebx vpxor xmm7,xmm7,xmm5 ror r13d,5 xor r14d,edx and r12d,r11d vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi] xor r13d,r11d vpsrldq xmm7,xmm7,8 add ecx,DWORD PTR[52+rsp] mov esi,edx ror r14d,11 xor r12d,ebx vpaddd xmm3,xmm3,xmm7 xor esi,r8d ror r13d,6 add ecx,r12d and r15d,esi DB 143,232,120,194,251,13 xor r14d,edx add ecx,r13d vpsrld xmm6,xmm3,10 xor r15d,r8d add r10d,ecx DB 143,232,120,194,239,2 ror r14d,2 add ecx,r15d vpxor xmm7,xmm7,xmm6 mov r13d,r10d add r14d,ecx ror r13d,14 mov ecx,r14d vpxor xmm7,xmm7,xmm5 mov r12d,r11d xor r13d,r10d ror r14d,9 xor r12d,eax vpslldq xmm7,xmm7,8 ror r13d,5 xor r14d,ecx and r12d,r10d vpand xmm11,xmm11,xmm13 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi] xor r13d,r10d vpaddd xmm3,xmm3,xmm7 add ebx,DWORD PTR[56+rsp] mov r15d,ecx ror r14d,11 xor r12d,eax vpaddd xmm6,xmm3,XMMWORD PTR[96+rbp] xor r15d,edx ror r13d,6 add ebx,r12d and esi,r15d xor r14d,ecx add ebx,r13d xor esi,edx add r9d,ebx ror r14d,2 add ebx,esi mov r13d,r9d add r14d,ebx ror r13d,14 mov ebx,r14d mov r12d,r10d xor r13d,r9d ror r14d,9 xor r12d,r11d ror r13d,5 xor r14d,ebx and r12d,r9d vpor xmm8,xmm8,xmm11 vaesenclast xmm11,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] xor r13d,r9d add eax,DWORD PTR[60+rsp] mov esi,ebx ror r14d,11 xor r12d,r11d xor esi,ecx ror r13d,6 add eax,r12d and r15d,esi xor r14d,ebx add eax,r13d xor r15d,ecx add r8d,eax ror r14d,2 add eax,r15d mov r13d,r8d add r14d,eax vmovdqa XMMWORD PTR[48+rsp],xmm6 mov r12,QWORD PTR[((64+0))+rsp] vpand xmm11,xmm11,xmm14 mov r15,QWORD PTR[((64+8))+rsp] vpor xmm8,xmm8,xmm11 vmovdqu XMMWORD PTR[r12*1+r15],xmm8 lea r12,QWORD PTR[16+r12] cmp BYTE PTR[131+rbp],0 jne $L$xop_00_47 vmovdqu xmm9,XMMWORD PTR[r12] mov QWORD PTR[((64+0))+rsp],r12 ror r13d,14 mov eax,r14d mov r12d,r9d xor r13d,r8d ror r14d,9 xor r12d,r10d ror r13d,5 xor r14d,eax and r12d,r8d vpxor xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi] xor r13d,r8d add r11d,DWORD PTR[rsp] mov r15d,eax ror r14d,11 xor r12d,r10d xor r15d,ebx ror r13d,6 add r11d,r12d and esi,r15d xor r14d,eax add r11d,r13d xor esi,ebx add edx,r11d ror r14d,2 add r11d,esi mov r13d,edx add r14d,r11d ror r13d,14 mov r11d,r14d mov r12d,r8d xor r13d,edx ror r14d,9 xor r12d,r9d ror r13d,5 xor r14d,r11d and r12d,edx vpxor xmm9,xmm9,xmm8 xor r13d,edx add r10d,DWORD PTR[4+rsp] mov esi,r11d ror r14d,11 xor r12d,r9d xor esi,eax ror r13d,6 add r10d,r12d and r15d,esi xor r14d,r11d add r10d,r13d xor r15d,eax add ecx,r10d ror r14d,2 add r10d,r15d mov r13d,ecx add r14d,r10d ror r13d,14 mov r10d,r14d mov r12d,edx xor r13d,ecx ror r14d,9 xor r12d,r8d ror r13d,5 xor r14d,r10d and r12d,ecx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi] xor r13d,ecx add r9d,DWORD PTR[8+rsp] mov r15d,r10d ror r14d,11 xor r12d,r8d xor r15d,r11d ror r13d,6 add r9d,r12d and esi,r15d xor r14d,r10d add r9d,r13d xor esi,r11d add ebx,r9d ror r14d,2 add r9d,esi mov r13d,ebx add r14d,r9d ror r13d,14 mov r9d,r14d mov r12d,ecx xor r13d,ebx ror r14d,9 xor r12d,edx ror r13d,5 xor r14d,r9d and r12d,ebx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi] xor r13d,ebx add r8d,DWORD PTR[12+rsp] mov esi,r9d ror r14d,11 xor r12d,edx xor esi,r10d ror r13d,6 add r8d,r12d and r15d,esi xor r14d,r9d add r8d,r13d xor r15d,r10d add eax,r8d ror r14d,2 add r8d,r15d mov r13d,eax add r14d,r8d ror r13d,14 mov r8d,r14d mov r12d,ebx xor r13d,eax ror r14d,9 xor r12d,ecx ror r13d,5 xor r14d,r8d and r12d,eax vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi] xor r13d,eax add edx,DWORD PTR[16+rsp] mov r15d,r8d ror r14d,11 xor r12d,ecx xor r15d,r9d ror r13d,6 add edx,r12d and esi,r15d xor r14d,r8d add edx,r13d xor esi,r9d add r11d,edx ror r14d,2 add edx,esi mov r13d,r11d add r14d,edx ror r13d,14 mov edx,r14d mov r12d,eax xor r13d,r11d ror r14d,9 xor r12d,ebx ror r13d,5 xor r14d,edx and r12d,r11d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi] xor r13d,r11d add ecx,DWORD PTR[20+rsp] mov esi,edx ror r14d,11 xor r12d,ebx xor esi,r8d ror r13d,6 add ecx,r12d and r15d,esi xor r14d,edx add ecx,r13d xor r15d,r8d add r10d,ecx ror r14d,2 add ecx,r15d mov r13d,r10d add r14d,ecx ror r13d,14 mov ecx,r14d mov r12d,r11d xor r13d,r10d ror r14d,9 xor r12d,eax ror r13d,5 xor r14d,ecx and r12d,r10d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi] xor r13d,r10d add ebx,DWORD PTR[24+rsp] mov r15d,ecx ror r14d,11 xor r12d,eax xor r15d,edx ror r13d,6 add ebx,r12d and esi,r15d xor r14d,ecx add ebx,r13d xor esi,edx add r9d,ebx ror r14d,2 add ebx,esi mov r13d,r9d add r14d,ebx ror r13d,14 mov ebx,r14d mov r12d,r10d xor r13d,r9d ror r14d,9 xor r12d,r11d ror r13d,5 xor r14d,ebx and r12d,r9d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi] xor r13d,r9d add eax,DWORD PTR[28+rsp] mov esi,ebx ror r14d,11 xor r12d,r11d xor esi,ecx ror r13d,6 add eax,r12d and r15d,esi xor r14d,ebx add eax,r13d xor r15d,ecx add r8d,eax ror r14d,2 add eax,r15d mov r13d,r8d add r14d,eax ror r13d,14 mov eax,r14d mov r12d,r9d xor r13d,r8d ror r14d,9 xor r12d,r10d ror r13d,5 xor r14d,eax and r12d,r8d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi] xor r13d,r8d add r11d,DWORD PTR[32+rsp] mov r15d,eax ror r14d,11 xor r12d,r10d xor r15d,ebx ror r13d,6 add r11d,r12d and esi,r15d xor r14d,eax add r11d,r13d xor esi,ebx add edx,r11d ror r14d,2 add r11d,esi mov r13d,edx add r14d,r11d ror r13d,14 mov r11d,r14d mov r12d,r8d xor r13d,edx ror r14d,9 xor r12d,r9d ror r13d,5 xor r14d,r11d and r12d,edx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi] xor r13d,edx add r10d,DWORD PTR[36+rsp] mov esi,r11d ror r14d,11 xor r12d,r9d xor esi,eax ror r13d,6 add r10d,r12d and r15d,esi xor r14d,r11d add r10d,r13d xor r15d,eax add ecx,r10d ror r14d,2 add r10d,r15d mov r13d,ecx add r14d,r10d ror r13d,14 mov r10d,r14d mov r12d,edx xor r13d,ecx ror r14d,9 xor r12d,r8d ror r13d,5 xor r14d,r10d and r12d,ecx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi] xor r13d,ecx add r9d,DWORD PTR[40+rsp] mov r15d,r10d ror r14d,11 xor r12d,r8d xor r15d,r11d ror r13d,6 add r9d,r12d and esi,r15d xor r14d,r10d add r9d,r13d xor esi,r11d add ebx,r9d ror r14d,2 add r9d,esi mov r13d,ebx add r14d,r9d ror r13d,14 mov r9d,r14d mov r12d,ecx xor r13d,ebx ror r14d,9 xor r12d,edx ror r13d,5 xor r14d,r9d and r12d,ebx vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi] xor r13d,ebx add r8d,DWORD PTR[44+rsp] mov esi,r9d ror r14d,11 xor r12d,edx xor esi,r10d ror r13d,6 add r8d,r12d and r15d,esi xor r14d,r9d add r8d,r13d xor r15d,r10d add eax,r8d ror r14d,2 add r8d,r15d mov r13d,eax add r14d,r8d ror r13d,14 mov r8d,r14d mov r12d,ebx xor r13d,eax ror r14d,9 xor r12d,ecx ror r13d,5 xor r14d,r8d and r12d,eax vpand xmm8,xmm11,xmm12 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi] xor r13d,eax add edx,DWORD PTR[48+rsp] mov r15d,r8d ror r14d,11 xor r12d,ecx xor r15d,r9d ror r13d,6 add edx,r12d and esi,r15d xor r14d,r8d add edx,r13d xor esi,r9d add r11d,edx ror r14d,2 add edx,esi mov r13d,r11d add r14d,edx ror r13d,14 mov edx,r14d mov r12d,eax xor r13d,r11d ror r14d,9 xor r12d,ebx ror r13d,5 xor r14d,edx and r12d,r11d vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi] xor r13d,r11d add ecx,DWORD PTR[52+rsp] mov esi,edx ror r14d,11 xor r12d,ebx xor esi,r8d ror r13d,6 add ecx,r12d and r15d,esi xor r14d,edx add ecx,r13d xor r15d,r8d add r10d,ecx ror r14d,2 add ecx,r15d mov r13d,r10d add r14d,ecx ror r13d,14 mov ecx,r14d mov r12d,r11d xor r13d,r10d ror r14d,9 xor r12d,eax ror r13d,5 xor r14d,ecx and r12d,r10d vpand xmm11,xmm11,xmm13 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi] xor r13d,r10d add ebx,DWORD PTR[56+rsp] mov r15d,ecx ror r14d,11 xor r12d,eax xor r15d,edx ror r13d,6 add ebx,r12d and esi,r15d xor r14d,ecx add ebx,r13d xor esi,edx add r9d,ebx ror r14d,2 add ebx,esi mov r13d,r9d add r14d,ebx ror r13d,14 mov ebx,r14d mov r12d,r10d xor r13d,r9d ror r14d,9 xor r12d,r11d ror r13d,5 xor r14d,ebx and r12d,r9d vpor xmm8,xmm8,xmm11 vaesenclast xmm11,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] xor r13d,r9d add eax,DWORD PTR[60+rsp] mov esi,ebx ror r14d,11 xor r12d,r11d xor esi,ecx ror r13d,6 add eax,r12d and r15d,esi xor r14d,ebx add eax,r13d xor r15d,ecx add r8d,eax ror r14d,2 add eax,r15d mov r13d,r8d add r14d,eax mov r12,QWORD PTR[((64+0))+rsp] mov r13,QWORD PTR[((64+8))+rsp] mov r15,QWORD PTR[((64+40))+rsp] mov rsi,QWORD PTR[((64+48))+rsp] vpand xmm11,xmm11,xmm14 mov eax,r14d vpor xmm8,xmm8,xmm11 vmovdqu XMMWORD PTR[r13*1+r12],xmm8 lea r12,QWORD PTR[16+r12] add eax,DWORD PTR[r15] add ebx,DWORD PTR[4+r15] add ecx,DWORD PTR[8+r15] add edx,DWORD PTR[12+r15] add r8d,DWORD PTR[16+r15] add r9d,DWORD PTR[20+r15] add r10d,DWORD PTR[24+r15] add r11d,DWORD PTR[28+r15] cmp r12,QWORD PTR[((64+16))+rsp] mov DWORD PTR[r15],eax mov DWORD PTR[4+r15],ebx mov DWORD PTR[8+r15],ecx mov DWORD PTR[12+r15],edx mov DWORD PTR[16+r15],r8d mov DWORD PTR[20+r15],r9d mov DWORD PTR[24+r15],r10d mov DWORD PTR[28+r15],r11d jb $L$loop_xop mov r8,QWORD PTR[((64+32))+rsp] mov rsi,QWORD PTR[((64+56))+rsp] vmovdqu XMMWORD PTR[r8],xmm8 vzeroall movaps xmm6,XMMWORD PTR[128+rsp] movaps xmm7,XMMWORD PTR[144+rsp] movaps xmm8,XMMWORD PTR[160+rsp] movaps xmm9,XMMWORD PTR[176+rsp] movaps xmm10,XMMWORD PTR[192+rsp] movaps xmm11,XMMWORD PTR[208+rsp] movaps xmm12,XMMWORD PTR[224+rsp] movaps xmm13,XMMWORD PTR[240+rsp] movaps xmm14,XMMWORD PTR[256+rsp] movaps xmm15,XMMWORD PTR[272+rsp] mov r15,QWORD PTR[rsi] mov r14,QWORD PTR[8+rsi] mov r13,QWORD PTR[16+rsi] mov r12,QWORD PTR[24+rsi] mov rbp,QWORD PTR[32+rsi] mov rbx,QWORD PTR[40+rsi] lea rsp,QWORD PTR[48+rsi] $L$epilogue_xop:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aesni_cbc_sha256_enc_xop:: aesni_cbc_sha256_enc_xop ENDP ALIGN 64 aesni_cbc_sha256_enc_avx PROC PRIVATE mov QWORD PTR[8+rsp],rdi ;WIN64 prologue mov QWORD PTR[16+rsp],rsi mov rax,rsp $L$SEH_begin_aesni_cbc_sha256_enc_avx:: mov rdi,rcx mov rsi,rdx mov rdx,r8 mov rcx,r9 mov r8,QWORD PTR[40+rsp] mov r9,QWORD PTR[48+rsp] $L$avx_shortcut:: mov r10,QWORD PTR[56+rsp] push rbx push rbp push r12 push r13 push r14 push r15 mov r11,rsp sub rsp,288 and rsp,-64 shl rdx,6 sub rsi,rdi sub r10,rdi add rdx,rdi mov QWORD PTR[((64+8))+rsp],rsi mov QWORD PTR[((64+16))+rsp],rdx mov QWORD PTR[((64+32))+rsp],r8 mov QWORD PTR[((64+40))+rsp],r9 mov QWORD PTR[((64+48))+rsp],r10 mov QWORD PTR[((64+56))+rsp],r11 movaps XMMWORD PTR[128+rsp],xmm6 movaps XMMWORD PTR[144+rsp],xmm7 movaps XMMWORD PTR[160+rsp],xmm8 movaps XMMWORD PTR[176+rsp],xmm9 movaps XMMWORD PTR[192+rsp],xmm10 movaps XMMWORD PTR[208+rsp],xmm11 movaps XMMWORD PTR[224+rsp],xmm12 movaps XMMWORD PTR[240+rsp],xmm13 movaps XMMWORD PTR[256+rsp],xmm14 movaps XMMWORD PTR[272+rsp],xmm15 $L$prologue_avx:: vzeroall mov r12,rdi lea rdi,QWORD PTR[128+rcx] lea r13,QWORD PTR[((K256+544))] mov r14d,DWORD PTR[((240-128))+rdi] mov r15,r9 mov rsi,r10 vmovdqu xmm8,XMMWORD PTR[r8] sub r14,9 mov eax,DWORD PTR[r15] mov ebx,DWORD PTR[4+r15] mov ecx,DWORD PTR[8+r15] mov edx,DWORD PTR[12+r15] mov r8d,DWORD PTR[16+r15] mov r9d,DWORD PTR[20+r15] mov r10d,DWORD PTR[24+r15] mov r11d,DWORD PTR[28+r15] vmovdqa xmm14,XMMWORD PTR[r14*8+r13] vmovdqa xmm13,XMMWORD PTR[16+r14*8+r13] vmovdqa xmm12,XMMWORD PTR[32+r14*8+r13] vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] jmp $L$loop_avx ALIGN 16 $L$loop_avx:: vmovdqa xmm7,XMMWORD PTR[((K256+512))] vmovdqu xmm0,XMMWORD PTR[r12*1+rsi] vmovdqu xmm1,XMMWORD PTR[16+r12*1+rsi] vmovdqu xmm2,XMMWORD PTR[32+r12*1+rsi] vmovdqu xmm3,XMMWORD PTR[48+r12*1+rsi] vpshufb xmm0,xmm0,xmm7 lea rbp,QWORD PTR[K256] vpshufb xmm1,xmm1,xmm7 vpshufb xmm2,xmm2,xmm7 vpaddd xmm4,xmm0,XMMWORD PTR[rbp] vpshufb xmm3,xmm3,xmm7 vpaddd xmm5,xmm1,XMMWORD PTR[32+rbp] vpaddd xmm6,xmm2,XMMWORD PTR[64+rbp] vpaddd xmm7,xmm3,XMMWORD PTR[96+rbp] vmovdqa XMMWORD PTR[rsp],xmm4 mov r14d,eax vmovdqa XMMWORD PTR[16+rsp],xmm5 mov esi,ebx vmovdqa XMMWORD PTR[32+rsp],xmm6 xor esi,ecx vmovdqa XMMWORD PTR[48+rsp],xmm7 mov r13d,r8d jmp $L$avx_00_47 ALIGN 16 $L$avx_00_47:: sub rbp,-16*2*4 vmovdqu xmm9,XMMWORD PTR[r12] mov QWORD PTR[((64+0))+rsp],r12 vpalignr xmm4,xmm1,xmm0,4 shrd r13d,r13d,14 mov eax,r14d mov r12d,r9d vpalignr xmm7,xmm3,xmm2,4 xor r13d,r8d shrd r14d,r14d,9 xor r12d,r10d vpsrld xmm6,xmm4,7 shrd r13d,r13d,5 xor r14d,eax and r12d,r8d vpaddd xmm0,xmm0,xmm7 vpxor xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi] xor r13d,r8d add r11d,DWORD PTR[rsp] mov r15d,eax vpsrld xmm7,xmm4,3 shrd r14d,r14d,11 xor r12d,r10d xor r15d,ebx vpslld xmm5,xmm4,14 shrd r13d,r13d,6 add r11d,r12d and esi,r15d vpxor xmm4,xmm7,xmm6 xor r14d,eax add r11d,r13d xor esi,ebx vpshufd xmm7,xmm3,250 add edx,r11d shrd r14d,r14d,2 add r11d,esi vpsrld xmm6,xmm6,11 mov r13d,edx add r14d,r11d shrd r13d,r13d,14 vpxor xmm4,xmm4,xmm5 mov r11d,r14d mov r12d,r8d xor r13d,edx vpslld xmm5,xmm5,11 shrd r14d,r14d,9 xor r12d,r9d shrd r13d,r13d,5 vpxor xmm4,xmm4,xmm6 xor r14d,r11d and r12d,edx vpxor xmm9,xmm9,xmm8 xor r13d,edx vpsrld xmm6,xmm7,10 add r10d,DWORD PTR[4+rsp] mov esi,r11d shrd r14d,r14d,11 vpxor xmm4,xmm4,xmm5 xor r12d,r9d xor esi,eax shrd r13d,r13d,6 vpsrlq xmm7,xmm7,17 add r10d,r12d and r15d,esi xor r14d,r11d vpaddd xmm0,xmm0,xmm4 add r10d,r13d xor r15d,eax add ecx,r10d vpxor xmm6,xmm6,xmm7 shrd r14d,r14d,2 add r10d,r15d mov r13d,ecx vpsrlq xmm7,xmm7,2 add r14d,r10d shrd r13d,r13d,14 mov r10d,r14d vpxor xmm6,xmm6,xmm7 mov r12d,edx xor r13d,ecx shrd r14d,r14d,9 vpshufd xmm6,xmm6,132 xor r12d,r8d shrd r13d,r13d,5 xor r14d,r10d vpsrldq xmm6,xmm6,8 and r12d,ecx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi] xor r13d,ecx add r9d,DWORD PTR[8+rsp] vpaddd xmm0,xmm0,xmm6 mov r15d,r10d shrd r14d,r14d,11 xor r12d,r8d vpshufd xmm7,xmm0,80 xor r15d,r11d shrd r13d,r13d,6 add r9d,r12d vpsrld xmm6,xmm7,10 and esi,r15d xor r14d,r10d add r9d,r13d vpsrlq xmm7,xmm7,17 xor esi,r11d add ebx,r9d shrd r14d,r14d,2 vpxor xmm6,xmm6,xmm7 add r9d,esi mov r13d,ebx add r14d,r9d vpsrlq xmm7,xmm7,2 shrd r13d,r13d,14 mov r9d,r14d mov r12d,ecx vpxor xmm6,xmm6,xmm7 xor r13d,ebx shrd r14d,r14d,9 xor r12d,edx vpshufd xmm6,xmm6,232 shrd r13d,r13d,5 xor r14d,r9d and r12d,ebx vpslldq xmm6,xmm6,8 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi] xor r13d,ebx add r8d,DWORD PTR[12+rsp] mov esi,r9d vpaddd xmm0,xmm0,xmm6 shrd r14d,r14d,11 xor r12d,edx xor esi,r10d vpaddd xmm6,xmm0,XMMWORD PTR[rbp] shrd r13d,r13d,6 add r8d,r12d and r15d,esi xor r14d,r9d add r8d,r13d xor r15d,r10d add eax,r8d shrd r14d,r14d,2 add r8d,r15d mov r13d,eax add r14d,r8d vmovdqa XMMWORD PTR[rsp],xmm6 vpalignr xmm4,xmm2,xmm1,4 shrd r13d,r13d,14 mov r8d,r14d mov r12d,ebx vpalignr xmm7,xmm0,xmm3,4 xor r13d,eax shrd r14d,r14d,9 xor r12d,ecx vpsrld xmm6,xmm4,7 shrd r13d,r13d,5 xor r14d,r8d and r12d,eax vpaddd xmm1,xmm1,xmm7 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi] xor r13d,eax add edx,DWORD PTR[16+rsp] mov r15d,r8d vpsrld xmm7,xmm4,3 shrd r14d,r14d,11 xor r12d,ecx xor r15d,r9d vpslld xmm5,xmm4,14 shrd r13d,r13d,6 add edx,r12d and esi,r15d vpxor xmm4,xmm7,xmm6 xor r14d,r8d add edx,r13d xor esi,r9d vpshufd xmm7,xmm0,250 add r11d,edx shrd r14d,r14d,2 add edx,esi vpsrld xmm6,xmm6,11 mov r13d,r11d add r14d,edx shrd r13d,r13d,14 vpxor xmm4,xmm4,xmm5 mov edx,r14d mov r12d,eax xor r13d,r11d vpslld xmm5,xmm5,11 shrd r14d,r14d,9 xor r12d,ebx shrd r13d,r13d,5 vpxor xmm4,xmm4,xmm6 xor r14d,edx and r12d,r11d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi] xor r13d,r11d vpsrld xmm6,xmm7,10 add ecx,DWORD PTR[20+rsp] mov esi,edx shrd r14d,r14d,11 vpxor xmm4,xmm4,xmm5 xor r12d,ebx xor esi,r8d shrd r13d,r13d,6 vpsrlq xmm7,xmm7,17 add ecx,r12d and r15d,esi xor r14d,edx vpaddd xmm1,xmm1,xmm4 add ecx,r13d xor r15d,r8d add r10d,ecx vpxor xmm6,xmm6,xmm7 shrd r14d,r14d,2 add ecx,r15d mov r13d,r10d vpsrlq xmm7,xmm7,2 add r14d,ecx shrd r13d,r13d,14 mov ecx,r14d vpxor xmm6,xmm6,xmm7 mov r12d,r11d xor r13d,r10d shrd r14d,r14d,9 vpshufd xmm6,xmm6,132 xor r12d,eax shrd r13d,r13d,5 xor r14d,ecx vpsrldq xmm6,xmm6,8 and r12d,r10d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi] xor r13d,r10d add ebx,DWORD PTR[24+rsp] vpaddd xmm1,xmm1,xmm6 mov r15d,ecx shrd r14d,r14d,11 xor r12d,eax vpshufd xmm7,xmm1,80 xor r15d,edx shrd r13d,r13d,6 add ebx,r12d vpsrld xmm6,xmm7,10 and esi,r15d xor r14d,ecx add ebx,r13d vpsrlq xmm7,xmm7,17 xor esi,edx add r9d,ebx shrd r14d,r14d,2 vpxor xmm6,xmm6,xmm7 add ebx,esi mov r13d,r9d add r14d,ebx vpsrlq xmm7,xmm7,2 shrd r13d,r13d,14 mov ebx,r14d mov r12d,r10d vpxor xmm6,xmm6,xmm7 xor r13d,r9d shrd r14d,r14d,9 xor r12d,r11d vpshufd xmm6,xmm6,232 shrd r13d,r13d,5 xor r14d,ebx and r12d,r9d vpslldq xmm6,xmm6,8 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi] xor r13d,r9d add eax,DWORD PTR[28+rsp] mov esi,ebx vpaddd xmm1,xmm1,xmm6 shrd r14d,r14d,11 xor r12d,r11d xor esi,ecx vpaddd xmm6,xmm1,XMMWORD PTR[32+rbp] shrd r13d,r13d,6 add eax,r12d and r15d,esi xor r14d,ebx add eax,r13d xor r15d,ecx add r8d,eax shrd r14d,r14d,2 add eax,r15d mov r13d,r8d add r14d,eax vmovdqa XMMWORD PTR[16+rsp],xmm6 vpalignr xmm4,xmm3,xmm2,4 shrd r13d,r13d,14 mov eax,r14d mov r12d,r9d vpalignr xmm7,xmm1,xmm0,4 xor r13d,r8d shrd r14d,r14d,9 xor r12d,r10d vpsrld xmm6,xmm4,7 shrd r13d,r13d,5 xor r14d,eax and r12d,r8d vpaddd xmm2,xmm2,xmm7 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi] xor r13d,r8d add r11d,DWORD PTR[32+rsp] mov r15d,eax vpsrld xmm7,xmm4,3 shrd r14d,r14d,11 xor r12d,r10d xor r15d,ebx vpslld xmm5,xmm4,14 shrd r13d,r13d,6 add r11d,r12d and esi,r15d vpxor xmm4,xmm7,xmm6 xor r14d,eax add r11d,r13d xor esi,ebx vpshufd xmm7,xmm1,250 add edx,r11d shrd r14d,r14d,2 add r11d,esi vpsrld xmm6,xmm6,11 mov r13d,edx add r14d,r11d shrd r13d,r13d,14 vpxor xmm4,xmm4,xmm5 mov r11d,r14d mov r12d,r8d xor r13d,edx vpslld xmm5,xmm5,11 shrd r14d,r14d,9 xor r12d,r9d shrd r13d,r13d,5 vpxor xmm4,xmm4,xmm6 xor r14d,r11d and r12d,edx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi] xor r13d,edx vpsrld xmm6,xmm7,10 add r10d,DWORD PTR[36+rsp] mov esi,r11d shrd r14d,r14d,11 vpxor xmm4,xmm4,xmm5 xor r12d,r9d xor esi,eax shrd r13d,r13d,6 vpsrlq xmm7,xmm7,17 add r10d,r12d and r15d,esi xor r14d,r11d vpaddd xmm2,xmm2,xmm4 add r10d,r13d xor r15d,eax add ecx,r10d vpxor xmm6,xmm6,xmm7 shrd r14d,r14d,2 add r10d,r15d mov r13d,ecx vpsrlq xmm7,xmm7,2 add r14d,r10d shrd r13d,r13d,14 mov r10d,r14d vpxor xmm6,xmm6,xmm7 mov r12d,edx xor r13d,ecx shrd r14d,r14d,9 vpshufd xmm6,xmm6,132 xor r12d,r8d shrd r13d,r13d,5 xor r14d,r10d vpsrldq xmm6,xmm6,8 and r12d,ecx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi] xor r13d,ecx add r9d,DWORD PTR[40+rsp] vpaddd xmm2,xmm2,xmm6 mov r15d,r10d shrd r14d,r14d,11 xor r12d,r8d vpshufd xmm7,xmm2,80 xor r15d,r11d shrd r13d,r13d,6 add r9d,r12d vpsrld xmm6,xmm7,10 and esi,r15d xor r14d,r10d add r9d,r13d vpsrlq xmm7,xmm7,17 xor esi,r11d add ebx,r9d shrd r14d,r14d,2 vpxor xmm6,xmm6,xmm7 add r9d,esi mov r13d,ebx add r14d,r9d vpsrlq xmm7,xmm7,2 shrd r13d,r13d,14 mov r9d,r14d mov r12d,ecx vpxor xmm6,xmm6,xmm7 xor r13d,ebx shrd r14d,r14d,9 xor r12d,edx vpshufd xmm6,xmm6,232 shrd r13d,r13d,5 xor r14d,r9d and r12d,ebx vpslldq xmm6,xmm6,8 vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi] xor r13d,ebx add r8d,DWORD PTR[44+rsp] mov esi,r9d vpaddd xmm2,xmm2,xmm6 shrd r14d,r14d,11 xor r12d,edx xor esi,r10d vpaddd xmm6,xmm2,XMMWORD PTR[64+rbp] shrd r13d,r13d,6 add r8d,r12d and r15d,esi xor r14d,r9d add r8d,r13d xor r15d,r10d add eax,r8d shrd r14d,r14d,2 add r8d,r15d mov r13d,eax add r14d,r8d vmovdqa XMMWORD PTR[32+rsp],xmm6 vpalignr xmm4,xmm0,xmm3,4 shrd r13d,r13d,14 mov r8d,r14d mov r12d,ebx vpalignr xmm7,xmm2,xmm1,4 xor r13d,eax shrd r14d,r14d,9 xor r12d,ecx vpsrld xmm6,xmm4,7 shrd r13d,r13d,5 xor r14d,r8d and r12d,eax vpaddd xmm3,xmm3,xmm7 vpand xmm8,xmm11,xmm12 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi] xor r13d,eax add edx,DWORD PTR[48+rsp] mov r15d,r8d vpsrld xmm7,xmm4,3 shrd r14d,r14d,11 xor r12d,ecx xor r15d,r9d vpslld xmm5,xmm4,14 shrd r13d,r13d,6 add edx,r12d and esi,r15d vpxor xmm4,xmm7,xmm6 xor r14d,r8d add edx,r13d xor esi,r9d vpshufd xmm7,xmm2,250 add r11d,edx shrd r14d,r14d,2 add edx,esi vpsrld xmm6,xmm6,11 mov r13d,r11d add r14d,edx shrd r13d,r13d,14 vpxor xmm4,xmm4,xmm5 mov edx,r14d mov r12d,eax xor r13d,r11d vpslld xmm5,xmm5,11 shrd r14d,r14d,9 xor r12d,ebx shrd r13d,r13d,5 vpxor xmm4,xmm4,xmm6 xor r14d,edx and r12d,r11d vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi] xor r13d,r11d vpsrld xmm6,xmm7,10 add ecx,DWORD PTR[52+rsp] mov esi,edx shrd r14d,r14d,11 vpxor xmm4,xmm4,xmm5 xor r12d,ebx xor esi,r8d shrd r13d,r13d,6 vpsrlq xmm7,xmm7,17 add ecx,r12d and r15d,esi xor r14d,edx vpaddd xmm3,xmm3,xmm4 add ecx,r13d xor r15d,r8d add r10d,ecx vpxor xmm6,xmm6,xmm7 shrd r14d,r14d,2 add ecx,r15d mov r13d,r10d vpsrlq xmm7,xmm7,2 add r14d,ecx shrd r13d,r13d,14 mov ecx,r14d vpxor xmm6,xmm6,xmm7 mov r12d,r11d xor r13d,r10d shrd r14d,r14d,9 vpshufd xmm6,xmm6,132 xor r12d,eax shrd r13d,r13d,5 xor r14d,ecx vpsrldq xmm6,xmm6,8 and r12d,r10d vpand xmm11,xmm11,xmm13 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi] xor r13d,r10d add ebx,DWORD PTR[56+rsp] vpaddd xmm3,xmm3,xmm6 mov r15d,ecx shrd r14d,r14d,11 xor r12d,eax vpshufd xmm7,xmm3,80 xor r15d,edx shrd r13d,r13d,6 add ebx,r12d vpsrld xmm6,xmm7,10 and esi,r15d xor r14d,ecx add ebx,r13d vpsrlq xmm7,xmm7,17 xor esi,edx add r9d,ebx shrd r14d,r14d,2 vpxor xmm6,xmm6,xmm7 add ebx,esi mov r13d,r9d add r14d,ebx vpsrlq xmm7,xmm7,2 shrd r13d,r13d,14 mov ebx,r14d mov r12d,r10d vpxor xmm6,xmm6,xmm7 xor r13d,r9d shrd r14d,r14d,9 xor r12d,r11d vpshufd xmm6,xmm6,232 shrd r13d,r13d,5 xor r14d,ebx and r12d,r9d vpslldq xmm6,xmm6,8 vpor xmm8,xmm8,xmm11 vaesenclast xmm11,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] xor r13d,r9d add eax,DWORD PTR[60+rsp] mov esi,ebx vpaddd xmm3,xmm3,xmm6 shrd r14d,r14d,11 xor r12d,r11d xor esi,ecx vpaddd xmm6,xmm3,XMMWORD PTR[96+rbp] shrd r13d,r13d,6 add eax,r12d and r15d,esi xor r14d,ebx add eax,r13d xor r15d,ecx add r8d,eax shrd r14d,r14d,2 add eax,r15d mov r13d,r8d add r14d,eax vmovdqa XMMWORD PTR[48+rsp],xmm6 mov r12,QWORD PTR[((64+0))+rsp] vpand xmm11,xmm11,xmm14 mov r15,QWORD PTR[((64+8))+rsp] vpor xmm8,xmm8,xmm11 vmovdqu XMMWORD PTR[r12*1+r15],xmm8 lea r12,QWORD PTR[16+r12] cmp BYTE PTR[131+rbp],0 jne $L$avx_00_47 vmovdqu xmm9,XMMWORD PTR[r12] mov QWORD PTR[((64+0))+rsp],r12 shrd r13d,r13d,14 mov eax,r14d mov r12d,r9d xor r13d,r8d shrd r14d,r14d,9 xor r12d,r10d shrd r13d,r13d,5 xor r14d,eax and r12d,r8d vpxor xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi] xor r13d,r8d add r11d,DWORD PTR[rsp] mov r15d,eax shrd r14d,r14d,11 xor r12d,r10d xor r15d,ebx shrd r13d,r13d,6 add r11d,r12d and esi,r15d xor r14d,eax add r11d,r13d xor esi,ebx add edx,r11d shrd r14d,r14d,2 add r11d,esi mov r13d,edx add r14d,r11d shrd r13d,r13d,14 mov r11d,r14d mov r12d,r8d xor r13d,edx shrd r14d,r14d,9 xor r12d,r9d shrd r13d,r13d,5 xor r14d,r11d and r12d,edx vpxor xmm9,xmm9,xmm8 xor r13d,edx add r10d,DWORD PTR[4+rsp] mov esi,r11d shrd r14d,r14d,11 xor r12d,r9d xor esi,eax shrd r13d,r13d,6 add r10d,r12d and r15d,esi xor r14d,r11d add r10d,r13d xor r15d,eax add ecx,r10d shrd r14d,r14d,2 add r10d,r15d mov r13d,ecx add r14d,r10d shrd r13d,r13d,14 mov r10d,r14d mov r12d,edx xor r13d,ecx shrd r14d,r14d,9 xor r12d,r8d shrd r13d,r13d,5 xor r14d,r10d and r12d,ecx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi] xor r13d,ecx add r9d,DWORD PTR[8+rsp] mov r15d,r10d shrd r14d,r14d,11 xor r12d,r8d xor r15d,r11d shrd r13d,r13d,6 add r9d,r12d and esi,r15d xor r14d,r10d add r9d,r13d xor esi,r11d add ebx,r9d shrd r14d,r14d,2 add r9d,esi mov r13d,ebx add r14d,r9d shrd r13d,r13d,14 mov r9d,r14d mov r12d,ecx xor r13d,ebx shrd r14d,r14d,9 xor r12d,edx shrd r13d,r13d,5 xor r14d,r9d and r12d,ebx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi] xor r13d,ebx add r8d,DWORD PTR[12+rsp] mov esi,r9d shrd r14d,r14d,11 xor r12d,edx xor esi,r10d shrd r13d,r13d,6 add r8d,r12d and r15d,esi xor r14d,r9d add r8d,r13d xor r15d,r10d add eax,r8d shrd r14d,r14d,2 add r8d,r15d mov r13d,eax add r14d,r8d shrd r13d,r13d,14 mov r8d,r14d mov r12d,ebx xor r13d,eax shrd r14d,r14d,9 xor r12d,ecx shrd r13d,r13d,5 xor r14d,r8d and r12d,eax vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi] xor r13d,eax add edx,DWORD PTR[16+rsp] mov r15d,r8d shrd r14d,r14d,11 xor r12d,ecx xor r15d,r9d shrd r13d,r13d,6 add edx,r12d and esi,r15d xor r14d,r8d add edx,r13d xor esi,r9d add r11d,edx shrd r14d,r14d,2 add edx,esi mov r13d,r11d add r14d,edx shrd r13d,r13d,14 mov edx,r14d mov r12d,eax xor r13d,r11d shrd r14d,r14d,9 xor r12d,ebx shrd r13d,r13d,5 xor r14d,edx and r12d,r11d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi] xor r13d,r11d add ecx,DWORD PTR[20+rsp] mov esi,edx shrd r14d,r14d,11 xor r12d,ebx xor esi,r8d shrd r13d,r13d,6 add ecx,r12d and r15d,esi xor r14d,edx add ecx,r13d xor r15d,r8d add r10d,ecx shrd r14d,r14d,2 add ecx,r15d mov r13d,r10d add r14d,ecx shrd r13d,r13d,14 mov ecx,r14d mov r12d,r11d xor r13d,r10d shrd r14d,r14d,9 xor r12d,eax shrd r13d,r13d,5 xor r14d,ecx and r12d,r10d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi] xor r13d,r10d add ebx,DWORD PTR[24+rsp] mov r15d,ecx shrd r14d,r14d,11 xor r12d,eax xor r15d,edx shrd r13d,r13d,6 add ebx,r12d and esi,r15d xor r14d,ecx add ebx,r13d xor esi,edx add r9d,ebx shrd r14d,r14d,2 add ebx,esi mov r13d,r9d add r14d,ebx shrd r13d,r13d,14 mov ebx,r14d mov r12d,r10d xor r13d,r9d shrd r14d,r14d,9 xor r12d,r11d shrd r13d,r13d,5 xor r14d,ebx and r12d,r9d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi] xor r13d,r9d add eax,DWORD PTR[28+rsp] mov esi,ebx shrd r14d,r14d,11 xor r12d,r11d xor esi,ecx shrd r13d,r13d,6 add eax,r12d and r15d,esi xor r14d,ebx add eax,r13d xor r15d,ecx add r8d,eax shrd r14d,r14d,2 add eax,r15d mov r13d,r8d add r14d,eax shrd r13d,r13d,14 mov eax,r14d mov r12d,r9d xor r13d,r8d shrd r14d,r14d,9 xor r12d,r10d shrd r13d,r13d,5 xor r14d,eax and r12d,r8d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi] xor r13d,r8d add r11d,DWORD PTR[32+rsp] mov r15d,eax shrd r14d,r14d,11 xor r12d,r10d xor r15d,ebx shrd r13d,r13d,6 add r11d,r12d and esi,r15d xor r14d,eax add r11d,r13d xor esi,ebx add edx,r11d shrd r14d,r14d,2 add r11d,esi mov r13d,edx add r14d,r11d shrd r13d,r13d,14 mov r11d,r14d mov r12d,r8d xor r13d,edx shrd r14d,r14d,9 xor r12d,r9d shrd r13d,r13d,5 xor r14d,r11d and r12d,edx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi] xor r13d,edx add r10d,DWORD PTR[36+rsp] mov esi,r11d shrd r14d,r14d,11 xor r12d,r9d xor esi,eax shrd r13d,r13d,6 add r10d,r12d and r15d,esi xor r14d,r11d add r10d,r13d xor r15d,eax add ecx,r10d shrd r14d,r14d,2 add r10d,r15d mov r13d,ecx add r14d,r10d shrd r13d,r13d,14 mov r10d,r14d mov r12d,edx xor r13d,ecx shrd r14d,r14d,9 xor r12d,r8d shrd r13d,r13d,5 xor r14d,r10d and r12d,ecx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi] xor r13d,ecx add r9d,DWORD PTR[40+rsp] mov r15d,r10d shrd r14d,r14d,11 xor r12d,r8d xor r15d,r11d shrd r13d,r13d,6 add r9d,r12d and esi,r15d xor r14d,r10d add r9d,r13d xor esi,r11d add ebx,r9d shrd r14d,r14d,2 add r9d,esi mov r13d,ebx add r14d,r9d shrd r13d,r13d,14 mov r9d,r14d mov r12d,ecx xor r13d,ebx shrd r14d,r14d,9 xor r12d,edx shrd r13d,r13d,5 xor r14d,r9d and r12d,ebx vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi] xor r13d,ebx add r8d,DWORD PTR[44+rsp] mov esi,r9d shrd r14d,r14d,11 xor r12d,edx xor esi,r10d shrd r13d,r13d,6 add r8d,r12d and r15d,esi xor r14d,r9d add r8d,r13d xor r15d,r10d add eax,r8d shrd r14d,r14d,2 add r8d,r15d mov r13d,eax add r14d,r8d shrd r13d,r13d,14 mov r8d,r14d mov r12d,ebx xor r13d,eax shrd r14d,r14d,9 xor r12d,ecx shrd r13d,r13d,5 xor r14d,r8d and r12d,eax vpand xmm8,xmm11,xmm12 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi] xor r13d,eax add edx,DWORD PTR[48+rsp] mov r15d,r8d shrd r14d,r14d,11 xor r12d,ecx xor r15d,r9d shrd r13d,r13d,6 add edx,r12d and esi,r15d xor r14d,r8d add edx,r13d xor esi,r9d add r11d,edx shrd r14d,r14d,2 add edx,esi mov r13d,r11d add r14d,edx shrd r13d,r13d,14 mov edx,r14d mov r12d,eax xor r13d,r11d shrd r14d,r14d,9 xor r12d,ebx shrd r13d,r13d,5 xor r14d,edx and r12d,r11d vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi] xor r13d,r11d add ecx,DWORD PTR[52+rsp] mov esi,edx shrd r14d,r14d,11 xor r12d,ebx xor esi,r8d shrd r13d,r13d,6 add ecx,r12d and r15d,esi xor r14d,edx add ecx,r13d xor r15d,r8d add r10d,ecx shrd r14d,r14d,2 add ecx,r15d mov r13d,r10d add r14d,ecx shrd r13d,r13d,14 mov ecx,r14d mov r12d,r11d xor r13d,r10d shrd r14d,r14d,9 xor r12d,eax shrd r13d,r13d,5 xor r14d,ecx and r12d,r10d vpand xmm11,xmm11,xmm13 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi] xor r13d,r10d add ebx,DWORD PTR[56+rsp] mov r15d,ecx shrd r14d,r14d,11 xor r12d,eax xor r15d,edx shrd r13d,r13d,6 add ebx,r12d and esi,r15d xor r14d,ecx add ebx,r13d xor esi,edx add r9d,ebx shrd r14d,r14d,2 add ebx,esi mov r13d,r9d add r14d,ebx shrd r13d,r13d,14 mov ebx,r14d mov r12d,r10d xor r13d,r9d shrd r14d,r14d,9 xor r12d,r11d shrd r13d,r13d,5 xor r14d,ebx and r12d,r9d vpor xmm8,xmm8,xmm11 vaesenclast xmm11,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] xor r13d,r9d add eax,DWORD PTR[60+rsp] mov esi,ebx shrd r14d,r14d,11 xor r12d,r11d xor esi,ecx shrd r13d,r13d,6 add eax,r12d and r15d,esi xor r14d,ebx add eax,r13d xor r15d,ecx add r8d,eax shrd r14d,r14d,2 add eax,r15d mov r13d,r8d add r14d,eax mov r12,QWORD PTR[((64+0))+rsp] mov r13,QWORD PTR[((64+8))+rsp] mov r15,QWORD PTR[((64+40))+rsp] mov rsi,QWORD PTR[((64+48))+rsp] vpand xmm11,xmm11,xmm14 mov eax,r14d vpor xmm8,xmm8,xmm11 vmovdqu XMMWORD PTR[r13*1+r12],xmm8 lea r12,QWORD PTR[16+r12] add eax,DWORD PTR[r15] add ebx,DWORD PTR[4+r15] add ecx,DWORD PTR[8+r15] add edx,DWORD PTR[12+r15] add r8d,DWORD PTR[16+r15] add r9d,DWORD PTR[20+r15] add r10d,DWORD PTR[24+r15] add r11d,DWORD PTR[28+r15] cmp r12,QWORD PTR[((64+16))+rsp] mov DWORD PTR[r15],eax mov DWORD PTR[4+r15],ebx mov DWORD PTR[8+r15],ecx mov DWORD PTR[12+r15],edx mov DWORD PTR[16+r15],r8d mov DWORD PTR[20+r15],r9d mov DWORD PTR[24+r15],r10d mov DWORD PTR[28+r15],r11d jb $L$loop_avx mov r8,QWORD PTR[((64+32))+rsp] mov rsi,QWORD PTR[((64+56))+rsp] vmovdqu XMMWORD PTR[r8],xmm8 vzeroall movaps xmm6,XMMWORD PTR[128+rsp] movaps xmm7,XMMWORD PTR[144+rsp] movaps xmm8,XMMWORD PTR[160+rsp] movaps xmm9,XMMWORD PTR[176+rsp] movaps xmm10,XMMWORD PTR[192+rsp] movaps xmm11,XMMWORD PTR[208+rsp] movaps xmm12,XMMWORD PTR[224+rsp] movaps xmm13,XMMWORD PTR[240+rsp] movaps xmm14,XMMWORD PTR[256+rsp] movaps xmm15,XMMWORD PTR[272+rsp] mov r15,QWORD PTR[rsi] mov r14,QWORD PTR[8+rsi] mov r13,QWORD PTR[16+rsi] mov r12,QWORD PTR[24+rsi] mov rbp,QWORD PTR[32+rsi] mov rbx,QWORD PTR[40+rsi] lea rsp,QWORD PTR[48+rsi] $L$epilogue_avx:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aesni_cbc_sha256_enc_avx:: aesni_cbc_sha256_enc_avx ENDP ALIGN 64 aesni_cbc_sha256_enc_avx2 PROC PRIVATE mov QWORD PTR[8+rsp],rdi ;WIN64 prologue mov QWORD PTR[16+rsp],rsi mov rax,rsp $L$SEH_begin_aesni_cbc_sha256_enc_avx2:: mov rdi,rcx mov rsi,rdx mov rdx,r8 mov rcx,r9 mov r8,QWORD PTR[40+rsp] mov r9,QWORD PTR[48+rsp] $L$avx2_shortcut:: mov r10,QWORD PTR[56+rsp] push rbx push rbp push r12 push r13 push r14 push r15 mov r11,rsp sub rsp,736 and rsp,-256*4 add rsp,448 shl rdx,6 sub rsi,rdi sub r10,rdi add rdx,rdi mov QWORD PTR[((64+16))+rsp],rdx mov QWORD PTR[((64+32))+rsp],r8 mov QWORD PTR[((64+40))+rsp],r9 mov QWORD PTR[((64+48))+rsp],r10 mov QWORD PTR[((64+56))+rsp],r11 movaps XMMWORD PTR[128+rsp],xmm6 movaps XMMWORD PTR[144+rsp],xmm7 movaps XMMWORD PTR[160+rsp],xmm8 movaps XMMWORD PTR[176+rsp],xmm9 movaps XMMWORD PTR[192+rsp],xmm10 movaps XMMWORD PTR[208+rsp],xmm11 movaps XMMWORD PTR[224+rsp],xmm12 movaps XMMWORD PTR[240+rsp],xmm13 movaps XMMWORD PTR[256+rsp],xmm14 movaps XMMWORD PTR[272+rsp],xmm15 $L$prologue_avx2:: vzeroall mov r13,rdi vpinsrq xmm15,xmm15,rsi,1 lea rdi,QWORD PTR[128+rcx] lea r12,QWORD PTR[((K256+544))] mov r14d,DWORD PTR[((240-128))+rdi] mov r15,r9 mov rsi,r10 vmovdqu xmm8,XMMWORD PTR[r8] lea r14,QWORD PTR[((-9))+r14] vmovdqa xmm14,XMMWORD PTR[r14*8+r12] vmovdqa xmm13,XMMWORD PTR[16+r14*8+r12] vmovdqa xmm12,XMMWORD PTR[32+r14*8+r12] sub r13,-16*4 mov eax,DWORD PTR[r15] lea r12,QWORD PTR[r13*1+rsi] mov ebx,DWORD PTR[4+r15] cmp r13,rdx mov ecx,DWORD PTR[8+r15] cmove r12,rsp mov edx,DWORD PTR[12+r15] mov r8d,DWORD PTR[16+r15] mov r9d,DWORD PTR[20+r15] mov r10d,DWORD PTR[24+r15] mov r11d,DWORD PTR[28+r15] vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] jmp $L$oop_avx2 ALIGN 16 $L$oop_avx2:: vmovdqa ymm7,YMMWORD PTR[((K256+512))] vmovdqu xmm0,XMMWORD PTR[((-64+0))+r13*1+rsi] vmovdqu xmm1,XMMWORD PTR[((-64+16))+r13*1+rsi] vmovdqu xmm2,XMMWORD PTR[((-64+32))+r13*1+rsi] vmovdqu xmm3,XMMWORD PTR[((-64+48))+r13*1+rsi] vinserti128 ymm0,ymm0,XMMWORD PTR[r12],1 vinserti128 ymm1,ymm1,XMMWORD PTR[16+r12],1 vpshufb ymm0,ymm0,ymm7 vinserti128 ymm2,ymm2,XMMWORD PTR[32+r12],1 vpshufb ymm1,ymm1,ymm7 vinserti128 ymm3,ymm3,XMMWORD PTR[48+r12],1 lea rbp,QWORD PTR[K256] vpshufb ymm2,ymm2,ymm7 lea r13,QWORD PTR[((-64))+r13] vpaddd ymm4,ymm0,YMMWORD PTR[rbp] vpshufb ymm3,ymm3,ymm7 vpaddd ymm5,ymm1,YMMWORD PTR[32+rbp] vpaddd ymm6,ymm2,YMMWORD PTR[64+rbp] vpaddd ymm7,ymm3,YMMWORD PTR[96+rbp] vmovdqa YMMWORD PTR[rsp],ymm4 xor r14d,r14d vmovdqa YMMWORD PTR[32+rsp],ymm5 lea rsp,QWORD PTR[((-64))+rsp] mov esi,ebx vmovdqa YMMWORD PTR[rsp],ymm6 xor esi,ecx vmovdqa YMMWORD PTR[32+rsp],ymm7 mov r12d,r9d sub rbp,-16*2*4 jmp $L$avx2_00_47 ALIGN 16 $L$avx2_00_47:: vmovdqu xmm9,XMMWORD PTR[r13] vpinsrq xmm15,xmm15,r13,0 lea rsp,QWORD PTR[((-64))+rsp] vpalignr ymm4,ymm1,ymm0,4 add r11d,DWORD PTR[((0+128))+rsp] and r12d,r8d rorx r13d,r8d,25 vpalignr ymm7,ymm3,ymm2,4 rorx r15d,r8d,11 lea eax,DWORD PTR[r14*1+rax] lea r11d,DWORD PTR[r12*1+r11] vpsrld ymm6,ymm4,7 andn r12d,r8d,r10d xor r13d,r15d rorx r14d,r8d,6 vpaddd ymm0,ymm0,ymm7 lea r11d,DWORD PTR[r12*1+r11] xor r13d,r14d mov r15d,eax vpsrld ymm7,ymm4,3 rorx r12d,eax,22 lea r11d,DWORD PTR[r13*1+r11] xor r15d,ebx vpslld ymm5,ymm4,14 rorx r14d,eax,13 rorx r13d,eax,2 lea edx,DWORD PTR[r11*1+rdx] vpxor ymm4,ymm7,ymm6 and esi,r15d vpxor xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi] xor r14d,r12d xor esi,ebx vpshufd ymm7,ymm3,250 xor r14d,r13d lea r11d,DWORD PTR[rsi*1+r11] mov r12d,r8d vpsrld ymm6,ymm6,11 add r10d,DWORD PTR[((4+128))+rsp] and r12d,edx rorx r13d,edx,25 vpxor ymm4,ymm4,ymm5 rorx esi,edx,11 lea r11d,DWORD PTR[r14*1+r11] lea r10d,DWORD PTR[r12*1+r10] vpslld ymm5,ymm5,11 andn r12d,edx,r9d xor r13d,esi rorx r14d,edx,6 vpxor ymm4,ymm4,ymm6 lea r10d,DWORD PTR[r12*1+r10] xor r13d,r14d mov esi,r11d vpsrld ymm6,ymm7,10 rorx r12d,r11d,22 lea r10d,DWORD PTR[r13*1+r10] xor esi,eax vpxor ymm4,ymm4,ymm5 rorx r14d,r11d,13 rorx r13d,r11d,2 lea ecx,DWORD PTR[r10*1+rcx] vpsrlq ymm7,ymm7,17 and r15d,esi vpxor xmm9,xmm9,xmm8 xor r14d,r12d xor r15d,eax vpaddd ymm0,ymm0,ymm4 xor r14d,r13d lea r10d,DWORD PTR[r15*1+r10] mov r12d,edx vpxor ymm6,ymm6,ymm7 add r9d,DWORD PTR[((8+128))+rsp] and r12d,ecx rorx r13d,ecx,25 vpsrlq ymm7,ymm7,2 rorx r15d,ecx,11 lea r10d,DWORD PTR[r14*1+r10] lea r9d,DWORD PTR[r12*1+r9] vpxor ymm6,ymm6,ymm7 andn r12d,ecx,r8d xor r13d,r15d rorx r14d,ecx,6 vpshufd ymm6,ymm6,132 lea r9d,DWORD PTR[r12*1+r9] xor r13d,r14d mov r15d,r10d vpsrldq ymm6,ymm6,8 rorx r12d,r10d,22 lea r9d,DWORD PTR[r13*1+r9] xor r15d,r11d vpaddd ymm0,ymm0,ymm6 rorx r14d,r10d,13 rorx r13d,r10d,2 lea ebx,DWORD PTR[r9*1+rbx] vpshufd ymm7,ymm0,80 and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi] xor r14d,r12d xor esi,r11d vpsrld ymm6,ymm7,10 xor r14d,r13d lea r9d,DWORD PTR[rsi*1+r9] mov r12d,ecx vpsrlq ymm7,ymm7,17 add r8d,DWORD PTR[((12+128))+rsp] and r12d,ebx rorx r13d,ebx,25 vpxor ymm6,ymm6,ymm7 rorx esi,ebx,11 lea r9d,DWORD PTR[r14*1+r9] lea r8d,DWORD PTR[r12*1+r8] vpsrlq ymm7,ymm7,2 andn r12d,ebx,edx xor r13d,esi rorx r14d,ebx,6 vpxor ymm6,ymm6,ymm7 lea r8d,DWORD PTR[r12*1+r8] xor r13d,r14d mov esi,r9d vpshufd ymm6,ymm6,232 rorx r12d,r9d,22 lea r8d,DWORD PTR[r13*1+r8] xor esi,r10d vpslldq ymm6,ymm6,8 rorx r14d,r9d,13 rorx r13d,r9d,2 lea eax,DWORD PTR[r8*1+rax] vpaddd ymm0,ymm0,ymm6 and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi] xor r14d,r12d xor r15d,r10d vpaddd ymm6,ymm0,YMMWORD PTR[rbp] xor r14d,r13d lea r8d,DWORD PTR[r15*1+r8] mov r12d,ebx vmovdqa YMMWORD PTR[rsp],ymm6 vpalignr ymm4,ymm2,ymm1,4 add edx,DWORD PTR[((32+128))+rsp] and r12d,eax rorx r13d,eax,25 vpalignr ymm7,ymm0,ymm3,4 rorx r15d,eax,11 lea r8d,DWORD PTR[r14*1+r8] lea edx,DWORD PTR[r12*1+rdx] vpsrld ymm6,ymm4,7 andn r12d,eax,ecx xor r13d,r15d rorx r14d,eax,6 vpaddd ymm1,ymm1,ymm7 lea edx,DWORD PTR[r12*1+rdx] xor r13d,r14d mov r15d,r8d vpsrld ymm7,ymm4,3 rorx r12d,r8d,22 lea edx,DWORD PTR[r13*1+rdx] xor r15d,r9d vpslld ymm5,ymm4,14 rorx r14d,r8d,13 rorx r13d,r8d,2 lea r11d,DWORD PTR[rdx*1+r11] vpxor ymm4,ymm7,ymm6 and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi] xor r14d,r12d xor esi,r9d vpshufd ymm7,ymm0,250 xor r14d,r13d lea edx,DWORD PTR[rsi*1+rdx] mov r12d,eax vpsrld ymm6,ymm6,11 add ecx,DWORD PTR[((36+128))+rsp] and r12d,r11d rorx r13d,r11d,25 vpxor ymm4,ymm4,ymm5 rorx esi,r11d,11 lea edx,DWORD PTR[r14*1+rdx] lea ecx,DWORD PTR[r12*1+rcx] vpslld ymm5,ymm5,11 andn r12d,r11d,ebx xor r13d,esi rorx r14d,r11d,6 vpxor ymm4,ymm4,ymm6 lea ecx,DWORD PTR[r12*1+rcx] xor r13d,r14d mov esi,edx vpsrld ymm6,ymm7,10 rorx r12d,edx,22 lea ecx,DWORD PTR[r13*1+rcx] xor esi,r8d vpxor ymm4,ymm4,ymm5 rorx r14d,edx,13 rorx r13d,edx,2 lea r10d,DWORD PTR[rcx*1+r10] vpsrlq ymm7,ymm7,17 and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi] xor r14d,r12d xor r15d,r8d vpaddd ymm1,ymm1,ymm4 xor r14d,r13d lea ecx,DWORD PTR[r15*1+rcx] mov r12d,r11d vpxor ymm6,ymm6,ymm7 add ebx,DWORD PTR[((40+128))+rsp] and r12d,r10d rorx r13d,r10d,25 vpsrlq ymm7,ymm7,2 rorx r15d,r10d,11 lea ecx,DWORD PTR[r14*1+rcx] lea ebx,DWORD PTR[r12*1+rbx] vpxor ymm6,ymm6,ymm7 andn r12d,r10d,eax xor r13d,r15d rorx r14d,r10d,6 vpshufd ymm6,ymm6,132 lea ebx,DWORD PTR[r12*1+rbx] xor r13d,r14d mov r15d,ecx vpsrldq ymm6,ymm6,8 rorx r12d,ecx,22 lea ebx,DWORD PTR[r13*1+rbx] xor r15d,edx vpaddd ymm1,ymm1,ymm6 rorx r14d,ecx,13 rorx r13d,ecx,2 lea r9d,DWORD PTR[rbx*1+r9] vpshufd ymm7,ymm1,80 and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi] xor r14d,r12d xor esi,edx vpsrld ymm6,ymm7,10 xor r14d,r13d lea ebx,DWORD PTR[rsi*1+rbx] mov r12d,r10d vpsrlq ymm7,ymm7,17 add eax,DWORD PTR[((44+128))+rsp] and r12d,r9d rorx r13d,r9d,25 vpxor ymm6,ymm6,ymm7 rorx esi,r9d,11 lea ebx,DWORD PTR[r14*1+rbx] lea eax,DWORD PTR[r12*1+rax] vpsrlq ymm7,ymm7,2 andn r12d,r9d,r11d xor r13d,esi rorx r14d,r9d,6 vpxor ymm6,ymm6,ymm7 lea eax,DWORD PTR[r12*1+rax] xor r13d,r14d mov esi,ebx vpshufd ymm6,ymm6,232 rorx r12d,ebx,22 lea eax,DWORD PTR[r13*1+rax] xor esi,ecx vpslldq ymm6,ymm6,8 rorx r14d,ebx,13 rorx r13d,ebx,2 lea r8d,DWORD PTR[rax*1+r8] vpaddd ymm1,ymm1,ymm6 and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi] xor r14d,r12d xor r15d,ecx vpaddd ymm6,ymm1,YMMWORD PTR[32+rbp] xor r14d,r13d lea eax,DWORD PTR[r15*1+rax] mov r12d,r9d vmovdqa YMMWORD PTR[32+rsp],ymm6 lea rsp,QWORD PTR[((-64))+rsp] vpalignr ymm4,ymm3,ymm2,4 add r11d,DWORD PTR[((0+128))+rsp] and r12d,r8d rorx r13d,r8d,25 vpalignr ymm7,ymm1,ymm0,4 rorx r15d,r8d,11 lea eax,DWORD PTR[r14*1+rax] lea r11d,DWORD PTR[r12*1+r11] vpsrld ymm6,ymm4,7 andn r12d,r8d,r10d xor r13d,r15d rorx r14d,r8d,6 vpaddd ymm2,ymm2,ymm7 lea r11d,DWORD PTR[r12*1+r11] xor r13d,r14d mov r15d,eax vpsrld ymm7,ymm4,3 rorx r12d,eax,22 lea r11d,DWORD PTR[r13*1+r11] xor r15d,ebx vpslld ymm5,ymm4,14 rorx r14d,eax,13 rorx r13d,eax,2 lea edx,DWORD PTR[r11*1+rdx] vpxor ymm4,ymm7,ymm6 and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi] xor r14d,r12d xor esi,ebx vpshufd ymm7,ymm1,250 xor r14d,r13d lea r11d,DWORD PTR[rsi*1+r11] mov r12d,r8d vpsrld ymm6,ymm6,11 add r10d,DWORD PTR[((4+128))+rsp] and r12d,edx rorx r13d,edx,25 vpxor ymm4,ymm4,ymm5 rorx esi,edx,11 lea r11d,DWORD PTR[r14*1+r11] lea r10d,DWORD PTR[r12*1+r10] vpslld ymm5,ymm5,11 andn r12d,edx,r9d xor r13d,esi rorx r14d,edx,6 vpxor ymm4,ymm4,ymm6 lea r10d,DWORD PTR[r12*1+r10] xor r13d,r14d mov esi,r11d vpsrld ymm6,ymm7,10 rorx r12d,r11d,22 lea r10d,DWORD PTR[r13*1+r10] xor esi,eax vpxor ymm4,ymm4,ymm5 rorx r14d,r11d,13 rorx r13d,r11d,2 lea ecx,DWORD PTR[r10*1+rcx] vpsrlq ymm7,ymm7,17 and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi] xor r14d,r12d xor r15d,eax vpaddd ymm2,ymm2,ymm4 xor r14d,r13d lea r10d,DWORD PTR[r15*1+r10] mov r12d,edx vpxor ymm6,ymm6,ymm7 add r9d,DWORD PTR[((8+128))+rsp] and r12d,ecx rorx r13d,ecx,25 vpsrlq ymm7,ymm7,2 rorx r15d,ecx,11 lea r10d,DWORD PTR[r14*1+r10] lea r9d,DWORD PTR[r12*1+r9] vpxor ymm6,ymm6,ymm7 andn r12d,ecx,r8d xor r13d,r15d rorx r14d,ecx,6 vpshufd ymm6,ymm6,132 lea r9d,DWORD PTR[r12*1+r9] xor r13d,r14d mov r15d,r10d vpsrldq ymm6,ymm6,8 rorx r12d,r10d,22 lea r9d,DWORD PTR[r13*1+r9] xor r15d,r11d vpaddd ymm2,ymm2,ymm6 rorx r14d,r10d,13 rorx r13d,r10d,2 lea ebx,DWORD PTR[r9*1+rbx] vpshufd ymm7,ymm2,80 and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi] xor r14d,r12d xor esi,r11d vpsrld ymm6,ymm7,10 xor r14d,r13d lea r9d,DWORD PTR[rsi*1+r9] mov r12d,ecx vpsrlq ymm7,ymm7,17 add r8d,DWORD PTR[((12+128))+rsp] and r12d,ebx rorx r13d,ebx,25 vpxor ymm6,ymm6,ymm7 rorx esi,ebx,11 lea r9d,DWORD PTR[r14*1+r9] lea r8d,DWORD PTR[r12*1+r8] vpsrlq ymm7,ymm7,2 andn r12d,ebx,edx xor r13d,esi rorx r14d,ebx,6 vpxor ymm6,ymm6,ymm7 lea r8d,DWORD PTR[r12*1+r8] xor r13d,r14d mov esi,r9d vpshufd ymm6,ymm6,232 rorx r12d,r9d,22 lea r8d,DWORD PTR[r13*1+r8] xor esi,r10d vpslldq ymm6,ymm6,8 rorx r14d,r9d,13 rorx r13d,r9d,2 lea eax,DWORD PTR[r8*1+rax] vpaddd ymm2,ymm2,ymm6 and r15d,esi vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi] xor r14d,r12d xor r15d,r10d vpaddd ymm6,ymm2,YMMWORD PTR[64+rbp] xor r14d,r13d lea r8d,DWORD PTR[r15*1+r8] mov r12d,ebx vmovdqa YMMWORD PTR[rsp],ymm6 vpalignr ymm4,ymm0,ymm3,4 add edx,DWORD PTR[((32+128))+rsp] and r12d,eax rorx r13d,eax,25 vpalignr ymm7,ymm2,ymm1,4 rorx r15d,eax,11 lea r8d,DWORD PTR[r14*1+r8] lea edx,DWORD PTR[r12*1+rdx] vpsrld ymm6,ymm4,7 andn r12d,eax,ecx xor r13d,r15d rorx r14d,eax,6 vpaddd ymm3,ymm3,ymm7 lea edx,DWORD PTR[r12*1+rdx] xor r13d,r14d mov r15d,r8d vpsrld ymm7,ymm4,3 rorx r12d,r8d,22 lea edx,DWORD PTR[r13*1+rdx] xor r15d,r9d vpslld ymm5,ymm4,14 rorx r14d,r8d,13 rorx r13d,r8d,2 lea r11d,DWORD PTR[rdx*1+r11] vpxor ymm4,ymm7,ymm6 and esi,r15d vpand xmm8,xmm11,xmm12 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi] xor r14d,r12d xor esi,r9d vpshufd ymm7,ymm2,250 xor r14d,r13d lea edx,DWORD PTR[rsi*1+rdx] mov r12d,eax vpsrld ymm6,ymm6,11 add ecx,DWORD PTR[((36+128))+rsp] and r12d,r11d rorx r13d,r11d,25 vpxor ymm4,ymm4,ymm5 rorx esi,r11d,11 lea edx,DWORD PTR[r14*1+rdx] lea ecx,DWORD PTR[r12*1+rcx] vpslld ymm5,ymm5,11 andn r12d,r11d,ebx xor r13d,esi rorx r14d,r11d,6 vpxor ymm4,ymm4,ymm6 lea ecx,DWORD PTR[r12*1+rcx] xor r13d,r14d mov esi,edx vpsrld ymm6,ymm7,10 rorx r12d,edx,22 lea ecx,DWORD PTR[r13*1+rcx] xor esi,r8d vpxor ymm4,ymm4,ymm5 rorx r14d,edx,13 rorx r13d,edx,2 lea r10d,DWORD PTR[rcx*1+r10] vpsrlq ymm7,ymm7,17 and r15d,esi vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi] xor r14d,r12d xor r15d,r8d vpaddd ymm3,ymm3,ymm4 xor r14d,r13d lea ecx,DWORD PTR[r15*1+rcx] mov r12d,r11d vpxor ymm6,ymm6,ymm7 add ebx,DWORD PTR[((40+128))+rsp] and r12d,r10d rorx r13d,r10d,25 vpsrlq ymm7,ymm7,2 rorx r15d,r10d,11 lea ecx,DWORD PTR[r14*1+rcx] lea ebx,DWORD PTR[r12*1+rbx] vpxor ymm6,ymm6,ymm7 andn r12d,r10d,eax xor r13d,r15d rorx r14d,r10d,6 vpshufd ymm6,ymm6,132 lea ebx,DWORD PTR[r12*1+rbx] xor r13d,r14d mov r15d,ecx vpsrldq ymm6,ymm6,8 rorx r12d,ecx,22 lea ebx,DWORD PTR[r13*1+rbx] xor r15d,edx vpaddd ymm3,ymm3,ymm6 rorx r14d,ecx,13 rorx r13d,ecx,2 lea r9d,DWORD PTR[rbx*1+r9] vpshufd ymm7,ymm3,80 and esi,r15d vpand xmm11,xmm11,xmm13 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi] xor r14d,r12d xor esi,edx vpsrld ymm6,ymm7,10 xor r14d,r13d lea ebx,DWORD PTR[rsi*1+rbx] mov r12d,r10d vpsrlq ymm7,ymm7,17 add eax,DWORD PTR[((44+128))+rsp] and r12d,r9d rorx r13d,r9d,25 vpxor ymm6,ymm6,ymm7 rorx esi,r9d,11 lea ebx,DWORD PTR[r14*1+rbx] lea eax,DWORD PTR[r12*1+rax] vpsrlq ymm7,ymm7,2 andn r12d,r9d,r11d xor r13d,esi rorx r14d,r9d,6 vpxor ymm6,ymm6,ymm7 lea eax,DWORD PTR[r12*1+rax] xor r13d,r14d mov esi,ebx vpshufd ymm6,ymm6,232 rorx r12d,ebx,22 lea eax,DWORD PTR[r13*1+rax] xor esi,ecx vpslldq ymm6,ymm6,8 rorx r14d,ebx,13 rorx r13d,ebx,2 lea r8d,DWORD PTR[rax*1+r8] vpaddd ymm3,ymm3,ymm6 and r15d,esi vpor xmm8,xmm8,xmm11 vaesenclast xmm11,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] xor r14d,r12d xor r15d,ecx vpaddd ymm6,ymm3,YMMWORD PTR[96+rbp] xor r14d,r13d lea eax,DWORD PTR[r15*1+rax] mov r12d,r9d vmovdqa YMMWORD PTR[32+rsp],ymm6 vmovq r13,xmm15 vpextrq r15,xmm15,1 vpand xmm11,xmm11,xmm14 vpor xmm8,xmm8,xmm11 vmovdqu XMMWORD PTR[r13*1+r15],xmm8 lea r13,QWORD PTR[16+r13] lea rbp,QWORD PTR[128+rbp] cmp BYTE PTR[3+rbp],0 jne $L$avx2_00_47 vmovdqu xmm9,XMMWORD PTR[r13] vpinsrq xmm15,xmm15,r13,0 add r11d,DWORD PTR[((0+64))+rsp] and r12d,r8d rorx r13d,r8d,25 rorx r15d,r8d,11 lea eax,DWORD PTR[r14*1+rax] lea r11d,DWORD PTR[r12*1+r11] andn r12d,r8d,r10d xor r13d,r15d rorx r14d,r8d,6 lea r11d,DWORD PTR[r12*1+r11] xor r13d,r14d mov r15d,eax rorx r12d,eax,22 lea r11d,DWORD PTR[r13*1+r11] xor r15d,ebx rorx r14d,eax,13 rorx r13d,eax,2 lea edx,DWORD PTR[r11*1+rdx] and esi,r15d vpxor xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi] xor r14d,r12d xor esi,ebx xor r14d,r13d lea r11d,DWORD PTR[rsi*1+r11] mov r12d,r8d add r10d,DWORD PTR[((4+64))+rsp] and r12d,edx rorx r13d,edx,25 rorx esi,edx,11 lea r11d,DWORD PTR[r14*1+r11] lea r10d,DWORD PTR[r12*1+r10] andn r12d,edx,r9d xor r13d,esi rorx r14d,edx,6 lea r10d,DWORD PTR[r12*1+r10] xor r13d,r14d mov esi,r11d rorx r12d,r11d,22 lea r10d,DWORD PTR[r13*1+r10] xor esi,eax rorx r14d,r11d,13 rorx r13d,r11d,2 lea ecx,DWORD PTR[r10*1+rcx] and r15d,esi vpxor xmm9,xmm9,xmm8 xor r14d,r12d xor r15d,eax xor r14d,r13d lea r10d,DWORD PTR[r15*1+r10] mov r12d,edx add r9d,DWORD PTR[((8+64))+rsp] and r12d,ecx rorx r13d,ecx,25 rorx r15d,ecx,11 lea r10d,DWORD PTR[r14*1+r10] lea r9d,DWORD PTR[r12*1+r9] andn r12d,ecx,r8d xor r13d,r15d rorx r14d,ecx,6 lea r9d,DWORD PTR[r12*1+r9] xor r13d,r14d mov r15d,r10d rorx r12d,r10d,22 lea r9d,DWORD PTR[r13*1+r9] xor r15d,r11d rorx r14d,r10d,13 rorx r13d,r10d,2 lea ebx,DWORD PTR[r9*1+rbx] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi] xor r14d,r12d xor esi,r11d xor r14d,r13d lea r9d,DWORD PTR[rsi*1+r9] mov r12d,ecx add r8d,DWORD PTR[((12+64))+rsp] and r12d,ebx rorx r13d,ebx,25 rorx esi,ebx,11 lea r9d,DWORD PTR[r14*1+r9] lea r8d,DWORD PTR[r12*1+r8] andn r12d,ebx,edx xor r13d,esi rorx r14d,ebx,6 lea r8d,DWORD PTR[r12*1+r8] xor r13d,r14d mov esi,r9d rorx r12d,r9d,22 lea r8d,DWORD PTR[r13*1+r8] xor esi,r10d rorx r14d,r9d,13 rorx r13d,r9d,2 lea eax,DWORD PTR[r8*1+rax] and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi] xor r14d,r12d xor r15d,r10d xor r14d,r13d lea r8d,DWORD PTR[r15*1+r8] mov r12d,ebx add edx,DWORD PTR[((32+64))+rsp] and r12d,eax rorx r13d,eax,25 rorx r15d,eax,11 lea r8d,DWORD PTR[r14*1+r8] lea edx,DWORD PTR[r12*1+rdx] andn r12d,eax,ecx xor r13d,r15d rorx r14d,eax,6 lea edx,DWORD PTR[r12*1+rdx] xor r13d,r14d mov r15d,r8d rorx r12d,r8d,22 lea edx,DWORD PTR[r13*1+rdx] xor r15d,r9d rorx r14d,r8d,13 rorx r13d,r8d,2 lea r11d,DWORD PTR[rdx*1+r11] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi] xor r14d,r12d xor esi,r9d xor r14d,r13d lea edx,DWORD PTR[rsi*1+rdx] mov r12d,eax add ecx,DWORD PTR[((36+64))+rsp] and r12d,r11d rorx r13d,r11d,25 rorx esi,r11d,11 lea edx,DWORD PTR[r14*1+rdx] lea ecx,DWORD PTR[r12*1+rcx] andn r12d,r11d,ebx xor r13d,esi rorx r14d,r11d,6 lea ecx,DWORD PTR[r12*1+rcx] xor r13d,r14d mov esi,edx rorx r12d,edx,22 lea ecx,DWORD PTR[r13*1+rcx] xor esi,r8d rorx r14d,edx,13 rorx r13d,edx,2 lea r10d,DWORD PTR[rcx*1+r10] and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi] xor r14d,r12d xor r15d,r8d xor r14d,r13d lea ecx,DWORD PTR[r15*1+rcx] mov r12d,r11d add ebx,DWORD PTR[((40+64))+rsp] and r12d,r10d rorx r13d,r10d,25 rorx r15d,r10d,11 lea ecx,DWORD PTR[r14*1+rcx] lea ebx,DWORD PTR[r12*1+rbx] andn r12d,r10d,eax xor r13d,r15d rorx r14d,r10d,6 lea ebx,DWORD PTR[r12*1+rbx] xor r13d,r14d mov r15d,ecx rorx r12d,ecx,22 lea ebx,DWORD PTR[r13*1+rbx] xor r15d,edx rorx r14d,ecx,13 rorx r13d,ecx,2 lea r9d,DWORD PTR[rbx*1+r9] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi] xor r14d,r12d xor esi,edx xor r14d,r13d lea ebx,DWORD PTR[rsi*1+rbx] mov r12d,r10d add eax,DWORD PTR[((44+64))+rsp] and r12d,r9d rorx r13d,r9d,25 rorx esi,r9d,11 lea ebx,DWORD PTR[r14*1+rbx] lea eax,DWORD PTR[r12*1+rax] andn r12d,r9d,r11d xor r13d,esi rorx r14d,r9d,6 lea eax,DWORD PTR[r12*1+rax] xor r13d,r14d mov esi,ebx rorx r12d,ebx,22 lea eax,DWORD PTR[r13*1+rax] xor esi,ecx rorx r14d,ebx,13 rorx r13d,ebx,2 lea r8d,DWORD PTR[rax*1+r8] and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi] xor r14d,r12d xor r15d,ecx xor r14d,r13d lea eax,DWORD PTR[r15*1+rax] mov r12d,r9d add r11d,DWORD PTR[rsp] and r12d,r8d rorx r13d,r8d,25 rorx r15d,r8d,11 lea eax,DWORD PTR[r14*1+rax] lea r11d,DWORD PTR[r12*1+r11] andn r12d,r8d,r10d xor r13d,r15d rorx r14d,r8d,6 lea r11d,DWORD PTR[r12*1+r11] xor r13d,r14d mov r15d,eax rorx r12d,eax,22 lea r11d,DWORD PTR[r13*1+r11] xor r15d,ebx rorx r14d,eax,13 rorx r13d,eax,2 lea edx,DWORD PTR[r11*1+rdx] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi] xor r14d,r12d xor esi,ebx xor r14d,r13d lea r11d,DWORD PTR[rsi*1+r11] mov r12d,r8d add r10d,DWORD PTR[4+rsp] and r12d,edx rorx r13d,edx,25 rorx esi,edx,11 lea r11d,DWORD PTR[r14*1+r11] lea r10d,DWORD PTR[r12*1+r10] andn r12d,edx,r9d xor r13d,esi rorx r14d,edx,6 lea r10d,DWORD PTR[r12*1+r10] xor r13d,r14d mov esi,r11d rorx r12d,r11d,22 lea r10d,DWORD PTR[r13*1+r10] xor esi,eax rorx r14d,r11d,13 rorx r13d,r11d,2 lea ecx,DWORD PTR[r10*1+rcx] and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi] xor r14d,r12d xor r15d,eax xor r14d,r13d lea r10d,DWORD PTR[r15*1+r10] mov r12d,edx add r9d,DWORD PTR[8+rsp] and r12d,ecx rorx r13d,ecx,25 rorx r15d,ecx,11 lea r10d,DWORD PTR[r14*1+r10] lea r9d,DWORD PTR[r12*1+r9] andn r12d,ecx,r8d xor r13d,r15d rorx r14d,ecx,6 lea r9d,DWORD PTR[r12*1+r9] xor r13d,r14d mov r15d,r10d rorx r12d,r10d,22 lea r9d,DWORD PTR[r13*1+r9] xor r15d,r11d rorx r14d,r10d,13 rorx r13d,r10d,2 lea ebx,DWORD PTR[r9*1+rbx] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi] xor r14d,r12d xor esi,r11d xor r14d,r13d lea r9d,DWORD PTR[rsi*1+r9] mov r12d,ecx add r8d,DWORD PTR[12+rsp] and r12d,ebx rorx r13d,ebx,25 rorx esi,ebx,11 lea r9d,DWORD PTR[r14*1+r9] lea r8d,DWORD PTR[r12*1+r8] andn r12d,ebx,edx xor r13d,esi rorx r14d,ebx,6 lea r8d,DWORD PTR[r12*1+r8] xor r13d,r14d mov esi,r9d rorx r12d,r9d,22 lea r8d,DWORD PTR[r13*1+r8] xor esi,r10d rorx r14d,r9d,13 rorx r13d,r9d,2 lea eax,DWORD PTR[r8*1+rax] and r15d,esi vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi] xor r14d,r12d xor r15d,r10d xor r14d,r13d lea r8d,DWORD PTR[r15*1+r8] mov r12d,ebx add edx,DWORD PTR[32+rsp] and r12d,eax rorx r13d,eax,25 rorx r15d,eax,11 lea r8d,DWORD PTR[r14*1+r8] lea edx,DWORD PTR[r12*1+rdx] andn r12d,eax,ecx xor r13d,r15d rorx r14d,eax,6 lea edx,DWORD PTR[r12*1+rdx] xor r13d,r14d mov r15d,r8d rorx r12d,r8d,22 lea edx,DWORD PTR[r13*1+rdx] xor r15d,r9d rorx r14d,r8d,13 rorx r13d,r8d,2 lea r11d,DWORD PTR[rdx*1+r11] and esi,r15d vpand xmm8,xmm11,xmm12 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi] xor r14d,r12d xor esi,r9d xor r14d,r13d lea edx,DWORD PTR[rsi*1+rdx] mov r12d,eax add ecx,DWORD PTR[36+rsp] and r12d,r11d rorx r13d,r11d,25 rorx esi,r11d,11 lea edx,DWORD PTR[r14*1+rdx] lea ecx,DWORD PTR[r12*1+rcx] andn r12d,r11d,ebx xor r13d,esi rorx r14d,r11d,6 lea ecx,DWORD PTR[r12*1+rcx] xor r13d,r14d mov esi,edx rorx r12d,edx,22 lea ecx,DWORD PTR[r13*1+rcx] xor esi,r8d rorx r14d,edx,13 rorx r13d,edx,2 lea r10d,DWORD PTR[rcx*1+r10] and r15d,esi vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi] xor r14d,r12d xor r15d,r8d xor r14d,r13d lea ecx,DWORD PTR[r15*1+rcx] mov r12d,r11d add ebx,DWORD PTR[40+rsp] and r12d,r10d rorx r13d,r10d,25 rorx r15d,r10d,11 lea ecx,DWORD PTR[r14*1+rcx] lea ebx,DWORD PTR[r12*1+rbx] andn r12d,r10d,eax xor r13d,r15d rorx r14d,r10d,6 lea ebx,DWORD PTR[r12*1+rbx] xor r13d,r14d mov r15d,ecx rorx r12d,ecx,22 lea ebx,DWORD PTR[r13*1+rbx] xor r15d,edx rorx r14d,ecx,13 rorx r13d,ecx,2 lea r9d,DWORD PTR[rbx*1+r9] and esi,r15d vpand xmm11,xmm11,xmm13 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi] xor r14d,r12d xor esi,edx xor r14d,r13d lea ebx,DWORD PTR[rsi*1+rbx] mov r12d,r10d add eax,DWORD PTR[44+rsp] and r12d,r9d rorx r13d,r9d,25 rorx esi,r9d,11 lea ebx,DWORD PTR[r14*1+rbx] lea eax,DWORD PTR[r12*1+rax] andn r12d,r9d,r11d xor r13d,esi rorx r14d,r9d,6 lea eax,DWORD PTR[r12*1+rax] xor r13d,r14d mov esi,ebx rorx r12d,ebx,22 lea eax,DWORD PTR[r13*1+rax] xor esi,ecx rorx r14d,ebx,13 rorx r13d,ebx,2 lea r8d,DWORD PTR[rax*1+r8] and r15d,esi vpor xmm8,xmm8,xmm11 vaesenclast xmm11,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] xor r14d,r12d xor r15d,ecx xor r14d,r13d lea eax,DWORD PTR[r15*1+rax] mov r12d,r9d vpextrq r12,xmm15,1 vmovq r13,xmm15 mov r15,QWORD PTR[552+rsp] add eax,r14d lea rbp,QWORD PTR[448+rsp] vpand xmm11,xmm11,xmm14 vpor xmm8,xmm8,xmm11 vmovdqu XMMWORD PTR[r13*1+r12],xmm8 lea r13,QWORD PTR[16+r13] add eax,DWORD PTR[r15] add ebx,DWORD PTR[4+r15] add ecx,DWORD PTR[8+r15] add edx,DWORD PTR[12+r15] add r8d,DWORD PTR[16+r15] add r9d,DWORD PTR[20+r15] add r10d,DWORD PTR[24+r15] add r11d,DWORD PTR[28+r15] mov DWORD PTR[r15],eax mov DWORD PTR[4+r15],ebx mov DWORD PTR[8+r15],ecx mov DWORD PTR[12+r15],edx mov DWORD PTR[16+r15],r8d mov DWORD PTR[20+r15],r9d mov DWORD PTR[24+r15],r10d mov DWORD PTR[28+r15],r11d cmp r13,QWORD PTR[80+rbp] je $L$done_avx2 xor r14d,r14d mov esi,ebx mov r12d,r9d xor esi,ecx jmp $L$ower_avx2 ALIGN 16 $L$ower_avx2:: vmovdqu xmm9,XMMWORD PTR[r13] vpinsrq xmm15,xmm15,r13,0 add r11d,DWORD PTR[((0+16))+rbp] and r12d,r8d rorx r13d,r8d,25 rorx r15d,r8d,11 lea eax,DWORD PTR[r14*1+rax] lea r11d,DWORD PTR[r12*1+r11] andn r12d,r8d,r10d xor r13d,r15d rorx r14d,r8d,6 lea r11d,DWORD PTR[r12*1+r11] xor r13d,r14d mov r15d,eax rorx r12d,eax,22 lea r11d,DWORD PTR[r13*1+r11] xor r15d,ebx rorx r14d,eax,13 rorx r13d,eax,2 lea edx,DWORD PTR[r11*1+rdx] and esi,r15d vpxor xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi] xor r14d,r12d xor esi,ebx xor r14d,r13d lea r11d,DWORD PTR[rsi*1+r11] mov r12d,r8d add r10d,DWORD PTR[((4+16))+rbp] and r12d,edx rorx r13d,edx,25 rorx esi,edx,11 lea r11d,DWORD PTR[r14*1+r11] lea r10d,DWORD PTR[r12*1+r10] andn r12d,edx,r9d xor r13d,esi rorx r14d,edx,6 lea r10d,DWORD PTR[r12*1+r10] xor r13d,r14d mov esi,r11d rorx r12d,r11d,22 lea r10d,DWORD PTR[r13*1+r10] xor esi,eax rorx r14d,r11d,13 rorx r13d,r11d,2 lea ecx,DWORD PTR[r10*1+rcx] and r15d,esi vpxor xmm9,xmm9,xmm8 xor r14d,r12d xor r15d,eax xor r14d,r13d lea r10d,DWORD PTR[r15*1+r10] mov r12d,edx add r9d,DWORD PTR[((8+16))+rbp] and r12d,ecx rorx r13d,ecx,25 rorx r15d,ecx,11 lea r10d,DWORD PTR[r14*1+r10] lea r9d,DWORD PTR[r12*1+r9] andn r12d,ecx,r8d xor r13d,r15d rorx r14d,ecx,6 lea r9d,DWORD PTR[r12*1+r9] xor r13d,r14d mov r15d,r10d rorx r12d,r10d,22 lea r9d,DWORD PTR[r13*1+r9] xor r15d,r11d rorx r14d,r10d,13 rorx r13d,r10d,2 lea ebx,DWORD PTR[r9*1+rbx] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi] xor r14d,r12d xor esi,r11d xor r14d,r13d lea r9d,DWORD PTR[rsi*1+r9] mov r12d,ecx add r8d,DWORD PTR[((12+16))+rbp] and r12d,ebx rorx r13d,ebx,25 rorx esi,ebx,11 lea r9d,DWORD PTR[r14*1+r9] lea r8d,DWORD PTR[r12*1+r8] andn r12d,ebx,edx xor r13d,esi rorx r14d,ebx,6 lea r8d,DWORD PTR[r12*1+r8] xor r13d,r14d mov esi,r9d rorx r12d,r9d,22 lea r8d,DWORD PTR[r13*1+r8] xor esi,r10d rorx r14d,r9d,13 rorx r13d,r9d,2 lea eax,DWORD PTR[r8*1+rax] and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi] xor r14d,r12d xor r15d,r10d xor r14d,r13d lea r8d,DWORD PTR[r15*1+r8] mov r12d,ebx add edx,DWORD PTR[((32+16))+rbp] and r12d,eax rorx r13d,eax,25 rorx r15d,eax,11 lea r8d,DWORD PTR[r14*1+r8] lea edx,DWORD PTR[r12*1+rdx] andn r12d,eax,ecx xor r13d,r15d rorx r14d,eax,6 lea edx,DWORD PTR[r12*1+rdx] xor r13d,r14d mov r15d,r8d rorx r12d,r8d,22 lea edx,DWORD PTR[r13*1+rdx] xor r15d,r9d rorx r14d,r8d,13 rorx r13d,r8d,2 lea r11d,DWORD PTR[rdx*1+r11] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi] xor r14d,r12d xor esi,r9d xor r14d,r13d lea edx,DWORD PTR[rsi*1+rdx] mov r12d,eax add ecx,DWORD PTR[((36+16))+rbp] and r12d,r11d rorx r13d,r11d,25 rorx esi,r11d,11 lea edx,DWORD PTR[r14*1+rdx] lea ecx,DWORD PTR[r12*1+rcx] andn r12d,r11d,ebx xor r13d,esi rorx r14d,r11d,6 lea ecx,DWORD PTR[r12*1+rcx] xor r13d,r14d mov esi,edx rorx r12d,edx,22 lea ecx,DWORD PTR[r13*1+rcx] xor esi,r8d rorx r14d,edx,13 rorx r13d,edx,2 lea r10d,DWORD PTR[rcx*1+r10] and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi] xor r14d,r12d xor r15d,r8d xor r14d,r13d lea ecx,DWORD PTR[r15*1+rcx] mov r12d,r11d add ebx,DWORD PTR[((40+16))+rbp] and r12d,r10d rorx r13d,r10d,25 rorx r15d,r10d,11 lea ecx,DWORD PTR[r14*1+rcx] lea ebx,DWORD PTR[r12*1+rbx] andn r12d,r10d,eax xor r13d,r15d rorx r14d,r10d,6 lea ebx,DWORD PTR[r12*1+rbx] xor r13d,r14d mov r15d,ecx rorx r12d,ecx,22 lea ebx,DWORD PTR[r13*1+rbx] xor r15d,edx rorx r14d,ecx,13 rorx r13d,ecx,2 lea r9d,DWORD PTR[rbx*1+r9] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi] xor r14d,r12d xor esi,edx xor r14d,r13d lea ebx,DWORD PTR[rsi*1+rbx] mov r12d,r10d add eax,DWORD PTR[((44+16))+rbp] and r12d,r9d rorx r13d,r9d,25 rorx esi,r9d,11 lea ebx,DWORD PTR[r14*1+rbx] lea eax,DWORD PTR[r12*1+rax] andn r12d,r9d,r11d xor r13d,esi rorx r14d,r9d,6 lea eax,DWORD PTR[r12*1+rax] xor r13d,r14d mov esi,ebx rorx r12d,ebx,22 lea eax,DWORD PTR[r13*1+rax] xor esi,ecx rorx r14d,ebx,13 rorx r13d,ebx,2 lea r8d,DWORD PTR[rax*1+r8] and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi] xor r14d,r12d xor r15d,ecx xor r14d,r13d lea eax,DWORD PTR[r15*1+rax] mov r12d,r9d lea rbp,QWORD PTR[((-64))+rbp] add r11d,DWORD PTR[((0+16))+rbp] and r12d,r8d rorx r13d,r8d,25 rorx r15d,r8d,11 lea eax,DWORD PTR[r14*1+rax] lea r11d,DWORD PTR[r12*1+r11] andn r12d,r8d,r10d xor r13d,r15d rorx r14d,r8d,6 lea r11d,DWORD PTR[r12*1+r11] xor r13d,r14d mov r15d,eax rorx r12d,eax,22 lea r11d,DWORD PTR[r13*1+r11] xor r15d,ebx rorx r14d,eax,13 rorx r13d,eax,2 lea edx,DWORD PTR[r11*1+rdx] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi] xor r14d,r12d xor esi,ebx xor r14d,r13d lea r11d,DWORD PTR[rsi*1+r11] mov r12d,r8d add r10d,DWORD PTR[((4+16))+rbp] and r12d,edx rorx r13d,edx,25 rorx esi,edx,11 lea r11d,DWORD PTR[r14*1+r11] lea r10d,DWORD PTR[r12*1+r10] andn r12d,edx,r9d xor r13d,esi rorx r14d,edx,6 lea r10d,DWORD PTR[r12*1+r10] xor r13d,r14d mov esi,r11d rorx r12d,r11d,22 lea r10d,DWORD PTR[r13*1+r10] xor esi,eax rorx r14d,r11d,13 rorx r13d,r11d,2 lea ecx,DWORD PTR[r10*1+rcx] and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi] xor r14d,r12d xor r15d,eax xor r14d,r13d lea r10d,DWORD PTR[r15*1+r10] mov r12d,edx add r9d,DWORD PTR[((8+16))+rbp] and r12d,ecx rorx r13d,ecx,25 rorx r15d,ecx,11 lea r10d,DWORD PTR[r14*1+r10] lea r9d,DWORD PTR[r12*1+r9] andn r12d,ecx,r8d xor r13d,r15d rorx r14d,ecx,6 lea r9d,DWORD PTR[r12*1+r9] xor r13d,r14d mov r15d,r10d rorx r12d,r10d,22 lea r9d,DWORD PTR[r13*1+r9] xor r15d,r11d rorx r14d,r10d,13 rorx r13d,r10d,2 lea ebx,DWORD PTR[r9*1+rbx] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi] xor r14d,r12d xor esi,r11d xor r14d,r13d lea r9d,DWORD PTR[rsi*1+r9] mov r12d,ecx add r8d,DWORD PTR[((12+16))+rbp] and r12d,ebx rorx r13d,ebx,25 rorx esi,ebx,11 lea r9d,DWORD PTR[r14*1+r9] lea r8d,DWORD PTR[r12*1+r8] andn r12d,ebx,edx xor r13d,esi rorx r14d,ebx,6 lea r8d,DWORD PTR[r12*1+r8] xor r13d,r14d mov esi,r9d rorx r12d,r9d,22 lea r8d,DWORD PTR[r13*1+r8] xor esi,r10d rorx r14d,r9d,13 rorx r13d,r9d,2 lea eax,DWORD PTR[r8*1+rax] and r15d,esi vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi] xor r14d,r12d xor r15d,r10d xor r14d,r13d lea r8d,DWORD PTR[r15*1+r8] mov r12d,ebx add edx,DWORD PTR[((32+16))+rbp] and r12d,eax rorx r13d,eax,25 rorx r15d,eax,11 lea r8d,DWORD PTR[r14*1+r8] lea edx,DWORD PTR[r12*1+rdx] andn r12d,eax,ecx xor r13d,r15d rorx r14d,eax,6 lea edx,DWORD PTR[r12*1+rdx] xor r13d,r14d mov r15d,r8d rorx r12d,r8d,22 lea edx,DWORD PTR[r13*1+rdx] xor r15d,r9d rorx r14d,r8d,13 rorx r13d,r8d,2 lea r11d,DWORD PTR[rdx*1+r11] and esi,r15d vpand xmm8,xmm11,xmm12 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi] xor r14d,r12d xor esi,r9d xor r14d,r13d lea edx,DWORD PTR[rsi*1+rdx] mov r12d,eax add ecx,DWORD PTR[((36+16))+rbp] and r12d,r11d rorx r13d,r11d,25 rorx esi,r11d,11 lea edx,DWORD PTR[r14*1+rdx] lea ecx,DWORD PTR[r12*1+rcx] andn r12d,r11d,ebx xor r13d,esi rorx r14d,r11d,6 lea ecx,DWORD PTR[r12*1+rcx] xor r13d,r14d mov esi,edx rorx r12d,edx,22 lea ecx,DWORD PTR[r13*1+rcx] xor esi,r8d rorx r14d,edx,13 rorx r13d,edx,2 lea r10d,DWORD PTR[rcx*1+r10] and r15d,esi vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi] xor r14d,r12d xor r15d,r8d xor r14d,r13d lea ecx,DWORD PTR[r15*1+rcx] mov r12d,r11d add ebx,DWORD PTR[((40+16))+rbp] and r12d,r10d rorx r13d,r10d,25 rorx r15d,r10d,11 lea ecx,DWORD PTR[r14*1+rcx] lea ebx,DWORD PTR[r12*1+rbx] andn r12d,r10d,eax xor r13d,r15d rorx r14d,r10d,6 lea ebx,DWORD PTR[r12*1+rbx] xor r13d,r14d mov r15d,ecx rorx r12d,ecx,22 lea ebx,DWORD PTR[r13*1+rbx] xor r15d,edx rorx r14d,ecx,13 rorx r13d,ecx,2 lea r9d,DWORD PTR[rbx*1+r9] and esi,r15d vpand xmm11,xmm11,xmm13 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi] xor r14d,r12d xor esi,edx xor r14d,r13d lea ebx,DWORD PTR[rsi*1+rbx] mov r12d,r10d add eax,DWORD PTR[((44+16))+rbp] and r12d,r9d rorx r13d,r9d,25 rorx esi,r9d,11 lea ebx,DWORD PTR[r14*1+rbx] lea eax,DWORD PTR[r12*1+rax] andn r12d,r9d,r11d xor r13d,esi rorx r14d,r9d,6 lea eax,DWORD PTR[r12*1+rax] xor r13d,r14d mov esi,ebx rorx r12d,ebx,22 lea eax,DWORD PTR[r13*1+rax] xor esi,ecx rorx r14d,ebx,13 rorx r13d,ebx,2 lea r8d,DWORD PTR[rax*1+r8] and r15d,esi vpor xmm8,xmm8,xmm11 vaesenclast xmm11,xmm9,xmm10 vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] xor r14d,r12d xor r15d,ecx xor r14d,r13d lea eax,DWORD PTR[r15*1+rax] mov r12d,r9d vmovq r13,xmm15 vpextrq r15,xmm15,1 vpand xmm11,xmm11,xmm14 vpor xmm8,xmm8,xmm11 lea rbp,QWORD PTR[((-64))+rbp] vmovdqu XMMWORD PTR[r13*1+r15],xmm8 lea r13,QWORD PTR[16+r13] cmp rbp,rsp jae $L$ower_avx2 mov r15,QWORD PTR[552+rsp] lea r13,QWORD PTR[64+r13] mov rsi,QWORD PTR[560+rsp] add eax,r14d lea rsp,QWORD PTR[448+rsp] add eax,DWORD PTR[r15] add ebx,DWORD PTR[4+r15] add ecx,DWORD PTR[8+r15] add edx,DWORD PTR[12+r15] add r8d,DWORD PTR[16+r15] add r9d,DWORD PTR[20+r15] add r10d,DWORD PTR[24+r15] lea r12,QWORD PTR[r13*1+rsi] add r11d,DWORD PTR[28+r15] cmp r13,QWORD PTR[((64+16))+rsp] mov DWORD PTR[r15],eax cmove r12,rsp mov DWORD PTR[4+r15],ebx mov DWORD PTR[8+r15],ecx mov DWORD PTR[12+r15],edx mov DWORD PTR[16+r15],r8d mov DWORD PTR[20+r15],r9d mov DWORD PTR[24+r15],r10d mov DWORD PTR[28+r15],r11d jbe $L$oop_avx2 lea rbp,QWORD PTR[rsp] $L$done_avx2:: lea rsp,QWORD PTR[rbp] mov r8,QWORD PTR[((64+32))+rsp] mov rsi,QWORD PTR[((64+56))+rsp] vmovdqu XMMWORD PTR[r8],xmm8 vzeroall movaps xmm6,XMMWORD PTR[128+rsp] movaps xmm7,XMMWORD PTR[144+rsp] movaps xmm8,XMMWORD PTR[160+rsp] movaps xmm9,XMMWORD PTR[176+rsp] movaps xmm10,XMMWORD PTR[192+rsp] movaps xmm11,XMMWORD PTR[208+rsp] movaps xmm12,XMMWORD PTR[224+rsp] movaps xmm13,XMMWORD PTR[240+rsp] movaps xmm14,XMMWORD PTR[256+rsp] movaps xmm15,XMMWORD PTR[272+rsp] mov r15,QWORD PTR[rsi] mov r14,QWORD PTR[8+rsi] mov r13,QWORD PTR[16+rsi] mov r12,QWORD PTR[24+rsi] mov rbp,QWORD PTR[32+rsi] mov rbx,QWORD PTR[40+rsi] lea rsp,QWORD PTR[48+rsi] $L$epilogue_avx2:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aesni_cbc_sha256_enc_avx2:: aesni_cbc_sha256_enc_avx2 ENDP ALIGN 32 aesni_cbc_sha256_enc_shaext PROC PRIVATE mov QWORD PTR[8+rsp],rdi ;WIN64 prologue mov QWORD PTR[16+rsp],rsi mov rax,rsp $L$SEH_begin_aesni_cbc_sha256_enc_shaext:: mov rdi,rcx mov rsi,rdx mov rdx,r8 mov rcx,r9 mov r8,QWORD PTR[40+rsp] mov r9,QWORD PTR[48+rsp] mov r10,QWORD PTR[56+rsp] lea rsp,QWORD PTR[((-168))+rsp] movaps XMMWORD PTR[(-8-160)+rax],xmm6 movaps XMMWORD PTR[(-8-144)+rax],xmm7 movaps XMMWORD PTR[(-8-128)+rax],xmm8 movaps XMMWORD PTR[(-8-112)+rax],xmm9 movaps XMMWORD PTR[(-8-96)+rax],xmm10 movaps XMMWORD PTR[(-8-80)+rax],xmm11 movaps XMMWORD PTR[(-8-64)+rax],xmm12 movaps XMMWORD PTR[(-8-48)+rax],xmm13 movaps XMMWORD PTR[(-8-32)+rax],xmm14 movaps XMMWORD PTR[(-8-16)+rax],xmm15 $L$prologue_shaext:: lea rax,QWORD PTR[((K256+128))] movdqu xmm1,XMMWORD PTR[r9] movdqu xmm2,XMMWORD PTR[16+r9] movdqa xmm3,XMMWORD PTR[((512-128))+rax] mov r11d,DWORD PTR[240+rcx] sub rsi,rdi movups xmm15,XMMWORD PTR[rcx] movups xmm4,XMMWORD PTR[16+rcx] lea rcx,QWORD PTR[112+rcx] pshufd xmm0,xmm1,01bh pshufd xmm1,xmm1,1h pshufd xmm2,xmm2,01bh movdqa xmm7,xmm3 DB 102,15,58,15,202,8 punpcklqdq xmm2,xmm0 jmp $L$oop_shaext ALIGN 16 $L$oop_shaext:: movdqu xmm10,XMMWORD PTR[r10] movdqu xmm11,XMMWORD PTR[16+r10] movdqu xmm12,XMMWORD PTR[32+r10] DB 102,68,15,56,0,211 movdqu xmm13,XMMWORD PTR[48+r10] movdqa xmm0,XMMWORD PTR[((0-128))+rax] paddd xmm0,xmm10 DB 102,68,15,56,0,219 movdqa xmm9,xmm2 movdqa xmm8,xmm1 movups xmm14,XMMWORD PTR[rdi] xorps xmm14,xmm15 xorps xmm6,xmm14 movups xmm5,XMMWORD PTR[((-80))+rcx] aesenc xmm6,xmm4 DB 15,56,203,209 pshufd xmm0,xmm0,00eh movups xmm4,XMMWORD PTR[((-64))+rcx] aesenc xmm6,xmm5 DB 15,56,203,202 movdqa xmm0,XMMWORD PTR[((32-128))+rax] paddd xmm0,xmm11 DB 102,68,15,56,0,227 lea r10,QWORD PTR[64+r10] movups xmm5,XMMWORD PTR[((-48))+rcx] aesenc xmm6,xmm4 DB 15,56,203,209 pshufd xmm0,xmm0,00eh movups xmm4,XMMWORD PTR[((-32))+rcx] aesenc xmm6,xmm5 DB 15,56,203,202 movdqa xmm0,XMMWORD PTR[((64-128))+rax] paddd xmm0,xmm12 DB 102,68,15,56,0,235 DB 69,15,56,204,211 movups xmm5,XMMWORD PTR[((-16))+rcx] aesenc xmm6,xmm4 DB 15,56,203,209 pshufd xmm0,xmm0,00eh movdqa xmm3,xmm13 DB 102,65,15,58,15,220,4 paddd xmm10,xmm3 movups xmm4,XMMWORD PTR[rcx] aesenc xmm6,xmm5 DB 15,56,203,202 movdqa xmm0,XMMWORD PTR[((96-128))+rax] paddd xmm0,xmm13 DB 69,15,56,205,213 DB 69,15,56,204,220 movups xmm5,XMMWORD PTR[16+rcx] aesenc xmm6,xmm4 DB 15,56,203,209 pshufd xmm0,xmm0,00eh movups xmm4,XMMWORD PTR[32+rcx] aesenc xmm6,xmm5 movdqa xmm3,xmm10 DB 102,65,15,58,15,221,4 paddd xmm11,xmm3 DB 15,56,203,202 movdqa xmm0,XMMWORD PTR[((128-128))+rax] paddd xmm0,xmm10 DB 69,15,56,205,218 DB 69,15,56,204,229 movups xmm5,XMMWORD PTR[48+rcx] aesenc xmm6,xmm4 DB 15,56,203,209 pshufd xmm0,xmm0,00eh movdqa xmm3,xmm11 DB 102,65,15,58,15,218,4 paddd xmm12,xmm3 cmp r11d,11 jb $L$aesenclast1 movups xmm4,XMMWORD PTR[64+rcx] aesenc xmm6,xmm5 movups xmm5,XMMWORD PTR[80+rcx] aesenc xmm6,xmm4 je $L$aesenclast1 movups xmm4,XMMWORD PTR[96+rcx] aesenc xmm6,xmm5 movups xmm5,XMMWORD PTR[112+rcx] aesenc xmm6,xmm4 $L$aesenclast1:: aesenclast xmm6,xmm5 movups xmm4,XMMWORD PTR[((16-112))+rcx] nop DB 15,56,203,202 movups xmm14,XMMWORD PTR[16+rdi] xorps xmm14,xmm15 movups XMMWORD PTR[rdi*1+rsi],xmm6 xorps xmm6,xmm14 movups xmm5,XMMWORD PTR[((-80))+rcx] aesenc xmm6,xmm4 movdqa xmm0,XMMWORD PTR[((160-128))+rax] paddd xmm0,xmm11 DB 69,15,56,205,227 DB 69,15,56,204,234 movups xmm4,XMMWORD PTR[((-64))+rcx] aesenc xmm6,xmm5 DB 15,56,203,209 pshufd xmm0,xmm0,00eh movdqa xmm3,xmm12 DB 102,65,15,58,15,219,4 paddd xmm13,xmm3 movups xmm5,XMMWORD PTR[((-48))+rcx] aesenc xmm6,xmm4 DB 15,56,203,202 movdqa xmm0,XMMWORD PTR[((192-128))+rax] paddd xmm0,xmm12 DB 69,15,56,205,236 DB 69,15,56,204,211 movups xmm4,XMMWORD PTR[((-32))+rcx] aesenc xmm6,xmm5 DB 15,56,203,209 pshufd xmm0,xmm0,00eh movdqa xmm3,xmm13 DB 102,65,15,58,15,220,4 paddd xmm10,xmm3 movups xmm5,XMMWORD PTR[((-16))+rcx] aesenc xmm6,xmm4 DB 15,56,203,202 movdqa xmm0,XMMWORD PTR[((224-128))+rax] paddd xmm0,xmm13 DB 69,15,56,205,213 DB 69,15,56,204,220 movups xmm4,XMMWORD PTR[rcx] aesenc xmm6,xmm5 DB 15,56,203,209 pshufd xmm0,xmm0,00eh movdqa xmm3,xmm10 DB 102,65,15,58,15,221,4 paddd xmm11,xmm3 movups xmm5,XMMWORD PTR[16+rcx] aesenc xmm6,xmm4 DB 15,56,203,202 movdqa xmm0,XMMWORD PTR[((256-128))+rax] paddd xmm0,xmm10 DB 69,15,56,205,218 DB 69,15,56,204,229 movups xmm4,XMMWORD PTR[32+rcx] aesenc xmm6,xmm5 DB 15,56,203,209 pshufd xmm0,xmm0,00eh movdqa xmm3,xmm11 DB 102,65,15,58,15,218,4 paddd xmm12,xmm3 movups xmm5,XMMWORD PTR[48+rcx] aesenc xmm6,xmm4 cmp r11d,11 jb $L$aesenclast2 movups xmm4,XMMWORD PTR[64+rcx] aesenc xmm6,xmm5 movups xmm5,XMMWORD PTR[80+rcx] aesenc xmm6,xmm4 je $L$aesenclast2 movups xmm4,XMMWORD PTR[96+rcx] aesenc xmm6,xmm5 movups xmm5,XMMWORD PTR[112+rcx] aesenc xmm6,xmm4 $L$aesenclast2:: aesenclast xmm6,xmm5 movups xmm4,XMMWORD PTR[((16-112))+rcx] nop DB 15,56,203,202 movups xmm14,XMMWORD PTR[32+rdi] xorps xmm14,xmm15 movups XMMWORD PTR[16+rdi*1+rsi],xmm6 xorps xmm6,xmm14 movups xmm5,XMMWORD PTR[((-80))+rcx] aesenc xmm6,xmm4 movdqa xmm0,XMMWORD PTR[((288-128))+rax] paddd xmm0,xmm11 DB 69,15,56,205,227 DB 69,15,56,204,234 movups xmm4,XMMWORD PTR[((-64))+rcx] aesenc xmm6,xmm5 DB 15,56,203,209 pshufd xmm0,xmm0,00eh movdqa xmm3,xmm12 DB 102,65,15,58,15,219,4 paddd xmm13,xmm3 movups xmm5,XMMWORD PTR[((-48))+rcx] aesenc xmm6,xmm4 DB 15,56,203,202 movdqa xmm0,XMMWORD PTR[((320-128))+rax] paddd xmm0,xmm12 DB 69,15,56,205,236 DB 69,15,56,204,211 movups xmm4,XMMWORD PTR[((-32))+rcx] aesenc xmm6,xmm5 DB 15,56,203,209 pshufd xmm0,xmm0,00eh movdqa xmm3,xmm13 DB 102,65,15,58,15,220,4 paddd xmm10,xmm3 movups xmm5,XMMWORD PTR[((-16))+rcx] aesenc xmm6,xmm4 DB 15,56,203,202 movdqa xmm0,XMMWORD PTR[((352-128))+rax] paddd xmm0,xmm13 DB 69,15,56,205,213 DB 69,15,56,204,220 movups xmm4,XMMWORD PTR[rcx] aesenc xmm6,xmm5 DB 15,56,203,209 pshufd xmm0,xmm0,00eh movdqa xmm3,xmm10 DB 102,65,15,58,15,221,4 paddd xmm11,xmm3 movups xmm5,XMMWORD PTR[16+rcx] aesenc xmm6,xmm4 DB 15,56,203,202 movdqa xmm0,XMMWORD PTR[((384-128))+rax] paddd xmm0,xmm10 DB 69,15,56,205,218 DB 69,15,56,204,229 movups xmm4,XMMWORD PTR[32+rcx] aesenc xmm6,xmm5 DB 15,56,203,209 pshufd xmm0,xmm0,00eh movdqa xmm3,xmm11 DB 102,65,15,58,15,218,4 paddd xmm12,xmm3 movups xmm5,XMMWORD PTR[48+rcx] aesenc xmm6,xmm4 DB 15,56,203,202 movdqa xmm0,XMMWORD PTR[((416-128))+rax] paddd xmm0,xmm11 DB 69,15,56,205,227 DB 69,15,56,204,234 cmp r11d,11 jb $L$aesenclast3 movups xmm4,XMMWORD PTR[64+rcx] aesenc xmm6,xmm5 movups xmm5,XMMWORD PTR[80+rcx] aesenc xmm6,xmm4 je $L$aesenclast3 movups xmm4,XMMWORD PTR[96+rcx] aesenc xmm6,xmm5 movups xmm5,XMMWORD PTR[112+rcx] aesenc xmm6,xmm4 $L$aesenclast3:: aesenclast xmm6,xmm5 movups xmm4,XMMWORD PTR[((16-112))+rcx] nop DB 15,56,203,209 pshufd xmm0,xmm0,00eh movdqa xmm3,xmm12 DB 102,65,15,58,15,219,4 paddd xmm13,xmm3 movups xmm14,XMMWORD PTR[48+rdi] xorps xmm14,xmm15 movups XMMWORD PTR[32+rdi*1+rsi],xmm6 xorps xmm6,xmm14 movups xmm5,XMMWORD PTR[((-80))+rcx] aesenc xmm6,xmm4 movups xmm4,XMMWORD PTR[((-64))+rcx] aesenc xmm6,xmm5 DB 15,56,203,202 movdqa xmm0,XMMWORD PTR[((448-128))+rax] paddd xmm0,xmm12 DB 69,15,56,205,236 movdqa xmm3,xmm7 movups xmm5,XMMWORD PTR[((-48))+rcx] aesenc xmm6,xmm4 DB 15,56,203,209 pshufd xmm0,xmm0,00eh movups xmm4,XMMWORD PTR[((-32))+rcx] aesenc xmm6,xmm5 DB 15,56,203,202 movdqa xmm0,XMMWORD PTR[((480-128))+rax] paddd xmm0,xmm13 movups xmm5,XMMWORD PTR[((-16))+rcx] aesenc xmm6,xmm4 movups xmm4,XMMWORD PTR[rcx] aesenc xmm6,xmm5 DB 15,56,203,209 pshufd xmm0,xmm0,00eh movups xmm5,XMMWORD PTR[16+rcx] aesenc xmm6,xmm4 DB 15,56,203,202 movups xmm4,XMMWORD PTR[32+rcx] aesenc xmm6,xmm5 movups xmm5,XMMWORD PTR[48+rcx] aesenc xmm6,xmm4 cmp r11d,11 jb $L$aesenclast4 movups xmm4,XMMWORD PTR[64+rcx] aesenc xmm6,xmm5 movups xmm5,XMMWORD PTR[80+rcx] aesenc xmm6,xmm4 je $L$aesenclast4 movups xmm4,XMMWORD PTR[96+rcx] aesenc xmm6,xmm5 movups xmm5,XMMWORD PTR[112+rcx] aesenc xmm6,xmm4 $L$aesenclast4:: aesenclast xmm6,xmm5 movups xmm4,XMMWORD PTR[((16-112))+rcx] nop paddd xmm2,xmm9 paddd xmm1,xmm8 dec rdx movups XMMWORD PTR[48+rdi*1+rsi],xmm6 lea rdi,QWORD PTR[64+rdi] jnz $L$oop_shaext pshufd xmm2,xmm2,1h pshufd xmm3,xmm1,01bh pshufd xmm1,xmm1,1h punpckhqdq xmm1,xmm2 DB 102,15,58,15,211,8 movups XMMWORD PTR[r8],xmm6 movdqu XMMWORD PTR[r9],xmm1 movdqu XMMWORD PTR[16+r9],xmm2 movaps xmm6,XMMWORD PTR[rsp] movaps xmm7,XMMWORD PTR[16+rsp] movaps xmm8,XMMWORD PTR[32+rsp] movaps xmm9,XMMWORD PTR[48+rsp] movaps xmm10,XMMWORD PTR[64+rsp] movaps xmm11,XMMWORD PTR[80+rsp] movaps xmm12,XMMWORD PTR[96+rsp] movaps xmm13,XMMWORD PTR[112+rsp] movaps xmm14,XMMWORD PTR[128+rsp] movaps xmm15,XMMWORD PTR[144+rsp] lea rsp,QWORD PTR[((8+160))+rsp] $L$epilogue_shaext:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aesni_cbc_sha256_enc_shaext:: aesni_cbc_sha256_enc_shaext ENDP EXTERN __imp_RtlVirtualUnwind:NEAR ALIGN 16 se_handler PROC PRIVATE push rsi push rdi push rbx push rbp push r12 push r13 push r14 push r15 pushfq sub rsp,64 mov rax,QWORD PTR[120+r8] mov rbx,QWORD PTR[248+r8] mov rsi,QWORD PTR[8+r9] mov r11,QWORD PTR[56+r9] mov r10d,DWORD PTR[r11] lea r10,QWORD PTR[r10*1+rsi] cmp rbx,r10 jb $L$in_prologue mov rax,QWORD PTR[152+r8] mov r10d,DWORD PTR[4+r11] lea r10,QWORD PTR[r10*1+rsi] cmp rbx,r10 jae $L$in_prologue lea r10,QWORD PTR[aesni_cbc_sha256_enc_shaext] cmp rbx,r10 jb $L$not_in_shaext lea rsi,QWORD PTR[rax] lea rdi,QWORD PTR[512+r8] mov ecx,20 DD 0a548f3fch lea rax,QWORD PTR[168+rax] jmp $L$in_prologue $L$not_in_shaext:: lea r10,QWORD PTR[$L$avx2_shortcut] cmp rbx,r10 jb $L$not_in_avx2 and rax,-256*4 add rax,448 $L$not_in_avx2:: mov rsi,rax mov rax,QWORD PTR[((64+56))+rax] lea rax,QWORD PTR[48+rax] mov rbx,QWORD PTR[((-8))+rax] mov rbp,QWORD PTR[((-16))+rax] mov r12,QWORD PTR[((-24))+rax] mov r13,QWORD PTR[((-32))+rax] mov r14,QWORD PTR[((-40))+rax] mov r15,QWORD PTR[((-48))+rax] mov QWORD PTR[144+r8],rbx mov QWORD PTR[160+r8],rbp mov QWORD PTR[216+r8],r12 mov QWORD PTR[224+r8],r13 mov QWORD PTR[232+r8],r14 mov QWORD PTR[240+r8],r15 lea rsi,QWORD PTR[((64+64))+rsi] lea rdi,QWORD PTR[512+r8] mov ecx,20 DD 0a548f3fch $L$in_prologue:: mov rdi,QWORD PTR[8+rax] mov rsi,QWORD PTR[16+rax] mov QWORD PTR[152+r8],rax mov QWORD PTR[168+r8],rsi mov QWORD PTR[176+r8],rdi mov rdi,QWORD PTR[40+r9] mov rsi,r8 mov ecx,154 DD 0a548f3fch mov rsi,r9 xor rcx,rcx mov rdx,QWORD PTR[8+rsi] mov r8,QWORD PTR[rsi] mov r9,QWORD PTR[16+rsi] mov r10,QWORD PTR[40+rsi] lea r11,QWORD PTR[56+rsi] lea r12,QWORD PTR[24+rsi] mov QWORD PTR[32+rsp],r10 mov QWORD PTR[40+rsp],r11 mov QWORD PTR[48+rsp],r12 mov QWORD PTR[56+rsp],rcx call QWORD PTR[__imp_RtlVirtualUnwind] mov eax,1 add rsp,64 popfq pop r15 pop r14 pop r13 pop r12 pop rbp pop rbx pop rdi pop rsi DB 0F3h,0C3h ;repret se_handler ENDP .text$ ENDS .pdata SEGMENT READONLY ALIGN(4) DD imagerel $L$SEH_begin_aesni_cbc_sha256_enc_xop DD imagerel $L$SEH_end_aesni_cbc_sha256_enc_xop DD imagerel $L$SEH_info_aesni_cbc_sha256_enc_xop DD imagerel $L$SEH_begin_aesni_cbc_sha256_enc_avx DD imagerel $L$SEH_end_aesni_cbc_sha256_enc_avx DD imagerel $L$SEH_info_aesni_cbc_sha256_enc_avx DD imagerel $L$SEH_begin_aesni_cbc_sha256_enc_avx2 DD imagerel $L$SEH_end_aesni_cbc_sha256_enc_avx2 DD imagerel $L$SEH_info_aesni_cbc_sha256_enc_avx2 DD imagerel $L$SEH_begin_aesni_cbc_sha256_enc_shaext DD imagerel $L$SEH_end_aesni_cbc_sha256_enc_shaext DD imagerel $L$SEH_info_aesni_cbc_sha256_enc_shaext .pdata ENDS .xdata SEGMENT READONLY ALIGN(8) ALIGN 8 $L$SEH_info_aesni_cbc_sha256_enc_xop:: DB 9,0,0,0 DD imagerel se_handler DD imagerel $L$prologue_xop,imagerel $L$epilogue_xop $L$SEH_info_aesni_cbc_sha256_enc_avx:: DB 9,0,0,0 DD imagerel se_handler DD imagerel $L$prologue_avx,imagerel $L$epilogue_avx $L$SEH_info_aesni_cbc_sha256_enc_avx2:: DB 9,0,0,0 DD imagerel se_handler DD imagerel $L$prologue_avx2,imagerel $L$epilogue_avx2 $L$SEH_info_aesni_cbc_sha256_enc_shaext:: DB 9,0,0,0 DD imagerel se_handler DD imagerel $L$prologue_shaext,imagerel $L$epilogue_shaext .xdata ENDS END