From 5edbb53c45bc087412d5192a8e3a5a89d88f9be5 Mon Sep 17 00:00:00 2001
From: Bert Belder
Date: Wed, 19 Dec 2012 17:08:16 +0100
Subject: [PATCH] openssl: regenerate asm files for openssl 1.0.1

---
 deps/openssl/asm/Makefile | 6 +
 deps/openssl/asm/x64-elf-gas/aes/aes-x86_64.s | 25 +-
 .../asm/x64-elf-gas/aes/aesni-sha1-x86_64.s | 1402 +++++++
 .../asm/x64-elf-gas/aes/aesni-x86_64.s | 2558 ++++++++++++
 .../asm/x64-elf-gas/bn/modexp512-x86_64.s | 1776 ++++++++
 deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s | 1311 +++++-
 .../asm/x64-elf-gas/rc4/rc4-md5-x86_64.s | 1260 ++++++
 deps/openssl/asm/x64-elf-gas/rc4/rc4-x86_64.s | 734 ++--
 .../openssl/asm/x64-elf-gas/sha/sha1-x86_64.s | 3531 ++++++++++------
 .../asm/x64-elf-gas/sha/sha512-x86_64.s | 2150 +++++-----
 deps/openssl/asm/x64-elf-gas/x86_64cpuid.s | 70 +-
 .../asm/x64-macosx-gas/aes/aes-x86_64.s | 17 +-
 .../x64-macosx-gas/aes/aesni-sha1-x86_64.s | 1402 +++++++
 .../asm/x64-macosx-gas/aes/aesni-x86_64.s | 2558 ++++++++++++
 .../asm/x64-macosx-gas/bn/modexp512-x86_64.s | 1775 ++++++++
 .../asm/x64-macosx-gas/bn/x86_64-mont.s | 1309 +++++-
 .../asm/x64-macosx-gas/md5/md5-x86_64.s | 1 -
 .../asm/x64-macosx-gas/rc4/rc4-md5-x86_64.s | 1259 ++++++
 .../asm/x64-macosx-gas/rc4/rc4-x86_64.s | 731 ++--
 .../asm/x64-macosx-gas/sha/sha1-x86_64.s | 3529 ++++++++++------
 .../asm/x64-macosx-gas/sha/sha512-x86_64.s | 2150 +++++-----
 deps/openssl/asm/x64-macosx-gas/x86_64cpuid.s | 69 +-
 .../asm/x64-win32-masm/aes/aes-x86_64.asm | 45 +-
 .../x64-win32-masm/aes/aesni-sha1-x86_64.asm | 1554 +++++++
 .../asm/x64-win32-masm/aes/aesni-x86_64.asm | 3062 ++++++++++++++
 .../x64-win32-masm/bn/modexp512-x86_64.asm | 1890 +++++++++
 .../asm/x64-win32-masm/bn/x86_64-mont.asm | 1540 ++++++-
 .../x64-win32-masm/camellia/cmll-x86_64.asm | 4 +-
 .../asm/x64-win32-masm/md5/md5-x86_64.asm | 280 +-
 .../asm/x64-win32-masm/rc4/rc4-md5-x86_64.asm | 1375 +++++++
 .../asm/x64-win32-masm/rc4/rc4-x86_64.asm | 744 ++--
 .../asm/x64-win32-masm/sha/sha1-x86_64.asm | 3624 +++++++++++------
 .../asm/x64-win32-masm/sha/sha512-x86_64.asm | 2254 +++++-----
 .../asm/x64-win32-masm/whrlpool/wp-x86_64.asm | 204 +-
 .../asm/x64-win32-masm/x86_64cpuid.asm | 72 +-
 .../asm/x64-win32-masm/x86_64cpuid.asm.orig | 235 ++
 deps/openssl/asm/x86-elf-gas/aes/aes-586.s | 22 +-
 deps/openssl/asm/x86-elf-gas/aes/aesni-x86.s | 2143 ++++++++++
 .../asm/x86-elf-gas/camellia/cmll-x86.s | 10 +-
 deps/openssl/asm/x86-elf-gas/rc4/rc4-586.s | 218 +-
 deps/openssl/asm/x86-elf-gas/sha/sha1-586.s | 1262 +++---
 deps/openssl/asm/x86-elf-gas/sha/sha256-586.s | 65 +-
 deps/openssl/asm/x86-elf-gas/x86cpuid.s | 135 +-
 deps/openssl/asm/x86-macosx-gas/aes/aes-586.s | 24 +-
 .../asm/x86-macosx-gas/aes/aesni-x86.s | 2107 ++++++++++
 .../asm/x86-macosx-gas/camellia/cmll-x86.s | 6 +-
 .../openssl/asm/x86-macosx-gas/des/crypt586.s | 13 +-
 deps/openssl/asm/x86-macosx-gas/rc4/rc4-586.s | 231 +-
 .../openssl/asm/x86-macosx-gas/sha/sha1-586.s | 1262 +++---
 .../asm/x86-macosx-gas/sha/sha256-586.s | 65 +-
 deps/openssl/asm/x86-macosx-gas/x86cpuid.s | 149 +-
 .../asm/x86-win32-masm/aes/aes-586.asm | 14 +-
 .../asm/x86-win32-masm/aes/aesni-x86.asm | 2133 ++++++++++
 deps/openssl/asm/x86-win32-masm/bf/bf-686.asm | 2 +-
 .../asm/x86-win32-masm/bn/x86-mont.asm | 2 +-
 deps/openssl/asm/x86-win32-masm/bn/x86.asm | 2 +-
 .../asm/x86-win32-masm/camellia/cmll-x86.asm | 8 +-
 .../asm/x86-win32-masm/cast/cast-586.asm | 2 +-
 .../asm/x86-win32-masm/des/crypt586.asm | 2 +-
 .../asm/x86-win32-masm/des/des-586.asm | 2 +-
 .../asm/x86-win32-masm/md5/md5-586.asm | 2 +-
 .../asm/x86-win32-masm/rc4/rc4-586.asm | 225 +-
 .../asm/x86-win32-masm/rc5/rc5-586.asm | 2 +-
 .../asm/x86-win32-masm/ripemd/rmd-586.asm | 2 +-
 .../asm/x86-win32-masm/sha/sha1-586.asm | 1264 +++---
 .../asm/x86-win32-masm/sha/sha256-586.asm | 67 +-
 .../asm/x86-win32-masm/sha/sha512-586.asm | 2 +-
 deps/openssl/asm/x86-win32-masm/x86cpuid.asm | 132 +-
 68 files changed, 47278 insertions(+), 10803 deletions(-)
 create mode 100644 deps/openssl/asm/x64-elf-gas/aes/aesni-sha1-x86_64.s
 create mode 100644 deps/openssl/asm/x64-elf-gas/aes/aesni-x86_64.s
 create mode 100644 deps/openssl/asm/x64-elf-gas/bn/modexp512-x86_64.s
 create mode 100644 deps/openssl/asm/x64-elf-gas/rc4/rc4-md5-x86_64.s
 create mode 100644 deps/openssl/asm/x64-macosx-gas/aes/aesni-sha1-x86_64.s
 create mode 100644 deps/openssl/asm/x64-macosx-gas/aes/aesni-x86_64.s
 create mode 100644 deps/openssl/asm/x64-macosx-gas/bn/modexp512-x86_64.s
 create mode 100644 deps/openssl/asm/x64-macosx-gas/rc4/rc4-md5-x86_64.s
 create mode 100644 deps/openssl/asm/x64-win32-masm/aes/aesni-sha1-x86_64.asm
 create mode 100644 deps/openssl/asm/x64-win32-masm/aes/aesni-x86_64.asm
 create mode 100644 deps/openssl/asm/x64-win32-masm/bn/modexp512-x86_64.asm
 create mode 100644 deps/openssl/asm/x64-win32-masm/rc4/rc4-md5-x86_64.asm
 create mode 100644 deps/openssl/asm/x64-win32-masm/x86_64cpuid.asm.orig
 create mode 100644 deps/openssl/asm/x86-elf-gas/aes/aesni-x86.s
 create mode 100644 deps/openssl/asm/x86-macosx-gas/aes/aesni-x86.s
 create mode 100644 deps/openssl/asm/x86-win32-masm/aes/aesni-x86.asm

diff --git a/deps/openssl/asm/Makefile b/deps/openssl/asm/Makefile
index d41e7ae442..9f54785b44 100644
--- a/deps/openssl/asm/Makefile
+++ b/deps/openssl/asm/Makefile
@@ -28,6 +28,7 @@ OUTPUTS = \
 x64-elf-gas/camellia/cmll-x86_64.s \
 x64-elf-gas/md5/md5-x86_64.s \
 x64-elf-gas/rc4/rc4-x86_64.s \
+ x64-elf-gas/rc4/rc4-md5-x86_64.s \
 x64-elf-gas/sha/sha1-x86_64.s \
 x64-elf-gas/sha/sha512-x86_64.s \
 x64-elf-gas/whrlpool/wp-x86_64.s \
@@ -58,6 +59,7 @@ OUTPUTS = \
 x64-macosx-gas/camellia/cmll-x86_64.s \
 x64-macosx-gas/md5/md5-x86_64.s \
 x64-macosx-gas/rc4/rc4-x86_64.s \
+ x64-macosx-gas/rc4/rc4-md5-x86_64.s \
 x64-macosx-gas/sha/sha1-x86_64.s \
 x64-macosx-gas/sha/sha512-x86_64.s \
 x64-macosx-gas/whrlpool/wp-x86_64.s \
@@ -88,6 +90,7 @@ OUTPUTS = \
 x64-win32-masm/camellia/cmll-x86_64.asm \
 x64-win32-masm/md5/md5-x86_64.asm \
 x64-win32-masm/rc4/rc4-x86_64.asm \
+ x64-win32-masm/rc4/rc4-md5-x86_64.asm \
 x64-win32-masm/sha/sha1-x86_64.asm \
 x64-win32-masm/sha/sha512-x86_64.asm \
 x64-win32-masm/whrlpool/wp-x86_64.asm \
@@ -122,6 +125,7 @@ x64-elf-gas/bn/x86_64-mont.s: ../openssl/crypto/bn/asm/x86_64-mont.pl
 x64-elf-gas/camellia/cmll-x86_64.s: ../openssl/crypto/camellia/asm/cmll-x86_64.pl
 x64-elf-gas/md5/md5-x86_64.s: ../openssl/crypto/md5/asm/md5-x86_64.pl
 x64-elf-gas/rc4/rc4-x86_64.s: ../openssl/crypto/rc4/asm/rc4-x86_64.pl
+x64-elf-gas/rc4/rc4-md5-x86_64.s: ../openssl/crypto/rc4/asm/rc4-md5-x86_64.pl
 x64-elf-gas/sha/sha1-x86_64.s: ../openssl/crypto/sha/asm/sha1-x86_64.pl
 x64-elf-gas/sha/sha512-x86_64.s: ../openssl/crypto/sha/asm/sha512-x86_64.pl
 x64-elf-gas/whrlpool/wp-x86_64.s: ../openssl/crypto/whrlpool/asm/wp-x86_64.pl
@@ -134,6 +138,7 @@ x64-macosx-gas/bn/x86_64-mont.s: ../openssl/crypto/bn/asm/x86_64-mont.pl
 x64-macosx-gas/camellia/cmll-x86_64.s: ../openssl/crypto/camellia/asm/cmll-x86_64.pl
 x64-macosx-gas/md5/md5-x86_64.s: ../openssl/crypto/md5/asm/md5-x86_64.pl
 x64-macosx-gas/rc4/rc4-x86_64.s: ../openssl/crypto/rc4/asm/rc4-x86_64.pl
+x64-macosx-gas/rc4/rc4-md5-x86_64.s:
../openssl/crypto/rc4/asm/rc4-md5-x86_64.pl x64-macosx-gas/sha/sha1-x86_64.s: ../openssl/crypto/sha/asm/sha1-x86_64.pl x64-macosx-gas/sha/sha512-x86_64.s: ../openssl/crypto/sha/asm/sha512-x86_64.pl x64-macosx-gas/whrlpool/wp-x86_64.s: ../openssl/crypto/whrlpool/asm/wp-x86_64.pl @@ -146,6 +151,7 @@ x64-win32-masm/bn/x86_64-mont.asm: ../openssl/crypto/bn/asm/x86_64-mont.pl x64-win32-masm/camellia/cmll-x86_64.asm: ../openssl/crypto/camellia/asm/cmll-x86_64.pl x64-win32-masm/md5/md5-x86_64.asm: ../openssl/crypto/md5/asm/md5-x86_64.pl x64-win32-masm/rc4/rc4-x86_64.asm: ../openssl/crypto/rc4/asm/rc4-x86_64.pl +x64-win32-masm/rc4/rc4-md5-x86_64.asm: ../openssl/crypto/rc4/asm/rc4-md5-x86_64.pl x64-win32-masm/sha/sha1-x86_64.asm: ../openssl/crypto/sha/asm/sha1-x86_64.pl x64-win32-masm/sha/sha512-x86_64.asm: ../openssl/crypto/sha/asm/sha512-x86_64.pl x64-win32-masm/whrlpool/wp-x86_64.asm: ../openssl/crypto/whrlpool/asm/wp-x86_64.pl diff --git a/deps/openssl/asm/x64-elf-gas/aes/aes-x86_64.s b/deps/openssl/asm/x64-elf-gas/aes/aes-x86_64.s index d7feffbfa5..e7c261fe43 100644 --- a/deps/openssl/asm/x64-elf-gas/aes/aes-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/aes/aes-x86_64.s @@ -333,6 +333,9 @@ _x86_64_AES_encrypt_compact: .globl AES_encrypt .type AES_encrypt,@function .align 16 +.globl asm_AES_encrypt +.hidden asm_AES_encrypt +asm_AES_encrypt: AES_encrypt: pushq %rbx pushq %rbp @@ -780,6 +783,9 @@ _x86_64_AES_decrypt_compact: .globl AES_decrypt .type AES_decrypt,@function .align 16 +.globl asm_AES_decrypt +.hidden asm_AES_decrypt +asm_AES_decrypt: AES_decrypt: pushq %rbx pushq %rbp @@ -843,10 +849,10 @@ AES_decrypt: .Ldec_epilogue: .byte 0xf3,0xc3 .size AES_decrypt,.-AES_decrypt -.globl AES_set_encrypt_key -.type AES_set_encrypt_key,@function +.globl private_AES_set_encrypt_key +.type private_AES_set_encrypt_key,@function .align 16 -AES_set_encrypt_key: +private_AES_set_encrypt_key: pushq %rbx pushq %rbp pushq %r12 @@ -867,7 +873,7 @@ AES_set_encrypt_key: addq $56,%rsp .Lenc_key_epilogue: .byte 0xf3,0xc3 -.size AES_set_encrypt_key,.-AES_set_encrypt_key +.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key .type _x86_64_AES_set_encrypt_key,@function .align 16 @@ -1109,10 +1115,10 @@ _x86_64_AES_set_encrypt_key: .byte 0xf3,0xc3 .size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key -.globl AES_set_decrypt_key -.type AES_set_decrypt_key,@function +.globl private_AES_set_decrypt_key +.type private_AES_set_decrypt_key,@function .align 16 -AES_set_decrypt_key: +private_AES_set_decrypt_key: pushq %rbx pushq %rbp pushq %r12 @@ -1295,11 +1301,14 @@ AES_set_decrypt_key: addq $56,%rsp .Ldec_key_epilogue: .byte 0xf3,0xc3 -.size AES_set_decrypt_key,.-AES_set_decrypt_key +.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key .globl AES_cbc_encrypt .type AES_cbc_encrypt,@function .align 16 +.globl asm_AES_cbc_encrypt +.hidden asm_AES_cbc_encrypt +asm_AES_cbc_encrypt: AES_cbc_encrypt: cmpq $0,%rdx je .Lcbc_epilogue diff --git a/deps/openssl/asm/x64-elf-gas/aes/aesni-sha1-x86_64.s b/deps/openssl/asm/x64-elf-gas/aes/aesni-sha1-x86_64.s new file mode 100644 index 0000000000..8f0475e0d2 --- /dev/null +++ b/deps/openssl/asm/x64-elf-gas/aes/aesni-sha1-x86_64.s @@ -0,0 +1,1402 @@ +.text + + + +.globl aesni_cbc_sha1_enc +.type aesni_cbc_sha1_enc,@function +.align 16 +aesni_cbc_sha1_enc: + + movl OPENSSL_ia32cap_P+0(%rip),%r10d + movl OPENSSL_ia32cap_P+4(%rip),%r11d + jmp aesni_cbc_sha1_enc_ssse3 + .byte 0xf3,0xc3 +.size aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc +.type 
aesni_cbc_sha1_enc_ssse3,@function +.align 16 +aesni_cbc_sha1_enc_ssse3: + movq 8(%rsp),%r10 + + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -104(%rsp),%rsp + + + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movdqu (%r8),%xmm11 + movq %r8,88(%rsp) + shlq $6,%r14 + subq %r12,%r13 + movl 240(%r15),%r8d + addq %r10,%r14 + + leaq K_XX_XX(%rip),%r11 + movl 0(%r9),%eax + movl 4(%r9),%ebx + movl 8(%r9),%ecx + movl 12(%r9),%edx + movl %ebx,%esi + movl 16(%r9),%ebp + + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r10),%xmm0 + movdqu 16(%r10),%xmm1 + movdqu 32(%r10),%xmm2 + movdqu 48(%r10),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r10 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm0 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + movups (%r15),%xmm13 + movups 16(%r15),%xmm14 + jmp .Loop_ssse3 +.align 16 +.Loop_ssse3: + movdqa %xmm1,%xmm4 + addl 0(%rsp),%ebp + movups 0(%r12),%xmm12 + xorps %xmm13,%xmm12 + xorps %xmm12,%xmm11 +.byte 102,69,15,56,220,222 + movups 32(%r15),%xmm15 + xorl %edx,%ecx + movdqa %xmm3,%xmm8 +.byte 102,15,58,15,224,8 + movl %eax,%edi + roll $5,%eax + paddd %xmm3,%xmm9 + andl %ecx,%esi + xorl %edx,%ecx + psrldq $4,%xmm8 + xorl %edx,%esi + addl %eax,%ebp + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%ebp + pxor %xmm2,%xmm8 + addl 4(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pxor %xmm8,%xmm4 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm9,48(%rsp) + xorl %ecx,%edi +.byte 102,69,15,56,220,223 + movups 48(%r15),%xmm14 + addl %ebp,%edx + movdqa %xmm4,%xmm10 + movdqa %xmm4,%xmm8 + rorl $7,%eax + addl %edi,%edx + addl 8(%rsp),%ecx + xorl %ebx,%eax + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + roll $5,%edx + andl %eax,%esi + xorl %ebx,%eax + psrld $31,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + movdqa %xmm10,%xmm9 + rorl $7,%ebp + addl %esi,%ecx + psrld $30,%xmm10 + por %xmm8,%xmm4 + addl 12(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx +.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm15 + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + andl %ebp,%edi + xorl %eax,%ebp + movdqa 0(%r11),%xmm10 + xorl %eax,%edi + addl %ecx,%ebx + pxor %xmm9,%xmm4 + rorl $7,%edx + addl %edi,%ebx + movdqa %xmm2,%xmm5 + addl 16(%rsp),%eax + xorl %ebp,%edx + movdqa %xmm4,%xmm9 +.byte 102,15,58,15,233,8 + movl %ebx,%edi + roll $5,%ebx + paddd %xmm4,%xmm10 + andl %edx,%esi + xorl %ebp,%edx + psrldq $4,%xmm9 + xorl %ebp,%esi + addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm3,%xmm9 + addl 20(%rsp),%ebp +.byte 102,69,15,56,220,223 + movups 80(%r15),%xmm14 + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pxor %xmm9,%xmm5 + andl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm10,0(%rsp) + xorl %edx,%edi + addl %eax,%ebp + movdqa %xmm5,%xmm8 + movdqa %xmm5,%xmm9 + rorl $7,%ebx + addl %edi,%ebp + addl 24(%rsp),%edx + xorl %ecx,%ebx + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + roll $5,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + psrld $31,%xmm9 + xorl %ecx,%esi +.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm15 + addl %ebp,%edx + movdqa %xmm8,%xmm10 + rorl $7,%eax + addl %esi,%edx + psrld $30,%xmm8 + por %xmm9,%xmm5 + addl 28(%rsp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + andl %eax,%edi + xorl %ebx,%eax + movdqa 16(%r11),%xmm8 + xorl %ebx,%edi + addl %edx,%ecx + 
pxor %xmm10,%xmm5 + rorl $7,%ebp + addl %edi,%ecx + movdqa %xmm3,%xmm6 + addl 32(%rsp),%ebx + xorl %eax,%ebp + movdqa %xmm5,%xmm10 +.byte 102,15,58,15,242,8 + movl %ecx,%edi + roll $5,%ecx +.byte 102,69,15,56,220,223 + movups 112(%r15),%xmm14 + paddd %xmm5,%xmm8 + andl %ebp,%esi + xorl %eax,%ebp + psrldq $4,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm4,%xmm10 + addl 36(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + pxor %xmm10,%xmm6 + andl %edx,%edi + xorl %ebp,%edx + movdqa %xmm8,16(%rsp) + xorl %ebp,%edi + addl %ebx,%eax + movdqa %xmm6,%xmm9 + movdqa %xmm6,%xmm10 + rorl $7,%ecx + addl %edi,%eax + addl 40(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 128(%r15),%xmm15 + xorl %edx,%ecx + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx + psrld $31,%xmm10 + xorl %edx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + psrld $30,%xmm9 + por %xmm10,%xmm6 + addl 44(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa 16(%r11),%xmm9 + xorl %ecx,%edi +.byte 102,69,15,56,220,223 + movups 144(%r15),%xmm14 + addl %ebp,%edx + pxor %xmm8,%xmm6 + rorl $7,%eax + addl %edi,%edx + movdqa %xmm4,%xmm7 + addl 48(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm6,%xmm8 +.byte 102,15,58,15,251,8 + movl %edx,%edi + roll $5,%edx + paddd %xmm6,%xmm9 + andl %eax,%esi + xorl %ebx,%eax + psrldq $4,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm5,%xmm8 + addl 52(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx +.byte 102,69,15,56,220,222 + movups 160(%r15),%xmm15 + pxor %xmm8,%xmm7 + andl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm9,32(%rsp) + xorl %eax,%edi + addl %ecx,%ebx + movdqa %xmm7,%xmm10 + movdqa %xmm7,%xmm8 + rorl $7,%edx + addl %edi,%ebx + addl 56(%rsp),%eax + xorl %ebp,%edx + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + roll $5,%ebx + andl %edx,%esi + xorl %ebp,%edx + psrld $31,%xmm8 + xorl %ebp,%esi + addl %ebx,%eax + movdqa %xmm10,%xmm9 + rorl $7,%ecx + addl %esi,%eax + psrld $30,%xmm10 + por %xmm8,%xmm7 + addl 60(%rsp),%ebp + cmpl $11,%r8d + jb .Laesenclast1 + movups 176(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 192(%r15),%xmm15 +.byte 102,69,15,56,220,222 + je .Laesenclast1 + movups 208(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 224(%r15),%xmm15 +.byte 102,69,15,56,220,222 +.Laesenclast1: +.byte 102,69,15,56,221,223 + movups 16(%r15),%xmm14 + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + andl %ecx,%edi + xorl %edx,%ecx + movdqa 16(%r11),%xmm10 + xorl %edx,%edi + addl %eax,%ebp + pxor %xmm9,%xmm7 + rorl $7,%ebx + addl %edi,%ebp + movdqa %xmm7,%xmm9 + addl 0(%rsp),%edx + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,206,8 + xorl %ecx,%ebx + movl %ebp,%edi + roll $5,%ebp + pxor %xmm1,%xmm0 + andl %ebx,%esi + xorl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm7,%xmm10 + xorl %ecx,%esi + movups 16(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,0(%r13,%r12,1) + xorps %xmm12,%xmm11 +.byte 102,69,15,56,220,222 + movups 32(%r15),%xmm15 + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm0,%xmm9 + movdqa %xmm10,48(%rsp) + movl %edx,%esi + roll $5,%edx + andl %eax,%edi + xorl %ebx,%eax + pslld $2,%xmm0 + xorl %ebx,%edi + addl %edx,%ecx + psrld $30,%xmm9 + rorl $7,%ebp + addl %edi,%ecx + addl 
8(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%edi + roll $5,%ecx +.byte 102,69,15,56,220,223 + movups 48(%r15),%xmm14 + por %xmm9,%xmm0 + andl %ebp,%esi + xorl %eax,%ebp + movdqa %xmm0,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + andl %edx,%edi + xorl %ebp,%edx + xorl %ebp,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 16(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm15 + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,215,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm8,%xmm9 + paddd %xmm0,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + movdqa %xmm8,0(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm10 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi +.byte 102,69,15,56,220,223 + movups 80(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm10,%xmm1 + addl 28(%rsp),%ebx + xorl %eax,%edi + movdqa %xmm1,%xmm8 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 32(%rsp),%eax + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,192,8 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + xorl %edx,%esi + addl %ebx,%eax + movdqa 32(%r11),%xmm10 + paddd %xmm1,%xmm9 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + movdqa %xmm9,16(%rsp) + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + pslld $2,%xmm2 + addl 40(%rsp),%edx + xorl %ecx,%esi + psrld $30,%xmm8 + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + por %xmm8,%xmm2 + addl 44(%rsp),%ecx + xorl %ebx,%edi + movdqa %xmm2,%xmm9 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi +.byte 102,69,15,56,220,223 + movups 112(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 48(%rsp),%ebx + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,201,8 + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + xorl %ebp,%esi + addl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm2,%xmm10 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + movdqa %xmm10,32(%rsp) + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + pslld $2,%xmm3 + addl 56(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 128(%r15),%xmm15 + xorl %edx,%esi + psrld $30,%xmm9 + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + por %xmm9,%xmm3 + addl 60(%rsp),%edx + xorl %ecx,%edi + movdqa %xmm3,%xmm10 + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 0(%rsp),%ecx + pxor %xmm0,%xmm4 +.byte 102,68,15,58,15,210,8 + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + xorl %eax,%esi +.byte 102,69,15,56,220,223 + movups 144(%r15),%xmm14 + addl %edx,%ecx + movdqa %xmm8,%xmm9 + paddd %xmm3,%xmm8 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + movdqa %xmm8,48(%rsp) + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx 
+ addl %edi,%ebx + pslld $2,%xmm4 + addl 8(%rsp),%eax + xorl %ebp,%esi + psrld $30,%xmm10 + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + por %xmm10,%xmm4 + addl 12(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movdqa %xmm4,%xmm8 + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 16(%rsp),%edx + pxor %xmm1,%xmm5 +.byte 102,68,15,58,15,195,8 + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + xorl %ebx,%esi + addl %ebp,%edx + movdqa %xmm9,%xmm10 + paddd %xmm4,%xmm9 + rorl $7,%eax + addl %esi,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + movdqa %xmm9,0(%rsp) + xorl %eax,%edi + cmpl $11,%r8d + jb .Laesenclast2 + movups 176(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 192(%r15),%xmm15 +.byte 102,69,15,56,220,222 + je .Laesenclast2 + movups 208(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 224(%r15),%xmm15 +.byte 102,69,15,56,220,222 +.Laesenclast2: +.byte 102,69,15,56,221,223 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + pslld $2,%xmm5 + addl 24(%rsp),%ebx + xorl %eax,%esi + psrld $30,%xmm8 + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + por %xmm8,%xmm5 + addl 28(%rsp),%eax + xorl %ebp,%edi + movdqa %xmm5,%xmm9 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movl %ecx,%edi + movups 32(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,16(%r13,%r12,1) + xorps %xmm12,%xmm11 +.byte 102,69,15,56,220,222 + movups 32(%r15),%xmm15 + pxor %xmm2,%xmm6 +.byte 102,68,15,58,15,204,8 + xorl %edx,%ecx + addl 32(%rsp),%ebp + andl %edx,%edi + pxor %xmm7,%xmm6 + andl %ecx,%esi + rorl $7,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm5,%xmm10 + addl %edi,%ebp + movl %eax,%edi + pxor %xmm9,%xmm6 + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movdqa %xmm6,%xmm9 + movdqa %xmm10,16(%rsp) + movl %ebx,%esi + xorl %ecx,%ebx + addl 36(%rsp),%edx + andl %ecx,%esi + pslld $2,%xmm6 + andl %ebx,%edi + rorl $7,%eax + psrld $30,%xmm9 + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp +.byte 102,69,15,56,220,223 + movups 48(%r15),%xmm14 + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + por %xmm9,%xmm6 + movl %eax,%edi + xorl %ebx,%eax + movdqa %xmm6,%xmm10 + addl 40(%rsp),%ecx + andl %ebx,%edi + andl %eax,%esi + rorl $7,%ebp + addl %edi,%ecx + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%esi + xorl %eax,%ebp + addl 44(%rsp),%ebx + andl %eax,%esi + andl %ebp,%edi +.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm15 + rorl $7,%edx + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%edi + pxor %xmm3,%xmm7 +.byte 102,68,15,58,15,213,8 + xorl %ebp,%edx + addl 48(%rsp),%eax + andl %ebp,%edi + pxor %xmm0,%xmm7 + andl %edx,%esi + rorl $7,%ecx + movdqa 48(%r11),%xmm9 + paddd %xmm6,%xmm8 + addl %edi,%eax + movl %ebx,%edi + pxor %xmm10,%xmm7 + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movdqa %xmm7,%xmm10 + movdqa %xmm8,32(%rsp) + movl %ecx,%esi +.byte 102,69,15,56,220,223 + movups 80(%r15),%xmm14 + xorl %edx,%ecx + addl 52(%rsp),%ebp + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%edi + rorl $7,%ebx + psrld $30,%xmm10 + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + por 
%xmm10,%xmm7 + movl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm7,%xmm8 + addl 56(%rsp),%edx + andl %ecx,%edi + andl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + movl %ebp,%edi + roll $5,%ebp +.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm15 + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%esi + xorl %ebx,%eax + addl 60(%rsp),%ecx + andl %ebx,%esi + andl %eax,%edi + rorl $7,%ebp + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%edi + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,198,8 + xorl %eax,%ebp + addl 0(%rsp),%ebx + andl %eax,%edi + pxor %xmm1,%xmm0 + andl %ebp,%esi +.byte 102,69,15,56,220,223 + movups 112(%r15),%xmm14 + rorl $7,%edx + movdqa %xmm9,%xmm10 + paddd %xmm7,%xmm9 + addl %edi,%ebx + movl %ecx,%edi + pxor %xmm8,%xmm0 + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movdqa %xmm0,%xmm8 + movdqa %xmm9,48(%rsp) + movl %edx,%esi + xorl %ebp,%edx + addl 4(%rsp),%eax + andl %ebp,%esi + pslld $2,%xmm0 + andl %edx,%edi + rorl $7,%ecx + psrld $30,%xmm8 + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + por %xmm8,%xmm0 + movl %ecx,%edi +.byte 102,69,15,56,220,222 + movups 128(%r15),%xmm15 + xorl %edx,%ecx + movdqa %xmm0,%xmm9 + addl 8(%rsp),%ebp + andl %edx,%edi + andl %ecx,%esi + rorl $7,%ebx + addl %edi,%ebp + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%esi + xorl %ecx,%ebx + addl 12(%rsp),%edx + andl %ecx,%esi + andl %ebx,%edi + rorl $7,%eax + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp +.byte 102,69,15,56,220,223 + movups 144(%r15),%xmm14 + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%edi + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,207,8 + xorl %ebx,%eax + addl 16(%rsp),%ecx + andl %ebx,%edi + pxor %xmm2,%xmm1 + andl %eax,%esi + rorl $7,%ebp + movdqa %xmm10,%xmm8 + paddd %xmm0,%xmm10 + addl %edi,%ecx + movl %edx,%edi + pxor %xmm9,%xmm1 + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movdqa %xmm1,%xmm9 + movdqa %xmm10,0(%rsp) + movl %ebp,%esi + xorl %eax,%ebp + addl 20(%rsp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %ebp,%edi +.byte 102,69,15,56,220,222 + movups 160(%r15),%xmm15 + rorl $7,%edx + psrld $30,%xmm9 + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + por %xmm9,%xmm1 + movl %edx,%edi + xorl %ebp,%edx + movdqa %xmm1,%xmm10 + addl 24(%rsp),%eax + andl %ebp,%edi + andl %edx,%esi + rorl $7,%ecx + addl %edi,%eax + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movl %ecx,%esi + cmpl $11,%r8d + jb .Laesenclast3 + movups 176(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 192(%r15),%xmm15 +.byte 102,69,15,56,220,222 + je .Laesenclast3 + movups 208(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 224(%r15),%xmm15 +.byte 102,69,15,56,220,222 +.Laesenclast3: +.byte 102,69,15,56,221,223 + movups 16(%r15),%xmm14 + xorl %edx,%ecx + addl 28(%rsp),%ebp + andl %edx,%esi + andl %ecx,%edi + rorl $7,%ebx + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%edi + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,208,8 + xorl %ecx,%ebx + addl 32(%rsp),%edx + andl %ecx,%edi + pxor %xmm3,%xmm2 + andl %ebx,%esi + rorl $7,%eax + movdqa %xmm8,%xmm9 + paddd %xmm1,%xmm8 + addl %edi,%edx + movl %ebp,%edi + pxor %xmm10,%xmm2 + roll $5,%ebp + movups 48(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,32(%r13,%r12,1) + xorps 
%xmm12,%xmm11 +.byte 102,69,15,56,220,222 + movups 32(%r15),%xmm15 + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movdqa %xmm2,%xmm10 + movdqa %xmm8,16(%rsp) + movl %eax,%esi + xorl %ebx,%eax + addl 36(%rsp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%edi + rorl $7,%ebp + psrld $30,%xmm10 + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + por %xmm10,%xmm2 + movl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm2,%xmm8 + addl 40(%rsp),%ebx + andl %eax,%edi + andl %ebp,%esi +.byte 102,69,15,56,220,223 + movups 48(%r15),%xmm14 + rorl $7,%edx + addl %edi,%ebx + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%esi + xorl %ebp,%edx + addl 44(%rsp),%eax + andl %ebp,%esi + andl %edx,%edi + rorl $7,%ecx + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + addl 48(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm15 + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,193,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm10 + paddd %xmm2,%xmm9 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + movdqa %xmm9,32(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm8 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi +.byte 102,69,15,56,220,223 + movups 80(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm8,%xmm3 + addl 60(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 0(%rsp),%eax + paddd %xmm3,%xmm10 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + movdqa %xmm10,48(%rsp) + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 4(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 8(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 12(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi +.byte 102,69,15,56,220,223 + movups 112(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + cmpq %r14,%r10 + je .Ldone_ssse3 + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r10),%xmm0 + movdqu 16(%r10),%xmm1 + movdqu 32(%r10),%xmm2 + movdqu 48(%r10),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r10 + addl 16(%rsp),%ebx + xorl %eax,%esi +.byte 102,15,56,0,206 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm9,%xmm0 + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + movdqa %xmm0,0(%rsp) + addl 20(%rsp),%eax + xorl %ebp,%edi + psubd %xmm9,%xmm0 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 128(%r15),%xmm15 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi +.byte 102,15,56,0,214 + movl %edx,%edi + roll $5,%edx + paddd %xmm9,%xmm1 + xorl %eax,%esi +.byte 102,69,15,56,220,223 + movups 
144(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + movdqa %xmm1,16(%rsp) + addl 36(%rsp),%ebx + xorl %eax,%edi + psubd %xmm9,%xmm1 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi +.byte 102,15,56,0,222 + movl %ebp,%edi + roll $5,%ebp + paddd %xmm9,%xmm2 + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + movdqa %xmm2,32(%rsp) + addl 52(%rsp),%ecx + xorl %ebx,%edi + psubd %xmm9,%xmm2 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + cmpl $11,%r8d + jb .Laesenclast4 + movups 176(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 192(%r15),%xmm15 +.byte 102,69,15,56,220,222 + je .Laesenclast4 + movups 208(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 224(%r15),%xmm15 +.byte 102,69,15,56,220,222 +.Laesenclast4: +.byte 102,69,15,56,221,223 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movups %xmm11,48(%r13,%r12,1) + leaq 64(%r12),%r12 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + addl 12(%r9),%edx + movl %eax,0(%r9) + addl 16(%r9),%ebp + movl %esi,4(%r9) + movl %esi,%ebx + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + jmp .Loop_ssse3 + +.align 16 +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 20(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 128(%r15),%xmm15 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi +.byte 102,69,15,56,220,223 + movups 144(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + addl 36(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 52(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + cmpl $11,%r8d + jb .Laesenclast5 + movups 176(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 192(%r15),%xmm15 +.byte 102,69,15,56,220,222 + je .Laesenclast5 + movups 208(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 
224(%r15),%xmm15 +.byte 102,69,15,56,220,222 +.Laesenclast5: +.byte 102,69,15,56,221,223 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movups %xmm11,48(%r13,%r12,1) + movq 88(%rsp),%r8 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + movl %eax,0(%r9) + addl 12(%r9),%edx + movl %esi,4(%r9) + addl 16(%r9),%ebp + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + movups %xmm11,(%r8) + leaq 104(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_ssse3: + .byte 0xf3,0xc3 +.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3 +.align 64 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 + +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 + +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc + +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + + +.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 diff --git a/deps/openssl/asm/x64-elf-gas/aes/aesni-x86_64.s b/deps/openssl/asm/x64-elf-gas/aes/aesni-x86_64.s new file mode 100644 index 0000000000..2d24b7b28d --- /dev/null +++ b/deps/openssl/asm/x64-elf-gas/aes/aesni-x86_64.s @@ -0,0 +1,2558 @@ +.text + +.globl aesni_encrypt +.type aesni_encrypt,@function +.align 16 +aesni_encrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +.Loop_enc1_1: +.byte 102,15,56,220,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz .Loop_enc1_1 + +.byte 102,15,56,221,209 + movups %xmm2,(%rsi) + .byte 0xf3,0xc3 +.size aesni_encrypt,.-aesni_encrypt + +.globl aesni_decrypt +.type aesni_decrypt,@function +.align 16 +aesni_decrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +.Loop_dec1_2: +.byte 102,15,56,222,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz .Loop_dec1_2 + +.byte 102,15,56,223,209 + movups %xmm2,(%rsi) + .byte 0xf3,0xc3 +.size aesni_decrypt, .-aesni_decrypt +.type _aesni_encrypt3,@function +.align 16 +_aesni_encrypt3: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups (%rcx),%xmm0 + +.Lenc_loop3: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 + movups (%rcx),%xmm0 + jnz .Lenc_loop3 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + .byte 0xf3,0xc3 +.size _aesni_encrypt3,.-_aesni_encrypt3 +.type _aesni_decrypt3,@function +.align 16 +_aesni_decrypt3: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups (%rcx),%xmm0 + 
+.Ldec_loop3: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %eax +.byte 102,15,56,222,225 + movups 16(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,222,224 + movups (%rcx),%xmm0 + jnz .Ldec_loop3 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + .byte 0xf3,0xc3 +.size _aesni_decrypt3,.-_aesni_decrypt3 +.type _aesni_encrypt4,@function +.align 16 +_aesni_encrypt4: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups (%rcx),%xmm0 + +.Lenc_loop4: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups (%rcx),%xmm0 + jnz .Lenc_loop4 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + .byte 0xf3,0xc3 +.size _aesni_encrypt4,.-_aesni_encrypt4 +.type _aesni_decrypt4,@function +.align 16 +_aesni_decrypt4: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups (%rcx),%xmm0 + +.Ldec_loop4: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %eax +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 16(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups (%rcx),%xmm0 + jnz .Ldec_loop4 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + .byte 0xf3,0xc3 +.size _aesni_decrypt4,.-_aesni_decrypt4 +.type _aesni_encrypt6,@function +.align 16 +_aesni_encrypt6: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 + decl %eax +.byte 102,15,56,220,241 + movups (%rcx),%xmm0 +.byte 102,15,56,220,249 + jmp .Lenc_loop6_enter +.align 16 +.Lenc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.Lenc_loop6_enter: + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups (%rcx),%xmm0 + jnz .Lenc_loop6 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + .byte 0xf3,0xc3 +.size _aesni_encrypt6,.-_aesni_encrypt6 +.type _aesni_decrypt6,@function +.align 16 +_aesni_decrypt6: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + pxor 
%xmm0,%xmm3 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 + decl %eax +.byte 102,15,56,222,241 + movups (%rcx),%xmm0 +.byte 102,15,56,222,249 + jmp .Ldec_loop6_enter +.align 16 +.Ldec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %eax +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.Ldec_loop6_enter: + movups 16(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups (%rcx),%xmm0 + jnz .Ldec_loop6 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + .byte 0xf3,0xc3 +.size _aesni_decrypt6,.-_aesni_decrypt6 +.type _aesni_encrypt8,@function +.align 16 +_aesni_encrypt8: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 + decl %eax +.byte 102,15,56,220,241 + pxor %xmm0,%xmm8 +.byte 102,15,56,220,249 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 16(%rcx),%xmm1 + jmp .Lenc_loop8_enter +.align 16 +.Lenc_loop8: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 16(%rcx),%xmm1 +.Lenc_loop8_enter: +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups (%rcx),%xmm0 + jnz .Lenc_loop8 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 +.byte 102,68,15,56,221,192 +.byte 102,68,15,56,221,200 + .byte 0xf3,0xc3 +.size _aesni_encrypt8,.-_aesni_encrypt8 +.type _aesni_decrypt8,@function +.align 16 +_aesni_decrypt8: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 + decl %eax +.byte 102,15,56,222,241 + pxor %xmm0,%xmm8 +.byte 102,15,56,222,249 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 16(%rcx),%xmm1 + jmp .Ldec_loop8_enter +.align 16 +.Ldec_loop8: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %eax +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 
16(%rcx),%xmm1 +.Ldec_loop8_enter: +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups (%rcx),%xmm0 + jnz .Ldec_loop8 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 +.byte 102,68,15,56,223,192 +.byte 102,68,15,56,223,200 + .byte 0xf3,0xc3 +.size _aesni_decrypt8,.-_aesni_decrypt8 +.globl aesni_ecb_encrypt +.type aesni_ecb_encrypt,@function +.align 16 +aesni_ecb_encrypt: + andq $-16,%rdx + jz .Lecb_ret + + movl 240(%rcx),%eax + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %eax,%r10d + testl %r8d,%r8d + jz .Lecb_decrypt + + cmpq $128,%rdx + jb .Lecb_enc_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp .Lecb_enc_loop8_enter +.align 16 +.Lecb_enc_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +.Lecb_enc_loop8_enter: + + call _aesni_encrypt8 + + subq $128,%rdx + jnc .Lecb_enc_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $128,%rdx + jz .Lecb_ret + +.Lecb_enc_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb .Lecb_enc_one + movups 16(%rdi),%xmm3 + je .Lecb_enc_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb .Lecb_enc_three + movups 48(%rdi),%xmm5 + je .Lecb_enc_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb .Lecb_enc_five + movups 80(%rdi),%xmm7 + je .Lecb_enc_six + movdqu 96(%rdi),%xmm8 + call _aesni_encrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_3: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_3 + +.byte 102,15,56,221,209 + movups %xmm2,(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_two: + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_five: + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%rsi) + 
movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_six: + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + jmp .Lecb_ret + +.align 16 +.Lecb_decrypt: + cmpq $128,%rdx + jb .Lecb_dec_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp .Lecb_dec_loop8_enter +.align 16 +.Lecb_dec_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +.Lecb_dec_loop8_enter: + + call _aesni_decrypt8 + + movups (%r11),%xmm0 + subq $128,%rdx + jnc .Lecb_dec_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $128,%rdx + jz .Lecb_ret + +.Lecb_dec_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb .Lecb_dec_one + movups 16(%rdi),%xmm3 + je .Lecb_dec_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb .Lecb_dec_three + movups 48(%rdi),%xmm5 + je .Lecb_dec_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb .Lecb_dec_five + movups 80(%rdi),%xmm7 + je .Lecb_dec_six + movups 96(%rdi),%xmm8 + movups (%rcx),%xmm0 + call _aesni_decrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_4: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_4 + +.byte 102,15,56,223,209 + movups %xmm2,(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_two: + xorps %xmm4,%xmm4 + call _aesni_decrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_six: + call _aesni_decrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + +.Lecb_ret: + .byte 0xf3,0xc3 +.size aesni_ecb_encrypt,.-aesni_ecb_encrypt +.globl aesni_ccm64_encrypt_blocks +.type aesni_ccm64_encrypt_blocks,@function +.align 16 +aesni_ccm64_encrypt_blocks: + movl 240(%rcx),%eax + movdqu (%r8),%xmm9 + movdqa .Lincrement64(%rip),%xmm6 + movdqa .Lbswap_mask(%rip),%xmm7 + + shrl $1,%eax + 
leaq 0(%rcx),%r11 + movdqu (%r9),%xmm3 + movdqa %xmm9,%xmm2 + movl %eax,%r10d +.byte 102,68,15,56,0,207 + jmp .Lccm64_enc_outer +.align 16 +.Lccm64_enc_outer: + movups (%r11),%xmm0 + movl %r10d,%eax + movups (%rdi),%xmm8 + + xorps %xmm0,%xmm2 + movups 16(%r11),%xmm1 + xorps %xmm8,%xmm0 + leaq 32(%r11),%rcx + xorps %xmm0,%xmm3 + movups (%rcx),%xmm0 + +.Lccm64_enc2_loop: +.byte 102,15,56,220,209 + decl %eax +.byte 102,15,56,220,217 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,216 + movups 0(%rcx),%xmm0 + jnz .Lccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq %xmm6,%xmm9 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + + decq %rdx + leaq 16(%rdi),%rdi + xorps %xmm2,%xmm8 + movdqa %xmm9,%xmm2 + movups %xmm8,(%rsi) + leaq 16(%rsi),%rsi +.byte 102,15,56,0,215 + jnz .Lccm64_enc_outer + + movups %xmm3,(%r9) + .byte 0xf3,0xc3 +.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks +.globl aesni_ccm64_decrypt_blocks +.type aesni_ccm64_decrypt_blocks,@function +.align 16 +aesni_ccm64_decrypt_blocks: + movl 240(%rcx),%eax + movups (%r8),%xmm9 + movdqu (%r9),%xmm3 + movdqa .Lincrement64(%rip),%xmm6 + movdqa .Lbswap_mask(%rip),%xmm7 + + movaps %xmm9,%xmm2 + movl %eax,%r10d + movq %rcx,%r11 +.byte 102,68,15,56,0,207 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_5: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_5 + +.byte 102,15,56,221,209 + movups (%rdi),%xmm8 + paddq %xmm6,%xmm9 + leaq 16(%rdi),%rdi + jmp .Lccm64_dec_outer +.align 16 +.Lccm64_dec_outer: + xorps %xmm2,%xmm8 + movdqa %xmm9,%xmm2 + movl %r10d,%eax + movups %xmm8,(%rsi) + leaq 16(%rsi),%rsi +.byte 102,15,56,0,215 + + subq $1,%rdx + jz .Lccm64_dec_break + + movups (%r11),%xmm0 + shrl $1,%eax + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + leaq 32(%r11),%rcx + xorps %xmm0,%xmm2 + xorps %xmm8,%xmm3 + movups (%rcx),%xmm0 + +.Lccm64_dec2_loop: +.byte 102,15,56,220,209 + decl %eax +.byte 102,15,56,220,217 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,216 + movups 0(%rcx),%xmm0 + jnz .Lccm64_dec2_loop + movups (%rdi),%xmm8 + paddq %xmm6,%xmm9 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + leaq 16(%rdi),%rdi +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + jmp .Lccm64_dec_outer + +.align 16 +.Lccm64_dec_break: + + movups (%r11),%xmm0 + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + leaq 32(%r11),%r11 + xorps %xmm8,%xmm3 +.Loop_enc1_6: +.byte 102,15,56,220,217 + decl %eax + movups (%r11),%xmm1 + leaq 16(%r11),%r11 + jnz .Loop_enc1_6 + +.byte 102,15,56,221,217 + movups %xmm3,(%r9) + .byte 0xf3,0xc3 +.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks +.globl aesni_ctr32_encrypt_blocks +.type aesni_ctr32_encrypt_blocks,@function +.align 16 +aesni_ctr32_encrypt_blocks: + cmpq $1,%rdx + je .Lctr32_one_shortcut + + movdqu (%r8),%xmm14 + movdqa .Lbswap_mask(%rip),%xmm15 + xorl %eax,%eax +.byte 102,69,15,58,22,242,3 +.byte 102,68,15,58,34,240,3 + + movl 240(%rcx),%eax + bswapl %r10d + pxor %xmm12,%xmm12 + pxor %xmm13,%xmm13 +.byte 102,69,15,58,34,226,0 + leaq 3(%r10),%r11 +.byte 102,69,15,58,34,235,0 + incl %r10d +.byte 102,69,15,58,34,226,1 + incq %r11 +.byte 102,69,15,58,34,235,1 + incl %r10d +.byte 102,69,15,58,34,226,2 + incq %r11 +.byte 102,69,15,58,34,235,2 + movdqa %xmm12,-40(%rsp) +.byte 102,69,15,56,0,231 + movdqa %xmm13,-24(%rsp) +.byte 102,69,15,56,0,239 + + pshufd $192,%xmm12,%xmm2 + pshufd 
$128,%xmm12,%xmm3 + pshufd $64,%xmm12,%xmm4 + cmpq $6,%rdx + jb .Lctr32_tail + shrl $1,%eax + movq %rcx,%r11 + movl %eax,%r10d + subq $6,%rdx + jmp .Lctr32_loop6 + +.align 16 +.Lctr32_loop6: + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm2 + movups (%r11),%xmm0 + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm3 + movups 16(%r11),%xmm1 + pshufd $64,%xmm13,%xmm7 + por %xmm14,%xmm4 + por %xmm14,%xmm5 + xorps %xmm0,%xmm2 + por %xmm14,%xmm6 + por %xmm14,%xmm7 + + + + + pxor %xmm0,%xmm3 +.byte 102,15,56,220,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 +.byte 102,15,56,220,217 + movdqa .Lincrement32(%rip),%xmm13 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,225 + movdqa -40(%rsp),%xmm12 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + jmp .Lctr32_enc_loop6_enter +.align 16 +.Lctr32_enc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.Lctr32_enc_loop6_enter: + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups (%rcx),%xmm0 + jnz .Lctr32_enc_loop6 + +.byte 102,15,56,220,209 + paddd %xmm13,%xmm12 +.byte 102,15,56,220,217 + paddd -24(%rsp),%xmm13 +.byte 102,15,56,220,225 + movdqa %xmm12,-40(%rsp) +.byte 102,15,56,220,233 + movdqa %xmm13,-24(%rsp) +.byte 102,15,56,220,241 +.byte 102,69,15,56,0,231 +.byte 102,15,56,220,249 +.byte 102,69,15,56,0,239 + +.byte 102,15,56,221,208 + movups (%rdi),%xmm8 +.byte 102,15,56,221,216 + movups 16(%rdi),%xmm9 +.byte 102,15,56,221,224 + movups 32(%rdi),%xmm10 +.byte 102,15,56,221,232 + movups 48(%rdi),%xmm11 +.byte 102,15,56,221,240 + movups 64(%rdi),%xmm1 +.byte 102,15,56,221,248 + movups 80(%rdi),%xmm0 + leaq 96(%rdi),%rdi + + xorps %xmm2,%xmm8 + pshufd $192,%xmm12,%xmm2 + xorps %xmm3,%xmm9 + pshufd $128,%xmm12,%xmm3 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + pshufd $64,%xmm12,%xmm4 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + xorps %xmm7,%xmm0 + movups %xmm1,64(%rsi) + movups %xmm0,80(%rsi) + leaq 96(%rsi),%rsi + movl %r10d,%eax + subq $6,%rdx + jnc .Lctr32_loop6 + + addq $6,%rdx + jz .Lctr32_done + movq %r11,%rcx + leal 1(%rax,%rax,1),%eax + +.Lctr32_tail: + por %xmm14,%xmm2 + movups (%rdi),%xmm8 + cmpq $2,%rdx + jb .Lctr32_one + + por %xmm14,%xmm3 + movups 16(%rdi),%xmm9 + je .Lctr32_two + + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm4 + movups 32(%rdi),%xmm10 + cmpq $4,%rdx + jb .Lctr32_three + + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm5 + movups 48(%rdi),%xmm11 + je .Lctr32_four + + por %xmm14,%xmm6 + xorps %xmm7,%xmm7 + + call _aesni_encrypt6 + + movups 64(%rdi),%xmm1 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + movups %xmm1,64(%rsi) + jmp .Lctr32_done + +.align 16 +.Lctr32_one_shortcut: + movups (%r8),%xmm2 + movups (%rdi),%xmm8 + movl 240(%rcx),%eax +.Lctr32_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_7: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_7 + +.byte 102,15,56,221,209 + xorps %xmm2,%xmm8 + movups %xmm8,(%rsi) + jmp .Lctr32_done + +.align 16 +.Lctr32_two: + xorps 
%xmm4,%xmm4 + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + movups %xmm9,16(%rsi) + jmp .Lctr32_done + +.align 16 +.Lctr32_three: + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + movups %xmm10,32(%rsi) + jmp .Lctr32_done + +.align 16 +.Lctr32_four: + call _aesni_encrypt4 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + movups %xmm11,48(%rsi) + +.Lctr32_done: + .byte 0xf3,0xc3 +.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks +.globl aesni_xts_encrypt +.type aesni_xts_encrypt,@function +.align 16 +aesni_xts_encrypt: + leaq -104(%rsp),%rsp + movups (%r9),%xmm15 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm15 +.Loop_enc1_8: +.byte 102,68,15,56,220,249 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz .Loop_enc1_8 + +.byte 102,68,15,56,221,249 + movq %rcx,%r11 + movl %r10d,%eax + movq %rdx,%r9 + andq $-16,%rdx + + movdqa .Lxts_magic(%rip),%xmm8 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + subq $96,%rdx + jc .Lxts_enc_short + + shrl $1,%eax + subl $1,%eax + movl %eax,%r10d + jmp .Lxts_enc_grandloop + +.align 16 +.Lxts_enc_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu 0(%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + pxor %xmm12,%xmm4 + movdqu 80(%rdi),%xmm7 + leaq 96(%rdi),%rdi + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 + + + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,220,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 + movdqa %xmm11,16(%rsp) +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 + movdqa %xmm12,32(%rsp) +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 + movdqa %xmm13,48(%rsp) +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax + movdqa %xmm14,64(%rsp) +.byte 102,15,56,220,241 + movdqa %xmm15,80(%rsp) +.byte 102,15,56,220,249 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp .Lxts_enc_loop6_enter + +.align 16 +.Lxts_enc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.Lxts_enc_loop6_enter: + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups (%rcx),%xmm0 + jnz .Lxts_enc_loop6 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + paddq %xmm15,%xmm15 +.byte 102,15,56,220,209 + pand %xmm8,%xmm9 
+.byte 102,15,56,220,217 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,220,225 + pxor %xmm9,%xmm15 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups 16(%rcx),%xmm1 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 +.byte 102,15,56,220,208 + pand %xmm8,%xmm9 +.byte 102,15,56,220,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,220,224 + pxor %xmm9,%xmm15 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups 32(%rcx),%xmm0 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 +.byte 102,15,56,220,209 + pand %xmm8,%xmm9 +.byte 102,15,56,220,217 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,220,225 + pxor %xmm9,%xmm15 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 +.byte 102,15,56,221,208 + pand %xmm8,%xmm9 +.byte 102,15,56,221,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,221,224 + pxor %xmm9,%xmm15 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 + pand %xmm8,%xmm9 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + subq $96,%rdx + jnc .Lxts_enc_grandloop + + leal 3(%rax,%rax,1),%eax + movq %r11,%rcx + movl %eax,%r10d + +.Lxts_enc_short: + addq $96,%rdx + jz .Lxts_enc_done + + cmpq $32,%rdx + jb .Lxts_enc_one + je .Lxts_enc_two + + cmpq $64,%rdx + jb .Lxts_enc_three + je .Lxts_enc_four + + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_encrypt6 + + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_9: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_9 + +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + leaq 16(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_encrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_encrypt3 + + xorps %xmm10,%xmm2 + movdqa 
%xmm13,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_four: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_encrypt4 + + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_done: + andq $15,%r9 + jz .Lxts_enc_ret + movq %r9,%rdx + +.Lxts_enc_steal: + movzbl (%rdi),%eax + movzbl -16(%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,-16(%rsi) + movb %cl,0(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz .Lxts_enc_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups -16(%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_10: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_10 + +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movups %xmm2,-16(%rsi) + +.Lxts_enc_ret: + leaq 104(%rsp),%rsp +.Lxts_enc_epilogue: + .byte 0xf3,0xc3 +.size aesni_xts_encrypt,.-aesni_xts_encrypt +.globl aesni_xts_decrypt +.type aesni_xts_decrypt,@function +.align 16 +aesni_xts_decrypt: + leaq -104(%rsp),%rsp + movups (%r9),%xmm15 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm15 +.Loop_enc1_11: +.byte 102,68,15,56,220,249 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz .Loop_enc1_11 + +.byte 102,68,15,56,221,249 + xorl %eax,%eax + testq $15,%rdx + setnz %al + shlq $4,%rax + subq %rax,%rdx + + movq %rcx,%r11 + movl %r10d,%eax + movq %rdx,%r9 + andq $-16,%rdx + + movdqa .Lxts_magic(%rip),%xmm8 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + subq $96,%rdx + jc .Lxts_dec_short + + shrl $1,%eax + subl $1,%eax + movl %eax,%r10d + jmp .Lxts_dec_grandloop + +.align 16 +.Lxts_dec_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu 0(%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + pxor %xmm12,%xmm4 + movdqu 80(%rdi),%xmm7 + leaq 96(%rdi),%rdi + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 + + + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,222,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 + movdqa %xmm11,16(%rsp) +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + movdqa %xmm12,32(%rsp) +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 + 
movdqa %xmm13,48(%rsp) +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax + movdqa %xmm14,64(%rsp) +.byte 102,15,56,222,241 + movdqa %xmm15,80(%rsp) +.byte 102,15,56,222,249 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp .Lxts_dec_loop6_enter + +.align 16 +.Lxts_dec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %eax +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.Lxts_dec_loop6_enter: + movups 16(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups (%rcx),%xmm0 + jnz .Lxts_dec_loop6 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + paddq %xmm15,%xmm15 +.byte 102,15,56,222,209 + pand %xmm8,%xmm9 +.byte 102,15,56,222,217 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,222,225 + pxor %xmm9,%xmm15 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups 16(%rcx),%xmm1 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 +.byte 102,15,56,222,208 + pand %xmm8,%xmm9 +.byte 102,15,56,222,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,222,224 + pxor %xmm9,%xmm15 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups 32(%rcx),%xmm0 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 +.byte 102,15,56,222,209 + pand %xmm8,%xmm9 +.byte 102,15,56,222,217 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,222,225 + pxor %xmm9,%xmm15 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 +.byte 102,15,56,223,208 + pand %xmm8,%xmm9 +.byte 102,15,56,223,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,223,224 + pxor %xmm9,%xmm15 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 + pand %xmm8,%xmm9 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + subq $96,%rdx + jnc .Lxts_dec_grandloop + + leal 3(%rax,%rax,1),%eax + movq %r11,%rcx + movl %eax,%r10d + +.Lxts_dec_short: + addq $96,%rdx + jz .Lxts_dec_done + + cmpq $32,%rdx + jb .Lxts_dec_one + je .Lxts_dec_two + + cmpq $64,%rdx + jb .Lxts_dec_three + je .Lxts_dec_four + + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_decrypt6 + + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm14 + movdqu %xmm5,48(%rsi) + pcmpgtd %xmm15,%xmm14 + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + pshufd $19,%xmm14,%xmm11 + andq $15,%r9 + jz .Lxts_dec_ret + + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm11 + pxor 
%xmm15,%xmm11 + jmp .Lxts_dec_done2 + +.align 16 +.Lxts_dec_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_12: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_12 + +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + movdqa %xmm12,%xmm11 + leaq 16(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_decrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm13,%xmm11 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_decrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm15,%xmm11 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_four: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movups (%rdi),%xmm2 + pand %xmm8,%xmm9 + movups 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_decrypt4 + + xorps %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm15,%xmm11 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_done: + andq $15,%r9 + jz .Lxts_dec_ret +.Lxts_dec_done2: + movq %r9,%rdx + movq %r11,%rcx + movl %r10d,%eax + + movups (%rdi),%xmm2 + xorps %xmm11,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_13: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_13 + +.byte 102,15,56,223,209 + xorps %xmm11,%xmm2 + movups %xmm2,(%rsi) + +.Lxts_dec_steal: + movzbl 16(%rdi),%eax + movzbl (%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,(%rsi) + movb %cl,16(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz .Lxts_dec_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups (%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_14: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_14 + +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + +.Lxts_dec_ret: + leaq 104(%rsp),%rsp +.Lxts_dec_epilogue: + .byte 0xf3,0xc3 +.size aesni_xts_decrypt,.-aesni_xts_decrypt +.globl aesni_cbc_encrypt +.type aesni_cbc_encrypt,@function +.align 16 +aesni_cbc_encrypt: + testq %rdx,%rdx + jz .Lcbc_ret + + movl 240(%rcx),%r10d + movq %rcx,%r11 + testl %r9d,%r9d + jz .Lcbc_decrypt + + movups (%r8),%xmm2 + movl %r10d,%eax + cmpq $16,%rdx + jb .Lcbc_enc_tail + subq $16,%rdx + jmp .Lcbc_enc_loop +.align 16 +.Lcbc_enc_loop: + movups (%rdi),%xmm3 + leaq 16(%rdi),%rdi + + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm3 + leaq 32(%rcx),%rcx + xorps %xmm3,%xmm2 +.Loop_enc1_15: +.byte 
102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_15 + +.byte 102,15,56,221,209 + movl %r10d,%eax + movq %r11,%rcx + movups %xmm2,0(%rsi) + leaq 16(%rsi),%rsi + subq $16,%rdx + jnc .Lcbc_enc_loop + addq $16,%rdx + jnz .Lcbc_enc_tail + movups %xmm2,(%r8) + jmp .Lcbc_ret + +.Lcbc_enc_tail: + movq %rdx,%rcx + xchgq %rdi,%rsi +.long 0x9066A4F3 + + movl $16,%ecx + subq %rdx,%rcx + xorl %eax,%eax +.long 0x9066AAF3 + + leaq -16(%rdi),%rdi + movl %r10d,%eax + movq %rdi,%rsi + movq %r11,%rcx + xorq %rdx,%rdx + jmp .Lcbc_enc_loop + + +.align 16 +.Lcbc_decrypt: + movups (%r8),%xmm9 + movl %r10d,%eax + cmpq $112,%rdx + jbe .Lcbc_dec_tail + shrl $1,%r10d + subq $112,%rdx + movl %r10d,%eax + movaps %xmm9,-24(%rsp) + jmp .Lcbc_dec_loop8_enter +.align 16 +.Lcbc_dec_loop8: + movaps %xmm0,-24(%rsp) + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi +.Lcbc_dec_loop8_enter: + movups (%rcx),%xmm0 + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 16(%rcx),%xmm1 + + leaq 32(%rcx),%rcx + movdqu 32(%rdi),%xmm4 + xorps %xmm0,%xmm2 + movdqu 48(%rdi),%xmm5 + xorps %xmm0,%xmm3 + movdqu 64(%rdi),%xmm6 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm4 + movdqu 80(%rdi),%xmm7 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + movdqu 96(%rdi),%xmm8 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 + movdqu 112(%rdi),%xmm9 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 + decl %eax +.byte 102,15,56,222,241 + pxor %xmm0,%xmm8 +.byte 102,15,56,222,249 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 16(%rcx),%xmm1 + + call .Ldec_loop8_enter + + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps -24(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm1 + xorps %xmm0,%xmm8 + movups 112(%rdi),%xmm0 + xorps %xmm1,%xmm9 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movq %r11,%rcx + movups %xmm7,80(%rsi) + leaq 128(%rdi),%rdi + movups %xmm8,96(%rsi) + leaq 112(%rsi),%rsi + subq $128,%rdx + ja .Lcbc_dec_loop8 + + movaps %xmm9,%xmm2 + movaps %xmm0,%xmm9 + addq $112,%rdx + jle .Lcbc_dec_tail_collected + movups %xmm2,(%rsi) + leal 1(%r10,%r10,1),%eax + leaq 16(%rsi),%rsi +.Lcbc_dec_tail: + movups (%rdi),%xmm2 + movaps %xmm2,%xmm8 + cmpq $16,%rdx + jbe .Lcbc_dec_one + + movups 16(%rdi),%xmm3 + movaps %xmm3,%xmm7 + cmpq $32,%rdx + jbe .Lcbc_dec_two + + movups 32(%rdi),%xmm4 + movaps %xmm4,%xmm6 + cmpq $48,%rdx + jbe .Lcbc_dec_three + + movups 48(%rdi),%xmm5 + cmpq $64,%rdx + jbe .Lcbc_dec_four + + movups 64(%rdi),%xmm6 + cmpq $80,%rdx + jbe .Lcbc_dec_five + + movups 80(%rdi),%xmm7 + cmpq $96,%rdx + jbe .Lcbc_dec_six + + movups 96(%rdi),%xmm8 + movaps %xmm9,-24(%rsp) + call _aesni_decrypt8 + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps -24(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm9 + xorps %xmm0,%xmm8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + movaps %xmm8,%xmm2 + subq $112,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 
32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_16: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_16 + +.byte 102,15,56,223,209 + xorps %xmm9,%xmm2 + movaps %xmm8,%xmm9 + subq $16,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_two: + xorps %xmm4,%xmm4 + call _aesni_decrypt3 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + movaps %xmm7,%xmm9 + movaps %xmm3,%xmm2 + leaq 16(%rsi),%rsi + subq $32,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_three: + call _aesni_decrypt3 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + movaps %xmm6,%xmm9 + movaps %xmm4,%xmm2 + leaq 32(%rsi),%rsi + subq $48,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_four: + call _aesni_decrypt4 + xorps %xmm9,%xmm2 + movups 48(%rdi),%xmm9 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + xorps %xmm6,%xmm5 + movups %xmm4,32(%rsi) + movaps %xmm5,%xmm2 + leaq 48(%rsi),%rsi + subq $64,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm9 + xorps %xmm1,%xmm6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + movaps %xmm6,%xmm2 + subq $80,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_six: + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm0 + xorps %xmm1,%xmm6 + movups 80(%rdi),%xmm9 + xorps %xmm0,%xmm7 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + movaps %xmm7,%xmm2 + subq $96,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_tail_collected: + andq $15,%rdx + movups %xmm9,(%r8) + jnz .Lcbc_dec_tail_partial + movups %xmm2,(%rsi) + jmp .Lcbc_dec_ret +.align 16 +.Lcbc_dec_tail_partial: + movaps %xmm2,-24(%rsp) + movq $16,%rcx + movq %rsi,%rdi + subq %rdx,%rcx + leaq -24(%rsp),%rsi +.long 0x9066A4F3 + + +.Lcbc_dec_ret: +.Lcbc_ret: + .byte 0xf3,0xc3 +.size aesni_cbc_encrypt,.-aesni_cbc_encrypt +.globl aesni_set_decrypt_key +.type aesni_set_decrypt_key,@function +.align 16 +aesni_set_decrypt_key: +.byte 0x48,0x83,0xEC,0x08 + + call __aesni_set_encrypt_key + shll $4,%esi + testl %eax,%eax + jnz .Ldec_key_ret + leaq 16(%rdx,%rsi,1),%rdi + + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 + movups %xmm0,(%rdi) + movups %xmm1,(%rdx) + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + +.Ldec_key_inverse: + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + movups %xmm0,16(%rdi) + movups %xmm1,-16(%rdx) + cmpq %rdx,%rdi + ja .Ldec_key_inverse + + movups (%rdx),%xmm0 +.byte 102,15,56,219,192 + movups %xmm0,(%rdi) +.Ldec_key_ret: + addq $8,%rsp + .byte 0xf3,0xc3 +.LSEH_end_set_decrypt_key: +.size aesni_set_decrypt_key,.-aesni_set_decrypt_key +.globl aesni_set_encrypt_key +.type aesni_set_encrypt_key,@function +.align 16 +aesni_set_encrypt_key: +__aesni_set_encrypt_key: +.byte 0x48,0x83,0xEC,0x08 + + movq $-1,%rax + testq %rdi,%rdi + jz .Lenc_key_ret + testq %rdx,%rdx + jz .Lenc_key_ret + + movups (%rdi),%xmm0 + xorps %xmm4,%xmm4 + leaq 
16(%rdx),%rax + cmpl $256,%esi + je .L14rounds + cmpl $192,%esi + je .L12rounds + cmpl $128,%esi + jne .Lbad_keybits + +.L10rounds: + movl $9,%esi + movups %xmm0,(%rdx) +.byte 102,15,58,223,200,1 + call .Lkey_expansion_128_cold +.byte 102,15,58,223,200,2 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,4 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,8 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,16 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,32 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,64 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,128 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,27 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,54 + call .Lkey_expansion_128 + movups %xmm0,(%rax) + movl %esi,80(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + +.align 16 +.L12rounds: + movq 16(%rdi),%xmm2 + movl $11,%esi + movups %xmm0,(%rdx) +.byte 102,15,58,223,202,1 + call .Lkey_expansion_192a_cold +.byte 102,15,58,223,202,2 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,4 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,8 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,16 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,32 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,64 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,128 + call .Lkey_expansion_192b + movups %xmm0,(%rax) + movl %esi,48(%rax) + xorq %rax,%rax + jmp .Lenc_key_ret + +.align 16 +.L14rounds: + movups 16(%rdi),%xmm2 + movl $13,%esi + leaq 16(%rax),%rax + movups %xmm0,(%rdx) + movups %xmm2,16(%rdx) +.byte 102,15,58,223,202,1 + call .Lkey_expansion_256a_cold +.byte 102,15,58,223,200,1 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,2 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,2 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,4 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,4 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,8 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,8 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,16 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,16 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,32 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,32 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,64 + call .Lkey_expansion_256a + movups %xmm0,(%rax) + movl %esi,16(%rax) + xorq %rax,%rax + jmp .Lenc_key_ret + +.align 16 +.Lbad_keybits: + movq $-2,%rax +.Lenc_key_ret: + addq $8,%rsp + .byte 0xf3,0xc3 +.LSEH_end_set_encrypt_key: + +.align 16 +.Lkey_expansion_128: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + .byte 0xf3,0xc3 + +.align 16 +.Lkey_expansion_192a: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_192a_cold: + movaps %xmm2,%xmm5 +.Lkey_expansion_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + .byte 0xf3,0xc3 + +.align 16 +.Lkey_expansion_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%rax) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%rax) + leaq 32(%rax),%rax + jmp .Lkey_expansion_192b_warm + +.align 16 +.Lkey_expansion_256a: + movups %xmm2,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + 
shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + .byte 0xf3,0xc3 + +.align 16 +.Lkey_expansion_256b: + movups %xmm0,(%rax) + leaq 16(%rax),%rax + + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + .byte 0xf3,0xc3 +.size aesni_set_encrypt_key,.-aesni_set_encrypt_key +.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.Lincrement32: +.long 6,6,6,0 +.Lincrement64: +.long 1,0,0,0 +.Lxts_magic: +.long 0x87,0,1,0 + +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 diff --git a/deps/openssl/asm/x64-elf-gas/bn/modexp512-x86_64.s b/deps/openssl/asm/x64-elf-gas/bn/modexp512-x86_64.s new file mode 100644 index 0000000000..c980dd0733 --- /dev/null +++ b/deps/openssl/asm/x64-elf-gas/bn/modexp512-x86_64.s @@ -0,0 +1,1776 @@ +.text + + +.type MULADD_128x512,@function +.align 16 +MULADD_128x512: + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + movq %r8,0(%rcx) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%r8 + movq 8(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + movq %r9,8(%rcx) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%r9 + .byte 0xf3,0xc3 +.size MULADD_128x512,.-MULADD_128x512 +.type mont_reduce,@function +.align 16 +mont_reduce: + leaq 192(%rsp),%rdi + movq 32(%rsp),%rsi + addq $576,%rsi + leaq 520(%rsp),%rcx + + movq 96(%rcx),%rbp + movq 0(%rsi),%rax + mulq %rbp + movq (%rcx),%r8 + addq %rax,%r8 + adcq $0,%rdx + movq %r8,0(%rdi) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + movq 8(%rcx),%r9 + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + movq 16(%rcx),%r10 + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 
24(%rsi),%rax + mulq %rbp + movq 24(%rcx),%r11 + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + movq 32(%rcx),%r12 + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + movq 40(%rcx),%r13 + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + movq 48(%rcx),%r14 + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + movq 56(%rcx),%r15 + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%r8 + movq 104(%rcx),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + movq %r9,8(%rdi) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%r9 + movq 112(%rcx),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + movq %r10,16(%rdi) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%r10 + movq 120(%rcx),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + movq %r11,24(%rdi) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%r11 + xorq %rax,%rax + + addq 64(%rcx),%r8 + adcq 72(%rcx),%r9 + adcq 80(%rcx),%r10 + adcq 
88(%rcx),%r11 + adcq $0,%rax + + + + + movq %r8,64(%rdi) + movq %r9,72(%rdi) + movq %r10,%rbp + movq %r11,88(%rdi) + + movq %rax,384(%rsp) + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + + + + + + + + + addq $80,%rdi + + addq $64,%rsi + leaq 296(%rsp),%rcx + + call MULADD_128x512 + + + movq 384(%rsp),%rax + + + addq -16(%rdi),%r8 + adcq -8(%rdi),%r9 + movq %r8,64(%rcx) + movq %r9,72(%rcx) + + adcq %rax,%rax + movq %rax,384(%rsp) + + leaq 192(%rsp),%rdi + addq $64,%rsi + + + + + + movq (%rsi),%r8 + movq 8(%rsi),%rbx + + movq (%rcx),%rax + mulq %r8 + movq %rax,%rbp + movq %rdx,%r9 + + movq 8(%rcx),%rax + mulq %r8 + addq %rax,%r9 + + movq (%rcx),%rax + mulq %rbx + addq %rax,%r9 + + movq %r9,8(%rdi) + + + subq $192,%rsi + + movq (%rcx),%r8 + movq 8(%rcx),%r9 + + call MULADD_128x512 + + + + + + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rdi + movq 24(%rsi),%rdx + + + movq 384(%rsp),%rbp + + addq 64(%rcx),%r8 + adcq 72(%rcx),%r9 + + + adcq %rbp,%rbp + + + + shlq $3,%rbp + movq 32(%rsp),%rcx + addq %rcx,%rbp + + + xorq %rsi,%rsi + + addq 0(%rbp),%r10 + adcq 64(%rbp),%r11 + adcq 128(%rbp),%r12 + adcq 192(%rbp),%r13 + adcq 256(%rbp),%r14 + adcq 320(%rbp),%r15 + adcq 384(%rbp),%r8 + adcq 448(%rbp),%r9 + + + + sbbq $0,%rsi + + + andq %rsi,%rax + andq %rsi,%rbx + andq %rsi,%rdi + andq %rsi,%rdx + + movq $1,%rbp + subq %rax,%r10 + sbbq %rbx,%r11 + sbbq %rdi,%r12 + sbbq %rdx,%r13 + + + + + sbbq $0,%rbp + + + + addq $512,%rcx + movq 32(%rcx),%rax + movq 40(%rcx),%rbx + movq 48(%rcx),%rdi + movq 56(%rcx),%rdx + + + + andq %rsi,%rax + andq %rsi,%rbx + andq %rsi,%rdi + andq %rsi,%rdx + + + + subq $1,%rbp + + sbbq %rax,%r14 + sbbq %rbx,%r15 + sbbq %rdi,%r8 + sbbq %rdx,%r9 + + + + movq 144(%rsp),%rsi + movq %r10,0(%rsi) + movq %r11,8(%rsi) + movq %r12,16(%rsi) + movq %r13,24(%rsi) + movq %r14,32(%rsi) + movq %r15,40(%rsi) + movq %r8,48(%rsi) + movq %r9,56(%rsi) + + .byte 0xf3,0xc3 +.size mont_reduce,.-mont_reduce +.type mont_mul_a3b,@function +.align 16 +mont_mul_a3b: + + + + + movq 0(%rdi),%rbp + + movq %r10,%rax + mulq %rbp + movq %rax,520(%rsp) + movq %rdx,%r10 + movq %r11,%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + movq %rdx,%r11 + movq %r12,%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %r13,%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + movq %rdx,%r13 + movq %r14,%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + movq %rdx,%r14 + movq %r15,%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r15 + movq %r8,%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + movq %rdx,%r8 + movq %r9,%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + movq %rdx,%r9 + movq 8(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + movq %r10,528(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + 
addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%r10 + movq 16(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + movq %r11,536(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%r11 + movq 24(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + movq %r12,544(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq 32(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + movq %r13,552(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%r13 + movq 40(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + movq %r14,560(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 
48(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%r14 + movq 48(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + movq %r15,568(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%r15 + movq 56(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + movq %r8,576(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%r8 + movq %r9,584(%rsp) + movq %r10,592(%rsp) + movq %r11,600(%rsp) + movq %r12,608(%rsp) + movq %r13,616(%rsp) + movq %r14,624(%rsp) + movq %r15,632(%rsp) + movq %r8,640(%rsp) + + + + + + jmp mont_reduce + + +.size mont_mul_a3b,.-mont_mul_a3b +.type sqr_reduce,@function +.align 16 +sqr_reduce: + movq 16(%rsp),%rcx + + + + movq %r10,%rbx + + movq %r11,%rax + mulq %rbx + movq %rax,528(%rsp) + movq %rdx,%r10 + movq %r12,%rax + mulq %rbx + addq %rax,%r10 + adcq $0,%rdx + movq %rdx,%r11 + movq %r13,%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %r14,%rax + mulq %rbx + addq %rax,%r12 + adcq $0,%rdx + movq %rdx,%r13 + movq %r15,%rax + mulq %rbx + addq %rax,%r13 + adcq $0,%rdx + movq %rdx,%r14 + movq %r8,%rax + mulq %rbx + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r15 + movq %r9,%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + movq %rdx,%rsi + + movq %r10,536(%rsp) + + + + + + movq 8(%rcx),%rbx + + movq 16(%rcx),%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + movq %r11,544(%rsp) + + movq %rdx,%r10 + movq 24(%rcx),%rax + mulq %rbx + addq %rax,%r12 + adcq $0,%rdx + addq %r10,%r12 + adcq $0,%rdx + movq %r12,552(%rsp) + + movq %rdx,%r10 + movq 32(%rcx),%rax + mulq %rbx + addq %rax,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + + movq %rdx,%r10 + movq 40(%rcx),%rax + mulq %rbx + addq %rax,%r14 + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + + movq %rdx,%r10 + movq %r8,%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + 
addq %r10,%r15 + adcq $0,%rdx + + movq %rdx,%r10 + movq %r9,%rax + mulq %rbx + addq %rax,%rsi + adcq $0,%rdx + addq %r10,%rsi + adcq $0,%rdx + + movq %rdx,%r11 + + + + + movq 16(%rcx),%rbx + + movq 24(%rcx),%rax + mulq %rbx + addq %rax,%r13 + adcq $0,%rdx + movq %r13,560(%rsp) + + movq %rdx,%r10 + movq 32(%rcx),%rax + mulq %rbx + addq %rax,%r14 + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + movq %r14,568(%rsp) + + movq %rdx,%r10 + movq 40(%rcx),%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + + movq %rdx,%r10 + movq %r8,%rax + mulq %rbx + addq %rax,%rsi + adcq $0,%rdx + addq %r10,%rsi + adcq $0,%rdx + + movq %rdx,%r10 + movq %r9,%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + addq %r10,%r11 + adcq $0,%rdx + + movq %rdx,%r12 + + + + + + movq 24(%rcx),%rbx + + movq 32(%rcx),%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + movq %r15,576(%rsp) + + movq %rdx,%r10 + movq 40(%rcx),%rax + mulq %rbx + addq %rax,%rsi + adcq $0,%rdx + addq %r10,%rsi + adcq $0,%rdx + movq %rsi,584(%rsp) + + movq %rdx,%r10 + movq %r8,%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + addq %r10,%r11 + adcq $0,%rdx + + movq %rdx,%r10 + movq %r9,%rax + mulq %rbx + addq %rax,%r12 + adcq $0,%rdx + addq %r10,%r12 + adcq $0,%rdx + + movq %rdx,%r15 + + + + + movq 32(%rcx),%rbx + + movq 40(%rcx),%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + movq %r11,592(%rsp) + + movq %rdx,%r10 + movq %r8,%rax + mulq %rbx + addq %rax,%r12 + adcq $0,%rdx + addq %r10,%r12 + adcq $0,%rdx + movq %r12,600(%rsp) + + movq %rdx,%r10 + movq %r9,%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + + movq %rdx,%r11 + + + + + movq 40(%rcx),%rbx + + movq %r8,%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + movq %r15,608(%rsp) + + movq %rdx,%r10 + movq %r9,%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + addq %r10,%r11 + adcq $0,%rdx + movq %r11,616(%rsp) + + movq %rdx,%r12 + + + + + movq %r8,%rbx + + movq %r9,%rax + mulq %rbx + addq %rax,%r12 + adcq $0,%rdx + movq %r12,624(%rsp) + + movq %rdx,632(%rsp) + + + movq 528(%rsp),%r10 + movq 536(%rsp),%r11 + movq 544(%rsp),%r12 + movq 552(%rsp),%r13 + movq 560(%rsp),%r14 + movq 568(%rsp),%r15 + + movq 24(%rcx),%rax + mulq %rax + movq %rax,%rdi + movq %rdx,%r8 + + addq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq %r15,%r15 + adcq $0,%r8 + + movq 0(%rcx),%rax + mulq %rax + movq %rax,520(%rsp) + movq %rdx,%rbx + + movq 8(%rcx),%rax + mulq %rax + + addq %rbx,%r10 + adcq %rax,%r11 + adcq $0,%rdx + + movq %rdx,%rbx + movq %r10,528(%rsp) + movq %r11,536(%rsp) + + movq 16(%rcx),%rax + mulq %rax + + addq %rbx,%r12 + adcq %rax,%r13 + adcq $0,%rdx + + movq %rdx,%rbx + + movq %r12,544(%rsp) + movq %r13,552(%rsp) + + xorq %rbp,%rbp + addq %rbx,%r14 + adcq %rdi,%r15 + adcq $0,%rbp + + movq %r14,560(%rsp) + movq %r15,568(%rsp) + + + + + movq 576(%rsp),%r10 + movq 584(%rsp),%r11 + movq 592(%rsp),%r12 + movq 600(%rsp),%r13 + movq 608(%rsp),%r14 + movq 616(%rsp),%r15 + movq 624(%rsp),%rdi + movq 632(%rsp),%rsi + + movq %r9,%rax + mulq %rax + movq %rax,%r9 + movq %rdx,%rbx + + addq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq %r15,%r15 + adcq %rdi,%rdi + adcq %rsi,%rsi + adcq $0,%rbx + + addq %rbp,%r10 + + movq 32(%rcx),%rax + mulq %rax + + addq %r8,%r10 + adcq %rax,%r11 + adcq $0,%rdx + + movq %rdx,%rbp + + movq %r10,576(%rsp) + movq %r11,584(%rsp) + + movq 40(%rcx),%rax + mulq %rax + + addq %rbp,%r12 + adcq %rax,%r13 + adcq $0,%rdx + + movq %rdx,%rbp + + movq 
%r12,592(%rsp) + movq %r13,600(%rsp) + + movq 48(%rcx),%rax + mulq %rax + + addq %rbp,%r14 + adcq %rax,%r15 + adcq $0,%rdx + + movq %r14,608(%rsp) + movq %r15,616(%rsp) + + addq %rdx,%rdi + adcq %r9,%rsi + adcq $0,%rbx + + movq %rdi,624(%rsp) + movq %rsi,632(%rsp) + movq %rbx,640(%rsp) + + jmp mont_reduce + + +.size sqr_reduce,.-sqr_reduce +.globl mod_exp_512 +.type mod_exp_512,@function +mod_exp_512: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r8 + subq $2688,%rsp + andq $-64,%rsp + + + movq %r8,0(%rsp) + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rcx,24(%rsp) +.Lbody: + + + + pxor %xmm4,%xmm4 + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqa %xmm4,512(%rsp) + movdqa %xmm4,528(%rsp) + movdqa %xmm4,608(%rsp) + movdqa %xmm4,624(%rsp) + movdqa %xmm0,544(%rsp) + movdqa %xmm1,560(%rsp) + movdqa %xmm2,576(%rsp) + movdqa %xmm3,592(%rsp) + + + movdqu 0(%rdx),%xmm0 + movdqu 16(%rdx),%xmm1 + movdqu 32(%rdx),%xmm2 + movdqu 48(%rdx),%xmm3 + + leaq 384(%rsp),%rbx + movq %rbx,136(%rsp) + call mont_reduce + + + leaq 448(%rsp),%rcx + xorq %rax,%rax + movq %rax,0(%rcx) + movq %rax,8(%rcx) + movq %rax,24(%rcx) + movq %rax,32(%rcx) + movq %rax,40(%rcx) + movq %rax,48(%rcx) + movq %rax,56(%rcx) + movq %rax,128(%rsp) + movq $1,16(%rcx) + + leaq 640(%rsp),%rbp + movq %rcx,%rsi + movq %rbp,%rdi + movq $8,%rax +loop_0: + movq (%rcx),%rbx + movw %bx,(%rdi) + shrq $16,%rbx + movw %bx,64(%rdi) + shrq $16,%rbx + movw %bx,128(%rdi) + shrq $16,%rbx + movw %bx,192(%rdi) + leaq 8(%rcx),%rcx + leaq 256(%rdi),%rdi + decq %rax + jnz loop_0 + movq $31,%rax + movq %rax,32(%rsp) + movq %rbp,40(%rsp) + + movq %rsi,136(%rsp) + movq 0(%rsi),%r10 + movq 8(%rsi),%r11 + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + movq 32(%rsi),%r14 + movq 40(%rsi),%r15 + movq 48(%rsi),%r8 + movq 56(%rsi),%r9 +init_loop: + leaq 384(%rsp),%rdi + call mont_mul_a3b + leaq 448(%rsp),%rsi + movq 40(%rsp),%rbp + addq $2,%rbp + movq %rbp,40(%rsp) + movq %rsi,%rcx + movq $8,%rax +loop_1: + movq (%rcx),%rbx + movw %bx,(%rbp) + shrq $16,%rbx + movw %bx,64(%rbp) + shrq $16,%rbx + movw %bx,128(%rbp) + shrq $16,%rbx + movw %bx,192(%rbp) + leaq 8(%rcx),%rcx + leaq 256(%rbp),%rbp + decq %rax + jnz loop_1 + movq 32(%rsp),%rax + subq $1,%rax + movq %rax,32(%rsp) + jne init_loop + + + + movdqa %xmm0,64(%rsp) + movdqa %xmm1,80(%rsp) + movdqa %xmm2,96(%rsp) + movdqa %xmm3,112(%rsp) + + + + + + movl 126(%rsp),%eax + movq %rax,%rdx + shrq $11,%rax + andl $2047,%edx + movl %edx,126(%rsp) + leaq 640(%rsp,%rax,2),%rsi + movq 8(%rsp),%rdx + movq $4,%rbp +loop_2: + movzwq 192(%rsi),%rbx + movzwq 448(%rsi),%rax + shlq $16,%rbx + shlq $16,%rax + movw 128(%rsi),%bx + movw 384(%rsi),%ax + shlq $16,%rbx + shlq $16,%rax + movw 64(%rsi),%bx + movw 320(%rsi),%ax + shlq $16,%rbx + shlq $16,%rax + movw 0(%rsi),%bx + movw 256(%rsi),%ax + movq %rbx,0(%rdx) + movq %rax,8(%rdx) + leaq 512(%rsi),%rsi + leaq 16(%rdx),%rdx + subq $1,%rbp + jnz loop_2 + movq $505,48(%rsp) + + movq 8(%rsp),%rcx + movq %rcx,136(%rsp) + movq 0(%rcx),%r10 + movq 8(%rcx),%r11 + movq 16(%rcx),%r12 + movq 24(%rcx),%r13 + movq 32(%rcx),%r14 + movq 40(%rcx),%r15 + movq 48(%rcx),%r8 + movq 56(%rcx),%r9 + jmp sqr_2 + +main_loop_a3b: + call sqr_reduce + call sqr_reduce + call sqr_reduce +sqr_2: + call sqr_reduce + call sqr_reduce + + + + movq 48(%rsp),%rcx + movq %rcx,%rax + shrq $4,%rax + movl 64(%rsp,%rax,2),%edx + andq $15,%rcx + shrq %cl,%rdx + andq $31,%rdx + + leaq 640(%rsp,%rdx,2),%rsi + leaq 448(%rsp),%rdx + 
movq %rdx,%rdi + movq $4,%rbp +loop_3: + movzwq 192(%rsi),%rbx + movzwq 448(%rsi),%rax + shlq $16,%rbx + shlq $16,%rax + movw 128(%rsi),%bx + movw 384(%rsi),%ax + shlq $16,%rbx + shlq $16,%rax + movw 64(%rsi),%bx + movw 320(%rsi),%ax + shlq $16,%rbx + shlq $16,%rax + movw 0(%rsi),%bx + movw 256(%rsi),%ax + movq %rbx,0(%rdx) + movq %rax,8(%rdx) + leaq 512(%rsi),%rsi + leaq 16(%rdx),%rdx + subq $1,%rbp + jnz loop_3 + movq 8(%rsp),%rsi + call mont_mul_a3b + + + + movq 48(%rsp),%rcx + subq $5,%rcx + movq %rcx,48(%rsp) + jge main_loop_a3b + + + +end_main_loop_a3b: + + + movq 8(%rsp),%rdx + pxor %xmm4,%xmm4 + movdqu 0(%rdx),%xmm0 + movdqu 16(%rdx),%xmm1 + movdqu 32(%rdx),%xmm2 + movdqu 48(%rdx),%xmm3 + movdqa %xmm4,576(%rsp) + movdqa %xmm4,592(%rsp) + movdqa %xmm4,608(%rsp) + movdqa %xmm4,624(%rsp) + movdqa %xmm0,512(%rsp) + movdqa %xmm1,528(%rsp) + movdqa %xmm2,544(%rsp) + movdqa %xmm3,560(%rsp) + call mont_reduce + + + + movq 8(%rsp),%rax + movq 0(%rax),%r8 + movq 8(%rax),%r9 + movq 16(%rax),%r10 + movq 24(%rax),%r11 + movq 32(%rax),%r12 + movq 40(%rax),%r13 + movq 48(%rax),%r14 + movq 56(%rax),%r15 + + + movq 24(%rsp),%rbx + addq $512,%rbx + + subq 0(%rbx),%r8 + sbbq 8(%rbx),%r9 + sbbq 16(%rbx),%r10 + sbbq 24(%rbx),%r11 + sbbq 32(%rbx),%r12 + sbbq 40(%rbx),%r13 + sbbq 48(%rbx),%r14 + sbbq 56(%rbx),%r15 + + + movq 0(%rax),%rsi + movq 8(%rax),%rdi + movq 16(%rax),%rcx + movq 24(%rax),%rdx + cmovncq %r8,%rsi + cmovncq %r9,%rdi + cmovncq %r10,%rcx + cmovncq %r11,%rdx + movq %rsi,0(%rax) + movq %rdi,8(%rax) + movq %rcx,16(%rax) + movq %rdx,24(%rax) + + movq 32(%rax),%rsi + movq 40(%rax),%rdi + movq 48(%rax),%rcx + movq 56(%rax),%rdx + cmovncq %r12,%rsi + cmovncq %r13,%rdi + cmovncq %r14,%rcx + cmovncq %r15,%rdx + movq %rsi,32(%rax) + movq %rdi,40(%rax) + movq %rcx,48(%rax) + movq %rdx,56(%rax) + + movq 0(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbx + movq 40(%rsi),%rbp + leaq 48(%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size mod_exp_512, . 
- mod_exp_512 diff --git a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s index 2dbcffc59d..ea12bd408c 100644 --- a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s +++ b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s @@ -5,6 +5,16 @@ .type bn_mul_mont,@function .align 16 bn_mul_mont: + testl $3,%r9d + jnz .Lmul_enter + cmpl $8,%r9d + jb .Lmul_enter + cmpq %rsi,%rdx + jne .Lmul4x_enter + jmp .Lsqr4x_enter + +.align 16 +.Lmul_enter: pushq %rbx pushq %rbp pushq %r12 @@ -20,48 +30,63 @@ bn_mul_mont: andq $-1024,%rsp movq %r11,8(%rsp,%r9,8) -.Lprologue: +.Lmul_body: movq %rdx,%r12 - movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax xorq %r14,%r14 xorq %r15,%r15 - movq (%r12),%rbx - movq (%rsi),%rax + movq %r8,%rbp mulq %rbx movq %rax,%r10 - movq %rdx,%r11 + movq (%rcx),%rax - imulq %r8,%rax - movq %rax,%rbp + imulq %r10,%rbp + movq %rdx,%r11 - mulq (%rcx) - addq %r10,%rax + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax adcq $0,%rdx movq %rdx,%r13 leaq 1(%r15),%r15 + jmp .L1st_enter + +.align 16 .L1st: + addq %rax,%r13 movq (%rsi,%r15,8),%rax - mulq %rbx - addq %r11,%rax adcq $0,%rdx - movq %rax,%r10 + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.L1st_enter: + mulq %rbx + addq %rax,%r11 movq (%rcx,%r15,8),%rax - movq %rdx,%r11 + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 mulq %rbp - addq %r13,%rax - leaq 1(%r15),%r15 + cmpq %r9,%r15 + jne .L1st + + addq %rax,%r13 + movq (%rsi),%rax adcq $0,%rdx - addq %r10,%rax + addq %r11,%r13 adcq $0,%rdx - movq %rax,-16(%rsp,%r15,8) - cmpq %r9,%r15 + movq %r13,-16(%rsp,%r15,8) movq %rdx,%r13 - jl .L1st + movq %r10,%r11 xorq %rdx,%rdx addq %r11,%r13 @@ -70,50 +95,64 @@ bn_mul_mont: movq %rdx,(%rsp,%r9,8) leaq 1(%r14),%r14 -.align 4 + jmp .Louter +.align 16 .Louter: - xorq %r15,%r15 - movq (%r12,%r14,8),%rbx - movq (%rsi),%rax + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 mulq %rbx - addq (%rsp),%rax + addq %rax,%r10 + movq (%rcx),%rax adcq $0,%rdx - movq %rax,%r10 - movq %rdx,%r11 - imulq %r8,%rax - movq %rax,%rbp + imulq %r10,%rbp + movq %rdx,%r11 - mulq (%rcx,%r15,8) - addq %r10,%rax - movq 8(%rsp),%r10 + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax adcq $0,%rdx + movq 8(%rsp),%r10 movq %rdx,%r13 leaq 1(%r15),%r15 -.align 4 + jmp .Linner_enter + +.align 16 .Linner: + addq %rax,%r13 movq (%rsi,%r15,8),%rax - mulq %rbx - addq %r11,%rax adcq $0,%rdx - addq %rax,%r10 + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.Linner_enter: + mulq %rbx + addq %rax,%r11 movq (%rcx,%r15,8),%rax adcq $0,%rdx + addq %r11,%r10 movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 mulq %rbp - addq %r13,%rax - leaq 1(%r15),%r15 - adcq $0,%rdx - addq %r10,%rax + cmpq %r9,%r15 + jne .Linner + + addq %rax,%r13 + movq (%rsi),%rax adcq $0,%rdx + addq %r10,%r13 movq (%rsp,%r15,8),%r10 - cmpq %r9,%r15 - movq %rax,-16(%rsp,%r15,8) + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) movq %rdx,%r13 - jl .Linner xorq %rdx,%rdx addq %r11,%r13 @@ -127,35 +166,434 @@ bn_mul_mont: cmpq %r9,%r14 jl .Louter - leaq (%rsp),%rsi - leaq -1(%r9),%r15 - - movq (%rsi),%rax xorq %r14,%r14 + movq (%rsp),%rax + leaq (%rsp),%rsi + movq %r9,%r15 jmp .Lsub .align 16 .Lsub: sbbq (%rcx,%r14,8),%rax movq %rax,(%rdi,%r14,8) - decq %r15 movq 8(%rsi,%r14,8),%rax leaq 1(%r14),%r14 - jge .Lsub + decq %r15 + jnz .Lsub sbbq $0,%rax + xorq %r14,%r14 andq %rax,%rsi notq %rax movq %rdi,%rcx andq %rax,%rcx - leaq -1(%r9),%r15 + movq %r9,%r15 orq %rcx,%rsi .align 16 
.Lcopy: + movq (%rsi,%r14,8),%rax + movq %r14,(%rsp,%r14,8) + movq %rax,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz .Lcopy + + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lmul_epilogue: + .byte 0xf3,0xc3 +.size bn_mul_mont,.-bn_mul_mont +.type bn_mul4x_mont,@function +.align 16 +bn_mul4x_mont: +.Lmul4x_enter: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + leaq 4(%r9),%r10 + movq %rsp,%r11 + negq %r10 + leaq (%rsp,%r10,8),%rsp + andq $-1024,%rsp + + movq %r11,8(%rsp,%r9,8) +.Lmul4x_body: + movq %rdi,16(%rsp,%r9,8) + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp .L1st4x +.align 16 +.L1st4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jl .L1st4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + leaq 1(%r14),%r14 +.align 4 +.Louter4x: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq (%rsp),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + addq 8(%rsp),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp .Linner4x +.align 16 +.Linner4x: + mulq %rbx + addq %rax,%r10 + movq 
-16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi movq (%rsi,%r15,8),%rax - movq %rax,(%rdi,%r15,8) - movq %r14,(%rsp,%r15,8) + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq 8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jl .Linner4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 1(%r14),%r14 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%rsp,%r9,8),%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + cmpq %r9,%r14 + jl .Louter4x + movq 16(%rsp,%r9,8),%rdi + movq 0(%rsp),%rax + pxor %xmm0,%xmm0 + movq 8(%rsp),%rdx + shrq $2,%r9 + leaq (%rsp),%rsi + xorq %r14,%r14 + + subq 0(%rcx),%rax + movq 16(%rsi),%rbx + movq 24(%rsi),%rbp + sbbq 8(%rcx),%rdx + leaq -1(%r9),%r15 + jmp .Lsub4x +.align 16 +.Lsub4x: + movq %rax,0(%rdi,%r14,8) + movq %rdx,8(%rdi,%r14,8) + sbbq 16(%rcx,%r14,8),%rbx + movq 32(%rsi,%r14,8),%rax + movq 40(%rsi,%r14,8),%rdx + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + movq %rbp,24(%rdi,%r14,8) + sbbq 32(%rcx,%r14,8),%rax + movq 48(%rsi,%r14,8),%rbx + movq 56(%rsi,%r14,8),%rbp + sbbq 40(%rcx,%r14,8),%rdx + leaq 4(%r14),%r14 + decq %r15 + jnz .Lsub4x + + movq %rax,0(%rdi,%r14,8) + movq 32(%rsi,%r14,8),%rax + sbbq 16(%rcx,%r14,8),%rbx + movq %rdx,8(%rdi,%r14,8) + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + + sbbq $0,%rax + movq %rbp,24(%rdi,%r14,8) + xorq %r14,%r14 + andq %rax,%rsi + notq %rax + movq %rdi,%rcx + andq %rax,%rcx + leaq -1(%r9),%r15 + orq %rcx,%rsi + + movdqu (%rsi),%xmm1 + movdqa %xmm0,(%rsp) + movdqu %xmm1,(%rdi) + jmp .Lcopy4x +.align 16 +.Lcopy4x: + movdqu 16(%rsi,%r14,1),%xmm2 + movdqu 32(%rsi,%r14,1),%xmm1 + movdqa %xmm0,16(%rsp,%r14,1) + movdqu %xmm2,16(%rdi,%r14,1) + movdqa %xmm0,32(%rsp,%r14,1) + movdqu %xmm1,32(%rdi,%r14,1) + leaq 32(%r14),%r14 decq %r15 - jge .Lcopy + jnz .Lcopy4x + shlq $2,%r9 + movdqu 16(%rsi,%r14,1),%xmm2 + movdqa %xmm0,16(%rsp,%r14,1) + movdqu %xmm2,16(%rdi,%r14,1) movq 8(%rsp,%r9,8),%rsi movq $1,%rax movq (%rsi),%r15 @@ -165,8 +603,773 @@ bn_mul_mont: movq 32(%rsi),%rbp movq 40(%rsi),%rbx leaq 48(%rsi),%rsp 
-.Lepilogue: +.Lmul4x_epilogue: .byte 0xf3,0xc3 -.size bn_mul_mont,.-bn_mul_mont +.size bn_mul4x_mont,.-bn_mul4x_mont +.type bn_sqr4x_mont,@function +.align 16 +bn_sqr4x_mont: +.Lsqr4x_enter: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + shll $3,%r9d + xorq %r10,%r10 + movq %rsp,%r11 + subq %r9,%r10 + movq (%r8),%r8 + leaq -72(%rsp,%r10,2),%rsp + andq $-1024,%rsp + + + + + + + + + + + + movq %rdi,32(%rsp) + movq %rcx,40(%rsp) + movq %r8,48(%rsp) + movq %r11,56(%rsp) +.Lsqr4x_body: + + + + + + + + leaq 32(%r10),%rbp + leaq (%rsi,%r9,1),%rsi + + movq %r9,%rcx + + + movq -32(%rsi,%rbp,1),%r14 + leaq 64(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + movq %r10,-24(%rdi,%rbp,1) + + xorq %r10,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,-16(%rdi,%rbp,1) + + leaq -16(%rbp),%rcx + + + movq 8(%rsi,%rcx,1),%rbx + mulq %r15 + movq %rax,%r12 + movq %rbx,%rax + movq %rdx,%r13 + + xorq %r11,%r11 + addq %r12,%r10 + leaq 16(%rcx),%rcx + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-8(%rdi,%rcx,1) + jmp .Lsqr4x_1st + +.align 16 +.Lsqr4x_1st: + movq (%rsi,%rcx,1),%rbx + xorq %r12,%r12 + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + adcq %rdx,%r12 + + xorq %r10,%r10 + addq %r13,%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,(%rdi,%rcx,1) + + + movq 8(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + + xorq %r11,%r11 + addq %r12,%r10 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,8(%rdi,%rcx,1) + + movq 16(%rsi,%rcx,1),%rbx + xorq %r12,%r12 + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + adcq %rdx,%r12 + + xorq %r10,%r10 + addq %r13,%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,16(%rdi,%rcx,1) + + + movq 24(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + + xorq %r11,%r11 + addq %r12,%r10 + leaq 32(%rcx),%rcx + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne .Lsqr4x_1st + + xorq %r12,%r12 + addq %r11,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + adcq %rdx,%r12 + + movq %r13,(%rdi) + leaq 16(%rbp),%rbp + movq %r12,8(%rdi) + jmp .Lsqr4x_outer + +.align 16 +.Lsqr4x_outer: + movq -32(%rsi,%rbp,1),%r14 + leaq 64(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + movq -24(%rdi,%rbp,1),%r10 + xorq %r11,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-24(%rdi,%rbp,1) + + xorq %r10,%r10 + addq -16(%rdi,%rbp,1),%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,-16(%rdi,%rbp,1) + + leaq -16(%rbp),%rcx + xorq %r12,%r12 + + + movq 8(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + addq 8(%rdi,%rcx,1),%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + + xorq %r11,%r11 + addq %r12,%r10 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,8(%rdi,%rcx,1) + + leaq 16(%rcx),%rcx + jmp .Lsqr4x_inner + +.align 16 +.Lsqr4x_inner: + movq (%rsi,%rcx,1),%rbx + xorq %r12,%r12 + addq (%rdi,%rcx,1),%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax 
+ adcq %rdx,%r12 + + xorq %r10,%r10 + addq %r13,%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,(%rdi,%rcx,1) + + movq 8(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + addq 8(%rdi,%rcx,1),%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + + xorq %r11,%r11 + addq %r12,%r10 + leaq 16(%rcx),%rcx + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne .Lsqr4x_inner + + xorq %r12,%r12 + addq %r11,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + adcq %rdx,%r12 + + movq %r13,(%rdi) + movq %r12,8(%rdi) + + addq $16,%rbp + jnz .Lsqr4x_outer + + + movq -32(%rsi),%r14 + leaq 64(%rsp,%r9,2),%rdi + movq -24(%rsi),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi),%rbx + movq %rax,%r15 + + xorq %r11,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-24(%rdi) + + xorq %r10,%r10 + addq %r13,%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,-16(%rdi) + + movq -8(%rsi),%rbx + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq $0,%rdx + + xorq %r11,%r11 + addq %r12,%r10 + movq %rdx,%r13 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-8(%rdi) + + xorq %r12,%r12 + addq %r11,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq -16(%rsi),%rax + adcq %rdx,%r12 + + movq %r13,(%rdi) + movq %r12,8(%rdi) + + mulq %rbx + addq $16,%rbp + xorq %r14,%r14 + subq %r9,%rbp + xorq %r15,%r15 + + addq %r12,%rax + adcq $0,%rdx + movq %rax,8(%rdi) + movq %rdx,16(%rdi) + movq %r15,24(%rdi) + + movq -16(%rsi,%rbp,1),%rax + leaq 64(%rsp,%r9,2),%rdi + xorq %r10,%r10 + movq -24(%rdi,%rbp,2),%r11 + + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi,%rbp,2),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,-32(%rdi,%rbp,2) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi,%rbp,2) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 0(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 8(%rdi,%rbp,2),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,-16(%rdi,%rbp,2) + adcq %rdx,%r8 + leaq 16(%rbp),%rbp + movq %r8,-40(%rdi,%rbp,2) + sbbq %r15,%r15 + jmp .Lsqr4x_shift_n_add + +.align 16 +.Lsqr4x_shift_n_add: + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi,%rbp,2),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,-32(%rdi,%rbp,2) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi,%rbp,2) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 0(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 8(%rdi,%rbp,2),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,-16(%rdi,%rbp,2) + adcq %rdx,%r8 + + leaq (%r14,%r10,2),%r12 + movq %r8,-8(%rdi,%rbp,2) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi,%rbp,2),%r11 + adcq %rax,%r12 + movq 8(%rsi,%rbp,1),%rax + movq %r12,0(%rdi,%rbp,2) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi,%rbp,2) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 
+ shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi,%rbp,2),%r11 + adcq %rax,%rbx + movq 16(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi,%rbp,2) + adcq %rdx,%r8 + movq %r8,24(%rdi,%rbp,2) + sbbq %r15,%r15 + addq $32,%rbp + jnz .Lsqr4x_shift_n_add + + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + mulq %rax + negq %r15 + adcq %rax,%rbx + adcq %rdx,%r8 + movq %rbx,-16(%rdi) + movq %r8,-8(%rdi) + movq 40(%rsp),%rsi + movq 48(%rsp),%r8 + xorq %rcx,%rcx + movq %r9,0(%rsp) + subq %r9,%rcx + movq 64(%rsp),%r10 + movq %r8,%r14 + leaq 64(%rsp,%r9,2),%rax + leaq 64(%rsp,%r9,1),%rdi + movq %rax,8(%rsp) + leaq (%rsi,%r9,1),%rsi + xorq %rbp,%rbp + + movq 0(%rsi,%rcx,1),%rax + movq 8(%rsi,%rcx,1),%r9 + imulq %r10,%r14 + movq %rax,%rbx + jmp .Lsqr4x_mont_outer + +.align 16 +.Lsqr4x_mont_outer: + xorq %r11,%r11 + mulq %r14 + addq %rax,%r10 + movq %r9,%rax + adcq %rdx,%r11 + movq %r8,%r15 + + xorq %r10,%r10 + addq 8(%rdi,%rcx,1),%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + + imulq %r11,%r15 + + movq 16(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + addq %r11,%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + movq %r12,8(%rdi,%rcx,1) + + xorq %r11,%r11 + addq 16(%rdi,%rcx,1),%r10 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %r9,%rax + adcq %rdx,%r11 + + movq 24(%rsi,%rcx,1),%r9 + xorq %r12,%r12 + addq %r10,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq %r9,%rax + adcq %rdx,%r12 + movq %r13,16(%rdi,%rcx,1) + + xorq %r10,%r10 + addq 24(%rdi,%rcx,1),%r11 + leaq 32(%rcx),%rcx + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + jmp .Lsqr4x_mont_inner + +.align 16 +.Lsqr4x_mont_inner: + movq (%rsi,%rcx,1),%rbx + xorq %r13,%r13 + addq %r11,%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + movq %r12,-8(%rdi,%rcx,1) + + xorq %r11,%r11 + addq (%rdi,%rcx,1),%r10 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %r9,%rax + adcq %rdx,%r11 + + movq 8(%rsi,%rcx,1),%r9 + xorq %r12,%r12 + addq %r10,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq %r9,%rax + adcq %rdx,%r12 + movq %r13,(%rdi,%rcx,1) + + xorq %r10,%r10 + addq 8(%rdi,%rcx,1),%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + + + movq 16(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + addq %r11,%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + movq %r12,8(%rdi,%rcx,1) + + xorq %r11,%r11 + addq 16(%rdi,%rcx,1),%r10 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %r9,%rax + adcq %rdx,%r11 + + movq 24(%rsi,%rcx,1),%r9 + xorq %r12,%r12 + addq %r10,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq %r9,%rax + adcq %rdx,%r12 + movq %r13,16(%rdi,%rcx,1) + + xorq %r10,%r10 + addq 24(%rdi,%rcx,1),%r11 + leaq 32(%rcx),%rcx + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + cmpq $0,%rcx + jne .Lsqr4x_mont_inner + + subq 0(%rsp),%rcx + movq %r8,%r14 + + xorq %r13,%r13 + addq %r11,%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %r9,%rax + adcq %rdx,%r13 + movq %r12,-8(%rdi) + + xorq %r11,%r11 + addq (%rdi),%r10 + adcq $0,%r11 
+ movq 0(%rsi,%rcx,1),%rbx + addq %rbp,%r10 + adcq $0,%r11 + + imulq 16(%rdi,%rcx,1),%r14 + xorq %r12,%r12 + movq 8(%rsi,%rcx,1),%r9 + addq %r10,%r13 + movq 16(%rdi,%rcx,1),%r10 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + adcq %rdx,%r12 + movq %r13,(%rdi) + + xorq %rbp,%rbp + addq 8(%rdi),%r12 + adcq %rbp,%rbp + addq %r11,%r12 + leaq 16(%rdi),%rdi + adcq $0,%rbp + movq %r12,-8(%rdi) + cmpq 8(%rsp),%rdi + jb .Lsqr4x_mont_outer + + movq 0(%rsp),%r9 + movq %rbp,(%rdi) + movq 64(%rsp,%r9,1),%rax + leaq 64(%rsp,%r9,1),%rbx + movq 40(%rsp),%rsi + shrq $5,%r9 + movq 8(%rbx),%rdx + xorq %rbp,%rbp + + movq 32(%rsp),%rdi + subq 0(%rsi),%rax + movq 16(%rbx),%r10 + movq 24(%rbx),%r11 + sbbq 8(%rsi),%rdx + leaq -1(%r9),%rcx + jmp .Lsqr4x_sub +.align 16 +.Lsqr4x_sub: + movq %rax,0(%rdi,%rbp,8) + movq %rdx,8(%rdi,%rbp,8) + sbbq 16(%rsi,%rbp,8),%r10 + movq 32(%rbx,%rbp,8),%rax + movq 40(%rbx,%rbp,8),%rdx + sbbq 24(%rsi,%rbp,8),%r11 + movq %r10,16(%rdi,%rbp,8) + movq %r11,24(%rdi,%rbp,8) + sbbq 32(%rsi,%rbp,8),%rax + movq 48(%rbx,%rbp,8),%r10 + movq 56(%rbx,%rbp,8),%r11 + sbbq 40(%rsi,%rbp,8),%rdx + leaq 4(%rbp),%rbp + decq %rcx + jnz .Lsqr4x_sub + + movq %rax,0(%rdi,%rbp,8) + movq 32(%rbx,%rbp,8),%rax + sbbq 16(%rsi,%rbp,8),%r10 + movq %rdx,8(%rdi,%rbp,8) + sbbq 24(%rsi,%rbp,8),%r11 + movq %r10,16(%rdi,%rbp,8) + + sbbq $0,%rax + movq %r11,24(%rdi,%rbp,8) + xorq %rbp,%rbp + andq %rax,%rbx + notq %rax + movq %rdi,%rsi + andq %rax,%rsi + leaq -1(%r9),%rcx + orq %rsi,%rbx + + pxor %xmm0,%xmm0 + leaq 64(%rsp,%r9,8),%rsi + movdqu (%rbx),%xmm1 + leaq (%rsi,%r9,8),%rsi + movdqa %xmm0,64(%rsp) + movdqa %xmm0,(%rsi) + movdqu %xmm1,(%rdi) + jmp .Lsqr4x_copy +.align 16 +.Lsqr4x_copy: + movdqu 16(%rbx,%rbp,1),%xmm2 + movdqu 32(%rbx,%rbp,1),%xmm1 + movdqa %xmm0,80(%rsp,%rbp,1) + movdqa %xmm0,96(%rsp,%rbp,1) + movdqa %xmm0,16(%rsi,%rbp,1) + movdqa %xmm0,32(%rsi,%rbp,1) + movdqu %xmm2,16(%rdi,%rbp,1) + movdqu %xmm1,32(%rdi,%rbp,1) + leaq 32(%rbp),%rbp + decq %rcx + jnz .Lsqr4x_copy + + movdqu 16(%rbx,%rbp,1),%xmm2 + movdqa %xmm0,80(%rsp,%rbp,1) + movdqa %xmm0,16(%rsi,%rbp,1) + movdqu %xmm2,16(%rdi,%rbp,1) + movq 56(%rsp),%rsi + movq $1,%rax + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lsqr4x_epilogue: + .byte 0xf3,0xc3 +.size bn_sqr4x_mont,.-bn_sqr4x_mont .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 16 diff --git a/deps/openssl/asm/x64-elf-gas/rc4/rc4-md5-x86_64.s b/deps/openssl/asm/x64-elf-gas/rc4/rc4-md5-x86_64.s new file mode 100644 index 0000000000..501027a801 --- /dev/null +++ b/deps/openssl/asm/x64-elf-gas/rc4/rc4-md5-x86_64.s @@ -0,0 +1,1260 @@ +.text + +.align 16 + +.globl rc4_md5_enc +.type rc4_md5_enc,@function +rc4_md5_enc: + cmpq $0,%r9 + je .Labort + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $40,%rsp +.Lbody: + movq %rcx,%r11 + movq %r9,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %r8,%r15 + xorq %rbp,%rbp + xorq %rcx,%rcx + + leaq 8(%rdi),%rdi + movb -8(%rdi),%bpl + movb -4(%rdi),%cl + + incb %bpl + subq %r13,%r14 + movl (%rdi,%rbp,4),%eax + addb %al,%cl + leaq (%rdi,%rbp,4),%rsi + shlq $6,%r12 + addq %r15,%r12 + movq %r12,16(%rsp) + + movq %r11,24(%rsp) + movl 0(%r11),%r8d + movl 4(%r11),%r9d + movl 8(%r11),%r10d + movl 
12(%r11),%r11d + jmp .Loop + +.align 16 +.Loop: + movl %r8d,0(%rsp) + movl %r9d,4(%rsp) + movl %r10d,8(%rsp) + movl %r11d,%r12d + movl %r11d,12(%rsp) + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 0(%r15),%r8d + addb %dl,%al + movl 4(%rsi),%ebx + addl $3614090360,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,0(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 4(%r15),%r11d + addb %dl,%bl + movl 8(%rsi),%eax + addl $3905402710,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,4(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 8(%r15),%r10d + addb %dl,%al + movl 12(%rsi),%ebx + addl $606105819,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,8(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 12(%r15),%r9d + addb %dl,%bl + movl 16(%rsi),%eax + addl $3250441966,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,12(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 16(%r15),%r8d + addb %dl,%al + movl 20(%rsi),%ebx + addl $4118548399,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,16(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 20(%r15),%r11d + addb %dl,%bl + movl 24(%rsi),%eax + addl $1200080426,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,20(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 24(%r15),%r10d + addb %dl,%al + movl 28(%rsi),%ebx + addl $2821735955,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,24(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 28(%r15),%r9d + addb %dl,%bl + movl 32(%rsi),%eax + addl $4249261313,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,28(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 32(%r15),%r8d + addb %dl,%al + movl 36(%rsi),%ebx + addl $1770035416,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,32(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 36(%r15),%r11d + addb %dl,%bl + movl 40(%rsi),%eax + addl $2336552879,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,36(%rsi) + addl %r12d,%r11d + 
addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 40(%r15),%r10d + addb %dl,%al + movl 44(%rsi),%ebx + addl $4294925233,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,40(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 44(%r15),%r9d + addb %dl,%bl + movl 48(%rsi),%eax + addl $2304563134,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,44(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 48(%r15),%r8d + addb %dl,%al + movl 52(%rsi),%ebx + addl $1804603682,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,48(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 52(%r15),%r11d + addb %dl,%bl + movl 56(%rsi),%eax + addl $4254626195,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,52(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 56(%r15),%r10d + addb %dl,%al + movl 60(%rsi),%ebx + addl $2792965006,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,56(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu (%r13),%xmm2 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 60(%r15),%r9d + addb %dl,%bl + movl 64(%rsi),%eax + addl $1236535329,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,60(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r10d,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + psllq $8,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm1,%xmm2 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 4(%r15),%r8d + addb %dl,%al + movl 68(%rsi),%ebx + addl $4129170786,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,64(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 24(%r15),%r11d + addb %dl,%bl + movl 72(%rsi),%eax + addl $3225465664,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,68(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 44(%r15),%r10d + addb %dl,%al + movl 76(%rsi),%ebx + addl $643717713,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,72(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 0(%r15),%r9d + addb %dl,%bl + movl 80(%rsi),%eax + addl $3921069994,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + 
movl %edx,76(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 20(%r15),%r8d + addb %dl,%al + movl 84(%rsi),%ebx + addl $3593408605,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,80(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 40(%r15),%r11d + addb %dl,%bl + movl 88(%rsi),%eax + addl $38016083,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,84(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 60(%r15),%r10d + addb %dl,%al + movl 92(%rsi),%ebx + addl $3634488961,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,88(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 16(%r15),%r9d + addb %dl,%bl + movl 96(%rsi),%eax + addl $3889429448,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,92(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 36(%r15),%r8d + addb %dl,%al + movl 100(%rsi),%ebx + addl $568446438,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,96(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 56(%r15),%r11d + addb %dl,%bl + movl 104(%rsi),%eax + addl $3275163606,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,100(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 12(%r15),%r10d + addb %dl,%al + movl 108(%rsi),%ebx + addl $4107603335,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,104(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 32(%r15),%r9d + addb %dl,%bl + movl 112(%rsi),%eax + addl $1163531501,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,108(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 52(%r15),%r8d + addb %dl,%al + movl 116(%rsi),%ebx + addl $2850285829,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,112(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 8(%r15),%r11d + addb %dl,%bl + movl 120(%rsi),%eax + addl $4243563512,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,116(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll 
$9,%r11d + movl %r8d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 28(%r15),%r10d + addb %dl,%al + movl 124(%rsi),%ebx + addl $1735328473,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,120(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 16(%r13),%xmm3 + addb $32,%bpl + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 48(%r15),%r9d + addb %dl,%bl + movl 0(%rdi,%rbp,4),%eax + addl $2368359562,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,124(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r11d,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movq %rcx,%rsi + xorq %rcx,%rcx + movb %sil,%cl + leaq (%rdi,%rbp,4),%rsi + psllq $8,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 20(%r15),%r8d + addb %dl,%al + movl 4(%rsi),%ebx + addl $4294588738,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,0(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 32(%r15),%r11d + addb %dl,%bl + movl 8(%rsi),%eax + addl $2272392833,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,4(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 44(%r15),%r10d + addb %dl,%al + movl 12(%rsi),%ebx + addl $1839030562,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,8(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 56(%r15),%r9d + addb %dl,%bl + movl 16(%rsi),%eax + addl $4259657740,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,12(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 4(%r15),%r8d + addb %dl,%al + movl 20(%rsi),%ebx + addl $2763975236,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,16(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 16(%r15),%r11d + addb %dl,%bl + movl 24(%rsi),%eax + addl $1272893353,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,20(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 28(%r15),%r10d + addb %dl,%al + movl 28(%rsi),%ebx + addl $4139469664,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,24(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 40(%r15),%r9d + addb %dl,%bl + movl 32(%rsi),%eax + addl $3200236656,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,28(%rsi) + addb %al,%cl + roll $23,%r9d + 
movl %r11d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 52(%r15),%r8d + addb %dl,%al + movl 36(%rsi),%ebx + addl $681279174,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,32(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 0(%r15),%r11d + addb %dl,%bl + movl 40(%rsi),%eax + addl $3936430074,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,36(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 12(%r15),%r10d + addb %dl,%al + movl 44(%rsi),%ebx + addl $3572445317,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,40(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 24(%r15),%r9d + addb %dl,%bl + movl 48(%rsi),%eax + addl $76029189,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,44(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 36(%r15),%r8d + addb %dl,%al + movl 52(%rsi),%ebx + addl $3654602809,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,48(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 48(%r15),%r11d + addb %dl,%bl + movl 56(%rsi),%eax + addl $3873151461,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,52(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 60(%r15),%r10d + addb %dl,%al + movl 60(%rsi),%ebx + addl $530742520,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,56(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 32(%r13),%xmm4 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 8(%r15),%r9d + addb %dl,%bl + movl 64(%rsi),%eax + addl $3299628645,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,60(%rsi) + addb %al,%cl + roll $23,%r9d + movl $-1,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + psllq $8,%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm1,%xmm4 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 0(%r15),%r8d + addb %dl,%al + movl 68(%rsi),%ebx + addl $4096336452,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,64(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 28(%r15),%r11d + addb %dl,%bl + movl 72(%rsi),%eax + addl $1126891415,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,68(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl 
%eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 56(%r15),%r10d + addb %dl,%al + movl 76(%rsi),%ebx + addl $2878612391,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,72(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 20(%r15),%r9d + addb %dl,%bl + movl 80(%rsi),%eax + addl $4237533241,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,76(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 48(%r15),%r8d + addb %dl,%al + movl 84(%rsi),%ebx + addl $1700485571,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,80(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 12(%r15),%r11d + addb %dl,%bl + movl 88(%rsi),%eax + addl $2399980690,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,84(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 40(%r15),%r10d + addb %dl,%al + movl 92(%rsi),%ebx + addl $4293915773,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,88(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 4(%r15),%r9d + addb %dl,%bl + movl 96(%rsi),%eax + addl $2240044497,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,92(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 32(%r15),%r8d + addb %dl,%al + movl 100(%rsi),%ebx + addl $1873313359,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,96(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 60(%r15),%r11d + addb %dl,%bl + movl 104(%rsi),%eax + addl $4264355552,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,100(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 24(%r15),%r10d + addb %dl,%al + movl 108(%rsi),%ebx + addl $2734768916,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,104(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 52(%r15),%r9d + addb %dl,%bl + movl 112(%rsi),%eax + addl $1309151649,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,108(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 16(%r15),%r8d + addb %dl,%al + movl 
116(%rsi),%ebx + addl $4149444226,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,112(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 44(%r15),%r11d + addb %dl,%bl + movl 120(%rsi),%eax + addl $3174756917,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,116(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 8(%r15),%r10d + addb %dl,%al + movl 124(%rsi),%ebx + addl $718787259,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,120(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 48(%r13),%xmm5 + addb $32,%bpl + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 36(%r15),%r9d + addb %dl,%bl + movl 0(%rdi,%rbp,4),%eax + addl $3951481745,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,124(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movq %rbp,%rsi + xorq %rbp,%rbp + movb %sil,%bpl + movq %rcx,%rsi + xorq %rcx,%rcx + movb %sil,%cl + leaq (%rdi,%rbp,4),%rsi + psllq $8,%xmm1 + pxor %xmm0,%xmm5 + pxor %xmm1,%xmm5 + addl 0(%rsp),%r8d + addl 4(%rsp),%r9d + addl 8(%rsp),%r10d + addl 12(%rsp),%r11d + + movdqu %xmm2,(%r14,%r13,1) + movdqu %xmm3,16(%r14,%r13,1) + movdqu %xmm4,32(%r14,%r13,1) + movdqu %xmm5,48(%r14,%r13,1) + leaq 64(%r15),%r15 + leaq 64(%r13),%r13 + cmpq 16(%rsp),%r15 + jb .Loop + + movq 24(%rsp),%r12 + subb %al,%cl + movl %r8d,0(%r12) + movl %r9d,4(%r12) + movl %r10d,8(%r12) + movl %r11d,12(%r12) + subb $1,%bpl + movl %ebp,-8(%rdi) + movl %ecx,-4(%rdi) + + movq 40(%rsp),%r15 + movq 48(%rsp),%r14 + movq 56(%rsp),%r13 + movq 64(%rsp),%r12 + movq 72(%rsp),%rbp + movq 80(%rsp),%rbx + leaq 88(%rsp),%rsp +.Lepilogue: +.Labort: + .byte 0xf3,0xc3 +.size rc4_md5_enc,.-rc4_md5_enc diff --git a/deps/openssl/asm/x64-elf-gas/rc4/rc4-x86_64.s b/deps/openssl/asm/x64-elf-gas/rc4/rc4-x86_64.s index 1bafefeb02..f2b8a8bc04 100644 --- a/deps/openssl/asm/x64-elf-gas/rc4/rc4-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/rc4/rc4-x86_64.s @@ -1,6 +1,7 @@ .text + .globl RC4 .type RC4,@function .align 16 @@ -12,316 +13,511 @@ RC4: orq %rsi,%rsi pushq %r12 pushq %r13 .Lprologue: + movq %rsi,%r11 + movq %rdx,%r12 + movq %rcx,%r13 + xorq %r10,%r10 + xorq %rcx,%rcx - addq $8,%rdi - movl -8(%rdi),%r8d - movl -4(%rdi),%r12d + leaq 8(%rdi),%rdi + movb -8(%rdi),%r10b + movb -4(%rdi),%cl cmpl $-1,256(%rdi) je .LRC4_CHAR - incb %r8b - movl (%rdi,%r8,4),%r9d - testq $-8,%rsi - jz .Lloop1 - jmp .Lloop8 -.align 16 -.Lloop8: - addb %r9b,%r12b - movq %r8,%r10 - movl (%rdi,%r12,4),%r13d - rorq $8,%rax - incb %r10b - movl (%rdi,%r10,4),%r11d - cmpq %r10,%r12 - movl %r9d,(%rdi,%r12,4) - cmoveq %r9,%r11 - movl %r13d,(%rdi,%r8,4) - addb %r9b,%r13b - movb (%rdi,%r13,4),%al - addb %r11b,%r12b - movq %r10,%r8 - movl (%rdi,%r12,4),%r13d - rorq $8,%rax - incb %r8b - movl (%rdi,%r8,4),%r9d - cmpq %r8,%r12 - movl %r11d,(%rdi,%r12,4) - cmoveq %r11,%r9 - movl %r13d,(%rdi,%r10,4) - addb %r11b,%r13b - movb (%rdi,%r13,4),%al - addb %r9b,%r12b - movq %r8,%r10 - movl (%rdi,%r12,4),%r13d - rorq $8,%rax + movl OPENSSL_ia32cap_P(%rip),%r8d + xorq %rbx,%rbx incb %r10b - movl (%rdi,%r10,4),%r11d 
- cmpq %r10,%r12 - movl %r9d,(%rdi,%r12,4) - cmoveq %r9,%r11 - movl %r13d,(%rdi,%r8,4) - addb %r9b,%r13b - movb (%rdi,%r13,4),%al - addb %r11b,%r12b - movq %r10,%r8 - movl (%rdi,%r12,4),%r13d - rorq $8,%rax - incb %r8b - movl (%rdi,%r8,4),%r9d - cmpq %r8,%r12 - movl %r11d,(%rdi,%r12,4) - cmoveq %r11,%r9 - movl %r13d,(%rdi,%r10,4) - addb %r11b,%r13b - movb (%rdi,%r13,4),%al - addb %r9b,%r12b - movq %r8,%r10 - movl (%rdi,%r12,4),%r13d - rorq $8,%rax + subq %r10,%rbx + subq %r12,%r13 + movl (%rdi,%r10,4),%eax + testq $-16,%r11 + jz .Lloop1 + btl $30,%r8d + jc .Lintel + andq $7,%rbx + leaq 1(%r10),%rsi + jz .Loop8 + subq %rbx,%r11 +.Loop8_warmup: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al incb %r10b - movl (%rdi,%r10,4),%r11d - cmpq %r10,%r12 - movl %r9d,(%rdi,%r12,4) - cmoveq %r9,%r11 - movl %r13d,(%rdi,%r8,4) - addb %r9b,%r13b - movb (%rdi,%r13,4),%al - addb %r11b,%r12b - movq %r10,%r8 - movl (%rdi,%r12,4),%r13d - rorq $8,%rax - incb %r8b - movl (%rdi,%r8,4),%r9d - cmpq %r8,%r12 - movl %r11d,(%rdi,%r12,4) - cmoveq %r11,%r9 - movl %r13d,(%rdi,%r10,4) - addb %r11b,%r13b - movb (%rdi,%r13,4),%al - addb %r9b,%r12b - movq %r8,%r10 - movl (%rdi,%r12,4),%r13d - rorq $8,%rax + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r13,%r12,1) + leaq 1(%r12),%r12 + decq %rbx + jnz .Loop8_warmup + + leaq 1(%r10),%rsi + jmp .Loop8 +.align 16 +.Loop8: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 0(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,0(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 4(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,4(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 8(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,8(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 12(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,12(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 16(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,16(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 20(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,20(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 24(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,24(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb $8,%sil + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl -4(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,28(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb $8,%r10b + rorq $8,%r8 + subq $8,%r11 + + xorq (%r12),%r8 + movq %r8,(%r13,%r12,1) + leaq 8(%r12),%r12 + + testq $-8,%r11 + jnz .Loop8 + cmpq $0,%r11 + jne .Lloop1 + jmp .Lexit + +.align 16 +.Lintel: + testq $-32,%r11 + jz .Lloop1 + andq $15,%rbx + jz .Loop16_is_hot + subq %rbx,%r11 +.Loop16_warmup: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al incb %r10b - movl (%rdi,%r10,4),%r11d - cmpq %r10,%r12 - movl %r9d,(%rdi,%r12,4) - cmoveq %r9,%r11 - movl %r13d,(%rdi,%r8,4) - addb %r9b,%r13b - movb (%rdi,%r13,4),%al - addb %r11b,%r12b - movq %r10,%r8 - movl (%rdi,%r12,4),%r13d - rorq $8,%rax - incb 
%r8b - movl (%rdi,%r8,4),%r9d - cmpq %r8,%r12 - movl %r11d,(%rdi,%r12,4) - cmoveq %r11,%r9 - movl %r13d,(%rdi,%r10,4) - addb %r11b,%r13b - movb (%rdi,%r13,4),%al - rorq $8,%rax - subq $8,%rsi - - xorq (%rdx),%rax - addq $8,%rdx - movq %rax,(%rcx) - addq $8,%rcx - - testq $-8,%rsi - jnz .Lloop8 - cmpq $0,%rsi + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r13,%r12,1) + leaq 1(%r12),%r12 + decq %rbx + jnz .Loop16_warmup + + movq %rcx,%rbx + xorq %rcx,%rcx + movb %bl,%cl + +.Loop16_is_hot: + leaq (%rdi,%r10,4),%rsi + addb %al,%cl + movl (%rdi,%rcx,4),%edx + pxor %xmm0,%xmm0 + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 4(%rsi),%ebx + movzbl %al,%eax + movl %edx,0(%rsi) + addb %bl,%cl + pinsrw $0,(%rdi,%rax,4),%xmm0 + jmp .Loop16_enter +.align 16 +.Loop16: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + pxor %xmm0,%xmm2 + psllq $8,%xmm1 + pxor %xmm0,%xmm0 + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 4(%rsi),%ebx + movzbl %al,%eax + movl %edx,0(%rsi) + pxor %xmm1,%xmm2 + addb %bl,%cl + pinsrw $0,(%rdi,%rax,4),%xmm0 + movdqu %xmm2,(%r13,%r12,1) + leaq 16(%r12),%r12 +.Loop16_enter: + movl (%rdi,%rcx,4),%edx + pxor %xmm1,%xmm1 + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 8(%rsi),%eax + movzbl %bl,%ebx + movl %edx,4(%rsi) + addb %al,%cl + pinsrw $0,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 12(%rsi),%ebx + movzbl %al,%eax + movl %edx,8(%rsi) + addb %bl,%cl + pinsrw $1,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 16(%rsi),%eax + movzbl %bl,%ebx + movl %edx,12(%rsi) + addb %al,%cl + pinsrw $1,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 20(%rsi),%ebx + movzbl %al,%eax + movl %edx,16(%rsi) + addb %bl,%cl + pinsrw $2,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 24(%rsi),%eax + movzbl %bl,%ebx + movl %edx,20(%rsi) + addb %al,%cl + pinsrw $2,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 28(%rsi),%ebx + movzbl %al,%eax + movl %edx,24(%rsi) + addb %bl,%cl + pinsrw $3,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 32(%rsi),%eax + movzbl %bl,%ebx + movl %edx,28(%rsi) + addb %al,%cl + pinsrw $3,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 36(%rsi),%ebx + movzbl %al,%eax + movl %edx,32(%rsi) + addb %bl,%cl + pinsrw $4,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 40(%rsi),%eax + movzbl %bl,%ebx + movl %edx,36(%rsi) + addb %al,%cl + pinsrw $4,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 44(%rsi),%ebx + movzbl %al,%eax + movl %edx,40(%rsi) + addb %bl,%cl + pinsrw $5,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 48(%rsi),%eax + movzbl %bl,%ebx + movl %edx,44(%rsi) + addb %al,%cl + pinsrw $5,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 52(%rsi),%ebx + movzbl %al,%eax + movl %edx,48(%rsi) + addb %bl,%cl + pinsrw $6,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 56(%rsi),%eax + movzbl %bl,%ebx + movl %edx,52(%rsi) + addb %al,%cl + pinsrw $6,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 60(%rsi),%ebx + movzbl %al,%eax + movl %edx,56(%rsi) + addb %bl,%cl + 
pinsrw $7,(%rdi,%rax,4),%xmm0 + addb $16,%r10b + movdqu (%r12),%xmm2 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movzbl %bl,%ebx + movl %edx,60(%rsi) + leaq (%rdi,%r10,4),%rsi + pinsrw $7,(%rdi,%rbx,4),%xmm1 + movl (%rsi),%eax + movq %rcx,%rbx + xorq %rcx,%rcx + subq $16,%r11 + movb %bl,%cl + testq $-16,%r11 + jnz .Loop16 + + psllq $8,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm1,%xmm2 + movdqu %xmm2,(%r13,%r12,1) + leaq 16(%r12),%r12 + + cmpq $0,%r11 jne .Lloop1 jmp .Lexit .align 16 .Lloop1: - addb %r9b,%r12b - movl (%rdi,%r12,4),%r13d - movl %r9d,(%rdi,%r12,4) - movl %r13d,(%rdi,%r8,4) - addb %r13b,%r9b - incb %r8b - movl (%rdi,%r9,4),%r13d - movl (%rdi,%r8,4),%r9d - xorb (%rdx),%r13b - incq %rdx - movb %r13b,(%rcx) - incq %rcx - decq %rsi + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al + incb %r10b + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r13,%r12,1) + leaq 1(%r12),%r12 + decq %r11 jnz .Lloop1 jmp .Lexit .align 16 .LRC4_CHAR: - addb $1,%r8b - movzbl (%rdi,%r8,1),%r9d - testq $-8,%rsi + addb $1,%r10b + movzbl (%rdi,%r10,1),%eax + testq $-8,%r11 jz .Lcloop1 - cmpl $0,260(%rdi) - jnz .Lcloop1 jmp .Lcloop8 .align 16 .Lcloop8: - movl (%rdx),%eax - movl 4(%rdx),%ebx - addb %r9b,%r12b - leaq 1(%r8),%r10 - movzbl (%rdi,%r12,1),%r13d - movzbl %r10b,%r10d - movzbl (%rdi,%r10,1),%r11d - movb %r9b,(%rdi,%r12,1) - cmpq %r10,%r12 - movb %r13b,(%rdi,%r8,1) + movl (%r12),%r8d + movl 4(%r12),%r9d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) jne .Lcmov0 - movq %r9,%r11 + movq %rax,%rbx .Lcmov0: - addb %r9b,%r13b - xorb (%rdi,%r13,1),%al - rorl $8,%eax - addb %r11b,%r12b - leaq 1(%r10),%r8 - movzbl (%rdi,%r12,1),%r13d - movzbl %r8b,%r8d - movzbl (%rdi,%r8,1),%r9d - movb %r11b,(%rdi,%r12,1) - cmpq %r8,%r12 - movb %r13b,(%rdi,%r10,1) + addb %al,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) jne .Lcmov1 - movq %r11,%r9 + movq %rbx,%rax .Lcmov1: - addb %r11b,%r13b - xorb (%rdi,%r13,1),%al - rorl $8,%eax - addb %r9b,%r12b - leaq 1(%r8),%r10 - movzbl (%rdi,%r12,1),%r13d - movzbl %r10b,%r10d - movzbl (%rdi,%r10,1),%r11d - movb %r9b,(%rdi,%r12,1) - cmpq %r10,%r12 - movb %r13b,(%rdi,%r8,1) + addb %bl,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) jne .Lcmov2 - movq %r9,%r11 + movq %rax,%rbx .Lcmov2: - addb %r9b,%r13b - xorb (%rdi,%r13,1),%al - rorl $8,%eax - addb %r11b,%r12b - leaq 1(%r10),%r8 - movzbl (%rdi,%r12,1),%r13d - movzbl %r8b,%r8d - movzbl (%rdi,%r8,1),%r9d - movb %r11b,(%rdi,%r12,1) - cmpq %r8,%r12 - movb %r13b,(%rdi,%r10,1) + addb %al,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) jne .Lcmov3 - movq %r11,%r9 + movq %rbx,%rax .Lcmov3: - addb %r11b,%r13b - xorb (%rdi,%r13,1),%al - rorl $8,%eax - addb %r9b,%r12b - leaq 1(%r8),%r10 - movzbl (%rdi,%r12,1),%r13d - movzbl %r10b,%r10d - movzbl (%rdi,%r10,1),%r11d - movb %r9b,(%rdi,%r12,1) - cmpq 
%r10,%r12 - movb %r13b,(%rdi,%r8,1) + addb %bl,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) jne .Lcmov4 - movq %r9,%r11 + movq %rax,%rbx .Lcmov4: - addb %r9b,%r13b - xorb (%rdi,%r13,1),%bl - rorl $8,%ebx - addb %r11b,%r12b - leaq 1(%r10),%r8 - movzbl (%rdi,%r12,1),%r13d - movzbl %r8b,%r8d - movzbl (%rdi,%r8,1),%r9d - movb %r11b,(%rdi,%r12,1) - cmpq %r8,%r12 - movb %r13b,(%rdi,%r10,1) + addb %al,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) jne .Lcmov5 - movq %r11,%r9 + movq %rbx,%rax .Lcmov5: - addb %r11b,%r13b - xorb (%rdi,%r13,1),%bl - rorl $8,%ebx - addb %r9b,%r12b - leaq 1(%r8),%r10 - movzbl (%rdi,%r12,1),%r13d - movzbl %r10b,%r10d - movzbl (%rdi,%r10,1),%r11d - movb %r9b,(%rdi,%r12,1) - cmpq %r10,%r12 - movb %r13b,(%rdi,%r8,1) + addb %bl,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) jne .Lcmov6 - movq %r9,%r11 + movq %rax,%rbx .Lcmov6: - addb %r9b,%r13b - xorb (%rdi,%r13,1),%bl - rorl $8,%ebx - addb %r11b,%r12b - leaq 1(%r10),%r8 - movzbl (%rdi,%r12,1),%r13d - movzbl %r8b,%r8d - movzbl (%rdi,%r8,1),%r9d - movb %r11b,(%rdi,%r12,1) - cmpq %r8,%r12 - movb %r13b,(%rdi,%r10,1) + addb %al,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) jne .Lcmov7 - movq %r11,%r9 + movq %rbx,%rax .Lcmov7: - addb %r11b,%r13b - xorb (%rdi,%r13,1),%bl - rorl $8,%ebx - leaq -8(%rsi),%rsi - movl %eax,(%rcx) - leaq 8(%rdx),%rdx - movl %ebx,4(%rcx) - leaq 8(%rcx),%rcx - - testq $-8,%rsi + addb %bl,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + leaq -8(%r11),%r11 + movl %r8d,(%r13) + leaq 8(%r12),%r12 + movl %r9d,4(%r13) + leaq 8(%r13),%r13 + + testq $-8,%r11 jnz .Lcloop8 - cmpq $0,%rsi + cmpq $0,%r11 jne .Lcloop1 jmp .Lexit .align 16 .Lcloop1: - addb %r9b,%r12b - movzbl (%rdi,%r12,1),%r13d - movb %r9b,(%rdi,%r12,1) - movb %r13b,(%rdi,%r8,1) - addb %r9b,%r13b - addb $1,%r8b - movzbl %r13b,%r13d - movzbl %r8b,%r8d - movzbl (%rdi,%r13,1),%r13d - movzbl (%rdi,%r8,1),%r9d - xorb (%rdx),%r13b - leaq 1(%rdx),%rdx - movb %r13b,(%rcx) - leaq 1(%rcx),%rcx - subq $1,%rsi + addb %al,%cl + movzbl %cl,%ecx + movzbl (%rdi,%rcx,1),%edx + movb %al,(%rdi,%rcx,1) + movb %dl,(%rdi,%r10,1) + addb %al,%dl + addb $1,%r10b + movzbl %dl,%edx + movzbl %r10b,%r10d + movzbl (%rdi,%rdx,1),%edx + movzbl (%rdi,%r10,1),%eax + xorb (%r12),%dl + leaq 1(%r12),%r12 + movb %dl,(%r13) + leaq 1(%r13),%r13 + subq $1,%r11 jnz .Lcloop1 jmp .Lexit .align 16 .Lexit: - subb $1,%r8b - movl %r8d,-8(%rdi) - movl %r12d,-4(%rdi) + subb $1,%r10b + movl %r10d,-8(%rdi) + movl %ecx,-4(%rdi) movq (%rsp),%r13 movq 8(%rsp),%r12 @@ -330,11 +526,10 @@ RC4: orq %rsi,%rsi .Lepilogue: .byte 0xf3,0xc3 .size RC4,.-RC4 - -.globl RC4_set_key -.type RC4_set_key,@function +.globl private_RC4_set_key +.type private_RC4_set_key,@function .align 16 -RC4_set_key: +private_RC4_set_key: leaq 8(%rdi),%rdi leaq (%rdx,%rsi,1),%rdx negq %rsi @@ -346,11 +541,8 @@ RC4_set_key: movl 
OPENSSL_ia32cap_P(%rip),%r8d btl $20,%r8d - jnc .Lw1stloop - btl $30,%r8d - setc %r9b - movl %r9d,260(%rdi) - jmp .Lc1stloop + jc .Lc1stloop + jmp .Lw1stloop .align 16 .Lw1stloop: @@ -404,7 +596,7 @@ RC4_set_key: movl %eax,-8(%rdi) movl %eax,-4(%rdi) .byte 0xf3,0xc3 -.size RC4_set_key,.-RC4_set_key +.size private_RC4_set_key,.-private_RC4_set_key .globl RC4_options .type RC4_options,@function @@ -413,18 +605,20 @@ RC4_options: leaq .Lopts(%rip),%rax movl OPENSSL_ia32cap_P(%rip),%edx btl $20,%edx - jnc .Ldone - addq $12,%rax + jc .L8xchar btl $30,%edx jnc .Ldone - addq $13,%rax + addq $25,%rax + .byte 0xf3,0xc3 +.L8xchar: + addq $12,%rax .Ldone: .byte 0xf3,0xc3 .align 64 .Lopts: .byte 114,99,52,40,56,120,44,105,110,116,41,0 .byte 114,99,52,40,56,120,44,99,104,97,114,41,0 -.byte 114,99,52,40,49,120,44,99,104,97,114,41,0 +.byte 114,99,52,40,49,54,120,44,105,110,116,41,0 .byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 .size RC4_options,.-RC4_options diff --git a/deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s b/deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s index 208c2cdd26..c11c6f650b 100644 --- a/deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s @@ -1,12 +1,23 @@ .text + + .globl sha1_block_data_order .type sha1_block_data_order,@function .align 16 sha1_block_data_order: + movl OPENSSL_ia32cap_P+0(%rip),%r9d + movl OPENSSL_ia32cap_P+4(%rip),%r8d + testl $512,%r8d + jz .Lialu + jmp _ssse3_shortcut + +.align 16 +.Lialu: pushq %rbx pushq %rbp pushq %r12 + pushq %r13 movq %rsp,%r11 movq %rdi,%r8 subq $72,%rsp @@ -16,1268 +27,2466 @@ sha1_block_data_order: movq %r11,64(%rsp) .Lprologue: - movl 0(%r8),%edx - movl 4(%r8),%esi - movl 8(%r8),%edi - movl 12(%r8),%ebp - movl 16(%r8),%r11d -.align 4 + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp .Lloop + +.align 16 .Lloop: - movl 0(%r9),%eax - bswapl %eax - movl %eax,0(%rsp) - leal 1518500249(%rax,%r11,1),%r12d - movl %edi,%ebx - movl 4(%r9),%eax - movl %edx,%r11d - xorl %ebp,%ebx - bswapl %eax - roll $5,%r11d - andl %esi,%ebx - movl %eax,4(%rsp) - addl %r11d,%r12d - xorl %ebp,%ebx + movl 0(%r9),%edx + bswapl %edx + movl %edx,0(%rsp) + movl %r11d,%eax + movl 4(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,4(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 8(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,8(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax roll $30,%esi - addl %ebx,%r12d - leal 1518500249(%rax,%rbp,1),%r11d - movl %esi,%ebx - movl 8(%r9),%eax - movl %r12d,%ebp - xorl %edi,%ebx - bswapl %eax - roll $5,%ebp - andl %edx,%ebx - movl %eax,8(%rsp) - addl %ebp,%r11d - xorl %edi,%ebx - roll $30,%edx - addl %ebx,%r11d - leal 1518500249(%rax,%rdi,1),%ebp - movl %edx,%ebx - movl 12(%r9),%eax - movl %r11d,%edi - xorl %esi,%ebx - bswapl %eax - roll $5,%edi - andl %r12d,%ebx - movl %eax,12(%rsp) - addl %edi,%ebp - xorl %esi,%ebx + addl %eax,%r12d + movl %esi,%eax + movl 12(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,12(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl 
%eax,%r11d + movl %r13d,%eax + movl 16(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,16(%rsp) + addl %ecx,%edi + xorl %esi,%eax roll $30,%r12d - addl %ebx,%ebp - leal 1518500249(%rax,%rsi,1),%edi - movl %r12d,%ebx - movl 16(%r9),%eax - movl %ebp,%esi - xorl %edx,%ebx - bswapl %eax - roll $5,%esi - andl %r11d,%ebx - movl %eax,16(%rsp) - addl %esi,%edi - xorl %edx,%ebx + addl %eax,%edi + movl %r12d,%eax + movl 20(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,20(%rsp) + addl %ecx,%esi + xorl %r13d,%eax roll $30,%r11d - addl %ebx,%edi - leal 1518500249(%rax,%rdx,1),%esi + addl %eax,%esi + movl %r11d,%eax + movl 24(%r9),%edx + movl %esi,%ecx + xorl %r12d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r13,1),%r13d + andl %edi,%eax + movl %edx,24(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 28(%r9),%ebp + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r12,1),%r12d + andl %esi,%eax + movl %ebp,28(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 32(%r9),%edx + movl %r12d,%ecx + xorl %edi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r11,1),%r11d + andl %r13d,%eax + movl %edx,32(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 36(%r9),%ebp + movl %r11d,%ecx + xorl %esi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rdi,1),%edi + andl %r12d,%eax + movl %ebp,36(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 40(%r9),%edx + movl %edi,%ecx + xorl %r13d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rsi,1),%esi + andl %r11d,%eax + movl %edx,40(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 44(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,44(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 48(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,48(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 52(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,52(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 56(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,56(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 60(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,60(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 0(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 32(%rsp),%edx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + xorl 52(%rsp),%edx + xorl %r12d,%eax + roll $1,%edx + addl %ecx,%r13d + roll $30,%edi + movl %edx,0(%rsp) + addl %eax,%r13d + 
movl 4(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 36(%rsp),%ebp + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + xorl 56(%rsp),%ebp + xorl %r11d,%eax + roll $1,%ebp + addl %ecx,%r12d + roll $30,%esi + movl %ebp,4(%rsp) + addl %eax,%r12d + movl 8(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + xorl 60(%rsp),%edx + xorl %edi,%eax + roll $1,%edx + addl %ecx,%r11d + roll $30,%r13d + movl %edx,8(%rsp) + addl %eax,%r11d + movl 12(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + xorl 0(%rsp),%ebp + xorl %esi,%eax + roll $1,%ebp + addl %ecx,%edi + roll $30,%r12d + movl %ebp,12(%rsp) + addl %eax,%edi + movl 16(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + xorl 4(%rsp),%edx + xorl %r13d,%eax + roll $1,%edx + addl %ecx,%esi + roll $30,%r11d + movl %edx,16(%rsp) + addl %eax,%esi + movl 20(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 28(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 52(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 8(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 32(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 56(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 12(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 36(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 60(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 16(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 0(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 20(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 4(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 24(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 48(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 8(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 28(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 52(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 12(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 32(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 56(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 16(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 36(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + 
roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 20(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 40(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 24(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 44(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 28(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 48(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 32(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 52(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 36(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 56(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 40(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 60(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 44(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 0(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 48(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 4(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 52(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 8(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 56(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 12(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 60(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 16(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 0(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 20(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,32(%rsp) + movl 
36(%rsp),%ebp + movl %r11d,%eax movl %r11d,%ebx - movl 20(%r9),%eax - movl %edi,%edx - xorl %r12d,%ebx - bswapl %eax - roll $5,%edx - andl %ebp,%ebx - movl %eax,20(%rsp) - addl %edx,%esi + xorl 44(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp xorl %r12d,%ebx - roll $30,%ebp - addl %ebx,%esi - leal 1518500249(%rax,%r12,1),%edx - movl %ebp,%ebx - movl 24(%r9),%eax - movl %esi,%r12d - xorl %r11d,%ebx - bswapl %eax - roll $5,%r12d + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r13d andl %edi,%ebx - movl %eax,24(%rsp) - addl %r12d,%edx - xorl %r11d,%ebx + roll $1,%ebp + addl %ebx,%r13d roll $30,%edi - addl %ebx,%edx - leal 1518500249(%rax,%r11,1),%r12d + movl %ebp,36(%rsp) + addl %ecx,%r13d + movl 40(%rsp),%edx + movl %edi,%eax movl %edi,%ebx - movl 28(%r9),%eax - movl %edx,%r11d - xorl %ebp,%ebx - bswapl %eax - roll $5,%r11d + xorl 48(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r12d andl %esi,%ebx - movl %eax,28(%rsp) - addl %r11d,%r12d - xorl %ebp,%ebx - roll $30,%esi + roll $1,%edx addl %ebx,%r12d - leal 1518500249(%rax,%rbp,1),%r11d + roll $30,%esi + movl %edx,40(%rsp) + addl %ecx,%r12d + movl 44(%rsp),%ebp + movl %esi,%eax movl %esi,%ebx - movl 32(%r9),%eax - movl %r12d,%ebp - xorl %edi,%ebx - bswapl %eax - roll $5,%ebp - andl %edx,%ebx - movl %eax,32(%rsp) - addl %ebp,%r11d + xorl 52(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp xorl %edi,%ebx - roll $30,%edx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp addl %ebx,%r11d - leal 1518500249(%rax,%rdi,1),%ebp - movl %edx,%ebx - movl 36(%r9),%eax - movl %r11d,%edi + roll $30,%r13d + movl %ebp,44(%rsp) + addl %ecx,%r11d + movl 48(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx xorl %esi,%ebx - bswapl %eax - roll $5,%edi + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%edi andl %r12d,%ebx - movl %eax,36(%rsp) - addl %edi,%ebp - xorl %esi,%ebx + roll $1,%edx + addl %ebx,%edi roll $30,%r12d - addl %ebx,%ebp - leal 1518500249(%rax,%rsi,1),%edi + movl %edx,48(%rsp) + addl %ecx,%edi + movl 52(%rsp),%ebp + movl %r12d,%eax movl %r12d,%ebx - movl 40(%r9),%eax - movl %ebp,%esi - xorl %edx,%ebx - bswapl %eax - roll $5,%esi + xorl 60(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 40(%rsp),%ebp + addl %eax,%esi andl %r11d,%ebx - movl %eax,40(%rsp) - addl %esi,%edi - xorl %edx,%ebx + roll $1,%ebp + addl %ebx,%esi roll $30,%r11d - addl %ebx,%edi - leal 1518500249(%rax,%rdx,1),%esi + movl %ebp,52(%rsp) + addl %ecx,%esi + movl 56(%rsp),%edx + movl %r11d,%eax movl %r11d,%ebx - movl 44(%r9),%eax - movl %edi,%edx - xorl %r12d,%ebx - bswapl %eax - roll $5,%edx - andl %ebp,%ebx - movl %eax,44(%rsp) - addl %edx,%esi + xorl 0(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx xorl %r12d,%ebx - roll $30,%ebp - addl %ebx,%esi - leal 1518500249(%rax,%r12,1),%edx - movl %ebp,%ebx - movl 48(%r9),%eax - movl %esi,%r12d - xorl %r11d,%ebx - bswapl %eax - roll $5,%r12d + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 44(%rsp),%edx + addl %eax,%r13d andl %edi,%ebx - movl %eax,48(%rsp) - addl %r12d,%edx - xorl %r11d,%ebx + roll $1,%edx + addl %ebx,%r13d roll 
$30,%edi - addl %ebx,%edx - leal 1518500249(%rax,%r11,1),%r12d + movl %edx,56(%rsp) + addl %ecx,%r13d + movl 60(%rsp),%ebp + movl %edi,%eax movl %edi,%ebx - movl 52(%r9),%eax - movl %edx,%r11d - xorl %ebp,%ebx - bswapl %eax - roll $5,%r11d + xorl 4(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 48(%rsp),%ebp + addl %eax,%r12d andl %esi,%ebx - movl %eax,52(%rsp) - addl %r11d,%r12d - xorl %ebp,%ebx - roll $30,%esi + roll $1,%ebp addl %ebx,%r12d - leal 1518500249(%rax,%rbp,1),%r11d + roll $30,%esi + movl %ebp,60(%rsp) + addl %ecx,%r12d + movl 0(%rsp),%edx + movl %esi,%eax movl %esi,%ebx - movl 56(%r9),%eax - movl %r12d,%ebp - xorl %edi,%ebx - bswapl %eax - roll $5,%ebp - andl %edx,%ebx - movl %eax,56(%rsp) - addl %ebp,%r11d + xorl 8(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx xorl %edi,%ebx - roll $30,%edx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 52(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx addl %ebx,%r11d - leal 1518500249(%rax,%rdi,1),%ebp - movl %edx,%ebx - movl 60(%r9),%eax - movl %r11d,%edi + roll $30,%r13d + movl %edx,0(%rsp) + addl %ecx,%r11d + movl 4(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 12(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp xorl %esi,%ebx - bswapl %eax - roll $5,%edi + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 56(%rsp),%ebp + addl %eax,%edi andl %r12d,%ebx - movl %eax,60(%rsp) - addl %edi,%ebp - xorl %esi,%ebx + roll $1,%ebp + addl %ebx,%edi roll $30,%r12d - addl %ebx,%ebp - leal 1518500249(%rax,%rsi,1),%edi - movl 0(%rsp),%eax + movl %ebp,4(%rsp) + addl %ecx,%edi + movl 8(%rsp),%edx + movl %r12d,%eax movl %r12d,%ebx - movl %ebp,%esi - xorl 8(%rsp),%eax - xorl %edx,%ebx - roll $5,%esi - xorl 32(%rsp),%eax + xorl 16(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 60(%rsp),%edx + addl %eax,%esi andl %r11d,%ebx - addl %esi,%edi - xorl 52(%rsp),%eax - xorl %edx,%ebx + roll $1,%edx + addl %ebx,%esi roll $30,%r11d - addl %ebx,%edi - roll $1,%eax - movl %eax,0(%rsp) - leal 1518500249(%rax,%rdx,1),%esi - movl 4(%rsp),%eax + movl %edx,8(%rsp) + addl %ecx,%esi + movl 12(%rsp),%ebp + movl %r11d,%eax movl %r11d,%ebx - movl %edi,%edx - xorl 12(%rsp),%eax - xorl %r12d,%ebx - roll $5,%edx - xorl 36(%rsp),%eax - andl %ebp,%ebx - addl %edx,%esi - xorl 56(%rsp),%eax + xorl 20(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 44(%rsp),%ebp xorl %r12d,%ebx - roll $30,%ebp - addl %ebx,%esi - roll $1,%eax - movl %eax,4(%rsp) - leal 1518500249(%rax,%r12,1),%edx - movl 8(%rsp),%eax - movl %ebp,%ebx - movl %esi,%r12d - xorl 16(%rsp),%eax - xorl %r11d,%ebx - roll $5,%r12d - xorl 40(%rsp),%eax + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 0(%rsp),%ebp + addl %eax,%r13d andl %edi,%ebx - addl %r12d,%edx - xorl 60(%rsp),%eax - xorl %r11d,%ebx + roll $1,%ebp + addl %ebx,%r13d roll $30,%edi - addl %ebx,%edx - roll $1,%eax - movl %eax,8(%rsp) - leal 1518500249(%rax,%r11,1),%r12d - movl 12(%rsp),%eax + movl %ebp,12(%rsp) + addl %ecx,%r13d + movl 16(%rsp),%edx + movl %edi,%eax movl %edi,%ebx - movl %edx,%r11d - xorl 20(%rsp),%eax - xorl %ebp,%ebx - roll $5,%r11d - xorl 44(%rsp),%eax + xorl 24(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 48(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 4(%rsp),%edx + addl %eax,%r12d andl %esi,%ebx - addl 
%r11d,%r12d - xorl 0(%rsp),%eax - xorl %ebp,%ebx - roll $30,%esi + roll $1,%edx addl %ebx,%r12d - roll $1,%eax - movl %eax,12(%rsp) - leal 1518500249(%rax,%rbp,1),%r11d - movl 16(%rsp),%eax + roll $30,%esi + movl %edx,16(%rsp) + addl %ecx,%r12d + movl 20(%rsp),%ebp + movl %esi,%eax movl %esi,%ebx - movl %r12d,%ebp - xorl 24(%rsp),%eax - xorl %edi,%ebx - roll $5,%ebp - xorl 48(%rsp),%eax - andl %edx,%ebx - addl %ebp,%r11d - xorl 4(%rsp),%eax + xorl 28(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%ebp xorl %edi,%ebx - roll $30,%edx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 8(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp addl %ebx,%r11d - roll $1,%eax - movl %eax,16(%rsp) - leal 1859775393(%rax,%rdi,1),%ebp - movl 20(%rsp),%eax - movl %edx,%ebx - movl %r11d,%edi - xorl 28(%rsp),%eax - xorl %r12d,%ebx - roll $5,%edi - xorl 52(%rsp),%eax + roll $30,%r13d + movl %ebp,20(%rsp) + addl %ecx,%r11d + movl 24(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 32(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx xorl %esi,%ebx - addl %edi,%ebp - xorl 8(%rsp),%eax + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 12(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi roll $30,%r12d - addl %ebx,%ebp - roll $1,%eax - movl %eax,20(%rsp) - leal 1859775393(%rax,%rsi,1),%edi - movl 24(%rsp),%eax + movl %edx,24(%rsp) + addl %ecx,%edi + movl 28(%rsp),%ebp + movl %r12d,%eax movl %r12d,%ebx - movl %ebp,%esi - xorl 32(%rsp),%eax - xorl %r11d,%ebx - roll $5,%esi - xorl 56(%rsp),%eax - xorl %edx,%ebx - addl %esi,%edi - xorl 12(%rsp),%eax + xorl 36(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 16(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi roll $30,%r11d - addl %ebx,%edi - roll $1,%eax - movl %eax,24(%rsp) - leal 1859775393(%rax,%rdx,1),%esi - movl 28(%rsp),%eax + movl %ebp,28(%rsp) + addl %ecx,%esi + movl 32(%rsp),%edx + movl %r11d,%eax movl %r11d,%ebx - movl %edi,%edx - xorl 36(%rsp),%eax - xorl %ebp,%ebx - roll $5,%edx - xorl 60(%rsp),%eax + xorl 40(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 0(%rsp),%edx xorl %r12d,%ebx - addl %edx,%esi - xorl 16(%rsp),%eax - roll $30,%ebp - addl %ebx,%esi - roll $1,%eax - movl %eax,28(%rsp) - leal 1859775393(%rax,%r12,1),%edx - movl 32(%rsp),%eax - movl %ebp,%ebx - movl %esi,%r12d - xorl 40(%rsp),%eax - xorl %edi,%ebx - roll $5,%r12d - xorl 0(%rsp),%eax - xorl %r11d,%ebx - addl %r12d,%edx - xorl 20(%rsp),%eax + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 20(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d roll $30,%edi - addl %ebx,%edx - roll $1,%eax - movl %eax,32(%rsp) - leal 1859775393(%rax,%r11,1),%r12d - movl 36(%rsp),%eax + movl %edx,32(%rsp) + addl %ecx,%r13d + movl 36(%rsp),%ebp + movl %edi,%eax movl %edi,%ebx - movl %edx,%r11d - xorl 44(%rsp),%eax - xorl %esi,%ebx - roll $5,%r11d - xorl 4(%rsp),%eax - xorl %ebp,%ebx - addl %r11d,%r12d - xorl 24(%rsp),%eax - roll $30,%esi + xorl 44(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 4(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp addl %ebx,%r12d - roll $1,%eax - movl %eax,36(%rsp) - leal 1859775393(%rax,%rbp,1),%r11d - movl 40(%rsp),%eax + roll $30,%esi + movl %ebp,36(%rsp) + addl %ecx,%r12d + movl 40(%rsp),%edx + movl %esi,%eax 
movl %esi,%ebx - movl %r12d,%ebp - xorl 48(%rsp),%eax - xorl %edx,%ebx - roll $5,%ebp - xorl 8(%rsp),%eax + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%edx xorl %edi,%ebx - addl %ebp,%r11d - xorl 28(%rsp),%eax - roll $30,%edx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx addl %ebx,%r11d - roll $1,%eax - movl %eax,40(%rsp) - leal 1859775393(%rax,%rdi,1),%ebp - movl 44(%rsp),%eax - movl %edx,%ebx - movl %r11d,%edi - xorl 52(%rsp),%eax - xorl %r12d,%ebx - roll $5,%edi - xorl 12(%rsp),%eax + roll $30,%r13d + movl %edx,40(%rsp) + addl %ecx,%r11d + movl 44(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 52(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 12(%rsp),%ebp xorl %esi,%ebx - addl %edi,%ebp - xorl 32(%rsp),%eax + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi roll $30,%r12d - addl %ebx,%ebp - roll $1,%eax - movl %eax,44(%rsp) - leal 1859775393(%rax,%rsi,1),%edi - movl 48(%rsp),%eax + movl %ebp,44(%rsp) + addl %ecx,%edi + movl 48(%rsp),%edx + movl %r12d,%eax movl %r12d,%ebx - movl %ebp,%esi - xorl 56(%rsp),%eax - xorl %r11d,%ebx - roll $5,%esi - xorl 16(%rsp),%eax - xorl %edx,%ebx - addl %esi,%edi - xorl 36(%rsp),%eax - roll $30,%r11d - addl %ebx,%edi - roll $1,%eax - movl %eax,48(%rsp) - leal 1859775393(%rax,%rdx,1),%esi - movl 52(%rsp),%eax - movl %r11d,%ebx - movl %edi,%edx - xorl 60(%rsp),%eax - xorl %ebp,%ebx - roll $5,%edx - xorl 20(%rsp),%eax - xorl %r12d,%ebx - addl %edx,%esi - xorl 40(%rsp),%eax - roll $30,%ebp + xorl 56(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx addl %ebx,%esi - roll $1,%eax - movl %eax,52(%rsp) - leal 1859775393(%rax,%r12,1),%edx - movl 56(%rsp),%eax - movl %ebp,%ebx - movl %esi,%r12d - xorl 0(%rsp),%eax - xorl %edi,%ebx - roll $5,%r12d - xorl 24(%rsp),%eax - xorl %r11d,%ebx - addl %r12d,%edx - xorl 44(%rsp),%eax + roll $30,%r11d + movl %edx,48(%rsp) + addl %ecx,%esi + movl 52(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 20(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 40(%rsp),%ebp roll $30,%edi - addl %ebx,%edx - roll $1,%eax - movl %eax,56(%rsp) - leal 1859775393(%rax,%r11,1),%r12d - movl 60(%rsp),%eax - movl %edi,%ebx - movl %edx,%r11d - xorl 4(%rsp),%eax - xorl %esi,%ebx - roll $5,%r11d - xorl 28(%rsp),%eax - xorl %ebp,%ebx - addl %r11d,%r12d - xorl 48(%rsp),%eax + addl %eax,%r13d + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 0(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 24(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 44(%rsp),%edx roll $30,%esi - addl %ebx,%r12d - roll $1,%eax - movl %eax,60(%rsp) - leal 1859775393(%rax,%rbp,1),%r11d - movl 0(%rsp),%eax - movl %esi,%ebx - movl %r12d,%ebp - xorl 8(%rsp),%eax - xorl %edx,%ebx - roll $5,%ebp - xorl 32(%rsp),%eax - xorl %edi,%ebx - addl %ebp,%r11d - xorl 52(%rsp),%eax - roll $30,%edx - addl %ebx,%r11d - roll $1,%eax - movl %eax,0(%rsp) - leal 1859775393(%rax,%rdi,1),%ebp - movl 4(%rsp),%eax - movl %edx,%ebx - movl %r11d,%edi - xorl 12(%rsp),%eax - xorl %r12d,%ebx - roll $5,%edi - xorl 36(%rsp),%eax - xorl %esi,%ebx - addl %edi,%ebp - 
xorl 56(%rsp),%eax + addl %eax,%r12d + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 4(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 28(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 48(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 32(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 52(%rsp),%edx roll $30,%r12d - addl %ebx,%ebp - roll $1,%eax - movl %eax,4(%rsp) - leal 1859775393(%rax,%rsi,1),%edi - movl 8(%rsp),%eax - movl %r12d,%ebx - movl %ebp,%esi - xorl 16(%rsp),%eax - xorl %r11d,%ebx - roll $5,%esi - xorl 40(%rsp),%eax - xorl %edx,%ebx - addl %esi,%edi - xorl 60(%rsp),%eax + addl %eax,%edi + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 36(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 56(%rsp),%ebp roll $30,%r11d - addl %ebx,%edi - roll $1,%eax - movl %eax,8(%rsp) - leal 1859775393(%rax,%rdx,1),%esi - movl 12(%rsp),%eax - movl %r11d,%ebx - movl %edi,%edx - xorl 20(%rsp),%eax - xorl %ebp,%ebx - roll $5,%edx - xorl 44(%rsp),%eax - xorl %r12d,%ebx - addl %edx,%esi - xorl 0(%rsp),%eax - roll $30,%ebp - addl %ebx,%esi - roll $1,%eax - movl %eax,12(%rsp) - leal 1859775393(%rax,%r12,1),%edx - movl 16(%rsp),%eax - movl %ebp,%ebx - movl %esi,%r12d - xorl 24(%rsp),%eax - xorl %edi,%ebx - roll $5,%r12d - xorl 48(%rsp),%eax - xorl %r11d,%ebx - addl %r12d,%edx - xorl 4(%rsp),%eax + addl %eax,%esi + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 40(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 60(%rsp),%edx roll $30,%edi - addl %ebx,%edx - roll $1,%eax - movl %eax,16(%rsp) - leal 1859775393(%rax,%r11,1),%r12d - movl 20(%rsp),%eax - movl %edi,%ebx - movl %edx,%r11d - xorl 28(%rsp),%eax - xorl %esi,%ebx - roll $5,%r11d - xorl 52(%rsp),%eax - xorl %ebp,%ebx - addl %r11d,%r12d - xorl 8(%rsp),%eax + addl %eax,%r13d + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 44(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 0(%rsp),%ebp roll $30,%esi - addl %ebx,%r12d - roll $1,%eax - movl %eax,20(%rsp) - leal 1859775393(%rax,%rbp,1),%r11d - movl 24(%rsp),%eax - movl %esi,%ebx - movl %r12d,%ebp - xorl 32(%rsp),%eax - xorl %edx,%ebx - roll $5,%ebp - xorl 56(%rsp),%eax - xorl %edi,%ebx - addl %ebp,%r11d - xorl 12(%rsp),%eax - roll $30,%edx - addl %ebx,%r11d - roll $1,%eax - movl %eax,24(%rsp) - leal 1859775393(%rax,%rdi,1),%ebp - movl 28(%rsp),%eax - movl %edx,%ebx - movl %r11d,%edi - xorl 36(%rsp),%eax - xorl %r12d,%ebx - roll $5,%edi - xorl 60(%rsp),%eax - xorl %esi,%ebx - addl %edi,%ebp - xorl 16(%rsp),%eax + addl %eax,%r12d + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 48(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 4(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %r13d,%eax + movl 
%r11d,%ecx + xorl 28(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 52(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 8(%rsp),%ebp roll $30,%r12d - addl %ebx,%ebp - roll $1,%eax - movl %eax,28(%rsp) - leal 1859775393(%rax,%rsi,1),%edi - movl 32(%rsp),%eax - movl %r12d,%ebx - movl %ebp,%esi - xorl 40(%rsp),%eax - xorl %r11d,%ebx - roll $5,%esi - xorl 0(%rsp),%eax - xorl %edx,%ebx - addl %esi,%edi - xorl 20(%rsp),%eax - roll $30,%r11d - addl %ebx,%edi - roll $1,%eax - movl %eax,32(%rsp) - leal -1894007588(%rax,%rdx,1),%esi - movl 36(%rsp),%eax - movl %ebp,%ebx - movl %ebp,%ecx - xorl 44(%rsp),%eax - movl %edi,%edx - andl %r11d,%ebx - xorl 4(%rsp),%eax - orl %r11d,%ecx - roll $5,%edx - xorl 24(%rsp),%eax - andl %r12d,%ecx - addl %edx,%esi - roll $1,%eax - orl %ecx,%ebx - roll $30,%ebp - movl %eax,36(%rsp) - addl %ebx,%esi - leal -1894007588(%rax,%r12,1),%edx - movl 40(%rsp),%eax - movl %edi,%ebx + addl %eax,%edi + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %r12d,%eax movl %edi,%ecx - xorl 48(%rsp),%eax - movl %esi,%r12d - andl %ebp,%ebx - xorl 8(%rsp),%eax - orl %ebp,%ecx - roll $5,%r12d - xorl 28(%rsp),%eax - andl %r11d,%ecx - addl %r12d,%edx - roll $1,%eax - orl %ecx,%ebx - roll $30,%edi - movl %eax,40(%rsp) - addl %ebx,%edx - leal -1894007588(%rax,%r11,1),%r12d - movl 44(%rsp),%eax - movl %esi,%ebx + xorl 32(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rsi,1),%esi + xorl 56(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 12(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r11d,%eax movl %esi,%ecx - xorl 52(%rsp),%eax - movl %edx,%r11d - andl %edi,%ebx - xorl 12(%rsp),%eax - orl %edi,%ecx - roll $5,%r11d - xorl 32(%rsp),%eax - andl %ebp,%ecx - addl %r11d,%r12d - roll $1,%eax - orl %ecx,%ebx + xorl 36(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 60(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 16(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 0(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 20(%rsp),%edx roll $30,%esi - movl %eax,44(%rsp) - addl %ebx,%r12d - leal -1894007588(%rax,%rbp,1),%r11d - movl 48(%rsp),%eax - movl %edx,%ebx - movl %edx,%ecx - xorl 56(%rsp),%eax - movl %r12d,%ebp - andl %esi,%ebx - xorl 16(%rsp),%eax - orl %esi,%ecx - roll $5,%ebp - xorl 36(%rsp),%eax - andl %edi,%ecx - addl %ebp,%r11d - roll $1,%eax - orl %ecx,%ebx - roll $30,%edx - movl %eax,48(%rsp) - addl %ebx,%r11d - leal -1894007588(%rax,%rdi,1),%ebp - movl 52(%rsp),%eax - movl %r12d,%ebx + addl %eax,%r12d + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %esi,%eax movl %r12d,%ecx - xorl 60(%rsp),%eax - movl %r11d,%edi - andl %edx,%ebx - xorl 20(%rsp),%eax - orl %edx,%ecx - roll $5,%edi - xorl 40(%rsp),%eax - andl %esi,%ecx - addl %edi,%ebp - roll $1,%eax - orl %ecx,%ebx - roll $30,%r12d - movl %eax,52(%rsp) - addl %ebx,%ebp - leal -1894007588(%rax,%rsi,1),%edi - movl 56(%rsp),%eax - movl %r11d,%ebx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 4(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 24(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r13d,%eax movl %r11d,%ecx - xorl 0(%rsp),%eax - 
movl %ebp,%esi - andl %r12d,%ebx - xorl 24(%rsp),%eax - orl %r12d,%ecx - roll $5,%esi - xorl 44(%rsp),%eax - andl %edx,%ecx - addl %esi,%edi - roll $1,%eax - orl %ecx,%ebx - roll $30,%r11d - movl %eax,56(%rsp) - addl %ebx,%edi - leal -1894007588(%rax,%rdx,1),%esi - movl 60(%rsp),%eax - movl %ebp,%ebx - movl %ebp,%ecx - xorl 4(%rsp),%eax - movl %edi,%edx - andl %r11d,%ebx - xorl 28(%rsp),%eax - orl %r11d,%ecx - roll $5,%edx - xorl 48(%rsp),%eax - andl %r12d,%ecx - addl %edx,%esi - roll $1,%eax - orl %ecx,%ebx - roll $30,%ebp - movl %eax,60(%rsp) - addl %ebx,%esi - leal -1894007588(%rax,%r12,1),%edx - movl 0(%rsp),%eax - movl %edi,%ebx + xorl 48(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 8(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 28(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %r12d,%eax movl %edi,%ecx - xorl 8(%rsp),%eax - movl %esi,%r12d - andl %ebp,%ebx - xorl 32(%rsp),%eax - orl %ebp,%ecx - roll $5,%r12d - xorl 52(%rsp),%eax - andl %r11d,%ecx - addl %r12d,%edx - roll $1,%eax - orl %ecx,%ebx - roll $30,%edi - movl %eax,0(%rsp) - addl %ebx,%edx - leal -1894007588(%rax,%r11,1),%r12d - movl 4(%rsp),%eax - movl %esi,%ebx + xorl 52(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 12(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 32(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %r11d,%eax movl %esi,%ecx - xorl 12(%rsp),%eax - movl %edx,%r11d - andl %edi,%ebx - xorl 36(%rsp),%eax - orl %edi,%ecx - roll $5,%r11d - xorl 56(%rsp),%eax - andl %ebp,%ecx - addl %r11d,%r12d - roll $1,%eax - orl %ecx,%ebx + xorl 56(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 16(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 36(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 60(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 20(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 40(%rsp),%ebp roll $30,%esi - movl %eax,4(%rsp) - addl %ebx,%r12d - leal -1894007588(%rax,%rbp,1),%r11d - movl 8(%rsp),%eax - movl %edx,%ebx - movl %edx,%ecx - xorl 16(%rsp),%eax - movl %r12d,%ebp - andl %esi,%ebx - xorl 40(%rsp),%eax - orl %esi,%ecx - roll $5,%ebp - xorl 60(%rsp),%eax - andl %edi,%ecx - addl %ebp,%r11d - roll $1,%eax - orl %ecx,%ebx - roll $30,%edx - movl %eax,8(%rsp) - addl %ebx,%r11d - leal -1894007588(%rax,%rdi,1),%ebp - movl 12(%rsp),%eax - movl %r12d,%ebx + addl %eax,%r12d + roll $1,%ebp + movl 56(%rsp),%edx + movl %esi,%eax movl %r12d,%ecx - xorl 20(%rsp),%eax - movl %r11d,%edi - andl %edx,%ebx - xorl 44(%rsp),%eax - orl %edx,%ecx - roll $5,%edi - xorl 0(%rsp),%eax - andl %esi,%ecx - addl %edi,%ebp - roll $1,%eax - orl %ecx,%ebx - roll $30,%r12d - movl %eax,12(%rsp) - addl %ebx,%ebp - leal -1894007588(%rax,%rsi,1),%edi - movl 16(%rsp),%eax - movl %r11d,%ebx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 24(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 44(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl 60(%rsp),%ebp + movl %r13d,%eax movl %r11d,%ecx - xorl 24(%rsp),%eax - movl %ebp,%esi - andl %r12d,%ebx - xorl 48(%rsp),%eax - orl %r12d,%ecx - roll $5,%esi - xorl 4(%rsp),%eax - andl %edx,%ecx - addl %esi,%edi - roll $1,%eax - orl %ecx,%ebx + 
xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 28(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 48(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl %r11d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r13d,%eax + addl %ecx,%esi roll $30,%r11d - movl %eax,16(%rsp) - addl %ebx,%edi - leal -1894007588(%rax,%rdx,1),%esi - movl 20(%rsp),%eax - movl %ebp,%ebx - movl %ebp,%ecx - xorl 28(%rsp),%eax - movl %edi,%edx - andl %r11d,%ebx - xorl 52(%rsp),%eax - orl %r11d,%ecx + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz .Lloop + + movq 64(%rsp),%rsi + movq (%rsi),%r13 + movq 8(%rsi),%r12 + movq 16(%rsi),%rbp + movq 24(%rsi),%rbx + leaq 32(%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha1_block_data_order,.-sha1_block_data_order +.type sha1_block_data_order_ssse3,@function +.align 16 +sha1_block_data_order_ssse3: +_ssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + leaq -64(%rsp),%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX(%rip),%r11 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm0 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp .Loop_ssse3 +.align 16 +.Loop_ssse3: + movdqa %xmm1,%xmm4 + addl 0(%rsp),%ebp + xorl %edx,%ecx + movdqa %xmm3,%xmm8 +.byte 102,15,58,15,224,8 + movl %eax,%edi + roll $5,%eax + paddd %xmm3,%xmm9 + andl %ecx,%esi + xorl %edx,%ecx + psrldq $4,%xmm8 + xorl %edx,%esi + addl %eax,%ebp + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%ebp + pxor %xmm2,%xmm8 + addl 4(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pxor %xmm8,%xmm4 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm9,48(%rsp) + xorl %ecx,%edi + addl %ebp,%edx + movdqa %xmm4,%xmm10 + movdqa %xmm4,%xmm8 + rorl $7,%eax + addl %edi,%edx + addl 8(%rsp),%ecx + xorl %ebx,%eax + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi roll $5,%edx - xorl 8(%rsp),%eax - andl %r12d,%ecx - addl %edx,%esi - roll $1,%eax - orl %ecx,%ebx - roll $30,%ebp - movl %eax,20(%rsp) - addl %ebx,%esi - leal -1894007588(%rax,%r12,1),%edx - movl 24(%rsp),%eax - movl %edi,%ebx - movl %edi,%ecx - xorl 32(%rsp),%eax - movl %esi,%r12d - andl %ebp,%ebx - xorl 56(%rsp),%eax - orl %ebp,%ecx - roll $5,%r12d - xorl 12(%rsp),%eax - andl %r11d,%ecx - addl %r12d,%edx - roll $1,%eax - orl %ecx,%ebx - roll $30,%edi - movl %eax,24(%rsp) - addl %ebx,%edx - leal -1894007588(%rax,%r11,1),%r12d - movl 28(%rsp),%eax - movl %esi,%ebx - movl %esi,%ecx - xorl 36(%rsp),%eax - movl %edx,%r11d - andl %edi,%ebx - xorl 60(%rsp),%eax - orl %edi,%ecx - roll $5,%r11d - xorl 16(%rsp),%eax - andl %ebp,%ecx - addl %r11d,%r12d - roll $1,%eax - orl %ecx,%ebx - roll $30,%esi - movl %eax,28(%rsp) - addl %ebx,%r12d - leal -1894007588(%rax,%rbp,1),%r11d - movl 32(%rsp),%eax - movl %edx,%ebx - movl %edx,%ecx - xorl 40(%rsp),%eax - 
movl %r12d,%ebp - andl %esi,%ebx - xorl 0(%rsp),%eax - orl %esi,%ecx + andl %eax,%esi + xorl %ebx,%eax + psrld $31,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + movdqa %xmm10,%xmm9 + rorl $7,%ebp + addl %esi,%ecx + psrld $30,%xmm10 + por %xmm8,%xmm4 + addl 12(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + andl %ebp,%edi + xorl %eax,%ebp + movdqa 0(%r11),%xmm10 + xorl %eax,%edi + addl %ecx,%ebx + pxor %xmm9,%xmm4 + rorl $7,%edx + addl %edi,%ebx + movdqa %xmm2,%xmm5 + addl 16(%rsp),%eax + xorl %ebp,%edx + movdqa %xmm4,%xmm9 +.byte 102,15,58,15,233,8 + movl %ebx,%edi + roll $5,%ebx + paddd %xmm4,%xmm10 + andl %edx,%esi + xorl %ebp,%edx + psrldq $4,%xmm9 + xorl %ebp,%esi + addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm3,%xmm9 + addl 20(%rsp),%ebp + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pxor %xmm9,%xmm5 + andl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm10,0(%rsp) + xorl %edx,%edi + addl %eax,%ebp + movdqa %xmm5,%xmm8 + movdqa %xmm5,%xmm9 + rorl $7,%ebx + addl %edi,%ebp + addl 24(%rsp),%edx + xorl %ecx,%ebx + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi roll $5,%ebp - xorl 20(%rsp),%eax - andl %edi,%ecx - addl %ebp,%r11d - roll $1,%eax - orl %ecx,%ebx - roll $30,%edx - movl %eax,32(%rsp) - addl %ebx,%r11d - leal -1894007588(%rax,%rdi,1),%ebp - movl 36(%rsp),%eax - movl %r12d,%ebx - movl %r12d,%ecx - xorl 44(%rsp),%eax - movl %r11d,%edi - andl %edx,%ebx - xorl 4(%rsp),%eax - orl %edx,%ecx - roll $5,%edi - xorl 24(%rsp),%eax - andl %esi,%ecx + andl %ebx,%esi + xorl %ecx,%ebx + psrld $31,%xmm9 + xorl %ecx,%esi + addl %ebp,%edx + movdqa %xmm8,%xmm10 + rorl $7,%eax + addl %esi,%edx + psrld $30,%xmm8 + por %xmm9,%xmm5 + addl 28(%rsp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + andl %eax,%edi + xorl %ebx,%eax + movdqa 16(%r11),%xmm8 + xorl %ebx,%edi + addl %edx,%ecx + pxor %xmm10,%xmm5 + rorl $7,%ebp + addl %edi,%ecx + movdqa %xmm3,%xmm6 + addl 32(%rsp),%ebx + xorl %eax,%ebp + movdqa %xmm5,%xmm10 +.byte 102,15,58,15,242,8 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm5,%xmm8 + andl %ebp,%esi + xorl %eax,%ebp + psrldq $4,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm4,%xmm10 + addl 36(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + pxor %xmm10,%xmm6 + andl %edx,%edi + xorl %ebp,%edx + movdqa %xmm8,16(%rsp) + xorl %ebp,%edi + addl %ebx,%eax + movdqa %xmm6,%xmm9 + movdqa %xmm6,%xmm10 + rorl $7,%ecx + addl %edi,%eax + addl 40(%rsp),%ebp + xorl %edx,%ecx + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx + psrld $31,%xmm10 + xorl %edx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + psrld $30,%xmm9 + por %xmm10,%xmm6 + addl 44(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa 16(%r11),%xmm9 + xorl %ecx,%edi + addl %ebp,%edx + pxor %xmm8,%xmm6 + rorl $7,%eax + addl %edi,%edx + movdqa %xmm4,%xmm7 + addl 48(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm6,%xmm8 +.byte 102,15,58,15,251,8 + movl %edx,%edi + roll $5,%edx + paddd %xmm6,%xmm9 + andl %eax,%esi + xorl %ebx,%eax + psrldq $4,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm5,%xmm8 + addl 52(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + pxor %xmm8,%xmm7 + andl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm9,32(%rsp) + 
xorl %eax,%edi + addl %ecx,%ebx + movdqa %xmm7,%xmm10 + movdqa %xmm7,%xmm8 + rorl $7,%edx + addl %edi,%ebx + addl 56(%rsp),%eax + xorl %ebp,%edx + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + roll $5,%ebx + andl %edx,%esi + xorl %ebp,%edx + psrld $31,%xmm8 + xorl %ebp,%esi + addl %ebx,%eax + movdqa %xmm10,%xmm9 + rorl $7,%ecx + addl %esi,%eax + psrld $30,%xmm10 + por %xmm8,%xmm7 + addl 60(%rsp),%ebp + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + andl %ecx,%edi + xorl %edx,%ecx + movdqa 16(%r11),%xmm10 + xorl %edx,%edi + addl %eax,%ebp + pxor %xmm9,%xmm7 + rorl $7,%ebx addl %edi,%ebp - roll $1,%eax - orl %ecx,%ebx - roll $30,%r12d - movl %eax,36(%rsp) - addl %ebx,%ebp - leal -1894007588(%rax,%rsi,1),%edi - movl 40(%rsp),%eax - movl %r11d,%ebx - movl %r11d,%ecx - xorl 48(%rsp),%eax + movdqa %xmm7,%xmm9 + addl 0(%rsp),%edx + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,206,8 + xorl %ecx,%ebx + movl %ebp,%edi + roll $5,%ebp + pxor %xmm1,%xmm0 + andl %ebx,%esi + xorl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm7,%xmm10 + xorl %ecx,%esi + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm0,%xmm9 + movdqa %xmm10,48(%rsp) + movl %edx,%esi + roll $5,%edx + andl %eax,%edi + xorl %ebx,%eax + pslld $2,%xmm0 + xorl %ebx,%edi + addl %edx,%ecx + psrld $30,%xmm9 + rorl $7,%ebp + addl %edi,%ecx + addl 8(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%edi + roll $5,%ecx + por %xmm9,%xmm0 + andl %ebp,%esi + xorl %eax,%ebp + movdqa %xmm0,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + andl %edx,%edi + xorl %ebp,%edx + xorl %ebp,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 16(%rsp),%ebp + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,215,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm8,%xmm9 + paddd %xmm0,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ecx,%edi movl %ebp,%esi - andl %r12d,%ebx - xorl 8(%rsp),%eax - orl %r12d,%ecx - roll $5,%esi - xorl 28(%rsp),%eax - andl %edx,%ecx - addl %esi,%edi - roll $1,%eax - orl %ecx,%ebx - roll $30,%r11d - movl %eax,40(%rsp) - addl %ebx,%edi - leal -1894007588(%rax,%rdx,1),%esi - movl 44(%rsp),%eax - movl %ebp,%ebx - movl %ebp,%ecx - xorl 52(%rsp),%eax - movl %edi,%edx - andl %r11d,%ebx - xorl 12(%rsp),%eax - orl %r11d,%ecx + roll $5,%ebp + movdqa %xmm1,%xmm10 + movdqa %xmm8,0(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm10 + movl %edx,%edi roll $5,%edx - xorl 32(%rsp),%eax - andl %r12d,%ecx - addl %edx,%esi - roll $1,%eax - orl %ecx,%ebx - roll $30,%ebp - movl %eax,44(%rsp) - addl %ebx,%esi - leal -1894007588(%rax,%r12,1),%edx - movl 48(%rsp),%eax - movl %edi,%ebx - movl %edi,%ecx - xorl 56(%rsp),%eax - movl %esi,%r12d - andl %ebp,%ebx - xorl 16(%rsp),%eax - orl %ebp,%ecx - roll $5,%r12d - xorl 36(%rsp),%eax - andl %r11d,%ecx - addl %r12d,%edx - roll $1,%eax - orl %ecx,%ebx - roll $30,%edi - movl %eax,48(%rsp) - addl %ebx,%edx - leal -899497514(%rax,%r11,1),%r12d - movl 52(%rsp),%eax - movl %edi,%ebx - movl %edx,%r11d - xorl 60(%rsp),%eax - xorl %esi,%ebx - roll $5,%r11d - xorl 20(%rsp),%eax - xorl %ebp,%ebx - addl %r11d,%r12d - xorl 40(%rsp),%eax - roll $30,%esi - addl %ebx,%r12d - roll $1,%eax - movl %eax,52(%rsp) - leal -899497514(%rax,%rbp,1),%r11d - 
movl 56(%rsp),%eax - movl %esi,%ebx - movl %r12d,%ebp - xorl 0(%rsp),%eax - xorl %edx,%ebx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm10,%xmm1 + addl 28(%rsp),%ebx + xorl %eax,%edi + movdqa %xmm1,%xmm8 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 32(%rsp),%eax + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,192,8 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + xorl %edx,%esi + addl %ebx,%eax + movdqa 32(%r11),%xmm10 + paddd %xmm1,%xmm9 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + movdqa %xmm9,16(%rsp) + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + pslld $2,%xmm2 + addl 40(%rsp),%edx + xorl %ecx,%esi + psrld $30,%xmm8 + movl %ebp,%edi roll $5,%ebp - xorl 24(%rsp),%eax - xorl %edi,%ebx - addl %ebp,%r11d - xorl 44(%rsp),%eax - roll $30,%edx - addl %ebx,%r11d - roll $1,%eax - movl %eax,56(%rsp) - leal -899497514(%rax,%rdi,1),%ebp - movl 60(%rsp),%eax - movl %edx,%ebx - movl %r11d,%edi - xorl 4(%rsp),%eax - xorl %r12d,%ebx - roll $5,%edi - xorl 28(%rsp),%eax - xorl %esi,%ebx + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + por %xmm8,%xmm2 + addl 44(%rsp),%ecx + xorl %ebx,%edi + movdqa %xmm2,%xmm9 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 48(%rsp),%ebx + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,201,8 + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + xorl %ebp,%esi + addl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm2,%xmm10 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + movdqa %xmm10,32(%rsp) + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + pslld $2,%xmm3 + addl 56(%rsp),%ebp + xorl %edx,%esi + psrld $30,%xmm9 + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + por %xmm9,%xmm3 + addl 60(%rsp),%edx + xorl %ecx,%edi + movdqa %xmm3,%xmm10 + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 0(%rsp),%ecx + pxor %xmm0,%xmm4 +.byte 102,68,15,58,15,210,8 + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + xorl %eax,%esi + addl %edx,%ecx + movdqa %xmm8,%xmm9 + paddd %xmm3,%xmm8 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + movdqa %xmm8,48(%rsp) + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + pslld $2,%xmm4 + addl 8(%rsp),%eax + xorl %ebp,%esi + psrld $30,%xmm10 + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + por %xmm10,%xmm4 + addl 12(%rsp),%ebp + xorl %edx,%edi + movdqa %xmm4,%xmm8 + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx addl %edi,%ebp - xorl 48(%rsp),%eax - roll $30,%r12d - addl %ebx,%ebp - roll $1,%eax - movl %eax,60(%rsp) - leal -899497514(%rax,%rsi,1),%edi - movl 0(%rsp),%eax - movl %r12d,%ebx + addl 16(%rsp),%edx + pxor %xmm1,%xmm5 +.byte 102,68,15,58,15,195,8 + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + xorl %ebx,%esi + addl %ebp,%edx + movdqa %xmm9,%xmm10 + paddd %xmm4,%xmm9 + rorl $7,%eax + addl %esi,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + movdqa 
%xmm5,%xmm8 + movdqa %xmm9,0(%rsp) + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + pslld $2,%xmm5 + addl 24(%rsp),%ebx + xorl %eax,%esi + psrld $30,%xmm8 + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + por %xmm8,%xmm5 + addl 28(%rsp),%eax + xorl %ebp,%edi + movdqa %xmm5,%xmm9 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movl %ecx,%edi + pxor %xmm2,%xmm6 +.byte 102,68,15,58,15,204,8 + xorl %edx,%ecx + addl 32(%rsp),%ebp + andl %edx,%edi + pxor %xmm7,%xmm6 + andl %ecx,%esi + rorl $7,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm5,%xmm10 + addl %edi,%ebp + movl %eax,%edi + pxor %xmm9,%xmm6 + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movdqa %xmm6,%xmm9 + movdqa %xmm10,16(%rsp) + movl %ebx,%esi + xorl %ecx,%ebx + addl 36(%rsp),%edx + andl %ecx,%esi + pslld $2,%xmm6 + andl %ebx,%edi + rorl $7,%eax + psrld $30,%xmm9 + addl %esi,%edx movl %ebp,%esi - xorl 8(%rsp),%eax - xorl %r11d,%ebx - roll $5,%esi - xorl 32(%rsp),%eax - xorl %edx,%ebx - addl %esi,%edi - xorl 52(%rsp),%eax - roll $30,%r11d - addl %ebx,%edi - roll $1,%eax - movl %eax,0(%rsp) - leal -899497514(%rax,%rdx,1),%esi - movl 4(%rsp),%eax - movl %r11d,%ebx - movl %edi,%edx - xorl 12(%rsp),%eax - xorl %ebp,%ebx + roll $5,%ebp + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + por %xmm9,%xmm6 + movl %eax,%edi + xorl %ebx,%eax + movdqa %xmm6,%xmm10 + addl 40(%rsp),%ecx + andl %ebx,%edi + andl %eax,%esi + rorl $7,%ebp + addl %edi,%ecx + movl %edx,%edi roll $5,%edx - xorl 36(%rsp),%eax - xorl %r12d,%ebx - addl %edx,%esi - xorl 56(%rsp),%eax - roll $30,%ebp - addl %ebx,%esi - roll $1,%eax - movl %eax,4(%rsp) - leal -899497514(%rax,%r12,1),%edx - movl 8(%rsp),%eax - movl %ebp,%ebx - movl %esi,%r12d - xorl 16(%rsp),%eax - xorl %edi,%ebx - roll $5,%r12d - xorl 40(%rsp),%eax - xorl %r11d,%ebx - addl %r12d,%edx - xorl 60(%rsp),%eax - roll $30,%edi - addl %ebx,%edx - roll $1,%eax - movl %eax,8(%rsp) - leal -899497514(%rax,%r11,1),%r12d - movl 12(%rsp),%eax - movl %edi,%ebx - movl %edx,%r11d - xorl 20(%rsp),%eax - xorl %esi,%ebx - roll $5,%r11d - xorl 44(%rsp),%eax - xorl %ebp,%ebx - addl %r11d,%r12d - xorl 0(%rsp),%eax - roll $30,%esi - addl %ebx,%r12d - roll $1,%eax - movl %eax,12(%rsp) - leal -899497514(%rax,%rbp,1),%r11d - movl 16(%rsp),%eax - movl %esi,%ebx - movl %r12d,%ebp - xorl 24(%rsp),%eax - xorl %edx,%ebx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%esi + xorl %eax,%ebp + addl 44(%rsp),%ebx + andl %eax,%esi + andl %ebp,%edi + rorl $7,%edx + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%edi + pxor %xmm3,%xmm7 +.byte 102,68,15,58,15,213,8 + xorl %ebp,%edx + addl 48(%rsp),%eax + andl %ebp,%edi + pxor %xmm0,%xmm7 + andl %edx,%esi + rorl $7,%ecx + movdqa 48(%r11),%xmm9 + paddd %xmm6,%xmm8 + addl %edi,%eax + movl %ebx,%edi + pxor %xmm10,%xmm7 + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movdqa %xmm7,%xmm10 + movdqa %xmm8,32(%rsp) + movl %ecx,%esi + xorl %edx,%ecx + addl 52(%rsp),%ebp + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%edi + rorl $7,%ebx + psrld $30,%xmm10 + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + por %xmm10,%xmm7 + movl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm7,%xmm8 + addl 56(%rsp),%edx + andl %ecx,%edi + andl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + movl %ebp,%edi roll $5,%ebp - xorl 48(%rsp),%eax - xorl %edi,%ebx - addl 
%ebp,%r11d - xorl 4(%rsp),%eax - roll $30,%edx - addl %ebx,%r11d - roll $1,%eax - movl %eax,16(%rsp) - leal -899497514(%rax,%rdi,1),%ebp - movl 20(%rsp),%eax - movl %edx,%ebx - movl %r11d,%edi - xorl 28(%rsp),%eax - xorl %r12d,%ebx - roll $5,%edi - xorl 52(%rsp),%eax - xorl %esi,%ebx + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%esi + xorl %ebx,%eax + addl 60(%rsp),%ecx + andl %ebx,%esi + andl %eax,%edi + rorl $7,%ebp + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%edi + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,198,8 + xorl %eax,%ebp + addl 0(%rsp),%ebx + andl %eax,%edi + pxor %xmm1,%xmm0 + andl %ebp,%esi + rorl $7,%edx + movdqa %xmm9,%xmm10 + paddd %xmm7,%xmm9 + addl %edi,%ebx + movl %ecx,%edi + pxor %xmm8,%xmm0 + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movdqa %xmm0,%xmm8 + movdqa %xmm9,48(%rsp) + movl %edx,%esi + xorl %ebp,%edx + addl 4(%rsp),%eax + andl %ebp,%esi + pslld $2,%xmm0 + andl %edx,%edi + rorl $7,%ecx + psrld $30,%xmm8 + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + por %xmm8,%xmm0 + movl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm0,%xmm9 + addl 8(%rsp),%ebp + andl %edx,%edi + andl %ecx,%esi + rorl $7,%ebx addl %edi,%ebp - xorl 8(%rsp),%eax - roll $30,%r12d - addl %ebx,%ebp - roll $1,%eax - movl %eax,20(%rsp) - leal -899497514(%rax,%rsi,1),%edi - movl 24(%rsp),%eax - movl %r12d,%ebx + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%esi + xorl %ecx,%ebx + addl 12(%rsp),%edx + andl %ecx,%esi + andl %ebx,%edi + rorl $7,%eax + addl %esi,%edx movl %ebp,%esi - xorl 32(%rsp),%eax - xorl %r11d,%ebx - roll $5,%esi - xorl 56(%rsp),%eax - xorl %edx,%ebx - addl %esi,%edi - xorl 12(%rsp),%eax - roll $30,%r11d - addl %ebx,%edi - roll $1,%eax - movl %eax,24(%rsp) - leal -899497514(%rax,%rdx,1),%esi - movl 28(%rsp),%eax - movl %r11d,%ebx - movl %edi,%edx - xorl 36(%rsp),%eax - xorl %ebp,%ebx + roll $5,%ebp + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%edi + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,207,8 + xorl %ebx,%eax + addl 16(%rsp),%ecx + andl %ebx,%edi + pxor %xmm2,%xmm1 + andl %eax,%esi + rorl $7,%ebp + movdqa %xmm10,%xmm8 + paddd %xmm0,%xmm10 + addl %edi,%ecx + movl %edx,%edi + pxor %xmm9,%xmm1 roll $5,%edx - xorl 60(%rsp),%eax - xorl %r12d,%ebx - addl %edx,%esi - xorl 16(%rsp),%eax - roll $30,%ebp - addl %ebx,%esi - roll $1,%eax - movl %eax,28(%rsp) - leal -899497514(%rax,%r12,1),%edx - movl 32(%rsp),%eax - movl %ebp,%ebx - movl %esi,%r12d - xorl 40(%rsp),%eax - xorl %edi,%ebx - roll $5,%r12d - xorl 0(%rsp),%eax - xorl %r11d,%ebx - addl %r12d,%edx - xorl 20(%rsp),%eax - roll $30,%edi - addl %ebx,%edx - roll $1,%eax - movl %eax,32(%rsp) - leal -899497514(%rax,%r11,1),%r12d - movl 36(%rsp),%eax - movl %edi,%ebx - movl %edx,%r11d - xorl 44(%rsp),%eax - xorl %esi,%ebx - roll $5,%r11d - xorl 4(%rsp),%eax - xorl %ebp,%ebx - addl %r11d,%r12d - xorl 24(%rsp),%eax - roll $30,%esi - addl %ebx,%r12d - roll $1,%eax - movl %eax,36(%rsp) - leal -899497514(%rax,%rbp,1),%r11d - movl 40(%rsp),%eax - movl %esi,%ebx - movl %r12d,%ebp - xorl 48(%rsp),%eax - xorl %edx,%ebx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movdqa %xmm1,%xmm9 + movdqa %xmm10,0(%rsp) + movl %ebp,%esi + xorl %eax,%ebp + addl 20(%rsp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %ebp,%edi + rorl $7,%edx + psrld $30,%xmm9 + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + 
addl %ecx,%ebx + por %xmm9,%xmm1 + movl %edx,%edi + xorl %ebp,%edx + movdqa %xmm1,%xmm10 + addl 24(%rsp),%eax + andl %ebp,%edi + andl %edx,%esi + rorl $7,%ecx + addl %edi,%eax + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movl %ecx,%esi + xorl %edx,%ecx + addl 28(%rsp),%ebp + andl %edx,%esi + andl %ecx,%edi + rorl $7,%ebx + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%edi + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,208,8 + xorl %ecx,%ebx + addl 32(%rsp),%edx + andl %ecx,%edi + pxor %xmm3,%xmm2 + andl %ebx,%esi + rorl $7,%eax + movdqa %xmm8,%xmm9 + paddd %xmm1,%xmm8 + addl %edi,%edx + movl %ebp,%edi + pxor %xmm10,%xmm2 roll $5,%ebp - xorl 8(%rsp),%eax - xorl %edi,%ebx - addl %ebp,%r11d - xorl 28(%rsp),%eax - roll $30,%edx - addl %ebx,%r11d - roll $1,%eax - movl %eax,40(%rsp) - leal -899497514(%rax,%rdi,1),%ebp - movl 44(%rsp),%eax - movl %edx,%ebx - movl %r11d,%edi - xorl 52(%rsp),%eax - xorl %r12d,%ebx - roll $5,%edi - xorl 12(%rsp),%eax - xorl %esi,%ebx + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movdqa %xmm2,%xmm10 + movdqa %xmm8,16(%rsp) + movl %eax,%esi + xorl %ebx,%eax + addl 36(%rsp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%edi + rorl $7,%ebp + psrld $30,%xmm10 + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + por %xmm10,%xmm2 + movl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm2,%xmm8 + addl 40(%rsp),%ebx + andl %eax,%edi + andl %ebp,%esi + rorl $7,%edx + addl %edi,%ebx + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%esi + xorl %ebp,%edx + addl 44(%rsp),%eax + andl %ebp,%esi + andl %edx,%edi + rorl $7,%ecx + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + addl 48(%rsp),%ebp + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,193,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm10 + paddd %xmm2,%xmm9 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + movdqa %xmm9,32(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm8 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm8,%xmm3 + addl 60(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 0(%rsp),%eax + paddd %xmm3,%xmm10 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + movdqa %xmm10,48(%rsp) + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 4(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx addl %edi,%ebp - xorl 32(%rsp),%eax - roll $30,%r12d - addl %ebx,%ebp - roll $1,%eax - movl %eax,44(%rsp) - leal -899497514(%rax,%rsi,1),%edi - movl 48(%rsp),%eax - movl %r12d,%ebx + addl 8(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 12(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + cmpq %r10,%r9 + je .Ldone_ssse3 + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 
+.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %eax,%esi +.byte 102,15,56,0,206 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm9,%xmm0 + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + movdqa %xmm0,0(%rsp) + addl 20(%rsp),%eax + xorl %ebp,%edi + psubd %xmm9,%xmm0 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi movl %ebp,%esi - xorl 56(%rsp),%eax - xorl %r11d,%ebx - roll $5,%esi - xorl 16(%rsp),%eax - xorl %edx,%ebx - addl %esi,%edi - xorl 36(%rsp),%eax - roll $30,%r11d - addl %ebx,%edi - roll $1,%eax - movl %eax,48(%rsp) - leal -899497514(%rax,%rdx,1),%esi - movl 52(%rsp),%eax - movl %r11d,%ebx - movl %edi,%edx - xorl 60(%rsp),%eax - xorl %ebp,%ebx + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi +.byte 102,15,56,0,214 + movl %edx,%edi roll $5,%edx - xorl 20(%rsp),%eax - xorl %r12d,%ebx - addl %edx,%esi - xorl 40(%rsp),%eax - roll $30,%ebp - addl %ebx,%esi - roll $1,%eax - leal -899497514(%rax,%r12,1),%edx - movl 56(%rsp),%eax - movl %ebp,%ebx - movl %esi,%r12d - xorl 0(%rsp),%eax - xorl %edi,%ebx - roll $5,%r12d - xorl 24(%rsp),%eax - xorl %r11d,%ebx - addl %r12d,%edx - xorl 44(%rsp),%eax - roll $30,%edi - addl %ebx,%edx - roll $1,%eax - leal -899497514(%rax,%r11,1),%r12d - movl 60(%rsp),%eax - movl %edi,%ebx - movl %edx,%r11d - xorl 4(%rsp),%eax - xorl %esi,%ebx - roll $5,%r11d - xorl 28(%rsp),%eax - xorl %ebp,%ebx - addl %r11d,%r12d - xorl 48(%rsp),%eax - roll $30,%esi - addl %ebx,%r12d - roll $1,%eax - leal -899497514(%rax,%rbp,1),%r11d - movl %esi,%ebx - movl %r12d,%ebp - xorl %edx,%ebx + paddd %xmm9,%xmm1 + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + movdqa %xmm1,16(%rsp) + addl 36(%rsp),%ebx + xorl %eax,%edi + psubd %xmm9,%xmm1 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi +.byte 102,15,56,0,222 + movl %ebp,%edi roll $5,%ebp - xorl %edi,%ebx - addl %ebp,%r11d - roll $30,%edx - addl %ebx,%r11d - addl 0(%r8),%r11d - addl 4(%r8),%r12d - addl 8(%r8),%edx - addl 12(%r8),%esi - addl 16(%r8),%edi - movl %r11d,0(%r8) - movl %r12d,4(%r8) - movl %edx,8(%r8) - movl %esi,12(%r8) - movl %edi,16(%r8) - - xchgl %r11d,%edx - xchgl %r12d,%esi - xchgl %r11d,%edi - xchgl %r12d,%ebp + paddd %xmm9,%xmm2 + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + movdqa %xmm2,32(%rsp) + addl 52(%rsp),%ecx + xorl %ebx,%edi + psubd %xmm9,%xmm2 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl 
%ebp,16(%r8) + jmp .Loop_ssse3 - leaq 64(%r9),%r9 - subq $1,%r10 - jnz .Lloop - movq 64(%rsp),%rsi - movq (%rsi),%r12 +.align 16 +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 20(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + addl 36(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 52(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + leaq 64(%rsp),%rsi + movq 0(%rsi),%r12 movq 8(%rsi),%rbp movq 16(%rsi),%rbx leaq 24(%rsi),%rsp -.Lepilogue: +.Lepilogue_ssse3: .byte 0xf3,0xc3 -.size sha1_block_data_order,.-sha1_block_data_order +.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 +.align 64 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 + +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 + +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc + +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.align 16 +.align 64 diff --git a/deps/openssl/asm/x64-elf-gas/sha/sha512-x86_64.s b/deps/openssl/asm/x64-elf-gas/sha/sha512-x86_64.s index ddf7b907a9..576d7d8bfd 100644 --- a/deps/openssl/asm/x64-elf-gas/sha/sha512-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/sha/sha512-x86_64.s @@ -38,1880 +38,1688 @@ sha256_block_data_order: .Lloop: xorq %rdi,%rdi movl 0(%rsi),%r12d - bswapl %r12d movl %r8d,%r13d - movl %r8d,%r14d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d movl %r9d,%r15d + movl %r12d,0(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r8d,%r15d - movl %r12d,0(%rsp) + movl %ebx,%r11d - xorl %r14d,%r13d + rorl 
$11,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - addl %r11d,%r12d - - movl %eax,%r11d - addl %r13d,%r12d + xorl %ecx,%r11d + xorl %eax,%r14d addl %r15d,%r12d - movl %eax,%r13d - movl %eax,%r14d + movl %ebx,%r15d - rorl $2,%r11d - rorl $13,%r13d - movl %eax,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d - xorl %r13d,%r11d - rorl $9,%r13d - orl %ecx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d - xorl %r13d,%r11d - andl %ecx,%r15d addl %r12d,%edx - - andl %ebx,%r14d addl %r12d,%r11d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r11d + movl 4(%rsi),%r12d - bswapl %r12d movl %edx,%r13d - movl %edx,%r14d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d movl %r8d,%r15d + movl %r12d,4(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %edx,%r13d xorl %r9d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %edx,%r15d - movl %r12d,4(%rsp) + movl %eax,%r10d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %edx,%r13d xorl %r9d,%r15d - addl %r10d,%r12d - - movl %r11d,%r10d - addl %r13d,%r12d + xorl %ebx,%r10d + xorl %r11d,%r14d addl %r15d,%r12d - movl %r11d,%r13d - movl %r11d,%r14d + movl %eax,%r15d - rorl $2,%r10d - rorl $13,%r13d - movl %r11d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d - xorl %r13d,%r10d - rorl $9,%r13d - orl %ebx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d - xorl %r13d,%r10d - andl %ebx,%r15d addl %r12d,%ecx - - andl %eax,%r14d addl %r12d,%r10d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r10d + movl 8(%rsi),%r12d - bswapl %r12d movl %ecx,%r13d - movl %ecx,%r14d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d movl %edx,%r15d + movl %r12d,8(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %ecx,%r15d - movl %r12d,8(%rsp) + movl %r11d,%r9d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - addl %r9d,%r12d - - movl %r10d,%r9d - addl %r13d,%r12d + xorl %eax,%r9d + xorl %r10d,%r14d addl %r15d,%r12d - movl %r10d,%r13d - movl %r10d,%r14d + movl %r11d,%r15d - rorl $2,%r9d - rorl $13,%r13d - movl %r10d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d - xorl %r13d,%r9d - rorl $9,%r13d - orl %eax,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d - xorl %r13d,%r9d - andl %eax,%r15d addl %r12d,%ebx - - andl %r11d,%r14d addl %r12d,%r9d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r9d + movl 12(%rsi),%r12d - bswapl %r12d movl %ebx,%r13d - movl %ebx,%r14d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d movl %ecx,%r15d + movl %r12d,12(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %ebx,%r13d xorl %edx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %ebx,%r15d - movl %r12d,12(%rsp) + movl %r10d,%r8d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %ebx,%r13d xorl %edx,%r15d - addl %r8d,%r12d - - movl %r9d,%r8d - addl %r13d,%r12d + xorl %r11d,%r8d + xorl %r9d,%r14d addl %r15d,%r12d - movl %r9d,%r13d - movl %r9d,%r14d + movl %r10d,%r15d - rorl $2,%r8d - rorl $13,%r13d - movl %r9d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d - xorl %r13d,%r8d - rorl $9,%r13d - orl %r11d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d - xorl 
%r13d,%r8d - andl %r11d,%r15d addl %r12d,%eax - - andl %r10d,%r14d addl %r12d,%r8d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r8d + movl 16(%rsi),%r12d - bswapl %r12d movl %eax,%r13d - movl %eax,%r14d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d movl %ebx,%r15d + movl %r12d,16(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %eax,%r15d - movl %r12d,16(%rsp) + movl %r9d,%edx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - addl %edx,%r12d - - movl %r8d,%edx - addl %r13d,%r12d + xorl %r10d,%edx + xorl %r8d,%r14d addl %r15d,%r12d - movl %r8d,%r13d - movl %r8d,%r14d + movl %r9d,%r15d - rorl $2,%edx - rorl $13,%r13d - movl %r8d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d - xorl %r13d,%edx - rorl $9,%r13d - orl %r10d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx - xorl %r13d,%edx - andl %r10d,%r15d addl %r12d,%r11d - - andl %r9d,%r14d addl %r12d,%edx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%edx + movl 20(%rsi),%r12d - bswapl %r12d movl %r11d,%r13d - movl %r11d,%r14d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d movl %eax,%r15d + movl %r12d,20(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r11d,%r13d xorl %ebx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r11d,%r15d - movl %r12d,20(%rsp) + movl %r8d,%ecx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r11d,%r13d xorl %ebx,%r15d - addl %ecx,%r12d - - movl %edx,%ecx - addl %r13d,%r12d + xorl %r9d,%ecx + xorl %edx,%r14d addl %r15d,%r12d - movl %edx,%r13d - movl %edx,%r14d + movl %r8d,%r15d - rorl $2,%ecx - rorl $13,%r13d - movl %edx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d - xorl %r13d,%ecx - rorl $9,%r13d - orl %r9d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx - xorl %r13d,%ecx - andl %r9d,%r15d addl %r12d,%r10d - - andl %r8d,%r14d addl %r12d,%ecx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%ecx + movl 24(%rsi),%r12d - bswapl %r12d movl %r10d,%r13d - movl %r10d,%r14d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d movl %r11d,%r15d + movl %r12d,24(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r10d,%r15d - movl %r12d,24(%rsp) + movl %edx,%ebx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - addl %ebx,%r12d - - movl %ecx,%ebx - addl %r13d,%r12d + xorl %r8d,%ebx + xorl %ecx,%r14d addl %r15d,%r12d - movl %ecx,%r13d - movl %ecx,%r14d + movl %edx,%r15d - rorl $2,%ebx - rorl $13,%r13d - movl %ecx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d - xorl %r13d,%ebx - rorl $9,%r13d - orl %r8d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx - xorl %r13d,%ebx - andl %r8d,%r15d addl %r12d,%r9d - - andl %edx,%r14d addl %r12d,%ebx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%ebx + movl 28(%rsi),%r12d - bswapl %r12d movl %r9d,%r13d - movl %r9d,%r14d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d movl %r10d,%r15d + movl %r12d,28(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r9d,%r13d xorl %r11d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d 
+ + addl (%rbp,%rdi,4),%r12d andl %r9d,%r15d - movl %r12d,28(%rsp) + movl %ecx,%eax - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r9d,%r13d xorl %r11d,%r15d - addl %eax,%r12d - - movl %ebx,%eax - addl %r13d,%r12d + xorl %edx,%eax + xorl %ebx,%r14d addl %r15d,%r12d - movl %ebx,%r13d - movl %ebx,%r14d + movl %ecx,%r15d - rorl $2,%eax - rorl $13,%r13d - movl %ebx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d - xorl %r13d,%eax - rorl $9,%r13d - orl %edx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax - xorl %r13d,%eax - andl %edx,%r15d addl %r12d,%r8d - - andl %ecx,%r14d addl %r12d,%eax - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%eax + movl 32(%rsi),%r12d - bswapl %r12d movl %r8d,%r13d - movl %r8d,%r14d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d movl %r9d,%r15d + movl %r12d,32(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r8d,%r15d - movl %r12d,32(%rsp) + movl %ebx,%r11d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - addl %r11d,%r12d - - movl %eax,%r11d - addl %r13d,%r12d + xorl %ecx,%r11d + xorl %eax,%r14d addl %r15d,%r12d - movl %eax,%r13d - movl %eax,%r14d + movl %ebx,%r15d - rorl $2,%r11d - rorl $13,%r13d - movl %eax,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d - xorl %r13d,%r11d - rorl $9,%r13d - orl %ecx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d - xorl %r13d,%r11d - andl %ecx,%r15d addl %r12d,%edx - - andl %ebx,%r14d addl %r12d,%r11d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r11d + movl 36(%rsi),%r12d - bswapl %r12d movl %edx,%r13d - movl %edx,%r14d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d movl %r8d,%r15d - - rorl $6,%r13d - rorl $11,%r14d - xorl %r9d,%r15d - - xorl %r14d,%r13d - rorl $14,%r14d - andl %edx,%r15d movl %r12d,36(%rsp) - xorl %r14d,%r13d + rorl $9,%r14d + xorl %edx,%r13d xorl %r9d,%r15d - addl %r10d,%r12d - - movl %r11d,%r10d - addl %r13d,%r12d - addl %r15d,%r12d - movl %r11d,%r13d - movl %r11d,%r14d + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d - rorl $2,%r10d - rorl $13,%r13d - movl %r11d,%r15d addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d - xorl %r13d,%r10d - rorl $9,%r13d - orl %ebx,%r14d + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d - xorl %r13d,%r10d + rorl $6,%r13d + andl %r11d,%r10d andl %ebx,%r15d - addl %r12d,%ecx - andl %eax,%r14d - addl %r12d,%r10d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d - orl %r15d,%r14d + addl %r12d,%ecx + addl %r12d,%r10d leaq 1(%rdi),%rdi - addl %r14d,%r10d + movl 40(%rsi),%r12d - bswapl %r12d movl %ecx,%r13d - movl %ecx,%r14d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d movl %edx,%r15d + movl %r12d,40(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %ecx,%r15d - movl %r12d,40(%rsp) + movl %r11d,%r9d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - addl %r9d,%r12d - - movl %r10d,%r9d - addl %r13d,%r12d + xorl %eax,%r9d + xorl %r10d,%r14d addl %r15d,%r12d - movl %r10d,%r13d - movl %r10d,%r14d + movl %r11d,%r15d - rorl $2,%r9d - rorl $13,%r13d - movl %r10d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl 
%r10d,%r9d + andl %eax,%r15d - xorl %r13d,%r9d - rorl $9,%r13d - orl %eax,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d - xorl %r13d,%r9d - andl %eax,%r15d addl %r12d,%ebx - - andl %r11d,%r14d addl %r12d,%r9d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r9d + movl 44(%rsi),%r12d - bswapl %r12d movl %ebx,%r13d - movl %ebx,%r14d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d movl %ecx,%r15d + movl %r12d,44(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %ebx,%r13d xorl %edx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %ebx,%r15d - movl %r12d,44(%rsp) + movl %r10d,%r8d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %ebx,%r13d xorl %edx,%r15d - addl %r8d,%r12d - - movl %r9d,%r8d - addl %r13d,%r12d + xorl %r11d,%r8d + xorl %r9d,%r14d addl %r15d,%r12d - movl %r9d,%r13d - movl %r9d,%r14d + movl %r10d,%r15d - rorl $2,%r8d - rorl $13,%r13d - movl %r9d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d - xorl %r13d,%r8d - rorl $9,%r13d - orl %r11d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d - xorl %r13d,%r8d - andl %r11d,%r15d addl %r12d,%eax - - andl %r10d,%r14d addl %r12d,%r8d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r8d + movl 48(%rsi),%r12d - bswapl %r12d movl %eax,%r13d - movl %eax,%r14d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d movl %ebx,%r15d + movl %r12d,48(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %eax,%r15d - movl %r12d,48(%rsp) + movl %r9d,%edx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - addl %edx,%r12d - - movl %r8d,%edx - addl %r13d,%r12d + xorl %r10d,%edx + xorl %r8d,%r14d addl %r15d,%r12d - movl %r8d,%r13d - movl %r8d,%r14d + movl %r9d,%r15d - rorl $2,%edx - rorl $13,%r13d - movl %r8d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d - xorl %r13d,%edx - rorl $9,%r13d - orl %r10d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx - xorl %r13d,%edx - andl %r10d,%r15d addl %r12d,%r11d - - andl %r9d,%r14d addl %r12d,%edx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%edx + movl 52(%rsi),%r12d - bswapl %r12d movl %r11d,%r13d - movl %r11d,%r14d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d movl %eax,%r15d + movl %r12d,52(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r11d,%r13d xorl %ebx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r11d,%r15d - movl %r12d,52(%rsp) + movl %r8d,%ecx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r11d,%r13d xorl %ebx,%r15d - addl %ecx,%r12d - - movl %edx,%ecx - addl %r13d,%r12d + xorl %r9d,%ecx + xorl %edx,%r14d addl %r15d,%r12d - movl %edx,%r13d - movl %edx,%r14d + movl %r8d,%r15d - rorl $2,%ecx - rorl $13,%r13d - movl %edx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d - xorl %r13d,%ecx - rorl $9,%r13d - orl %r9d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx - xorl %r13d,%ecx - andl %r9d,%r15d addl %r12d,%r10d - - andl %r8d,%r14d addl %r12d,%ecx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%ecx + movl 56(%rsi),%r12d - bswapl %r12d movl %r10d,%r13d - movl %r10d,%r14d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d movl %r11d,%r15d + movl %r12d,56(%rsp) - rorl $6,%r13d - rorl 
$11,%r14d + rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r10d,%r15d - movl %r12d,56(%rsp) + movl %edx,%ebx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - addl %ebx,%r12d - - movl %ecx,%ebx - addl %r13d,%r12d + xorl %r8d,%ebx + xorl %ecx,%r14d addl %r15d,%r12d - movl %ecx,%r13d - movl %ecx,%r14d + movl %edx,%r15d - rorl $2,%ebx - rorl $13,%r13d - movl %ecx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d - xorl %r13d,%ebx - rorl $9,%r13d - orl %r8d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx - xorl %r13d,%ebx - andl %r8d,%r15d addl %r12d,%r9d - - andl %edx,%r14d addl %r12d,%ebx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%ebx + movl 60(%rsi),%r12d - bswapl %r12d movl %r9d,%r13d - movl %r9d,%r14d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d movl %r10d,%r15d + movl %r12d,60(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r9d,%r13d xorl %r11d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r9d,%r15d - movl %r12d,60(%rsp) + movl %ecx,%eax - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r9d,%r13d xorl %r11d,%r15d - addl %eax,%r12d - - movl %ebx,%eax - addl %r13d,%r12d + xorl %edx,%eax + xorl %ebx,%r14d addl %r15d,%r12d - movl %ebx,%r13d - movl %ebx,%r14d + movl %ecx,%r15d - rorl $2,%eax - rorl $13,%r13d - movl %ebx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d - xorl %r13d,%eax - rorl $9,%r13d - orl %edx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax - xorl %r13d,%eax - andl %edx,%r15d addl %r12d,%r8d - - andl %ecx,%r14d addl %r12d,%eax - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%eax + jmp .Lrounds_16_xx .align 16 .Lrounds_16_xx: movl 4(%rsp),%r13d - movl 56(%rsp),%r12d - - movl %r13d,%r15d + movl 56(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 36(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 36(%rsp),%r12d + xorl %r15d,%r14d addl 0(%rsp),%r12d movl %r8d,%r13d - movl %r8d,%r14d + addl %r14d,%r12d + movl %eax,%r14d + rorl $14,%r13d movl %r9d,%r15d + movl %r12d,0(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r8d,%r15d - movl %r12d,0(%rsp) + movl %ebx,%r11d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - addl %r11d,%r12d - - movl %eax,%r11d - addl %r13d,%r12d + xorl %ecx,%r11d + xorl %eax,%r14d addl %r15d,%r12d - movl %eax,%r13d - movl %eax,%r14d + movl %ebx,%r15d - rorl $2,%r11d - rorl $13,%r13d - movl %eax,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d - xorl %r13d,%r11d - rorl $9,%r13d - orl %ecx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d - xorl %r13d,%r11d - andl %ecx,%r15d addl %r12d,%edx - - andl %ebx,%r14d addl %r12d,%r11d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r11d - movl 8(%rsp),%r13d - movl 60(%rsp),%r12d - movl %r13d,%r15d + movl 
8(%rsp),%r13d + movl 60(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 40(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 40(%rsp),%r12d + xorl %r15d,%r14d addl 4(%rsp),%r12d movl %edx,%r13d - movl %edx,%r14d + addl %r14d,%r12d + movl %r11d,%r14d + rorl $14,%r13d movl %r8d,%r15d + movl %r12d,4(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %edx,%r13d xorl %r9d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %edx,%r15d - movl %r12d,4(%rsp) + movl %eax,%r10d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %edx,%r13d xorl %r9d,%r15d - addl %r10d,%r12d - - movl %r11d,%r10d - addl %r13d,%r12d + xorl %ebx,%r10d + xorl %r11d,%r14d addl %r15d,%r12d - movl %r11d,%r13d - movl %r11d,%r14d + movl %eax,%r15d - rorl $2,%r10d - rorl $13,%r13d - movl %r11d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d - xorl %r13d,%r10d - rorl $9,%r13d - orl %ebx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d - xorl %r13d,%r10d - andl %ebx,%r15d addl %r12d,%ecx - - andl %eax,%r14d addl %r12d,%r10d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r10d - movl 12(%rsp),%r13d - movl 0(%rsp),%r12d - movl %r13d,%r15d + movl 12(%rsp),%r13d + movl 0(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 44(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 44(%rsp),%r12d + xorl %r15d,%r14d addl 8(%rsp),%r12d movl %ecx,%r13d - movl %ecx,%r14d + addl %r14d,%r12d + movl %r10d,%r14d + rorl $14,%r13d movl %edx,%r15d + movl %r12d,8(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %ecx,%r15d - movl %r12d,8(%rsp) + movl %r11d,%r9d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - addl %r9d,%r12d - - movl %r10d,%r9d - addl %r13d,%r12d + xorl %eax,%r9d + xorl %r10d,%r14d addl %r15d,%r12d - movl %r10d,%r13d - movl %r10d,%r14d + movl %r11d,%r15d - rorl $2,%r9d - rorl $13,%r13d - movl %r10d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d - xorl %r13d,%r9d - rorl $9,%r13d - orl %eax,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d - xorl %r13d,%r9d - andl %eax,%r15d addl %r12d,%ebx - - andl %r11d,%r14d addl %r12d,%r9d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r9d - movl 16(%rsp),%r13d - movl 4(%rsp),%r12d - movl %r13d,%r15d + movl 16(%rsp),%r13d + movl 4(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 48(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + 
xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 48(%rsp),%r12d + xorl %r15d,%r14d addl 12(%rsp),%r12d movl %ebx,%r13d - movl %ebx,%r14d + addl %r14d,%r12d + movl %r9d,%r14d + rorl $14,%r13d movl %ecx,%r15d + movl %r12d,12(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %ebx,%r13d xorl %edx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %ebx,%r15d - movl %r12d,12(%rsp) + movl %r10d,%r8d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %ebx,%r13d xorl %edx,%r15d - addl %r8d,%r12d - - movl %r9d,%r8d - addl %r13d,%r12d + xorl %r11d,%r8d + xorl %r9d,%r14d addl %r15d,%r12d - movl %r9d,%r13d - movl %r9d,%r14d + movl %r10d,%r15d - rorl $2,%r8d - rorl $13,%r13d - movl %r9d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d - xorl %r13d,%r8d - rorl $9,%r13d - orl %r11d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d - xorl %r13d,%r8d - andl %r11d,%r15d addl %r12d,%eax - - andl %r10d,%r14d addl %r12d,%r8d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r8d - movl 20(%rsp),%r13d - movl 8(%rsp),%r12d - movl %r13d,%r15d + movl 20(%rsp),%r13d + movl 8(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 52(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 52(%rsp),%r12d + xorl %r15d,%r14d addl 16(%rsp),%r12d movl %eax,%r13d - movl %eax,%r14d + addl %r14d,%r12d + movl %r8d,%r14d + rorl $14,%r13d movl %ebx,%r15d + movl %r12d,16(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %eax,%r15d - movl %r12d,16(%rsp) + movl %r9d,%edx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - addl %edx,%r12d - - movl %r8d,%edx - addl %r13d,%r12d + xorl %r10d,%edx + xorl %r8d,%r14d addl %r15d,%r12d - movl %r8d,%r13d - movl %r8d,%r14d + movl %r9d,%r15d - rorl $2,%edx - rorl $13,%r13d - movl %r8d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d - xorl %r13d,%edx - rorl $9,%r13d - orl %r10d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx - xorl %r13d,%edx - andl %r10d,%r15d addl %r12d,%r11d - - andl %r9d,%r14d addl %r12d,%edx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%edx - movl 24(%rsp),%r13d - movl 12(%rsp),%r12d - movl %r13d,%r15d + movl 24(%rsp),%r13d + movl 12(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 56(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 56(%rsp),%r12d + xorl %r15d,%r14d addl 20(%rsp),%r12d movl %r11d,%r13d - movl %r11d,%r14d + addl %r14d,%r12d + movl %edx,%r14d + rorl $14,%r13d movl %eax,%r15d + movl %r12d,20(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r11d,%r13d xorl %ebx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + 
addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r11d,%r15d - movl %r12d,20(%rsp) + movl %r8d,%ecx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r11d,%r13d xorl %ebx,%r15d - addl %ecx,%r12d - - movl %edx,%ecx - addl %r13d,%r12d + xorl %r9d,%ecx + xorl %edx,%r14d addl %r15d,%r12d - movl %edx,%r13d - movl %edx,%r14d + movl %r8d,%r15d - rorl $2,%ecx - rorl $13,%r13d - movl %edx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d - xorl %r13d,%ecx - rorl $9,%r13d - orl %r9d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx - xorl %r13d,%ecx - andl %r9d,%r15d addl %r12d,%r10d - - andl %r8d,%r14d addl %r12d,%ecx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%ecx - movl 28(%rsp),%r13d - movl 16(%rsp),%r12d - movl %r13d,%r15d + movl 28(%rsp),%r13d + movl 16(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 60(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 60(%rsp),%r12d + xorl %r15d,%r14d addl 24(%rsp),%r12d movl %r10d,%r13d - movl %r10d,%r14d + addl %r14d,%r12d + movl %ecx,%r14d + rorl $14,%r13d movl %r11d,%r15d + movl %r12d,24(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r10d,%r15d - movl %r12d,24(%rsp) + movl %edx,%ebx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - addl %ebx,%r12d - - movl %ecx,%ebx - addl %r13d,%r12d + xorl %r8d,%ebx + xorl %ecx,%r14d addl %r15d,%r12d - movl %ecx,%r13d - movl %ecx,%r14d + movl %edx,%r15d - rorl $2,%ebx - rorl $13,%r13d - movl %ecx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d - xorl %r13d,%ebx - rorl $9,%r13d - orl %r8d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx - xorl %r13d,%ebx - andl %r8d,%r15d addl %r12d,%r9d - - andl %edx,%r14d addl %r12d,%ebx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%ebx - movl 32(%rsp),%r13d - movl 20(%rsp),%r12d - movl %r13d,%r15d + movl 32(%rsp),%r13d + movl 20(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 0(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 0(%rsp),%r12d + xorl %r15d,%r14d addl 28(%rsp),%r12d movl %r9d,%r13d - movl %r9d,%r14d + addl %r14d,%r12d + movl %ebx,%r14d + rorl $14,%r13d movl %r10d,%r15d + movl %r12d,28(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r9d,%r13d xorl %r11d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r9d,%r15d - movl %r12d,28(%rsp) + movl %ecx,%eax - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r9d,%r13d xorl %r11d,%r15d - addl %eax,%r12d - - movl %ebx,%eax - addl %r13d,%r12d + xorl %edx,%eax + xorl %ebx,%r14d addl %r15d,%r12d - movl %ebx,%r13d - movl %ebx,%r14d + movl %ecx,%r15d - rorl $2,%eax - rorl $13,%r13d - movl 
%ebx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d - xorl %r13d,%eax - rorl $9,%r13d - orl %edx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax - xorl %r13d,%eax - andl %edx,%r15d addl %r12d,%r8d - - andl %ecx,%r14d addl %r12d,%eax - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%eax - movl 36(%rsp),%r13d - movl 24(%rsp),%r12d - movl %r13d,%r15d + movl 36(%rsp),%r13d + movl 24(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 4(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 4(%rsp),%r12d + xorl %r15d,%r14d addl 32(%rsp),%r12d movl %r8d,%r13d - movl %r8d,%r14d + addl %r14d,%r12d + movl %eax,%r14d + rorl $14,%r13d movl %r9d,%r15d + movl %r12d,32(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r8d,%r15d - movl %r12d,32(%rsp) + movl %ebx,%r11d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - addl %r11d,%r12d - - movl %eax,%r11d - addl %r13d,%r12d + xorl %ecx,%r11d + xorl %eax,%r14d addl %r15d,%r12d - movl %eax,%r13d - movl %eax,%r14d - - rorl $2,%r11d - rorl $13,%r13d - movl %eax,%r15d - addl (%rbp,%rdi,4),%r12d - - xorl %r13d,%r11d - rorl $9,%r13d - orl %ecx,%r14d + movl %ebx,%r15d - xorl %r13d,%r11d + rorl $6,%r13d + andl %eax,%r11d andl %ecx,%r15d - addl %r12d,%edx - andl %ebx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx addl %r12d,%r11d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r11d - movl 40(%rsp),%r13d - movl 28(%rsp),%r12d - movl %r13d,%r15d + movl 40(%rsp),%r13d + movl 28(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 8(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 8(%rsp),%r12d + xorl %r15d,%r14d addl 36(%rsp),%r12d movl %edx,%r13d - movl %edx,%r14d + addl %r14d,%r12d + movl %r11d,%r14d + rorl $14,%r13d movl %r8d,%r15d + movl %r12d,36(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %edx,%r13d xorl %r9d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %edx,%r15d - movl %r12d,36(%rsp) + movl %eax,%r10d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %edx,%r13d xorl %r9d,%r15d - addl %r10d,%r12d - - movl %r11d,%r10d - addl %r13d,%r12d + xorl %ebx,%r10d + xorl %r11d,%r14d addl %r15d,%r12d - movl %r11d,%r13d - movl %r11d,%r14d + movl %eax,%r15d - rorl $2,%r10d - rorl $13,%r13d - movl %r11d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d - xorl %r13d,%r10d - rorl $9,%r13d - orl %ebx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d - xorl %r13d,%r10d - andl %ebx,%r15d addl %r12d,%ecx - - andl %eax,%r14d addl %r12d,%r10d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r10d - movl 44(%rsp),%r13d - movl 
32(%rsp),%r12d - movl %r13d,%r15d + movl 44(%rsp),%r13d + movl 32(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 12(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 12(%rsp),%r12d + xorl %r15d,%r14d addl 40(%rsp),%r12d movl %ecx,%r13d - movl %ecx,%r14d + addl %r14d,%r12d + movl %r10d,%r14d + rorl $14,%r13d movl %edx,%r15d + movl %r12d,40(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %ecx,%r15d - movl %r12d,40(%rsp) + movl %r11d,%r9d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - addl %r9d,%r12d - - movl %r10d,%r9d - addl %r13d,%r12d + xorl %eax,%r9d + xorl %r10d,%r14d addl %r15d,%r12d - movl %r10d,%r13d - movl %r10d,%r14d + movl %r11d,%r15d - rorl $2,%r9d - rorl $13,%r13d - movl %r10d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d - xorl %r13d,%r9d - rorl $9,%r13d - orl %eax,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d - xorl %r13d,%r9d - andl %eax,%r15d addl %r12d,%ebx - - andl %r11d,%r14d addl %r12d,%r9d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r9d - movl 48(%rsp),%r13d - movl 36(%rsp),%r12d - movl %r13d,%r15d + movl 48(%rsp),%r13d + movl 36(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 16(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 16(%rsp),%r12d + xorl %r15d,%r14d addl 44(%rsp),%r12d movl %ebx,%r13d - movl %ebx,%r14d + addl %r14d,%r12d + movl %r9d,%r14d + rorl $14,%r13d movl %ecx,%r15d + movl %r12d,44(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %ebx,%r13d xorl %edx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %ebx,%r15d - movl %r12d,44(%rsp) + movl %r10d,%r8d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %ebx,%r13d xorl %edx,%r15d - addl %r8d,%r12d - - movl %r9d,%r8d - addl %r13d,%r12d + xorl %r11d,%r8d + xorl %r9d,%r14d addl %r15d,%r12d - movl %r9d,%r13d - movl %r9d,%r14d + movl %r10d,%r15d - rorl $2,%r8d - rorl $13,%r13d - movl %r9d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d - xorl %r13d,%r8d - rorl $9,%r13d - orl %r11d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d - xorl %r13d,%r8d - andl %r11d,%r15d addl %r12d,%eax - - andl %r10d,%r14d addl %r12d,%r8d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r8d - movl 52(%rsp),%r13d - movl 40(%rsp),%r12d - movl %r13d,%r15d + movl 52(%rsp),%r13d + movl 40(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 20(%rsp),%r12d 
- xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 20(%rsp),%r12d + xorl %r15d,%r14d addl 48(%rsp),%r12d movl %eax,%r13d - movl %eax,%r14d + addl %r14d,%r12d + movl %r8d,%r14d + rorl $14,%r13d movl %ebx,%r15d + movl %r12d,48(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %eax,%r15d - movl %r12d,48(%rsp) + movl %r9d,%edx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - addl %edx,%r12d - - movl %r8d,%edx - addl %r13d,%r12d + xorl %r10d,%edx + xorl %r8d,%r14d addl %r15d,%r12d - movl %r8d,%r13d - movl %r8d,%r14d + movl %r9d,%r15d - rorl $2,%edx - rorl $13,%r13d - movl %r8d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d - xorl %r13d,%edx - rorl $9,%r13d - orl %r10d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx - xorl %r13d,%edx - andl %r10d,%r15d addl %r12d,%r11d - - andl %r9d,%r14d addl %r12d,%edx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%edx - movl 56(%rsp),%r13d - movl 44(%rsp),%r12d - movl %r13d,%r15d + movl 56(%rsp),%r13d + movl 44(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 24(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 24(%rsp),%r12d + xorl %r15d,%r14d addl 52(%rsp),%r12d movl %r11d,%r13d - movl %r11d,%r14d + addl %r14d,%r12d + movl %edx,%r14d + rorl $14,%r13d movl %eax,%r15d + movl %r12d,52(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r11d,%r13d xorl %ebx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r11d,%r15d - movl %r12d,52(%rsp) + movl %r8d,%ecx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r11d,%r13d xorl %ebx,%r15d - addl %ecx,%r12d - - movl %edx,%ecx - addl %r13d,%r12d + xorl %r9d,%ecx + xorl %edx,%r14d addl %r15d,%r12d - movl %edx,%r13d - movl %edx,%r14d + movl %r8d,%r15d - rorl $2,%ecx - rorl $13,%r13d - movl %edx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d - xorl %r13d,%ecx - rorl $9,%r13d - orl %r9d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx - xorl %r13d,%ecx - andl %r9d,%r15d addl %r12d,%r10d - - andl %r8d,%r14d addl %r12d,%ecx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%ecx - movl 60(%rsp),%r13d - movl 48(%rsp),%r12d - movl %r13d,%r15d + movl 60(%rsp),%r13d + movl 48(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 28(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 28(%rsp),%r12d + xorl %r15d,%r14d addl 56(%rsp),%r12d movl %r10d,%r13d - movl %r10d,%r14d + addl %r14d,%r12d + movl %ecx,%r14d + rorl $14,%r13d movl %r11d,%r15d + movl %r12d,56(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - xorl 
%r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r10d,%r15d - movl %r12d,56(%rsp) + movl %edx,%ebx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - addl %ebx,%r12d - - movl %ecx,%ebx - addl %r13d,%r12d + xorl %r8d,%ebx + xorl %ecx,%r14d addl %r15d,%r12d - movl %ecx,%r13d - movl %ecx,%r14d + movl %edx,%r15d - rorl $2,%ebx - rorl $13,%r13d - movl %ecx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d - xorl %r13d,%ebx - rorl $9,%r13d - orl %r8d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx - xorl %r13d,%ebx - andl %r8d,%r15d addl %r12d,%r9d - - andl %edx,%r14d addl %r12d,%ebx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%ebx - movl 0(%rsp),%r13d - movl 52(%rsp),%r12d - movl %r13d,%r15d + movl 0(%rsp),%r13d + movl 52(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 32(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 32(%rsp),%r12d + xorl %r15d,%r14d addl 60(%rsp),%r12d movl %r9d,%r13d - movl %r9d,%r14d + addl %r14d,%r12d + movl %ebx,%r14d + rorl $14,%r13d movl %r10d,%r15d + movl %r12d,60(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r9d,%r13d xorl %r11d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r9d,%r15d - movl %r12d,60(%rsp) + movl %ecx,%eax - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r9d,%r13d xorl %r11d,%r15d - addl %eax,%r12d - - movl %ebx,%eax - addl %r13d,%r12d + xorl %edx,%eax + xorl %ebx,%r14d addl %r15d,%r12d - movl %ebx,%r13d - movl %ebx,%r14d + movl %ecx,%r15d - rorl $2,%eax - rorl $13,%r13d - movl %ebx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d - xorl %r13d,%eax - rorl $9,%r13d - orl %edx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax - xorl %r13d,%eax - andl %edx,%r15d addl %r12d,%r8d - - andl %ecx,%r14d addl %r12d,%eax - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%eax + cmpq $64,%rdi jb .Lrounds_16_xx diff --git a/deps/openssl/asm/x64-elf-gas/x86_64cpuid.s b/deps/openssl/asm/x64-elf-gas/x86_64cpuid.s index 0a565a989b..e0a8287085 100644 --- a/deps/openssl/asm/x64-elf-gas/x86_64cpuid.s +++ b/deps/openssl/asm/x64-elf-gas/x86_64cpuid.s @@ -1,7 +1,11 @@ +.hidden OPENSSL_cpuid_setup .section .init call OPENSSL_cpuid_setup +.hidden OPENSSL_ia32cap_P +.comm OPENSSL_ia32cap_P,8,4 + .text @@ -67,7 +71,15 @@ OPENSSL_ia32_cpuid: movl $2147483648,%eax cpuid - cmpl $2147483656,%eax + cmpl $2147483649,%eax + jb .Lintel + movl %eax,%r10d + movl $2147483649,%eax + cpuid + orl %ecx,%r9d + andl $2049,%r9d + + cmpl $2147483656,%r10d jb .Lintel movl $2147483656,%eax @@ -78,12 +90,12 @@ OPENSSL_ia32_cpuid: movl $1,%eax cpuid btl $28,%edx - jnc .Ldone + jnc .Lgeneric shrl $16,%ebx cmpb %r10b,%bl - ja .Ldone + ja .Lgeneric andl $4026531839,%edx - jmp .Ldone + jmp .Lgeneric .Lintel: cmpl $4,%r11d @@ -100,30 +112,48 @@ OPENSSL_ia32_cpuid: .Lnocacheinfo: movl $1,%eax cpuid + andl $3220176895,%edx cmpl $0,%r9d jne .Lnotintel - orl $1048576,%edx + orl $1073741824,%edx andb $15,%ah cmpb $15,%ah - je .Lnotintel - orl $1073741824,%edx + jne .Lnotintel + orl $1048576,%edx 
.Lnotintel: btl $28,%edx - jnc .Ldone + jnc .Lgeneric andl $4026531839,%edx cmpl $0,%r10d - je .Ldone + je .Lgeneric orl $268435456,%edx shrl $16,%ebx cmpb $1,%bl - ja .Ldone + ja .Lgeneric andl $4026531839,%edx +.Lgeneric: + andl $2048,%r9d + andl $4294965247,%ecx + orl %ecx,%r9d + + movl %edx,%r10d + btl $27,%r9d + jnc .Lclear_avx + xorl %ecx,%ecx +.byte 0x0f,0x01,0xd0 + + andl $6,%eax + cmpl $6,%eax + je .Ldone +.Lclear_avx: + movl $4026525695,%eax + andl %eax,%r9d .Ldone: - shlq $32,%rcx - movl %edx,%eax + shlq $32,%r9 + movl %r10d,%eax movq %r8,%rbx - orq %rcx,%rax + orq %r9,%rax .byte 0xf3,0xc3 .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid @@ -192,3 +222,17 @@ OPENSSL_wipe_cpu: leaq 8(%rsp),%rax .byte 0xf3,0xc3 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu +.globl OPENSSL_ia32_rdrand +.type OPENSSL_ia32_rdrand,@function +.align 16 +OPENSSL_ia32_rdrand: + movl $8,%ecx +.Loop_rdrand: +.byte 72,15,199,240 + jc .Lbreak_rdrand + loop .Loop_rdrand +.Lbreak_rdrand: + cmpq $0,%rax + cmoveq %rcx,%rax + .byte 0xf3,0xc3 +.size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand diff --git a/deps/openssl/asm/x64-macosx-gas/aes/aes-x86_64.s b/deps/openssl/asm/x64-macosx-gas/aes/aes-x86_64.s index d42e1ea79a..88120a1898 100644 --- a/deps/openssl/asm/x64-macosx-gas/aes/aes-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/aes/aes-x86_64.s @@ -333,6 +333,9 @@ L$enc_compact_done: .globl _AES_encrypt .p2align 4 +.globl _asm_AES_encrypt +.private_extern _asm_AES_encrypt +_asm_AES_encrypt: _AES_encrypt: pushq %rbx pushq %rbp @@ -780,6 +783,9 @@ L$dec_compact_done: .globl _AES_decrypt .p2align 4 +.globl _asm_AES_decrypt +.private_extern _asm_AES_decrypt +_asm_AES_decrypt: _AES_decrypt: pushq %rbx pushq %rbp @@ -843,10 +849,10 @@ L$dec_prologue: L$dec_epilogue: .byte 0xf3,0xc3 -.globl _AES_set_encrypt_key +.globl _private_AES_set_encrypt_key .p2align 4 -_AES_set_encrypt_key: +_private_AES_set_encrypt_key: pushq %rbx pushq %rbp pushq %r12 @@ -1109,10 +1115,10 @@ L$exit: .byte 0xf3,0xc3 -.globl _AES_set_decrypt_key +.globl _private_AES_set_decrypt_key .p2align 4 -_AES_set_decrypt_key: +_private_AES_set_decrypt_key: pushq %rbx pushq %rbp pushq %r12 @@ -1300,6 +1306,9 @@ L$dec_key_epilogue: .p2align 4 +.globl _asm_AES_cbc_encrypt +.private_extern _asm_AES_cbc_encrypt +_asm_AES_cbc_encrypt: _AES_cbc_encrypt: cmpq $0,%rdx je L$cbc_epilogue diff --git a/deps/openssl/asm/x64-macosx-gas/aes/aesni-sha1-x86_64.s b/deps/openssl/asm/x64-macosx-gas/aes/aesni-sha1-x86_64.s new file mode 100644 index 0000000000..5ae41e019e --- /dev/null +++ b/deps/openssl/asm/x64-macosx-gas/aes/aesni-sha1-x86_64.s @@ -0,0 +1,1402 @@ +.text + + + +.globl _aesni_cbc_sha1_enc + +.p2align 4 +_aesni_cbc_sha1_enc: + + movl _OPENSSL_ia32cap_P+0(%rip),%r10d + movl _OPENSSL_ia32cap_P+4(%rip),%r11d + jmp aesni_cbc_sha1_enc_ssse3 + .byte 0xf3,0xc3 + + +.p2align 4 +aesni_cbc_sha1_enc_ssse3: + movq 8(%rsp),%r10 + + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -104(%rsp),%rsp + + + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movdqu (%r8),%xmm11 + movq %r8,88(%rsp) + shlq $6,%r14 + subq %r12,%r13 + movl 240(%r15),%r8d + addq %r10,%r14 + + leaq K_XX_XX(%rip),%r11 + movl 0(%r9),%eax + movl 4(%r9),%ebx + movl 8(%r9),%ecx + movl 12(%r9),%edx + movl %ebx,%esi + movl 16(%r9),%ebp + + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r10),%xmm0 + movdqu 16(%r10),%xmm1 + movdqu 32(%r10),%xmm2 + movdqu 48(%r10),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r10 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 +.byte 
102,15,56,0,222 + paddd %xmm9,%xmm0 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + movups (%r15),%xmm13 + movups 16(%r15),%xmm14 + jmp L$oop_ssse3 +.p2align 4 +L$oop_ssse3: + movdqa %xmm1,%xmm4 + addl 0(%rsp),%ebp + movups 0(%r12),%xmm12 + xorps %xmm13,%xmm12 + xorps %xmm12,%xmm11 +.byte 102,69,15,56,220,222 + movups 32(%r15),%xmm15 + xorl %edx,%ecx + movdqa %xmm3,%xmm8 +.byte 102,15,58,15,224,8 + movl %eax,%edi + roll $5,%eax + paddd %xmm3,%xmm9 + andl %ecx,%esi + xorl %edx,%ecx + psrldq $4,%xmm8 + xorl %edx,%esi + addl %eax,%ebp + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%ebp + pxor %xmm2,%xmm8 + addl 4(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pxor %xmm8,%xmm4 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm9,48(%rsp) + xorl %ecx,%edi +.byte 102,69,15,56,220,223 + movups 48(%r15),%xmm14 + addl %ebp,%edx + movdqa %xmm4,%xmm10 + movdqa %xmm4,%xmm8 + rorl $7,%eax + addl %edi,%edx + addl 8(%rsp),%ecx + xorl %ebx,%eax + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + roll $5,%edx + andl %eax,%esi + xorl %ebx,%eax + psrld $31,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + movdqa %xmm10,%xmm9 + rorl $7,%ebp + addl %esi,%ecx + psrld $30,%xmm10 + por %xmm8,%xmm4 + addl 12(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx +.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm15 + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + andl %ebp,%edi + xorl %eax,%ebp + movdqa 0(%r11),%xmm10 + xorl %eax,%edi + addl %ecx,%ebx + pxor %xmm9,%xmm4 + rorl $7,%edx + addl %edi,%ebx + movdqa %xmm2,%xmm5 + addl 16(%rsp),%eax + xorl %ebp,%edx + movdqa %xmm4,%xmm9 +.byte 102,15,58,15,233,8 + movl %ebx,%edi + roll $5,%ebx + paddd %xmm4,%xmm10 + andl %edx,%esi + xorl %ebp,%edx + psrldq $4,%xmm9 + xorl %ebp,%esi + addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm3,%xmm9 + addl 20(%rsp),%ebp +.byte 102,69,15,56,220,223 + movups 80(%r15),%xmm14 + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pxor %xmm9,%xmm5 + andl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm10,0(%rsp) + xorl %edx,%edi + addl %eax,%ebp + movdqa %xmm5,%xmm8 + movdqa %xmm5,%xmm9 + rorl $7,%ebx + addl %edi,%ebp + addl 24(%rsp),%edx + xorl %ecx,%ebx + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + roll $5,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + psrld $31,%xmm9 + xorl %ecx,%esi +.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm15 + addl %ebp,%edx + movdqa %xmm8,%xmm10 + rorl $7,%eax + addl %esi,%edx + psrld $30,%xmm8 + por %xmm9,%xmm5 + addl 28(%rsp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + andl %eax,%edi + xorl %ebx,%eax + movdqa 16(%r11),%xmm8 + xorl %ebx,%edi + addl %edx,%ecx + pxor %xmm10,%xmm5 + rorl $7,%ebp + addl %edi,%ecx + movdqa %xmm3,%xmm6 + addl 32(%rsp),%ebx + xorl %eax,%ebp + movdqa %xmm5,%xmm10 +.byte 102,15,58,15,242,8 + movl %ecx,%edi + roll $5,%ecx +.byte 102,69,15,56,220,223 + movups 112(%r15),%xmm14 + paddd %xmm5,%xmm8 + andl %ebp,%esi + xorl %eax,%ebp + psrldq $4,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm4,%xmm10 + addl 36(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + pxor %xmm10,%xmm6 + andl %edx,%edi + xorl %ebp,%edx + movdqa %xmm8,16(%rsp) + xorl %ebp,%edi + addl %ebx,%eax + movdqa %xmm6,%xmm9 + movdqa %xmm6,%xmm10 + rorl $7,%ecx + addl %edi,%eax + addl 40(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 128(%r15),%xmm15 + xorl 
%edx,%ecx + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx + psrld $31,%xmm10 + xorl %edx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + psrld $30,%xmm9 + por %xmm10,%xmm6 + addl 44(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa 16(%r11),%xmm9 + xorl %ecx,%edi +.byte 102,69,15,56,220,223 + movups 144(%r15),%xmm14 + addl %ebp,%edx + pxor %xmm8,%xmm6 + rorl $7,%eax + addl %edi,%edx + movdqa %xmm4,%xmm7 + addl 48(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm6,%xmm8 +.byte 102,15,58,15,251,8 + movl %edx,%edi + roll $5,%edx + paddd %xmm6,%xmm9 + andl %eax,%esi + xorl %ebx,%eax + psrldq $4,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm5,%xmm8 + addl 52(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx +.byte 102,69,15,56,220,222 + movups 160(%r15),%xmm15 + pxor %xmm8,%xmm7 + andl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm9,32(%rsp) + xorl %eax,%edi + addl %ecx,%ebx + movdqa %xmm7,%xmm10 + movdqa %xmm7,%xmm8 + rorl $7,%edx + addl %edi,%ebx + addl 56(%rsp),%eax + xorl %ebp,%edx + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + roll $5,%ebx + andl %edx,%esi + xorl %ebp,%edx + psrld $31,%xmm8 + xorl %ebp,%esi + addl %ebx,%eax + movdqa %xmm10,%xmm9 + rorl $7,%ecx + addl %esi,%eax + psrld $30,%xmm10 + por %xmm8,%xmm7 + addl 60(%rsp),%ebp + cmpl $11,%r8d + jb L$aesenclast1 + movups 176(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 192(%r15),%xmm15 +.byte 102,69,15,56,220,222 + je L$aesenclast1 + movups 208(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 224(%r15),%xmm15 +.byte 102,69,15,56,220,222 +L$aesenclast1: +.byte 102,69,15,56,221,223 + movups 16(%r15),%xmm14 + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + andl %ecx,%edi + xorl %edx,%ecx + movdqa 16(%r11),%xmm10 + xorl %edx,%edi + addl %eax,%ebp + pxor %xmm9,%xmm7 + rorl $7,%ebx + addl %edi,%ebp + movdqa %xmm7,%xmm9 + addl 0(%rsp),%edx + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,206,8 + xorl %ecx,%ebx + movl %ebp,%edi + roll $5,%ebp + pxor %xmm1,%xmm0 + andl %ebx,%esi + xorl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm7,%xmm10 + xorl %ecx,%esi + movups 16(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,0(%r13,%r12,1) + xorps %xmm12,%xmm11 +.byte 102,69,15,56,220,222 + movups 32(%r15),%xmm15 + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm0,%xmm9 + movdqa %xmm10,48(%rsp) + movl %edx,%esi + roll $5,%edx + andl %eax,%edi + xorl %ebx,%eax + pslld $2,%xmm0 + xorl %ebx,%edi + addl %edx,%ecx + psrld $30,%xmm9 + rorl $7,%ebp + addl %edi,%ecx + addl 8(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%edi + roll $5,%ecx +.byte 102,69,15,56,220,223 + movups 48(%r15),%xmm14 + por %xmm9,%xmm0 + andl %ebp,%esi + xorl %eax,%ebp + movdqa %xmm0,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + andl %edx,%edi + xorl %ebp,%edx + xorl %ebp,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 16(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm15 + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,215,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm8,%xmm9 + paddd %xmm0,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx 
+ xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + movdqa %xmm8,0(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm10 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi +.byte 102,69,15,56,220,223 + movups 80(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm10,%xmm1 + addl 28(%rsp),%ebx + xorl %eax,%edi + movdqa %xmm1,%xmm8 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 32(%rsp),%eax + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,192,8 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + xorl %edx,%esi + addl %ebx,%eax + movdqa 32(%r11),%xmm10 + paddd %xmm1,%xmm9 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + movdqa %xmm9,16(%rsp) + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + pslld $2,%xmm2 + addl 40(%rsp),%edx + xorl %ecx,%esi + psrld $30,%xmm8 + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + por %xmm8,%xmm2 + addl 44(%rsp),%ecx + xorl %ebx,%edi + movdqa %xmm2,%xmm9 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi +.byte 102,69,15,56,220,223 + movups 112(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 48(%rsp),%ebx + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,201,8 + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + xorl %ebp,%esi + addl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm2,%xmm10 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + movdqa %xmm10,32(%rsp) + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + pslld $2,%xmm3 + addl 56(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 128(%r15),%xmm15 + xorl %edx,%esi + psrld $30,%xmm9 + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + por %xmm9,%xmm3 + addl 60(%rsp),%edx + xorl %ecx,%edi + movdqa %xmm3,%xmm10 + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 0(%rsp),%ecx + pxor %xmm0,%xmm4 +.byte 102,68,15,58,15,210,8 + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + xorl %eax,%esi +.byte 102,69,15,56,220,223 + movups 144(%r15),%xmm14 + addl %edx,%ecx + movdqa %xmm8,%xmm9 + paddd %xmm3,%xmm8 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + movdqa %xmm8,48(%rsp) + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + pslld $2,%xmm4 + addl 8(%rsp),%eax + xorl %ebp,%esi + psrld $30,%xmm10 + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + por %xmm10,%xmm4 + addl 12(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movdqa %xmm4,%xmm8 + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 16(%rsp),%edx + pxor %xmm1,%xmm5 +.byte 102,68,15,58,15,195,8 + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + xorl %ebx,%esi + addl %ebp,%edx + movdqa %xmm9,%xmm10 + paddd %xmm4,%xmm9 + rorl $7,%eax + addl %esi,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + movdqa 
%xmm5,%xmm8 + movdqa %xmm9,0(%rsp) + xorl %eax,%edi + cmpl $11,%r8d + jb L$aesenclast2 + movups 176(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 192(%r15),%xmm15 +.byte 102,69,15,56,220,222 + je L$aesenclast2 + movups 208(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 224(%r15),%xmm15 +.byte 102,69,15,56,220,222 +L$aesenclast2: +.byte 102,69,15,56,221,223 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + pslld $2,%xmm5 + addl 24(%rsp),%ebx + xorl %eax,%esi + psrld $30,%xmm8 + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + por %xmm8,%xmm5 + addl 28(%rsp),%eax + xorl %ebp,%edi + movdqa %xmm5,%xmm9 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movl %ecx,%edi + movups 32(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,16(%r13,%r12,1) + xorps %xmm12,%xmm11 +.byte 102,69,15,56,220,222 + movups 32(%r15),%xmm15 + pxor %xmm2,%xmm6 +.byte 102,68,15,58,15,204,8 + xorl %edx,%ecx + addl 32(%rsp),%ebp + andl %edx,%edi + pxor %xmm7,%xmm6 + andl %ecx,%esi + rorl $7,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm5,%xmm10 + addl %edi,%ebp + movl %eax,%edi + pxor %xmm9,%xmm6 + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movdqa %xmm6,%xmm9 + movdqa %xmm10,16(%rsp) + movl %ebx,%esi + xorl %ecx,%ebx + addl 36(%rsp),%edx + andl %ecx,%esi + pslld $2,%xmm6 + andl %ebx,%edi + rorl $7,%eax + psrld $30,%xmm9 + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp +.byte 102,69,15,56,220,223 + movups 48(%r15),%xmm14 + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + por %xmm9,%xmm6 + movl %eax,%edi + xorl %ebx,%eax + movdqa %xmm6,%xmm10 + addl 40(%rsp),%ecx + andl %ebx,%edi + andl %eax,%esi + rorl $7,%ebp + addl %edi,%ecx + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%esi + xorl %eax,%ebp + addl 44(%rsp),%ebx + andl %eax,%esi + andl %ebp,%edi +.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm15 + rorl $7,%edx + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%edi + pxor %xmm3,%xmm7 +.byte 102,68,15,58,15,213,8 + xorl %ebp,%edx + addl 48(%rsp),%eax + andl %ebp,%edi + pxor %xmm0,%xmm7 + andl %edx,%esi + rorl $7,%ecx + movdqa 48(%r11),%xmm9 + paddd %xmm6,%xmm8 + addl %edi,%eax + movl %ebx,%edi + pxor %xmm10,%xmm7 + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movdqa %xmm7,%xmm10 + movdqa %xmm8,32(%rsp) + movl %ecx,%esi +.byte 102,69,15,56,220,223 + movups 80(%r15),%xmm14 + xorl %edx,%ecx + addl 52(%rsp),%ebp + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%edi + rorl $7,%ebx + psrld $30,%xmm10 + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + por %xmm10,%xmm7 + movl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm7,%xmm8 + addl 56(%rsp),%edx + andl %ecx,%edi + andl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + movl %ebp,%edi + roll $5,%ebp +.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm15 + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%esi + xorl %ebx,%eax + addl 60(%rsp),%ecx + andl %ebx,%esi + andl %eax,%edi + rorl $7,%ebp + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%edi + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,198,8 + xorl %eax,%ebp + addl 0(%rsp),%ebx + andl %eax,%edi + pxor %xmm1,%xmm0 + andl %ebp,%esi +.byte 102,69,15,56,220,223 + movups 112(%r15),%xmm14 + rorl $7,%edx + movdqa %xmm9,%xmm10 + paddd %xmm7,%xmm9 + 
addl %edi,%ebx + movl %ecx,%edi + pxor %xmm8,%xmm0 + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movdqa %xmm0,%xmm8 + movdqa %xmm9,48(%rsp) + movl %edx,%esi + xorl %ebp,%edx + addl 4(%rsp),%eax + andl %ebp,%esi + pslld $2,%xmm0 + andl %edx,%edi + rorl $7,%ecx + psrld $30,%xmm8 + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + por %xmm8,%xmm0 + movl %ecx,%edi +.byte 102,69,15,56,220,222 + movups 128(%r15),%xmm15 + xorl %edx,%ecx + movdqa %xmm0,%xmm9 + addl 8(%rsp),%ebp + andl %edx,%edi + andl %ecx,%esi + rorl $7,%ebx + addl %edi,%ebp + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%esi + xorl %ecx,%ebx + addl 12(%rsp),%edx + andl %ecx,%esi + andl %ebx,%edi + rorl $7,%eax + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp +.byte 102,69,15,56,220,223 + movups 144(%r15),%xmm14 + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%edi + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,207,8 + xorl %ebx,%eax + addl 16(%rsp),%ecx + andl %ebx,%edi + pxor %xmm2,%xmm1 + andl %eax,%esi + rorl $7,%ebp + movdqa %xmm10,%xmm8 + paddd %xmm0,%xmm10 + addl %edi,%ecx + movl %edx,%edi + pxor %xmm9,%xmm1 + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movdqa %xmm1,%xmm9 + movdqa %xmm10,0(%rsp) + movl %ebp,%esi + xorl %eax,%ebp + addl 20(%rsp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %ebp,%edi +.byte 102,69,15,56,220,222 + movups 160(%r15),%xmm15 + rorl $7,%edx + psrld $30,%xmm9 + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + por %xmm9,%xmm1 + movl %edx,%edi + xorl %ebp,%edx + movdqa %xmm1,%xmm10 + addl 24(%rsp),%eax + andl %ebp,%edi + andl %edx,%esi + rorl $7,%ecx + addl %edi,%eax + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movl %ecx,%esi + cmpl $11,%r8d + jb L$aesenclast3 + movups 176(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 192(%r15),%xmm15 +.byte 102,69,15,56,220,222 + je L$aesenclast3 + movups 208(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 224(%r15),%xmm15 +.byte 102,69,15,56,220,222 +L$aesenclast3: +.byte 102,69,15,56,221,223 + movups 16(%r15),%xmm14 + xorl %edx,%ecx + addl 28(%rsp),%ebp + andl %edx,%esi + andl %ecx,%edi + rorl $7,%ebx + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%edi + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,208,8 + xorl %ecx,%ebx + addl 32(%rsp),%edx + andl %ecx,%edi + pxor %xmm3,%xmm2 + andl %ebx,%esi + rorl $7,%eax + movdqa %xmm8,%xmm9 + paddd %xmm1,%xmm8 + addl %edi,%edx + movl %ebp,%edi + pxor %xmm10,%xmm2 + roll $5,%ebp + movups 48(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,32(%r13,%r12,1) + xorps %xmm12,%xmm11 +.byte 102,69,15,56,220,222 + movups 32(%r15),%xmm15 + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movdqa %xmm2,%xmm10 + movdqa %xmm8,16(%rsp) + movl %eax,%esi + xorl %ebx,%eax + addl 36(%rsp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%edi + rorl $7,%ebp + psrld $30,%xmm10 + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + por %xmm10,%xmm2 + movl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm2,%xmm8 + addl 40(%rsp),%ebx + andl %eax,%edi + andl %ebp,%esi +.byte 102,69,15,56,220,223 + movups 48(%r15),%xmm14 + rorl $7,%edx + addl %edi,%ebx + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%esi + xorl %ebp,%edx + addl 44(%rsp),%eax + andl %ebp,%esi + 
andl %edx,%edi + rorl $7,%ecx + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + addl 48(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm15 + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,193,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm10 + paddd %xmm2,%xmm9 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + movdqa %xmm9,32(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm8 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi +.byte 102,69,15,56,220,223 + movups 80(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm8,%xmm3 + addl 60(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 0(%rsp),%eax + paddd %xmm3,%xmm10 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + movdqa %xmm10,48(%rsp) + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 4(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 8(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 12(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi +.byte 102,69,15,56,220,223 + movups 112(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + cmpq %r14,%r10 + je L$done_ssse3 + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r10),%xmm0 + movdqu 16(%r10),%xmm1 + movdqu 32(%r10),%xmm2 + movdqu 48(%r10),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r10 + addl 16(%rsp),%ebx + xorl %eax,%esi +.byte 102,15,56,0,206 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm9,%xmm0 + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + movdqa %xmm0,0(%rsp) + addl 20(%rsp),%eax + xorl %ebp,%edi + psubd %xmm9,%xmm0 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 128(%r15),%xmm15 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi +.byte 102,15,56,0,214 + movl %edx,%edi + roll $5,%edx + paddd %xmm9,%xmm1 + xorl %eax,%esi +.byte 102,69,15,56,220,223 + movups 144(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + movdqa %xmm1,16(%rsp) + addl 36(%rsp),%ebx + xorl %eax,%edi + psubd %xmm9,%xmm1 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi +.byte 102,15,56,0,222 + movl %ebp,%edi + roll $5,%ebp + paddd %xmm9,%xmm2 + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + movdqa %xmm2,32(%rsp) + addl 
52(%rsp),%ecx + xorl %ebx,%edi + psubd %xmm9,%xmm2 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + cmpl $11,%r8d + jb L$aesenclast4 + movups 176(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 192(%r15),%xmm15 +.byte 102,69,15,56,220,222 + je L$aesenclast4 + movups 208(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 224(%r15),%xmm15 +.byte 102,69,15,56,220,222 +L$aesenclast4: +.byte 102,69,15,56,221,223 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movups %xmm11,48(%r13,%r12,1) + leaq 64(%r12),%r12 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + addl 12(%r9),%edx + movl %eax,0(%r9) + addl 16(%r9),%ebp + movl %esi,4(%r9) + movl %esi,%ebx + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + jmp L$oop_ssse3 + +.p2align 4 +L$done_ssse3: + addl 16(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 20(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 128(%r15),%xmm15 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi +.byte 102,69,15,56,220,223 + movups 144(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + addl 36(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp +.byte 102,69,15,56,220,222 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 52(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + cmpl $11,%r8d + jb L$aesenclast5 + movups 176(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 192(%r15),%xmm15 +.byte 102,69,15,56,220,222 + je L$aesenclast5 + movups 208(%r15),%xmm14 +.byte 102,69,15,56,220,223 + movups 224(%r15),%xmm15 +.byte 102,69,15,56,220,222 +L$aesenclast5: +.byte 102,69,15,56,221,223 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movups %xmm11,48(%r13,%r12,1) + movq 88(%rsp),%r8 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + movl %eax,0(%r9) + addl 12(%r9),%edx + movl %esi,4(%r9) + addl 16(%r9),%ebp + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + movups %xmm11,(%r8) + leaq 104(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 
16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue_ssse3: + .byte 0xf3,0xc3 + +.p2align 6 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 + +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 + +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc + +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + + +.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 diff --git a/deps/openssl/asm/x64-macosx-gas/aes/aesni-x86_64.s b/deps/openssl/asm/x64-macosx-gas/aes/aesni-x86_64.s new file mode 100644 index 0000000000..2ea2d3460a --- /dev/null +++ b/deps/openssl/asm/x64-macosx-gas/aes/aesni-x86_64.s @@ -0,0 +1,2558 @@ +.text + +.globl _aesni_encrypt + +.p2align 4 +_aesni_encrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +L$oop_enc1_1: +.byte 102,15,56,220,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz L$oop_enc1_1 + +.byte 102,15,56,221,209 + movups %xmm2,(%rsi) + .byte 0xf3,0xc3 + + +.globl _aesni_decrypt + +.p2align 4 +_aesni_decrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +L$oop_dec1_2: +.byte 102,15,56,222,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz L$oop_dec1_2 + +.byte 102,15,56,223,209 + movups %xmm2,(%rsi) + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_encrypt3: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups (%rcx),%xmm0 + +L$enc_loop3: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 + movups (%rcx),%xmm0 + jnz L$enc_loop3 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_decrypt3: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups (%rcx),%xmm0 + +L$dec_loop3: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %eax +.byte 102,15,56,222,225 + movups 16(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,222,224 + movups (%rcx),%xmm0 + jnz L$dec_loop3 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_encrypt4: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups (%rcx),%xmm0 + +L$enc_loop4: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups (%rcx),%xmm0 + jnz L$enc_loop4 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 
102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_decrypt4: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups (%rcx),%xmm0 + +L$dec_loop4: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %eax +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 16(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups (%rcx),%xmm0 + jnz L$dec_loop4 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_encrypt6: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 + decl %eax +.byte 102,15,56,220,241 + movups (%rcx),%xmm0 +.byte 102,15,56,220,249 + jmp L$enc_loop6_enter +.p2align 4 +L$enc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +L$enc_loop6_enter: + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups (%rcx),%xmm0 + jnz L$enc_loop6 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_decrypt6: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 + decl %eax +.byte 102,15,56,222,241 + movups (%rcx),%xmm0 +.byte 102,15,56,222,249 + jmp L$dec_loop6_enter +.p2align 4 +L$dec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %eax +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +L$dec_loop6_enter: + movups 16(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups (%rcx),%xmm0 + jnz L$dec_loop6 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_encrypt8: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 +.byte 
102,15,56,220,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 + decl %eax +.byte 102,15,56,220,241 + pxor %xmm0,%xmm8 +.byte 102,15,56,220,249 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 16(%rcx),%xmm1 + jmp L$enc_loop8_enter +.p2align 4 +L$enc_loop8: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 16(%rcx),%xmm1 +L$enc_loop8_enter: +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups (%rcx),%xmm0 + jnz L$enc_loop8 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 +.byte 102,68,15,56,221,192 +.byte 102,68,15,56,221,200 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_decrypt8: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 + decl %eax +.byte 102,15,56,222,241 + pxor %xmm0,%xmm8 +.byte 102,15,56,222,249 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 16(%rcx),%xmm1 + jmp L$dec_loop8_enter +.p2align 4 +L$dec_loop8: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %eax +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 16(%rcx),%xmm1 +L$dec_loop8_enter: +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups (%rcx),%xmm0 + jnz L$dec_loop8 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 +.byte 102,68,15,56,223,192 +.byte 102,68,15,56,223,200 + .byte 0xf3,0xc3 + +.globl _aesni_ecb_encrypt + +.p2align 4 +_aesni_ecb_encrypt: + andq $-16,%rdx + jz L$ecb_ret + + movl 240(%rcx),%eax + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %eax,%r10d + testl %r8d,%r8d + jz L$ecb_decrypt + + cmpq $128,%rdx + jb L$ecb_enc_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp L$ecb_enc_loop8_enter +.p2align 4 +L$ecb_enc_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 
32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +L$ecb_enc_loop8_enter: + + call _aesni_encrypt8 + + subq $128,%rdx + jnc L$ecb_enc_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $128,%rdx + jz L$ecb_ret + +L$ecb_enc_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb L$ecb_enc_one + movups 16(%rdi),%xmm3 + je L$ecb_enc_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb L$ecb_enc_three + movups 48(%rdi),%xmm5 + je L$ecb_enc_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb L$ecb_enc_five + movups 80(%rdi),%xmm7 + je L$ecb_enc_six + movdqu 96(%rdi),%xmm8 + call _aesni_encrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_3: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_3 + +.byte 102,15,56,221,209 + movups %xmm2,(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_two: + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_five: + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_six: + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + jmp L$ecb_ret + +.p2align 4 +L$ecb_decrypt: + cmpq $128,%rdx + jb L$ecb_dec_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp L$ecb_dec_loop8_enter +.p2align 4 +L$ecb_dec_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +L$ecb_dec_loop8_enter: + + call _aesni_decrypt8 + + movups (%r11),%xmm0 + subq $128,%rdx + jnc L$ecb_dec_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + 
addq $128,%rdx + jz L$ecb_ret + +L$ecb_dec_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb L$ecb_dec_one + movups 16(%rdi),%xmm3 + je L$ecb_dec_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb L$ecb_dec_three + movups 48(%rdi),%xmm5 + je L$ecb_dec_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb L$ecb_dec_five + movups 80(%rdi),%xmm7 + je L$ecb_dec_six + movups 96(%rdi),%xmm8 + movups (%rcx),%xmm0 + call _aesni_decrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_4: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_4 + +.byte 102,15,56,223,209 + movups %xmm2,(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_two: + xorps %xmm4,%xmm4 + call _aesni_decrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_six: + call _aesni_decrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + +L$ecb_ret: + .byte 0xf3,0xc3 + +.globl _aesni_ccm64_encrypt_blocks + +.p2align 4 +_aesni_ccm64_encrypt_blocks: + movl 240(%rcx),%eax + movdqu (%r8),%xmm9 + movdqa L$increment64(%rip),%xmm6 + movdqa L$bswap_mask(%rip),%xmm7 + + shrl $1,%eax + leaq 0(%rcx),%r11 + movdqu (%r9),%xmm3 + movdqa %xmm9,%xmm2 + movl %eax,%r10d +.byte 102,68,15,56,0,207 + jmp L$ccm64_enc_outer +.p2align 4 +L$ccm64_enc_outer: + movups (%r11),%xmm0 + movl %r10d,%eax + movups (%rdi),%xmm8 + + xorps %xmm0,%xmm2 + movups 16(%r11),%xmm1 + xorps %xmm8,%xmm0 + leaq 32(%r11),%rcx + xorps %xmm0,%xmm3 + movups (%rcx),%xmm0 + +L$ccm64_enc2_loop: +.byte 102,15,56,220,209 + decl %eax +.byte 102,15,56,220,217 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,216 + movups 0(%rcx),%xmm0 + jnz L$ccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq %xmm6,%xmm9 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + + decq %rdx + leaq 16(%rdi),%rdi + xorps %xmm2,%xmm8 + movdqa %xmm9,%xmm2 + movups %xmm8,(%rsi) + leaq 16(%rsi),%rsi +.byte 102,15,56,0,215 + jnz L$ccm64_enc_outer + + movups %xmm3,(%r9) + .byte 0xf3,0xc3 + +.globl _aesni_ccm64_decrypt_blocks + +.p2align 4 +_aesni_ccm64_decrypt_blocks: + movl 240(%rcx),%eax + movups (%r8),%xmm9 + movdqu (%r9),%xmm3 + movdqa L$increment64(%rip),%xmm6 + movdqa L$bswap_mask(%rip),%xmm7 + + movaps %xmm9,%xmm2 + movl %eax,%r10d + movq %rcx,%r11 +.byte 102,68,15,56,0,207 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_5: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_5 + +.byte 102,15,56,221,209 + movups (%rdi),%xmm8 + paddq %xmm6,%xmm9 + leaq 16(%rdi),%rdi + jmp L$ccm64_dec_outer +.p2align 4 +L$ccm64_dec_outer: + xorps %xmm2,%xmm8 + 
movdqa %xmm9,%xmm2 + movl %r10d,%eax + movups %xmm8,(%rsi) + leaq 16(%rsi),%rsi +.byte 102,15,56,0,215 + + subq $1,%rdx + jz L$ccm64_dec_break + + movups (%r11),%xmm0 + shrl $1,%eax + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + leaq 32(%r11),%rcx + xorps %xmm0,%xmm2 + xorps %xmm8,%xmm3 + movups (%rcx),%xmm0 + +L$ccm64_dec2_loop: +.byte 102,15,56,220,209 + decl %eax +.byte 102,15,56,220,217 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,216 + movups 0(%rcx),%xmm0 + jnz L$ccm64_dec2_loop + movups (%rdi),%xmm8 + paddq %xmm6,%xmm9 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + leaq 16(%rdi),%rdi +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + jmp L$ccm64_dec_outer + +.p2align 4 +L$ccm64_dec_break: + + movups (%r11),%xmm0 + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + leaq 32(%r11),%r11 + xorps %xmm8,%xmm3 +L$oop_enc1_6: +.byte 102,15,56,220,217 + decl %eax + movups (%r11),%xmm1 + leaq 16(%r11),%r11 + jnz L$oop_enc1_6 + +.byte 102,15,56,221,217 + movups %xmm3,(%r9) + .byte 0xf3,0xc3 + +.globl _aesni_ctr32_encrypt_blocks + +.p2align 4 +_aesni_ctr32_encrypt_blocks: + cmpq $1,%rdx + je L$ctr32_one_shortcut + + movdqu (%r8),%xmm14 + movdqa L$bswap_mask(%rip),%xmm15 + xorl %eax,%eax +.byte 102,69,15,58,22,242,3 +.byte 102,68,15,58,34,240,3 + + movl 240(%rcx),%eax + bswapl %r10d + pxor %xmm12,%xmm12 + pxor %xmm13,%xmm13 +.byte 102,69,15,58,34,226,0 + leaq 3(%r10),%r11 +.byte 102,69,15,58,34,235,0 + incl %r10d +.byte 102,69,15,58,34,226,1 + incq %r11 +.byte 102,69,15,58,34,235,1 + incl %r10d +.byte 102,69,15,58,34,226,2 + incq %r11 +.byte 102,69,15,58,34,235,2 + movdqa %xmm12,-40(%rsp) +.byte 102,69,15,56,0,231 + movdqa %xmm13,-24(%rsp) +.byte 102,69,15,56,0,239 + + pshufd $192,%xmm12,%xmm2 + pshufd $128,%xmm12,%xmm3 + pshufd $64,%xmm12,%xmm4 + cmpq $6,%rdx + jb L$ctr32_tail + shrl $1,%eax + movq %rcx,%r11 + movl %eax,%r10d + subq $6,%rdx + jmp L$ctr32_loop6 + +.p2align 4 +L$ctr32_loop6: + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm2 + movups (%r11),%xmm0 + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm3 + movups 16(%r11),%xmm1 + pshufd $64,%xmm13,%xmm7 + por %xmm14,%xmm4 + por %xmm14,%xmm5 + xorps %xmm0,%xmm2 + por %xmm14,%xmm6 + por %xmm14,%xmm7 + + + + + pxor %xmm0,%xmm3 +.byte 102,15,56,220,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 +.byte 102,15,56,220,217 + movdqa L$increment32(%rip),%xmm13 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,225 + movdqa -40(%rsp),%xmm12 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + jmp L$ctr32_enc_loop6_enter +.p2align 4 +L$ctr32_enc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +L$ctr32_enc_loop6_enter: + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups (%rcx),%xmm0 + jnz L$ctr32_enc_loop6 + +.byte 102,15,56,220,209 + paddd %xmm13,%xmm12 +.byte 102,15,56,220,217 + paddd -24(%rsp),%xmm13 +.byte 102,15,56,220,225 + movdqa %xmm12,-40(%rsp) +.byte 102,15,56,220,233 + movdqa %xmm13,-24(%rsp) +.byte 102,15,56,220,241 +.byte 102,69,15,56,0,231 +.byte 102,15,56,220,249 +.byte 102,69,15,56,0,239 + +.byte 102,15,56,221,208 + movups (%rdi),%xmm8 +.byte 102,15,56,221,216 + movups 16(%rdi),%xmm9 +.byte 102,15,56,221,224 + movups 32(%rdi),%xmm10 +.byte 
102,15,56,221,232 + movups 48(%rdi),%xmm11 +.byte 102,15,56,221,240 + movups 64(%rdi),%xmm1 +.byte 102,15,56,221,248 + movups 80(%rdi),%xmm0 + leaq 96(%rdi),%rdi + + xorps %xmm2,%xmm8 + pshufd $192,%xmm12,%xmm2 + xorps %xmm3,%xmm9 + pshufd $128,%xmm12,%xmm3 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + pshufd $64,%xmm12,%xmm4 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + xorps %xmm7,%xmm0 + movups %xmm1,64(%rsi) + movups %xmm0,80(%rsi) + leaq 96(%rsi),%rsi + movl %r10d,%eax + subq $6,%rdx + jnc L$ctr32_loop6 + + addq $6,%rdx + jz L$ctr32_done + movq %r11,%rcx + leal 1(%rax,%rax,1),%eax + +L$ctr32_tail: + por %xmm14,%xmm2 + movups (%rdi),%xmm8 + cmpq $2,%rdx + jb L$ctr32_one + + por %xmm14,%xmm3 + movups 16(%rdi),%xmm9 + je L$ctr32_two + + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm4 + movups 32(%rdi),%xmm10 + cmpq $4,%rdx + jb L$ctr32_three + + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm5 + movups 48(%rdi),%xmm11 + je L$ctr32_four + + por %xmm14,%xmm6 + xorps %xmm7,%xmm7 + + call _aesni_encrypt6 + + movups 64(%rdi),%xmm1 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + movups %xmm1,64(%rsi) + jmp L$ctr32_done + +.p2align 4 +L$ctr32_one_shortcut: + movups (%r8),%xmm2 + movups (%rdi),%xmm8 + movl 240(%rcx),%eax +L$ctr32_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_7: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_7 + +.byte 102,15,56,221,209 + xorps %xmm2,%xmm8 + movups %xmm8,(%rsi) + jmp L$ctr32_done + +.p2align 4 +L$ctr32_two: + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + movups %xmm9,16(%rsi) + jmp L$ctr32_done + +.p2align 4 +L$ctr32_three: + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + movups %xmm10,32(%rsi) + jmp L$ctr32_done + +.p2align 4 +L$ctr32_four: + call _aesni_encrypt4 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + movups %xmm11,48(%rsi) + +L$ctr32_done: + .byte 0xf3,0xc3 + +.globl _aesni_xts_encrypt + +.p2align 4 +_aesni_xts_encrypt: + leaq -104(%rsp),%rsp + movups (%r9),%xmm15 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm15 +L$oop_enc1_8: +.byte 102,68,15,56,220,249 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz L$oop_enc1_8 + +.byte 102,68,15,56,221,249 + movq %rcx,%r11 + movl %r10d,%eax + movq %rdx,%r9 + andq $-16,%rdx + + movdqa L$xts_magic(%rip),%xmm8 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + subq 
$96,%rdx + jc L$xts_enc_short + + shrl $1,%eax + subl $1,%eax + movl %eax,%r10d + jmp L$xts_enc_grandloop + +.p2align 4 +L$xts_enc_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu 0(%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + pxor %xmm12,%xmm4 + movdqu 80(%rdi),%xmm7 + leaq 96(%rdi),%rdi + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 + + + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,220,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 + movdqa %xmm11,16(%rsp) +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 + movdqa %xmm12,32(%rsp) +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 + movdqa %xmm13,48(%rsp) +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax + movdqa %xmm14,64(%rsp) +.byte 102,15,56,220,241 + movdqa %xmm15,80(%rsp) +.byte 102,15,56,220,249 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp L$xts_enc_loop6_enter + +.p2align 4 +L$xts_enc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %eax +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +L$xts_enc_loop6_enter: + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups (%rcx),%xmm0 + jnz L$xts_enc_loop6 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + paddq %xmm15,%xmm15 +.byte 102,15,56,220,209 + pand %xmm8,%xmm9 +.byte 102,15,56,220,217 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,220,225 + pxor %xmm9,%xmm15 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups 16(%rcx),%xmm1 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 +.byte 102,15,56,220,208 + pand %xmm8,%xmm9 +.byte 102,15,56,220,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,220,224 + pxor %xmm9,%xmm15 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups 32(%rcx),%xmm0 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 +.byte 102,15,56,220,209 + pand %xmm8,%xmm9 +.byte 102,15,56,220,217 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,220,225 + pxor %xmm9,%xmm15 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 +.byte 102,15,56,221,208 + pand %xmm8,%xmm9 +.byte 102,15,56,221,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,221,224 + pxor %xmm9,%xmm15 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 + pand %xmm8,%xmm9 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + subq $96,%rdx + jnc L$xts_enc_grandloop + + leal 3(%rax,%rax,1),%eax + movq %r11,%rcx + movl %eax,%r10d + +L$xts_enc_short: + addq $96,%rdx + jz L$xts_enc_done + + cmpq $32,%rdx + jb L$xts_enc_one + je L$xts_enc_two + + cmpq $64,%rdx 
+ jb L$xts_enc_three + je L$xts_enc_four + + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_encrypt6 + + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_9: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_9 + +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + leaq 16(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_encrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_encrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_four: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_encrypt4 + + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_done: + andq $15,%r9 + jz L$xts_enc_ret + movq %r9,%rdx + +L$xts_enc_steal: + movzbl (%rdi),%eax + movzbl -16(%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,-16(%rsi) + movb %cl,0(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz L$xts_enc_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups -16(%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_10: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_10 + +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movups %xmm2,-16(%rsi) + +L$xts_enc_ret: + leaq 104(%rsp),%rsp +L$xts_enc_epilogue: + .byte 0xf3,0xc3 + +.globl _aesni_xts_decrypt + +.p2align 4 +_aesni_xts_decrypt: + leaq -104(%rsp),%rsp + movups (%r9),%xmm15 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm15 +L$oop_enc1_11: +.byte 102,68,15,56,220,249 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz L$oop_enc1_11 + +.byte 102,68,15,56,221,249 + xorl %eax,%eax + testq $15,%rdx + setnz %al + shlq $4,%rax + subq %rax,%rdx + + 
movq %rcx,%r11 + movl %r10d,%eax + movq %rdx,%r9 + andq $-16,%rdx + + movdqa L$xts_magic(%rip),%xmm8 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + subq $96,%rdx + jc L$xts_dec_short + + shrl $1,%eax + subl $1,%eax + movl %eax,%r10d + jmp L$xts_dec_grandloop + +.p2align 4 +L$xts_dec_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu 0(%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + pxor %xmm12,%xmm4 + movdqu 80(%rdi),%xmm7 + leaq 96(%rdi),%rdi + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 + + + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,222,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 + movdqa %xmm11,16(%rsp) +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + movdqa %xmm12,32(%rsp) +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 + movdqa %xmm13,48(%rsp) +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax + movdqa %xmm14,64(%rsp) +.byte 102,15,56,222,241 + movdqa %xmm15,80(%rsp) +.byte 102,15,56,222,249 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp L$xts_dec_loop6_enter + +.p2align 4 +L$xts_dec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %eax +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +L$xts_dec_loop6_enter: + movups 16(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leaq 32(%rcx),%rcx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups (%rcx),%xmm0 + jnz L$xts_dec_loop6 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + paddq %xmm15,%xmm15 +.byte 102,15,56,222,209 + pand %xmm8,%xmm9 +.byte 102,15,56,222,217 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,222,225 + pxor %xmm9,%xmm15 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups 16(%rcx),%xmm1 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 +.byte 102,15,56,222,208 + pand %xmm8,%xmm9 +.byte 102,15,56,222,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,222,224 + pxor %xmm9,%xmm15 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups 32(%rcx),%xmm0 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 +.byte 102,15,56,222,209 + pand %xmm8,%xmm9 +.byte 102,15,56,222,217 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,222,225 + pxor %xmm9,%xmm15 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 +.byte 102,15,56,223,208 + pand %xmm8,%xmm9 +.byte 102,15,56,223,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,223,224 + pxor %xmm9,%xmm15 +.byte 
102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 + pand %xmm8,%xmm9 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + subq $96,%rdx + jnc L$xts_dec_grandloop + + leal 3(%rax,%rax,1),%eax + movq %r11,%rcx + movl %eax,%r10d + +L$xts_dec_short: + addq $96,%rdx + jz L$xts_dec_done + + cmpq $32,%rdx + jb L$xts_dec_one + je L$xts_dec_two + + cmpq $64,%rdx + jb L$xts_dec_three + je L$xts_dec_four + + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_decrypt6 + + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm14 + movdqu %xmm5,48(%rsi) + pcmpgtd %xmm15,%xmm14 + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + pshufd $19,%xmm14,%xmm11 + andq $15,%r9 + jz L$xts_dec_ret + + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm11 + pxor %xmm15,%xmm11 + jmp L$xts_dec_done2 + +.p2align 4 +L$xts_dec_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_12: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_12 + +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + movdqa %xmm12,%xmm11 + leaq 16(%rsi),%rsi + jmp L$xts_dec_done + +.p2align 4 +L$xts_dec_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_decrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm13,%xmm11 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp L$xts_dec_done + +.p2align 4 +L$xts_dec_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_decrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm15,%xmm11 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp L$xts_dec_done + +.p2align 4 +L$xts_dec_four: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movups (%rdi),%xmm2 + pand %xmm8,%xmm9 + movups 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_decrypt4 + + xorps %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm15,%xmm11 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp L$xts_dec_done + +.p2align 4 
+L$xts_dec_done: + andq $15,%r9 + jz L$xts_dec_ret +L$xts_dec_done2: + movq %r9,%rdx + movq %r11,%rcx + movl %r10d,%eax + + movups (%rdi),%xmm2 + xorps %xmm11,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_13: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_13 + +.byte 102,15,56,223,209 + xorps %xmm11,%xmm2 + movups %xmm2,(%rsi) + +L$xts_dec_steal: + movzbl 16(%rdi),%eax + movzbl (%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,(%rsi) + movb %cl,16(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz L$xts_dec_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups (%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_14: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_14 + +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + +L$xts_dec_ret: + leaq 104(%rsp),%rsp +L$xts_dec_epilogue: + .byte 0xf3,0xc3 + +.globl _aesni_cbc_encrypt + +.p2align 4 +_aesni_cbc_encrypt: + testq %rdx,%rdx + jz L$cbc_ret + + movl 240(%rcx),%r10d + movq %rcx,%r11 + testl %r9d,%r9d + jz L$cbc_decrypt + + movups (%r8),%xmm2 + movl %r10d,%eax + cmpq $16,%rdx + jb L$cbc_enc_tail + subq $16,%rdx + jmp L$cbc_enc_loop +.p2align 4 +L$cbc_enc_loop: + movups (%rdi),%xmm3 + leaq 16(%rdi),%rdi + + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm3 + leaq 32(%rcx),%rcx + xorps %xmm3,%xmm2 +L$oop_enc1_15: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_15 + +.byte 102,15,56,221,209 + movl %r10d,%eax + movq %r11,%rcx + movups %xmm2,0(%rsi) + leaq 16(%rsi),%rsi + subq $16,%rdx + jnc L$cbc_enc_loop + addq $16,%rdx + jnz L$cbc_enc_tail + movups %xmm2,(%r8) + jmp L$cbc_ret + +L$cbc_enc_tail: + movq %rdx,%rcx + xchgq %rdi,%rsi +.long 0x9066A4F3 + + movl $16,%ecx + subq %rdx,%rcx + xorl %eax,%eax +.long 0x9066AAF3 + + leaq -16(%rdi),%rdi + movl %r10d,%eax + movq %rdi,%rsi + movq %r11,%rcx + xorq %rdx,%rdx + jmp L$cbc_enc_loop + + +.p2align 4 +L$cbc_decrypt: + movups (%r8),%xmm9 + movl %r10d,%eax + cmpq $112,%rdx + jbe L$cbc_dec_tail + shrl $1,%r10d + subq $112,%rdx + movl %r10d,%eax + movaps %xmm9,-24(%rsp) + jmp L$cbc_dec_loop8_enter +.p2align 4 +L$cbc_dec_loop8: + movaps %xmm0,-24(%rsp) + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi +L$cbc_dec_loop8_enter: + movups (%rcx),%xmm0 + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 16(%rcx),%xmm1 + + leaq 32(%rcx),%rcx + movdqu 32(%rdi),%xmm4 + xorps %xmm0,%xmm2 + movdqu 48(%rdi),%xmm5 + xorps %xmm0,%xmm3 + movdqu 64(%rdi),%xmm6 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm4 + movdqu 80(%rdi),%xmm7 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + movdqu 96(%rdi),%xmm8 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 + movdqu 112(%rdi),%xmm9 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 + decl %eax +.byte 102,15,56,222,241 + pxor %xmm0,%xmm8 +.byte 102,15,56,222,249 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 16(%rcx),%xmm1 + + call L$dec_loop8_enter + + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps -24(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm1 + xorps %xmm0,%xmm8 + movups 112(%rdi),%xmm0 + xorps %xmm1,%xmm9 + movups %xmm2,(%rsi) + movups 
%xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movq %r11,%rcx + movups %xmm7,80(%rsi) + leaq 128(%rdi),%rdi + movups %xmm8,96(%rsi) + leaq 112(%rsi),%rsi + subq $128,%rdx + ja L$cbc_dec_loop8 + + movaps %xmm9,%xmm2 + movaps %xmm0,%xmm9 + addq $112,%rdx + jle L$cbc_dec_tail_collected + movups %xmm2,(%rsi) + leal 1(%r10,%r10,1),%eax + leaq 16(%rsi),%rsi +L$cbc_dec_tail: + movups (%rdi),%xmm2 + movaps %xmm2,%xmm8 + cmpq $16,%rdx + jbe L$cbc_dec_one + + movups 16(%rdi),%xmm3 + movaps %xmm3,%xmm7 + cmpq $32,%rdx + jbe L$cbc_dec_two + + movups 32(%rdi),%xmm4 + movaps %xmm4,%xmm6 + cmpq $48,%rdx + jbe L$cbc_dec_three + + movups 48(%rdi),%xmm5 + cmpq $64,%rdx + jbe L$cbc_dec_four + + movups 64(%rdi),%xmm6 + cmpq $80,%rdx + jbe L$cbc_dec_five + + movups 80(%rdi),%xmm7 + cmpq $96,%rdx + jbe L$cbc_dec_six + + movups 96(%rdi),%xmm8 + movaps %xmm9,-24(%rsp) + call _aesni_decrypt8 + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps -24(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm9 + xorps %xmm0,%xmm8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + movaps %xmm8,%xmm2 + subq $112,%rdx + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_16: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_16 + +.byte 102,15,56,223,209 + xorps %xmm9,%xmm2 + movaps %xmm8,%xmm9 + subq $16,%rdx + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_two: + xorps %xmm4,%xmm4 + call _aesni_decrypt3 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + movaps %xmm7,%xmm9 + movaps %xmm3,%xmm2 + leaq 16(%rsi),%rsi + subq $32,%rdx + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_three: + call _aesni_decrypt3 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + movaps %xmm6,%xmm9 + movaps %xmm4,%xmm2 + leaq 32(%rsi),%rsi + subq $48,%rdx + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_four: + call _aesni_decrypt4 + xorps %xmm9,%xmm2 + movups 48(%rdi),%xmm9 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + xorps %xmm6,%xmm5 + movups %xmm4,32(%rsi) + movaps %xmm5,%xmm2 + leaq 48(%rsi),%rsi + subq $64,%rdx + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm9 + xorps %xmm1,%xmm6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + movaps %xmm6,%xmm2 + subq $80,%rdx + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_six: + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm0 + xorps %xmm1,%xmm6 + movups 80(%rdi),%xmm9 + xorps %xmm0,%xmm7 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + movaps 
%xmm7,%xmm2 + subq $96,%rdx + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_tail_collected: + andq $15,%rdx + movups %xmm9,(%r8) + jnz L$cbc_dec_tail_partial + movups %xmm2,(%rsi) + jmp L$cbc_dec_ret +.p2align 4 +L$cbc_dec_tail_partial: + movaps %xmm2,-24(%rsp) + movq $16,%rcx + movq %rsi,%rdi + subq %rdx,%rcx + leaq -24(%rsp),%rsi +.long 0x9066A4F3 + + +L$cbc_dec_ret: +L$cbc_ret: + .byte 0xf3,0xc3 + +.globl _aesni_set_decrypt_key + +.p2align 4 +_aesni_set_decrypt_key: +.byte 0x48,0x83,0xEC,0x08 + + call __aesni_set_encrypt_key + shll $4,%esi + testl %eax,%eax + jnz L$dec_key_ret + leaq 16(%rdx,%rsi,1),%rdi + + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 + movups %xmm0,(%rdi) + movups %xmm1,(%rdx) + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + +L$dec_key_inverse: + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + movups %xmm0,16(%rdi) + movups %xmm1,-16(%rdx) + cmpq %rdx,%rdi + ja L$dec_key_inverse + + movups (%rdx),%xmm0 +.byte 102,15,56,219,192 + movups %xmm0,(%rdi) +L$dec_key_ret: + addq $8,%rsp + .byte 0xf3,0xc3 +L$SEH_end_set_decrypt_key: + +.globl _aesni_set_encrypt_key + +.p2align 4 +_aesni_set_encrypt_key: +__aesni_set_encrypt_key: +.byte 0x48,0x83,0xEC,0x08 + + movq $-1,%rax + testq %rdi,%rdi + jz L$enc_key_ret + testq %rdx,%rdx + jz L$enc_key_ret + + movups (%rdi),%xmm0 + xorps %xmm4,%xmm4 + leaq 16(%rdx),%rax + cmpl $256,%esi + je L$14rounds + cmpl $192,%esi + je L$12rounds + cmpl $128,%esi + jne L$bad_keybits + +L$10rounds: + movl $9,%esi + movups %xmm0,(%rdx) +.byte 102,15,58,223,200,1 + call L$key_expansion_128_cold +.byte 102,15,58,223,200,2 + call L$key_expansion_128 +.byte 102,15,58,223,200,4 + call L$key_expansion_128 +.byte 102,15,58,223,200,8 + call L$key_expansion_128 +.byte 102,15,58,223,200,16 + call L$key_expansion_128 +.byte 102,15,58,223,200,32 + call L$key_expansion_128 +.byte 102,15,58,223,200,64 + call L$key_expansion_128 +.byte 102,15,58,223,200,128 + call L$key_expansion_128 +.byte 102,15,58,223,200,27 + call L$key_expansion_128 +.byte 102,15,58,223,200,54 + call L$key_expansion_128 + movups %xmm0,(%rax) + movl %esi,80(%rax) + xorl %eax,%eax + jmp L$enc_key_ret + +.p2align 4 +L$12rounds: + movq 16(%rdi),%xmm2 + movl $11,%esi + movups %xmm0,(%rdx) +.byte 102,15,58,223,202,1 + call L$key_expansion_192a_cold +.byte 102,15,58,223,202,2 + call L$key_expansion_192b +.byte 102,15,58,223,202,4 + call L$key_expansion_192a +.byte 102,15,58,223,202,8 + call L$key_expansion_192b +.byte 102,15,58,223,202,16 + call L$key_expansion_192a +.byte 102,15,58,223,202,32 + call L$key_expansion_192b +.byte 102,15,58,223,202,64 + call L$key_expansion_192a +.byte 102,15,58,223,202,128 + call L$key_expansion_192b + movups %xmm0,(%rax) + movl %esi,48(%rax) + xorq %rax,%rax + jmp L$enc_key_ret + +.p2align 4 +L$14rounds: + movups 16(%rdi),%xmm2 + movl $13,%esi + leaq 16(%rax),%rax + movups %xmm0,(%rdx) + movups %xmm2,16(%rdx) +.byte 102,15,58,223,202,1 + call L$key_expansion_256a_cold +.byte 102,15,58,223,200,1 + call L$key_expansion_256b +.byte 102,15,58,223,202,2 + call L$key_expansion_256a +.byte 102,15,58,223,200,2 + call L$key_expansion_256b +.byte 102,15,58,223,202,4 + call L$key_expansion_256a +.byte 102,15,58,223,200,4 + call L$key_expansion_256b +.byte 102,15,58,223,202,8 + call L$key_expansion_256a +.byte 102,15,58,223,200,8 + call L$key_expansion_256b +.byte 102,15,58,223,202,16 + call L$key_expansion_256a +.byte 102,15,58,223,200,16 + call L$key_expansion_256b +.byte 102,15,58,223,202,32 
+ call L$key_expansion_256a +.byte 102,15,58,223,200,32 + call L$key_expansion_256b +.byte 102,15,58,223,202,64 + call L$key_expansion_256a + movups %xmm0,(%rax) + movl %esi,16(%rax) + xorq %rax,%rax + jmp L$enc_key_ret + +.p2align 4 +L$bad_keybits: + movq $-2,%rax +L$enc_key_ret: + addq $8,%rsp + .byte 0xf3,0xc3 +L$SEH_end_set_encrypt_key: + +.p2align 4 +L$key_expansion_128: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +L$key_expansion_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + .byte 0xf3,0xc3 + +.p2align 4 +L$key_expansion_192a: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +L$key_expansion_192a_cold: + movaps %xmm2,%xmm5 +L$key_expansion_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + .byte 0xf3,0xc3 + +.p2align 4 +L$key_expansion_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%rax) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%rax) + leaq 32(%rax),%rax + jmp L$key_expansion_192b_warm + +.p2align 4 +L$key_expansion_256a: + movups %xmm2,(%rax) + leaq 16(%rax),%rax +L$key_expansion_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + .byte 0xf3,0xc3 + +.p2align 4 +L$key_expansion_256b: + movups %xmm0,(%rax) + leaq 16(%rax),%rax + + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + .byte 0xf3,0xc3 + + +.p2align 6 +L$bswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +L$increment32: +.long 6,6,6,0 +L$increment64: +.long 1,0,0,0 +L$xts_magic: +.long 0x87,0,1,0 + +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 diff --git a/deps/openssl/asm/x64-macosx-gas/bn/modexp512-x86_64.s b/deps/openssl/asm/x64-macosx-gas/bn/modexp512-x86_64.s new file mode 100644 index 0000000000..00c529a079 --- /dev/null +++ b/deps/openssl/asm/x64-macosx-gas/bn/modexp512-x86_64.s @@ -0,0 +1,1775 @@ +.text + + + +.p2align 4 +MULADD_128x512: + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + movq %r8,0(%rcx) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%r8 + movq 8(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + movq %r9,8(%rcx) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + 
+ movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%r9 + .byte 0xf3,0xc3 + + +.p2align 4 +mont_reduce: + leaq 192(%rsp),%rdi + movq 32(%rsp),%rsi + addq $576,%rsi + leaq 520(%rsp),%rcx + + movq 96(%rcx),%rbp + movq 0(%rsi),%rax + mulq %rbp + movq (%rcx),%r8 + addq %rax,%r8 + adcq $0,%rdx + movq %r8,0(%rdi) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + movq 8(%rcx),%r9 + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + movq 16(%rcx),%r10 + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + movq 24(%rcx),%r11 + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + movq 32(%rcx),%r12 + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + movq 40(%rcx),%r13 + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + movq 48(%rcx),%r14 + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + movq 56(%rcx),%r15 + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%r8 + movq 104(%rcx),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + movq %r9,8(%rdi) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%r9 + movq 112(%rcx),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + movq %r10,16(%rdi) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 
48(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%r10 + movq 120(%rcx),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + movq %r11,24(%rdi) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%r11 + xorq %rax,%rax + + addq 64(%rcx),%r8 + adcq 72(%rcx),%r9 + adcq 80(%rcx),%r10 + adcq 88(%rcx),%r11 + adcq $0,%rax + + + + + movq %r8,64(%rdi) + movq %r9,72(%rdi) + movq %r10,%rbp + movq %r11,88(%rdi) + + movq %rax,384(%rsp) + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + + + + + + + + + addq $80,%rdi + + addq $64,%rsi + leaq 296(%rsp),%rcx + + call MULADD_128x512 + + + movq 384(%rsp),%rax + + + addq -16(%rdi),%r8 + adcq -8(%rdi),%r9 + movq %r8,64(%rcx) + movq %r9,72(%rcx) + + adcq %rax,%rax + movq %rax,384(%rsp) + + leaq 192(%rsp),%rdi + addq $64,%rsi + + + + + + movq (%rsi),%r8 + movq 8(%rsi),%rbx + + movq (%rcx),%rax + mulq %r8 + movq %rax,%rbp + movq %rdx,%r9 + + movq 8(%rcx),%rax + mulq %r8 + addq %rax,%r9 + + movq (%rcx),%rax + mulq %rbx + addq %rax,%r9 + + movq %r9,8(%rdi) + + + subq $192,%rsi + + movq (%rcx),%r8 + movq 8(%rcx),%r9 + + call MULADD_128x512 + + + + + + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rdi + movq 24(%rsi),%rdx + + + movq 384(%rsp),%rbp + + addq 64(%rcx),%r8 + adcq 72(%rcx),%r9 + + + adcq %rbp,%rbp + + + + shlq $3,%rbp + movq 32(%rsp),%rcx + addq %rcx,%rbp + + + xorq %rsi,%rsi + + addq 0(%rbp),%r10 + adcq 64(%rbp),%r11 + adcq 128(%rbp),%r12 + adcq 192(%rbp),%r13 + adcq 256(%rbp),%r14 + adcq 320(%rbp),%r15 + adcq 384(%rbp),%r8 + adcq 448(%rbp),%r9 + + + + sbbq $0,%rsi + + + andq %rsi,%rax + andq %rsi,%rbx + andq %rsi,%rdi + andq %rsi,%rdx + + movq $1,%rbp + subq %rax,%r10 + sbbq %rbx,%r11 + sbbq %rdi,%r12 + sbbq %rdx,%r13 + + + + + sbbq $0,%rbp + + + + addq $512,%rcx + movq 32(%rcx),%rax + movq 40(%rcx),%rbx + movq 48(%rcx),%rdi + movq 56(%rcx),%rdx + + + + andq %rsi,%rax + andq %rsi,%rbx + andq %rsi,%rdi + andq %rsi,%rdx + + + + subq $1,%rbp + + sbbq %rax,%r14 + sbbq %rbx,%r15 + sbbq %rdi,%r8 + sbbq %rdx,%r9 + + + + movq 144(%rsp),%rsi + movq %r10,0(%rsi) + movq %r11,8(%rsi) + movq %r12,16(%rsi) + movq %r13,24(%rsi) + movq %r14,32(%rsi) + movq %r15,40(%rsi) + movq %r8,48(%rsi) + movq %r9,56(%rsi) + + .byte 0xf3,0xc3 + + +.p2align 4 +mont_mul_a3b: + + + + + movq 0(%rdi),%rbp + + movq %r10,%rax + mulq %rbp + movq %rax,520(%rsp) + movq %rdx,%r10 + movq %r11,%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + movq %rdx,%r11 + movq %r12,%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %r13,%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + movq %rdx,%r13 + movq 
%r14,%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + movq %rdx,%r14 + movq %r15,%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r15 + movq %r8,%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + movq %rdx,%r8 + movq %r9,%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + movq %rdx,%r9 + movq 8(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + movq %r10,528(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%r10 + movq 16(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + movq %r11,536(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%r11 + movq 24(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + movq %r12,544(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq 32(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + movq %r13,552(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq 
%rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%r13 + movq 40(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + movq %r14,560(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%r14 + movq 48(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + movq %r15,568(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + addq %rbx,%r8 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%r15 + movq 56(%rdi),%rbp + movq 0(%rsi),%rax + mulq %rbp + addq %rax,%r8 + adcq $0,%rdx + movq %r8,576(%rsp) + movq %rdx,%rbx + + movq 8(%rsi),%rax + mulq %rbp + addq %rax,%r9 + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + movq %rdx,%rbx + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r10 + adcq $0,%rdx + addq %rbx,%r10 + adcq $0,%rdx + movq %rdx,%rbx + + movq 24(%rsi),%rax + mulq %rbp + addq %rax,%r11 + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + movq %rdx,%rbx + + movq 32(%rsi),%rax + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + addq %rbx,%r12 + adcq $0,%rdx + movq %rdx,%rbx + + movq 40(%rsi),%rax + mulq %rbp + addq %rax,%r13 + adcq $0,%rdx + addq %rbx,%r13 + adcq $0,%rdx + movq %rdx,%rbx + + movq 48(%rsi),%rax + mulq %rbp + addq %rax,%r14 + adcq $0,%rdx + addq %rbx,%r14 + adcq $0,%rdx + movq %rdx,%rbx + + movq 56(%rsi),%rax + mulq %rbp + addq %rax,%r15 + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + movq %rdx,%r8 + movq %r9,584(%rsp) + movq %r10,592(%rsp) + movq %r11,600(%rsp) + movq %r12,608(%rsp) + movq %r13,616(%rsp) + movq %r14,624(%rsp) + movq %r15,632(%rsp) + movq %r8,640(%rsp) + + + + + + jmp mont_reduce + + + + +.p2align 4 +sqr_reduce: + movq 16(%rsp),%rcx + + + + movq %r10,%rbx + + movq 
%r11,%rax + mulq %rbx + movq %rax,528(%rsp) + movq %rdx,%r10 + movq %r12,%rax + mulq %rbx + addq %rax,%r10 + adcq $0,%rdx + movq %rdx,%r11 + movq %r13,%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + movq %rdx,%r12 + movq %r14,%rax + mulq %rbx + addq %rax,%r12 + adcq $0,%rdx + movq %rdx,%r13 + movq %r15,%rax + mulq %rbx + addq %rax,%r13 + adcq $0,%rdx + movq %rdx,%r14 + movq %r8,%rax + mulq %rbx + addq %rax,%r14 + adcq $0,%rdx + movq %rdx,%r15 + movq %r9,%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + movq %rdx,%rsi + + movq %r10,536(%rsp) + + + + + + movq 8(%rcx),%rbx + + movq 16(%rcx),%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + movq %r11,544(%rsp) + + movq %rdx,%r10 + movq 24(%rcx),%rax + mulq %rbx + addq %rax,%r12 + adcq $0,%rdx + addq %r10,%r12 + adcq $0,%rdx + movq %r12,552(%rsp) + + movq %rdx,%r10 + movq 32(%rcx),%rax + mulq %rbx + addq %rax,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + + movq %rdx,%r10 + movq 40(%rcx),%rax + mulq %rbx + addq %rax,%r14 + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + + movq %rdx,%r10 + movq %r8,%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + + movq %rdx,%r10 + movq %r9,%rax + mulq %rbx + addq %rax,%rsi + adcq $0,%rdx + addq %r10,%rsi + adcq $0,%rdx + + movq %rdx,%r11 + + + + + movq 16(%rcx),%rbx + + movq 24(%rcx),%rax + mulq %rbx + addq %rax,%r13 + adcq $0,%rdx + movq %r13,560(%rsp) + + movq %rdx,%r10 + movq 32(%rcx),%rax + mulq %rbx + addq %rax,%r14 + adcq $0,%rdx + addq %r10,%r14 + adcq $0,%rdx + movq %r14,568(%rsp) + + movq %rdx,%r10 + movq 40(%rcx),%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + + movq %rdx,%r10 + movq %r8,%rax + mulq %rbx + addq %rax,%rsi + adcq $0,%rdx + addq %r10,%rsi + adcq $0,%rdx + + movq %rdx,%r10 + movq %r9,%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + addq %r10,%r11 + adcq $0,%rdx + + movq %rdx,%r12 + + + + + + movq 24(%rcx),%rbx + + movq 32(%rcx),%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + movq %r15,576(%rsp) + + movq %rdx,%r10 + movq 40(%rcx),%rax + mulq %rbx + addq %rax,%rsi + adcq $0,%rdx + addq %r10,%rsi + adcq $0,%rdx + movq %rsi,584(%rsp) + + movq %rdx,%r10 + movq %r8,%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + addq %r10,%r11 + adcq $0,%rdx + + movq %rdx,%r10 + movq %r9,%rax + mulq %rbx + addq %rax,%r12 + adcq $0,%rdx + addq %r10,%r12 + adcq $0,%rdx + + movq %rdx,%r15 + + + + + movq 32(%rcx),%rbx + + movq 40(%rcx),%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + movq %r11,592(%rsp) + + movq %rdx,%r10 + movq %r8,%rax + mulq %rbx + addq %rax,%r12 + adcq $0,%rdx + addq %r10,%r12 + adcq $0,%rdx + movq %r12,600(%rsp) + + movq %rdx,%r10 + movq %r9,%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + addq %r10,%r15 + adcq $0,%rdx + + movq %rdx,%r11 + + + + + movq 40(%rcx),%rbx + + movq %r8,%rax + mulq %rbx + addq %rax,%r15 + adcq $0,%rdx + movq %r15,608(%rsp) + + movq %rdx,%r10 + movq %r9,%rax + mulq %rbx + addq %rax,%r11 + adcq $0,%rdx + addq %r10,%r11 + adcq $0,%rdx + movq %r11,616(%rsp) + + movq %rdx,%r12 + + + + + movq %r8,%rbx + + movq %r9,%rax + mulq %rbx + addq %rax,%r12 + adcq $0,%rdx + movq %r12,624(%rsp) + + movq %rdx,632(%rsp) + + + movq 528(%rsp),%r10 + movq 536(%rsp),%r11 + movq 544(%rsp),%r12 + movq 552(%rsp),%r13 + movq 560(%rsp),%r14 + movq 568(%rsp),%r15 + + movq 24(%rcx),%rax + mulq %rax + movq %rax,%rdi + movq %rdx,%r8 + + addq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq %r15,%r15 + adcq $0,%r8 + + movq 0(%rcx),%rax + mulq %rax + movq %rax,520(%rsp) + 
movq %rdx,%rbx + + movq 8(%rcx),%rax + mulq %rax + + addq %rbx,%r10 + adcq %rax,%r11 + adcq $0,%rdx + + movq %rdx,%rbx + movq %r10,528(%rsp) + movq %r11,536(%rsp) + + movq 16(%rcx),%rax + mulq %rax + + addq %rbx,%r12 + adcq %rax,%r13 + adcq $0,%rdx + + movq %rdx,%rbx + + movq %r12,544(%rsp) + movq %r13,552(%rsp) + + xorq %rbp,%rbp + addq %rbx,%r14 + adcq %rdi,%r15 + adcq $0,%rbp + + movq %r14,560(%rsp) + movq %r15,568(%rsp) + + + + + movq 576(%rsp),%r10 + movq 584(%rsp),%r11 + movq 592(%rsp),%r12 + movq 600(%rsp),%r13 + movq 608(%rsp),%r14 + movq 616(%rsp),%r15 + movq 624(%rsp),%rdi + movq 632(%rsp),%rsi + + movq %r9,%rax + mulq %rax + movq %rax,%r9 + movq %rdx,%rbx + + addq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq %r15,%r15 + adcq %rdi,%rdi + adcq %rsi,%rsi + adcq $0,%rbx + + addq %rbp,%r10 + + movq 32(%rcx),%rax + mulq %rax + + addq %r8,%r10 + adcq %rax,%r11 + adcq $0,%rdx + + movq %rdx,%rbp + + movq %r10,576(%rsp) + movq %r11,584(%rsp) + + movq 40(%rcx),%rax + mulq %rax + + addq %rbp,%r12 + adcq %rax,%r13 + adcq $0,%rdx + + movq %rdx,%rbp + + movq %r12,592(%rsp) + movq %r13,600(%rsp) + + movq 48(%rcx),%rax + mulq %rax + + addq %rbp,%r14 + adcq %rax,%r15 + adcq $0,%rdx + + movq %r14,608(%rsp) + movq %r15,616(%rsp) + + addq %rdx,%rdi + adcq %r9,%rsi + adcq $0,%rbx + + movq %rdi,624(%rsp) + movq %rsi,632(%rsp) + movq %rbx,640(%rsp) + + jmp mont_reduce + + + +.globl _mod_exp_512 + +_mod_exp_512: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r8 + subq $2688,%rsp + andq $-64,%rsp + + + movq %r8,0(%rsp) + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rcx,24(%rsp) +L$body: + + + + pxor %xmm4,%xmm4 + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqa %xmm4,512(%rsp) + movdqa %xmm4,528(%rsp) + movdqa %xmm4,608(%rsp) + movdqa %xmm4,624(%rsp) + movdqa %xmm0,544(%rsp) + movdqa %xmm1,560(%rsp) + movdqa %xmm2,576(%rsp) + movdqa %xmm3,592(%rsp) + + + movdqu 0(%rdx),%xmm0 + movdqu 16(%rdx),%xmm1 + movdqu 32(%rdx),%xmm2 + movdqu 48(%rdx),%xmm3 + + leaq 384(%rsp),%rbx + movq %rbx,136(%rsp) + call mont_reduce + + + leaq 448(%rsp),%rcx + xorq %rax,%rax + movq %rax,0(%rcx) + movq %rax,8(%rcx) + movq %rax,24(%rcx) + movq %rax,32(%rcx) + movq %rax,40(%rcx) + movq %rax,48(%rcx) + movq %rax,56(%rcx) + movq %rax,128(%rsp) + movq $1,16(%rcx) + + leaq 640(%rsp),%rbp + movq %rcx,%rsi + movq %rbp,%rdi + movq $8,%rax +loop_0: + movq (%rcx),%rbx + movw %bx,(%rdi) + shrq $16,%rbx + movw %bx,64(%rdi) + shrq $16,%rbx + movw %bx,128(%rdi) + shrq $16,%rbx + movw %bx,192(%rdi) + leaq 8(%rcx),%rcx + leaq 256(%rdi),%rdi + decq %rax + jnz loop_0 + movq $31,%rax + movq %rax,32(%rsp) + movq %rbp,40(%rsp) + + movq %rsi,136(%rsp) + movq 0(%rsi),%r10 + movq 8(%rsi),%r11 + movq 16(%rsi),%r12 + movq 24(%rsi),%r13 + movq 32(%rsi),%r14 + movq 40(%rsi),%r15 + movq 48(%rsi),%r8 + movq 56(%rsi),%r9 +init_loop: + leaq 384(%rsp),%rdi + call mont_mul_a3b + leaq 448(%rsp),%rsi + movq 40(%rsp),%rbp + addq $2,%rbp + movq %rbp,40(%rsp) + movq %rsi,%rcx + movq $8,%rax +loop_1: + movq (%rcx),%rbx + movw %bx,(%rbp) + shrq $16,%rbx + movw %bx,64(%rbp) + shrq $16,%rbx + movw %bx,128(%rbp) + shrq $16,%rbx + movw %bx,192(%rbp) + leaq 8(%rcx),%rcx + leaq 256(%rbp),%rbp + decq %rax + jnz loop_1 + movq 32(%rsp),%rax + subq $1,%rax + movq %rax,32(%rsp) + jne init_loop + + + + movdqa %xmm0,64(%rsp) + movdqa %xmm1,80(%rsp) + movdqa %xmm2,96(%rsp) + movdqa %xmm3,112(%rsp) + + + + + + movl 126(%rsp),%eax + movq 
%rax,%rdx + shrq $11,%rax + andl $2047,%edx + movl %edx,126(%rsp) + leaq 640(%rsp,%rax,2),%rsi + movq 8(%rsp),%rdx + movq $4,%rbp +loop_2: + movzwq 192(%rsi),%rbx + movzwq 448(%rsi),%rax + shlq $16,%rbx + shlq $16,%rax + movw 128(%rsi),%bx + movw 384(%rsi),%ax + shlq $16,%rbx + shlq $16,%rax + movw 64(%rsi),%bx + movw 320(%rsi),%ax + shlq $16,%rbx + shlq $16,%rax + movw 0(%rsi),%bx + movw 256(%rsi),%ax + movq %rbx,0(%rdx) + movq %rax,8(%rdx) + leaq 512(%rsi),%rsi + leaq 16(%rdx),%rdx + subq $1,%rbp + jnz loop_2 + movq $505,48(%rsp) + + movq 8(%rsp),%rcx + movq %rcx,136(%rsp) + movq 0(%rcx),%r10 + movq 8(%rcx),%r11 + movq 16(%rcx),%r12 + movq 24(%rcx),%r13 + movq 32(%rcx),%r14 + movq 40(%rcx),%r15 + movq 48(%rcx),%r8 + movq 56(%rcx),%r9 + jmp sqr_2 + +main_loop_a3b: + call sqr_reduce + call sqr_reduce + call sqr_reduce +sqr_2: + call sqr_reduce + call sqr_reduce + + + + movq 48(%rsp),%rcx + movq %rcx,%rax + shrq $4,%rax + movl 64(%rsp,%rax,2),%edx + andq $15,%rcx + shrq %cl,%rdx + andq $31,%rdx + + leaq 640(%rsp,%rdx,2),%rsi + leaq 448(%rsp),%rdx + movq %rdx,%rdi + movq $4,%rbp +loop_3: + movzwq 192(%rsi),%rbx + movzwq 448(%rsi),%rax + shlq $16,%rbx + shlq $16,%rax + movw 128(%rsi),%bx + movw 384(%rsi),%ax + shlq $16,%rbx + shlq $16,%rax + movw 64(%rsi),%bx + movw 320(%rsi),%ax + shlq $16,%rbx + shlq $16,%rax + movw 0(%rsi),%bx + movw 256(%rsi),%ax + movq %rbx,0(%rdx) + movq %rax,8(%rdx) + leaq 512(%rsi),%rsi + leaq 16(%rdx),%rdx + subq $1,%rbp + jnz loop_3 + movq 8(%rsp),%rsi + call mont_mul_a3b + + + + movq 48(%rsp),%rcx + subq $5,%rcx + movq %rcx,48(%rsp) + jge main_loop_a3b + + + +end_main_loop_a3b: + + + movq 8(%rsp),%rdx + pxor %xmm4,%xmm4 + movdqu 0(%rdx),%xmm0 + movdqu 16(%rdx),%xmm1 + movdqu 32(%rdx),%xmm2 + movdqu 48(%rdx),%xmm3 + movdqa %xmm4,576(%rsp) + movdqa %xmm4,592(%rsp) + movdqa %xmm4,608(%rsp) + movdqa %xmm4,624(%rsp) + movdqa %xmm0,512(%rsp) + movdqa %xmm1,528(%rsp) + movdqa %xmm2,544(%rsp) + movdqa %xmm3,560(%rsp) + call mont_reduce + + + + movq 8(%rsp),%rax + movq 0(%rax),%r8 + movq 8(%rax),%r9 + movq 16(%rax),%r10 + movq 24(%rax),%r11 + movq 32(%rax),%r12 + movq 40(%rax),%r13 + movq 48(%rax),%r14 + movq 56(%rax),%r15 + + + movq 24(%rsp),%rbx + addq $512,%rbx + + subq 0(%rbx),%r8 + sbbq 8(%rbx),%r9 + sbbq 16(%rbx),%r10 + sbbq 24(%rbx),%r11 + sbbq 32(%rbx),%r12 + sbbq 40(%rbx),%r13 + sbbq 48(%rbx),%r14 + sbbq 56(%rbx),%r15 + + + movq 0(%rax),%rsi + movq 8(%rax),%rdi + movq 16(%rax),%rcx + movq 24(%rax),%rdx + cmovncq %r8,%rsi + cmovncq %r9,%rdi + cmovncq %r10,%rcx + cmovncq %r11,%rdx + movq %rsi,0(%rax) + movq %rdi,8(%rax) + movq %rcx,16(%rax) + movq %rdx,24(%rax) + + movq 32(%rax),%rsi + movq 40(%rax),%rdi + movq 48(%rax),%rcx + movq 56(%rax),%rdx + cmovncq %r12,%rsi + cmovncq %r13,%rdi + cmovncq %r14,%rcx + cmovncq %r15,%rdx + movq %rsi,32(%rax) + movq %rdi,40(%rax) + movq %rcx,48(%rax) + movq %rdx,56(%rax) + + movq 0(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbx + movq 40(%rsi),%rbp + leaq 48(%rsi),%rsp +L$epilogue: + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s index 23292a0716..ece106c498 100644 --- a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s +++ b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s @@ -5,6 +5,16 @@ .p2align 4 _bn_mul_mont: + testl $3,%r9d + jnz L$mul_enter + cmpl $8,%r9d + jb L$mul_enter + cmpq %rsi,%rdx + jne L$mul4x_enter + jmp L$sqr4x_enter + +.p2align 4 +L$mul_enter: pushq %rbx pushq %rbp 
pushq %r12 @@ -20,48 +30,63 @@ _bn_mul_mont: andq $-1024,%rsp movq %r11,8(%rsp,%r9,8) -L$prologue: +L$mul_body: movq %rdx,%r12 - movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax xorq %r14,%r14 xorq %r15,%r15 - movq (%r12),%rbx - movq (%rsi),%rax + movq %r8,%rbp mulq %rbx movq %rax,%r10 - movq %rdx,%r11 + movq (%rcx),%rax - imulq %r8,%rax - movq %rax,%rbp + imulq %r10,%rbp + movq %rdx,%r11 - mulq (%rcx) - addq %r10,%rax + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax adcq $0,%rdx movq %rdx,%r13 leaq 1(%r15),%r15 + jmp L$1st_enter + +.p2align 4 L$1st: + addq %rax,%r13 movq (%rsi,%r15,8),%rax - mulq %rbx - addq %r11,%rax adcq $0,%rdx - movq %rax,%r10 + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +L$1st_enter: + mulq %rbx + addq %rax,%r11 movq (%rcx,%r15,8),%rax - movq %rdx,%r11 + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 mulq %rbp - addq %r13,%rax - leaq 1(%r15),%r15 + cmpq %r9,%r15 + jne L$1st + + addq %rax,%r13 + movq (%rsi),%rax adcq $0,%rdx - addq %r10,%rax + addq %r11,%r13 adcq $0,%rdx - movq %rax,-16(%rsp,%r15,8) - cmpq %r9,%r15 + movq %r13,-16(%rsp,%r15,8) movq %rdx,%r13 - jl L$1st + movq %r10,%r11 xorq %rdx,%rdx addq %r11,%r13 @@ -70,50 +95,64 @@ L$1st: movq %rdx,(%rsp,%r9,8) leaq 1(%r14),%r14 -.p2align 2 + jmp L$outer +.p2align 4 L$outer: - xorq %r15,%r15 - movq (%r12,%r14,8),%rbx - movq (%rsi),%rax + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 mulq %rbx - addq (%rsp),%rax + addq %rax,%r10 + movq (%rcx),%rax adcq $0,%rdx - movq %rax,%r10 - movq %rdx,%r11 - imulq %r8,%rax - movq %rax,%rbp + imulq %r10,%rbp + movq %rdx,%r11 - mulq (%rcx,%r15,8) - addq %r10,%rax - movq 8(%rsp),%r10 + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax adcq $0,%rdx + movq 8(%rsp),%r10 movq %rdx,%r13 leaq 1(%r15),%r15 -.p2align 2 + jmp L$inner_enter + +.p2align 4 L$inner: + addq %rax,%r13 movq (%rsi,%r15,8),%rax - mulq %rbx - addq %r11,%rax adcq $0,%rdx - addq %rax,%r10 + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +L$inner_enter: + mulq %rbx + addq %rax,%r11 movq (%rcx,%r15,8),%rax adcq $0,%rdx + addq %r11,%r10 movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 mulq %rbp - addq %r13,%rax - leaq 1(%r15),%r15 - adcq $0,%rdx - addq %r10,%rax + cmpq %r9,%r15 + jne L$inner + + addq %rax,%r13 + movq (%rsi),%rax adcq $0,%rdx + addq %r10,%r13 movq (%rsp,%r15,8),%r10 - cmpq %r9,%r15 - movq %rax,-16(%rsp,%r15,8) + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) movq %rdx,%r13 - jl L$inner xorq %rdx,%rdx addq %r11,%r13 @@ -127,35 +166,434 @@ L$inner: cmpq %r9,%r14 jl L$outer - leaq (%rsp),%rsi - leaq -1(%r9),%r15 - - movq (%rsi),%rax xorq %r14,%r14 + movq (%rsp),%rax + leaq (%rsp),%rsi + movq %r9,%r15 jmp L$sub .p2align 4 L$sub: sbbq (%rcx,%r14,8),%rax movq %rax,(%rdi,%r14,8) - decq %r15 movq 8(%rsi,%r14,8),%rax leaq 1(%r14),%r14 - jge L$sub + decq %r15 + jnz L$sub sbbq $0,%rax + xorq %r14,%r14 andq %rax,%rsi notq %rax movq %rdi,%rcx andq %rax,%rcx - leaq -1(%r9),%r15 + movq %r9,%r15 orq %rcx,%rsi .p2align 4 L$copy: + movq (%rsi,%r14,8),%rax + movq %r14,(%rsp,%r14,8) + movq %rax,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz L$copy + + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$mul_epilogue: + .byte 0xf3,0xc3 + + +.p2align 4 +bn_mul4x_mont: +L$mul4x_enter: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d 
+ leaq 4(%r9),%r10 + movq %rsp,%r11 + negq %r10 + leaq (%rsp,%r10,8),%rsp + andq $-1024,%rsp + + movq %r11,8(%rsp,%r9,8) +L$mul4x_body: + movq %rdi,16(%rsp,%r9,8) + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp L$1st4x +.p2align 4 +L$1st4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jl L$1st4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + leaq 1(%r14),%r14 +.p2align 2 +L$outer4x: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq (%rsp),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + addq 8(%rsp),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp L$inner4x +.p2align 4 +L$inner4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi movq (%rsi,%r15,8),%rax - movq %rax,(%rdi,%r15,8) - movq %r14,(%rsp,%r15,8) + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + 
movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq 8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jl L$inner4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 1(%r14),%r14 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%rsp,%r9,8),%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + cmpq %r9,%r14 + jl L$outer4x + movq 16(%rsp,%r9,8),%rdi + movq 0(%rsp),%rax + pxor %xmm0,%xmm0 + movq 8(%rsp),%rdx + shrq $2,%r9 + leaq (%rsp),%rsi + xorq %r14,%r14 + + subq 0(%rcx),%rax + movq 16(%rsi),%rbx + movq 24(%rsi),%rbp + sbbq 8(%rcx),%rdx + leaq -1(%r9),%r15 + jmp L$sub4x +.p2align 4 +L$sub4x: + movq %rax,0(%rdi,%r14,8) + movq %rdx,8(%rdi,%r14,8) + sbbq 16(%rcx,%r14,8),%rbx + movq 32(%rsi,%r14,8),%rax + movq 40(%rsi,%r14,8),%rdx + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + movq %rbp,24(%rdi,%r14,8) + sbbq 32(%rcx,%r14,8),%rax + movq 48(%rsi,%r14,8),%rbx + movq 56(%rsi,%r14,8),%rbp + sbbq 40(%rcx,%r14,8),%rdx + leaq 4(%r14),%r14 decq %r15 - jge L$copy + jnz L$sub4x + movq %rax,0(%rdi,%r14,8) + movq 32(%rsi,%r14,8),%rax + sbbq 16(%rcx,%r14,8),%rbx + movq %rdx,8(%rdi,%r14,8) + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + + sbbq $0,%rax + movq %rbp,24(%rdi,%r14,8) + xorq %r14,%r14 + andq %rax,%rsi + notq %rax + movq %rdi,%rcx + andq %rax,%rcx + leaq -1(%r9),%r15 + orq %rcx,%rsi + + movdqu (%rsi),%xmm1 + movdqa %xmm0,(%rsp) + movdqu %xmm1,(%rdi) + jmp L$copy4x +.p2align 4 +L$copy4x: + movdqu 16(%rsi,%r14,1),%xmm2 + movdqu 32(%rsi,%r14,1),%xmm1 + movdqa %xmm0,16(%rsp,%r14,1) + movdqu %xmm2,16(%rdi,%r14,1) + movdqa %xmm0,32(%rsp,%r14,1) + movdqu %xmm1,32(%rdi,%r14,1) + leaq 32(%r14),%r14 + decq %r15 + jnz L$copy4x + + shlq $2,%r9 + movdqu 16(%rsi,%r14,1),%xmm2 + movdqa %xmm0,16(%rsp,%r14,1) + movdqu %xmm2,16(%rdi,%r14,1) movq 8(%rsp,%r9,8),%rsi movq $1,%rax movq (%rsi),%r15 @@ -165,7 +603,772 @@ L$copy: movq 32(%rsi),%rbp movq 40(%rsi),%rbx leaq 48(%rsi),%rsp -L$epilogue: +L$mul4x_epilogue: + .byte 0xf3,0xc3 + + +.p2align 4 +bn_sqr4x_mont: +L$sqr4x_enter: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + shll $3,%r9d + xorq %r10,%r10 + movq %rsp,%r11 + subq %r9,%r10 + movq (%r8),%r8 + leaq -72(%rsp,%r10,2),%rsp + andq $-1024,%rsp + + + + + + + + + + + + movq %rdi,32(%rsp) + movq %rcx,40(%rsp) + movq %r8,48(%rsp) + movq %r11,56(%rsp) +L$sqr4x_body: + + + + + + + + leaq 32(%r10),%rbp + leaq (%rsi,%r9,1),%rsi + + movq %r9,%rcx + + + movq -32(%rsi,%rbp,1),%r14 + leaq 64(%rsp,%r9,2),%rdi + movq 
-24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + movq %r10,-24(%rdi,%rbp,1) + + xorq %r10,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,-16(%rdi,%rbp,1) + + leaq -16(%rbp),%rcx + + + movq 8(%rsi,%rcx,1),%rbx + mulq %r15 + movq %rax,%r12 + movq %rbx,%rax + movq %rdx,%r13 + + xorq %r11,%r11 + addq %r12,%r10 + leaq 16(%rcx),%rcx + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-8(%rdi,%rcx,1) + jmp L$sqr4x_1st + +.p2align 4 +L$sqr4x_1st: + movq (%rsi,%rcx,1),%rbx + xorq %r12,%r12 + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + adcq %rdx,%r12 + + xorq %r10,%r10 + addq %r13,%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,(%rdi,%rcx,1) + + + movq 8(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + + xorq %r11,%r11 + addq %r12,%r10 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,8(%rdi,%rcx,1) + + movq 16(%rsi,%rcx,1),%rbx + xorq %r12,%r12 + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + adcq %rdx,%r12 + + xorq %r10,%r10 + addq %r13,%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,16(%rdi,%rcx,1) + + + movq 24(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + + xorq %r11,%r11 + addq %r12,%r10 + leaq 32(%rcx),%rcx + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne L$sqr4x_1st + + xorq %r12,%r12 + addq %r11,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + adcq %rdx,%r12 + + movq %r13,(%rdi) + leaq 16(%rbp),%rbp + movq %r12,8(%rdi) + jmp L$sqr4x_outer + +.p2align 4 +L$sqr4x_outer: + movq -32(%rsi,%rbp,1),%r14 + leaq 64(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + movq -24(%rdi,%rbp,1),%r10 + xorq %r11,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-24(%rdi,%rbp,1) + + xorq %r10,%r10 + addq -16(%rdi,%rbp,1),%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,-16(%rdi,%rbp,1) + + leaq -16(%rbp),%rcx + xorq %r12,%r12 + + + movq 8(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + addq 8(%rdi,%rcx,1),%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + + xorq %r11,%r11 + addq %r12,%r10 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,8(%rdi,%rcx,1) + + leaq 16(%rcx),%rcx + jmp L$sqr4x_inner + +.p2align 4 +L$sqr4x_inner: + movq (%rsi,%rcx,1),%rbx + xorq %r12,%r12 + addq (%rdi,%rcx,1),%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + adcq %rdx,%r12 + + xorq %r10,%r10 + addq %r13,%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,(%rdi,%rcx,1) + + movq 8(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + addq 8(%rdi,%rcx,1),%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + + xorq %r11,%r11 + addq %r12,%r10 + leaq 16(%rcx),%rcx + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne L$sqr4x_inner + + xorq %r12,%r12 + addq %r11,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + adcq %rdx,%r12 + + movq %r13,(%rdi) + movq %r12,8(%rdi) + + addq $16,%rbp 
+ jnz L$sqr4x_outer + + + movq -32(%rsi),%r14 + leaq 64(%rsp,%r9,2),%rdi + movq -24(%rsi),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi),%rbx + movq %rax,%r15 + + xorq %r11,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-24(%rdi) + + xorq %r10,%r10 + addq %r13,%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + movq %r11,-16(%rdi) + + movq -8(%rsi),%rbx + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq $0,%rdx + + xorq %r11,%r11 + addq %r12,%r10 + movq %rdx,%r13 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq %rdx,%r11 + movq %r10,-8(%rdi) + + xorq %r12,%r12 + addq %r11,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq -16(%rsi),%rax + adcq %rdx,%r12 + + movq %r13,(%rdi) + movq %r12,8(%rdi) + + mulq %rbx + addq $16,%rbp + xorq %r14,%r14 + subq %r9,%rbp + xorq %r15,%r15 + + addq %r12,%rax + adcq $0,%rdx + movq %rax,8(%rdi) + movq %rdx,16(%rdi) + movq %r15,24(%rdi) + + movq -16(%rsi,%rbp,1),%rax + leaq 64(%rsp,%r9,2),%rdi + xorq %r10,%r10 + movq -24(%rdi,%rbp,2),%r11 + + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi,%rbp,2),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,-32(%rdi,%rbp,2) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi,%rbp,2) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 0(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 8(%rdi,%rbp,2),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,-16(%rdi,%rbp,2) + adcq %rdx,%r8 + leaq 16(%rbp),%rbp + movq %r8,-40(%rdi,%rbp,2) + sbbq %r15,%r15 + jmp L$sqr4x_shift_n_add + +.p2align 4 +L$sqr4x_shift_n_add: + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi,%rbp,2),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,-32(%rdi,%rbp,2) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi,%rbp,2) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 0(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 8(%rdi,%rbp,2),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,-16(%rdi,%rbp,2) + adcq %rdx,%r8 + + leaq (%r14,%r10,2),%r12 + movq %r8,-8(%rdi,%rbp,2) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi,%rbp,2),%r11 + adcq %rax,%r12 + movq 8(%rsi,%rbp,1),%rax + movq %r12,0(%rdi,%rbp,2) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi,%rbp,2) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi,%rbp,2),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi,%rbp,2),%r11 + adcq %rax,%rbx + movq 16(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi,%rbp,2) + adcq %rdx,%r8 + movq %r8,24(%rdi,%rbp,2) + sbbq %r15,%r15 + addq $32,%rbp + jnz L$sqr4x_shift_n_add + + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq 
(%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + mulq %rax + negq %r15 + adcq %rax,%rbx + adcq %rdx,%r8 + movq %rbx,-16(%rdi) + movq %r8,-8(%rdi) + movq 40(%rsp),%rsi + movq 48(%rsp),%r8 + xorq %rcx,%rcx + movq %r9,0(%rsp) + subq %r9,%rcx + movq 64(%rsp),%r10 + movq %r8,%r14 + leaq 64(%rsp,%r9,2),%rax + leaq 64(%rsp,%r9,1),%rdi + movq %rax,8(%rsp) + leaq (%rsi,%r9,1),%rsi + xorq %rbp,%rbp + + movq 0(%rsi,%rcx,1),%rax + movq 8(%rsi,%rcx,1),%r9 + imulq %r10,%r14 + movq %rax,%rbx + jmp L$sqr4x_mont_outer + +.p2align 4 +L$sqr4x_mont_outer: + xorq %r11,%r11 + mulq %r14 + addq %rax,%r10 + movq %r9,%rax + adcq %rdx,%r11 + movq %r8,%r15 + + xorq %r10,%r10 + addq 8(%rdi,%rcx,1),%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + + imulq %r11,%r15 + + movq 16(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + addq %r11,%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + movq %r12,8(%rdi,%rcx,1) + + xorq %r11,%r11 + addq 16(%rdi,%rcx,1),%r10 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %r9,%rax + adcq %rdx,%r11 + + movq 24(%rsi,%rcx,1),%r9 + xorq %r12,%r12 + addq %r10,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq %r9,%rax + adcq %rdx,%r12 + movq %r13,16(%rdi,%rcx,1) + + xorq %r10,%r10 + addq 24(%rdi,%rcx,1),%r11 + leaq 32(%rcx),%rcx + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + jmp L$sqr4x_mont_inner + +.p2align 4 +L$sqr4x_mont_inner: + movq (%rsi,%rcx,1),%rbx + xorq %r13,%r13 + addq %r11,%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + movq %r12,-8(%rdi,%rcx,1) + + xorq %r11,%r11 + addq (%rdi,%rcx,1),%r10 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %r9,%rax + adcq %rdx,%r11 + + movq 8(%rsi,%rcx,1),%r9 + xorq %r12,%r12 + addq %r10,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq %r9,%rax + adcq %rdx,%r12 + movq %r13,(%rdi,%rcx,1) + + xorq %r10,%r10 + addq 8(%rdi,%rcx,1),%r11 + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + + + movq 16(%rsi,%rcx,1),%rbx + xorq %r13,%r13 + addq %r11,%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq %rdx,%r13 + movq %r12,8(%rdi,%rcx,1) + + xorq %r11,%r11 + addq 16(%rdi,%rcx,1),%r10 + adcq $0,%r11 + mulq %r14 + addq %rax,%r10 + movq %r9,%rax + adcq %rdx,%r11 + + movq 24(%rsi,%rcx,1),%r9 + xorq %r12,%r12 + addq %r10,%r13 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq %r9,%rax + adcq %rdx,%r12 + movq %r13,16(%rdi,%rcx,1) + + xorq %r10,%r10 + addq 24(%rdi,%rcx,1),%r11 + leaq 32(%rcx),%rcx + adcq $0,%r10 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq %rdx,%r10 + cmpq $0,%rcx + jne L$sqr4x_mont_inner + + subq 0(%rsp),%rcx + movq %r8,%r14 + + xorq %r13,%r13 + addq %r11,%r12 + adcq $0,%r13 + mulq %r15 + addq %rax,%r12 + movq %r9,%rax + adcq %rdx,%r13 + movq %r12,-8(%rdi) + + xorq %r11,%r11 + addq (%rdi),%r10 + adcq $0,%r11 + movq 0(%rsi,%rcx,1),%rbx + addq %rbp,%r10 + adcq $0,%r11 + + imulq 16(%rdi,%rcx,1),%r14 + xorq %r12,%r12 + movq 8(%rsi,%rcx,1),%r9 + addq %r10,%r13 + movq 16(%rdi,%rcx,1),%r10 + adcq $0,%r12 + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + adcq %rdx,%r12 + movq %r13,(%rdi) + + xorq %rbp,%rbp + addq 8(%rdi),%r12 + adcq %rbp,%rbp + addq %r11,%r12 + leaq 16(%rdi),%rdi + adcq $0,%rbp + movq %r12,-8(%rdi) + cmpq 8(%rsp),%rdi + jb L$sqr4x_mont_outer + + movq 0(%rsp),%r9 + movq %rbp,(%rdi) + movq 64(%rsp,%r9,1),%rax + leaq 64(%rsp,%r9,1),%rbx + movq 40(%rsp),%rsi + shrq $5,%r9 + movq 8(%rbx),%rdx + xorq %rbp,%rbp + + movq 32(%rsp),%rdi + subq 
0(%rsi),%rax + movq 16(%rbx),%r10 + movq 24(%rbx),%r11 + sbbq 8(%rsi),%rdx + leaq -1(%r9),%rcx + jmp L$sqr4x_sub +.p2align 4 +L$sqr4x_sub: + movq %rax,0(%rdi,%rbp,8) + movq %rdx,8(%rdi,%rbp,8) + sbbq 16(%rsi,%rbp,8),%r10 + movq 32(%rbx,%rbp,8),%rax + movq 40(%rbx,%rbp,8),%rdx + sbbq 24(%rsi,%rbp,8),%r11 + movq %r10,16(%rdi,%rbp,8) + movq %r11,24(%rdi,%rbp,8) + sbbq 32(%rsi,%rbp,8),%rax + movq 48(%rbx,%rbp,8),%r10 + movq 56(%rbx,%rbp,8),%r11 + sbbq 40(%rsi,%rbp,8),%rdx + leaq 4(%rbp),%rbp + decq %rcx + jnz L$sqr4x_sub + + movq %rax,0(%rdi,%rbp,8) + movq 32(%rbx,%rbp,8),%rax + sbbq 16(%rsi,%rbp,8),%r10 + movq %rdx,8(%rdi,%rbp,8) + sbbq 24(%rsi,%rbp,8),%r11 + movq %r10,16(%rdi,%rbp,8) + + sbbq $0,%rax + movq %r11,24(%rdi,%rbp,8) + xorq %rbp,%rbp + andq %rax,%rbx + notq %rax + movq %rdi,%rsi + andq %rax,%rsi + leaq -1(%r9),%rcx + orq %rsi,%rbx + + pxor %xmm0,%xmm0 + leaq 64(%rsp,%r9,8),%rsi + movdqu (%rbx),%xmm1 + leaq (%rsi,%r9,8),%rsi + movdqa %xmm0,64(%rsp) + movdqa %xmm0,(%rsi) + movdqu %xmm1,(%rdi) + jmp L$sqr4x_copy +.p2align 4 +L$sqr4x_copy: + movdqu 16(%rbx,%rbp,1),%xmm2 + movdqu 32(%rbx,%rbp,1),%xmm1 + movdqa %xmm0,80(%rsp,%rbp,1) + movdqa %xmm0,96(%rsp,%rbp,1) + movdqa %xmm0,16(%rsi,%rbp,1) + movdqa %xmm0,32(%rsi,%rbp,1) + movdqu %xmm2,16(%rdi,%rbp,1) + movdqu %xmm1,32(%rdi,%rbp,1) + leaq 32(%rbp),%rbp + decq %rcx + jnz L$sqr4x_copy + + movdqu 16(%rbx,%rbp,1),%xmm2 + movdqa %xmm0,80(%rsp,%rbp,1) + movdqa %xmm0,16(%rsi,%rbp,1) + movdqu %xmm2,16(%rdi,%rbp,1) + movq 56(%rsp),%rsi + movq $1,%rax + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$sqr4x_epilogue: .byte 0xf3,0xc3 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm/x64-macosx-gas/md5/md5-x86_64.s b/deps/openssl/asm/x64-macosx-gas/md5/md5-x86_64.s index 96f6ea16ce..cdecac7b4c 100644 --- a/deps/openssl/asm/x64-macosx-gas/md5/md5-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/md5/md5-x86_64.s @@ -668,4 +668,3 @@ L$end: addq $40,%rsp L$epilogue: .byte 0xf3,0xc3 - diff --git a/deps/openssl/asm/x64-macosx-gas/rc4/rc4-md5-x86_64.s b/deps/openssl/asm/x64-macosx-gas/rc4/rc4-md5-x86_64.s new file mode 100644 index 0000000000..85f9905a84 --- /dev/null +++ b/deps/openssl/asm/x64-macosx-gas/rc4/rc4-md5-x86_64.s @@ -0,0 +1,1259 @@ +.text + +.p2align 4 + +.globl _rc4_md5_enc + +_rc4_md5_enc: + cmpq $0,%r9 + je L$abort + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $40,%rsp +L$body: + movq %rcx,%r11 + movq %r9,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %r8,%r15 + xorq %rbp,%rbp + xorq %rcx,%rcx + + leaq 8(%rdi),%rdi + movb -8(%rdi),%bpl + movb -4(%rdi),%cl + + incb %bpl + subq %r13,%r14 + movl (%rdi,%rbp,4),%eax + addb %al,%cl + leaq (%rdi,%rbp,4),%rsi + shlq $6,%r12 + addq %r15,%r12 + movq %r12,16(%rsp) + + movq %r11,24(%rsp) + movl 0(%r11),%r8d + movl 4(%r11),%r9d + movl 8(%r11),%r10d + movl 12(%r11),%r11d + jmp L$oop + +.p2align 4 +L$oop: + movl %r8d,0(%rsp) + movl %r9d,4(%rsp) + movl %r10d,8(%rsp) + movl %r11d,%r12d + movl %r11d,12(%rsp) + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 0(%r15),%r8d + addb %dl,%al + movl 4(%rsi),%ebx + addl $3614090360,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl 
%edx,0(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 4(%r15),%r11d + addb %dl,%bl + movl 8(%rsi),%eax + addl $3905402710,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,4(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 8(%r15),%r10d + addb %dl,%al + movl 12(%rsi),%ebx + addl $606105819,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,8(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 12(%r15),%r9d + addb %dl,%bl + movl 16(%rsi),%eax + addl $3250441966,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,12(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 16(%r15),%r8d + addb %dl,%al + movl 20(%rsi),%ebx + addl $4118548399,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,16(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 20(%r15),%r11d + addb %dl,%bl + movl 24(%rsi),%eax + addl $1200080426,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,20(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 24(%r15),%r10d + addb %dl,%al + movl 28(%rsi),%ebx + addl $2821735955,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,24(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 28(%r15),%r9d + addb %dl,%bl + movl 32(%rsi),%eax + addl $4249261313,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,28(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 32(%r15),%r8d + addb %dl,%al + movl 36(%rsi),%ebx + addl $1770035416,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,32(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 36(%r15),%r11d + addb %dl,%bl + movl 40(%rsi),%eax + addl $2336552879,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,36(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 40(%r15),%r10d + addb %dl,%al + movl 44(%rsi),%ebx + addl $4294925233,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,40(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d 
+ movl %r8d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 44(%r15),%r9d + addb %dl,%bl + movl 48(%rsi),%eax + addl $2304563134,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,44(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 48(%r15),%r8d + addb %dl,%al + movl 52(%rsi),%ebx + addl $1804603682,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,48(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 52(%r15),%r11d + addb %dl,%bl + movl 56(%rsi),%eax + addl $4254626195,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,52(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 56(%r15),%r10d + addb %dl,%al + movl 60(%rsi),%ebx + addl $2792965006,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,56(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu (%r13),%xmm2 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 60(%r15),%r9d + addb %dl,%bl + movl 64(%rsi),%eax + addl $1236535329,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,60(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r10d,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + psllq $8,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm1,%xmm2 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 4(%r15),%r8d + addb %dl,%al + movl 68(%rsi),%ebx + addl $4129170786,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,64(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 24(%r15),%r11d + addb %dl,%bl + movl 72(%rsi),%eax + addl $3225465664,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,68(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 44(%r15),%r10d + addb %dl,%al + movl 76(%rsi),%ebx + addl $643717713,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,72(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 0(%r15),%r9d + addb %dl,%bl + movl 80(%rsi),%eax + addl $3921069994,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,76(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 20(%r15),%r8d + addb %dl,%al + movl 84(%rsi),%ebx + addl $3593408605,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,80(%rsi) + addl 
%r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 40(%r15),%r11d + addb %dl,%bl + movl 88(%rsi),%eax + addl $38016083,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,84(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 60(%r15),%r10d + addb %dl,%al + movl 92(%rsi),%ebx + addl $3634488961,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,88(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 16(%r15),%r9d + addb %dl,%bl + movl 96(%rsi),%eax + addl $3889429448,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,92(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 36(%r15),%r8d + addb %dl,%al + movl 100(%rsi),%ebx + addl $568446438,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,96(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 56(%r15),%r11d + addb %dl,%bl + movl 104(%rsi),%eax + addl $3275163606,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,100(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 12(%r15),%r10d + addb %dl,%al + movl 108(%rsi),%ebx + addl $4107603335,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,104(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 32(%r15),%r9d + addb %dl,%bl + movl 112(%rsi),%eax + addl $1163531501,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,108(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 52(%r15),%r8d + addb %dl,%al + movl 116(%rsi),%ebx + addl $2850285829,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,112(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 8(%r15),%r11d + addb %dl,%bl + movl 120(%rsi),%eax + addl $4243563512,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,116(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 28(%r15),%r10d + addb %dl,%al + movl 124(%rsi),%ebx + addl $1735328473,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,120(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d 
+ pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 16(%r13),%xmm3 + addb $32,%bpl + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 48(%r15),%r9d + addb %dl,%bl + movl 0(%rdi,%rbp,4),%eax + addl $2368359562,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,124(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r11d,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movq %rcx,%rsi + xorq %rcx,%rcx + movb %sil,%cl + leaq (%rdi,%rbp,4),%rsi + psllq $8,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 20(%r15),%r8d + addb %dl,%al + movl 4(%rsi),%ebx + addl $4294588738,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,0(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 32(%r15),%r11d + addb %dl,%bl + movl 8(%rsi),%eax + addl $2272392833,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,4(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 44(%r15),%r10d + addb %dl,%al + movl 12(%rsi),%ebx + addl $1839030562,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,8(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 56(%r15),%r9d + addb %dl,%bl + movl 16(%rsi),%eax + addl $4259657740,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,12(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 4(%r15),%r8d + addb %dl,%al + movl 20(%rsi),%ebx + addl $2763975236,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,16(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 16(%r15),%r11d + addb %dl,%bl + movl 24(%rsi),%eax + addl $1272893353,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,20(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 28(%r15),%r10d + addb %dl,%al + movl 28(%rsi),%ebx + addl $4139469664,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,24(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 40(%r15),%r9d + addb %dl,%bl + movl 32(%rsi),%eax + addl $3200236656,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,28(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 52(%r15),%r8d + addb %dl,%al + movl 36(%rsi),%ebx + addl $681279174,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,32(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + 
addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 0(%r15),%r11d + addb %dl,%bl + movl 40(%rsi),%eax + addl $3936430074,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,36(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 12(%r15),%r10d + addb %dl,%al + movl 44(%rsi),%ebx + addl $3572445317,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,40(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 24(%r15),%r9d + addb %dl,%bl + movl 48(%rsi),%eax + addl $76029189,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,44(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 36(%r15),%r8d + addb %dl,%al + movl 52(%rsi),%ebx + addl $3654602809,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,48(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 48(%r15),%r11d + addb %dl,%bl + movl 56(%rsi),%eax + addl $3873151461,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,52(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 60(%r15),%r10d + addb %dl,%al + movl 60(%rsi),%ebx + addl $530742520,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,56(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 32(%r13),%xmm4 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 8(%r15),%r9d + addb %dl,%bl + movl 64(%rsi),%eax + addl $3299628645,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,60(%rsi) + addb %al,%cl + roll $23,%r9d + movl $-1,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + psllq $8,%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm1,%xmm4 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 0(%r15),%r8d + addb %dl,%al + movl 68(%rsi),%ebx + addl $4096336452,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,64(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 28(%r15),%r11d + addb %dl,%bl + movl 72(%rsi),%eax + addl $1126891415,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,68(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 56(%r15),%r10d + addb %dl,%al + movl 76(%rsi),%ebx + addl $2878612391,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,72(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl 
%r10d,%r12d + addl 20(%r15),%r9d + addb %dl,%bl + movl 80(%rsi),%eax + addl $4237533241,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,76(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 48(%r15),%r8d + addb %dl,%al + movl 84(%rsi),%ebx + addl $1700485571,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,80(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 12(%r15),%r11d + addb %dl,%bl + movl 88(%rsi),%eax + addl $2399980690,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,84(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 40(%r15),%r10d + addb %dl,%al + movl 92(%rsi),%ebx + addl $4293915773,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,88(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 4(%r15),%r9d + addb %dl,%bl + movl 96(%rsi),%eax + addl $2240044497,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,92(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 32(%r15),%r8d + addb %dl,%al + movl 100(%rsi),%ebx + addl $1873313359,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,96(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 60(%r15),%r11d + addb %dl,%bl + movl 104(%rsi),%eax + addl $4264355552,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,100(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 24(%r15),%r10d + addb %dl,%al + movl 108(%rsi),%ebx + addl $2734768916,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,104(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 52(%r15),%r9d + addb %dl,%bl + movl 112(%rsi),%eax + addl $1309151649,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,108(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 16(%r15),%r8d + addb %dl,%al + movl 116(%rsi),%ebx + addl $4149444226,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,112(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 44(%r15),%r11d + addb %dl,%bl + movl 120(%rsi),%eax + addl 
$3174756917,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,116(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 8(%r15),%r10d + addb %dl,%al + movl 124(%rsi),%ebx + addl $718787259,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,120(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 48(%r13),%xmm5 + addb $32,%bpl + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 36(%r15),%r9d + addb %dl,%bl + movl 0(%rdi,%rbp,4),%eax + addl $3951481745,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,124(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movq %rbp,%rsi + xorq %rbp,%rbp + movb %sil,%bpl + movq %rcx,%rsi + xorq %rcx,%rcx + movb %sil,%cl + leaq (%rdi,%rbp,4),%rsi + psllq $8,%xmm1 + pxor %xmm0,%xmm5 + pxor %xmm1,%xmm5 + addl 0(%rsp),%r8d + addl 4(%rsp),%r9d + addl 8(%rsp),%r10d + addl 12(%rsp),%r11d + + movdqu %xmm2,(%r14,%r13,1) + movdqu %xmm3,16(%r14,%r13,1) + movdqu %xmm4,32(%r14,%r13,1) + movdqu %xmm5,48(%r14,%r13,1) + leaq 64(%r15),%r15 + leaq 64(%r13),%r13 + cmpq 16(%rsp),%r15 + jb L$oop + + movq 24(%rsp),%r12 + subb %al,%cl + movl %r8d,0(%r12) + movl %r9d,4(%r12) + movl %r10d,8(%r12) + movl %r11d,12(%r12) + subb $1,%bpl + movl %ebp,-8(%rdi) + movl %ecx,-4(%rdi) + + movq 40(%rsp),%r15 + movq 48(%rsp),%r14 + movq 56(%rsp),%r13 + movq 64(%rsp),%r12 + movq 72(%rsp),%rbp + movq 80(%rsp),%rbx + leaq 88(%rsp),%rsp +L$epilogue: +L$abort: + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm/x64-macosx-gas/rc4/rc4-x86_64.s b/deps/openssl/asm/x64-macosx-gas/rc4/rc4-x86_64.s index 41183cebec..8c4f29ecbb 100644 --- a/deps/openssl/asm/x64-macosx-gas/rc4/rc4-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/rc4/rc4-x86_64.s @@ -1,6 +1,7 @@ .text + .globl _RC4 .p2align 4 @@ -12,316 +13,511 @@ L$entry: pushq %r12 pushq %r13 L$prologue: + movq %rsi,%r11 + movq %rdx,%r12 + movq %rcx,%r13 + xorq %r10,%r10 + xorq %rcx,%rcx - addq $8,%rdi - movl -8(%rdi),%r8d - movl -4(%rdi),%r12d + leaq 8(%rdi),%rdi + movb -8(%rdi),%r10b + movb -4(%rdi),%cl cmpl $-1,256(%rdi) je L$RC4_CHAR - incb %r8b - movl (%rdi,%r8,4),%r9d - testq $-8,%rsi - jz L$loop1 - jmp L$loop8 -.p2align 4 -L$loop8: - addb %r9b,%r12b - movq %r8,%r10 - movl (%rdi,%r12,4),%r13d - rorq $8,%rax - incb %r10b - movl (%rdi,%r10,4),%r11d - cmpq %r10,%r12 - movl %r9d,(%rdi,%r12,4) - cmoveq %r9,%r11 - movl %r13d,(%rdi,%r8,4) - addb %r9b,%r13b - movb (%rdi,%r13,4),%al - addb %r11b,%r12b - movq %r10,%r8 - movl (%rdi,%r12,4),%r13d - rorq $8,%rax - incb %r8b - movl (%rdi,%r8,4),%r9d - cmpq %r8,%r12 - movl %r11d,(%rdi,%r12,4) - cmoveq %r11,%r9 - movl %r13d,(%rdi,%r10,4) - addb %r11b,%r13b - movb (%rdi,%r13,4),%al - addb %r9b,%r12b - movq %r8,%r10 - movl (%rdi,%r12,4),%r13d - rorq $8,%rax + movl _OPENSSL_ia32cap_P(%rip),%r8d + xorq %rbx,%rbx incb %r10b - movl (%rdi,%r10,4),%r11d - cmpq %r10,%r12 - movl %r9d,(%rdi,%r12,4) - cmoveq %r9,%r11 - movl %r13d,(%rdi,%r8,4) - addb %r9b,%r13b - movb (%rdi,%r13,4),%al - addb %r11b,%r12b - movq %r10,%r8 - movl (%rdi,%r12,4),%r13d - rorq $8,%rax - incb %r8b - movl (%rdi,%r8,4),%r9d - cmpq %r8,%r12 - movl %r11d,(%rdi,%r12,4) - cmoveq %r11,%r9 - movl %r13d,(%rdi,%r10,4) - addb %r11b,%r13b - movb (%rdi,%r13,4),%al - addb %r9b,%r12b - movq %r8,%r10 - 
movl (%rdi,%r12,4),%r13d - rorq $8,%rax + subq %r10,%rbx + subq %r12,%r13 + movl (%rdi,%r10,4),%eax + testq $-16,%r11 + jz L$loop1 + btl $30,%r8d + jc L$intel + andq $7,%rbx + leaq 1(%r10),%rsi + jz L$oop8 + subq %rbx,%r11 +L$oop8_warmup: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al incb %r10b - movl (%rdi,%r10,4),%r11d - cmpq %r10,%r12 - movl %r9d,(%rdi,%r12,4) - cmoveq %r9,%r11 - movl %r13d,(%rdi,%r8,4) - addb %r9b,%r13b - movb (%rdi,%r13,4),%al - addb %r11b,%r12b - movq %r10,%r8 - movl (%rdi,%r12,4),%r13d - rorq $8,%rax - incb %r8b - movl (%rdi,%r8,4),%r9d - cmpq %r8,%r12 - movl %r11d,(%rdi,%r12,4) - cmoveq %r11,%r9 - movl %r13d,(%rdi,%r10,4) - addb %r11b,%r13b - movb (%rdi,%r13,4),%al - addb %r9b,%r12b - movq %r8,%r10 - movl (%rdi,%r12,4),%r13d - rorq $8,%rax + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r13,%r12,1) + leaq 1(%r12),%r12 + decq %rbx + jnz L$oop8_warmup + + leaq 1(%r10),%rsi + jmp L$oop8 +.p2align 4 +L$oop8: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 0(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,0(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 4(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,4(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 8(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,8(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 12(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,12(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 16(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,16(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 20(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,20(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 24(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,24(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb $8,%sil + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl -4(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,28(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb $8,%r10b + rorq $8,%r8 + subq $8,%r11 + + xorq (%r12),%r8 + movq %r8,(%r13,%r12,1) + leaq 8(%r12),%r12 + + testq $-8,%r11 + jnz L$oop8 + cmpq $0,%r11 + jne L$loop1 + jmp L$exit + +.p2align 4 +L$intel: + testq $-32,%r11 + jz L$loop1 + andq $15,%rbx + jz L$oop16_is_hot + subq %rbx,%r11 +L$oop16_warmup: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al incb %r10b - movl (%rdi,%r10,4),%r11d - cmpq %r10,%r12 - movl %r9d,(%rdi,%r12,4) - cmoveq %r9,%r11 - movl %r13d,(%rdi,%r8,4) - addb %r9b,%r13b - movb (%rdi,%r13,4),%al - addb %r11b,%r12b - movq %r10,%r8 - movl (%rdi,%r12,4),%r13d - rorq $8,%rax - incb %r8b - movl (%rdi,%r8,4),%r9d - cmpq %r8,%r12 - movl %r11d,(%rdi,%r12,4) - cmoveq %r11,%r9 - movl %r13d,(%rdi,%r10,4) - addb %r11b,%r13b - movb (%rdi,%r13,4),%al - rorq $8,%rax - subq $8,%rsi - - xorq (%rdx),%rax - addq $8,%rdx - movq %rax,(%rcx) - addq $8,%rcx - - testq $-8,%rsi - jnz L$loop8 - cmpq $0,%rsi + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r13,%r12,1) + leaq 
1(%r12),%r12 + decq %rbx + jnz L$oop16_warmup + + movq %rcx,%rbx + xorq %rcx,%rcx + movb %bl,%cl + +L$oop16_is_hot: + leaq (%rdi,%r10,4),%rsi + addb %al,%cl + movl (%rdi,%rcx,4),%edx + pxor %xmm0,%xmm0 + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 4(%rsi),%ebx + movzbl %al,%eax + movl %edx,0(%rsi) + addb %bl,%cl + pinsrw $0,(%rdi,%rax,4),%xmm0 + jmp L$oop16_enter +.p2align 4 +L$oop16: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + pxor %xmm0,%xmm2 + psllq $8,%xmm1 + pxor %xmm0,%xmm0 + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 4(%rsi),%ebx + movzbl %al,%eax + movl %edx,0(%rsi) + pxor %xmm1,%xmm2 + addb %bl,%cl + pinsrw $0,(%rdi,%rax,4),%xmm0 + movdqu %xmm2,(%r13,%r12,1) + leaq 16(%r12),%r12 +L$oop16_enter: + movl (%rdi,%rcx,4),%edx + pxor %xmm1,%xmm1 + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 8(%rsi),%eax + movzbl %bl,%ebx + movl %edx,4(%rsi) + addb %al,%cl + pinsrw $0,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 12(%rsi),%ebx + movzbl %al,%eax + movl %edx,8(%rsi) + addb %bl,%cl + pinsrw $1,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 16(%rsi),%eax + movzbl %bl,%ebx + movl %edx,12(%rsi) + addb %al,%cl + pinsrw $1,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 20(%rsi),%ebx + movzbl %al,%eax + movl %edx,16(%rsi) + addb %bl,%cl + pinsrw $2,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 24(%rsi),%eax + movzbl %bl,%ebx + movl %edx,20(%rsi) + addb %al,%cl + pinsrw $2,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 28(%rsi),%ebx + movzbl %al,%eax + movl %edx,24(%rsi) + addb %bl,%cl + pinsrw $3,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 32(%rsi),%eax + movzbl %bl,%ebx + movl %edx,28(%rsi) + addb %al,%cl + pinsrw $3,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 36(%rsi),%ebx + movzbl %al,%eax + movl %edx,32(%rsi) + addb %bl,%cl + pinsrw $4,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 40(%rsi),%eax + movzbl %bl,%ebx + movl %edx,36(%rsi) + addb %al,%cl + pinsrw $4,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 44(%rsi),%ebx + movzbl %al,%eax + movl %edx,40(%rsi) + addb %bl,%cl + pinsrw $5,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 48(%rsi),%eax + movzbl %bl,%ebx + movl %edx,44(%rsi) + addb %al,%cl + pinsrw $5,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 52(%rsi),%ebx + movzbl %al,%eax + movl %edx,48(%rsi) + addb %bl,%cl + pinsrw $6,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 56(%rsi),%eax + movzbl %bl,%ebx + movl %edx,52(%rsi) + addb %al,%cl + pinsrw $6,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 60(%rsi),%ebx + movzbl %al,%eax + movl %edx,56(%rsi) + addb %bl,%cl + pinsrw $7,(%rdi,%rax,4),%xmm0 + addb $16,%r10b + movdqu (%r12),%xmm2 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movzbl %bl,%ebx + movl %edx,60(%rsi) + leaq (%rdi,%r10,4),%rsi + pinsrw $7,(%rdi,%rbx,4),%xmm1 + movl (%rsi),%eax + movq %rcx,%rbx + xorq %rcx,%rcx + subq $16,%r11 + movb %bl,%cl + testq $-16,%r11 + jnz L$oop16 + + psllq $8,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm1,%xmm2 + movdqu 
%xmm2,(%r13,%r12,1) + leaq 16(%r12),%r12 + + cmpq $0,%r11 jne L$loop1 jmp L$exit .p2align 4 L$loop1: - addb %r9b,%r12b - movl (%rdi,%r12,4),%r13d - movl %r9d,(%rdi,%r12,4) - movl %r13d,(%rdi,%r8,4) - addb %r13b,%r9b - incb %r8b - movl (%rdi,%r9,4),%r13d - movl (%rdi,%r8,4),%r9d - xorb (%rdx),%r13b - incq %rdx - movb %r13b,(%rcx) - incq %rcx - decq %rsi + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al + incb %r10b + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r13,%r12,1) + leaq 1(%r12),%r12 + decq %r11 jnz L$loop1 jmp L$exit .p2align 4 L$RC4_CHAR: - addb $1,%r8b - movzbl (%rdi,%r8,1),%r9d - testq $-8,%rsi + addb $1,%r10b + movzbl (%rdi,%r10,1),%eax + testq $-8,%r11 jz L$cloop1 - cmpl $0,260(%rdi) - jnz L$cloop1 jmp L$cloop8 .p2align 4 L$cloop8: - movl (%rdx),%eax - movl 4(%rdx),%ebx - addb %r9b,%r12b - leaq 1(%r8),%r10 - movzbl (%rdi,%r12,1),%r13d - movzbl %r10b,%r10d - movzbl (%rdi,%r10,1),%r11d - movb %r9b,(%rdi,%r12,1) - cmpq %r10,%r12 - movb %r13b,(%rdi,%r8,1) + movl (%r12),%r8d + movl 4(%r12),%r9d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) jne L$cmov0 - movq %r9,%r11 + movq %rax,%rbx L$cmov0: - addb %r9b,%r13b - xorb (%rdi,%r13,1),%al - rorl $8,%eax - addb %r11b,%r12b - leaq 1(%r10),%r8 - movzbl (%rdi,%r12,1),%r13d - movzbl %r8b,%r8d - movzbl (%rdi,%r8,1),%r9d - movb %r11b,(%rdi,%r12,1) - cmpq %r8,%r12 - movb %r13b,(%rdi,%r10,1) + addb %al,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) jne L$cmov1 - movq %r11,%r9 + movq %rbx,%rax L$cmov1: - addb %r11b,%r13b - xorb (%rdi,%r13,1),%al - rorl $8,%eax - addb %r9b,%r12b - leaq 1(%r8),%r10 - movzbl (%rdi,%r12,1),%r13d - movzbl %r10b,%r10d - movzbl (%rdi,%r10,1),%r11d - movb %r9b,(%rdi,%r12,1) - cmpq %r10,%r12 - movb %r13b,(%rdi,%r8,1) + addb %bl,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) jne L$cmov2 - movq %r9,%r11 + movq %rax,%rbx L$cmov2: - addb %r9b,%r13b - xorb (%rdi,%r13,1),%al - rorl $8,%eax - addb %r11b,%r12b - leaq 1(%r10),%r8 - movzbl (%rdi,%r12,1),%r13d - movzbl %r8b,%r8d - movzbl (%rdi,%r8,1),%r9d - movb %r11b,(%rdi,%r12,1) - cmpq %r8,%r12 - movb %r13b,(%rdi,%r10,1) + addb %al,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) jne L$cmov3 - movq %r11,%r9 + movq %rbx,%rax L$cmov3: - addb %r11b,%r13b - xorb (%rdi,%r13,1),%al - rorl $8,%eax - addb %r9b,%r12b - leaq 1(%r8),%r10 - movzbl (%rdi,%r12,1),%r13d - movzbl %r10b,%r10d - movzbl (%rdi,%r10,1),%r11d - movb %r9b,(%rdi,%r12,1) - cmpq %r10,%r12 - movb %r13b,(%rdi,%r8,1) + addb %bl,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) jne L$cmov4 - movq %r9,%r11 + movq %rax,%rbx L$cmov4: - addb %r9b,%r13b - xorb (%rdi,%r13,1),%bl - rorl $8,%ebx - addb %r11b,%r12b - leaq 
1(%r10),%r8 - movzbl (%rdi,%r12,1),%r13d - movzbl %r8b,%r8d - movzbl (%rdi,%r8,1),%r9d - movb %r11b,(%rdi,%r12,1) - cmpq %r8,%r12 - movb %r13b,(%rdi,%r10,1) + addb %al,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) jne L$cmov5 - movq %r11,%r9 + movq %rbx,%rax L$cmov5: - addb %r11b,%r13b - xorb (%rdi,%r13,1),%bl - rorl $8,%ebx - addb %r9b,%r12b - leaq 1(%r8),%r10 - movzbl (%rdi,%r12,1),%r13d - movzbl %r10b,%r10d - movzbl (%rdi,%r10,1),%r11d - movb %r9b,(%rdi,%r12,1) - cmpq %r10,%r12 - movb %r13b,(%rdi,%r8,1) + addb %bl,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) jne L$cmov6 - movq %r9,%r11 + movq %rax,%rbx L$cmov6: - addb %r9b,%r13b - xorb (%rdi,%r13,1),%bl - rorl $8,%ebx - addb %r11b,%r12b - leaq 1(%r10),%r8 - movzbl (%rdi,%r12,1),%r13d - movzbl %r8b,%r8d - movzbl (%rdi,%r8,1),%r9d - movb %r11b,(%rdi,%r12,1) - cmpq %r8,%r12 - movb %r13b,(%rdi,%r10,1) + addb %al,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) jne L$cmov7 - movq %r11,%r9 + movq %rbx,%rax L$cmov7: - addb %r11b,%r13b - xorb (%rdi,%r13,1),%bl - rorl $8,%ebx - leaq -8(%rsi),%rsi - movl %eax,(%rcx) - leaq 8(%rdx),%rdx - movl %ebx,4(%rcx) - leaq 8(%rcx),%rcx - - testq $-8,%rsi + addb %bl,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + leaq -8(%r11),%r11 + movl %r8d,(%r13) + leaq 8(%r12),%r12 + movl %r9d,4(%r13) + leaq 8(%r13),%r13 + + testq $-8,%r11 jnz L$cloop8 - cmpq $0,%rsi + cmpq $0,%r11 jne L$cloop1 jmp L$exit .p2align 4 L$cloop1: - addb %r9b,%r12b - movzbl (%rdi,%r12,1),%r13d - movb %r9b,(%rdi,%r12,1) - movb %r13b,(%rdi,%r8,1) - addb %r9b,%r13b - addb $1,%r8b - movzbl %r13b,%r13d - movzbl %r8b,%r8d - movzbl (%rdi,%r13,1),%r13d - movzbl (%rdi,%r8,1),%r9d - xorb (%rdx),%r13b - leaq 1(%rdx),%rdx - movb %r13b,(%rcx) - leaq 1(%rcx),%rcx - subq $1,%rsi + addb %al,%cl + movzbl %cl,%ecx + movzbl (%rdi,%rcx,1),%edx + movb %al,(%rdi,%rcx,1) + movb %dl,(%rdi,%r10,1) + addb %al,%dl + addb $1,%r10b + movzbl %dl,%edx + movzbl %r10b,%r10d + movzbl (%rdi,%rdx,1),%edx + movzbl (%rdi,%r10,1),%eax + xorb (%r12),%dl + leaq 1(%r12),%r12 + movb %dl,(%r13) + leaq 1(%r13),%r13 + subq $1,%r11 jnz L$cloop1 jmp L$exit .p2align 4 L$exit: - subb $1,%r8b - movl %r8d,-8(%rdi) - movl %r12d,-4(%rdi) + subb $1,%r10b + movl %r10d,-8(%rdi) + movl %ecx,-4(%rdi) movq (%rsp),%r13 movq 8(%rsp),%r12 @@ -330,11 +526,10 @@ L$exit: L$epilogue: .byte 0xf3,0xc3 - -.globl _RC4_set_key +.globl _private_RC4_set_key .p2align 4 -_RC4_set_key: +_private_RC4_set_key: leaq 8(%rdi),%rdi leaq (%rdx,%rsi,1),%rdx negq %rsi @@ -346,11 +541,8 @@ _RC4_set_key: movl _OPENSSL_ia32cap_P(%rip),%r8d btl $20,%r8d - jnc L$w1stloop - btl $30,%r8d - setc %r9b - movl %r9d,260(%rdi) - jmp L$c1stloop + jc L$c1stloop + jmp L$w1stloop .p2align 4 L$w1stloop: @@ -413,18 +605,19 @@ _RC4_options: leaq L$opts(%rip),%rax movl _OPENSSL_ia32cap_P(%rip),%edx btl $20,%edx - jnc L$done - addq $12,%rax + jc L$8xchar btl $30,%edx jnc L$done - addq $13,%rax + addq $25,%rax + .byte 0xf3,0xc3 +L$8xchar: + addq $12,%rax L$done: .byte 0xf3,0xc3 .p2align 6 L$opts: .byte 
114,99,52,40,56,120,44,105,110,116,41,0 .byte 114,99,52,40,56,120,44,99,104,97,114,41,0 -.byte 114,99,52,40,49,120,44,99,104,97,114,41,0 +.byte 114,99,52,40,49,54,120,44,105,110,116,41,0 .byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .p2align 6 - diff --git a/deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s b/deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s index f9dc2568e5..9bb9bf0f22 100644 --- a/deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s @@ -1,12 +1,23 @@ .text + + .globl _sha1_block_data_order .p2align 4 _sha1_block_data_order: + movl _OPENSSL_ia32cap_P+0(%rip),%r9d + movl _OPENSSL_ia32cap_P+4(%rip),%r8d + testl $512,%r8d + jz L$ialu + jmp _ssse3_shortcut + +.p2align 4 +L$ialu: pushq %rbx pushq %rbp pushq %r12 + pushq %r13 movq %rsp,%r11 movq %rdi,%r8 subq $72,%rsp @@ -16,1268 +27,2466 @@ _sha1_block_data_order: movq %r11,64(%rsp) L$prologue: - movl 0(%r8),%edx - movl 4(%r8),%esi - movl 8(%r8),%edi - movl 12(%r8),%ebp - movl 16(%r8),%r11d -.p2align 2 + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp L$loop + +.p2align 4 L$loop: - movl 0(%r9),%eax - bswapl %eax - movl %eax,0(%rsp) - leal 1518500249(%rax,%r11,1),%r12d - movl %edi,%ebx - movl 4(%r9),%eax - movl %edx,%r11d - xorl %ebp,%ebx - bswapl %eax - roll $5,%r11d - andl %esi,%ebx - movl %eax,4(%rsp) - addl %r11d,%r12d - xorl %ebp,%ebx + movl 0(%r9),%edx + bswapl %edx + movl %edx,0(%rsp) + movl %r11d,%eax + movl 4(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,4(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 8(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,8(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax roll $30,%esi - addl %ebx,%r12d - leal 1518500249(%rax,%rbp,1),%r11d - movl %esi,%ebx - movl 8(%r9),%eax - movl %r12d,%ebp - xorl %edi,%ebx - bswapl %eax - roll $5,%ebp - andl %edx,%ebx - movl %eax,8(%rsp) - addl %ebp,%r11d - xorl %edi,%ebx - roll $30,%edx - addl %ebx,%r11d - leal 1518500249(%rax,%rdi,1),%ebp - movl %edx,%ebx - movl 12(%r9),%eax - movl %r11d,%edi - xorl %esi,%ebx - bswapl %eax - roll $5,%edi - andl %r12d,%ebx - movl %eax,12(%rsp) - addl %edi,%ebp - xorl %esi,%ebx + addl %eax,%r12d + movl %esi,%eax + movl 12(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,12(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 16(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,16(%rsp) + addl %ecx,%edi + xorl %esi,%eax roll $30,%r12d - addl %ebx,%ebp - leal 1518500249(%rax,%rsi,1),%edi - movl %r12d,%ebx - movl 16(%r9),%eax - movl %ebp,%esi - xorl %edx,%ebx - bswapl %eax - roll $5,%esi - andl %r11d,%ebx - movl %eax,16(%rsp) - addl %esi,%edi - xorl %edx,%ebx + addl %eax,%edi + movl %r12d,%eax + movl 20(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,20(%rsp) + addl %ecx,%esi + xorl %r13d,%eax roll $30,%r11d - addl %ebx,%edi - leal 
1518500249(%rax,%rdx,1),%esi + addl %eax,%esi + movl %r11d,%eax + movl 24(%r9),%edx + movl %esi,%ecx + xorl %r12d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r13,1),%r13d + andl %edi,%eax + movl %edx,24(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 28(%r9),%ebp + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r12,1),%r12d + andl %esi,%eax + movl %ebp,28(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 32(%r9),%edx + movl %r12d,%ecx + xorl %edi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r11,1),%r11d + andl %r13d,%eax + movl %edx,32(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 36(%r9),%ebp + movl %r11d,%ecx + xorl %esi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rdi,1),%edi + andl %r12d,%eax + movl %ebp,36(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 40(%r9),%edx + movl %edi,%ecx + xorl %r13d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rsi,1),%esi + andl %r11d,%eax + movl %edx,40(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 44(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,44(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 48(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,48(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 52(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,52(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 56(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,56(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 60(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,60(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 0(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 32(%rsp),%edx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + xorl 52(%rsp),%edx + xorl %r12d,%eax + roll $1,%edx + addl %ecx,%r13d + roll $30,%edi + movl %edx,0(%rsp) + addl %eax,%r13d + movl 4(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 36(%rsp),%ebp + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + xorl 56(%rsp),%ebp + xorl %r11d,%eax + roll $1,%ebp + addl %ecx,%r12d + roll $30,%esi + movl %ebp,4(%rsp) + addl %eax,%r12d + movl 8(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + xorl 60(%rsp),%edx + xorl %edi,%eax + roll $1,%edx + addl %ecx,%r11d + roll $30,%r13d + movl %edx,8(%rsp) + addl %eax,%r11d + movl 12(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + 
xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + xorl 0(%rsp),%ebp + xorl %esi,%eax + roll $1,%ebp + addl %ecx,%edi + roll $30,%r12d + movl %ebp,12(%rsp) + addl %eax,%edi + movl 16(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + xorl 4(%rsp),%edx + xorl %r13d,%eax + roll $1,%edx + addl %ecx,%esi + roll $30,%r11d + movl %edx,16(%rsp) + addl %eax,%esi + movl 20(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 28(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 52(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 8(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 32(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 56(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 12(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 36(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 60(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 16(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 0(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 20(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 4(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 24(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 48(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 8(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 28(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 52(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 12(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 32(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 56(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 16(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 36(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 20(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 40(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 24(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 44(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 
1859775393(%rdx,%r13,1),%r13d + xorl 28(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 48(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 32(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 52(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 36(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 56(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 40(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 60(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 44(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 0(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 48(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 4(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 52(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 8(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 56(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 12(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 60(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 16(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 0(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 20(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r11d,%eax movl %r11d,%ebx - movl 20(%r9),%eax - movl %edi,%edx - xorl %r12d,%ebx - bswapl %eax - roll $5,%edx - andl %ebp,%ebx - movl %eax,20(%rsp) - addl %edx,%esi + xorl 44(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp xorl %r12d,%ebx - roll $30,%ebp - addl %ebx,%esi - leal 1518500249(%rax,%r12,1),%edx - movl %ebp,%ebx - movl 24(%r9),%eax - movl %esi,%r12d - xorl %r11d,%ebx - bswapl %eax - roll $5,%r12d + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r13d andl %edi,%ebx - movl %eax,24(%rsp) - addl %r12d,%edx - xorl %r11d,%ebx + roll $1,%ebp + addl %ebx,%r13d roll $30,%edi - addl %ebx,%edx - leal 1518500249(%rax,%r11,1),%r12d + movl %ebp,36(%rsp) 
+ addl %ecx,%r13d + movl 40(%rsp),%edx + movl %edi,%eax movl %edi,%ebx - movl 28(%r9),%eax - movl %edx,%r11d - xorl %ebp,%ebx - bswapl %eax - roll $5,%r11d + xorl 48(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r12d andl %esi,%ebx - movl %eax,28(%rsp) - addl %r11d,%r12d - xorl %ebp,%ebx - roll $30,%esi + roll $1,%edx addl %ebx,%r12d - leal 1518500249(%rax,%rbp,1),%r11d + roll $30,%esi + movl %edx,40(%rsp) + addl %ecx,%r12d + movl 44(%rsp),%ebp + movl %esi,%eax movl %esi,%ebx - movl 32(%r9),%eax - movl %r12d,%ebp - xorl %edi,%ebx - bswapl %eax - roll $5,%ebp - andl %edx,%ebx - movl %eax,32(%rsp) - addl %ebp,%r11d + xorl 52(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp xorl %edi,%ebx - roll $30,%edx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp addl %ebx,%r11d - leal 1518500249(%rax,%rdi,1),%ebp - movl %edx,%ebx - movl 36(%r9),%eax - movl %r11d,%edi + roll $30,%r13d + movl %ebp,44(%rsp) + addl %ecx,%r11d + movl 48(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx xorl %esi,%ebx - bswapl %eax - roll $5,%edi + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%edi andl %r12d,%ebx - movl %eax,36(%rsp) - addl %edi,%ebp - xorl %esi,%ebx + roll $1,%edx + addl %ebx,%edi roll $30,%r12d - addl %ebx,%ebp - leal 1518500249(%rax,%rsi,1),%edi + movl %edx,48(%rsp) + addl %ecx,%edi + movl 52(%rsp),%ebp + movl %r12d,%eax movl %r12d,%ebx - movl 40(%r9),%eax - movl %ebp,%esi - xorl %edx,%ebx - bswapl %eax - roll $5,%esi + xorl 60(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 40(%rsp),%ebp + addl %eax,%esi andl %r11d,%ebx - movl %eax,40(%rsp) - addl %esi,%edi - xorl %edx,%ebx + roll $1,%ebp + addl %ebx,%esi roll $30,%r11d - addl %ebx,%edi - leal 1518500249(%rax,%rdx,1),%esi + movl %ebp,52(%rsp) + addl %ecx,%esi + movl 56(%rsp),%edx + movl %r11d,%eax movl %r11d,%ebx - movl 44(%r9),%eax - movl %edi,%edx - xorl %r12d,%ebx - bswapl %eax - roll $5,%edx - andl %ebp,%ebx - movl %eax,44(%rsp) - addl %edx,%esi + xorl 0(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx xorl %r12d,%ebx - roll $30,%ebp - addl %ebx,%esi - leal 1518500249(%rax,%r12,1),%edx - movl %ebp,%ebx - movl 48(%r9),%eax - movl %esi,%r12d - xorl %r11d,%ebx - bswapl %eax - roll $5,%r12d + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 44(%rsp),%edx + addl %eax,%r13d andl %edi,%ebx - movl %eax,48(%rsp) - addl %r12d,%edx - xorl %r11d,%ebx + roll $1,%edx + addl %ebx,%r13d roll $30,%edi - addl %ebx,%edx - leal 1518500249(%rax,%r11,1),%r12d + movl %edx,56(%rsp) + addl %ecx,%r13d + movl 60(%rsp),%ebp + movl %edi,%eax movl %edi,%ebx - movl 52(%r9),%eax - movl %edx,%r11d - xorl %ebp,%ebx - bswapl %eax - roll $5,%r11d + xorl 4(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 48(%rsp),%ebp + addl %eax,%r12d andl %esi,%ebx - movl %eax,52(%rsp) - addl %r11d,%r12d - xorl %ebp,%ebx - roll $30,%esi + roll $1,%ebp addl %ebx,%r12d - leal 1518500249(%rax,%rbp,1),%r11d + roll $30,%esi + movl %ebp,60(%rsp) + addl %ecx,%r12d + movl 0(%rsp),%edx + movl %esi,%eax movl %esi,%ebx - movl 56(%r9),%eax - movl %r12d,%ebp - xorl 
%edi,%ebx - bswapl %eax - roll $5,%ebp - andl %edx,%ebx - movl %eax,56(%rsp) - addl %ebp,%r11d + xorl 8(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx xorl %edi,%ebx - roll $30,%edx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 52(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx addl %ebx,%r11d - leal 1518500249(%rax,%rdi,1),%ebp - movl %edx,%ebx - movl 60(%r9),%eax - movl %r11d,%edi + roll $30,%r13d + movl %edx,0(%rsp) + addl %ecx,%r11d + movl 4(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 12(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp xorl %esi,%ebx - bswapl %eax - roll $5,%edi + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 56(%rsp),%ebp + addl %eax,%edi andl %r12d,%ebx - movl %eax,60(%rsp) - addl %edi,%ebp - xorl %esi,%ebx + roll $1,%ebp + addl %ebx,%edi roll $30,%r12d - addl %ebx,%ebp - leal 1518500249(%rax,%rsi,1),%edi - movl 0(%rsp),%eax + movl %ebp,4(%rsp) + addl %ecx,%edi + movl 8(%rsp),%edx + movl %r12d,%eax movl %r12d,%ebx - movl %ebp,%esi - xorl 8(%rsp),%eax - xorl %edx,%ebx - roll $5,%esi - xorl 32(%rsp),%eax + xorl 16(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 60(%rsp),%edx + addl %eax,%esi andl %r11d,%ebx - addl %esi,%edi - xorl 52(%rsp),%eax - xorl %edx,%ebx + roll $1,%edx + addl %ebx,%esi roll $30,%r11d - addl %ebx,%edi - roll $1,%eax - movl %eax,0(%rsp) - leal 1518500249(%rax,%rdx,1),%esi - movl 4(%rsp),%eax + movl %edx,8(%rsp) + addl %ecx,%esi + movl 12(%rsp),%ebp + movl %r11d,%eax movl %r11d,%ebx - movl %edi,%edx - xorl 12(%rsp),%eax - xorl %r12d,%ebx - roll $5,%edx - xorl 36(%rsp),%eax - andl %ebp,%ebx - addl %edx,%esi - xorl 56(%rsp),%eax + xorl 20(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 44(%rsp),%ebp xorl %r12d,%ebx - roll $30,%ebp - addl %ebx,%esi - roll $1,%eax - movl %eax,4(%rsp) - leal 1518500249(%rax,%r12,1),%edx - movl 8(%rsp),%eax - movl %ebp,%ebx - movl %esi,%r12d - xorl 16(%rsp),%eax - xorl %r11d,%ebx - roll $5,%r12d - xorl 40(%rsp),%eax + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 0(%rsp),%ebp + addl %eax,%r13d andl %edi,%ebx - addl %r12d,%edx - xorl 60(%rsp),%eax - xorl %r11d,%ebx + roll $1,%ebp + addl %ebx,%r13d roll $30,%edi - addl %ebx,%edx - roll $1,%eax - movl %eax,8(%rsp) - leal 1518500249(%rax,%r11,1),%r12d - movl 12(%rsp),%eax + movl %ebp,12(%rsp) + addl %ecx,%r13d + movl 16(%rsp),%edx + movl %edi,%eax movl %edi,%ebx - movl %edx,%r11d - xorl 20(%rsp),%eax - xorl %ebp,%ebx - roll $5,%r11d - xorl 44(%rsp),%eax + xorl 24(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 48(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 4(%rsp),%edx + addl %eax,%r12d andl %esi,%ebx - addl %r11d,%r12d - xorl 0(%rsp),%eax - xorl %ebp,%ebx - roll $30,%esi + roll $1,%edx addl %ebx,%r12d - roll $1,%eax - movl %eax,12(%rsp) - leal 1518500249(%rax,%rbp,1),%r11d - movl 16(%rsp),%eax + roll $30,%esi + movl %edx,16(%rsp) + addl %ecx,%r12d + movl 20(%rsp),%ebp + movl %esi,%eax movl %esi,%ebx - movl %r12d,%ebp - xorl 24(%rsp),%eax - xorl %edi,%ebx - roll $5,%ebp - xorl 48(%rsp),%eax - andl %edx,%ebx - addl %ebp,%r11d - xorl 4(%rsp),%eax + xorl 28(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%ebp xorl %edi,%ebx - roll $30,%edx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 8(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp addl %ebx,%r11d - roll $1,%eax - movl %eax,16(%rsp) - 
leal 1859775393(%rax,%rdi,1),%ebp - movl 20(%rsp),%eax - movl %edx,%ebx - movl %r11d,%edi - xorl 28(%rsp),%eax - xorl %r12d,%ebx - roll $5,%edi - xorl 52(%rsp),%eax + roll $30,%r13d + movl %ebp,20(%rsp) + addl %ecx,%r11d + movl 24(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 32(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx xorl %esi,%ebx - addl %edi,%ebp - xorl 8(%rsp),%eax + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 12(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi roll $30,%r12d - addl %ebx,%ebp - roll $1,%eax - movl %eax,20(%rsp) - leal 1859775393(%rax,%rsi,1),%edi - movl 24(%rsp),%eax + movl %edx,24(%rsp) + addl %ecx,%edi + movl 28(%rsp),%ebp + movl %r12d,%eax movl %r12d,%ebx - movl %ebp,%esi - xorl 32(%rsp),%eax - xorl %r11d,%ebx - roll $5,%esi - xorl 56(%rsp),%eax - xorl %edx,%ebx - addl %esi,%edi - xorl 12(%rsp),%eax + xorl 36(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 16(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi roll $30,%r11d - addl %ebx,%edi - roll $1,%eax - movl %eax,24(%rsp) - leal 1859775393(%rax,%rdx,1),%esi - movl 28(%rsp),%eax + movl %ebp,28(%rsp) + addl %ecx,%esi + movl 32(%rsp),%edx + movl %r11d,%eax movl %r11d,%ebx - movl %edi,%edx - xorl 36(%rsp),%eax - xorl %ebp,%ebx - roll $5,%edx - xorl 60(%rsp),%eax + xorl 40(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 0(%rsp),%edx xorl %r12d,%ebx - addl %edx,%esi - xorl 16(%rsp),%eax - roll $30,%ebp - addl %ebx,%esi - roll $1,%eax - movl %eax,28(%rsp) - leal 1859775393(%rax,%r12,1),%edx - movl 32(%rsp),%eax - movl %ebp,%ebx - movl %esi,%r12d - xorl 40(%rsp),%eax - xorl %edi,%ebx - roll $5,%r12d - xorl 0(%rsp),%eax - xorl %r11d,%ebx - addl %r12d,%edx - xorl 20(%rsp),%eax + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 20(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d roll $30,%edi - addl %ebx,%edx - roll $1,%eax - movl %eax,32(%rsp) - leal 1859775393(%rax,%r11,1),%r12d - movl 36(%rsp),%eax + movl %edx,32(%rsp) + addl %ecx,%r13d + movl 36(%rsp),%ebp + movl %edi,%eax movl %edi,%ebx - movl %edx,%r11d - xorl 44(%rsp),%eax - xorl %esi,%ebx - roll $5,%r11d - xorl 4(%rsp),%eax - xorl %ebp,%ebx - addl %r11d,%r12d - xorl 24(%rsp),%eax - roll $30,%esi + xorl 44(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 4(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp addl %ebx,%r12d - roll $1,%eax - movl %eax,36(%rsp) - leal 1859775393(%rax,%rbp,1),%r11d - movl 40(%rsp),%eax + roll $30,%esi + movl %ebp,36(%rsp) + addl %ecx,%r12d + movl 40(%rsp),%edx + movl %esi,%eax movl %esi,%ebx - movl %r12d,%ebp - xorl 48(%rsp),%eax - xorl %edx,%ebx - roll $5,%ebp - xorl 8(%rsp),%eax + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%edx xorl %edi,%ebx - addl %ebp,%r11d - xorl 28(%rsp),%eax - roll $30,%edx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx addl %ebx,%r11d - roll $1,%eax - movl %eax,40(%rsp) - leal 1859775393(%rax,%rdi,1),%ebp - movl 44(%rsp),%eax - movl %edx,%ebx - movl %r11d,%edi - xorl 52(%rsp),%eax - xorl %r12d,%ebx - roll $5,%edi - xorl 12(%rsp),%eax + roll $30,%r13d + movl %edx,40(%rsp) + addl %ecx,%r11d + movl 44(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 52(%rsp),%ebp 
+ andl %esi,%eax + movl %r11d,%ecx + xorl 12(%rsp),%ebp xorl %esi,%ebx - addl %edi,%ebp - xorl 32(%rsp),%eax + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi roll $30,%r12d - addl %ebx,%ebp - roll $1,%eax - movl %eax,44(%rsp) - leal 1859775393(%rax,%rsi,1),%edi - movl 48(%rsp),%eax + movl %ebp,44(%rsp) + addl %ecx,%edi + movl 48(%rsp),%edx + movl %r12d,%eax movl %r12d,%ebx - movl %ebp,%esi - xorl 56(%rsp),%eax - xorl %r11d,%ebx - roll $5,%esi - xorl 16(%rsp),%eax - xorl %edx,%ebx - addl %esi,%edi - xorl 36(%rsp),%eax - roll $30,%r11d - addl %ebx,%edi - roll $1,%eax - movl %eax,48(%rsp) - leal 1859775393(%rax,%rdx,1),%esi - movl 52(%rsp),%eax - movl %r11d,%ebx - movl %edi,%edx - xorl 60(%rsp),%eax - xorl %ebp,%ebx - roll $5,%edx - xorl 20(%rsp),%eax - xorl %r12d,%ebx - addl %edx,%esi - xorl 40(%rsp),%eax - roll $30,%ebp + xorl 56(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx addl %ebx,%esi - roll $1,%eax - movl %eax,52(%rsp) - leal 1859775393(%rax,%r12,1),%edx - movl 56(%rsp),%eax - movl %ebp,%ebx - movl %esi,%r12d - xorl 0(%rsp),%eax - xorl %edi,%ebx - roll $5,%r12d - xorl 24(%rsp),%eax - xorl %r11d,%ebx - addl %r12d,%edx - xorl 44(%rsp),%eax + roll $30,%r11d + movl %edx,48(%rsp) + addl %ecx,%esi + movl 52(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 20(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 40(%rsp),%ebp roll $30,%edi - addl %ebx,%edx - roll $1,%eax - movl %eax,56(%rsp) - leal 1859775393(%rax,%r11,1),%r12d - movl 60(%rsp),%eax - movl %edi,%ebx - movl %edx,%r11d - xorl 4(%rsp),%eax - xorl %esi,%ebx - roll $5,%r11d - xorl 28(%rsp),%eax - xorl %ebp,%ebx - addl %r11d,%r12d - xorl 48(%rsp),%eax + addl %eax,%r13d + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 0(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 24(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 44(%rsp),%edx roll $30,%esi - addl %ebx,%r12d - roll $1,%eax - movl %eax,60(%rsp) - leal 1859775393(%rax,%rbp,1),%r11d - movl 0(%rsp),%eax - movl %esi,%ebx - movl %r12d,%ebp - xorl 8(%rsp),%eax - xorl %edx,%ebx - roll $5,%ebp - xorl 32(%rsp),%eax - xorl %edi,%ebx - addl %ebp,%r11d - xorl 52(%rsp),%eax - roll $30,%edx - addl %ebx,%r11d - roll $1,%eax - movl %eax,0(%rsp) - leal 1859775393(%rax,%rdi,1),%ebp - movl 4(%rsp),%eax - movl %edx,%ebx - movl %r11d,%edi - xorl 12(%rsp),%eax - xorl %r12d,%ebx - roll $5,%edi - xorl 36(%rsp),%eax - xorl %esi,%ebx - addl %edi,%ebp - xorl 56(%rsp),%eax + addl %eax,%r12d + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 4(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 28(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 48(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 32(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 52(%rsp),%edx roll $30,%r12d - addl %ebx,%ebp - roll $1,%eax - movl %eax,4(%rsp) - leal 1859775393(%rax,%rsi,1),%edi - movl 8(%rsp),%eax - movl %r12d,%ebx - 
movl %ebp,%esi - xorl 16(%rsp),%eax - xorl %r11d,%ebx - roll $5,%esi - xorl 40(%rsp),%eax - xorl %edx,%ebx - addl %esi,%edi - xorl 60(%rsp),%eax + addl %eax,%edi + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 36(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 56(%rsp),%ebp roll $30,%r11d - addl %ebx,%edi - roll $1,%eax - movl %eax,8(%rsp) - leal 1859775393(%rax,%rdx,1),%esi - movl 12(%rsp),%eax - movl %r11d,%ebx - movl %edi,%edx - xorl 20(%rsp),%eax - xorl %ebp,%ebx - roll $5,%edx - xorl 44(%rsp),%eax - xorl %r12d,%ebx - addl %edx,%esi - xorl 0(%rsp),%eax - roll $30,%ebp - addl %ebx,%esi - roll $1,%eax - movl %eax,12(%rsp) - leal 1859775393(%rax,%r12,1),%edx - movl 16(%rsp),%eax - movl %ebp,%ebx - movl %esi,%r12d - xorl 24(%rsp),%eax - xorl %edi,%ebx - roll $5,%r12d - xorl 48(%rsp),%eax - xorl %r11d,%ebx - addl %r12d,%edx - xorl 4(%rsp),%eax + addl %eax,%esi + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 40(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 60(%rsp),%edx roll $30,%edi - addl %ebx,%edx - roll $1,%eax - movl %eax,16(%rsp) - leal 1859775393(%rax,%r11,1),%r12d - movl 20(%rsp),%eax - movl %edi,%ebx - movl %edx,%r11d - xorl 28(%rsp),%eax - xorl %esi,%ebx - roll $5,%r11d - xorl 52(%rsp),%eax - xorl %ebp,%ebx - addl %r11d,%r12d - xorl 8(%rsp),%eax + addl %eax,%r13d + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 44(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 0(%rsp),%ebp roll $30,%esi - addl %ebx,%r12d - roll $1,%eax - movl %eax,20(%rsp) - leal 1859775393(%rax,%rbp,1),%r11d - movl 24(%rsp),%eax - movl %esi,%ebx - movl %r12d,%ebp - xorl 32(%rsp),%eax - xorl %edx,%ebx - roll $5,%ebp - xorl 56(%rsp),%eax - xorl %edi,%ebx - addl %ebp,%r11d - xorl 12(%rsp),%eax - roll $30,%edx - addl %ebx,%r11d - roll $1,%eax - movl %eax,24(%rsp) - leal 1859775393(%rax,%rdi,1),%ebp - movl 28(%rsp),%eax - movl %edx,%ebx - movl %r11d,%edi - xorl 36(%rsp),%eax - xorl %r12d,%ebx - roll $5,%edi - xorl 60(%rsp),%eax - xorl %esi,%ebx - addl %edi,%ebp - xorl 16(%rsp),%eax + addl %eax,%r12d + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 48(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 4(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 28(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 52(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 8(%rsp),%ebp roll $30,%r12d - addl %ebx,%ebp - roll $1,%eax - movl %eax,28(%rsp) - leal 1859775393(%rax,%rsi,1),%edi - movl 32(%rsp),%eax - movl %r12d,%ebx - movl %ebp,%esi - xorl 40(%rsp),%eax - xorl %r11d,%ebx - roll $5,%esi - xorl 0(%rsp),%eax - xorl %edx,%ebx - addl %esi,%edi - xorl 20(%rsp),%eax - roll $30,%r11d - addl %ebx,%edi - roll $1,%eax - movl %eax,32(%rsp) - leal -1894007588(%rax,%rdx,1),%esi - movl 36(%rsp),%eax - movl %ebp,%ebx - movl %ebp,%ecx - xorl 44(%rsp),%eax - movl %edi,%edx - andl %r11d,%ebx - xorl 4(%rsp),%eax - orl %r11d,%ecx - roll $5,%edx - 
xorl 24(%rsp),%eax - andl %r12d,%ecx - addl %edx,%esi - roll $1,%eax - orl %ecx,%ebx - roll $30,%ebp - movl %eax,36(%rsp) - addl %ebx,%esi - leal -1894007588(%rax,%r12,1),%edx - movl 40(%rsp),%eax - movl %edi,%ebx + addl %eax,%edi + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %r12d,%eax movl %edi,%ecx - xorl 48(%rsp),%eax - movl %esi,%r12d - andl %ebp,%ebx - xorl 8(%rsp),%eax - orl %ebp,%ecx - roll $5,%r12d - xorl 28(%rsp),%eax - andl %r11d,%ecx - addl %r12d,%edx - roll $1,%eax - orl %ecx,%ebx - roll $30,%edi - movl %eax,40(%rsp) - addl %ebx,%edx - leal -1894007588(%rax,%r11,1),%r12d - movl 44(%rsp),%eax - movl %esi,%ebx + xorl 32(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rsi,1),%esi + xorl 56(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 12(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r11d,%eax movl %esi,%ecx - xorl 52(%rsp),%eax - movl %edx,%r11d - andl %edi,%ebx - xorl 12(%rsp),%eax - orl %edi,%ecx - roll $5,%r11d - xorl 32(%rsp),%eax - andl %ebp,%ecx - addl %r11d,%r12d - roll $1,%eax - orl %ecx,%ebx + xorl 36(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 60(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 16(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 0(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 20(%rsp),%edx roll $30,%esi - movl %eax,44(%rsp) - addl %ebx,%r12d - leal -1894007588(%rax,%rbp,1),%r11d - movl 48(%rsp),%eax - movl %edx,%ebx - movl %edx,%ecx - xorl 56(%rsp),%eax - movl %r12d,%ebp - andl %esi,%ebx - xorl 16(%rsp),%eax - orl %esi,%ecx - roll $5,%ebp - xorl 36(%rsp),%eax - andl %edi,%ecx - addl %ebp,%r11d - roll $1,%eax - orl %ecx,%ebx - roll $30,%edx - movl %eax,48(%rsp) - addl %ebx,%r11d - leal -1894007588(%rax,%rdi,1),%ebp - movl 52(%rsp),%eax - movl %r12d,%ebx + addl %eax,%r12d + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %esi,%eax movl %r12d,%ecx - xorl 60(%rsp),%eax - movl %r11d,%edi - andl %edx,%ebx - xorl 20(%rsp),%eax - orl %edx,%ecx - roll $5,%edi - xorl 40(%rsp),%eax - andl %esi,%ecx - addl %edi,%ebp - roll $1,%eax - orl %ecx,%ebx - roll $30,%r12d - movl %eax,52(%rsp) - addl %ebx,%ebp - leal -1894007588(%rax,%rsi,1),%edi - movl 56(%rsp),%eax - movl %r11d,%ebx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 4(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 24(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r13d,%eax movl %r11d,%ecx - xorl 0(%rsp),%eax - movl %ebp,%esi - andl %r12d,%ebx - xorl 24(%rsp),%eax - orl %r12d,%ecx - roll $5,%esi - xorl 44(%rsp),%eax - andl %edx,%ecx - addl %esi,%edi - roll $1,%eax - orl %ecx,%ebx - roll $30,%r11d - movl %eax,56(%rsp) - addl %ebx,%edi - leal -1894007588(%rax,%rdx,1),%esi - movl 60(%rsp),%eax - movl %ebp,%ebx - movl %ebp,%ecx - xorl 4(%rsp),%eax - movl %edi,%edx - andl %r11d,%ebx - xorl 28(%rsp),%eax - orl %r11d,%ecx - roll $5,%edx - xorl 48(%rsp),%eax - andl %r12d,%ecx - addl %edx,%esi - roll $1,%eax - orl %ecx,%ebx - roll $30,%ebp - movl %eax,60(%rsp) - addl %ebx,%esi - leal -1894007588(%rax,%r12,1),%edx - movl 0(%rsp),%eax - movl %edi,%ebx + xorl 48(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 
8(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 28(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %r12d,%eax movl %edi,%ecx - xorl 8(%rsp),%eax - movl %esi,%r12d - andl %ebp,%ebx - xorl 32(%rsp),%eax - orl %ebp,%ecx - roll $5,%r12d - xorl 52(%rsp),%eax - andl %r11d,%ecx - addl %r12d,%edx - roll $1,%eax - orl %ecx,%ebx - roll $30,%edi - movl %eax,0(%rsp) - addl %ebx,%edx - leal -1894007588(%rax,%r11,1),%r12d - movl 4(%rsp),%eax - movl %esi,%ebx + xorl 52(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 12(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 32(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %r11d,%eax movl %esi,%ecx - xorl 12(%rsp),%eax - movl %edx,%r11d - andl %edi,%ebx - xorl 36(%rsp),%eax - orl %edi,%ecx - roll $5,%r11d - xorl 56(%rsp),%eax - andl %ebp,%ecx - addl %r11d,%r12d - roll $1,%eax - orl %ecx,%ebx + xorl 56(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 16(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 36(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 60(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 20(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 40(%rsp),%ebp roll $30,%esi - movl %eax,4(%rsp) - addl %ebx,%r12d - leal -1894007588(%rax,%rbp,1),%r11d - movl 8(%rsp),%eax - movl %edx,%ebx - movl %edx,%ecx - xorl 16(%rsp),%eax - movl %r12d,%ebp - andl %esi,%ebx - xorl 40(%rsp),%eax - orl %esi,%ecx - roll $5,%ebp - xorl 60(%rsp),%eax - andl %edi,%ecx - addl %ebp,%r11d - roll $1,%eax - orl %ecx,%ebx - roll $30,%edx - movl %eax,8(%rsp) - addl %ebx,%r11d - leal -1894007588(%rax,%rdi,1),%ebp - movl 12(%rsp),%eax - movl %r12d,%ebx + addl %eax,%r12d + roll $1,%ebp + movl 56(%rsp),%edx + movl %esi,%eax movl %r12d,%ecx - xorl 20(%rsp),%eax - movl %r11d,%edi - andl %edx,%ebx - xorl 44(%rsp),%eax - orl %edx,%ecx - roll $5,%edi - xorl 0(%rsp),%eax - andl %esi,%ecx - addl %edi,%ebp - roll $1,%eax - orl %ecx,%ebx - roll $30,%r12d - movl %eax,12(%rsp) - addl %ebx,%ebp - leal -1894007588(%rax,%rsi,1),%edi - movl 16(%rsp),%eax - movl %r11d,%ebx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 24(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 44(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl 60(%rsp),%ebp + movl %r13d,%eax movl %r11d,%ecx - xorl 24(%rsp),%eax - movl %ebp,%esi - andl %r12d,%ebx - xorl 48(%rsp),%eax - orl %r12d,%ecx - roll $5,%esi - xorl 4(%rsp),%eax - andl %edx,%ecx - addl %esi,%edi - roll $1,%eax - orl %ecx,%ebx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 28(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 48(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl %r11d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r13d,%eax + addl %ecx,%esi roll $30,%r11d - movl %eax,16(%rsp) - addl %ebx,%edi - leal -1894007588(%rax,%rdx,1),%esi - movl 20(%rsp),%eax - movl %ebp,%ebx - movl %ebp,%ecx - xorl 28(%rsp),%eax - movl %edi,%edx - andl %r11d,%ebx - xorl 52(%rsp),%eax - orl %r11d,%ecx + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl 
%edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz L$loop + + movq 64(%rsp),%rsi + movq (%rsi),%r13 + movq 8(%rsi),%r12 + movq 16(%rsi),%rbp + movq 24(%rsi),%rbx + leaq 32(%rsi),%rsp +L$epilogue: + .byte 0xf3,0xc3 + + +.p2align 4 +sha1_block_data_order_ssse3: +_ssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + leaq -64(%rsp),%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX(%rip),%r11 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm0 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp L$oop_ssse3 +.p2align 4 +L$oop_ssse3: + movdqa %xmm1,%xmm4 + addl 0(%rsp),%ebp + xorl %edx,%ecx + movdqa %xmm3,%xmm8 +.byte 102,15,58,15,224,8 + movl %eax,%edi + roll $5,%eax + paddd %xmm3,%xmm9 + andl %ecx,%esi + xorl %edx,%ecx + psrldq $4,%xmm8 + xorl %edx,%esi + addl %eax,%ebp + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%ebp + pxor %xmm2,%xmm8 + addl 4(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pxor %xmm8,%xmm4 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm9,48(%rsp) + xorl %ecx,%edi + addl %ebp,%edx + movdqa %xmm4,%xmm10 + movdqa %xmm4,%xmm8 + rorl $7,%eax + addl %edi,%edx + addl 8(%rsp),%ecx + xorl %ebx,%eax + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi roll $5,%edx - xorl 8(%rsp),%eax - andl %r12d,%ecx - addl %edx,%esi - roll $1,%eax - orl %ecx,%ebx - roll $30,%ebp - movl %eax,20(%rsp) - addl %ebx,%esi - leal -1894007588(%rax,%r12,1),%edx - movl 24(%rsp),%eax - movl %edi,%ebx - movl %edi,%ecx - xorl 32(%rsp),%eax - movl %esi,%r12d - andl %ebp,%ebx - xorl 56(%rsp),%eax - orl %ebp,%ecx - roll $5,%r12d - xorl 12(%rsp),%eax - andl %r11d,%ecx - addl %r12d,%edx - roll $1,%eax - orl %ecx,%ebx - roll $30,%edi - movl %eax,24(%rsp) - addl %ebx,%edx - leal -1894007588(%rax,%r11,1),%r12d - movl 28(%rsp),%eax - movl %esi,%ebx - movl %esi,%ecx - xorl 36(%rsp),%eax - movl %edx,%r11d - andl %edi,%ebx - xorl 60(%rsp),%eax - orl %edi,%ecx - roll $5,%r11d - xorl 16(%rsp),%eax - andl %ebp,%ecx - addl %r11d,%r12d - roll $1,%eax - orl %ecx,%ebx - roll $30,%esi - movl %eax,28(%rsp) - addl %ebx,%r12d - leal -1894007588(%rax,%rbp,1),%r11d - movl 32(%rsp),%eax - movl %edx,%ebx - movl %edx,%ecx - xorl 40(%rsp),%eax - movl %r12d,%ebp - andl %esi,%ebx - xorl 0(%rsp),%eax - orl %esi,%ecx + andl %eax,%esi + xorl %ebx,%eax + psrld $31,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + movdqa %xmm10,%xmm9 + rorl $7,%ebp + addl %esi,%ecx + psrld $30,%xmm10 + por %xmm8,%xmm4 + addl 12(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + andl %ebp,%edi + xorl %eax,%ebp + movdqa 0(%r11),%xmm10 + xorl %eax,%edi + addl %ecx,%ebx + pxor %xmm9,%xmm4 + rorl $7,%edx + addl %edi,%ebx + movdqa %xmm2,%xmm5 + addl 16(%rsp),%eax + xorl %ebp,%edx + movdqa %xmm4,%xmm9 +.byte 102,15,58,15,233,8 + movl %ebx,%edi + roll $5,%ebx + paddd %xmm4,%xmm10 + andl %edx,%esi + xorl %ebp,%edx + psrldq $4,%xmm9 + xorl %ebp,%esi + addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm3,%xmm9 + addl 
20(%rsp),%ebp + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pxor %xmm9,%xmm5 + andl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm10,0(%rsp) + xorl %edx,%edi + addl %eax,%ebp + movdqa %xmm5,%xmm8 + movdqa %xmm5,%xmm9 + rorl $7,%ebx + addl %edi,%ebp + addl 24(%rsp),%edx + xorl %ecx,%ebx + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi roll $5,%ebp - xorl 20(%rsp),%eax - andl %edi,%ecx - addl %ebp,%r11d - roll $1,%eax - orl %ecx,%ebx - roll $30,%edx - movl %eax,32(%rsp) - addl %ebx,%r11d - leal -1894007588(%rax,%rdi,1),%ebp - movl 36(%rsp),%eax - movl %r12d,%ebx - movl %r12d,%ecx - xorl 44(%rsp),%eax - movl %r11d,%edi - andl %edx,%ebx - xorl 4(%rsp),%eax - orl %edx,%ecx - roll $5,%edi - xorl 24(%rsp),%eax - andl %esi,%ecx + andl %ebx,%esi + xorl %ecx,%ebx + psrld $31,%xmm9 + xorl %ecx,%esi + addl %ebp,%edx + movdqa %xmm8,%xmm10 + rorl $7,%eax + addl %esi,%edx + psrld $30,%xmm8 + por %xmm9,%xmm5 + addl 28(%rsp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + andl %eax,%edi + xorl %ebx,%eax + movdqa 16(%r11),%xmm8 + xorl %ebx,%edi + addl %edx,%ecx + pxor %xmm10,%xmm5 + rorl $7,%ebp + addl %edi,%ecx + movdqa %xmm3,%xmm6 + addl 32(%rsp),%ebx + xorl %eax,%ebp + movdqa %xmm5,%xmm10 +.byte 102,15,58,15,242,8 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm5,%xmm8 + andl %ebp,%esi + xorl %eax,%ebp + psrldq $4,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm4,%xmm10 + addl 36(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + pxor %xmm10,%xmm6 + andl %edx,%edi + xorl %ebp,%edx + movdqa %xmm8,16(%rsp) + xorl %ebp,%edi + addl %ebx,%eax + movdqa %xmm6,%xmm9 + movdqa %xmm6,%xmm10 + rorl $7,%ecx + addl %edi,%eax + addl 40(%rsp),%ebp + xorl %edx,%ecx + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx + psrld $31,%xmm10 + xorl %edx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + psrld $30,%xmm9 + por %xmm10,%xmm6 + addl 44(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa 16(%r11),%xmm9 + xorl %ecx,%edi + addl %ebp,%edx + pxor %xmm8,%xmm6 + rorl $7,%eax + addl %edi,%edx + movdqa %xmm4,%xmm7 + addl 48(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm6,%xmm8 +.byte 102,15,58,15,251,8 + movl %edx,%edi + roll $5,%edx + paddd %xmm6,%xmm9 + andl %eax,%esi + xorl %ebx,%eax + psrldq $4,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm5,%xmm8 + addl 52(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + pxor %xmm8,%xmm7 + andl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm9,32(%rsp) + xorl %eax,%edi + addl %ecx,%ebx + movdqa %xmm7,%xmm10 + movdqa %xmm7,%xmm8 + rorl $7,%edx + addl %edi,%ebx + addl 56(%rsp),%eax + xorl %ebp,%edx + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + roll $5,%ebx + andl %edx,%esi + xorl %ebp,%edx + psrld $31,%xmm8 + xorl %ebp,%esi + addl %ebx,%eax + movdqa %xmm10,%xmm9 + rorl $7,%ecx + addl %esi,%eax + psrld $30,%xmm10 + por %xmm8,%xmm7 + addl 60(%rsp),%ebp + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + andl %ecx,%edi + xorl %edx,%ecx + movdqa 16(%r11),%xmm10 + xorl %edx,%edi + addl %eax,%ebp + pxor %xmm9,%xmm7 + rorl $7,%ebx addl %edi,%ebp - roll $1,%eax - orl %ecx,%ebx - roll $30,%r12d - movl %eax,36(%rsp) - addl %ebx,%ebp - leal -1894007588(%rax,%rsi,1),%edi - movl 40(%rsp),%eax - movl %r11d,%ebx - movl 
%r11d,%ecx - xorl 48(%rsp),%eax + movdqa %xmm7,%xmm9 + addl 0(%rsp),%edx + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,206,8 + xorl %ecx,%ebx + movl %ebp,%edi + roll $5,%ebp + pxor %xmm1,%xmm0 + andl %ebx,%esi + xorl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm7,%xmm10 + xorl %ecx,%esi + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm0,%xmm9 + movdqa %xmm10,48(%rsp) + movl %edx,%esi + roll $5,%edx + andl %eax,%edi + xorl %ebx,%eax + pslld $2,%xmm0 + xorl %ebx,%edi + addl %edx,%ecx + psrld $30,%xmm9 + rorl $7,%ebp + addl %edi,%ecx + addl 8(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%edi + roll $5,%ecx + por %xmm9,%xmm0 + andl %ebp,%esi + xorl %eax,%ebp + movdqa %xmm0,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + andl %edx,%edi + xorl %ebp,%edx + xorl %ebp,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 16(%rsp),%ebp + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,215,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm8,%xmm9 + paddd %xmm0,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ecx,%edi movl %ebp,%esi - andl %r12d,%ebx - xorl 8(%rsp),%eax - orl %r12d,%ecx - roll $5,%esi - xorl 28(%rsp),%eax - andl %edx,%ecx - addl %esi,%edi - roll $1,%eax - orl %ecx,%ebx - roll $30,%r11d - movl %eax,40(%rsp) - addl %ebx,%edi - leal -1894007588(%rax,%rdx,1),%esi - movl 44(%rsp),%eax - movl %ebp,%ebx - movl %ebp,%ecx - xorl 52(%rsp),%eax - movl %edi,%edx - andl %r11d,%ebx - xorl 12(%rsp),%eax - orl %r11d,%ecx + roll $5,%ebp + movdqa %xmm1,%xmm10 + movdqa %xmm8,0(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm10 + movl %edx,%edi roll $5,%edx - xorl 32(%rsp),%eax - andl %r12d,%ecx - addl %edx,%esi - roll $1,%eax - orl %ecx,%ebx - roll $30,%ebp - movl %eax,44(%rsp) - addl %ebx,%esi - leal -1894007588(%rax,%r12,1),%edx - movl 48(%rsp),%eax - movl %edi,%ebx - movl %edi,%ecx - xorl 56(%rsp),%eax - movl %esi,%r12d - andl %ebp,%ebx - xorl 16(%rsp),%eax - orl %ebp,%ecx - roll $5,%r12d - xorl 36(%rsp),%eax - andl %r11d,%ecx - addl %r12d,%edx - roll $1,%eax - orl %ecx,%ebx - roll $30,%edi - movl %eax,48(%rsp) - addl %ebx,%edx - leal -899497514(%rax,%r11,1),%r12d - movl 52(%rsp),%eax - movl %edi,%ebx - movl %edx,%r11d - xorl 60(%rsp),%eax - xorl %esi,%ebx - roll $5,%r11d - xorl 20(%rsp),%eax - xorl %ebp,%ebx - addl %r11d,%r12d - xorl 40(%rsp),%eax - roll $30,%esi - addl %ebx,%r12d - roll $1,%eax - movl %eax,52(%rsp) - leal -899497514(%rax,%rbp,1),%r11d - movl 56(%rsp),%eax - movl %esi,%ebx - movl %r12d,%ebp - xorl 0(%rsp),%eax - xorl %edx,%ebx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm10,%xmm1 + addl 28(%rsp),%ebx + xorl %eax,%edi + movdqa %xmm1,%xmm8 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 32(%rsp),%eax + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,192,8 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + xorl %edx,%esi + addl %ebx,%eax + movdqa 32(%r11),%xmm10 + paddd %xmm1,%xmm9 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + movdqa %xmm9,16(%rsp) + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + pslld $2,%xmm2 + addl 
40(%rsp),%edx + xorl %ecx,%esi + psrld $30,%xmm8 + movl %ebp,%edi roll $5,%ebp - xorl 24(%rsp),%eax - xorl %edi,%ebx - addl %ebp,%r11d - xorl 44(%rsp),%eax - roll $30,%edx - addl %ebx,%r11d - roll $1,%eax - movl %eax,56(%rsp) - leal -899497514(%rax,%rdi,1),%ebp - movl 60(%rsp),%eax - movl %edx,%ebx - movl %r11d,%edi - xorl 4(%rsp),%eax - xorl %r12d,%ebx - roll $5,%edi - xorl 28(%rsp),%eax - xorl %esi,%ebx + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + por %xmm8,%xmm2 + addl 44(%rsp),%ecx + xorl %ebx,%edi + movdqa %xmm2,%xmm9 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 48(%rsp),%ebx + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,201,8 + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + xorl %ebp,%esi + addl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm2,%xmm10 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + movdqa %xmm10,32(%rsp) + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + pslld $2,%xmm3 + addl 56(%rsp),%ebp + xorl %edx,%esi + psrld $30,%xmm9 + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + por %xmm9,%xmm3 + addl 60(%rsp),%edx + xorl %ecx,%edi + movdqa %xmm3,%xmm10 + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 0(%rsp),%ecx + pxor %xmm0,%xmm4 +.byte 102,68,15,58,15,210,8 + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + xorl %eax,%esi + addl %edx,%ecx + movdqa %xmm8,%xmm9 + paddd %xmm3,%xmm8 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + movdqa %xmm8,48(%rsp) + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + pslld $2,%xmm4 + addl 8(%rsp),%eax + xorl %ebp,%esi + psrld $30,%xmm10 + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + por %xmm10,%xmm4 + addl 12(%rsp),%ebp + xorl %edx,%edi + movdqa %xmm4,%xmm8 + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx addl %edi,%ebp - xorl 48(%rsp),%eax - roll $30,%r12d - addl %ebx,%ebp - roll $1,%eax - movl %eax,60(%rsp) - leal -899497514(%rax,%rsi,1),%edi - movl 0(%rsp),%eax - movl %r12d,%ebx + addl 16(%rsp),%edx + pxor %xmm1,%xmm5 +.byte 102,68,15,58,15,195,8 + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + xorl %ebx,%esi + addl %ebp,%edx + movdqa %xmm9,%xmm10 + paddd %xmm4,%xmm9 + rorl $7,%eax + addl %esi,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + movdqa %xmm9,0(%rsp) + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + pslld $2,%xmm5 + addl 24(%rsp),%ebx + xorl %eax,%esi + psrld $30,%xmm8 + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + por %xmm8,%xmm5 + addl 28(%rsp),%eax + xorl %ebp,%edi + movdqa %xmm5,%xmm9 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movl %ecx,%edi + pxor %xmm2,%xmm6 +.byte 102,68,15,58,15,204,8 + xorl %edx,%ecx + addl 32(%rsp),%ebp + andl %edx,%edi + pxor %xmm7,%xmm6 + andl %ecx,%esi + rorl $7,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm5,%xmm10 + addl %edi,%ebp + movl %eax,%edi + pxor %xmm9,%xmm6 + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movdqa %xmm6,%xmm9 + movdqa 
%xmm10,16(%rsp) + movl %ebx,%esi + xorl %ecx,%ebx + addl 36(%rsp),%edx + andl %ecx,%esi + pslld $2,%xmm6 + andl %ebx,%edi + rorl $7,%eax + psrld $30,%xmm9 + addl %esi,%edx movl %ebp,%esi - xorl 8(%rsp),%eax - xorl %r11d,%ebx - roll $5,%esi - xorl 32(%rsp),%eax - xorl %edx,%ebx - addl %esi,%edi - xorl 52(%rsp),%eax - roll $30,%r11d - addl %ebx,%edi - roll $1,%eax - movl %eax,0(%rsp) - leal -899497514(%rax,%rdx,1),%esi - movl 4(%rsp),%eax - movl %r11d,%ebx - movl %edi,%edx - xorl 12(%rsp),%eax - xorl %ebp,%ebx + roll $5,%ebp + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + por %xmm9,%xmm6 + movl %eax,%edi + xorl %ebx,%eax + movdqa %xmm6,%xmm10 + addl 40(%rsp),%ecx + andl %ebx,%edi + andl %eax,%esi + rorl $7,%ebp + addl %edi,%ecx + movl %edx,%edi roll $5,%edx - xorl 36(%rsp),%eax - xorl %r12d,%ebx - addl %edx,%esi - xorl 56(%rsp),%eax - roll $30,%ebp - addl %ebx,%esi - roll $1,%eax - movl %eax,4(%rsp) - leal -899497514(%rax,%r12,1),%edx - movl 8(%rsp),%eax - movl %ebp,%ebx - movl %esi,%r12d - xorl 16(%rsp),%eax - xorl %edi,%ebx - roll $5,%r12d - xorl 40(%rsp),%eax - xorl %r11d,%ebx - addl %r12d,%edx - xorl 60(%rsp),%eax - roll $30,%edi - addl %ebx,%edx - roll $1,%eax - movl %eax,8(%rsp) - leal -899497514(%rax,%r11,1),%r12d - movl 12(%rsp),%eax - movl %edi,%ebx - movl %edx,%r11d - xorl 20(%rsp),%eax - xorl %esi,%ebx - roll $5,%r11d - xorl 44(%rsp),%eax - xorl %ebp,%ebx - addl %r11d,%r12d - xorl 0(%rsp),%eax - roll $30,%esi - addl %ebx,%r12d - roll $1,%eax - movl %eax,12(%rsp) - leal -899497514(%rax,%rbp,1),%r11d - movl 16(%rsp),%eax - movl %esi,%ebx - movl %r12d,%ebp - xorl 24(%rsp),%eax - xorl %edx,%ebx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%esi + xorl %eax,%ebp + addl 44(%rsp),%ebx + andl %eax,%esi + andl %ebp,%edi + rorl $7,%edx + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%edi + pxor %xmm3,%xmm7 +.byte 102,68,15,58,15,213,8 + xorl %ebp,%edx + addl 48(%rsp),%eax + andl %ebp,%edi + pxor %xmm0,%xmm7 + andl %edx,%esi + rorl $7,%ecx + movdqa 48(%r11),%xmm9 + paddd %xmm6,%xmm8 + addl %edi,%eax + movl %ebx,%edi + pxor %xmm10,%xmm7 + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movdqa %xmm7,%xmm10 + movdqa %xmm8,32(%rsp) + movl %ecx,%esi + xorl %edx,%ecx + addl 52(%rsp),%ebp + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%edi + rorl $7,%ebx + psrld $30,%xmm10 + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + por %xmm10,%xmm7 + movl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm7,%xmm8 + addl 56(%rsp),%edx + andl %ecx,%edi + andl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + movl %ebp,%edi roll $5,%ebp - xorl 48(%rsp),%eax - xorl %edi,%ebx - addl %ebp,%r11d - xorl 4(%rsp),%eax - roll $30,%edx - addl %ebx,%r11d - roll $1,%eax - movl %eax,16(%rsp) - leal -899497514(%rax,%rdi,1),%ebp - movl 20(%rsp),%eax - movl %edx,%ebx - movl %r11d,%edi - xorl 28(%rsp),%eax - xorl %r12d,%ebx - roll $5,%edi - xorl 52(%rsp),%eax - xorl %esi,%ebx + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%esi + xorl %ebx,%eax + addl 60(%rsp),%ecx + andl %ebx,%esi + andl %eax,%edi + rorl $7,%ebp + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%edi + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,198,8 + xorl %eax,%ebp + addl 0(%rsp),%ebx + andl %eax,%edi + pxor %xmm1,%xmm0 + andl %ebp,%esi + rorl $7,%edx + movdqa %xmm9,%xmm10 + paddd %xmm7,%xmm9 + addl %edi,%ebx + movl %ecx,%edi + pxor %xmm8,%xmm0 + 
roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movdqa %xmm0,%xmm8 + movdqa %xmm9,48(%rsp) + movl %edx,%esi + xorl %ebp,%edx + addl 4(%rsp),%eax + andl %ebp,%esi + pslld $2,%xmm0 + andl %edx,%edi + rorl $7,%ecx + psrld $30,%xmm8 + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + por %xmm8,%xmm0 + movl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm0,%xmm9 + addl 8(%rsp),%ebp + andl %edx,%edi + andl %ecx,%esi + rorl $7,%ebx addl %edi,%ebp - xorl 8(%rsp),%eax - roll $30,%r12d - addl %ebx,%ebp - roll $1,%eax - movl %eax,20(%rsp) - leal -899497514(%rax,%rsi,1),%edi - movl 24(%rsp),%eax - movl %r12d,%ebx + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%esi + xorl %ecx,%ebx + addl 12(%rsp),%edx + andl %ecx,%esi + andl %ebx,%edi + rorl $7,%eax + addl %esi,%edx movl %ebp,%esi - xorl 32(%rsp),%eax - xorl %r11d,%ebx - roll $5,%esi - xorl 56(%rsp),%eax - xorl %edx,%ebx - addl %esi,%edi - xorl 12(%rsp),%eax - roll $30,%r11d - addl %ebx,%edi - roll $1,%eax - movl %eax,24(%rsp) - leal -899497514(%rax,%rdx,1),%esi - movl 28(%rsp),%eax - movl %r11d,%ebx - movl %edi,%edx - xorl 36(%rsp),%eax - xorl %ebp,%ebx + roll $5,%ebp + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%edi + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,207,8 + xorl %ebx,%eax + addl 16(%rsp),%ecx + andl %ebx,%edi + pxor %xmm2,%xmm1 + andl %eax,%esi + rorl $7,%ebp + movdqa %xmm10,%xmm8 + paddd %xmm0,%xmm10 + addl %edi,%ecx + movl %edx,%edi + pxor %xmm9,%xmm1 roll $5,%edx - xorl 60(%rsp),%eax - xorl %r12d,%ebx - addl %edx,%esi - xorl 16(%rsp),%eax - roll $30,%ebp - addl %ebx,%esi - roll $1,%eax - movl %eax,28(%rsp) - leal -899497514(%rax,%r12,1),%edx - movl 32(%rsp),%eax - movl %ebp,%ebx - movl %esi,%r12d - xorl 40(%rsp),%eax - xorl %edi,%ebx - roll $5,%r12d - xorl 0(%rsp),%eax - xorl %r11d,%ebx - addl %r12d,%edx - xorl 20(%rsp),%eax - roll $30,%edi - addl %ebx,%edx - roll $1,%eax - movl %eax,32(%rsp) - leal -899497514(%rax,%r11,1),%r12d - movl 36(%rsp),%eax - movl %edi,%ebx - movl %edx,%r11d - xorl 44(%rsp),%eax - xorl %esi,%ebx - roll $5,%r11d - xorl 4(%rsp),%eax - xorl %ebp,%ebx - addl %r11d,%r12d - xorl 24(%rsp),%eax - roll $30,%esi - addl %ebx,%r12d - roll $1,%eax - movl %eax,36(%rsp) - leal -899497514(%rax,%rbp,1),%r11d - movl 40(%rsp),%eax - movl %esi,%ebx - movl %r12d,%ebp - xorl 48(%rsp),%eax - xorl %edx,%ebx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movdqa %xmm1,%xmm9 + movdqa %xmm10,0(%rsp) + movl %ebp,%esi + xorl %eax,%ebp + addl 20(%rsp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %ebp,%edi + rorl $7,%edx + psrld $30,%xmm9 + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + por %xmm9,%xmm1 + movl %edx,%edi + xorl %ebp,%edx + movdqa %xmm1,%xmm10 + addl 24(%rsp),%eax + andl %ebp,%edi + andl %edx,%esi + rorl $7,%ecx + addl %edi,%eax + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movl %ecx,%esi + xorl %edx,%ecx + addl 28(%rsp),%ebp + andl %edx,%esi + andl %ecx,%edi + rorl $7,%ebx + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%edi + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,208,8 + xorl %ecx,%ebx + addl 32(%rsp),%edx + andl %ecx,%edi + pxor %xmm3,%xmm2 + andl %ebx,%esi + rorl $7,%eax + movdqa %xmm8,%xmm9 + paddd %xmm1,%xmm8 + addl %edi,%edx + movl %ebp,%edi + pxor %xmm10,%xmm2 roll $5,%ebp - xorl 8(%rsp),%eax - xorl %edi,%ebx - addl %ebp,%r11d - xorl 
28(%rsp),%eax - roll $30,%edx - addl %ebx,%r11d - roll $1,%eax - movl %eax,40(%rsp) - leal -899497514(%rax,%rdi,1),%ebp - movl 44(%rsp),%eax - movl %edx,%ebx - movl %r11d,%edi - xorl 52(%rsp),%eax - xorl %r12d,%ebx - roll $5,%edi - xorl 12(%rsp),%eax - xorl %esi,%ebx + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movdqa %xmm2,%xmm10 + movdqa %xmm8,16(%rsp) + movl %eax,%esi + xorl %ebx,%eax + addl 36(%rsp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%edi + rorl $7,%ebp + psrld $30,%xmm10 + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + por %xmm10,%xmm2 + movl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm2,%xmm8 + addl 40(%rsp),%ebx + andl %eax,%edi + andl %ebp,%esi + rorl $7,%edx + addl %edi,%ebx + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%esi + xorl %ebp,%edx + addl 44(%rsp),%eax + andl %ebp,%esi + andl %edx,%edi + rorl $7,%ecx + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + addl 48(%rsp),%ebp + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,193,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm10 + paddd %xmm2,%xmm9 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + movdqa %xmm9,32(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm8 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm8,%xmm3 + addl 60(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 0(%rsp),%eax + paddd %xmm3,%xmm10 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + movdqa %xmm10,48(%rsp) + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 4(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx addl %edi,%ebp - xorl 32(%rsp),%eax - roll $30,%r12d - addl %ebx,%ebp - roll $1,%eax - movl %eax,44(%rsp) - leal -899497514(%rax,%rsi,1),%edi - movl 48(%rsp),%eax - movl %r12d,%ebx + addl 8(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 12(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + cmpq %r10,%r9 + je L$done_ssse3 + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %eax,%esi +.byte 102,15,56,0,206 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm9,%xmm0 + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + movdqa %xmm0,0(%rsp) + addl 20(%rsp),%eax + xorl %ebp,%edi + psubd %xmm9,%xmm0 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi movl %ebp,%esi - xorl 56(%rsp),%eax - xorl %r11d,%ebx - roll $5,%esi - xorl 16(%rsp),%eax - xorl %edx,%ebx - addl %esi,%edi - xorl 36(%rsp),%eax - roll $30,%r11d - addl %ebx,%edi - roll $1,%eax - movl %eax,48(%rsp) - leal 
-899497514(%rax,%rdx,1),%esi - movl 52(%rsp),%eax - movl %r11d,%ebx - movl %edi,%edx - xorl 60(%rsp),%eax - xorl %ebp,%ebx + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi +.byte 102,15,56,0,214 + movl %edx,%edi roll $5,%edx - xorl 20(%rsp),%eax - xorl %r12d,%ebx - addl %edx,%esi - xorl 40(%rsp),%eax - roll $30,%ebp - addl %ebx,%esi - roll $1,%eax - leal -899497514(%rax,%r12,1),%edx - movl 56(%rsp),%eax - movl %ebp,%ebx - movl %esi,%r12d - xorl 0(%rsp),%eax - xorl %edi,%ebx - roll $5,%r12d - xorl 24(%rsp),%eax - xorl %r11d,%ebx - addl %r12d,%edx - xorl 44(%rsp),%eax - roll $30,%edi - addl %ebx,%edx - roll $1,%eax - leal -899497514(%rax,%r11,1),%r12d - movl 60(%rsp),%eax - movl %edi,%ebx - movl %edx,%r11d - xorl 4(%rsp),%eax - xorl %esi,%ebx - roll $5,%r11d - xorl 28(%rsp),%eax - xorl %ebp,%ebx - addl %r11d,%r12d - xorl 48(%rsp),%eax - roll $30,%esi - addl %ebx,%r12d - roll $1,%eax - leal -899497514(%rax,%rbp,1),%r11d - movl %esi,%ebx - movl %r12d,%ebp - xorl %edx,%ebx + paddd %xmm9,%xmm1 + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + movdqa %xmm1,16(%rsp) + addl 36(%rsp),%ebx + xorl %eax,%edi + psubd %xmm9,%xmm1 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi +.byte 102,15,56,0,222 + movl %ebp,%edi roll $5,%ebp - xorl %edi,%ebx - addl %ebp,%r11d - roll $30,%edx - addl %ebx,%r11d - addl 0(%r8),%r11d - addl 4(%r8),%r12d - addl 8(%r8),%edx - addl 12(%r8),%esi - addl 16(%r8),%edi - movl %r11d,0(%r8) - movl %r12d,4(%r8) - movl %edx,8(%r8) - movl %esi,12(%r8) - movl %edi,16(%r8) - - xchgl %r11d,%edx - xchgl %r12d,%esi - xchgl %r11d,%edi - xchgl %r12d,%ebp + paddd %xmm9,%xmm2 + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + movdqa %xmm2,32(%rsp) + addl 52(%rsp),%ecx + xorl %ebx,%edi + psubd %xmm9,%xmm2 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + jmp L$oop_ssse3 - leaq 64(%r9),%r9 - subq $1,%r10 - jnz L$loop - movq 64(%rsp),%rsi - movq (%rsi),%r12 +.p2align 4 +L$done_ssse3: + addl 16(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 20(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl 
%edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + addl 36(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 52(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + leaq 64(%rsp),%rsi + movq 0(%rsi),%r12 movq 8(%rsi),%rbp movq 16(%rsi),%rbx leaq 24(%rsi),%rsp -L$epilogue: +L$epilogue_ssse3: .byte 0xf3,0xc3 +.p2align 6 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 + +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 + +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc + +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.p2align 4 +.p2align 6 diff --git a/deps/openssl/asm/x64-macosx-gas/sha/sha512-x86_64.s b/deps/openssl/asm/x64-macosx-gas/sha/sha512-x86_64.s index 73c4990304..dda5a96e9d 100644 --- a/deps/openssl/asm/x64-macosx-gas/sha/sha512-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/sha/sha512-x86_64.s @@ -38,1880 +38,1688 @@ L$prologue: L$loop: xorq %rdi,%rdi movl 0(%rsi),%r12d - bswapl %r12d movl %r8d,%r13d - movl %r8d,%r14d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d movl %r9d,%r15d + movl %r12d,0(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r8d,%r15d - movl %r12d,0(%rsp) + movl %ebx,%r11d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - addl %r11d,%r12d - - movl %eax,%r11d - addl %r13d,%r12d + xorl %ecx,%r11d + xorl %eax,%r14d addl %r15d,%r12d - movl %eax,%r13d - movl %eax,%r14d + movl %ebx,%r15d - rorl $2,%r11d - rorl $13,%r13d - movl %eax,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d - xorl %r13d,%r11d - rorl $9,%r13d - orl %ecx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d - xorl %r13d,%r11d - andl %ecx,%r15d addl %r12d,%edx - - andl %ebx,%r14d addl %r12d,%r11d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r11d + movl 4(%rsi),%r12d - bswapl %r12d movl %edx,%r13d - movl %edx,%r14d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d movl %r8d,%r15d + movl %r12d,4(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %edx,%r13d xorl %r9d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r10d,%r12d + 
xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %edx,%r15d - movl %r12d,4(%rsp) + movl %eax,%r10d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %edx,%r13d xorl %r9d,%r15d - addl %r10d,%r12d - - movl %r11d,%r10d - addl %r13d,%r12d + xorl %ebx,%r10d + xorl %r11d,%r14d addl %r15d,%r12d - movl %r11d,%r13d - movl %r11d,%r14d + movl %eax,%r15d - rorl $2,%r10d - rorl $13,%r13d - movl %r11d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d - xorl %r13d,%r10d - rorl $9,%r13d - orl %ebx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d - xorl %r13d,%r10d - andl %ebx,%r15d addl %r12d,%ecx - - andl %eax,%r14d addl %r12d,%r10d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r10d + movl 8(%rsi),%r12d - bswapl %r12d movl %ecx,%r13d - movl %ecx,%r14d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d movl %edx,%r15d + movl %r12d,8(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %ecx,%r15d - movl %r12d,8(%rsp) + movl %r11d,%r9d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - addl %r9d,%r12d - - movl %r10d,%r9d - addl %r13d,%r12d + xorl %eax,%r9d + xorl %r10d,%r14d addl %r15d,%r12d - movl %r10d,%r13d - movl %r10d,%r14d + movl %r11d,%r15d - rorl $2,%r9d - rorl $13,%r13d - movl %r10d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d - xorl %r13d,%r9d - rorl $9,%r13d - orl %eax,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d - xorl %r13d,%r9d - andl %eax,%r15d addl %r12d,%ebx - - andl %r11d,%r14d addl %r12d,%r9d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r9d + movl 12(%rsi),%r12d - bswapl %r12d movl %ebx,%r13d - movl %ebx,%r14d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d movl %ecx,%r15d + movl %r12d,12(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %ebx,%r13d xorl %edx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %ebx,%r15d - movl %r12d,12(%rsp) + movl %r10d,%r8d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %ebx,%r13d xorl %edx,%r15d - addl %r8d,%r12d - - movl %r9d,%r8d - addl %r13d,%r12d + xorl %r11d,%r8d + xorl %r9d,%r14d addl %r15d,%r12d - movl %r9d,%r13d - movl %r9d,%r14d + movl %r10d,%r15d - rorl $2,%r8d - rorl $13,%r13d - movl %r9d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d - xorl %r13d,%r8d - rorl $9,%r13d - orl %r11d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d - xorl %r13d,%r8d - andl %r11d,%r15d addl %r12d,%eax - - andl %r10d,%r14d addl %r12d,%r8d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r8d + movl 16(%rsi),%r12d - bswapl %r12d movl %eax,%r13d - movl %eax,%r14d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d movl %ebx,%r15d + movl %r12d,16(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %eax,%r15d - movl %r12d,16(%rsp) + movl %r9d,%edx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - addl %edx,%r12d - - movl %r8d,%edx - addl %r13d,%r12d + xorl %r10d,%edx + xorl %r8d,%r14d addl %r15d,%r12d - movl %r8d,%r13d - movl %r8d,%r14d + movl %r9d,%r15d - rorl $2,%edx - rorl $13,%r13d - movl %r8d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r8d,%edx + andl 
%r10d,%r15d - xorl %r13d,%edx - rorl $9,%r13d - orl %r10d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx - xorl %r13d,%edx - andl %r10d,%r15d addl %r12d,%r11d - - andl %r9d,%r14d addl %r12d,%edx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%edx + movl 20(%rsi),%r12d - bswapl %r12d movl %r11d,%r13d - movl %r11d,%r14d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d movl %eax,%r15d + movl %r12d,20(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r11d,%r13d xorl %ebx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r11d,%r15d - movl %r12d,20(%rsp) + movl %r8d,%ecx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r11d,%r13d xorl %ebx,%r15d - addl %ecx,%r12d - - movl %edx,%ecx - addl %r13d,%r12d + xorl %r9d,%ecx + xorl %edx,%r14d addl %r15d,%r12d - movl %edx,%r13d - movl %edx,%r14d + movl %r8d,%r15d - rorl $2,%ecx - rorl $13,%r13d - movl %edx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d - xorl %r13d,%ecx - rorl $9,%r13d - orl %r9d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx - xorl %r13d,%ecx - andl %r9d,%r15d addl %r12d,%r10d - - andl %r8d,%r14d addl %r12d,%ecx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%ecx + movl 24(%rsi),%r12d - bswapl %r12d movl %r10d,%r13d - movl %r10d,%r14d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d movl %r11d,%r15d + movl %r12d,24(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r10d,%r15d - movl %r12d,24(%rsp) + movl %edx,%ebx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - addl %ebx,%r12d - - movl %ecx,%ebx - addl %r13d,%r12d + xorl %r8d,%ebx + xorl %ecx,%r14d addl %r15d,%r12d - movl %ecx,%r13d - movl %ecx,%r14d + movl %edx,%r15d - rorl $2,%ebx - rorl $13,%r13d - movl %ecx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d - xorl %r13d,%ebx - rorl $9,%r13d - orl %r8d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx - xorl %r13d,%ebx - andl %r8d,%r15d addl %r12d,%r9d - - andl %edx,%r14d addl %r12d,%ebx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%ebx + movl 28(%rsi),%r12d - bswapl %r12d movl %r9d,%r13d - movl %r9d,%r14d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d movl %r10d,%r15d + movl %r12d,28(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r9d,%r13d xorl %r11d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r9d,%r15d - movl %r12d,28(%rsp) + movl %ecx,%eax - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r9d,%r13d xorl %r11d,%r15d - addl %eax,%r12d - - movl %ebx,%eax - addl %r13d,%r12d + xorl %edx,%eax + xorl %ebx,%r14d addl %r15d,%r12d - movl %ebx,%r13d - movl %ebx,%r14d + movl %ecx,%r15d - rorl $2,%eax - rorl $13,%r13d - movl %ebx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d - xorl %r13d,%eax - rorl $9,%r13d - orl %edx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax - xorl %r13d,%eax - andl %edx,%r15d addl %r12d,%r8d - - andl %ecx,%r14d addl %r12d,%eax - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%eax + movl 32(%rsi),%r12d - bswapl %r12d movl %r8d,%r13d - movl %r8d,%r14d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d movl %r9d,%r15d + movl %r12d,32(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + 
xorl %r8d,%r13d xorl %r10d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r8d,%r15d - movl %r12d,32(%rsp) + movl %ebx,%r11d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - addl %r11d,%r12d - - movl %eax,%r11d - addl %r13d,%r12d + xorl %ecx,%r11d + xorl %eax,%r14d addl %r15d,%r12d - movl %eax,%r13d - movl %eax,%r14d + movl %ebx,%r15d - rorl $2,%r11d - rorl $13,%r13d - movl %eax,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d - xorl %r13d,%r11d - rorl $9,%r13d - orl %ecx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d - xorl %r13d,%r11d - andl %ecx,%r15d addl %r12d,%edx - - andl %ebx,%r14d addl %r12d,%r11d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r11d + movl 36(%rsi),%r12d - bswapl %r12d movl %edx,%r13d - movl %edx,%r14d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d movl %r8d,%r15d - - rorl $6,%r13d - rorl $11,%r14d - xorl %r9d,%r15d - - xorl %r14d,%r13d - rorl $14,%r14d - andl %edx,%r15d movl %r12d,36(%rsp) - xorl %r14d,%r13d + rorl $9,%r14d + xorl %edx,%r13d xorl %r9d,%r15d - addl %r10d,%r12d - - movl %r11d,%r10d - addl %r13d,%r12d - addl %r15d,%r12d - movl %r11d,%r13d - movl %r11d,%r14d + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d - rorl $2,%r10d - rorl $13,%r13d - movl %r11d,%r15d addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d - xorl %r13d,%r10d - rorl $9,%r13d - orl %ebx,%r14d + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d - xorl %r13d,%r10d + rorl $6,%r13d + andl %r11d,%r10d andl %ebx,%r15d - addl %r12d,%ecx - andl %eax,%r14d - addl %r12d,%r10d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d - orl %r15d,%r14d + addl %r12d,%ecx + addl %r12d,%r10d leaq 1(%rdi),%rdi - addl %r14d,%r10d + movl 40(%rsi),%r12d - bswapl %r12d movl %ecx,%r13d - movl %ecx,%r14d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d movl %edx,%r15d + movl %r12d,40(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %ecx,%r15d - movl %r12d,40(%rsp) + movl %r11d,%r9d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - addl %r9d,%r12d - - movl %r10d,%r9d - addl %r13d,%r12d + xorl %eax,%r9d + xorl %r10d,%r14d addl %r15d,%r12d - movl %r10d,%r13d - movl %r10d,%r14d + movl %r11d,%r15d - rorl $2,%r9d - rorl $13,%r13d - movl %r10d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d - xorl %r13d,%r9d - rorl $9,%r13d - orl %eax,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d - xorl %r13d,%r9d - andl %eax,%r15d addl %r12d,%ebx - - andl %r11d,%r14d addl %r12d,%r9d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r9d + movl 44(%rsi),%r12d - bswapl %r12d movl %ebx,%r13d - movl %ebx,%r14d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d movl %ecx,%r15d + movl %r12d,44(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %ebx,%r13d xorl %edx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %ebx,%r15d - movl %r12d,44(%rsp) + movl %r10d,%r8d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %ebx,%r13d xorl %edx,%r15d - addl %r8d,%r12d - - movl %r9d,%r8d - addl %r13d,%r12d + xorl %r11d,%r8d + xorl %r9d,%r14d addl %r15d,%r12d - movl %r9d,%r13d - movl 
%r9d,%r14d + movl %r10d,%r15d - rorl $2,%r8d - rorl $13,%r13d - movl %r9d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d - xorl %r13d,%r8d - rorl $9,%r13d - orl %r11d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d - xorl %r13d,%r8d - andl %r11d,%r15d addl %r12d,%eax - - andl %r10d,%r14d addl %r12d,%r8d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r8d + movl 48(%rsi),%r12d - bswapl %r12d movl %eax,%r13d - movl %eax,%r14d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d movl %ebx,%r15d + movl %r12d,48(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %eax,%r15d - movl %r12d,48(%rsp) + movl %r9d,%edx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - addl %edx,%r12d - - movl %r8d,%edx - addl %r13d,%r12d + xorl %r10d,%edx + xorl %r8d,%r14d addl %r15d,%r12d - movl %r8d,%r13d - movl %r8d,%r14d + movl %r9d,%r15d - rorl $2,%edx - rorl $13,%r13d - movl %r8d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d - xorl %r13d,%edx - rorl $9,%r13d - orl %r10d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx - xorl %r13d,%edx - andl %r10d,%r15d addl %r12d,%r11d - - andl %r9d,%r14d addl %r12d,%edx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%edx + movl 52(%rsi),%r12d - bswapl %r12d movl %r11d,%r13d - movl %r11d,%r14d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d movl %eax,%r15d + movl %r12d,52(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r11d,%r13d xorl %ebx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r11d,%r15d - movl %r12d,52(%rsp) + movl %r8d,%ecx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r11d,%r13d xorl %ebx,%r15d - addl %ecx,%r12d - - movl %edx,%ecx - addl %r13d,%r12d + xorl %r9d,%ecx + xorl %edx,%r14d addl %r15d,%r12d - movl %edx,%r13d - movl %edx,%r14d + movl %r8d,%r15d - rorl $2,%ecx - rorl $13,%r13d - movl %edx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d - xorl %r13d,%ecx - rorl $9,%r13d - orl %r9d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx - xorl %r13d,%ecx - andl %r9d,%r15d addl %r12d,%r10d - - andl %r8d,%r14d addl %r12d,%ecx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%ecx + movl 56(%rsi),%r12d - bswapl %r12d movl %r10d,%r13d - movl %r10d,%r14d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d movl %r11d,%r15d + movl %r12d,56(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r10d,%r15d - movl %r12d,56(%rsp) + movl %edx,%ebx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - addl %ebx,%r12d - - movl %ecx,%ebx - addl %r13d,%r12d + xorl %r8d,%ebx + xorl %ecx,%r14d addl %r15d,%r12d - movl %ecx,%r13d - movl %ecx,%r14d + movl %edx,%r15d - rorl $2,%ebx - rorl $13,%r13d - movl %ecx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d - xorl %r13d,%ebx - rorl $9,%r13d - orl %r8d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx - xorl %r13d,%ebx - andl %r8d,%r15d addl %r12d,%r9d - - andl %edx,%r14d addl %r12d,%ebx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%ebx + movl 60(%rsi),%r12d - bswapl %r12d movl %r9d,%r13d - movl 
%r9d,%r14d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d movl %r10d,%r15d + movl %r12d,60(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r9d,%r13d xorl %r11d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r9d,%r15d - movl %r12d,60(%rsp) + movl %ecx,%eax - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r9d,%r13d xorl %r11d,%r15d - addl %eax,%r12d - - movl %ebx,%eax - addl %r13d,%r12d + xorl %edx,%eax + xorl %ebx,%r14d addl %r15d,%r12d - movl %ebx,%r13d - movl %ebx,%r14d + movl %ecx,%r15d - rorl $2,%eax - rorl $13,%r13d - movl %ebx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d - xorl %r13d,%eax - rorl $9,%r13d - orl %edx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax - xorl %r13d,%eax - andl %edx,%r15d addl %r12d,%r8d - - andl %ecx,%r14d addl %r12d,%eax - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%eax + jmp L$rounds_16_xx .p2align 4 L$rounds_16_xx: movl 4(%rsp),%r13d - movl 56(%rsp),%r12d - - movl %r13d,%r15d + movl 56(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 36(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 36(%rsp),%r12d + xorl %r15d,%r14d addl 0(%rsp),%r12d movl %r8d,%r13d - movl %r8d,%r14d + addl %r14d,%r12d + movl %eax,%r14d + rorl $14,%r13d movl %r9d,%r15d + movl %r12d,0(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r8d,%r15d - movl %r12d,0(%rsp) + movl %ebx,%r11d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - addl %r11d,%r12d - - movl %eax,%r11d - addl %r13d,%r12d + xorl %ecx,%r11d + xorl %eax,%r14d addl %r15d,%r12d - movl %eax,%r13d - movl %eax,%r14d + movl %ebx,%r15d - rorl $2,%r11d - rorl $13,%r13d - movl %eax,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d - xorl %r13d,%r11d - rorl $9,%r13d - orl %ecx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d - xorl %r13d,%r11d - andl %ecx,%r15d addl %r12d,%edx - - andl %ebx,%r14d addl %r12d,%r11d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r11d - movl 8(%rsp),%r13d - movl 60(%rsp),%r12d - movl %r13d,%r15d + movl 8(%rsp),%r13d + movl 60(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 40(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 40(%rsp),%r12d + xorl %r15d,%r14d addl 4(%rsp),%r12d movl %edx,%r13d - movl %edx,%r14d + addl %r14d,%r12d + movl %r11d,%r14d + rorl $14,%r13d movl %r8d,%r15d + movl %r12d,4(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %edx,%r13d xorl %r9d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %edx,%r15d - movl %r12d,4(%rsp) + movl %eax,%r10d - xorl 
%r14d,%r13d + rorl $11,%r14d + xorl %edx,%r13d xorl %r9d,%r15d - addl %r10d,%r12d - - movl %r11d,%r10d - addl %r13d,%r12d + xorl %ebx,%r10d + xorl %r11d,%r14d addl %r15d,%r12d - movl %r11d,%r13d - movl %r11d,%r14d + movl %eax,%r15d - rorl $2,%r10d - rorl $13,%r13d - movl %r11d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d - xorl %r13d,%r10d - rorl $9,%r13d - orl %ebx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d - xorl %r13d,%r10d - andl %ebx,%r15d addl %r12d,%ecx - - andl %eax,%r14d addl %r12d,%r10d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r10d - movl 12(%rsp),%r13d - movl 0(%rsp),%r12d - movl %r13d,%r15d + movl 12(%rsp),%r13d + movl 0(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 44(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 44(%rsp),%r12d + xorl %r15d,%r14d addl 8(%rsp),%r12d movl %ecx,%r13d - movl %ecx,%r14d + addl %r14d,%r12d + movl %r10d,%r14d + rorl $14,%r13d movl %edx,%r15d + movl %r12d,8(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %ecx,%r15d - movl %r12d,8(%rsp) + movl %r11d,%r9d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - addl %r9d,%r12d - - movl %r10d,%r9d - addl %r13d,%r12d + xorl %eax,%r9d + xorl %r10d,%r14d addl %r15d,%r12d - movl %r10d,%r13d - movl %r10d,%r14d + movl %r11d,%r15d - rorl $2,%r9d - rorl $13,%r13d - movl %r10d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d - xorl %r13d,%r9d - rorl $9,%r13d - orl %eax,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d - xorl %r13d,%r9d - andl %eax,%r15d addl %r12d,%ebx - - andl %r11d,%r14d addl %r12d,%r9d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r9d - movl 16(%rsp),%r13d - movl 4(%rsp),%r12d - movl %r13d,%r15d + movl 16(%rsp),%r13d + movl 4(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 48(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 48(%rsp),%r12d + xorl %r15d,%r14d addl 12(%rsp),%r12d movl %ebx,%r13d - movl %ebx,%r14d + addl %r14d,%r12d + movl %r9d,%r14d + rorl $14,%r13d movl %ecx,%r15d + movl %r12d,12(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %ebx,%r13d xorl %edx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %ebx,%r15d - movl %r12d,12(%rsp) + movl %r10d,%r8d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %ebx,%r13d xorl %edx,%r15d - addl %r8d,%r12d - - movl %r9d,%r8d - addl %r13d,%r12d + xorl %r11d,%r8d + xorl %r9d,%r14d addl %r15d,%r12d - movl %r9d,%r13d - movl %r9d,%r14d + movl %r10d,%r15d - rorl $2,%r8d - rorl $13,%r13d - movl %r9d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d - xorl %r13d,%r8d - rorl 
$9,%r13d - orl %r11d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d - xorl %r13d,%r8d - andl %r11d,%r15d addl %r12d,%eax - - andl %r10d,%r14d addl %r12d,%r8d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r8d - movl 20(%rsp),%r13d - movl 8(%rsp),%r12d - movl %r13d,%r15d + movl 20(%rsp),%r13d + movl 8(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 52(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 52(%rsp),%r12d + xorl %r15d,%r14d addl 16(%rsp),%r12d movl %eax,%r13d - movl %eax,%r14d + addl %r14d,%r12d + movl %r8d,%r14d + rorl $14,%r13d movl %ebx,%r15d + movl %r12d,16(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %eax,%r15d - movl %r12d,16(%rsp) + movl %r9d,%edx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - addl %edx,%r12d - - movl %r8d,%edx - addl %r13d,%r12d + xorl %r10d,%edx + xorl %r8d,%r14d addl %r15d,%r12d - movl %r8d,%r13d - movl %r8d,%r14d + movl %r9d,%r15d - rorl $2,%edx - rorl $13,%r13d - movl %r8d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d - xorl %r13d,%edx - rorl $9,%r13d - orl %r10d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx - xorl %r13d,%edx - andl %r10d,%r15d addl %r12d,%r11d - - andl %r9d,%r14d addl %r12d,%edx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%edx - movl 24(%rsp),%r13d - movl 12(%rsp),%r12d - movl %r13d,%r15d + movl 24(%rsp),%r13d + movl 12(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 56(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 56(%rsp),%r12d + xorl %r15d,%r14d addl 20(%rsp),%r12d movl %r11d,%r13d - movl %r11d,%r14d + addl %r14d,%r12d + movl %edx,%r14d + rorl $14,%r13d movl %eax,%r15d + movl %r12d,20(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r11d,%r13d xorl %ebx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r11d,%r15d - movl %r12d,20(%rsp) + movl %r8d,%ecx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r11d,%r13d xorl %ebx,%r15d - addl %ecx,%r12d - - movl %edx,%ecx - addl %r13d,%r12d + xorl %r9d,%ecx + xorl %edx,%r14d addl %r15d,%r12d - movl %edx,%r13d - movl %edx,%r14d + movl %r8d,%r15d - rorl $2,%ecx - rorl $13,%r13d - movl %edx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d - xorl %r13d,%ecx - rorl $9,%r13d - orl %r9d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx - xorl %r13d,%ecx - andl %r9d,%r15d addl %r12d,%r10d - - andl %r8d,%r14d addl %r12d,%ecx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%ecx - movl 28(%rsp),%r13d - movl 16(%rsp),%r12d - movl %r13d,%r15d + movl 28(%rsp),%r13d + movl 16(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl 
%r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 60(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 60(%rsp),%r12d + xorl %r15d,%r14d addl 24(%rsp),%r12d movl %r10d,%r13d - movl %r10d,%r14d + addl %r14d,%r12d + movl %ecx,%r14d + rorl $14,%r13d movl %r11d,%r15d + movl %r12d,24(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r10d,%r15d - movl %r12d,24(%rsp) + movl %edx,%ebx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - addl %ebx,%r12d - - movl %ecx,%ebx - addl %r13d,%r12d + xorl %r8d,%ebx + xorl %ecx,%r14d addl %r15d,%r12d - movl %ecx,%r13d - movl %ecx,%r14d + movl %edx,%r15d - rorl $2,%ebx - rorl $13,%r13d - movl %ecx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d - xorl %r13d,%ebx - rorl $9,%r13d - orl %r8d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx - xorl %r13d,%ebx - andl %r8d,%r15d addl %r12d,%r9d - - andl %edx,%r14d addl %r12d,%ebx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%ebx - movl 32(%rsp),%r13d - movl 20(%rsp),%r12d - movl %r13d,%r15d + movl 32(%rsp),%r13d + movl 20(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 0(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 0(%rsp),%r12d + xorl %r15d,%r14d addl 28(%rsp),%r12d movl %r9d,%r13d - movl %r9d,%r14d + addl %r14d,%r12d + movl %ebx,%r14d + rorl $14,%r13d movl %r10d,%r15d + movl %r12d,28(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r9d,%r13d xorl %r11d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r9d,%r15d - movl %r12d,28(%rsp) + movl %ecx,%eax - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r9d,%r13d xorl %r11d,%r15d - addl %eax,%r12d - - movl %ebx,%eax - addl %r13d,%r12d + xorl %edx,%eax + xorl %ebx,%r14d addl %r15d,%r12d - movl %ebx,%r13d - movl %ebx,%r14d + movl %ecx,%r15d - rorl $2,%eax - rorl $13,%r13d - movl %ebx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d - xorl %r13d,%eax - rorl $9,%r13d - orl %edx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax - xorl %r13d,%eax - andl %edx,%r15d addl %r12d,%r8d - - andl %ecx,%r14d addl %r12d,%eax - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%eax - movl 36(%rsp),%r13d - movl 24(%rsp),%r12d - movl %r13d,%r15d + movl 36(%rsp),%r13d + movl 24(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 4(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 4(%rsp),%r12d + xorl %r15d,%r14d addl 
32(%rsp),%r12d movl %r8d,%r13d - movl %r8d,%r14d + addl %r14d,%r12d + movl %eax,%r14d + rorl $14,%r13d movl %r9d,%r15d + movl %r12d,32(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r8d,%r15d - movl %r12d,32(%rsp) + movl %ebx,%r11d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - addl %r11d,%r12d - - movl %eax,%r11d - addl %r13d,%r12d + xorl %ecx,%r11d + xorl %eax,%r14d addl %r15d,%r12d - movl %eax,%r13d - movl %eax,%r14d - - rorl $2,%r11d - rorl $13,%r13d - movl %eax,%r15d - addl (%rbp,%rdi,4),%r12d - - xorl %r13d,%r11d - rorl $9,%r13d - orl %ecx,%r14d + movl %ebx,%r15d - xorl %r13d,%r11d + rorl $6,%r13d + andl %eax,%r11d andl %ecx,%r15d - addl %r12d,%edx - andl %ebx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx addl %r12d,%r11d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r11d - movl 40(%rsp),%r13d - movl 28(%rsp),%r12d - movl %r13d,%r15d + movl 40(%rsp),%r13d + movl 28(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 8(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 8(%rsp),%r12d + xorl %r15d,%r14d addl 36(%rsp),%r12d movl %edx,%r13d - movl %edx,%r14d + addl %r14d,%r12d + movl %r11d,%r14d + rorl $14,%r13d movl %r8d,%r15d + movl %r12d,36(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %edx,%r13d xorl %r9d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %edx,%r15d - movl %r12d,36(%rsp) + movl %eax,%r10d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %edx,%r13d xorl %r9d,%r15d - addl %r10d,%r12d - - movl %r11d,%r10d - addl %r13d,%r12d + xorl %ebx,%r10d + xorl %r11d,%r14d addl %r15d,%r12d - movl %r11d,%r13d - movl %r11d,%r14d + movl %eax,%r15d - rorl $2,%r10d - rorl $13,%r13d - movl %r11d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d - xorl %r13d,%r10d - rorl $9,%r13d - orl %ebx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d - xorl %r13d,%r10d - andl %ebx,%r15d addl %r12d,%ecx - - andl %eax,%r14d addl %r12d,%r10d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r10d - movl 44(%rsp),%r13d - movl 32(%rsp),%r12d - movl %r13d,%r15d + movl 44(%rsp),%r13d + movl 32(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 12(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 12(%rsp),%r12d + xorl %r15d,%r14d addl 40(%rsp),%r12d movl %ecx,%r13d - movl %ecx,%r14d + addl %r14d,%r12d + movl %r10d,%r14d + rorl $14,%r13d movl %edx,%r15d + movl %r12d,40(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %ecx,%r15d - movl 
%r12d,40(%rsp) + movl %r11d,%r9d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - addl %r9d,%r12d - - movl %r10d,%r9d - addl %r13d,%r12d + xorl %eax,%r9d + xorl %r10d,%r14d addl %r15d,%r12d - movl %r10d,%r13d - movl %r10d,%r14d + movl %r11d,%r15d - rorl $2,%r9d - rorl $13,%r13d - movl %r10d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d - xorl %r13d,%r9d - rorl $9,%r13d - orl %eax,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d - xorl %r13d,%r9d - andl %eax,%r15d addl %r12d,%ebx - - andl %r11d,%r14d addl %r12d,%r9d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r9d - movl 48(%rsp),%r13d - movl 36(%rsp),%r12d - movl %r13d,%r15d + movl 48(%rsp),%r13d + movl 36(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 16(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 16(%rsp),%r12d + xorl %r15d,%r14d addl 44(%rsp),%r12d movl %ebx,%r13d - movl %ebx,%r14d + addl %r14d,%r12d + movl %r9d,%r14d + rorl $14,%r13d movl %ecx,%r15d + movl %r12d,44(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %ebx,%r13d xorl %edx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %ebx,%r15d - movl %r12d,44(%rsp) + movl %r10d,%r8d - xorl %r14d,%r13d + rorl $11,%r14d + xorl %ebx,%r13d xorl %edx,%r15d - addl %r8d,%r12d - - movl %r9d,%r8d - addl %r13d,%r12d + xorl %r11d,%r8d + xorl %r9d,%r14d addl %r15d,%r12d - movl %r9d,%r13d - movl %r9d,%r14d + movl %r10d,%r15d - rorl $2,%r8d - rorl $13,%r13d - movl %r9d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d - xorl %r13d,%r8d - rorl $9,%r13d - orl %r11d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d - xorl %r13d,%r8d - andl %r11d,%r15d addl %r12d,%eax - - andl %r10d,%r14d addl %r12d,%r8d - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%r8d - movl 52(%rsp),%r13d - movl 40(%rsp),%r12d - movl %r13d,%r15d + movl 52(%rsp),%r13d + movl 40(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 20(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 20(%rsp),%r12d + xorl %r15d,%r14d addl 48(%rsp),%r12d movl %eax,%r13d - movl %eax,%r14d + addl %r14d,%r12d + movl %r8d,%r14d + rorl $14,%r13d movl %ebx,%r15d + movl %r12d,48(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d andl %eax,%r15d - movl %r12d,48(%rsp) + movl %r9d,%edx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - addl %edx,%r12d - - movl %r8d,%edx - addl %r13d,%r12d + xorl %r10d,%edx + xorl %r8d,%r14d addl %r15d,%r12d - movl %r8d,%r13d - movl %r8d,%r14d + movl %r9d,%r15d - rorl $2,%edx - rorl $13,%r13d - movl %r8d,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %r8d,%edx + andl 
%r10d,%r15d - xorl %r13d,%edx - rorl $9,%r13d - orl %r10d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx - xorl %r13d,%edx - andl %r10d,%r15d addl %r12d,%r11d - - andl %r9d,%r14d addl %r12d,%edx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%edx - movl 56(%rsp),%r13d - movl 44(%rsp),%r12d - movl %r13d,%r15d + movl 56(%rsp),%r13d + movl 44(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 24(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 24(%rsp),%r12d + xorl %r15d,%r14d addl 52(%rsp),%r12d movl %r11d,%r13d - movl %r11d,%r14d + addl %r14d,%r12d + movl %edx,%r14d + rorl $14,%r13d movl %eax,%r15d + movl %r12d,52(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r11d,%r13d xorl %ebx,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r11d,%r15d - movl %r12d,52(%rsp) + movl %r8d,%ecx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r11d,%r13d xorl %ebx,%r15d - addl %ecx,%r12d - - movl %edx,%ecx - addl %r13d,%r12d + xorl %r9d,%ecx + xorl %edx,%r14d addl %r15d,%r12d - movl %edx,%r13d - movl %edx,%r14d + movl %r8d,%r15d - rorl $2,%ecx - rorl $13,%r13d - movl %edx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d - xorl %r13d,%ecx - rorl $9,%r13d - orl %r9d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx - xorl %r13d,%ecx - andl %r9d,%r15d addl %r12d,%r10d - - andl %r8d,%r14d addl %r12d,%ecx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%ecx - movl 60(%rsp),%r13d - movl 48(%rsp),%r12d - movl %r13d,%r15d + movl 60(%rsp),%r13d + movl 48(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 28(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 28(%rsp),%r12d + xorl %r15d,%r14d addl 56(%rsp),%r12d movl %r10d,%r13d - movl %r10d,%r14d + addl %r14d,%r12d + movl %ecx,%r14d + rorl $14,%r13d movl %r11d,%r15d + movl %r12d,56(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r10d,%r15d - movl %r12d,56(%rsp) + movl %edx,%ebx - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - addl %ebx,%r12d - - movl %ecx,%ebx - addl %r13d,%r12d + xorl %r8d,%ebx + xorl %ecx,%r14d addl %r15d,%r12d - movl %ecx,%r13d - movl %ecx,%r14d + movl %edx,%r15d - rorl $2,%ebx - rorl $13,%r13d - movl %ecx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d - xorl %r13d,%ebx - rorl $9,%r13d - orl %r8d,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx - xorl %r13d,%ebx - andl %r8d,%r15d addl %r12d,%r9d - - andl %edx,%r14d addl %r12d,%ebx - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%ebx - movl 0(%rsp),%r13d - movl 52(%rsp),%r12d - movl %r13d,%r15d + movl 0(%rsp),%r13d + movl 52(%rsp),%r14d + movl %r13d,%r12d + movl 
%r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d shrl $3,%r13d - rorl $7,%r15d - - xorl %r15d,%r13d - rorl $11,%r15d - - xorl %r15d,%r13d - movl %r12d,%r14d - shrl $10,%r12d - rorl $17,%r14d - - xorl %r14d,%r12d - rorl $2,%r14d + rorl $7,%r12d + xorl %r12d,%r13d + movl 32(%rsp),%r12d - xorl %r14d,%r12d + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d addl %r13d,%r12d - - addl 32(%rsp),%r12d + xorl %r15d,%r14d addl 60(%rsp),%r12d movl %r9d,%r13d - movl %r9d,%r14d + addl %r14d,%r12d + movl %ebx,%r14d + rorl $14,%r13d movl %r10d,%r15d + movl %r12d,60(%rsp) - rorl $6,%r13d - rorl $11,%r14d + rorl $9,%r14d + xorl %r9d,%r13d xorl %r11d,%r15d - xorl %r14d,%r13d - rorl $14,%r14d + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d andl %r9d,%r15d - movl %r12d,60(%rsp) + movl %ecx,%eax - xorl %r14d,%r13d + rorl $11,%r14d + xorl %r9d,%r13d xorl %r11d,%r15d - addl %eax,%r12d - - movl %ebx,%eax - addl %r13d,%r12d + xorl %edx,%eax + xorl %ebx,%r14d addl %r15d,%r12d - movl %ebx,%r13d - movl %ebx,%r14d + movl %ecx,%r15d - rorl $2,%eax - rorl $13,%r13d - movl %ebx,%r15d - addl (%rbp,%rdi,4),%r12d + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d - xorl %r13d,%eax - rorl $9,%r13d - orl %edx,%r14d + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax - xorl %r13d,%eax - andl %edx,%r15d addl %r12d,%r8d - - andl %ecx,%r14d addl %r12d,%eax - - orl %r15d,%r14d leaq 1(%rdi),%rdi - addl %r14d,%eax + cmpq $64,%rdi jb L$rounds_16_xx diff --git a/deps/openssl/asm/x64-macosx-gas/x86_64cpuid.s b/deps/openssl/asm/x64-macosx-gas/x86_64cpuid.s index a1670e38e8..21e8a8fc2e 100644 --- a/deps/openssl/asm/x64-macosx-gas/x86_64cpuid.s +++ b/deps/openssl/asm/x64-macosx-gas/x86_64cpuid.s @@ -1,8 +1,12 @@ +.private_extern _OPENSSL_cpuid_setup .mod_init_func .p2align 3 .quad _OPENSSL_cpuid_setup +.private_extern _OPENSSL_ia32cap_P +.comm _OPENSSL_ia32cap_P,8,2 + .text @@ -68,7 +72,15 @@ _OPENSSL_ia32_cpuid: movl $2147483648,%eax cpuid - cmpl $2147483656,%eax + cmpl $2147483649,%eax + jb L$intel + movl %eax,%r10d + movl $2147483649,%eax + cpuid + orl %ecx,%r9d + andl $2049,%r9d + + cmpl $2147483656,%r10d jb L$intel movl $2147483656,%eax @@ -79,12 +91,12 @@ _OPENSSL_ia32_cpuid: movl $1,%eax cpuid btl $28,%edx - jnc L$done + jnc L$generic shrl $16,%ebx cmpb %r10b,%bl - ja L$done + ja L$generic andl $4026531839,%edx - jmp L$done + jmp L$generic L$intel: cmpl $4,%r11d @@ -101,30 +113,48 @@ L$intel: L$nocacheinfo: movl $1,%eax cpuid + andl $3220176895,%edx cmpl $0,%r9d jne L$notintel - orl $1048576,%edx + orl $1073741824,%edx andb $15,%ah cmpb $15,%ah - je L$notintel - orl $1073741824,%edx + jne L$notintel + orl $1048576,%edx L$notintel: btl $28,%edx - jnc L$done + jnc L$generic andl $4026531839,%edx cmpl $0,%r10d - je L$done + je L$generic orl $268435456,%edx shrl $16,%ebx cmpb $1,%bl - ja L$done + ja L$generic andl $4026531839,%edx +L$generic: + andl $2048,%r9d + andl $4294965247,%ecx + orl %ecx,%r9d + + movl %edx,%r10d + btl $27,%r9d + jnc L$clear_avx + xorl %ecx,%ecx +.byte 0x0f,0x01,0xd0 + + andl $6,%eax + cmpl $6,%eax + je L$done +L$clear_avx: + movl $4026525695,%eax + andl %eax,%r9d L$done: - shlq $32,%rcx - movl %edx,%eax + shlq $32,%r9 + movl %r10d,%eax movq %r8,%rbx - orq %rcx,%rax + orq %r9,%rax .byte 0xf3,0xc3 @@ -193,3 +223,16 @@ _OPENSSL_wipe_cpu: leaq 8(%rsp),%rax .byte 0xf3,0xc3 +.globl _OPENSSL_ia32_rdrand + +.p2align 4 +_OPENSSL_ia32_rdrand: + movl $8,%ecx +L$oop_rdrand: +.byte 72,15,199,240 + jc L$break_rdrand + loop L$oop_rdrand +L$break_rdrand: + cmpq $0,%rax + cmoveq 
%rcx,%rax + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm/x64-win32-masm/aes/aes-x86_64.asm b/deps/openssl/asm/x64-win32-masm/aes/aes-x86_64.asm index 2c590b94f4..b9f6fd081b 100644 --- a/deps/openssl/asm/x64-win32-masm/aes/aes-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/aes/aes-x86_64.asm @@ -333,6 +333,9 @@ _x86_64_AES_encrypt_compact ENDP PUBLIC AES_encrypt ALIGN 16 +PUBLIC asm_AES_encrypt + +asm_AES_encrypt:: AES_encrypt PROC PUBLIC mov QWORD PTR[8+rsp],rdi ;WIN64 prologue mov QWORD PTR[16+rsp],rsi @@ -792,6 +795,9 @@ _x86_64_AES_decrypt_compact ENDP PUBLIC AES_decrypt ALIGN 16 +PUBLIC asm_AES_decrypt + +asm_AES_decrypt:: AES_decrypt PROC PUBLIC mov QWORD PTR[8+rsp],rdi ;WIN64 prologue mov QWORD PTR[16+rsp],rsi @@ -867,14 +873,14 @@ $L$dec_epilogue:: DB 0F3h,0C3h ;repret $L$SEH_end_AES_decrypt:: AES_decrypt ENDP -PUBLIC AES_set_encrypt_key +PUBLIC private_AES_set_encrypt_key ALIGN 16 -AES_set_encrypt_key PROC PUBLIC +private_AES_set_encrypt_key PROC PUBLIC mov QWORD PTR[8+rsp],rdi ;WIN64 prologue mov QWORD PTR[16+rsp],rsi mov rax,rsp -$L$SEH_begin_AES_set_encrypt_key:: +$L$SEH_begin_private_AES_set_encrypt_key:: mov rdi,rcx mov rsi,rdx mov rdx,r8 @@ -902,8 +908,8 @@ $L$enc_key_epilogue:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_AES_set_encrypt_key:: -AES_set_encrypt_key ENDP +$L$SEH_end_private_AES_set_encrypt_key:: +private_AES_set_encrypt_key ENDP ALIGN 16 @@ -1145,14 +1151,14 @@ $L$exit:: DB 0f3h,0c3h _x86_64_AES_set_encrypt_key ENDP -PUBLIC AES_set_decrypt_key +PUBLIC private_AES_set_decrypt_key ALIGN 16 -AES_set_decrypt_key PROC PUBLIC +private_AES_set_decrypt_key PROC PUBLIC mov QWORD PTR[8+rsp],rdi ;WIN64 prologue mov QWORD PTR[16+rsp],rsi mov rax,rsp -$L$SEH_begin_AES_set_decrypt_key:: +$L$SEH_begin_private_AES_set_decrypt_key:: mov rdi,rcx mov rsi,rdx mov rdx,r8 @@ -1342,12 +1348,15 @@ $L$dec_key_epilogue:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_AES_set_decrypt_key:: -AES_set_decrypt_key ENDP +$L$SEH_end_private_AES_set_decrypt_key:: +private_AES_set_decrypt_key ENDP PUBLIC AES_cbc_encrypt ALIGN 16 EXTERN OPENSSL_ia32cap_P:NEAR +PUBLIC asm_AES_cbc_encrypt + +asm_AES_cbc_encrypt:: AES_cbc_encrypt PROC PUBLIC mov QWORD PTR[8+rsp],rdi ;WIN64 prologue mov QWORD PTR[16+rsp],rsi @@ -2842,13 +2851,13 @@ ALIGN 4 DD imagerel $L$SEH_end_AES_decrypt DD imagerel $L$SEH_info_AES_decrypt - DD imagerel $L$SEH_begin_AES_set_encrypt_key - DD imagerel $L$SEH_end_AES_set_encrypt_key - DD imagerel $L$SEH_info_AES_set_encrypt_key + DD imagerel $L$SEH_begin_private_AES_set_encrypt_key + DD imagerel $L$SEH_end_private_AES_set_encrypt_key + DD imagerel $L$SEH_info_private_AES_set_encrypt_key - DD imagerel $L$SEH_begin_AES_set_decrypt_key - DD imagerel $L$SEH_end_AES_set_decrypt_key - DD imagerel $L$SEH_info_AES_set_decrypt_key + DD imagerel $L$SEH_begin_private_AES_set_decrypt_key + DD imagerel $L$SEH_end_private_AES_set_decrypt_key + DD imagerel $L$SEH_info_private_AES_set_decrypt_key DD imagerel $L$SEH_begin_AES_cbc_encrypt DD imagerel $L$SEH_end_AES_cbc_encrypt @@ -2867,12 +2876,12 @@ DB 9,0,0,0 DD imagerel block_se_handler DD imagerel $L$dec_prologue,imagerel $L$dec_epilogue -$L$SEH_info_AES_set_encrypt_key:: +$L$SEH_info_private_AES_set_encrypt_key:: DB 9,0,0,0 DD imagerel key_se_handler DD imagerel $L$enc_key_prologue,imagerel $L$enc_key_epilogue -$L$SEH_info_AES_set_decrypt_key:: +$L$SEH_info_private_AES_set_decrypt_key:: DB 9,0,0,0 DD imagerel key_se_handler DD imagerel 
$L$dec_key_prologue,imagerel $L$dec_key_epilogue diff --git a/deps/openssl/asm/x64-win32-masm/aes/aesni-sha1-x86_64.asm b/deps/openssl/asm/x64-win32-masm/aes/aesni-sha1-x86_64.asm new file mode 100644 index 0000000000..3f205a16a0 --- /dev/null +++ b/deps/openssl/asm/x64-win32-masm/aes/aesni-sha1-x86_64.asm @@ -0,0 +1,1554 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(64) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC aesni_cbc_sha1_enc + +ALIGN 16 +aesni_cbc_sha1_enc PROC PUBLIC + + mov r10d,DWORD PTR[((OPENSSL_ia32cap_P+0))] + mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+4))] + jmp aesni_cbc_sha1_enc_ssse3 + DB 0F3h,0C3h ;repret +aesni_cbc_sha1_enc ENDP + +ALIGN 16 +aesni_cbc_sha1_enc_ssse3 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_cbc_sha1_enc_ssse3:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + mov r10,QWORD PTR[56+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-264))+rsp] + + + movaps XMMWORD PTR[(96+0)+rsp],xmm6 + movaps XMMWORD PTR[(96+16)+rsp],xmm7 + movaps XMMWORD PTR[(96+32)+rsp],xmm8 + movaps XMMWORD PTR[(96+48)+rsp],xmm9 + movaps XMMWORD PTR[(96+64)+rsp],xmm10 + movaps XMMWORD PTR[(96+80)+rsp],xmm11 + movaps XMMWORD PTR[(96+96)+rsp],xmm12 + movaps XMMWORD PTR[(96+112)+rsp],xmm13 + movaps XMMWORD PTR[(96+128)+rsp],xmm14 + movaps XMMWORD PTR[(96+144)+rsp],xmm15 +$L$prologue_ssse3:: + mov r12,rdi + mov r13,rsi + mov r14,rdx + mov r15,rcx + movdqu xmm11,XMMWORD PTR[r8] + mov QWORD PTR[88+rsp],r8 + shl r14,6 + sub r13,r12 + mov r8d,DWORD PTR[240+r15] + add r14,r10 + + lea r11,QWORD PTR[K_XX_XX] + mov eax,DWORD PTR[r9] + mov ebx,DWORD PTR[4+r9] + mov ecx,DWORD PTR[8+r9] + mov edx,DWORD PTR[12+r9] + mov esi,ebx + mov ebp,DWORD PTR[16+r9] + + movdqa xmm6,XMMWORD PTR[64+r11] + movdqa xmm9,XMMWORD PTR[r11] + movdqu xmm0,XMMWORD PTR[r10] + movdqu xmm1,XMMWORD PTR[16+r10] + movdqu xmm2,XMMWORD PTR[32+r10] + movdqu xmm3,XMMWORD PTR[48+r10] +DB 102,15,56,0,198 + add r10,64 +DB 102,15,56,0,206 +DB 102,15,56,0,214 +DB 102,15,56,0,222 + paddd xmm0,xmm9 + paddd xmm1,xmm9 + paddd xmm2,xmm9 + movdqa XMMWORD PTR[rsp],xmm0 + psubd xmm0,xmm9 + movdqa XMMWORD PTR[16+rsp],xmm1 + psubd xmm1,xmm9 + movdqa XMMWORD PTR[32+rsp],xmm2 + psubd xmm2,xmm9 + movups xmm13,XMMWORD PTR[r15] + movups xmm14,XMMWORD PTR[16+r15] + jmp $L$oop_ssse3 +ALIGN 16 +$L$oop_ssse3:: + movdqa xmm4,xmm1 + add ebp,DWORD PTR[rsp] + movups xmm12,XMMWORD PTR[r12] + xorps xmm12,xmm13 + xorps xmm11,xmm12 +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[32+r15] + xor ecx,edx + movdqa xmm8,xmm3 +DB 102,15,58,15,224,8 + mov edi,eax + rol eax,5 + paddd xmm9,xmm3 + and esi,ecx + xor ecx,edx + psrldq xmm8,4 + xor esi,edx + add ebp,eax + pxor xmm4,xmm0 + ror ebx,2 + add ebp,esi + pxor xmm8,xmm2 + add edx,DWORD PTR[4+rsp] + xor ebx,ecx + mov esi,ebp + rol ebp,5 + pxor xmm4,xmm8 + and edi,ebx + xor ebx,ecx + movdqa XMMWORD PTR[48+rsp],xmm9 + xor edi,ecx +DB 102,69,15,56,220,223 + movups xmm14,XMMWORD PTR[48+r15] + add edx,ebp + movdqa xmm10,xmm4 + movdqa xmm8,xmm4 + ror eax,7 + add edx,edi + add ecx,DWORD PTR[8+rsp] + xor eax,ebx + pslldq xmm10,12 + paddd xmm4,xmm4 + mov edi,edx + rol edx,5 + and esi,eax + xor eax,ebx + psrld xmm8,31 + xor esi,ebx + add ecx,edx + movdqa xmm9,xmm10 + ror ebp,7 + add ecx,esi + psrld xmm10,30 + por xmm4,xmm8 + add ebx,DWORD PTR[12+rsp] + xor ebp,eax + mov esi,ecx + rol ecx,5 +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD 
PTR[64+r15] + pslld xmm9,2 + pxor xmm4,xmm10 + and edi,ebp + xor ebp,eax + movdqa xmm10,XMMWORD PTR[r11] + xor edi,eax + add ebx,ecx + pxor xmm4,xmm9 + ror edx,7 + add ebx,edi + movdqa xmm5,xmm2 + add eax,DWORD PTR[16+rsp] + xor edx,ebp + movdqa xmm9,xmm4 +DB 102,15,58,15,233,8 + mov edi,ebx + rol ebx,5 + paddd xmm10,xmm4 + and esi,edx + xor edx,ebp + psrldq xmm9,4 + xor esi,ebp + add eax,ebx + pxor xmm5,xmm1 + ror ecx,7 + add eax,esi + pxor xmm9,xmm3 + add ebp,DWORD PTR[20+rsp] +DB 102,69,15,56,220,223 + movups xmm14,XMMWORD PTR[80+r15] + xor ecx,edx + mov esi,eax + rol eax,5 + pxor xmm5,xmm9 + and edi,ecx + xor ecx,edx + movdqa XMMWORD PTR[rsp],xmm10 + xor edi,edx + add ebp,eax + movdqa xmm8,xmm5 + movdqa xmm9,xmm5 + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[24+rsp] + xor ebx,ecx + pslldq xmm8,12 + paddd xmm5,xmm5 + mov edi,ebp + rol ebp,5 + and esi,ebx + xor ebx,ecx + psrld xmm9,31 + xor esi,ecx +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[96+r15] + add edx,ebp + movdqa xmm10,xmm8 + ror eax,7 + add edx,esi + psrld xmm8,30 + por xmm5,xmm9 + add ecx,DWORD PTR[28+rsp] + xor eax,ebx + mov esi,edx + rol edx,5 + pslld xmm10,2 + pxor xmm5,xmm8 + and edi,eax + xor eax,ebx + movdqa xmm8,XMMWORD PTR[16+r11] + xor edi,ebx + add ecx,edx + pxor xmm5,xmm10 + ror ebp,7 + add ecx,edi + movdqa xmm6,xmm3 + add ebx,DWORD PTR[32+rsp] + xor ebp,eax + movdqa xmm10,xmm5 +DB 102,15,58,15,242,8 + mov edi,ecx + rol ecx,5 +DB 102,69,15,56,220,223 + movups xmm14,XMMWORD PTR[112+r15] + paddd xmm8,xmm5 + and esi,ebp + xor ebp,eax + psrldq xmm10,4 + xor esi,eax + add ebx,ecx + pxor xmm6,xmm2 + ror edx,7 + add ebx,esi + pxor xmm10,xmm4 + add eax,DWORD PTR[36+rsp] + xor edx,ebp + mov esi,ebx + rol ebx,5 + pxor xmm6,xmm10 + and edi,edx + xor edx,ebp + movdqa XMMWORD PTR[16+rsp],xmm8 + xor edi,ebp + add eax,ebx + movdqa xmm9,xmm6 + movdqa xmm10,xmm6 + ror ecx,7 + add eax,edi + add ebp,DWORD PTR[40+rsp] +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[128+r15] + xor ecx,edx + pslldq xmm9,12 + paddd xmm6,xmm6 + mov edi,eax + rol eax,5 + and esi,ecx + xor ecx,edx + psrld xmm10,31 + xor esi,edx + add ebp,eax + movdqa xmm8,xmm9 + ror ebx,7 + add ebp,esi + psrld xmm9,30 + por xmm6,xmm10 + add edx,DWORD PTR[44+rsp] + xor ebx,ecx + mov esi,ebp + rol ebp,5 + pslld xmm8,2 + pxor xmm6,xmm9 + and edi,ebx + xor ebx,ecx + movdqa xmm9,XMMWORD PTR[16+r11] + xor edi,ecx +DB 102,69,15,56,220,223 + movups xmm14,XMMWORD PTR[144+r15] + add edx,ebp + pxor xmm6,xmm8 + ror eax,7 + add edx,edi + movdqa xmm7,xmm4 + add ecx,DWORD PTR[48+rsp] + xor eax,ebx + movdqa xmm8,xmm6 +DB 102,15,58,15,251,8 + mov edi,edx + rol edx,5 + paddd xmm9,xmm6 + and esi,eax + xor eax,ebx + psrldq xmm8,4 + xor esi,ebx + add ecx,edx + pxor xmm7,xmm3 + ror ebp,7 + add ecx,esi + pxor xmm8,xmm5 + add ebx,DWORD PTR[52+rsp] + xor ebp,eax + mov esi,ecx + rol ecx,5 +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[160+r15] + pxor xmm7,xmm8 + and edi,ebp + xor ebp,eax + movdqa XMMWORD PTR[32+rsp],xmm9 + xor edi,eax + add ebx,ecx + movdqa xmm10,xmm7 + movdqa xmm8,xmm7 + ror edx,7 + add ebx,edi + add eax,DWORD PTR[56+rsp] + xor edx,ebp + pslldq xmm10,12 + paddd xmm7,xmm7 + mov edi,ebx + rol ebx,5 + and esi,edx + xor edx,ebp + psrld xmm8,31 + xor esi,ebp + add eax,ebx + movdqa xmm9,xmm10 + ror ecx,7 + add eax,esi + psrld xmm10,30 + por xmm7,xmm8 + add ebp,DWORD PTR[60+rsp] + cmp r8d,11 + jb $L$aesenclast1 + movups xmm14,XMMWORD PTR[176+r15] +DB 102,69,15,56,220,223 + movups xmm15,XMMWORD PTR[192+r15] +DB 102,69,15,56,220,222 + je $L$aesenclast1 + movups xmm14,XMMWORD 
PTR[208+r15] +DB 102,69,15,56,220,223 + movups xmm15,XMMWORD PTR[224+r15] +DB 102,69,15,56,220,222 +$L$aesenclast1:: +DB 102,69,15,56,221,223 + movups xmm14,XMMWORD PTR[16+r15] + xor ecx,edx + mov esi,eax + rol eax,5 + pslld xmm9,2 + pxor xmm7,xmm10 + and edi,ecx + xor ecx,edx + movdqa xmm10,XMMWORD PTR[16+r11] + xor edi,edx + add ebp,eax + pxor xmm7,xmm9 + ror ebx,7 + add ebp,edi + movdqa xmm9,xmm7 + add edx,DWORD PTR[rsp] + pxor xmm0,xmm4 +DB 102,68,15,58,15,206,8 + xor ebx,ecx + mov edi,ebp + rol ebp,5 + pxor xmm0,xmm1 + and esi,ebx + xor ebx,ecx + movdqa xmm8,xmm10 + paddd xmm10,xmm7 + xor esi,ecx + movups xmm12,XMMWORD PTR[16+r12] + xorps xmm12,xmm13 + movups XMMWORD PTR[r12*1+r13],xmm11 + xorps xmm11,xmm12 +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[32+r15] + add edx,ebp + pxor xmm0,xmm9 + ror eax,7 + add edx,esi + add ecx,DWORD PTR[4+rsp] + xor eax,ebx + movdqa xmm9,xmm0 + movdqa XMMWORD PTR[48+rsp],xmm10 + mov esi,edx + rol edx,5 + and edi,eax + xor eax,ebx + pslld xmm0,2 + xor edi,ebx + add ecx,edx + psrld xmm9,30 + ror ebp,7 + add ecx,edi + add ebx,DWORD PTR[8+rsp] + xor ebp,eax + mov edi,ecx + rol ecx,5 +DB 102,69,15,56,220,223 + movups xmm14,XMMWORD PTR[48+r15] + por xmm0,xmm9 + and esi,ebp + xor ebp,eax + movdqa xmm10,xmm0 + xor esi,eax + add ebx,ecx + ror edx,7 + add ebx,esi + add eax,DWORD PTR[12+rsp] + xor edx,ebp + mov esi,ebx + rol ebx,5 + and edi,edx + xor edx,ebp + xor edi,ebp + add eax,ebx + ror ecx,7 + add eax,edi + add ebp,DWORD PTR[16+rsp] +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[64+r15] + pxor xmm1,xmm5 +DB 102,68,15,58,15,215,8 + xor esi,edx + mov edi,eax + rol eax,5 + pxor xmm1,xmm2 + xor esi,ecx + add ebp,eax + movdqa xmm9,xmm8 + paddd xmm8,xmm0 + ror ebx,7 + add ebp,esi + pxor xmm1,xmm10 + add edx,DWORD PTR[20+rsp] + xor edi,ecx + mov esi,ebp + rol ebp,5 + movdqa xmm10,xmm1 + movdqa XMMWORD PTR[rsp],xmm8 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + pslld xmm1,2 + add ecx,DWORD PTR[24+rsp] + xor esi,ebx + psrld xmm10,30 + mov edi,edx + rol edx,5 + xor esi,eax +DB 102,69,15,56,220,223 + movups xmm14,XMMWORD PTR[80+r15] + add ecx,edx + ror ebp,7 + add ecx,esi + por xmm1,xmm10 + add ebx,DWORD PTR[28+rsp] + xor edi,eax + movdqa xmm8,xmm1 + mov esi,ecx + rol ecx,5 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + add eax,DWORD PTR[32+rsp] + pxor xmm2,xmm6 +DB 102,68,15,58,15,192,8 + xor esi,ebp + mov edi,ebx + rol ebx,5 + pxor xmm2,xmm3 + xor esi,edx + add eax,ebx + movdqa xmm10,XMMWORD PTR[32+r11] + paddd xmm9,xmm1 + ror ecx,7 + add eax,esi + pxor xmm2,xmm8 + add ebp,DWORD PTR[36+rsp] +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[96+r15] + xor edi,edx + mov esi,eax + rol eax,5 + movdqa xmm8,xmm2 + movdqa XMMWORD PTR[16+rsp],xmm9 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + pslld xmm2,2 + add edx,DWORD PTR[40+rsp] + xor esi,ecx + psrld xmm8,30 + mov edi,ebp + rol ebp,5 + xor esi,ebx + add edx,ebp + ror eax,7 + add edx,esi + por xmm2,xmm8 + add ecx,DWORD PTR[44+rsp] + xor edi,ebx + movdqa xmm9,xmm2 + mov esi,edx + rol edx,5 + xor edi,eax +DB 102,69,15,56,220,223 + movups xmm14,XMMWORD PTR[112+r15] + add ecx,edx + ror ebp,7 + add ecx,edi + add ebx,DWORD PTR[48+rsp] + pxor xmm3,xmm7 +DB 102,68,15,58,15,201,8 + xor esi,eax + mov edi,ecx + rol ecx,5 + pxor xmm3,xmm4 + xor esi,ebp + add ebx,ecx + movdqa xmm8,xmm10 + paddd xmm10,xmm2 + ror edx,7 + add ebx,esi + pxor xmm3,xmm9 + add eax,DWORD PTR[52+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + movdqa xmm9,xmm3 + movdqa XMMWORD PTR[32+rsp],xmm10 + xor edi,edx + add 
eax,ebx + ror ecx,7 + add eax,edi + pslld xmm3,2 + add ebp,DWORD PTR[56+rsp] +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[128+r15] + xor esi,edx + psrld xmm9,30 + mov edi,eax + rol eax,5 + xor esi,ecx + add ebp,eax + ror ebx,7 + add ebp,esi + por xmm3,xmm9 + add edx,DWORD PTR[60+rsp] + xor edi,ecx + movdqa xmm10,xmm3 + mov esi,ebp + rol ebp,5 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + add ecx,DWORD PTR[rsp] + pxor xmm4,xmm0 +DB 102,68,15,58,15,210,8 + xor esi,ebx + mov edi,edx + rol edx,5 + pxor xmm4,xmm5 + xor esi,eax +DB 102,69,15,56,220,223 + movups xmm14,XMMWORD PTR[144+r15] + add ecx,edx + movdqa xmm9,xmm8 + paddd xmm8,xmm3 + ror ebp,7 + add ecx,esi + pxor xmm4,xmm10 + add ebx,DWORD PTR[4+rsp] + xor edi,eax + mov esi,ecx + rol ecx,5 + movdqa xmm10,xmm4 + movdqa XMMWORD PTR[48+rsp],xmm8 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + pslld xmm4,2 + add eax,DWORD PTR[8+rsp] + xor esi,ebp + psrld xmm10,30 + mov edi,ebx + rol ebx,5 + xor esi,edx + add eax,ebx + ror ecx,7 + add eax,esi + por xmm4,xmm10 + add ebp,DWORD PTR[12+rsp] +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[160+r15] + xor edi,edx + movdqa xmm8,xmm4 + mov esi,eax + rol eax,5 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[16+rsp] + pxor xmm5,xmm1 +DB 102,68,15,58,15,195,8 + xor esi,ecx + mov edi,ebp + rol ebp,5 + pxor xmm5,xmm6 + xor esi,ebx + add edx,ebp + movdqa xmm10,xmm9 + paddd xmm9,xmm4 + ror eax,7 + add edx,esi + pxor xmm5,xmm8 + add ecx,DWORD PTR[20+rsp] + xor edi,ebx + mov esi,edx + rol edx,5 + movdqa xmm8,xmm5 + movdqa XMMWORD PTR[rsp],xmm9 + xor edi,eax + cmp r8d,11 + jb $L$aesenclast2 + movups xmm14,XMMWORD PTR[176+r15] +DB 102,69,15,56,220,223 + movups xmm15,XMMWORD PTR[192+r15] +DB 102,69,15,56,220,222 + je $L$aesenclast2 + movups xmm14,XMMWORD PTR[208+r15] +DB 102,69,15,56,220,223 + movups xmm15,XMMWORD PTR[224+r15] +DB 102,69,15,56,220,222 +$L$aesenclast2:: +DB 102,69,15,56,221,223 + movups xmm14,XMMWORD PTR[16+r15] + add ecx,edx + ror ebp,7 + add ecx,edi + pslld xmm5,2 + add ebx,DWORD PTR[24+rsp] + xor esi,eax + psrld xmm8,30 + mov edi,ecx + rol ecx,5 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + por xmm5,xmm8 + add eax,DWORD PTR[28+rsp] + xor edi,ebp + movdqa xmm9,xmm5 + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + mov edi,ecx + movups xmm12,XMMWORD PTR[32+r12] + xorps xmm12,xmm13 + movups XMMWORD PTR[16+r12*1+r13],xmm11 + xorps xmm11,xmm12 +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[32+r15] + pxor xmm6,xmm2 +DB 102,68,15,58,15,204,8 + xor ecx,edx + add ebp,DWORD PTR[32+rsp] + and edi,edx + pxor xmm6,xmm7 + and esi,ecx + ror ebx,7 + movdqa xmm8,xmm10 + paddd xmm10,xmm5 + add ebp,edi + mov edi,eax + pxor xmm6,xmm9 + rol eax,5 + add ebp,esi + xor ecx,edx + add ebp,eax + movdqa xmm9,xmm6 + movdqa XMMWORD PTR[16+rsp],xmm10 + mov esi,ebx + xor ebx,ecx + add edx,DWORD PTR[36+rsp] + and esi,ecx + pslld xmm6,2 + and edi,ebx + ror eax,7 + psrld xmm9,30 + add edx,esi + mov esi,ebp + rol ebp,5 +DB 102,69,15,56,220,223 + movups xmm14,XMMWORD PTR[48+r15] + add edx,edi + xor ebx,ecx + add edx,ebp + por xmm6,xmm9 + mov edi,eax + xor eax,ebx + movdqa xmm10,xmm6 + add ecx,DWORD PTR[40+rsp] + and edi,ebx + and esi,eax + ror ebp,7 + add ecx,edi + mov edi,edx + rol edx,5 + add ecx,esi + xor eax,ebx + add ecx,edx + mov esi,ebp + xor ebp,eax + add ebx,DWORD PTR[44+rsp] + and esi,eax + and edi,ebp +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[64+r15] + ror edx,7 + add ebx,esi + mov esi,ecx + rol ecx,5 + add 
ebx,edi + xor ebp,eax + add ebx,ecx + mov edi,edx + pxor xmm7,xmm3 +DB 102,68,15,58,15,213,8 + xor edx,ebp + add eax,DWORD PTR[48+rsp] + and edi,ebp + pxor xmm7,xmm0 + and esi,edx + ror ecx,7 + movdqa xmm9,XMMWORD PTR[48+r11] + paddd xmm8,xmm6 + add eax,edi + mov edi,ebx + pxor xmm7,xmm10 + rol ebx,5 + add eax,esi + xor edx,ebp + add eax,ebx + movdqa xmm10,xmm7 + movdqa XMMWORD PTR[32+rsp],xmm8 + mov esi,ecx +DB 102,69,15,56,220,223 + movups xmm14,XMMWORD PTR[80+r15] + xor ecx,edx + add ebp,DWORD PTR[52+rsp] + and esi,edx + pslld xmm7,2 + and edi,ecx + ror ebx,7 + psrld xmm10,30 + add ebp,esi + mov esi,eax + rol eax,5 + add ebp,edi + xor ecx,edx + add ebp,eax + por xmm7,xmm10 + mov edi,ebx + xor ebx,ecx + movdqa xmm8,xmm7 + add edx,DWORD PTR[56+rsp] + and edi,ecx + and esi,ebx + ror eax,7 + add edx,edi + mov edi,ebp + rol ebp,5 +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[96+r15] + add edx,esi + xor ebx,ecx + add edx,ebp + mov esi,eax + xor eax,ebx + add ecx,DWORD PTR[60+rsp] + and esi,ebx + and edi,eax + ror ebp,7 + add ecx,esi + mov esi,edx + rol edx,5 + add ecx,edi + xor eax,ebx + add ecx,edx + mov edi,ebp + pxor xmm0,xmm4 +DB 102,68,15,58,15,198,8 + xor ebp,eax + add ebx,DWORD PTR[rsp] + and edi,eax + pxor xmm0,xmm1 + and esi,ebp +DB 102,69,15,56,220,223 + movups xmm14,XMMWORD PTR[112+r15] + ror edx,7 + movdqa xmm10,xmm9 + paddd xmm9,xmm7 + add ebx,edi + mov edi,ecx + pxor xmm0,xmm8 + rol ecx,5 + add ebx,esi + xor ebp,eax + add ebx,ecx + movdqa xmm8,xmm0 + movdqa XMMWORD PTR[48+rsp],xmm9 + mov esi,edx + xor edx,ebp + add eax,DWORD PTR[4+rsp] + and esi,ebp + pslld xmm0,2 + and edi,edx + ror ecx,7 + psrld xmm8,30 + add eax,esi + mov esi,ebx + rol ebx,5 + add eax,edi + xor edx,ebp + add eax,ebx + por xmm0,xmm8 + mov edi,ecx +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[128+r15] + xor ecx,edx + movdqa xmm9,xmm0 + add ebp,DWORD PTR[8+rsp] + and edi,edx + and esi,ecx + ror ebx,7 + add ebp,edi + mov edi,eax + rol eax,5 + add ebp,esi + xor ecx,edx + add ebp,eax + mov esi,ebx + xor ebx,ecx + add edx,DWORD PTR[12+rsp] + and esi,ecx + and edi,ebx + ror eax,7 + add edx,esi + mov esi,ebp + rol ebp,5 +DB 102,69,15,56,220,223 + movups xmm14,XMMWORD PTR[144+r15] + add edx,edi + xor ebx,ecx + add edx,ebp + mov edi,eax + pxor xmm1,xmm5 +DB 102,68,15,58,15,207,8 + xor eax,ebx + add ecx,DWORD PTR[16+rsp] + and edi,ebx + pxor xmm1,xmm2 + and esi,eax + ror ebp,7 + movdqa xmm8,xmm10 + paddd xmm10,xmm0 + add ecx,edi + mov edi,edx + pxor xmm1,xmm9 + rol edx,5 + add ecx,esi + xor eax,ebx + add ecx,edx + movdqa xmm9,xmm1 + movdqa XMMWORD PTR[rsp],xmm10 + mov esi,ebp + xor ebp,eax + add ebx,DWORD PTR[20+rsp] + and esi,eax + pslld xmm1,2 + and edi,ebp +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[160+r15] + ror edx,7 + psrld xmm9,30 + add ebx,esi + mov esi,ecx + rol ecx,5 + add ebx,edi + xor ebp,eax + add ebx,ecx + por xmm1,xmm9 + mov edi,edx + xor edx,ebp + movdqa xmm10,xmm1 + add eax,DWORD PTR[24+rsp] + and edi,ebp + and esi,edx + ror ecx,7 + add eax,edi + mov edi,ebx + rol ebx,5 + add eax,esi + xor edx,ebp + add eax,ebx + mov esi,ecx + cmp r8d,11 + jb $L$aesenclast3 + movups xmm14,XMMWORD PTR[176+r15] +DB 102,69,15,56,220,223 + movups xmm15,XMMWORD PTR[192+r15] +DB 102,69,15,56,220,222 + je $L$aesenclast3 + movups xmm14,XMMWORD PTR[208+r15] +DB 102,69,15,56,220,223 + movups xmm15,XMMWORD PTR[224+r15] +DB 102,69,15,56,220,222 +$L$aesenclast3:: +DB 102,69,15,56,221,223 + movups xmm14,XMMWORD PTR[16+r15] + xor ecx,edx + add ebp,DWORD PTR[28+rsp] + and esi,edx + and edi,ecx + ror ebx,7 + add 
ebp,esi + mov esi,eax + rol eax,5 + add ebp,edi + xor ecx,edx + add ebp,eax + mov edi,ebx + pxor xmm2,xmm6 +DB 102,68,15,58,15,208,8 + xor ebx,ecx + add edx,DWORD PTR[32+rsp] + and edi,ecx + pxor xmm2,xmm3 + and esi,ebx + ror eax,7 + movdqa xmm9,xmm8 + paddd xmm8,xmm1 + add edx,edi + mov edi,ebp + pxor xmm2,xmm10 + rol ebp,5 + movups xmm12,XMMWORD PTR[48+r12] + xorps xmm12,xmm13 + movups XMMWORD PTR[32+r12*1+r13],xmm11 + xorps xmm11,xmm12 +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[32+r15] + add edx,esi + xor ebx,ecx + add edx,ebp + movdqa xmm10,xmm2 + movdqa XMMWORD PTR[16+rsp],xmm8 + mov esi,eax + xor eax,ebx + add ecx,DWORD PTR[36+rsp] + and esi,ebx + pslld xmm2,2 + and edi,eax + ror ebp,7 + psrld xmm10,30 + add ecx,esi + mov esi,edx + rol edx,5 + add ecx,edi + xor eax,ebx + add ecx,edx + por xmm2,xmm10 + mov edi,ebp + xor ebp,eax + movdqa xmm8,xmm2 + add ebx,DWORD PTR[40+rsp] + and edi,eax + and esi,ebp +DB 102,69,15,56,220,223 + movups xmm14,XMMWORD PTR[48+r15] + ror edx,7 + add ebx,edi + mov edi,ecx + rol ecx,5 + add ebx,esi + xor ebp,eax + add ebx,ecx + mov esi,edx + xor edx,ebp + add eax,DWORD PTR[44+rsp] + and esi,ebp + and edi,edx + ror ecx,7 + add eax,esi + mov esi,ebx + rol ebx,5 + add eax,edi + xor edx,ebp + add eax,ebx + add ebp,DWORD PTR[48+rsp] +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[64+r15] + pxor xmm3,xmm7 +DB 102,68,15,58,15,193,8 + xor esi,edx + mov edi,eax + rol eax,5 + pxor xmm3,xmm4 + xor esi,ecx + add ebp,eax + movdqa xmm10,xmm9 + paddd xmm9,xmm2 + ror ebx,7 + add ebp,esi + pxor xmm3,xmm8 + add edx,DWORD PTR[52+rsp] + xor edi,ecx + mov esi,ebp + rol ebp,5 + movdqa xmm8,xmm3 + movdqa XMMWORD PTR[32+rsp],xmm9 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + pslld xmm3,2 + add ecx,DWORD PTR[56+rsp] + xor esi,ebx + psrld xmm8,30 + mov edi,edx + rol edx,5 + xor esi,eax +DB 102,69,15,56,220,223 + movups xmm14,XMMWORD PTR[80+r15] + add ecx,edx + ror ebp,7 + add ecx,esi + por xmm3,xmm8 + add ebx,DWORD PTR[60+rsp] + xor edi,eax + mov esi,ecx + rol ecx,5 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + add eax,DWORD PTR[rsp] + paddd xmm10,xmm3 + xor esi,ebp + mov edi,ebx + rol ebx,5 + xor esi,edx + movdqa XMMWORD PTR[48+rsp],xmm10 + add eax,ebx + ror ecx,7 + add eax,esi + add ebp,DWORD PTR[4+rsp] +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[96+r15] + xor edi,edx + mov esi,eax + rol eax,5 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[8+rsp] + xor esi,ecx + mov edi,ebp + rol ebp,5 + xor esi,ebx + add edx,ebp + ror eax,7 + add edx,esi + add ecx,DWORD PTR[12+rsp] + xor edi,ebx + mov esi,edx + rol edx,5 + xor edi,eax +DB 102,69,15,56,220,223 + movups xmm14,XMMWORD PTR[112+r15] + add ecx,edx + ror ebp,7 + add ecx,edi + cmp r10,r14 + je $L$done_ssse3 + movdqa xmm6,XMMWORD PTR[64+r11] + movdqa xmm9,XMMWORD PTR[r11] + movdqu xmm0,XMMWORD PTR[r10] + movdqu xmm1,XMMWORD PTR[16+r10] + movdqu xmm2,XMMWORD PTR[32+r10] + movdqu xmm3,XMMWORD PTR[48+r10] +DB 102,15,56,0,198 + add r10,64 + add ebx,DWORD PTR[16+rsp] + xor esi,eax +DB 102,15,56,0,206 + mov edi,ecx + rol ecx,5 + paddd xmm0,xmm9 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + movdqa XMMWORD PTR[rsp],xmm0 + add eax,DWORD PTR[20+rsp] + xor edi,ebp + psubd xmm0,xmm9 + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + add ebp,DWORD PTR[24+rsp] +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[128+r15] + xor esi,edx + mov edi,eax + rol eax,5 + xor esi,ecx + add ebp,eax + ror ebx,7 + add ebp,esi + add edx,DWORD PTR[28+rsp] + xor 
edi,ecx + mov esi,ebp + rol ebp,5 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + add ecx,DWORD PTR[32+rsp] + xor esi,ebx +DB 102,15,56,0,214 + mov edi,edx + rol edx,5 + paddd xmm1,xmm9 + xor esi,eax +DB 102,69,15,56,220,223 + movups xmm14,XMMWORD PTR[144+r15] + add ecx,edx + ror ebp,7 + add ecx,esi + movdqa XMMWORD PTR[16+rsp],xmm1 + add ebx,DWORD PTR[36+rsp] + xor edi,eax + psubd xmm1,xmm9 + mov esi,ecx + rol ecx,5 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + add eax,DWORD PTR[40+rsp] + xor esi,ebp + mov edi,ebx + rol ebx,5 + xor esi,edx + add eax,ebx + ror ecx,7 + add eax,esi + add ebp,DWORD PTR[44+rsp] +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[160+r15] + xor edi,edx + mov esi,eax + rol eax,5 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[48+rsp] + xor esi,ecx +DB 102,15,56,0,222 + mov edi,ebp + rol ebp,5 + paddd xmm2,xmm9 + xor esi,ebx + add edx,ebp + ror eax,7 + add edx,esi + movdqa XMMWORD PTR[32+rsp],xmm2 + add ecx,DWORD PTR[52+rsp] + xor edi,ebx + psubd xmm2,xmm9 + mov esi,edx + rol edx,5 + xor edi,eax + cmp r8d,11 + jb $L$aesenclast4 + movups xmm14,XMMWORD PTR[176+r15] +DB 102,69,15,56,220,223 + movups xmm15,XMMWORD PTR[192+r15] +DB 102,69,15,56,220,222 + je $L$aesenclast4 + movups xmm14,XMMWORD PTR[208+r15] +DB 102,69,15,56,220,223 + movups xmm15,XMMWORD PTR[224+r15] +DB 102,69,15,56,220,222 +$L$aesenclast4:: +DB 102,69,15,56,221,223 + movups xmm14,XMMWORD PTR[16+r15] + add ecx,edx + ror ebp,7 + add ecx,edi + add ebx,DWORD PTR[56+rsp] + xor esi,eax + mov edi,ecx + rol ecx,5 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + add eax,DWORD PTR[60+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + movups XMMWORD PTR[48+r12*1+r13],xmm11 + lea r12,QWORD PTR[64+r12] + + add eax,DWORD PTR[r9] + add esi,DWORD PTR[4+r9] + add ecx,DWORD PTR[8+r9] + add edx,DWORD PTR[12+r9] + mov DWORD PTR[r9],eax + add ebp,DWORD PTR[16+r9] + mov DWORD PTR[4+r9],esi + mov ebx,esi + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + mov DWORD PTR[16+r9],ebp + jmp $L$oop_ssse3 + +ALIGN 16 +$L$done_ssse3:: + add ebx,DWORD PTR[16+rsp] + xor esi,eax + mov edi,ecx + rol ecx,5 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + add eax,DWORD PTR[20+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + add ebp,DWORD PTR[24+rsp] +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[128+r15] + xor esi,edx + mov edi,eax + rol eax,5 + xor esi,ecx + add ebp,eax + ror ebx,7 + add ebp,esi + add edx,DWORD PTR[28+rsp] + xor edi,ecx + mov esi,ebp + rol ebp,5 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + add ecx,DWORD PTR[32+rsp] + xor esi,ebx + mov edi,edx + rol edx,5 + xor esi,eax +DB 102,69,15,56,220,223 + movups xmm14,XMMWORD PTR[144+r15] + add ecx,edx + ror ebp,7 + add ecx,esi + add ebx,DWORD PTR[36+rsp] + xor edi,eax + mov esi,ecx + rol ecx,5 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + add eax,DWORD PTR[40+rsp] + xor esi,ebp + mov edi,ebx + rol ebx,5 + xor esi,edx + add eax,ebx + ror ecx,7 + add eax,esi + add ebp,DWORD PTR[44+rsp] +DB 102,69,15,56,220,222 + movups xmm15,XMMWORD PTR[160+r15] + xor edi,edx + mov esi,eax + rol eax,5 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[48+rsp] + xor esi,ecx + mov edi,ebp + rol ebp,5 + xor esi,ebx + add edx,ebp + ror eax,7 + add edx,esi + add ecx,DWORD PTR[52+rsp] + xor edi,ebx + mov esi,edx + rol edx,5 + xor edi,eax + cmp r8d,11 + jb $L$aesenclast5 + movups xmm14,XMMWORD 
PTR[176+r15] +DB 102,69,15,56,220,223 + movups xmm15,XMMWORD PTR[192+r15] +DB 102,69,15,56,220,222 + je $L$aesenclast5 + movups xmm14,XMMWORD PTR[208+r15] +DB 102,69,15,56,220,223 + movups xmm15,XMMWORD PTR[224+r15] +DB 102,69,15,56,220,222 +$L$aesenclast5:: +DB 102,69,15,56,221,223 + movups xmm14,XMMWORD PTR[16+r15] + add ecx,edx + ror ebp,7 + add ecx,edi + add ebx,DWORD PTR[56+rsp] + xor esi,eax + mov edi,ecx + rol ecx,5 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + add eax,DWORD PTR[60+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + movups XMMWORD PTR[48+r12*1+r13],xmm11 + mov r8,QWORD PTR[88+rsp] + + add eax,DWORD PTR[r9] + add esi,DWORD PTR[4+r9] + add ecx,DWORD PTR[8+r9] + mov DWORD PTR[r9],eax + add edx,DWORD PTR[12+r9] + mov DWORD PTR[4+r9],esi + add ebp,DWORD PTR[16+r9] + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + mov DWORD PTR[16+r9],ebp + movups XMMWORD PTR[r8],xmm11 + movaps xmm6,XMMWORD PTR[((96+0))+rsp] + movaps xmm7,XMMWORD PTR[((96+16))+rsp] + movaps xmm8,XMMWORD PTR[((96+32))+rsp] + movaps xmm9,XMMWORD PTR[((96+48))+rsp] + movaps xmm10,XMMWORD PTR[((96+64))+rsp] + movaps xmm11,XMMWORD PTR[((96+80))+rsp] + movaps xmm12,XMMWORD PTR[((96+96))+rsp] + movaps xmm13,XMMWORD PTR[((96+112))+rsp] + movaps xmm14,XMMWORD PTR[((96+128))+rsp] + movaps xmm15,XMMWORD PTR[((96+144))+rsp] + lea rsi,QWORD PTR[264+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue_ssse3:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_cbc_sha1_enc_ssse3:: +aesni_cbc_sha1_enc_ssse3 ENDP +ALIGN 64 +K_XX_XX:: + DD 05a827999h,05a827999h,05a827999h,05a827999h + + DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h + + DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch + + DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h + + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh + + +DB 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115 +DB 116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52 +DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 +DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 +DB 114,103,62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +ssse3_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[96+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + + lea rax,QWORD PTR[264+rax] + + mov r15,QWORD PTR[rax] + mov r14,QWORD PTR[8+rax] + mov r13,QWORD PTR[16+rax] + mov r12,QWORD PTR[24+rax] + mov rbp,QWORD PTR[32+rax] + mov rbx,QWORD PTR[40+rax] + lea rax,QWORD PTR[48+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 
0a548f3fch + + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +ssse3_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_aesni_cbc_sha1_enc_ssse3 + DD imagerel $L$SEH_end_aesni_cbc_sha1_enc_ssse3 + DD imagerel $L$SEH_info_aesni_cbc_sha1_enc_ssse3 +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_aesni_cbc_sha1_enc_ssse3:: +DB 9,0,0,0 + DD imagerel ssse3_handler + DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3 + + +.xdata ENDS +END diff --git a/deps/openssl/asm/x64-win32-masm/aes/aesni-x86_64.asm b/deps/openssl/asm/x64-win32-masm/aes/aesni-x86_64.asm new file mode 100644 index 0000000000..9d5a626071 --- /dev/null +++ b/deps/openssl/asm/x64-win32-masm/aes/aesni-x86_64.asm @@ -0,0 +1,3062 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(64) 'CODE' +PUBLIC aesni_encrypt + +ALIGN 16 +aesni_encrypt PROC PUBLIC + movups xmm2,XMMWORD PTR[rcx] + mov eax,DWORD PTR[240+r8] + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[16+r8] + lea r8,QWORD PTR[32+r8] + xorps xmm2,xmm0 +$L$oop_enc1_1:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[16+r8] + jnz $L$oop_enc1_1 + +DB 102,15,56,221,209 + movups XMMWORD PTR[rdx],xmm2 + DB 0F3h,0C3h ;repret +aesni_encrypt ENDP + +PUBLIC aesni_decrypt + +ALIGN 16 +aesni_decrypt PROC PUBLIC + movups xmm2,XMMWORD PTR[rcx] + mov eax,DWORD PTR[240+r8] + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[16+r8] + lea r8,QWORD PTR[32+r8] + xorps xmm2,xmm0 +$L$oop_dec1_2:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[16+r8] + jnz $L$oop_dec1_2 + +DB 102,15,56,223,209 + movups XMMWORD PTR[rdx],xmm2 + DB 0F3h,0C3h ;repret +aesni_decrypt ENDP + +ALIGN 16 +_aesni_encrypt3 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shr eax,1 + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + movups xmm0,XMMWORD PTR[rcx] + +$L$enc_loop3:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + dec eax +DB 102,15,56,220,225 + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,220,208 +DB 102,15,56,220,216 + lea rcx,QWORD PTR[32+rcx] +DB 102,15,56,220,224 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$enc_loop3 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 + DB 0F3h,0C3h ;repret +_aesni_encrypt3 ENDP + +ALIGN 16 +_aesni_decrypt3 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shr eax,1 + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + movups xmm0,XMMWORD PTR[rcx] + +$L$dec_loop3:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 + dec eax +DB 102,15,56,222,225 + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,222,208 +DB 102,15,56,222,216 + lea rcx,QWORD PTR[32+rcx] +DB 102,15,56,222,224 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$dec_loop3 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 + DB 0F3h,0C3h ;repret +_aesni_decrypt3 ENDP + +ALIGN 16 +_aesni_encrypt4 
PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shr eax,1 + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + xorps xmm5,xmm0 + movups xmm0,XMMWORD PTR[rcx] + +$L$enc_loop4:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + dec eax +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,220,208 +DB 102,15,56,220,216 + lea rcx,QWORD PTR[32+rcx] +DB 102,15,56,220,224 +DB 102,15,56,220,232 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$enc_loop4 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 +DB 102,15,56,221,232 + DB 0F3h,0C3h ;repret +_aesni_encrypt4 ENDP + +ALIGN 16 +_aesni_decrypt4 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shr eax,1 + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + xorps xmm5,xmm0 + movups xmm0,XMMWORD PTR[rcx] + +$L$dec_loop4:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 + dec eax +DB 102,15,56,222,225 +DB 102,15,56,222,233 + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,222,208 +DB 102,15,56,222,216 + lea rcx,QWORD PTR[32+rcx] +DB 102,15,56,222,224 +DB 102,15,56,222,232 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$dec_loop4 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 +DB 102,15,56,223,232 + DB 0F3h,0C3h ;repret +_aesni_decrypt4 ENDP + +ALIGN 16 +_aesni_encrypt6 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shr eax,1 + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 +DB 102,15,56,220,209 + pxor xmm4,xmm0 +DB 102,15,56,220,217 + pxor xmm5,xmm0 +DB 102,15,56,220,225 + pxor xmm6,xmm0 +DB 102,15,56,220,233 + pxor xmm7,xmm0 + dec eax +DB 102,15,56,220,241 + movups xmm0,XMMWORD PTR[rcx] +DB 102,15,56,220,249 + jmp $L$enc_loop6_enter +ALIGN 16 +$L$enc_loop6:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + dec eax +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +$L$enc_loop6_enter:: + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,220,208 +DB 102,15,56,220,216 + lea rcx,QWORD PTR[32+rcx] +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$enc_loop6 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 +DB 102,15,56,221,232 +DB 102,15,56,221,240 +DB 102,15,56,221,248 + DB 0F3h,0C3h ;repret +_aesni_encrypt6 ENDP + +ALIGN 16 +_aesni_decrypt6 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shr eax,1 + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 +DB 102,15,56,222,209 + pxor xmm4,xmm0 +DB 102,15,56,222,217 + pxor xmm5,xmm0 +DB 102,15,56,222,225 + pxor xmm6,xmm0 +DB 102,15,56,222,233 + pxor xmm7,xmm0 + dec eax +DB 102,15,56,222,241 + movups xmm0,XMMWORD PTR[rcx] +DB 102,15,56,222,249 + jmp $L$dec_loop6_enter +ALIGN 16 +$L$dec_loop6:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 + dec eax +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +$L$dec_loop6_enter:: + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,222,208 +DB 102,15,56,222,216 + lea rcx,QWORD PTR[32+rcx] +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 
+DB 102,15,56,222,248 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$dec_loop6 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 +DB 102,15,56,223,232 +DB 102,15,56,223,240 +DB 102,15,56,223,248 + DB 0F3h,0C3h ;repret +_aesni_decrypt6 ENDP + +ALIGN 16 +_aesni_encrypt8 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shr eax,1 + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 +DB 102,15,56,220,209 + pxor xmm4,xmm0 +DB 102,15,56,220,217 + pxor xmm5,xmm0 +DB 102,15,56,220,225 + pxor xmm6,xmm0 +DB 102,15,56,220,233 + pxor xmm7,xmm0 + dec eax +DB 102,15,56,220,241 + pxor xmm8,xmm0 +DB 102,15,56,220,249 + pxor xmm9,xmm0 + movups xmm0,XMMWORD PTR[rcx] +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[16+rcx] + jmp $L$enc_loop8_enter +ALIGN 16 +$L$enc_loop8:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + dec eax +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[16+rcx] +$L$enc_loop8_enter:: +DB 102,15,56,220,208 +DB 102,15,56,220,216 + lea rcx,QWORD PTR[32+rcx] +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$enc_loop8 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 +DB 102,15,56,221,232 +DB 102,15,56,221,240 +DB 102,15,56,221,248 +DB 102,68,15,56,221,192 +DB 102,68,15,56,221,200 + DB 0F3h,0C3h ;repret +_aesni_encrypt8 ENDP + +ALIGN 16 +_aesni_decrypt8 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shr eax,1 + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 +DB 102,15,56,222,209 + pxor xmm4,xmm0 +DB 102,15,56,222,217 + pxor xmm5,xmm0 +DB 102,15,56,222,225 + pxor xmm6,xmm0 +DB 102,15,56,222,233 + pxor xmm7,xmm0 + dec eax +DB 102,15,56,222,241 + pxor xmm8,xmm0 +DB 102,15,56,222,249 + pxor xmm9,xmm0 + movups xmm0,XMMWORD PTR[rcx] +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD PTR[16+rcx] + jmp $L$dec_loop8_enter +ALIGN 16 +$L$dec_loop8:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 + dec eax +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD PTR[16+rcx] +$L$dec_loop8_enter:: +DB 102,15,56,222,208 +DB 102,15,56,222,216 + lea rcx,QWORD PTR[32+rcx] +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$dec_loop8 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 +DB 102,15,56,223,232 +DB 102,15,56,223,240 +DB 102,15,56,223,248 +DB 102,68,15,56,223,192 +DB 102,68,15,56,223,200 + DB 0F3h,0C3h ;repret +_aesni_decrypt8 ENDP +PUBLIC aesni_ecb_encrypt + +ALIGN 16 +aesni_ecb_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov 
rax,rsp +$L$SEH_begin_aesni_ecb_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + and rdx,-16 + jz $L$ecb_ret + + mov eax,DWORD PTR[240+rcx] + movups xmm0,XMMWORD PTR[rcx] + mov r11,rcx + mov r10d,eax + test r8d,r8d + jz $L$ecb_decrypt + + cmp rdx,080h + jb $L$ecb_enc_tail + + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqu xmm4,XMMWORD PTR[32+rdi] + movdqu xmm5,XMMWORD PTR[48+rdi] + movdqu xmm6,XMMWORD PTR[64+rdi] + movdqu xmm7,XMMWORD PTR[80+rdi] + movdqu xmm8,XMMWORD PTR[96+rdi] + movdqu xmm9,XMMWORD PTR[112+rdi] + lea rdi,QWORD PTR[128+rdi] + sub rdx,080h + jmp $L$ecb_enc_loop8_enter +ALIGN 16 +$L$ecb_enc_loop8:: + movups XMMWORD PTR[rsi],xmm2 + mov rcx,r11 + movdqu xmm2,XMMWORD PTR[rdi] + mov eax,r10d + movups XMMWORD PTR[16+rsi],xmm3 + movdqu xmm3,XMMWORD PTR[16+rdi] + movups XMMWORD PTR[32+rsi],xmm4 + movdqu xmm4,XMMWORD PTR[32+rdi] + movups XMMWORD PTR[48+rsi],xmm5 + movdqu xmm5,XMMWORD PTR[48+rdi] + movups XMMWORD PTR[64+rsi],xmm6 + movdqu xmm6,XMMWORD PTR[64+rdi] + movups XMMWORD PTR[80+rsi],xmm7 + movdqu xmm7,XMMWORD PTR[80+rdi] + movups XMMWORD PTR[96+rsi],xmm8 + movdqu xmm8,XMMWORD PTR[96+rdi] + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + movdqu xmm9,XMMWORD PTR[112+rdi] + lea rdi,QWORD PTR[128+rdi] +$L$ecb_enc_loop8_enter:: + + call _aesni_encrypt8 + + sub rdx,080h + jnc $L$ecb_enc_loop8 + + movups XMMWORD PTR[rsi],xmm2 + mov rcx,r11 + movups XMMWORD PTR[16+rsi],xmm3 + mov eax,r10d + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + movups XMMWORD PTR[96+rsi],xmm8 + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + add rdx,080h + jz $L$ecb_ret + +$L$ecb_enc_tail:: + movups xmm2,XMMWORD PTR[rdi] + cmp rdx,020h + jb $L$ecb_enc_one + movups xmm3,XMMWORD PTR[16+rdi] + je $L$ecb_enc_two + movups xmm4,XMMWORD PTR[32+rdi] + cmp rdx,040h + jb $L$ecb_enc_three + movups xmm5,XMMWORD PTR[48+rdi] + je $L$ecb_enc_four + movups xmm6,XMMWORD PTR[64+rdi] + cmp rdx,060h + jb $L$ecb_enc_five + movups xmm7,XMMWORD PTR[80+rdi] + je $L$ecb_enc_six + movdqu xmm8,XMMWORD PTR[96+rdi] + call _aesni_encrypt8 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + movups XMMWORD PTR[96+rsi],xmm8 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_one:: + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_3:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_3 + +DB 102,15,56,221,209 + movups XMMWORD PTR[rsi],xmm2 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_two:: + xorps xmm4,xmm4 + call _aesni_encrypt3 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_three:: + call _aesni_encrypt3 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_four:: + call _aesni_encrypt4 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_five:: + xorps xmm7,xmm7 + call _aesni_encrypt6 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups 
XMMWORD PTR[64+rsi],xmm6 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_six:: + call _aesni_encrypt6 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + jmp $L$ecb_ret + +ALIGN 16 +$L$ecb_decrypt:: + cmp rdx,080h + jb $L$ecb_dec_tail + + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqu xmm4,XMMWORD PTR[32+rdi] + movdqu xmm5,XMMWORD PTR[48+rdi] + movdqu xmm6,XMMWORD PTR[64+rdi] + movdqu xmm7,XMMWORD PTR[80+rdi] + movdqu xmm8,XMMWORD PTR[96+rdi] + movdqu xmm9,XMMWORD PTR[112+rdi] + lea rdi,QWORD PTR[128+rdi] + sub rdx,080h + jmp $L$ecb_dec_loop8_enter +ALIGN 16 +$L$ecb_dec_loop8:: + movups XMMWORD PTR[rsi],xmm2 + mov rcx,r11 + movdqu xmm2,XMMWORD PTR[rdi] + mov eax,r10d + movups XMMWORD PTR[16+rsi],xmm3 + movdqu xmm3,XMMWORD PTR[16+rdi] + movups XMMWORD PTR[32+rsi],xmm4 + movdqu xmm4,XMMWORD PTR[32+rdi] + movups XMMWORD PTR[48+rsi],xmm5 + movdqu xmm5,XMMWORD PTR[48+rdi] + movups XMMWORD PTR[64+rsi],xmm6 + movdqu xmm6,XMMWORD PTR[64+rdi] + movups XMMWORD PTR[80+rsi],xmm7 + movdqu xmm7,XMMWORD PTR[80+rdi] + movups XMMWORD PTR[96+rsi],xmm8 + movdqu xmm8,XMMWORD PTR[96+rdi] + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + movdqu xmm9,XMMWORD PTR[112+rdi] + lea rdi,QWORD PTR[128+rdi] +$L$ecb_dec_loop8_enter:: + + call _aesni_decrypt8 + + movups xmm0,XMMWORD PTR[r11] + sub rdx,080h + jnc $L$ecb_dec_loop8 + + movups XMMWORD PTR[rsi],xmm2 + mov rcx,r11 + movups XMMWORD PTR[16+rsi],xmm3 + mov eax,r10d + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + movups XMMWORD PTR[96+rsi],xmm8 + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + add rdx,080h + jz $L$ecb_ret + +$L$ecb_dec_tail:: + movups xmm2,XMMWORD PTR[rdi] + cmp rdx,020h + jb $L$ecb_dec_one + movups xmm3,XMMWORD PTR[16+rdi] + je $L$ecb_dec_two + movups xmm4,XMMWORD PTR[32+rdi] + cmp rdx,040h + jb $L$ecb_dec_three + movups xmm5,XMMWORD PTR[48+rdi] + je $L$ecb_dec_four + movups xmm6,XMMWORD PTR[64+rdi] + cmp rdx,060h + jb $L$ecb_dec_five + movups xmm7,XMMWORD PTR[80+rdi] + je $L$ecb_dec_six + movups xmm8,XMMWORD PTR[96+rdi] + movups xmm0,XMMWORD PTR[rcx] + call _aesni_decrypt8 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + movups XMMWORD PTR[96+rsi],xmm8 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_one:: + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_4:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_4 + +DB 102,15,56,223,209 + movups XMMWORD PTR[rsi],xmm2 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_two:: + xorps xmm4,xmm4 + call _aesni_decrypt3 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_three:: + call _aesni_decrypt3 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_four:: + call _aesni_decrypt4 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_five:: + xorps xmm7,xmm7 + call _aesni_decrypt6 + movups XMMWORD PTR[rsi],xmm2 
+ movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_six:: + call _aesni_decrypt6 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + +$L$ecb_ret:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ecb_encrypt:: +aesni_ecb_encrypt ENDP +PUBLIC aesni_ccm64_encrypt_blocks + +ALIGN 16 +aesni_ccm64_encrypt_blocks PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ccm64_encrypt_blocks:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + lea rsp,QWORD PTR[((-88))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 +$L$ccm64_enc_body:: + mov eax,DWORD PTR[240+rcx] + movdqu xmm9,XMMWORD PTR[r8] + movdqa xmm6,XMMWORD PTR[$L$increment64] + movdqa xmm7,XMMWORD PTR[$L$bswap_mask] + + shr eax,1 + lea r11,QWORD PTR[rcx] + movdqu xmm3,XMMWORD PTR[r9] + movdqa xmm2,xmm9 + mov r10d,eax +DB 102,68,15,56,0,207 + jmp $L$ccm64_enc_outer +ALIGN 16 +$L$ccm64_enc_outer:: + movups xmm0,XMMWORD PTR[r11] + mov eax,r10d + movups xmm8,XMMWORD PTR[rdi] + + xorps xmm2,xmm0 + movups xmm1,XMMWORD PTR[16+r11] + xorps xmm0,xmm8 + lea rcx,QWORD PTR[32+r11] + xorps xmm3,xmm0 + movups xmm0,XMMWORD PTR[rcx] + +$L$ccm64_enc2_loop:: +DB 102,15,56,220,209 + dec eax +DB 102,15,56,220,217 + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,220,208 + lea rcx,QWORD PTR[32+rcx] +DB 102,15,56,220,216 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$ccm64_enc2_loop +DB 102,15,56,220,209 +DB 102,15,56,220,217 + paddq xmm9,xmm6 +DB 102,15,56,221,208 +DB 102,15,56,221,216 + + dec rdx + lea rdi,QWORD PTR[16+rdi] + xorps xmm8,xmm2 + movdqa xmm2,xmm9 + movups XMMWORD PTR[rsi],xmm8 + lea rsi,QWORD PTR[16+rsi] +DB 102,15,56,0,215 + jnz $L$ccm64_enc_outer + + movups XMMWORD PTR[r9],xmm3 + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + lea rsp,QWORD PTR[88+rsp] +$L$ccm64_enc_ret:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ccm64_encrypt_blocks:: +aesni_ccm64_encrypt_blocks ENDP +PUBLIC aesni_ccm64_decrypt_blocks + +ALIGN 16 +aesni_ccm64_decrypt_blocks PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ccm64_decrypt_blocks:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + lea rsp,QWORD PTR[((-88))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 +$L$ccm64_dec_body:: + mov eax,DWORD PTR[240+rcx] + movups xmm9,XMMWORD PTR[r8] + movdqu xmm3,XMMWORD PTR[r9] + movdqa xmm6,XMMWORD PTR[$L$increment64] + movdqa xmm7,XMMWORD PTR[$L$bswap_mask] + + movaps xmm2,xmm9 + mov r10d,eax + mov r11,rcx +DB 102,68,15,56,0,207 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_5:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_5 + +DB 
102,15,56,221,209 + movups xmm8,XMMWORD PTR[rdi] + paddq xmm9,xmm6 + lea rdi,QWORD PTR[16+rdi] + jmp $L$ccm64_dec_outer +ALIGN 16 +$L$ccm64_dec_outer:: + xorps xmm8,xmm2 + movdqa xmm2,xmm9 + mov eax,r10d + movups XMMWORD PTR[rsi],xmm8 + lea rsi,QWORD PTR[16+rsi] +DB 102,15,56,0,215 + + sub rdx,1 + jz $L$ccm64_dec_break + + movups xmm0,XMMWORD PTR[r11] + shr eax,1 + movups xmm1,XMMWORD PTR[16+r11] + xorps xmm8,xmm0 + lea rcx,QWORD PTR[32+r11] + xorps xmm2,xmm0 + xorps xmm3,xmm8 + movups xmm0,XMMWORD PTR[rcx] + +$L$ccm64_dec2_loop:: +DB 102,15,56,220,209 + dec eax +DB 102,15,56,220,217 + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,220,208 + lea rcx,QWORD PTR[32+rcx] +DB 102,15,56,220,216 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$ccm64_dec2_loop + movups xmm8,XMMWORD PTR[rdi] + paddq xmm9,xmm6 +DB 102,15,56,220,209 +DB 102,15,56,220,217 + lea rdi,QWORD PTR[16+rdi] +DB 102,15,56,221,208 +DB 102,15,56,221,216 + jmp $L$ccm64_dec_outer + +ALIGN 16 +$L$ccm64_dec_break:: + + movups xmm0,XMMWORD PTR[r11] + movups xmm1,XMMWORD PTR[16+r11] + xorps xmm8,xmm0 + lea r11,QWORD PTR[32+r11] + xorps xmm3,xmm8 +$L$oop_enc1_6:: +DB 102,15,56,220,217 + dec eax + movups xmm1,XMMWORD PTR[r11] + lea r11,QWORD PTR[16+r11] + jnz $L$oop_enc1_6 + +DB 102,15,56,221,217 + movups XMMWORD PTR[r9],xmm3 + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + lea rsp,QWORD PTR[88+rsp] +$L$ccm64_dec_ret:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ccm64_decrypt_blocks:: +aesni_ccm64_decrypt_blocks ENDP +PUBLIC aesni_ctr32_encrypt_blocks + +ALIGN 16 +aesni_ctr32_encrypt_blocks PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ctr32_encrypt_blocks:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + lea rsp,QWORD PTR[((-200))+rsp] + movaps XMMWORD PTR[32+rsp],xmm6 + movaps XMMWORD PTR[48+rsp],xmm7 + movaps XMMWORD PTR[64+rsp],xmm8 + movaps XMMWORD PTR[80+rsp],xmm9 + movaps XMMWORD PTR[96+rsp],xmm10 + movaps XMMWORD PTR[112+rsp],xmm11 + movaps XMMWORD PTR[128+rsp],xmm12 + movaps XMMWORD PTR[144+rsp],xmm13 + movaps XMMWORD PTR[160+rsp],xmm14 + movaps XMMWORD PTR[176+rsp],xmm15 +$L$ctr32_body:: + cmp rdx,1 + je $L$ctr32_one_shortcut + + movdqu xmm14,XMMWORD PTR[r8] + movdqa xmm15,XMMWORD PTR[$L$bswap_mask] + xor eax,eax +DB 102,69,15,58,22,242,3 +DB 102,68,15,58,34,240,3 + + mov eax,DWORD PTR[240+rcx] + bswap r10d + pxor xmm12,xmm12 + pxor xmm13,xmm13 +DB 102,69,15,58,34,226,0 + lea r11,QWORD PTR[3+r10] +DB 102,69,15,58,34,235,0 + inc r10d +DB 102,69,15,58,34,226,1 + inc r11 +DB 102,69,15,58,34,235,1 + inc r10d +DB 102,69,15,58,34,226,2 + inc r11 +DB 102,69,15,58,34,235,2 + movdqa XMMWORD PTR[rsp],xmm12 +DB 102,69,15,56,0,231 + movdqa XMMWORD PTR[16+rsp],xmm13 +DB 102,69,15,56,0,239 + + pshufd xmm2,xmm12,192 + pshufd xmm3,xmm12,128 + pshufd xmm4,xmm12,64 + cmp rdx,6 + jb $L$ctr32_tail + shr eax,1 + mov r11,rcx + mov r10d,eax + sub rdx,6 + jmp $L$ctr32_loop6 + +ALIGN 16 +$L$ctr32_loop6:: + pshufd xmm5,xmm13,192 + por xmm2,xmm14 + movups xmm0,XMMWORD PTR[r11] + pshufd xmm6,xmm13,128 + por xmm3,xmm14 + movups xmm1,XMMWORD PTR[16+r11] + pshufd xmm7,xmm13,64 + por xmm4,xmm14 + por xmm5,xmm14 + xorps xmm2,xmm0 + por xmm6,xmm14 + por xmm7,xmm14 + + + + + pxor xmm3,xmm0 +DB 102,15,56,220,209 + lea rcx,QWORD PTR[32+r11] + pxor xmm4,xmm0 +DB 102,15,56,220,217 + movdqa xmm13,XMMWORD 
PTR[$L$increment32] + pxor xmm5,xmm0 +DB 102,15,56,220,225 + movdqa xmm12,XMMWORD PTR[rsp] + pxor xmm6,xmm0 +DB 102,15,56,220,233 + pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR[rcx] + dec eax +DB 102,15,56,220,241 +DB 102,15,56,220,249 + jmp $L$ctr32_enc_loop6_enter +ALIGN 16 +$L$ctr32_enc_loop6:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + dec eax +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +$L$ctr32_enc_loop6_enter:: + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,220,208 +DB 102,15,56,220,216 + lea rcx,QWORD PTR[32+rcx] +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$ctr32_enc_loop6 + +DB 102,15,56,220,209 + paddd xmm12,xmm13 +DB 102,15,56,220,217 + paddd xmm13,XMMWORD PTR[16+rsp] +DB 102,15,56,220,225 + movdqa XMMWORD PTR[rsp],xmm12 +DB 102,15,56,220,233 + movdqa XMMWORD PTR[16+rsp],xmm13 +DB 102,15,56,220,241 +DB 102,69,15,56,0,231 +DB 102,15,56,220,249 +DB 102,69,15,56,0,239 + +DB 102,15,56,221,208 + movups xmm8,XMMWORD PTR[rdi] +DB 102,15,56,221,216 + movups xmm9,XMMWORD PTR[16+rdi] +DB 102,15,56,221,224 + movups xmm10,XMMWORD PTR[32+rdi] +DB 102,15,56,221,232 + movups xmm11,XMMWORD PTR[48+rdi] +DB 102,15,56,221,240 + movups xmm1,XMMWORD PTR[64+rdi] +DB 102,15,56,221,248 + movups xmm0,XMMWORD PTR[80+rdi] + lea rdi,QWORD PTR[96+rdi] + + xorps xmm8,xmm2 + pshufd xmm2,xmm12,192 + xorps xmm9,xmm3 + pshufd xmm3,xmm12,128 + movups XMMWORD PTR[rsi],xmm8 + xorps xmm10,xmm4 + pshufd xmm4,xmm12,64 + movups XMMWORD PTR[16+rsi],xmm9 + xorps xmm11,xmm5 + movups XMMWORD PTR[32+rsi],xmm10 + xorps xmm1,xmm6 + movups XMMWORD PTR[48+rsi],xmm11 + xorps xmm0,xmm7 + movups XMMWORD PTR[64+rsi],xmm1 + movups XMMWORD PTR[80+rsi],xmm0 + lea rsi,QWORD PTR[96+rsi] + mov eax,r10d + sub rdx,6 + jnc $L$ctr32_loop6 + + add rdx,6 + jz $L$ctr32_done + mov rcx,r11 + lea eax,DWORD PTR[1+rax*1+rax] + +$L$ctr32_tail:: + por xmm2,xmm14 + movups xmm8,XMMWORD PTR[rdi] + cmp rdx,2 + jb $L$ctr32_one + + por xmm3,xmm14 + movups xmm9,XMMWORD PTR[16+rdi] + je $L$ctr32_two + + pshufd xmm5,xmm13,192 + por xmm4,xmm14 + movups xmm10,XMMWORD PTR[32+rdi] + cmp rdx,4 + jb $L$ctr32_three + + pshufd xmm6,xmm13,128 + por xmm5,xmm14 + movups xmm11,XMMWORD PTR[48+rdi] + je $L$ctr32_four + + por xmm6,xmm14 + xorps xmm7,xmm7 + + call _aesni_encrypt6 + + movups xmm1,XMMWORD PTR[64+rdi] + xorps xmm8,xmm2 + xorps xmm9,xmm3 + movups XMMWORD PTR[rsi],xmm8 + xorps xmm10,xmm4 + movups XMMWORD PTR[16+rsi],xmm9 + xorps xmm11,xmm5 + movups XMMWORD PTR[32+rsi],xmm10 + xorps xmm1,xmm6 + movups XMMWORD PTR[48+rsi],xmm11 + movups XMMWORD PTR[64+rsi],xmm1 + jmp $L$ctr32_done + +ALIGN 16 +$L$ctr32_one_shortcut:: + movups xmm2,XMMWORD PTR[r8] + movups xmm8,XMMWORD PTR[rdi] + mov eax,DWORD PTR[240+rcx] +$L$ctr32_one:: + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_7:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_7 + +DB 102,15,56,221,209 + xorps xmm8,xmm2 + movups XMMWORD PTR[rsi],xmm8 + jmp $L$ctr32_done + +ALIGN 16 +$L$ctr32_two:: + xorps xmm4,xmm4 + call _aesni_encrypt3 + xorps xmm8,xmm2 + xorps xmm9,xmm3 + movups XMMWORD PTR[rsi],xmm8 + movups XMMWORD PTR[16+rsi],xmm9 + jmp $L$ctr32_done + +ALIGN 16 +$L$ctr32_three:: + call _aesni_encrypt3 + xorps xmm8,xmm2 + xorps xmm9,xmm3 + movups XMMWORD PTR[rsi],xmm8 + xorps xmm10,xmm4 + movups XMMWORD PTR[16+rsi],xmm9 + movups XMMWORD PTR[32+rsi],xmm10 + 
jmp $L$ctr32_done + +ALIGN 16 +$L$ctr32_four:: + call _aesni_encrypt4 + xorps xmm8,xmm2 + xorps xmm9,xmm3 + movups XMMWORD PTR[rsi],xmm8 + xorps xmm10,xmm4 + movups XMMWORD PTR[16+rsi],xmm9 + xorps xmm11,xmm5 + movups XMMWORD PTR[32+rsi],xmm10 + movups XMMWORD PTR[48+rsi],xmm11 + +$L$ctr32_done:: + movaps xmm6,XMMWORD PTR[32+rsp] + movaps xmm7,XMMWORD PTR[48+rsp] + movaps xmm8,XMMWORD PTR[64+rsp] + movaps xmm9,XMMWORD PTR[80+rsp] + movaps xmm10,XMMWORD PTR[96+rsp] + movaps xmm11,XMMWORD PTR[112+rsp] + movaps xmm12,XMMWORD PTR[128+rsp] + movaps xmm13,XMMWORD PTR[144+rsp] + movaps xmm14,XMMWORD PTR[160+rsp] + movaps xmm15,XMMWORD PTR[176+rsp] + lea rsp,QWORD PTR[200+rsp] +$L$ctr32_ret:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ctr32_encrypt_blocks:: +aesni_ctr32_encrypt_blocks ENDP +PUBLIC aesni_xts_encrypt + +ALIGN 16 +aesni_xts_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_xts_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + lea rsp,QWORD PTR[((-264))+rsp] + movaps XMMWORD PTR[96+rsp],xmm6 + movaps XMMWORD PTR[112+rsp],xmm7 + movaps XMMWORD PTR[128+rsp],xmm8 + movaps XMMWORD PTR[144+rsp],xmm9 + movaps XMMWORD PTR[160+rsp],xmm10 + movaps XMMWORD PTR[176+rsp],xmm11 + movaps XMMWORD PTR[192+rsp],xmm12 + movaps XMMWORD PTR[208+rsp],xmm13 + movaps XMMWORD PTR[224+rsp],xmm14 + movaps XMMWORD PTR[240+rsp],xmm15 +$L$xts_enc_body:: + movups xmm15,XMMWORD PTR[r9] + mov eax,DWORD PTR[240+r8] + mov r10d,DWORD PTR[240+rcx] + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[16+r8] + lea r8,QWORD PTR[32+r8] + xorps xmm15,xmm0 +$L$oop_enc1_8:: +DB 102,68,15,56,220,249 + dec eax + movups xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[16+r8] + jnz $L$oop_enc1_8 + +DB 102,68,15,56,221,249 + mov r11,rcx + mov eax,r10d + mov r9,rdx + and rdx,-16 + + movdqa xmm8,XMMWORD PTR[$L$xts_magic] + pxor xmm14,xmm14 + pcmpgtd xmm14,xmm15 + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm10,xmm15 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm11,xmm15 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm12,xmm15 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm13,xmm15 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + sub rdx,16*6 + jc $L$xts_enc_short + + shr eax,1 + sub eax,1 + mov r10d,eax + jmp $L$xts_enc_grandloop + +ALIGN 16 +$L$xts_enc_grandloop:: + pshufd xmm9,xmm14,013h + movdqa xmm14,xmm15 + paddq xmm15,xmm15 + movdqu xmm2,XMMWORD PTR[rdi] + pand xmm9,xmm8 + movdqu xmm3,XMMWORD PTR[16+rdi] + pxor xmm15,xmm9 + + movdqu xmm4,XMMWORD PTR[32+rdi] + pxor xmm2,xmm10 + movdqu xmm5,XMMWORD PTR[48+rdi] + pxor xmm3,xmm11 + movdqu xmm6,XMMWORD PTR[64+rdi] + pxor xmm4,xmm12 + movdqu xmm7,XMMWORD PTR[80+rdi] + lea rdi,QWORD PTR[96+rdi] + pxor xmm5,xmm13 + movups xmm0,XMMWORD PTR[r11] + pxor xmm6,xmm14 + pxor xmm7,xmm15 + + + + movups xmm1,XMMWORD PTR[16+r11] + pxor xmm2,xmm0 + pxor xmm3,xmm0 + movdqa XMMWORD PTR[rsp],xmm10 +DB 102,15,56,220,209 + lea rcx,QWORD PTR[32+r11] + pxor xmm4,xmm0 + movdqa XMMWORD PTR[16+rsp],xmm11 +DB 102,15,56,220,217 + pxor xmm5,xmm0 + movdqa XMMWORD PTR[32+rsp],xmm12 +DB 102,15,56,220,225 + pxor 
xmm6,xmm0 + movdqa XMMWORD PTR[48+rsp],xmm13 +DB 102,15,56,220,233 + pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR[rcx] + dec eax + movdqa XMMWORD PTR[64+rsp],xmm14 +DB 102,15,56,220,241 + movdqa XMMWORD PTR[80+rsp],xmm15 +DB 102,15,56,220,249 + pxor xmm14,xmm14 + pcmpgtd xmm14,xmm15 + jmp $L$xts_enc_loop6_enter + +ALIGN 16 +$L$xts_enc_loop6:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + dec eax +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +$L$xts_enc_loop6_enter:: + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,220,208 +DB 102,15,56,220,216 + lea rcx,QWORD PTR[32+rcx] +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$xts_enc_loop6 + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + paddq xmm15,xmm15 +DB 102,15,56,220,209 + pand xmm9,xmm8 +DB 102,15,56,220,217 + pcmpgtd xmm14,xmm15 +DB 102,15,56,220,225 + pxor xmm15,xmm9 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 + movups xmm1,XMMWORD PTR[16+rcx] + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm10,xmm15 + paddq xmm15,xmm15 +DB 102,15,56,220,208 + pand xmm9,xmm8 +DB 102,15,56,220,216 + pcmpgtd xmm14,xmm15 +DB 102,15,56,220,224 + pxor xmm15,xmm9 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR[32+rcx] + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm11,xmm15 + paddq xmm15,xmm15 +DB 102,15,56,220,209 + pand xmm9,xmm8 +DB 102,15,56,220,217 + pcmpgtd xmm14,xmm15 +DB 102,15,56,220,225 + pxor xmm15,xmm9 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm12,xmm15 + paddq xmm15,xmm15 +DB 102,15,56,221,208 + pand xmm9,xmm8 +DB 102,15,56,221,216 + pcmpgtd xmm14,xmm15 +DB 102,15,56,221,224 + pxor xmm15,xmm9 +DB 102,15,56,221,232 +DB 102,15,56,221,240 +DB 102,15,56,221,248 + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm13,xmm15 + paddq xmm15,xmm15 + xorps xmm2,XMMWORD PTR[rsp] + pand xmm9,xmm8 + xorps xmm3,XMMWORD PTR[16+rsp] + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + + xorps xmm4,XMMWORD PTR[32+rsp] + movups XMMWORD PTR[rsi],xmm2 + xorps xmm5,XMMWORD PTR[48+rsp] + movups XMMWORD PTR[16+rsi],xmm3 + xorps xmm6,XMMWORD PTR[64+rsp] + movups XMMWORD PTR[32+rsi],xmm4 + xorps xmm7,XMMWORD PTR[80+rsp] + movups XMMWORD PTR[48+rsi],xmm5 + mov eax,r10d + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + lea rsi,QWORD PTR[96+rsi] + sub rdx,16*6 + jnc $L$xts_enc_grandloop + + lea eax,DWORD PTR[3+rax*1+rax] + mov rcx,r11 + mov r10d,eax + +$L$xts_enc_short:: + add rdx,16*6 + jz $L$xts_enc_done + + cmp rdx,020h + jb $L$xts_enc_one + je $L$xts_enc_two + + cmp rdx,040h + jb $L$xts_enc_three + je $L$xts_enc_four + + pshufd xmm9,xmm14,013h + movdqa xmm14,xmm15 + paddq xmm15,xmm15 + movdqu xmm2,XMMWORD PTR[rdi] + pand xmm9,xmm8 + movdqu xmm3,XMMWORD PTR[16+rdi] + pxor xmm15,xmm9 + + movdqu xmm4,XMMWORD PTR[32+rdi] + pxor xmm2,xmm10 + movdqu xmm5,XMMWORD PTR[48+rdi] + pxor xmm3,xmm11 + movdqu xmm6,XMMWORD PTR[64+rdi] + lea rdi,QWORD PTR[80+rdi] + pxor xmm4,xmm12 + pxor xmm5,xmm13 + pxor xmm6,xmm14 + + call _aesni_encrypt6 + + xorps xmm2,xmm10 + movdqa xmm10,xmm15 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + movdqu XMMWORD PTR[rsi],xmm2 + xorps xmm5,xmm13 + movdqu XMMWORD PTR[16+rsi],xmm3 + xorps xmm6,xmm14 + movdqu XMMWORD PTR[32+rsi],xmm4 + movdqu XMMWORD PTR[48+rsi],xmm5 + movdqu XMMWORD PTR[64+rsi],xmm6 + lea rsi,QWORD PTR[80+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_one:: 
+ movups xmm2,XMMWORD PTR[rdi] + lea rdi,QWORD PTR[16+rdi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_9:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_9 + +DB 102,15,56,221,209 + xorps xmm2,xmm10 + movdqa xmm10,xmm11 + movups XMMWORD PTR[rsi],xmm2 + lea rsi,QWORD PTR[16+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_two:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + lea rdi,QWORD PTR[32+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + + call _aesni_encrypt3 + + xorps xmm2,xmm10 + movdqa xmm10,xmm12 + xorps xmm3,xmm11 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + lea rsi,QWORD PTR[32+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_three:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + movups xmm4,XMMWORD PTR[32+rdi] + lea rdi,QWORD PTR[48+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + + call _aesni_encrypt3 + + xorps xmm2,xmm10 + movdqa xmm10,xmm13 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + lea rsi,QWORD PTR[48+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_four:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + movups xmm4,XMMWORD PTR[32+rdi] + xorps xmm2,xmm10 + movups xmm5,XMMWORD PTR[48+rdi] + lea rdi,QWORD PTR[64+rdi] + xorps xmm3,xmm11 + xorps xmm4,xmm12 + xorps xmm5,xmm13 + + call _aesni_encrypt4 + + xorps xmm2,xmm10 + movdqa xmm10,xmm15 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + movups XMMWORD PTR[rsi],xmm2 + xorps xmm5,xmm13 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + lea rsi,QWORD PTR[64+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_done:: + and r9,15 + jz $L$xts_enc_ret + mov rdx,r9 + +$L$xts_enc_steal:: + movzx eax,BYTE PTR[rdi] + movzx ecx,BYTE PTR[((-16))+rsi] + lea rdi,QWORD PTR[1+rdi] + mov BYTE PTR[((-16))+rsi],al + mov BYTE PTR[rsi],cl + lea rsi,QWORD PTR[1+rsi] + sub rdx,1 + jnz $L$xts_enc_steal + + sub rsi,r9 + mov rcx,r11 + mov eax,r10d + + movups xmm2,XMMWORD PTR[((-16))+rsi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_10:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_10 + +DB 102,15,56,221,209 + xorps xmm2,xmm10 + movups XMMWORD PTR[(-16)+rsi],xmm2 + +$L$xts_enc_ret:: + movaps xmm6,XMMWORD PTR[96+rsp] + movaps xmm7,XMMWORD PTR[112+rsp] + movaps xmm8,XMMWORD PTR[128+rsp] + movaps xmm9,XMMWORD PTR[144+rsp] + movaps xmm10,XMMWORD PTR[160+rsp] + movaps xmm11,XMMWORD PTR[176+rsp] + movaps xmm12,XMMWORD PTR[192+rsp] + movaps xmm13,XMMWORD PTR[208+rsp] + movaps xmm14,XMMWORD PTR[224+rsp] + movaps xmm15,XMMWORD PTR[240+rsp] + lea rsp,QWORD PTR[264+rsp] +$L$xts_enc_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_xts_encrypt:: +aesni_xts_encrypt ENDP +PUBLIC aesni_xts_decrypt + +ALIGN 16 +aesni_xts_decrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_xts_decrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + lea rsp,QWORD PTR[((-264))+rsp] + movaps XMMWORD PTR[96+rsp],xmm6 + movaps 
XMMWORD PTR[112+rsp],xmm7 + movaps XMMWORD PTR[128+rsp],xmm8 + movaps XMMWORD PTR[144+rsp],xmm9 + movaps XMMWORD PTR[160+rsp],xmm10 + movaps XMMWORD PTR[176+rsp],xmm11 + movaps XMMWORD PTR[192+rsp],xmm12 + movaps XMMWORD PTR[208+rsp],xmm13 + movaps XMMWORD PTR[224+rsp],xmm14 + movaps XMMWORD PTR[240+rsp],xmm15 +$L$xts_dec_body:: + movups xmm15,XMMWORD PTR[r9] + mov eax,DWORD PTR[240+r8] + mov r10d,DWORD PTR[240+rcx] + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[16+r8] + lea r8,QWORD PTR[32+r8] + xorps xmm15,xmm0 +$L$oop_enc1_11:: +DB 102,68,15,56,220,249 + dec eax + movups xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[16+r8] + jnz $L$oop_enc1_11 + +DB 102,68,15,56,221,249 + xor eax,eax + test rdx,15 + setnz al + shl rax,4 + sub rdx,rax + + mov r11,rcx + mov eax,r10d + mov r9,rdx + and rdx,-16 + + movdqa xmm8,XMMWORD PTR[$L$xts_magic] + pxor xmm14,xmm14 + pcmpgtd xmm14,xmm15 + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm10,xmm15 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm11,xmm15 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm12,xmm15 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm13,xmm15 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + sub rdx,16*6 + jc $L$xts_dec_short + + shr eax,1 + sub eax,1 + mov r10d,eax + jmp $L$xts_dec_grandloop + +ALIGN 16 +$L$xts_dec_grandloop:: + pshufd xmm9,xmm14,013h + movdqa xmm14,xmm15 + paddq xmm15,xmm15 + movdqu xmm2,XMMWORD PTR[rdi] + pand xmm9,xmm8 + movdqu xmm3,XMMWORD PTR[16+rdi] + pxor xmm15,xmm9 + + movdqu xmm4,XMMWORD PTR[32+rdi] + pxor xmm2,xmm10 + movdqu xmm5,XMMWORD PTR[48+rdi] + pxor xmm3,xmm11 + movdqu xmm6,XMMWORD PTR[64+rdi] + pxor xmm4,xmm12 + movdqu xmm7,XMMWORD PTR[80+rdi] + lea rdi,QWORD PTR[96+rdi] + pxor xmm5,xmm13 + movups xmm0,XMMWORD PTR[r11] + pxor xmm6,xmm14 + pxor xmm7,xmm15 + + + + movups xmm1,XMMWORD PTR[16+r11] + pxor xmm2,xmm0 + pxor xmm3,xmm0 + movdqa XMMWORD PTR[rsp],xmm10 +DB 102,15,56,222,209 + lea rcx,QWORD PTR[32+r11] + pxor xmm4,xmm0 + movdqa XMMWORD PTR[16+rsp],xmm11 +DB 102,15,56,222,217 + pxor xmm5,xmm0 + movdqa XMMWORD PTR[32+rsp],xmm12 +DB 102,15,56,222,225 + pxor xmm6,xmm0 + movdqa XMMWORD PTR[48+rsp],xmm13 +DB 102,15,56,222,233 + pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR[rcx] + dec eax + movdqa XMMWORD PTR[64+rsp],xmm14 +DB 102,15,56,222,241 + movdqa XMMWORD PTR[80+rsp],xmm15 +DB 102,15,56,222,249 + pxor xmm14,xmm14 + pcmpgtd xmm14,xmm15 + jmp $L$xts_dec_loop6_enter + +ALIGN 16 +$L$xts_dec_loop6:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 + dec eax +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +$L$xts_dec_loop6_enter:: + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,222,208 +DB 102,15,56,222,216 + lea rcx,QWORD PTR[32+rcx] +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$xts_dec_loop6 + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + paddq xmm15,xmm15 +DB 102,15,56,222,209 + pand xmm9,xmm8 +DB 102,15,56,222,217 + pcmpgtd xmm14,xmm15 +DB 102,15,56,222,225 + pxor xmm15,xmm9 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 + movups xmm1,XMMWORD PTR[16+rcx] + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm10,xmm15 + paddq xmm15,xmm15 +DB 102,15,56,222,208 + pand xmm9,xmm8 
+DB 102,15,56,222,216 + pcmpgtd xmm14,xmm15 +DB 102,15,56,222,224 + pxor xmm15,xmm9 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 + movups xmm0,XMMWORD PTR[32+rcx] + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm11,xmm15 + paddq xmm15,xmm15 +DB 102,15,56,222,209 + pand xmm9,xmm8 +DB 102,15,56,222,217 + pcmpgtd xmm14,xmm15 +DB 102,15,56,222,225 + pxor xmm15,xmm9 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm12,xmm15 + paddq xmm15,xmm15 +DB 102,15,56,223,208 + pand xmm9,xmm8 +DB 102,15,56,223,216 + pcmpgtd xmm14,xmm15 +DB 102,15,56,223,224 + pxor xmm15,xmm9 +DB 102,15,56,223,232 +DB 102,15,56,223,240 +DB 102,15,56,223,248 + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm13,xmm15 + paddq xmm15,xmm15 + xorps xmm2,XMMWORD PTR[rsp] + pand xmm9,xmm8 + xorps xmm3,XMMWORD PTR[16+rsp] + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + + xorps xmm4,XMMWORD PTR[32+rsp] + movups XMMWORD PTR[rsi],xmm2 + xorps xmm5,XMMWORD PTR[48+rsp] + movups XMMWORD PTR[16+rsi],xmm3 + xorps xmm6,XMMWORD PTR[64+rsp] + movups XMMWORD PTR[32+rsi],xmm4 + xorps xmm7,XMMWORD PTR[80+rsp] + movups XMMWORD PTR[48+rsi],xmm5 + mov eax,r10d + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + lea rsi,QWORD PTR[96+rsi] + sub rdx,16*6 + jnc $L$xts_dec_grandloop + + lea eax,DWORD PTR[3+rax*1+rax] + mov rcx,r11 + mov r10d,eax + +$L$xts_dec_short:: + add rdx,16*6 + jz $L$xts_dec_done + + cmp rdx,020h + jb $L$xts_dec_one + je $L$xts_dec_two + + cmp rdx,040h + jb $L$xts_dec_three + je $L$xts_dec_four + + pshufd xmm9,xmm14,013h + movdqa xmm14,xmm15 + paddq xmm15,xmm15 + movdqu xmm2,XMMWORD PTR[rdi] + pand xmm9,xmm8 + movdqu xmm3,XMMWORD PTR[16+rdi] + pxor xmm15,xmm9 + + movdqu xmm4,XMMWORD PTR[32+rdi] + pxor xmm2,xmm10 + movdqu xmm5,XMMWORD PTR[48+rdi] + pxor xmm3,xmm11 + movdqu xmm6,XMMWORD PTR[64+rdi] + lea rdi,QWORD PTR[80+rdi] + pxor xmm4,xmm12 + pxor xmm5,xmm13 + pxor xmm6,xmm14 + + call _aesni_decrypt6 + + xorps xmm2,xmm10 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + movdqu XMMWORD PTR[rsi],xmm2 + xorps xmm5,xmm13 + movdqu XMMWORD PTR[16+rsi],xmm3 + xorps xmm6,xmm14 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm14,xmm14 + movdqu XMMWORD PTR[48+rsi],xmm5 + pcmpgtd xmm14,xmm15 + movdqu XMMWORD PTR[64+rsi],xmm6 + lea rsi,QWORD PTR[80+rsi] + pshufd xmm11,xmm14,013h + and r9,15 + jz $L$xts_dec_ret + + movdqa xmm10,xmm15 + paddq xmm15,xmm15 + pand xmm11,xmm8 + pxor xmm11,xmm15 + jmp $L$xts_dec_done2 + +ALIGN 16 +$L$xts_dec_one:: + movups xmm2,XMMWORD PTR[rdi] + lea rdi,QWORD PTR[16+rdi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_12:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_12 + +DB 102,15,56,223,209 + xorps xmm2,xmm10 + movdqa xmm10,xmm11 + movups XMMWORD PTR[rsi],xmm2 + movdqa xmm11,xmm12 + lea rsi,QWORD PTR[16+rsi] + jmp $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_two:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + lea rdi,QWORD PTR[32+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + + call _aesni_decrypt3 + + xorps xmm2,xmm10 + movdqa xmm10,xmm12 + xorps xmm3,xmm11 + movdqa xmm11,xmm13 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + lea rsi,QWORD PTR[32+rsi] + jmp $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_three:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + movups xmm4,XMMWORD PTR[32+rdi] + lea 
rdi,QWORD PTR[48+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + + call _aesni_decrypt3 + + xorps xmm2,xmm10 + movdqa xmm10,xmm13 + xorps xmm3,xmm11 + movdqa xmm11,xmm15 + xorps xmm4,xmm12 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + lea rsi,QWORD PTR[48+rsi] + jmp $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_four:: + pshufd xmm9,xmm14,013h + movdqa xmm14,xmm15 + paddq xmm15,xmm15 + movups xmm2,XMMWORD PTR[rdi] + pand xmm9,xmm8 + movups xmm3,XMMWORD PTR[16+rdi] + pxor xmm15,xmm9 + + movups xmm4,XMMWORD PTR[32+rdi] + xorps xmm2,xmm10 + movups xmm5,XMMWORD PTR[48+rdi] + lea rdi,QWORD PTR[64+rdi] + xorps xmm3,xmm11 + xorps xmm4,xmm12 + xorps xmm5,xmm13 + + call _aesni_decrypt4 + + xorps xmm2,xmm10 + movdqa xmm10,xmm14 + xorps xmm3,xmm11 + movdqa xmm11,xmm15 + xorps xmm4,xmm12 + movups XMMWORD PTR[rsi],xmm2 + xorps xmm5,xmm13 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + lea rsi,QWORD PTR[64+rsi] + jmp $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_done:: + and r9,15 + jz $L$xts_dec_ret +$L$xts_dec_done2:: + mov rdx,r9 + mov rcx,r11 + mov eax,r10d + + movups xmm2,XMMWORD PTR[rdi] + xorps xmm2,xmm11 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_13:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_13 + +DB 102,15,56,223,209 + xorps xmm2,xmm11 + movups XMMWORD PTR[rsi],xmm2 + +$L$xts_dec_steal:: + movzx eax,BYTE PTR[16+rdi] + movzx ecx,BYTE PTR[rsi] + lea rdi,QWORD PTR[1+rdi] + mov BYTE PTR[rsi],al + mov BYTE PTR[16+rsi],cl + lea rsi,QWORD PTR[1+rsi] + sub rdx,1 + jnz $L$xts_dec_steal + + sub rsi,r9 + mov rcx,r11 + mov eax,r10d + + movups xmm2,XMMWORD PTR[rsi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_14:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_14 + +DB 102,15,56,223,209 + xorps xmm2,xmm10 + movups XMMWORD PTR[rsi],xmm2 + +$L$xts_dec_ret:: + movaps xmm6,XMMWORD PTR[96+rsp] + movaps xmm7,XMMWORD PTR[112+rsp] + movaps xmm8,XMMWORD PTR[128+rsp] + movaps xmm9,XMMWORD PTR[144+rsp] + movaps xmm10,XMMWORD PTR[160+rsp] + movaps xmm11,XMMWORD PTR[176+rsp] + movaps xmm12,XMMWORD PTR[192+rsp] + movaps xmm13,XMMWORD PTR[208+rsp] + movaps xmm14,XMMWORD PTR[224+rsp] + movaps xmm15,XMMWORD PTR[240+rsp] + lea rsp,QWORD PTR[264+rsp] +$L$xts_dec_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_xts_decrypt:: +aesni_xts_decrypt ENDP +PUBLIC aesni_cbc_encrypt + +ALIGN 16 +aesni_cbc_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_cbc_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + test rdx,rdx + jz $L$cbc_ret + + mov r10d,DWORD PTR[240+rcx] + mov r11,rcx + test r9d,r9d + jz $L$cbc_decrypt + + movups xmm2,XMMWORD PTR[r8] + mov eax,r10d + cmp rdx,16 + jb $L$cbc_enc_tail + sub rdx,16 + jmp $L$cbc_enc_loop +ALIGN 16 +$L$cbc_enc_loop:: + movups xmm3,XMMWORD PTR[rdi] + lea rdi,QWORD PTR[16+rdi] + + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm3,xmm0 + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm3 +$L$oop_enc1_15:: +DB 102,15,56,220,209 + dec eax + movups 
xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_15 + +DB 102,15,56,221,209 + mov eax,r10d + mov rcx,r11 + movups XMMWORD PTR[rsi],xmm2 + lea rsi,QWORD PTR[16+rsi] + sub rdx,16 + jnc $L$cbc_enc_loop + add rdx,16 + jnz $L$cbc_enc_tail + movups XMMWORD PTR[r8],xmm2 + jmp $L$cbc_ret + +$L$cbc_enc_tail:: + mov rcx,rdx + xchg rsi,rdi + DD 09066A4F3h + + mov ecx,16 + sub rcx,rdx + xor eax,eax + DD 09066AAF3h + + lea rdi,QWORD PTR[((-16))+rdi] + mov eax,r10d + mov rsi,rdi + mov rcx,r11 + xor rdx,rdx + jmp $L$cbc_enc_loop + + +ALIGN 16 +$L$cbc_decrypt:: + lea rsp,QWORD PTR[((-88))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 +$L$cbc_decrypt_body:: + movups xmm9,XMMWORD PTR[r8] + mov eax,r10d + cmp rdx,070h + jbe $L$cbc_dec_tail + shr r10d,1 + sub rdx,070h + mov eax,r10d + movaps XMMWORD PTR[64+rsp],xmm9 + jmp $L$cbc_dec_loop8_enter +ALIGN 16 +$L$cbc_dec_loop8:: + movaps XMMWORD PTR[64+rsp],xmm0 + movups XMMWORD PTR[rsi],xmm9 + lea rsi,QWORD PTR[16+rsi] +$L$cbc_dec_loop8_enter:: + movups xmm0,XMMWORD PTR[rcx] + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + movups xmm1,XMMWORD PTR[16+rcx] + + lea rcx,QWORD PTR[32+rcx] + movdqu xmm4,XMMWORD PTR[32+rdi] + xorps xmm2,xmm0 + movdqu xmm5,XMMWORD PTR[48+rdi] + xorps xmm3,xmm0 + movdqu xmm6,XMMWORD PTR[64+rdi] +DB 102,15,56,222,209 + pxor xmm4,xmm0 + movdqu xmm7,XMMWORD PTR[80+rdi] +DB 102,15,56,222,217 + pxor xmm5,xmm0 + movdqu xmm8,XMMWORD PTR[96+rdi] +DB 102,15,56,222,225 + pxor xmm6,xmm0 + movdqu xmm9,XMMWORD PTR[112+rdi] +DB 102,15,56,222,233 + pxor xmm7,xmm0 + dec eax +DB 102,15,56,222,241 + pxor xmm8,xmm0 +DB 102,15,56,222,249 + pxor xmm9,xmm0 + movups xmm0,XMMWORD PTR[rcx] +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD PTR[16+rcx] + + call $L$dec_loop8_enter + + movups xmm1,XMMWORD PTR[rdi] + movups xmm0,XMMWORD PTR[16+rdi] + xorps xmm2,XMMWORD PTR[64+rsp] + xorps xmm3,xmm1 + movups xmm1,XMMWORD PTR[32+rdi] + xorps xmm4,xmm0 + movups xmm0,XMMWORD PTR[48+rdi] + xorps xmm5,xmm1 + movups xmm1,XMMWORD PTR[64+rdi] + xorps xmm6,xmm0 + movups xmm0,XMMWORD PTR[80+rdi] + xorps xmm7,xmm1 + movups xmm1,XMMWORD PTR[96+rdi] + xorps xmm8,xmm0 + movups xmm0,XMMWORD PTR[112+rdi] + xorps xmm9,xmm1 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + mov eax,r10d + movups XMMWORD PTR[64+rsi],xmm6 + mov rcx,r11 + movups XMMWORD PTR[80+rsi],xmm7 + lea rdi,QWORD PTR[128+rdi] + movups XMMWORD PTR[96+rsi],xmm8 + lea rsi,QWORD PTR[112+rsi] + sub rdx,080h + ja $L$cbc_dec_loop8 + + movaps xmm2,xmm9 + movaps xmm9,xmm0 + add rdx,070h + jle $L$cbc_dec_tail_collected + movups XMMWORD PTR[rsi],xmm2 + lea eax,DWORD PTR[1+r10*1+r10] + lea rsi,QWORD PTR[16+rsi] +$L$cbc_dec_tail:: + movups xmm2,XMMWORD PTR[rdi] + movaps xmm8,xmm2 + cmp rdx,010h + jbe $L$cbc_dec_one + + movups xmm3,XMMWORD PTR[16+rdi] + movaps xmm7,xmm3 + cmp rdx,020h + jbe $L$cbc_dec_two + + movups xmm4,XMMWORD PTR[32+rdi] + movaps xmm6,xmm4 + cmp rdx,030h + jbe $L$cbc_dec_three + + movups xmm5,XMMWORD PTR[48+rdi] + cmp rdx,040h + jbe $L$cbc_dec_four + + movups xmm6,XMMWORD PTR[64+rdi] + cmp rdx,050h + jbe $L$cbc_dec_five + + movups xmm7,XMMWORD PTR[80+rdi] + cmp rdx,060h + jbe $L$cbc_dec_six + + movups xmm8,XMMWORD PTR[96+rdi] + movaps XMMWORD PTR[64+rsp],xmm9 + call _aesni_decrypt8 + movups xmm1,XMMWORD PTR[rdi] + movups xmm0,XMMWORD PTR[16+rdi] + xorps xmm2,XMMWORD PTR[64+rsp] + 
xorps xmm3,xmm1 + movups xmm1,XMMWORD PTR[32+rdi] + xorps xmm4,xmm0 + movups xmm0,XMMWORD PTR[48+rdi] + xorps xmm5,xmm1 + movups xmm1,XMMWORD PTR[64+rdi] + xorps xmm6,xmm0 + movups xmm0,XMMWORD PTR[80+rdi] + xorps xmm7,xmm1 + movups xmm9,XMMWORD PTR[96+rdi] + xorps xmm8,xmm0 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + lea rsi,QWORD PTR[96+rsi] + movaps xmm2,xmm8 + sub rdx,070h + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_one:: + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_16:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_16 + +DB 102,15,56,223,209 + xorps xmm2,xmm9 + movaps xmm9,xmm8 + sub rdx,010h + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_two:: + xorps xmm4,xmm4 + call _aesni_decrypt3 + xorps xmm2,xmm9 + xorps xmm3,xmm8 + movups XMMWORD PTR[rsi],xmm2 + movaps xmm9,xmm7 + movaps xmm2,xmm3 + lea rsi,QWORD PTR[16+rsi] + sub rdx,020h + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_three:: + call _aesni_decrypt3 + xorps xmm2,xmm9 + xorps xmm3,xmm8 + movups XMMWORD PTR[rsi],xmm2 + xorps xmm4,xmm7 + movups XMMWORD PTR[16+rsi],xmm3 + movaps xmm9,xmm6 + movaps xmm2,xmm4 + lea rsi,QWORD PTR[32+rsi] + sub rdx,030h + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_four:: + call _aesni_decrypt4 + xorps xmm2,xmm9 + movups xmm9,XMMWORD PTR[48+rdi] + xorps xmm3,xmm8 + movups XMMWORD PTR[rsi],xmm2 + xorps xmm4,xmm7 + movups XMMWORD PTR[16+rsi],xmm3 + xorps xmm5,xmm6 + movups XMMWORD PTR[32+rsi],xmm4 + movaps xmm2,xmm5 + lea rsi,QWORD PTR[48+rsi] + sub rdx,040h + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_five:: + xorps xmm7,xmm7 + call _aesni_decrypt6 + movups xmm1,XMMWORD PTR[16+rdi] + movups xmm0,XMMWORD PTR[32+rdi] + xorps xmm2,xmm9 + xorps xmm3,xmm8 + xorps xmm4,xmm1 + movups xmm1,XMMWORD PTR[48+rdi] + xorps xmm5,xmm0 + movups xmm9,XMMWORD PTR[64+rdi] + xorps xmm6,xmm1 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + lea rsi,QWORD PTR[64+rsi] + movaps xmm2,xmm6 + sub rdx,050h + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_six:: + call _aesni_decrypt6 + movups xmm1,XMMWORD PTR[16+rdi] + movups xmm0,XMMWORD PTR[32+rdi] + xorps xmm2,xmm9 + xorps xmm3,xmm8 + xorps xmm4,xmm1 + movups xmm1,XMMWORD PTR[48+rdi] + xorps xmm5,xmm0 + movups xmm0,XMMWORD PTR[64+rdi] + xorps xmm6,xmm1 + movups xmm9,XMMWORD PTR[80+rdi] + xorps xmm7,xmm0 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + lea rsi,QWORD PTR[80+rsi] + movaps xmm2,xmm7 + sub rdx,060h + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_tail_collected:: + and rdx,15 + movups XMMWORD PTR[r8],xmm9 + jnz $L$cbc_dec_tail_partial + movups XMMWORD PTR[rsi],xmm2 + jmp $L$cbc_dec_ret +ALIGN 16 +$L$cbc_dec_tail_partial:: + movaps XMMWORD PTR[64+rsp],xmm2 + mov rcx,16 + mov rdi,rsi + sub rcx,rdx + lea rsi,QWORD PTR[64+rsp] + DD 09066A4F3h + + +$L$cbc_dec_ret:: + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + lea rsp,QWORD PTR[88+rsp] +$L$cbc_ret:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h 
;repret +$L$SEH_end_aesni_cbc_encrypt:: +aesni_cbc_encrypt ENDP +PUBLIC aesni_set_decrypt_key + +ALIGN 16 +aesni_set_decrypt_key PROC PUBLIC +DB 048h,083h,0ECh,008h + + call __aesni_set_encrypt_key + shl edx,4 + test eax,eax + jnz $L$dec_key_ret + lea rcx,QWORD PTR[16+rdx*1+r8] + + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[rcx] + movups XMMWORD PTR[rcx],xmm0 + movups XMMWORD PTR[r8],xmm1 + lea r8,QWORD PTR[16+r8] + lea rcx,QWORD PTR[((-16))+rcx] + +$L$dec_key_inverse:: + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[rcx] +DB 102,15,56,219,192 +DB 102,15,56,219,201 + lea r8,QWORD PTR[16+r8] + lea rcx,QWORD PTR[((-16))+rcx] + movups XMMWORD PTR[16+rcx],xmm0 + movups XMMWORD PTR[(-16)+r8],xmm1 + cmp rcx,r8 + ja $L$dec_key_inverse + + movups xmm0,XMMWORD PTR[r8] +DB 102,15,56,219,192 + movups XMMWORD PTR[rcx],xmm0 +$L$dec_key_ret:: + add rsp,8 + DB 0F3h,0C3h ;repret +$L$SEH_end_set_decrypt_key:: +aesni_set_decrypt_key ENDP +PUBLIC aesni_set_encrypt_key + +ALIGN 16 +aesni_set_encrypt_key PROC PUBLIC +__aesni_set_encrypt_key:: +DB 048h,083h,0ECh,008h + + mov rax,-1 + test rcx,rcx + jz $L$enc_key_ret + test r8,r8 + jz $L$enc_key_ret + + movups xmm0,XMMWORD PTR[rcx] + xorps xmm4,xmm4 + lea rax,QWORD PTR[16+r8] + cmp edx,256 + je $L$14rounds + cmp edx,192 + je $L$12rounds + cmp edx,128 + jne $L$bad_keybits + +$L$10rounds:: + mov edx,9 + movups XMMWORD PTR[r8],xmm0 +DB 102,15,58,223,200,1 + call $L$key_expansion_128_cold +DB 102,15,58,223,200,2 + call $L$key_expansion_128 +DB 102,15,58,223,200,4 + call $L$key_expansion_128 +DB 102,15,58,223,200,8 + call $L$key_expansion_128 +DB 102,15,58,223,200,16 + call $L$key_expansion_128 +DB 102,15,58,223,200,32 + call $L$key_expansion_128 +DB 102,15,58,223,200,64 + call $L$key_expansion_128 +DB 102,15,58,223,200,128 + call $L$key_expansion_128 +DB 102,15,58,223,200,27 + call $L$key_expansion_128 +DB 102,15,58,223,200,54 + call $L$key_expansion_128 + movups XMMWORD PTR[rax],xmm0 + mov DWORD PTR[80+rax],edx + xor eax,eax + jmp $L$enc_key_ret + +ALIGN 16 +$L$12rounds:: + movq xmm2,QWORD PTR[16+rcx] + mov edx,11 + movups XMMWORD PTR[r8],xmm0 +DB 102,15,58,223,202,1 + call $L$key_expansion_192a_cold +DB 102,15,58,223,202,2 + call $L$key_expansion_192b +DB 102,15,58,223,202,4 + call $L$key_expansion_192a +DB 102,15,58,223,202,8 + call $L$key_expansion_192b +DB 102,15,58,223,202,16 + call $L$key_expansion_192a +DB 102,15,58,223,202,32 + call $L$key_expansion_192b +DB 102,15,58,223,202,64 + call $L$key_expansion_192a +DB 102,15,58,223,202,128 + call $L$key_expansion_192b + movups XMMWORD PTR[rax],xmm0 + mov DWORD PTR[48+rax],edx + xor rax,rax + jmp $L$enc_key_ret + +ALIGN 16 +$L$14rounds:: + movups xmm2,XMMWORD PTR[16+rcx] + mov edx,13 + lea rax,QWORD PTR[16+rax] + movups XMMWORD PTR[r8],xmm0 + movups XMMWORD PTR[16+r8],xmm2 +DB 102,15,58,223,202,1 + call $L$key_expansion_256a_cold +DB 102,15,58,223,200,1 + call $L$key_expansion_256b +DB 102,15,58,223,202,2 + call $L$key_expansion_256a +DB 102,15,58,223,200,2 + call $L$key_expansion_256b +DB 102,15,58,223,202,4 + call $L$key_expansion_256a +DB 102,15,58,223,200,4 + call $L$key_expansion_256b +DB 102,15,58,223,202,8 + call $L$key_expansion_256a +DB 102,15,58,223,200,8 + call $L$key_expansion_256b +DB 102,15,58,223,202,16 + call $L$key_expansion_256a +DB 102,15,58,223,200,16 + call $L$key_expansion_256b +DB 102,15,58,223,202,32 + call $L$key_expansion_256a +DB 102,15,58,223,200,32 + call $L$key_expansion_256b +DB 102,15,58,223,202,64 + call $L$key_expansion_256a + movups XMMWORD PTR[rax],xmm0 + mov 
DWORD PTR[16+rax],edx + xor rax,rax + jmp $L$enc_key_ret + +ALIGN 16 +$L$bad_keybits:: + mov rax,-2 +$L$enc_key_ret:: + add rsp,8 + DB 0F3h,0C3h ;repret +$L$SEH_end_set_encrypt_key:: + +ALIGN 16 +$L$key_expansion_128:: + movups XMMWORD PTR[rax],xmm0 + lea rax,QWORD PTR[16+rax] +$L$key_expansion_128_cold:: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + DB 0F3h,0C3h ;repret + +ALIGN 16 +$L$key_expansion_192a:: + movups XMMWORD PTR[rax],xmm0 + lea rax,QWORD PTR[16+rax] +$L$key_expansion_192a_cold:: + movaps xmm5,xmm2 +$L$key_expansion_192b_warm:: + shufps xmm4,xmm0,16 + movdqa xmm3,xmm2 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + pslldq xmm3,4 + xorps xmm0,xmm4 + pshufd xmm1,xmm1,85 + pxor xmm2,xmm3 + pxor xmm0,xmm1 + pshufd xmm3,xmm0,255 + pxor xmm2,xmm3 + DB 0F3h,0C3h ;repret + +ALIGN 16 +$L$key_expansion_192b:: + movaps xmm3,xmm0 + shufps xmm5,xmm0,68 + movups XMMWORD PTR[rax],xmm5 + shufps xmm3,xmm2,78 + movups XMMWORD PTR[16+rax],xmm3 + lea rax,QWORD PTR[32+rax] + jmp $L$key_expansion_192b_warm + +ALIGN 16 +$L$key_expansion_256a:: + movups XMMWORD PTR[rax],xmm2 + lea rax,QWORD PTR[16+rax] +$L$key_expansion_256a_cold:: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + DB 0F3h,0C3h ;repret + +ALIGN 16 +$L$key_expansion_256b:: + movups XMMWORD PTR[rax],xmm0 + lea rax,QWORD PTR[16+rax] + + shufps xmm4,xmm2,16 + xorps xmm2,xmm4 + shufps xmm4,xmm2,140 + xorps xmm2,xmm4 + shufps xmm1,xmm1,170 + xorps xmm2,xmm1 + DB 0F3h,0C3h ;repret +aesni_set_encrypt_key ENDP + +ALIGN 64 +$L$bswap_mask:: +DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +$L$increment32:: + DD 6,6,6,0 +$L$increment64:: + DD 1,0,0,0 +$L$xts_magic:: + DD 087h,0,1,0 + +DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +DB 115,108,46,111,114,103,62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +ecb_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[152+r8] + + jmp $L$common_seh_tail +ecb_se_handler ENDP + + +ALIGN 16 +ccm64_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,8 + DD 0a548f3fch + + lea rax,QWORD PTR[88+rax] + + jmp $L$common_seh_tail +ccm64_se_handler ENDP + + +ALIGN 16 +ctr32_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$ctr32_body] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$ctr32_ret] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[32+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + + lea rax,QWORD PTR[200+rax] + + jmp $L$common_seh_tail +ctr32_se_handler ENDP + + +ALIGN 16 +xts_se_handler PROC PRIVATE + push rsi + 
push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[96+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + + lea rax,QWORD PTR[((104+160))+rax] + + jmp $L$common_seh_tail +xts_se_handler ENDP + +ALIGN 16 +cbc_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[152+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$cbc_decrypt] + cmp rbx,r10 + jb $L$common_seh_tail + + lea r10,QWORD PTR[$L$cbc_decrypt_body] + cmp rbx,r10 + jb $L$restore_cbc_rax + + lea r10,QWORD PTR[$L$cbc_ret] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,8 + DD 0a548f3fch + + lea rax,QWORD PTR[88+rax] + jmp $L$common_seh_tail + +$L$restore_cbc_rax:: + mov rax,QWORD PTR[120+r8] + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +cbc_se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_aesni_ecb_encrypt + DD imagerel $L$SEH_end_aesni_ecb_encrypt + DD imagerel $L$SEH_info_ecb + + DD imagerel $L$SEH_begin_aesni_ccm64_encrypt_blocks + DD imagerel $L$SEH_end_aesni_ccm64_encrypt_blocks + DD imagerel $L$SEH_info_ccm64_enc + + DD imagerel $L$SEH_begin_aesni_ccm64_decrypt_blocks + DD imagerel $L$SEH_end_aesni_ccm64_decrypt_blocks + DD imagerel $L$SEH_info_ccm64_dec + + DD imagerel $L$SEH_begin_aesni_ctr32_encrypt_blocks + DD imagerel $L$SEH_end_aesni_ctr32_encrypt_blocks + DD imagerel $L$SEH_info_ctr32 + + DD imagerel $L$SEH_begin_aesni_xts_encrypt + DD imagerel $L$SEH_end_aesni_xts_encrypt + DD imagerel $L$SEH_info_xts_enc + + DD imagerel $L$SEH_begin_aesni_xts_decrypt + DD imagerel $L$SEH_end_aesni_xts_decrypt + DD imagerel $L$SEH_info_xts_dec + DD imagerel $L$SEH_begin_aesni_cbc_encrypt + DD imagerel $L$SEH_end_aesni_cbc_encrypt + DD imagerel $L$SEH_info_cbc + + DD imagerel aesni_set_decrypt_key + DD imagerel $L$SEH_end_set_decrypt_key + DD imagerel $L$SEH_info_key + + DD imagerel aesni_set_encrypt_key + DD imagerel $L$SEH_end_set_encrypt_key + DD imagerel $L$SEH_info_key +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_ecb:: +DB 9,0,0,0 + DD imagerel ecb_se_handler +$L$SEH_info_ccm64_enc:: +DB 9,0,0,0 + DD imagerel ccm64_se_handler + DD imagerel $L$ccm64_enc_body,imagerel $L$ccm64_enc_ret + +$L$SEH_info_ccm64_dec:: +DB 9,0,0,0 + DD imagerel ccm64_se_handler + DD imagerel $L$ccm64_dec_body,imagerel $L$ccm64_dec_ret + 
+$L$SEH_info_ctr32:: +DB 9,0,0,0 + DD imagerel ctr32_se_handler +$L$SEH_info_xts_enc:: +DB 9,0,0,0 + DD imagerel xts_se_handler + DD imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue + +$L$SEH_info_xts_dec:: +DB 9,0,0,0 + DD imagerel xts_se_handler + DD imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue + +$L$SEH_info_cbc:: +DB 9,0,0,0 + DD imagerel cbc_se_handler +$L$SEH_info_key:: +DB 001h,004h,001h,000h +DB 004h,002h,000h,000h + + +.xdata ENDS +END diff --git a/deps/openssl/asm/x64-win32-masm/bn/modexp512-x86_64.asm b/deps/openssl/asm/x64-win32-masm/bn/modexp512-x86_64.asm new file mode 100644 index 0000000000..b83aa18d46 --- /dev/null +++ b/deps/openssl/asm/x64-win32-masm/bn/modexp512-x86_64.asm @@ -0,0 +1,1890 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(64) 'CODE' + + +ALIGN 16 +MULADD_128x512 PROC PRIVATE + mov rax,QWORD PTR[rsi] + mul rbp + add r8,rax + adc rdx,0 + mov QWORD PTR[rcx],r8 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov r8,rdx + mov rbp,QWORD PTR[8+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r9,rax + adc rdx,0 + mov QWORD PTR[8+rcx],r9 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov r9,rdx + DB 0F3h,0C3h ;repret +MULADD_128x512 ENDP + +ALIGN 16 +mont_reduce PROC PRIVATE + lea rdi,QWORD PTR[192+rsp] + mov rsi,QWORD PTR[32+rsp] + add rsi,576 + lea rcx,QWORD PTR[520+rsp] + + mov rbp,QWORD PTR[96+rcx] + mov rax,QWORD PTR[rsi] + mul rbp + mov r8,QWORD PTR[rcx] + add r8,rax + adc rdx,0 + mov QWORD PTR[rdi],r8 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + mov r9,QWORD PTR[8+rcx] + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + mov r10,QWORD PTR[16+rcx] + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + mov r11,QWORD PTR[24+rcx] + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + mov r12,QWORD PTR[32+rcx] + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + mov r13,QWORD PTR[40+rcx] + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD 
PTR[48+rsi] + mul rbp + mov r14,QWORD PTR[48+rcx] + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + mov r15,QWORD PTR[56+rcx] + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov r8,rdx + mov rbp,QWORD PTR[104+rcx] + mov rax,QWORD PTR[rsi] + mul rbp + add r9,rax + adc rdx,0 + mov QWORD PTR[8+rdi],r9 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov r9,rdx + mov rbp,QWORD PTR[112+rcx] + mov rax,QWORD PTR[rsi] + mul rbp + add r10,rax + adc rdx,0 + mov QWORD PTR[16+rdi],r10 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov r10,rdx + mov rbp,QWORD PTR[120+rcx] + mov rax,QWORD PTR[rsi] + mul rbp + add r11,rax + adc rdx,0 + mov QWORD PTR[24+rdi],r11 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov r11,rdx + xor rax,rax + + add r8,QWORD PTR[64+rcx] + adc r9,QWORD PTR[72+rcx] + adc r10,QWORD PTR[80+rcx] + adc r11,QWORD PTR[88+rcx] + adc rax,0 + + + + + mov QWORD PTR[64+rdi],r8 + mov QWORD PTR[72+rdi],r9 + mov rbp,r10 + mov QWORD PTR[88+rdi],r11 + + mov QWORD PTR[384+rsp],rax + + mov r8,QWORD PTR[rdi] + mov r9,QWORD PTR[8+rdi] + mov r10,QWORD PTR[16+rdi] + mov r11,QWORD PTR[24+rdi] + + + + + + + + + add rdi,8*10 + + add rsi,64 + lea rcx,QWORD PTR[296+rsp] + + call MULADD_128x512 + + + mov rax,QWORD PTR[384+rsp] + + + add r8,QWORD PTR[((-16))+rdi] + adc r9,QWORD PTR[((-8))+rdi] + mov QWORD PTR[64+rcx],r8 + mov QWORD PTR[72+rcx],r9 + + adc rax,rax + mov QWORD PTR[384+rsp],rax + + lea rdi,QWORD 
PTR[192+rsp] + add rsi,64 + + + + + + mov r8,QWORD PTR[rsi] + mov rbx,QWORD PTR[8+rsi] + + mov rax,QWORD PTR[rcx] + mul r8 + mov rbp,rax + mov r9,rdx + + mov rax,QWORD PTR[8+rcx] + mul r8 + add r9,rax + + mov rax,QWORD PTR[rcx] + mul rbx + add r9,rax + + mov QWORD PTR[8+rdi],r9 + + + sub rsi,192 + + mov r8,QWORD PTR[rcx] + mov r9,QWORD PTR[8+rcx] + + call MULADD_128x512 + + + + + + mov rax,QWORD PTR[rsi] + mov rbx,QWORD PTR[8+rsi] + mov rdi,QWORD PTR[16+rsi] + mov rdx,QWORD PTR[24+rsi] + + + mov rbp,QWORD PTR[384+rsp] + + add r8,QWORD PTR[64+rcx] + adc r9,QWORD PTR[72+rcx] + + + adc rbp,rbp + + + + shl rbp,3 + mov rcx,QWORD PTR[32+rsp] + add rbp,rcx + + + xor rsi,rsi + + add r10,QWORD PTR[rbp] + adc r11,QWORD PTR[64+rbp] + adc r12,QWORD PTR[128+rbp] + adc r13,QWORD PTR[192+rbp] + adc r14,QWORD PTR[256+rbp] + adc r15,QWORD PTR[320+rbp] + adc r8,QWORD PTR[384+rbp] + adc r9,QWORD PTR[448+rbp] + + + + sbb rsi,0 + + + and rax,rsi + and rbx,rsi + and rdi,rsi + and rdx,rsi + + mov rbp,1 + sub r10,rax + sbb r11,rbx + sbb r12,rdi + sbb r13,rdx + + + + + sbb rbp,0 + + + + add rcx,512 + mov rax,QWORD PTR[32+rcx] + mov rbx,QWORD PTR[40+rcx] + mov rdi,QWORD PTR[48+rcx] + mov rdx,QWORD PTR[56+rcx] + + + + and rax,rsi + and rbx,rsi + and rdi,rsi + and rdx,rsi + + + + sub rbp,1 + + sbb r14,rax + sbb r15,rbx + sbb r8,rdi + sbb r9,rdx + + + + mov rsi,QWORD PTR[144+rsp] + mov QWORD PTR[rsi],r10 + mov QWORD PTR[8+rsi],r11 + mov QWORD PTR[16+rsi],r12 + mov QWORD PTR[24+rsi],r13 + mov QWORD PTR[32+rsi],r14 + mov QWORD PTR[40+rsi],r15 + mov QWORD PTR[48+rsi],r8 + mov QWORD PTR[56+rsi],r9 + + DB 0F3h,0C3h ;repret +mont_reduce ENDP + +ALIGN 16 +mont_mul_a3b PROC PRIVATE + + + + + mov rbp,QWORD PTR[rdi] + + mov rax,r10 + mul rbp + mov QWORD PTR[520+rsp],rax + mov r10,rdx + mov rax,r11 + mul rbp + add r10,rax + adc rdx,0 + mov r11,rdx + mov rax,r12 + mul rbp + add r11,rax + adc rdx,0 + mov r12,rdx + mov rax,r13 + mul rbp + add r12,rax + adc rdx,0 + mov r13,rdx + mov rax,r14 + mul rbp + add r13,rax + adc rdx,0 + mov r14,rdx + mov rax,r15 + mul rbp + add r14,rax + adc rdx,0 + mov r15,rdx + mov rax,r8 + mul rbp + add r15,rax + adc rdx,0 + mov r8,rdx + mov rax,r9 + mul rbp + add r8,rax + adc rdx,0 + mov r9,rdx + mov rbp,QWORD PTR[8+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r10,rax + adc rdx,0 + mov QWORD PTR[528+rsp],r10 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov r10,rdx + mov rbp,QWORD PTR[16+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r11,rax + adc rdx,0 + mov QWORD PTR[536+rsp],r11 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD 
PTR[32+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov r11,rdx + mov rbp,QWORD PTR[24+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r12,rax + adc rdx,0 + mov QWORD PTR[544+rsp],r12 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov r12,rdx + mov rbp,QWORD PTR[32+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r13,rax + adc rdx,0 + mov QWORD PTR[552+rsp],r13 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov r13,rdx + mov rbp,QWORD PTR[40+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r14,rax + adc rdx,0 + mov QWORD PTR[560+rsp],r14 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov r14,rdx + mov rbp,QWORD PTR[48+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r15,rax + adc rdx,0 + mov QWORD PTR[568+rsp],r15 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r8,rax + adc rdx,0 + add r8,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov 
rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov r15,rdx + mov rbp,QWORD PTR[56+rdi] + mov rax,QWORD PTR[rsi] + mul rbp + add r8,rax + adc rdx,0 + mov QWORD PTR[576+rsp],r8 + mov rbx,rdx + + mov rax,QWORD PTR[8+rsi] + mul rbp + add r9,rax + adc rdx,0 + add r9,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[16+rsi] + mul rbp + add r10,rax + adc rdx,0 + add r10,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[24+rsi] + mul rbp + add r11,rax + adc rdx,0 + add r11,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[32+rsi] + mul rbp + add r12,rax + adc rdx,0 + add r12,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[40+rsi] + mul rbp + add r13,rax + adc rdx,0 + add r13,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[48+rsi] + mul rbp + add r14,rax + adc rdx,0 + add r14,rbx + adc rdx,0 + mov rbx,rdx + + mov rax,QWORD PTR[56+rsi] + mul rbp + add r15,rax + adc rdx,0 + add r15,rbx + adc rdx,0 + mov r8,rdx + mov QWORD PTR[584+rsp],r9 + mov QWORD PTR[592+rsp],r10 + mov QWORD PTR[600+rsp],r11 + mov QWORD PTR[608+rsp],r12 + mov QWORD PTR[616+rsp],r13 + mov QWORD PTR[624+rsp],r14 + mov QWORD PTR[632+rsp],r15 + mov QWORD PTR[640+rsp],r8 + + + + + + jmp mont_reduce + + +mont_mul_a3b ENDP + +ALIGN 16 +sqr_reduce PROC PRIVATE + mov rcx,QWORD PTR[16+rsp] + + + + mov rbx,r10 + + mov rax,r11 + mul rbx + mov QWORD PTR[528+rsp],rax + mov r10,rdx + mov rax,r12 + mul rbx + add r10,rax + adc rdx,0 + mov r11,rdx + mov rax,r13 + mul rbx + add r11,rax + adc rdx,0 + mov r12,rdx + mov rax,r14 + mul rbx + add r12,rax + adc rdx,0 + mov r13,rdx + mov rax,r15 + mul rbx + add r13,rax + adc rdx,0 + mov r14,rdx + mov rax,r8 + mul rbx + add r14,rax + adc rdx,0 + mov r15,rdx + mov rax,r9 + mul rbx + add r15,rax + adc rdx,0 + mov rsi,rdx + + mov QWORD PTR[536+rsp],r10 + + + + + + mov rbx,QWORD PTR[8+rcx] + + mov rax,QWORD PTR[16+rcx] + mul rbx + add r11,rax + adc rdx,0 + mov QWORD PTR[544+rsp],r11 + + mov r10,rdx + mov rax,QWORD PTR[24+rcx] + mul rbx + add r12,rax + adc rdx,0 + add r12,r10 + adc rdx,0 + mov QWORD PTR[552+rsp],r12 + + mov r10,rdx + mov rax,QWORD PTR[32+rcx] + mul rbx + add r13,rax + adc rdx,0 + add r13,r10 + adc rdx,0 + + mov r10,rdx + mov rax,QWORD PTR[40+rcx] + mul rbx + add r14,rax + adc rdx,0 + add r14,r10 + adc rdx,0 + + mov r10,rdx + mov rax,r8 + mul rbx + add r15,rax + adc rdx,0 + add r15,r10 + adc rdx,0 + + mov r10,rdx + mov rax,r9 + mul rbx + add rsi,rax + adc rdx,0 + add rsi,r10 + adc rdx,0 + + mov r11,rdx + + + + + mov rbx,QWORD PTR[16+rcx] + + mov rax,QWORD PTR[24+rcx] + mul rbx + add r13,rax + adc rdx,0 + mov QWORD PTR[560+rsp],r13 + + mov r10,rdx + mov rax,QWORD PTR[32+rcx] + mul rbx + add r14,rax + adc rdx,0 + add r14,r10 + adc rdx,0 + mov QWORD PTR[568+rsp],r14 + + mov r10,rdx + mov rax,QWORD PTR[40+rcx] + mul rbx + add r15,rax + adc rdx,0 + add r15,r10 + adc rdx,0 + + mov r10,rdx + mov rax,r8 + mul rbx + add rsi,rax + adc rdx,0 + add rsi,r10 + adc rdx,0 + + mov r10,rdx + mov rax,r9 + mul rbx + add r11,rax + adc rdx,0 + add r11,r10 + adc rdx,0 + + mov r12,rdx + + + + + + mov rbx,QWORD PTR[24+rcx] + + mov rax,QWORD PTR[32+rcx] + mul rbx + add r15,rax + adc rdx,0 + mov QWORD PTR[576+rsp],r15 + + mov r10,rdx + mov rax,QWORD PTR[40+rcx] + mul rbx + add rsi,rax + adc rdx,0 + add rsi,r10 + adc rdx,0 + mov QWORD 
PTR[584+rsp],rsi + + mov r10,rdx + mov rax,r8 + mul rbx + add r11,rax + adc rdx,0 + add r11,r10 + adc rdx,0 + + mov r10,rdx + mov rax,r9 + mul rbx + add r12,rax + adc rdx,0 + add r12,r10 + adc rdx,0 + + mov r15,rdx + + + + + mov rbx,QWORD PTR[32+rcx] + + mov rax,QWORD PTR[40+rcx] + mul rbx + add r11,rax + adc rdx,0 + mov QWORD PTR[592+rsp],r11 + + mov r10,rdx + mov rax,r8 + mul rbx + add r12,rax + adc rdx,0 + add r12,r10 + adc rdx,0 + mov QWORD PTR[600+rsp],r12 + + mov r10,rdx + mov rax,r9 + mul rbx + add r15,rax + adc rdx,0 + add r15,r10 + adc rdx,0 + + mov r11,rdx + + + + + mov rbx,QWORD PTR[40+rcx] + + mov rax,r8 + mul rbx + add r15,rax + adc rdx,0 + mov QWORD PTR[608+rsp],r15 + + mov r10,rdx + mov rax,r9 + mul rbx + add r11,rax + adc rdx,0 + add r11,r10 + adc rdx,0 + mov QWORD PTR[616+rsp],r11 + + mov r12,rdx + + + + + mov rbx,r8 + + mov rax,r9 + mul rbx + add r12,rax + adc rdx,0 + mov QWORD PTR[624+rsp],r12 + + mov QWORD PTR[632+rsp],rdx + + + mov r10,QWORD PTR[528+rsp] + mov r11,QWORD PTR[536+rsp] + mov r12,QWORD PTR[544+rsp] + mov r13,QWORD PTR[552+rsp] + mov r14,QWORD PTR[560+rsp] + mov r15,QWORD PTR[568+rsp] + + mov rax,QWORD PTR[24+rcx] + mul rax + mov rdi,rax + mov r8,rdx + + add r10,r10 + adc r11,r11 + adc r12,r12 + adc r13,r13 + adc r14,r14 + adc r15,r15 + adc r8,0 + + mov rax,QWORD PTR[rcx] + mul rax + mov QWORD PTR[520+rsp],rax + mov rbx,rdx + + mov rax,QWORD PTR[8+rcx] + mul rax + + add r10,rbx + adc r11,rax + adc rdx,0 + + mov rbx,rdx + mov QWORD PTR[528+rsp],r10 + mov QWORD PTR[536+rsp],r11 + + mov rax,QWORD PTR[16+rcx] + mul rax + + add r12,rbx + adc r13,rax + adc rdx,0 + + mov rbx,rdx + + mov QWORD PTR[544+rsp],r12 + mov QWORD PTR[552+rsp],r13 + + xor rbp,rbp + add r14,rbx + adc r15,rdi + adc rbp,0 + + mov QWORD PTR[560+rsp],r14 + mov QWORD PTR[568+rsp],r15 + + + + + mov r10,QWORD PTR[576+rsp] + mov r11,QWORD PTR[584+rsp] + mov r12,QWORD PTR[592+rsp] + mov r13,QWORD PTR[600+rsp] + mov r14,QWORD PTR[608+rsp] + mov r15,QWORD PTR[616+rsp] + mov rdi,QWORD PTR[624+rsp] + mov rsi,QWORD PTR[632+rsp] + + mov rax,r9 + mul rax + mov r9,rax + mov rbx,rdx + + add r10,r10 + adc r11,r11 + adc r12,r12 + adc r13,r13 + adc r14,r14 + adc r15,r15 + adc rdi,rdi + adc rsi,rsi + adc rbx,0 + + add r10,rbp + + mov rax,QWORD PTR[32+rcx] + mul rax + + add r10,r8 + adc r11,rax + adc rdx,0 + + mov rbp,rdx + + mov QWORD PTR[576+rsp],r10 + mov QWORD PTR[584+rsp],r11 + + mov rax,QWORD PTR[40+rcx] + mul rax + + add r12,rbp + adc r13,rax + adc rdx,0 + + mov rbp,rdx + + mov QWORD PTR[592+rsp],r12 + mov QWORD PTR[600+rsp],r13 + + mov rax,QWORD PTR[48+rcx] + mul rax + + add r14,rbp + adc r15,rax + adc rdx,0 + + mov QWORD PTR[608+rsp],r14 + mov QWORD PTR[616+rsp],r15 + + add rdi,rdx + adc rsi,r9 + adc rbx,0 + + mov QWORD PTR[624+rsp],rdi + mov QWORD PTR[632+rsp],rsi + mov QWORD PTR[640+rsp],rbx + + jmp mont_reduce + + +sqr_reduce ENDP +PUBLIC mod_exp_512 + +mod_exp_512 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_mod_exp_512:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + + + mov r8,rsp + sub rsp,2688 + and rsp,-64 + + + mov QWORD PTR[rsp],r8 + mov QWORD PTR[8+rsp],rdi + mov QWORD PTR[16+rsp],rsi + mov QWORD PTR[24+rsp],rcx +$L$body:: + + + + pxor xmm4,xmm4 + movdqu xmm0,XMMWORD PTR[rsi] + movdqu xmm1,XMMWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[32+rsi] + movdqu xmm3,XMMWORD PTR[48+rsi] + movdqa XMMWORD PTR[512+rsp],xmm4 + movdqa XMMWORD PTR[528+rsp],xmm4 + 
movdqa XMMWORD PTR[608+rsp],xmm4 + movdqa XMMWORD PTR[624+rsp],xmm4 + movdqa XMMWORD PTR[544+rsp],xmm0 + movdqa XMMWORD PTR[560+rsp],xmm1 + movdqa XMMWORD PTR[576+rsp],xmm2 + movdqa XMMWORD PTR[592+rsp],xmm3 + + + movdqu xmm0,XMMWORD PTR[rdx] + movdqu xmm1,XMMWORD PTR[16+rdx] + movdqu xmm2,XMMWORD PTR[32+rdx] + movdqu xmm3,XMMWORD PTR[48+rdx] + + lea rbx,QWORD PTR[384+rsp] + mov QWORD PTR[136+rsp],rbx + call mont_reduce + + + lea rcx,QWORD PTR[448+rsp] + xor rax,rax + mov QWORD PTR[rcx],rax + mov QWORD PTR[8+rcx],rax + mov QWORD PTR[24+rcx],rax + mov QWORD PTR[32+rcx],rax + mov QWORD PTR[40+rcx],rax + mov QWORD PTR[48+rcx],rax + mov QWORD PTR[56+rcx],rax + mov QWORD PTR[128+rsp],rax + mov QWORD PTR[16+rcx],1 + + lea rbp,QWORD PTR[640+rsp] + mov rsi,rcx + mov rdi,rbp + mov rax,8 +loop_0:: + mov rbx,QWORD PTR[rcx] + mov WORD PTR[rdi],bx + shr rbx,16 + mov WORD PTR[64+rdi],bx + shr rbx,16 + mov WORD PTR[128+rdi],bx + shr rbx,16 + mov WORD PTR[192+rdi],bx + lea rcx,QWORD PTR[8+rcx] + lea rdi,QWORD PTR[256+rdi] + dec rax + jnz loop_0 + mov rax,31 + mov QWORD PTR[32+rsp],rax + mov QWORD PTR[40+rsp],rbp + + mov QWORD PTR[136+rsp],rsi + mov r10,QWORD PTR[rsi] + mov r11,QWORD PTR[8+rsi] + mov r12,QWORD PTR[16+rsi] + mov r13,QWORD PTR[24+rsi] + mov r14,QWORD PTR[32+rsi] + mov r15,QWORD PTR[40+rsi] + mov r8,QWORD PTR[48+rsi] + mov r9,QWORD PTR[56+rsi] +init_loop:: + lea rdi,QWORD PTR[384+rsp] + call mont_mul_a3b + lea rsi,QWORD PTR[448+rsp] + mov rbp,QWORD PTR[40+rsp] + add rbp,2 + mov QWORD PTR[40+rsp],rbp + mov rcx,rsi + mov rax,8 +loop_1:: + mov rbx,QWORD PTR[rcx] + mov WORD PTR[rbp],bx + shr rbx,16 + mov WORD PTR[64+rbp],bx + shr rbx,16 + mov WORD PTR[128+rbp],bx + shr rbx,16 + mov WORD PTR[192+rbp],bx + lea rcx,QWORD PTR[8+rcx] + lea rbp,QWORD PTR[256+rbp] + dec rax + jnz loop_1 + mov rax,QWORD PTR[32+rsp] + sub rax,1 + mov QWORD PTR[32+rsp],rax + jne init_loop + + + + movdqa XMMWORD PTR[64+rsp],xmm0 + movdqa XMMWORD PTR[80+rsp],xmm1 + movdqa XMMWORD PTR[96+rsp],xmm2 + movdqa XMMWORD PTR[112+rsp],xmm3 + + + + + + mov eax,DWORD PTR[126+rsp] + mov rdx,rax + shr rax,11 + and edx,007FFh + mov DWORD PTR[126+rsp],edx + lea rsi,QWORD PTR[640+rax*2+rsp] + mov rdx,QWORD PTR[8+rsp] + mov rbp,4 +loop_2:: + movzx rbx,WORD PTR[192+rsi] + movzx rax,WORD PTR[448+rsi] + shl rbx,16 + shl rax,16 + mov bx,WORD PTR[128+rsi] + mov ax,WORD PTR[384+rsi] + shl rbx,16 + shl rax,16 + mov bx,WORD PTR[64+rsi] + mov ax,WORD PTR[320+rsi] + shl rbx,16 + shl rax,16 + mov bx,WORD PTR[rsi] + mov ax,WORD PTR[256+rsi] + mov QWORD PTR[rdx],rbx + mov QWORD PTR[8+rdx],rax + lea rsi,QWORD PTR[512+rsi] + lea rdx,QWORD PTR[16+rdx] + sub rbp,1 + jnz loop_2 + mov QWORD PTR[48+rsp],505 + + mov rcx,QWORD PTR[8+rsp] + mov QWORD PTR[136+rsp],rcx + mov r10,QWORD PTR[rcx] + mov r11,QWORD PTR[8+rcx] + mov r12,QWORD PTR[16+rcx] + mov r13,QWORD PTR[24+rcx] + mov r14,QWORD PTR[32+rcx] + mov r15,QWORD PTR[40+rcx] + mov r8,QWORD PTR[48+rcx] + mov r9,QWORD PTR[56+rcx] + jmp sqr_2 + +main_loop_a3b:: + call sqr_reduce + call sqr_reduce + call sqr_reduce +sqr_2:: + call sqr_reduce + call sqr_reduce + + + + mov rcx,QWORD PTR[48+rsp] + mov rax,rcx + shr rax,4 + mov edx,DWORD PTR[64+rax*2+rsp] + and rcx,15 + shr rdx,cl + and rdx,01Fh + + lea rsi,QWORD PTR[640+rdx*2+rsp] + lea rdx,QWORD PTR[448+rsp] + mov rdi,rdx + mov rbp,4 +loop_3:: + movzx rbx,WORD PTR[192+rsi] + movzx rax,WORD PTR[448+rsi] + shl rbx,16 + shl rax,16 + mov bx,WORD PTR[128+rsi] + mov ax,WORD PTR[384+rsi] + shl rbx,16 + shl rax,16 + mov bx,WORD PTR[64+rsi] + mov ax,WORD PTR[320+rsi] + shl 
rbx,16 + shl rax,16 + mov bx,WORD PTR[rsi] + mov ax,WORD PTR[256+rsi] + mov QWORD PTR[rdx],rbx + mov QWORD PTR[8+rdx],rax + lea rsi,QWORD PTR[512+rsi] + lea rdx,QWORD PTR[16+rdx] + sub rbp,1 + jnz loop_3 + mov rsi,QWORD PTR[8+rsp] + call mont_mul_a3b + + + + mov rcx,QWORD PTR[48+rsp] + sub rcx,5 + mov QWORD PTR[48+rsp],rcx + jge main_loop_a3b + + + +end_main_loop_a3b:: + + + mov rdx,QWORD PTR[8+rsp] + pxor xmm4,xmm4 + movdqu xmm0,XMMWORD PTR[rdx] + movdqu xmm1,XMMWORD PTR[16+rdx] + movdqu xmm2,XMMWORD PTR[32+rdx] + movdqu xmm3,XMMWORD PTR[48+rdx] + movdqa XMMWORD PTR[576+rsp],xmm4 + movdqa XMMWORD PTR[592+rsp],xmm4 + movdqa XMMWORD PTR[608+rsp],xmm4 + movdqa XMMWORD PTR[624+rsp],xmm4 + movdqa XMMWORD PTR[512+rsp],xmm0 + movdqa XMMWORD PTR[528+rsp],xmm1 + movdqa XMMWORD PTR[544+rsp],xmm2 + movdqa XMMWORD PTR[560+rsp],xmm3 + call mont_reduce + + + + mov rax,QWORD PTR[8+rsp] + mov r8,QWORD PTR[rax] + mov r9,QWORD PTR[8+rax] + mov r10,QWORD PTR[16+rax] + mov r11,QWORD PTR[24+rax] + mov r12,QWORD PTR[32+rax] + mov r13,QWORD PTR[40+rax] + mov r14,QWORD PTR[48+rax] + mov r15,QWORD PTR[56+rax] + + + mov rbx,QWORD PTR[24+rsp] + add rbx,512 + + sub r8,QWORD PTR[rbx] + sbb r9,QWORD PTR[8+rbx] + sbb r10,QWORD PTR[16+rbx] + sbb r11,QWORD PTR[24+rbx] + sbb r12,QWORD PTR[32+rbx] + sbb r13,QWORD PTR[40+rbx] + sbb r14,QWORD PTR[48+rbx] + sbb r15,QWORD PTR[56+rbx] + + + mov rsi,QWORD PTR[rax] + mov rdi,QWORD PTR[8+rax] + mov rcx,QWORD PTR[16+rax] + mov rdx,QWORD PTR[24+rax] + cmovnc rsi,r8 + cmovnc rdi,r9 + cmovnc rcx,r10 + cmovnc rdx,r11 + mov QWORD PTR[rax],rsi + mov QWORD PTR[8+rax],rdi + mov QWORD PTR[16+rax],rcx + mov QWORD PTR[24+rax],rdx + + mov rsi,QWORD PTR[32+rax] + mov rdi,QWORD PTR[40+rax] + mov rcx,QWORD PTR[48+rax] + mov rdx,QWORD PTR[56+rax] + cmovnc rsi,r12 + cmovnc rdi,r13 + cmovnc rcx,r14 + cmovnc rdx,r15 + mov QWORD PTR[32+rax],rsi + mov QWORD PTR[40+rax],rdi + mov QWORD PTR[48+rax],rcx + mov QWORD PTR[56+rax],rdx + + mov rsi,QWORD PTR[rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbx,QWORD PTR[32+rsi] + mov rbp,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_mod_exp_512:: +mod_exp_512 ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +mod_exp_512_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$body] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jae $L$in_prologue + + mov rax,QWORD PTR[rax] + + mov rbx,QWORD PTR[32+rax] + mov rbp,QWORD PTR[40+rax] + mov r12,QWORD PTR[24+rax] + mov r13,QWORD PTR[16+rax] + mov r14,QWORD PTR[8+rax] + mov r15,QWORD PTR[rax] + lea rax,QWORD PTR[48+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + 
mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +mod_exp_512_se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_mod_exp_512 + DD imagerel $L$SEH_end_mod_exp_512 + DD imagerel $L$SEH_info_mod_exp_512 + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_mod_exp_512:: +DB 9,0,0,0 + DD imagerel mod_exp_512_se_handler + +.xdata ENDS +END diff --git a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm index 9e54d88953..f4518aa3bd 100644 --- a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm +++ b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm @@ -17,6 +17,641 @@ $L$SEH_begin_bn_mul_mont:: mov r9,QWORD PTR[48+rsp] + test r9d,3 + jnz $L$mul_enter + cmp r9d,8 + jb $L$mul_enter + cmp rdx,rsi + jne $L$mul4x_enter + jmp $L$sqr4x_enter + +ALIGN 16 +$L$mul_enter:: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + lea r10,QWORD PTR[2+r9] + mov r11,rsp + neg r10 + lea rsp,QWORD PTR[r10*8+rsp] + and rsp,-1024 + + mov QWORD PTR[8+r9*8+rsp],r11 +$L$mul_body:: + mov r12,rdx + mov r8,QWORD PTR[r8] + mov rbx,QWORD PTR[r12] + mov rax,QWORD PTR[rsi] + + xor r14,r14 + xor r15,r15 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD PTR[rcx] + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov r13,rdx + + lea r15,QWORD PTR[1+r15] + jmp $L$1st_enter + +ALIGN 16 +$L$1st:: + add r13,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add r13,r11 + mov r11,r10 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$1st_enter:: + mul rbx + add r11,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + lea r15,QWORD PTR[1+r15] + mov r10,rdx + + mul rbp + cmp r15,r9 + jne $L$1st + + add r13,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r13,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + mov r11,r10 + + xor rdx,rdx + add r13,r11 + adc rdx,0 + mov QWORD PTR[((-8))+r9*8+rsp],r13 + mov QWORD PTR[r9*8+rsp],rdx + + lea r14,QWORD PTR[1+r14] + jmp $L$outer +ALIGN 16 +$L$outer:: + mov rbx,QWORD PTR[r14*8+r12] + xor r15,r15 + mov rbp,r8 + mov r10,QWORD PTR[rsp] + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov r10,QWORD PTR[8+rsp] + mov r13,rdx + + lea r15,QWORD PTR[1+r15] + jmp $L$inner_enter + +ALIGN 16 +$L$inner:: + add r13,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$inner_enter:: + mul rbx + add r11,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + lea r15,QWORD PTR[1+r15] + + mul rbp + cmp r15,r9 + jne $L$inner + + add r13,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + + xor rdx,rdx + add r13,r11 + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-8))+r9*8+rsp],r13 + mov QWORD PTR[r9*8+rsp],rdx + + lea r14,QWORD PTR[1+r14] + cmp r14,r9 + jl $L$outer + + xor r14,r14 + mov rax,QWORD PTR[rsp] + lea rsi,QWORD PTR[rsp] + mov r15,r9 + jmp $L$sub +ALIGN 16 +$L$sub:: sbb rax,QWORD 
PTR[r14*8+rcx] + mov QWORD PTR[r14*8+rdi],rax + mov rax,QWORD PTR[8+r14*8+rsi] + lea r14,QWORD PTR[1+r14] + dec r15 + jnz $L$sub + + sbb rax,0 + xor r14,r14 + and rsi,rax + not rax + mov rcx,rdi + and rcx,rax + mov r15,r9 + or rsi,rcx +ALIGN 16 +$L$copy:: + mov rax,QWORD PTR[r14*8+rsi] + mov QWORD PTR[r14*8+rsp],r14 + mov QWORD PTR[r14*8+rdi],rax + lea r14,QWORD PTR[1+r14] + sub r15,1 + jnz $L$copy + + mov rsi,QWORD PTR[8+r9*8+rsp] + mov rax,1 + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$mul_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul_mont:: +bn_mul_mont ENDP + +ALIGN 16 +bn_mul4x_mont PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul4x_mont:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +$L$mul4x_enter:: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + lea r10,QWORD PTR[4+r9] + mov r11,rsp + neg r10 + lea rsp,QWORD PTR[r10*8+rsp] + and rsp,-1024 + + mov QWORD PTR[8+r9*8+rsp],r11 +$L$mul4x_body:: + mov QWORD PTR[16+r9*8+rsp],rdi + mov r12,rdx + mov r8,QWORD PTR[r8] + mov rbx,QWORD PTR[r12] + mov rax,QWORD PTR[rsi] + + xor r14,r14 + xor r15,r15 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD PTR[rcx] + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + add rdi,r11 + lea r15,QWORD PTR[4+r15] + adc rdx,0 + mov QWORD PTR[rsp],rdi + mov r13,rdx + jmp $L$1st4x +ALIGN 16 +$L$1st4x:: + mul rbx + add r10,rax + mov rax,QWORD PTR[((-16))+r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-8))+r15*8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[8+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-8))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[8+r15*8+rcx] + adc rdx,0 + lea r15,QWORD PTR[4+r15] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[((-16))+r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-32))+r15*8+rsp],rdi + mov r13,rdx + cmp r15,r9 + jl $L$1st4x + + mul rbx + add r10,rax + mov rax,QWORD PTR[((-16))+r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-8))+r15*8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov r13,rdx + + xor rdi,rdi + add r13,r10 + adc rdi,0 + mov QWORD PTR[((-8))+r15*8+rsp],r13 + mov QWORD PTR[r15*8+rsp],rdi + + lea r14,QWORD PTR[1+r14] +ALIGN 4 
+$L$outer4x:: + mov rbx,QWORD PTR[r14*8+r12] + xor r15,r15 + mov r10,QWORD PTR[rsp] + mov rbp,r8 + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[8+rcx] + adc rdx,0 + add r11,QWORD PTR[8+rsp] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + add rdi,r11 + lea r15,QWORD PTR[4+r15] + adc rdx,0 + mov QWORD PTR[rsp],rdi + mov r13,rdx + jmp $L$inner4x +ALIGN 16 +$L$inner4x:: + mul rbx + add r10,rax + mov rax,QWORD PTR[((-16))+r15*8+rcx] + adc rdx,0 + add r10,QWORD PTR[((-16))+r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-8))+r15*8+rcx] + adc rdx,0 + add r11,QWORD PTR[((-8))+r15*8+rsp] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + add r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[8+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-8))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[8+r15*8+rcx] + adc rdx,0 + add r11,QWORD PTR[8+r15*8+rsp] + adc rdx,0 + lea r15,QWORD PTR[4+r15] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[((-16))+r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-32))+r15*8+rsp],rdi + mov r13,rdx + cmp r15,r9 + jl $L$inner4x + + mul rbx + add r10,rax + mov rax,QWORD PTR[((-16))+r15*8+rcx] + adc rdx,0 + add r10,QWORD PTR[((-16))+r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-8))+r15*8+rcx] + adc rdx,0 + add r11,QWORD PTR[((-8))+r15*8+rsp] + adc rdx,0 + lea r14,QWORD PTR[1+r14] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov r13,rdx + + xor rdi,rdi + add r13,r10 + adc rdi,0 + add r13,QWORD PTR[r9*8+rsp] + adc rdi,0 + mov QWORD PTR[((-8))+r15*8+rsp],r13 + mov QWORD PTR[r15*8+rsp],rdi + + cmp r14,r9 + jl $L$outer4x + mov rdi,QWORD PTR[16+r9*8+rsp] + mov rax,QWORD PTR[rsp] + pxor xmm0,xmm0 + mov rdx,QWORD PTR[8+rsp] + shr r9,2 + lea rsi,QWORD PTR[rsp] + xor r14,r14 + + sub rax,QWORD PTR[rcx] + mov rbx,QWORD PTR[16+rsi] + mov rbp,QWORD PTR[24+rsi] + sbb rdx,QWORD PTR[8+rcx] + lea r15,QWORD PTR[((-1))+r9] + jmp $L$sub4x +ALIGN 16 +$L$sub4x:: + mov QWORD PTR[r14*8+rdi],rax + mov QWORD PTR[8+r14*8+rdi],rdx + sbb rbx,QWORD PTR[16+r14*8+rcx] + mov rax,QWORD PTR[32+r14*8+rsi] + mov rdx,QWORD PTR[40+r14*8+rsi] + sbb rbp,QWORD PTR[24+r14*8+rcx] + mov QWORD PTR[16+r14*8+rdi],rbx + mov QWORD PTR[24+r14*8+rdi],rbp + sbb rax,QWORD PTR[32+r14*8+rcx] + mov rbx,QWORD PTR[48+r14*8+rsi] + mov rbp,QWORD PTR[56+r14*8+rsi] + sbb rdx,QWORD PTR[40+r14*8+rcx] + lea r14,QWORD PTR[4+r14] + dec r15 + jnz $L$sub4x + + mov QWORD PTR[r14*8+rdi],rax + mov rax,QWORD PTR[32+r14*8+rsi] + sbb rbx,QWORD PTR[16+r14*8+rcx] + mov QWORD PTR[8+r14*8+rdi],rdx + sbb rbp,QWORD PTR[24+r14*8+rcx] + mov QWORD 
PTR[16+r14*8+rdi],rbx + + sbb rax,0 + mov QWORD PTR[24+r14*8+rdi],rbp + xor r14,r14 + and rsi,rax + not rax + mov rcx,rdi + and rcx,rax + lea r15,QWORD PTR[((-1))+r9] + or rsi,rcx + + movdqu xmm1,XMMWORD PTR[rsi] + movdqa XMMWORD PTR[rsp],xmm0 + movdqu XMMWORD PTR[rdi],xmm1 + jmp $L$copy4x +ALIGN 16 +$L$copy4x:: + movdqu xmm2,XMMWORD PTR[16+r14*1+rsi] + movdqu xmm1,XMMWORD PTR[32+r14*1+rsi] + movdqa XMMWORD PTR[16+r14*1+rsp],xmm0 + movdqu XMMWORD PTR[16+r14*1+rdi],xmm2 + movdqa XMMWORD PTR[32+r14*1+rsp],xmm0 + movdqu XMMWORD PTR[32+r14*1+rdi],xmm1 + lea r14,QWORD PTR[32+r14] + dec r15 + jnz $L$copy4x + + shl r9,2 + movdqu xmm2,XMMWORD PTR[16+r14*1+rsi] + movdqa XMMWORD PTR[16+r14*1+rsp],xmm0 + movdqu XMMWORD PTR[16+r14*1+rdi],xmm2 + mov rsi,QWORD PTR[8+r9*8+rsp] + mov rax,1 + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$mul4x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul4x_mont:: +bn_mul4x_mont ENDP + +ALIGN 16 +bn_sqr4x_mont PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_sqr4x_mont:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +$L$sqr4x_enter:: push rbx push rbp push r12 @@ -24,151 +659,749 @@ $L$SEH_begin_bn_mul_mont:: push r14 push r15 - mov r9d,r9d - lea r10,QWORD PTR[2+r9] + shl r9d,3 + xor r10,r10 mov r11,rsp - neg r10 - lea rsp,QWORD PTR[r10*8+rsp] + sub r10,r9 + mov r8,QWORD PTR[r8] + lea rsp,QWORD PTR[((-72))+r10*2+rsp] and rsp,-1024 - mov QWORD PTR[8+r9*8+rsp],r11 -$L$prologue:: - mov r12,rdx - mov r8,QWORD PTR[r8] - xor r14,r14 - xor r15,r15 - mov rbx,QWORD PTR[r12] - mov rax,QWORD PTR[rsi] - mul rbx - mov r10,rax - mov r11,rdx - imul rax,r8 - mov rbp,rax - mul QWORD PTR[rcx] - add rax,r10 - adc rdx,0 - mov r13,rdx - lea r15,QWORD PTR[1+r15] -$L$1st:: - mov rax,QWORD PTR[r15*8+rsi] - mul rbx - add rax,r11 - adc rdx,0 + + + + + mov QWORD PTR[32+rsp],rdi + mov QWORD PTR[40+rsp],rcx + mov QWORD PTR[48+rsp],r8 + mov QWORD PTR[56+rsp],r11 +$L$sqr4x_body:: + + + + + + + + lea rbp,QWORD PTR[32+r10] + lea rsi,QWORD PTR[r9*1+rsi] + + mov rcx,r9 + + + mov r14,QWORD PTR[((-32))+rbp*1+rsi] + lea rdi,QWORD PTR[64+r9*2+rsp] + mov rax,QWORD PTR[((-24))+rbp*1+rsi] + lea rdi,QWORD PTR[((-32))+rbp*1+rdi] + mov rbx,QWORD PTR[((-16))+rbp*1+rsi] + mov r15,rax + + mul r14 mov r10,rax - mov rax,QWORD PTR[r15*8+rcx] + mov rax,rbx mov r11,rdx + mov QWORD PTR[((-24))+rbp*1+rdi],r10 - mul rbp - add rax,r13 - lea r15,QWORD PTR[1+r15] - adc rdx,0 - add rax,r10 - adc rdx,0 - mov QWORD PTR[((-16))+r15*8+rsp],rax - cmp r15,r9 + xor r10,r10 + mul r14 + add r11,rax + mov rax,rbx + adc r10,rdx + mov QWORD PTR[((-16))+rbp*1+rdi],r11 + + lea rcx,QWORD PTR[((-16))+rbp] + + + mov rbx,QWORD PTR[8+rcx*1+rsi] + mul r15 + mov r12,rax + mov rax,rbx mov r13,rdx - jl $L$1st - xor rdx,rdx + xor r11,r11 + add r10,r12 + lea rcx,QWORD PTR[16+rcx] + adc r11,0 + mul r14 + add r10,rax + mov rax,rbx + adc r11,rdx + mov QWORD PTR[((-8))+rcx*1+rdi],r10 + jmp $L$sqr4x_1st + +ALIGN 16 +$L$sqr4x_1st:: + mov rbx,QWORD PTR[rcx*1+rsi] + xor r12,r12 + mul r15 + add r13,rax + mov rax,rbx + adc r12,rdx + + xor r10,r10 + add r11,r13 + adc r10,0 + mul r14 + add r11,rax + mov rax,rbx + adc r10,rdx + mov QWORD PTR[rcx*1+rdi],r11 + + + mov rbx,QWORD PTR[8+rcx*1+rsi] + xor r13,r13 + mul r15 + 
add r12,rax + mov rax,rbx + adc r13,rdx + + xor r11,r11 + add r10,r12 + adc r11,0 + mul r14 + add r10,rax + mov rax,rbx + adc r11,rdx + mov QWORD PTR[8+rcx*1+rdi],r10 + + mov rbx,QWORD PTR[16+rcx*1+rsi] + xor r12,r12 + mul r15 + add r13,rax + mov rax,rbx + adc r12,rdx + + xor r10,r10 + add r11,r13 + adc r10,0 + mul r14 + add r11,rax + mov rax,rbx + adc r10,rdx + mov QWORD PTR[16+rcx*1+rdi],r11 + + + mov rbx,QWORD PTR[24+rcx*1+rsi] + xor r13,r13 + mul r15 + add r12,rax + mov rax,rbx + adc r13,rdx + + xor r11,r11 + add r10,r12 + lea rcx,QWORD PTR[32+rcx] + adc r11,0 + mul r14 + add r10,rax + mov rax,rbx + adc r11,rdx + mov QWORD PTR[((-8))+rcx*1+rdi],r10 + + cmp rcx,0 + jne $L$sqr4x_1st + + xor r12,r12 add r13,r11 - adc rdx,0 - mov QWORD PTR[((-8))+r9*8+rsp],r13 - mov QWORD PTR[r9*8+rsp],rdx + adc r12,0 + mul r15 + add r13,rax + adc r12,rdx - lea r14,QWORD PTR[1+r14] -ALIGN 4 -$L$outer:: - xor r15,r15 + mov QWORD PTR[rdi],r13 + lea rbp,QWORD PTR[16+rbp] + mov QWORD PTR[8+rdi],r12 + jmp $L$sqr4x_outer - mov rbx,QWORD PTR[r14*8+r12] - mov rax,QWORD PTR[rsi] - mul rbx - add rax,QWORD PTR[rsp] - adc rdx,0 - mov r10,rax - mov r11,rdx +ALIGN 16 +$L$sqr4x_outer:: + mov r14,QWORD PTR[((-32))+rbp*1+rsi] + lea rdi,QWORD PTR[64+r9*2+rsp] + mov rax,QWORD PTR[((-24))+rbp*1+rsi] + lea rdi,QWORD PTR[((-32))+rbp*1+rdi] + mov rbx,QWORD PTR[((-16))+rbp*1+rsi] + mov r15,rax - imul rax,r8 - mov rbp,rax + mov r10,QWORD PTR[((-24))+rbp*1+rdi] + xor r11,r11 + mul r14 + add r10,rax + mov rax,rbx + adc r11,rdx + mov QWORD PTR[((-24))+rbp*1+rdi],r10 - mul QWORD PTR[r15*8+rcx] - add rax,r10 - mov r10,QWORD PTR[8+rsp] - adc rdx,0 - mov r13,rdx + xor r10,r10 + add r11,QWORD PTR[((-16))+rbp*1+rdi] + adc r10,0 + mul r14 + add r11,rax + mov rax,rbx + adc r10,rdx + mov QWORD PTR[((-16))+rbp*1+rdi],r11 - lea r15,QWORD PTR[1+r15] -ALIGN 4 -$L$inner:: - mov rax,QWORD PTR[r15*8+rsi] - mul rbx - add rax,r11 - adc rdx,0 + lea rcx,QWORD PTR[((-16))+rbp] + xor r12,r12 + + + mov rbx,QWORD PTR[8+rcx*1+rsi] + xor r13,r13 + add r12,QWORD PTR[8+rcx*1+rdi] + adc r13,0 + mul r15 + add r12,rax + mov rax,rbx + adc r13,rdx + + xor r11,r11 + add r10,r12 + adc r11,0 + mul r14 add r10,rax - mov rax,QWORD PTR[r15*8+rcx] - adc rdx,0 - mov r11,rdx + mov rax,rbx + adc r11,rdx + mov QWORD PTR[8+rcx*1+rdi],r10 - mul rbp - add rax,r13 - lea r15,QWORD PTR[1+r15] - adc rdx,0 - add rax,r10 + lea rcx,QWORD PTR[16+rcx] + jmp $L$sqr4x_inner + +ALIGN 16 +$L$sqr4x_inner:: + mov rbx,QWORD PTR[rcx*1+rsi] + xor r12,r12 + add r13,QWORD PTR[rcx*1+rdi] + adc r12,0 + mul r15 + add r13,rax + mov rax,rbx + adc r12,rdx + + xor r10,r10 + add r11,r13 + adc r10,0 + mul r14 + add r11,rax + mov rax,rbx + adc r10,rdx + mov QWORD PTR[rcx*1+rdi],r11 + + mov rbx,QWORD PTR[8+rcx*1+rsi] + xor r13,r13 + add r12,QWORD PTR[8+rcx*1+rdi] + adc r13,0 + mul r15 + add r12,rax + mov rax,rbx + adc r13,rdx + + xor r11,r11 + add r10,r12 + lea rcx,QWORD PTR[16+rcx] + adc r11,0 + mul r14 + add r10,rax + mov rax,rbx + adc r11,rdx + mov QWORD PTR[((-8))+rcx*1+rdi],r10 + + cmp rcx,0 + jne $L$sqr4x_inner + + xor r12,r12 + add r13,r11 + adc r12,0 + mul r15 + add r13,rax + adc r12,rdx + + mov QWORD PTR[rdi],r13 + mov QWORD PTR[8+rdi],r12 + + add rbp,16 + jnz $L$sqr4x_outer + + + mov r14,QWORD PTR[((-32))+rsi] + lea rdi,QWORD PTR[64+r9*2+rsp] + mov rax,QWORD PTR[((-24))+rsi] + lea rdi,QWORD PTR[((-32))+rbp*1+rdi] + mov rbx,QWORD PTR[((-16))+rsi] + mov r15,rax + + xor r11,r11 + mul r14 + add r10,rax + mov rax,rbx + adc r11,rdx + mov QWORD PTR[((-24))+rdi],r10 + + xor r10,r10 + add r11,r13 + adc r10,0 
+ mul r14 + add r11,rax + mov rax,rbx + adc r10,rdx + mov QWORD PTR[((-16))+rdi],r11 + + mov rbx,QWORD PTR[((-8))+rsi] + mul r15 + add r12,rax + mov rax,rbx adc rdx,0 - mov r10,QWORD PTR[r15*8+rsp] - cmp r15,r9 - mov QWORD PTR[((-16))+r15*8+rsp],rax + + xor r11,r11 + add r10,r12 mov r13,rdx - jl $L$inner + adc r11,0 + mul r14 + add r10,rax + mov rax,rbx + adc r11,rdx + mov QWORD PTR[((-8))+rdi],r10 - xor rdx,rdx + xor r12,r12 add r13,r11 + adc r12,0 + mul r15 + add r13,rax + mov rax,QWORD PTR[((-16))+rsi] + adc r12,rdx + + mov QWORD PTR[rdi],r13 + mov QWORD PTR[8+rdi],r12 + + mul rbx + add rbp,16 + xor r14,r14 + sub rbp,r9 + xor r15,r15 + + add rax,r12 adc rdx,0 + mov QWORD PTR[8+rdi],rax + mov QWORD PTR[16+rdi],rdx + mov QWORD PTR[24+rdi],r15 + + mov rax,QWORD PTR[((-16))+rbp*1+rsi] + lea rdi,QWORD PTR[64+r9*2+rsp] + xor r10,r10 + mov r11,QWORD PTR[((-24))+rbp*2+rdi] + + lea r12,QWORD PTR[r10*2+r14] + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[((-16))+rbp*2+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[((-8))+rbp*2+rdi] + adc r12,rax + mov rax,QWORD PTR[((-8))+rbp*1+rsi] + mov QWORD PTR[((-32))+rbp*2+rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[((-24))+rbp*2+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD PTR[rbp*2+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[8+rbp*2+rdi] + adc rbx,rax + mov rax,QWORD PTR[rbp*1+rsi] + mov QWORD PTR[((-16))+rbp*2+rdi],rbx + adc r8,rdx + lea rbp,QWORD PTR[16+rbp] + mov QWORD PTR[((-40))+rbp*2+rdi],r8 + sbb r15,r15 + jmp $L$sqr4x_shift_n_add + +ALIGN 16 +$L$sqr4x_shift_n_add:: + lea r12,QWORD PTR[r10*2+r14] + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[((-16))+rbp*2+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[((-8))+rbp*2+rdi] + adc r12,rax + mov rax,QWORD PTR[((-8))+rbp*1+rsi] + mov QWORD PTR[((-32))+rbp*2+rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[((-24))+rbp*2+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD PTR[rbp*2+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[8+rbp*2+rdi] + adc rbx,rax + mov rax,QWORD PTR[rbp*1+rsi] + mov QWORD PTR[((-16))+rbp*2+rdi],rbx + adc r8,rdx + + lea r12,QWORD PTR[r10*2+r14] + mov QWORD PTR[((-8))+rbp*2+rdi],r8 + sbb r15,r15 + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[16+rbp*2+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[24+rbp*2+rdi] + adc r12,rax + mov rax,QWORD PTR[8+rbp*1+rsi] + mov QWORD PTR[rbp*2+rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[8+rbp*2+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD PTR[32+rbp*2+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[40+rbp*2+rdi] + adc rbx,rax + mov rax,QWORD PTR[16+rbp*1+rsi] + mov QWORD PTR[16+rbp*2+rdi],rbx + adc r8,rdx + mov QWORD PTR[24+rbp*2+rdi],r8 + sbb r15,r15 + add rbp,32 + jnz $L$sqr4x_shift_n_add + + lea r12,QWORD PTR[r10*2+r14] + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[((-16))+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[((-8))+rdi] + adc r12,rax + mov rax,QWORD PTR[((-8))+rsi] + mov QWORD PTR[((-32))+rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[((-24))+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD 
PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mul rax + neg r15 + adc rbx,rax + adc r8,rdx + mov QWORD PTR[((-16))+rdi],rbx + mov QWORD PTR[((-8))+rdi],r8 + mov rsi,QWORD PTR[40+rsp] + mov r8,QWORD PTR[48+rsp] + xor rcx,rcx + mov QWORD PTR[rsp],r9 + sub rcx,r9 + mov r10,QWORD PTR[64+rsp] + mov r14,r8 + lea rax,QWORD PTR[64+r9*2+rsp] + lea rdi,QWORD PTR[64+r9*1+rsp] + mov QWORD PTR[8+rsp],rax + lea rsi,QWORD PTR[r9*1+rsi] + xor rbp,rbp + + mov rax,QWORD PTR[rcx*1+rsi] + mov r9,QWORD PTR[8+rcx*1+rsi] + imul r14,r10 + mov rbx,rax + jmp $L$sqr4x_mont_outer + +ALIGN 16 +$L$sqr4x_mont_outer:: + xor r11,r11 + mul r14 + add r10,rax + mov rax,r9 + adc r11,rdx + mov r15,r8 + + xor r10,r10 + add r11,QWORD PTR[8+rcx*1+rdi] + adc r10,0 + mul r14 + add r11,rax + mov rax,rbx + adc r10,rdx + + imul r15,r11 + + mov rbx,QWORD PTR[16+rcx*1+rsi] + xor r13,r13 + add r12,r11 + adc r13,0 + mul r15 + add r12,rax + mov rax,rbx + adc r13,rdx + mov QWORD PTR[8+rcx*1+rdi],r12 + + xor r11,r11 + add r10,QWORD PTR[16+rcx*1+rdi] + adc r11,0 + mul r14 + add r10,rax + mov rax,r9 + adc r11,rdx + + mov r9,QWORD PTR[24+rcx*1+rsi] + xor r12,r12 add r13,r10 - adc rdx,0 - mov QWORD PTR[((-8))+r9*8+rsp],r13 - mov QWORD PTR[r9*8+rsp],rdx + adc r12,0 + mul r15 + add r13,rax + mov rax,r9 + adc r12,rdx + mov QWORD PTR[16+rcx*1+rdi],r13 - lea r14,QWORD PTR[1+r14] - cmp r14,r9 - jl $L$outer + xor r10,r10 + add r11,QWORD PTR[24+rcx*1+rdi] + lea rcx,QWORD PTR[32+rcx] + adc r10,0 + mul r14 + add r11,rax + mov rax,rbx + adc r10,rdx + jmp $L$sqr4x_mont_inner - lea rsi,QWORD PTR[rsp] - lea r15,QWORD PTR[((-1))+r9] +ALIGN 16 +$L$sqr4x_mont_inner:: + mov rbx,QWORD PTR[rcx*1+rsi] + xor r13,r13 + add r12,r11 + adc r13,0 + mul r15 + add r12,rax + mov rax,rbx + adc r13,rdx + mov QWORD PTR[((-8))+rcx*1+rdi],r12 - mov rax,QWORD PTR[rsi] - xor r14,r14 - jmp $L$sub + xor r11,r11 + add r10,QWORD PTR[rcx*1+rdi] + adc r11,0 + mul r14 + add r10,rax + mov rax,r9 + adc r11,rdx + + mov r9,QWORD PTR[8+rcx*1+rsi] + xor r12,r12 + add r13,r10 + adc r12,0 + mul r15 + add r13,rax + mov rax,r9 + adc r12,rdx + mov QWORD PTR[rcx*1+rdi],r13 + + xor r10,r10 + add r11,QWORD PTR[8+rcx*1+rdi] + adc r10,0 + mul r14 + add r11,rax + mov rax,rbx + adc r10,rdx + + + mov rbx,QWORD PTR[16+rcx*1+rsi] + xor r13,r13 + add r12,r11 + adc r13,0 + mul r15 + add r12,rax + mov rax,rbx + adc r13,rdx + mov QWORD PTR[8+rcx*1+rdi],r12 + + xor r11,r11 + add r10,QWORD PTR[16+rcx*1+rdi] + adc r11,0 + mul r14 + add r10,rax + mov rax,r9 + adc r11,rdx + + mov r9,QWORD PTR[24+rcx*1+rsi] + xor r12,r12 + add r13,r10 + adc r12,0 + mul r15 + add r13,rax + mov rax,r9 + adc r12,rdx + mov QWORD PTR[16+rcx*1+rdi],r13 + + xor r10,r10 + add r11,QWORD PTR[24+rcx*1+rdi] + lea rcx,QWORD PTR[32+rcx] + adc r10,0 + mul r14 + add r11,rax + mov rax,rbx + adc r10,rdx + cmp rcx,0 + jne $L$sqr4x_mont_inner + + sub rcx,QWORD PTR[rsp] + mov r14,r8 + + xor r13,r13 + add r12,r11 + adc r13,0 + mul r15 + add r12,rax + mov rax,r9 + adc r13,rdx + mov QWORD PTR[((-8))+rdi],r12 + + xor r11,r11 + add r10,QWORD PTR[rdi] + adc r11,0 + mov rbx,QWORD PTR[rcx*1+rsi] + add r10,rbp + adc r11,0 + + imul r14,QWORD PTR[16+rcx*1+rdi] + xor r12,r12 + mov r9,QWORD PTR[8+rcx*1+rsi] + add r13,r10 + mov r10,QWORD PTR[16+rcx*1+rdi] + adc r12,0 + mul r15 + add r13,rax + mov rax,rbx + adc r12,rdx + mov QWORD PTR[rdi],r13 + + xor rbp,rbp + add r12,QWORD PTR[8+rdi] + adc rbp,rbp + add r12,r11 + lea rdi,QWORD PTR[16+rdi] + adc rbp,0 + mov QWORD PTR[((-8))+rdi],r12 + cmp rdi,QWORD PTR[8+rsp] + jb $L$sqr4x_mont_outer + + mov r9,QWORD PTR[rsp] + mov QWORD 
PTR[rdi],rbp + mov rax,QWORD PTR[64+r9*1+rsp] + lea rbx,QWORD PTR[64+r9*1+rsp] + mov rsi,QWORD PTR[40+rsp] + shr r9,5 + mov rdx,QWORD PTR[8+rbx] + xor rbp,rbp + + mov rdi,QWORD PTR[32+rsp] + sub rax,QWORD PTR[rsi] + mov r10,QWORD PTR[16+rbx] + mov r11,QWORD PTR[24+rbx] + sbb rdx,QWORD PTR[8+rsi] + lea rcx,QWORD PTR[((-1))+r9] + jmp $L$sqr4x_sub ALIGN 16 -$L$sub:: sbb rax,QWORD PTR[r14*8+rcx] - mov QWORD PTR[r14*8+rdi],rax - dec r15 - mov rax,QWORD PTR[8+r14*8+rsi] - lea r14,QWORD PTR[1+r14] - jge $L$sub +$L$sqr4x_sub:: + mov QWORD PTR[rbp*8+rdi],rax + mov QWORD PTR[8+rbp*8+rdi],rdx + sbb r10,QWORD PTR[16+rbp*8+rsi] + mov rax,QWORD PTR[32+rbp*8+rbx] + mov rdx,QWORD PTR[40+rbp*8+rbx] + sbb r11,QWORD PTR[24+rbp*8+rsi] + mov QWORD PTR[16+rbp*8+rdi],r10 + mov QWORD PTR[24+rbp*8+rdi],r11 + sbb rax,QWORD PTR[32+rbp*8+rsi] + mov r10,QWORD PTR[48+rbp*8+rbx] + mov r11,QWORD PTR[56+rbp*8+rbx] + sbb rdx,QWORD PTR[40+rbp*8+rsi] + lea rbp,QWORD PTR[4+rbp] + dec rcx + jnz $L$sqr4x_sub + + mov QWORD PTR[rbp*8+rdi],rax + mov rax,QWORD PTR[32+rbp*8+rbx] + sbb r10,QWORD PTR[16+rbp*8+rsi] + mov QWORD PTR[8+rbp*8+rdi],rdx + sbb r11,QWORD PTR[24+rbp*8+rsi] + mov QWORD PTR[16+rbp*8+rdi],r10 sbb rax,0 - and rsi,rax + mov QWORD PTR[24+rbp*8+rdi],r11 + xor rbp,rbp + and rbx,rax not rax - mov rcx,rdi - and rcx,rax - lea r15,QWORD PTR[((-1))+r9] - or rsi,rcx + mov rsi,rdi + and rsi,rax + lea rcx,QWORD PTR[((-1))+r9] + or rbx,rsi + + pxor xmm0,xmm0 + lea rsi,QWORD PTR[64+r9*8+rsp] + movdqu xmm1,XMMWORD PTR[rbx] + lea rsi,QWORD PTR[r9*8+rsi] + movdqa XMMWORD PTR[64+rsp],xmm0 + movdqa XMMWORD PTR[rsi],xmm0 + movdqu XMMWORD PTR[rdi],xmm1 + jmp $L$sqr4x_copy ALIGN 16 -$L$copy:: - mov rax,QWORD PTR[r15*8+rsi] - mov QWORD PTR[r15*8+rdi],rax - mov QWORD PTR[r15*8+rsp],r14 - dec r15 - jge $L$copy +$L$sqr4x_copy:: + movdqu xmm2,XMMWORD PTR[16+rbp*1+rbx] + movdqu xmm1,XMMWORD PTR[32+rbp*1+rbx] + movdqa XMMWORD PTR[80+rbp*1+rsp],xmm0 + movdqa XMMWORD PTR[96+rbp*1+rsp],xmm0 + movdqa XMMWORD PTR[16+rbp*1+rsi],xmm0 + movdqa XMMWORD PTR[32+rbp*1+rsi],xmm0 + movdqu XMMWORD PTR[16+rbp*1+rdi],xmm2 + movdqu XMMWORD PTR[32+rbp*1+rdi],xmm1 + lea rbp,QWORD PTR[32+rbp] + dec rcx + jnz $L$sqr4x_copy - mov rsi,QWORD PTR[8+r9*8+rsp] + movdqu xmm2,XMMWORD PTR[16+rbp*1+rbx] + movdqa XMMWORD PTR[80+rbp*1+rsp],xmm0 + movdqa XMMWORD PTR[16+rbp*1+rsi],xmm0 + movdqu XMMWORD PTR[16+rbp*1+rdi],xmm2 + mov rsi,QWORD PTR[56+rsp] mov rax,1 mov r15,QWORD PTR[rsi] mov r14,QWORD PTR[8+rsi] @@ -177,12 +1410,12 @@ $L$copy:: mov rbp,QWORD PTR[32+rsi] mov rbx,QWORD PTR[40+rsi] lea rsp,QWORD PTR[48+rsi] -$L$epilogue:: +$L$sqr4x_epilogue:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_bn_mul_mont:: -bn_mul_mont ENDP +$L$SEH_end_bn_sqr4x_mont:: +bn_sqr4x_mont ENDP DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83 @@ -192,7 +1425,7 @@ ALIGN 16 EXTERN __imp_RtlVirtualUnwind:NEAR ALIGN 16 -se_handler PROC PRIVATE +mul_handler PROC PRIVATE push rsi push rdi push rbx @@ -207,15 +1440,20 @@ se_handler PROC PRIVATE mov rax,QWORD PTR[120+r8] mov rbx,QWORD PTR[248+r8] - lea r10,QWORD PTR[$L$prologue] + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] cmp rbx,r10 - jb $L$in_prologue + jb $L$common_seh_tail mov rax,QWORD PTR[152+r8] - lea r10,QWORD PTR[$L$epilogue] + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] cmp rbx,r10 - 
jae $L$in_prologue + jae $L$common_seh_tail mov r10,QWORD PTR[192+r8] mov rax,QWORD PTR[8+r10*8+rax] @@ -234,7 +1472,53 @@ se_handler PROC PRIVATE mov QWORD PTR[232+r8],r14 mov QWORD PTR[240+r8],r15 -$L$in_prologue:: + jmp $L$common_seh_tail +mul_handler ENDP + + +ALIGN 16 +sqr_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$sqr4x_body] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$sqr4x_epilogue] + cmp rbx,r10 + jae $L$common_seh_tail + + mov rax,QWORD PTR[56+rax] + lea rax,QWORD PTR[48+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$common_seh_tail:: mov rdi,QWORD PTR[8+rax] mov rsi,QWORD PTR[16+rax] mov QWORD PTR[152+r8],rax @@ -273,7 +1557,7 @@ $L$in_prologue:: pop rdi pop rsi DB 0F3h,0C3h ;repret -se_handler ENDP +sqr_handler ENDP .text$ ENDS .pdata SEGMENT READONLY ALIGN(4) @@ -282,12 +1566,30 @@ ALIGN 4 DD imagerel $L$SEH_end_bn_mul_mont DD imagerel $L$SEH_info_bn_mul_mont + DD imagerel $L$SEH_begin_bn_mul4x_mont + DD imagerel $L$SEH_end_bn_mul4x_mont + DD imagerel $L$SEH_info_bn_mul4x_mont + + DD imagerel $L$SEH_begin_bn_sqr4x_mont + DD imagerel $L$SEH_end_bn_sqr4x_mont + DD imagerel $L$SEH_info_bn_sqr4x_mont + .pdata ENDS .xdata SEGMENT READONLY ALIGN(8) ALIGN 8 $L$SEH_info_bn_mul_mont:: DB 9,0,0,0 - DD imagerel se_handler + DD imagerel mul_handler + DD imagerel $L$mul_body,imagerel $L$mul_epilogue + +$L$SEH_info_bn_mul4x_mont:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue + +$L$SEH_info_bn_sqr4x_mont:: +DB 9,0,0,0 + DD imagerel sqr_handler .xdata ENDS END diff --git a/deps/openssl/asm/x64-win32-masm/camellia/cmll-x86_64.asm b/deps/openssl/asm/x64-win32-masm/camellia/cmll-x86_64.asm index a5913da92e..0ea789b6ae 100644 --- a/deps/openssl/asm/x64-win32-masm/camellia/cmll-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/camellia/cmll-x86_64.asm @@ -250,7 +250,7 @@ $L$eloop:: xor r8d,ecx xor r9d,ecx xor r9d,edx - lea r14,QWORD PTR[((16*4))+r14] + lea r14,QWORD PTR[64+r14] cmp r14,r15 mov edx,DWORD PTR[8+r14] mov ecx,DWORD PTR[12+r14] @@ -533,7 +533,7 @@ $L$dloop:: xor r8d,ecx xor r9d,ecx xor r9d,edx - lea r14,QWORD PTR[((-16*4))+r14] + lea r14,QWORD PTR[((-64))+r14] cmp r14,r15 mov edx,DWORD PTR[r14] mov ecx,DWORD PTR[4+r14] diff --git a/deps/openssl/asm/x64-win32-masm/md5/md5-x86_64.asm b/deps/openssl/asm/x64-win32-masm/md5/md5-x86_64.asm index 34305c687c..8ddad41c84 100644 --- a/deps/openssl/asm/x64-win32-masm/md5/md5-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/md5/md5-x86_64.asm @@ -27,10 +27,10 @@ $L$prologue:: mov rbp,rdi shl rdx,6 lea rdi,QWORD PTR[rdx*1+rsi] - mov eax,DWORD PTR[((0*4))+rbp] - mov ebx,DWORD PTR[((1*4))+rbp] - mov ecx,DWORD PTR[((2*4))+rbp] - mov edx,DWORD PTR[((3*4))+rbp] + mov eax,DWORD PTR[rbp] + mov ebx,DWORD PTR[4+rbp] + mov ecx,DWORD PTR[8+rbp] + mov edx,DWORD PTR[12+rbp] @@ -48,160 +48,160 @@ $L$loop:: mov r9d,ebx mov r14d,ecx mov r15d,edx - mov r10d,DWORD PTR[((0*4))+rsi] + mov r10d,DWORD PTR[rsi] mov r11d,edx xor r11d,ecx - lea eax,DWORD PTR[0d76aa478h+r10*1+rax] + lea 
eax,DWORD PTR[((-680876936))+r10*1+rax] and r11d,ebx xor r11d,edx - mov r10d,DWORD PTR[((1*4))+rsi] + mov r10d,DWORD PTR[4+rsi] add eax,r11d rol eax,7 mov r11d,ecx add eax,ebx xor r11d,ebx - lea edx,DWORD PTR[0e8c7b756h+r10*1+rdx] + lea edx,DWORD PTR[((-389564586))+r10*1+rdx] and r11d,eax xor r11d,ecx - mov r10d,DWORD PTR[((2*4))+rsi] + mov r10d,DWORD PTR[8+rsi] add edx,r11d rol edx,12 mov r11d,ebx add edx,eax xor r11d,eax - lea ecx,DWORD PTR[0242070dbh+r10*1+rcx] + lea ecx,DWORD PTR[606105819+r10*1+rcx] and r11d,edx xor r11d,ebx - mov r10d,DWORD PTR[((3*4))+rsi] + mov r10d,DWORD PTR[12+rsi] add ecx,r11d rol ecx,17 mov r11d,eax add ecx,edx xor r11d,edx - lea ebx,DWORD PTR[0c1bdceeeh+r10*1+rbx] + lea ebx,DWORD PTR[((-1044525330))+r10*1+rbx] and r11d,ecx xor r11d,eax - mov r10d,DWORD PTR[((4*4))+rsi] + mov r10d,DWORD PTR[16+rsi] add ebx,r11d rol ebx,22 mov r11d,edx add ebx,ecx xor r11d,ecx - lea eax,DWORD PTR[0f57c0fafh+r10*1+rax] + lea eax,DWORD PTR[((-176418897))+r10*1+rax] and r11d,ebx xor r11d,edx - mov r10d,DWORD PTR[((5*4))+rsi] + mov r10d,DWORD PTR[20+rsi] add eax,r11d rol eax,7 mov r11d,ecx add eax,ebx xor r11d,ebx - lea edx,DWORD PTR[04787c62ah+r10*1+rdx] + lea edx,DWORD PTR[1200080426+r10*1+rdx] and r11d,eax xor r11d,ecx - mov r10d,DWORD PTR[((6*4))+rsi] + mov r10d,DWORD PTR[24+rsi] add edx,r11d rol edx,12 mov r11d,ebx add edx,eax xor r11d,eax - lea ecx,DWORD PTR[0a8304613h+r10*1+rcx] + lea ecx,DWORD PTR[((-1473231341))+r10*1+rcx] and r11d,edx xor r11d,ebx - mov r10d,DWORD PTR[((7*4))+rsi] + mov r10d,DWORD PTR[28+rsi] add ecx,r11d rol ecx,17 mov r11d,eax add ecx,edx xor r11d,edx - lea ebx,DWORD PTR[0fd469501h+r10*1+rbx] + lea ebx,DWORD PTR[((-45705983))+r10*1+rbx] and r11d,ecx xor r11d,eax - mov r10d,DWORD PTR[((8*4))+rsi] + mov r10d,DWORD PTR[32+rsi] add ebx,r11d rol ebx,22 mov r11d,edx add ebx,ecx xor r11d,ecx - lea eax,DWORD PTR[0698098d8h+r10*1+rax] + lea eax,DWORD PTR[1770035416+r10*1+rax] and r11d,ebx xor r11d,edx - mov r10d,DWORD PTR[((9*4))+rsi] + mov r10d,DWORD PTR[36+rsi] add eax,r11d rol eax,7 mov r11d,ecx add eax,ebx xor r11d,ebx - lea edx,DWORD PTR[08b44f7afh+r10*1+rdx] + lea edx,DWORD PTR[((-1958414417))+r10*1+rdx] and r11d,eax xor r11d,ecx - mov r10d,DWORD PTR[((10*4))+rsi] + mov r10d,DWORD PTR[40+rsi] add edx,r11d rol edx,12 mov r11d,ebx add edx,eax xor r11d,eax - lea ecx,DWORD PTR[0ffff5bb1h+r10*1+rcx] + lea ecx,DWORD PTR[((-42063))+r10*1+rcx] and r11d,edx xor r11d,ebx - mov r10d,DWORD PTR[((11*4))+rsi] + mov r10d,DWORD PTR[44+rsi] add ecx,r11d rol ecx,17 mov r11d,eax add ecx,edx xor r11d,edx - lea ebx,DWORD PTR[0895cd7beh+r10*1+rbx] + lea ebx,DWORD PTR[((-1990404162))+r10*1+rbx] and r11d,ecx xor r11d,eax - mov r10d,DWORD PTR[((12*4))+rsi] + mov r10d,DWORD PTR[48+rsi] add ebx,r11d rol ebx,22 mov r11d,edx add ebx,ecx xor r11d,ecx - lea eax,DWORD PTR[06b901122h+r10*1+rax] + lea eax,DWORD PTR[1804603682+r10*1+rax] and r11d,ebx xor r11d,edx - mov r10d,DWORD PTR[((13*4))+rsi] + mov r10d,DWORD PTR[52+rsi] add eax,r11d rol eax,7 mov r11d,ecx add eax,ebx xor r11d,ebx - lea edx,DWORD PTR[0fd987193h+r10*1+rdx] + lea edx,DWORD PTR[((-40341101))+r10*1+rdx] and r11d,eax xor r11d,ecx - mov r10d,DWORD PTR[((14*4))+rsi] + mov r10d,DWORD PTR[56+rsi] add edx,r11d rol edx,12 mov r11d,ebx add edx,eax xor r11d,eax - lea ecx,DWORD PTR[0a679438eh+r10*1+rcx] + lea ecx,DWORD PTR[((-1502002290))+r10*1+rcx] and r11d,edx xor r11d,ebx - mov r10d,DWORD PTR[((15*4))+rsi] + mov r10d,DWORD PTR[60+rsi] add ecx,r11d rol ecx,17 mov r11d,eax add ecx,edx xor r11d,edx - lea ebx,DWORD 
PTR[049b40821h+r10*1+rbx] + lea ebx,DWORD PTR[1236535329+r10*1+rbx] and r11d,ecx xor r11d,eax - mov r10d,DWORD PTR[((0*4))+rsi] + mov r10d,DWORD PTR[rsi] add ebx,r11d rol ebx,22 mov r11d,edx add ebx,ecx - mov r10d,DWORD PTR[((1*4))+rsi] + mov r10d,DWORD PTR[4+rsi] mov r11d,edx mov r12d,edx not r11d - lea eax,DWORD PTR[0f61e2562h+r10*1+rax] + lea eax,DWORD PTR[((-165796510))+r10*1+rax] and r12d,ebx and r11d,ecx - mov r10d,DWORD PTR[((6*4))+rsi] + mov r10d,DWORD PTR[24+rsi] or r12d,r11d mov r11d,ecx add eax,r12d @@ -209,10 +209,10 @@ $L$loop:: rol eax,5 add eax,ebx not r11d - lea edx,DWORD PTR[0c040b340h+r10*1+rdx] + lea edx,DWORD PTR[((-1069501632))+r10*1+rdx] and r12d,eax and r11d,ebx - mov r10d,DWORD PTR[((11*4))+rsi] + mov r10d,DWORD PTR[44+rsi] or r12d,r11d mov r11d,ebx add edx,r12d @@ -220,10 +220,10 @@ $L$loop:: rol edx,9 add edx,eax not r11d - lea ecx,DWORD PTR[0265e5a51h+r10*1+rcx] + lea ecx,DWORD PTR[643717713+r10*1+rcx] and r12d,edx and r11d,eax - mov r10d,DWORD PTR[((0*4))+rsi] + mov r10d,DWORD PTR[rsi] or r12d,r11d mov r11d,eax add ecx,r12d @@ -231,10 +231,10 @@ $L$loop:: rol ecx,14 add ecx,edx not r11d - lea ebx,DWORD PTR[0e9b6c7aah+r10*1+rbx] + lea ebx,DWORD PTR[((-373897302))+r10*1+rbx] and r12d,ecx and r11d,edx - mov r10d,DWORD PTR[((5*4))+rsi] + mov r10d,DWORD PTR[20+rsi] or r12d,r11d mov r11d,edx add ebx,r12d @@ -242,10 +242,10 @@ $L$loop:: rol ebx,20 add ebx,ecx not r11d - lea eax,DWORD PTR[0d62f105dh+r10*1+rax] + lea eax,DWORD PTR[((-701558691))+r10*1+rax] and r12d,ebx and r11d,ecx - mov r10d,DWORD PTR[((10*4))+rsi] + mov r10d,DWORD PTR[40+rsi] or r12d,r11d mov r11d,ecx add eax,r12d @@ -253,10 +253,10 @@ $L$loop:: rol eax,5 add eax,ebx not r11d - lea edx,DWORD PTR[02441453h+r10*1+rdx] + lea edx,DWORD PTR[38016083+r10*1+rdx] and r12d,eax and r11d,ebx - mov r10d,DWORD PTR[((15*4))+rsi] + mov r10d,DWORD PTR[60+rsi] or r12d,r11d mov r11d,ebx add edx,r12d @@ -264,10 +264,10 @@ $L$loop:: rol edx,9 add edx,eax not r11d - lea ecx,DWORD PTR[0d8a1e681h+r10*1+rcx] + lea ecx,DWORD PTR[((-660478335))+r10*1+rcx] and r12d,edx and r11d,eax - mov r10d,DWORD PTR[((4*4))+rsi] + mov r10d,DWORD PTR[16+rsi] or r12d,r11d mov r11d,eax add ecx,r12d @@ -275,10 +275,10 @@ $L$loop:: rol ecx,14 add ecx,edx not r11d - lea ebx,DWORD PTR[0e7d3fbc8h+r10*1+rbx] + lea ebx,DWORD PTR[((-405537848))+r10*1+rbx] and r12d,ecx and r11d,edx - mov r10d,DWORD PTR[((9*4))+rsi] + mov r10d,DWORD PTR[36+rsi] or r12d,r11d mov r11d,edx add ebx,r12d @@ -286,10 +286,10 @@ $L$loop:: rol ebx,20 add ebx,ecx not r11d - lea eax,DWORD PTR[021e1cde6h+r10*1+rax] + lea eax,DWORD PTR[568446438+r10*1+rax] and r12d,ebx and r11d,ecx - mov r10d,DWORD PTR[((14*4))+rsi] + mov r10d,DWORD PTR[56+rsi] or r12d,r11d mov r11d,ecx add eax,r12d @@ -297,10 +297,10 @@ $L$loop:: rol eax,5 add eax,ebx not r11d - lea edx,DWORD PTR[0c33707d6h+r10*1+rdx] + lea edx,DWORD PTR[((-1019803690))+r10*1+rdx] and r12d,eax and r11d,ebx - mov r10d,DWORD PTR[((3*4))+rsi] + mov r10d,DWORD PTR[12+rsi] or r12d,r11d mov r11d,ebx add edx,r12d @@ -308,10 +308,10 @@ $L$loop:: rol edx,9 add edx,eax not r11d - lea ecx,DWORD PTR[0f4d50d87h+r10*1+rcx] + lea ecx,DWORD PTR[((-187363961))+r10*1+rcx] and r12d,edx and r11d,eax - mov r10d,DWORD PTR[((8*4))+rsi] + mov r10d,DWORD PTR[32+rsi] or r12d,r11d mov r11d,eax add ecx,r12d @@ -319,10 +319,10 @@ $L$loop:: rol ecx,14 add ecx,edx not r11d - lea ebx,DWORD PTR[0455a14edh+r10*1+rbx] + lea ebx,DWORD PTR[1163531501+r10*1+rbx] and r12d,ecx and r11d,edx - mov r10d,DWORD PTR[((13*4))+rsi] + mov r10d,DWORD PTR[52+rsi] or r12d,r11d mov 
r11d,edx add ebx,r12d @@ -330,10 +330,10 @@ $L$loop:: rol ebx,20 add ebx,ecx not r11d - lea eax,DWORD PTR[0a9e3e905h+r10*1+rax] + lea eax,DWORD PTR[((-1444681467))+r10*1+rax] and r12d,ebx and r11d,ecx - mov r10d,DWORD PTR[((2*4))+rsi] + mov r10d,DWORD PTR[8+rsi] or r12d,r11d mov r11d,ecx add eax,r12d @@ -341,10 +341,10 @@ $L$loop:: rol eax,5 add eax,ebx not r11d - lea edx,DWORD PTR[0fcefa3f8h+r10*1+rdx] + lea edx,DWORD PTR[((-51403784))+r10*1+rdx] and r12d,eax and r11d,ebx - mov r10d,DWORD PTR[((7*4))+rsi] + mov r10d,DWORD PTR[28+rsi] or r12d,r11d mov r11d,ebx add edx,r12d @@ -352,10 +352,10 @@ $L$loop:: rol edx,9 add edx,eax not r11d - lea ecx,DWORD PTR[0676f02d9h+r10*1+rcx] + lea ecx,DWORD PTR[1735328473+r10*1+rcx] and r12d,edx and r11d,eax - mov r10d,DWORD PTR[((12*4))+rsi] + mov r10d,DWORD PTR[48+rsi] or r12d,r11d mov r11d,eax add ecx,r12d @@ -363,289 +363,289 @@ $L$loop:: rol ecx,14 add ecx,edx not r11d - lea ebx,DWORD PTR[08d2a4c8ah+r10*1+rbx] + lea ebx,DWORD PTR[((-1926607734))+r10*1+rbx] and r12d,ecx and r11d,edx - mov r10d,DWORD PTR[((0*4))+rsi] + mov r10d,DWORD PTR[rsi] or r12d,r11d mov r11d,edx add ebx,r12d mov r12d,edx rol ebx,20 add ebx,ecx - mov r10d,DWORD PTR[((5*4))+rsi] + mov r10d,DWORD PTR[20+rsi] mov r11d,ecx - lea eax,DWORD PTR[0fffa3942h+r10*1+rax] - mov r10d,DWORD PTR[((8*4))+rsi] + lea eax,DWORD PTR[((-378558))+r10*1+rax] + mov r10d,DWORD PTR[32+rsi] xor r11d,edx xor r11d,ebx add eax,r11d rol eax,4 mov r11d,ebx add eax,ebx - lea edx,DWORD PTR[08771f681h+r10*1+rdx] - mov r10d,DWORD PTR[((11*4))+rsi] + lea edx,DWORD PTR[((-2022574463))+r10*1+rdx] + mov r10d,DWORD PTR[44+rsi] xor r11d,ecx xor r11d,eax add edx,r11d rol edx,11 mov r11d,eax add edx,eax - lea ecx,DWORD PTR[06d9d6122h+r10*1+rcx] - mov r10d,DWORD PTR[((14*4))+rsi] + lea ecx,DWORD PTR[1839030562+r10*1+rcx] + mov r10d,DWORD PTR[56+rsi] xor r11d,ebx xor r11d,edx add ecx,r11d rol ecx,16 mov r11d,edx add ecx,edx - lea ebx,DWORD PTR[0fde5380ch+r10*1+rbx] - mov r10d,DWORD PTR[((1*4))+rsi] + lea ebx,DWORD PTR[((-35309556))+r10*1+rbx] + mov r10d,DWORD PTR[4+rsi] xor r11d,eax xor r11d,ecx add ebx,r11d rol ebx,23 mov r11d,ecx add ebx,ecx - lea eax,DWORD PTR[0a4beea44h+r10*1+rax] - mov r10d,DWORD PTR[((4*4))+rsi] + lea eax,DWORD PTR[((-1530992060))+r10*1+rax] + mov r10d,DWORD PTR[16+rsi] xor r11d,edx xor r11d,ebx add eax,r11d rol eax,4 mov r11d,ebx add eax,ebx - lea edx,DWORD PTR[04bdecfa9h+r10*1+rdx] - mov r10d,DWORD PTR[((7*4))+rsi] + lea edx,DWORD PTR[1272893353+r10*1+rdx] + mov r10d,DWORD PTR[28+rsi] xor r11d,ecx xor r11d,eax add edx,r11d rol edx,11 mov r11d,eax add edx,eax - lea ecx,DWORD PTR[0f6bb4b60h+r10*1+rcx] - mov r10d,DWORD PTR[((10*4))+rsi] + lea ecx,DWORD PTR[((-155497632))+r10*1+rcx] + mov r10d,DWORD PTR[40+rsi] xor r11d,ebx xor r11d,edx add ecx,r11d rol ecx,16 mov r11d,edx add ecx,edx - lea ebx,DWORD PTR[0bebfbc70h+r10*1+rbx] - mov r10d,DWORD PTR[((13*4))+rsi] + lea ebx,DWORD PTR[((-1094730640))+r10*1+rbx] + mov r10d,DWORD PTR[52+rsi] xor r11d,eax xor r11d,ecx add ebx,r11d rol ebx,23 mov r11d,ecx add ebx,ecx - lea eax,DWORD PTR[0289b7ec6h+r10*1+rax] - mov r10d,DWORD PTR[((0*4))+rsi] + lea eax,DWORD PTR[681279174+r10*1+rax] + mov r10d,DWORD PTR[rsi] xor r11d,edx xor r11d,ebx add eax,r11d rol eax,4 mov r11d,ebx add eax,ebx - lea edx,DWORD PTR[0eaa127fah+r10*1+rdx] - mov r10d,DWORD PTR[((3*4))+rsi] + lea edx,DWORD PTR[((-358537222))+r10*1+rdx] + mov r10d,DWORD PTR[12+rsi] xor r11d,ecx xor r11d,eax add edx,r11d rol edx,11 mov r11d,eax add edx,eax - lea ecx,DWORD PTR[0d4ef3085h+r10*1+rcx] - mov r10d,DWORD 
PTR[((6*4))+rsi] + lea ecx,DWORD PTR[((-722521979))+r10*1+rcx] + mov r10d,DWORD PTR[24+rsi] xor r11d,ebx xor r11d,edx add ecx,r11d rol ecx,16 mov r11d,edx add ecx,edx - lea ebx,DWORD PTR[04881d05h+r10*1+rbx] - mov r10d,DWORD PTR[((9*4))+rsi] + lea ebx,DWORD PTR[76029189+r10*1+rbx] + mov r10d,DWORD PTR[36+rsi] xor r11d,eax xor r11d,ecx add ebx,r11d rol ebx,23 mov r11d,ecx add ebx,ecx - lea eax,DWORD PTR[0d9d4d039h+r10*1+rax] - mov r10d,DWORD PTR[((12*4))+rsi] + lea eax,DWORD PTR[((-640364487))+r10*1+rax] + mov r10d,DWORD PTR[48+rsi] xor r11d,edx xor r11d,ebx add eax,r11d rol eax,4 mov r11d,ebx add eax,ebx - lea edx,DWORD PTR[0e6db99e5h+r10*1+rdx] - mov r10d,DWORD PTR[((15*4))+rsi] + lea edx,DWORD PTR[((-421815835))+r10*1+rdx] + mov r10d,DWORD PTR[60+rsi] xor r11d,ecx xor r11d,eax add edx,r11d rol edx,11 mov r11d,eax add edx,eax - lea ecx,DWORD PTR[01fa27cf8h+r10*1+rcx] - mov r10d,DWORD PTR[((2*4))+rsi] + lea ecx,DWORD PTR[530742520+r10*1+rcx] + mov r10d,DWORD PTR[8+rsi] xor r11d,ebx xor r11d,edx add ecx,r11d rol ecx,16 mov r11d,edx add ecx,edx - lea ebx,DWORD PTR[0c4ac5665h+r10*1+rbx] - mov r10d,DWORD PTR[((0*4))+rsi] + lea ebx,DWORD PTR[((-995338651))+r10*1+rbx] + mov r10d,DWORD PTR[rsi] xor r11d,eax xor r11d,ecx add ebx,r11d rol ebx,23 mov r11d,ecx add ebx,ecx - mov r10d,DWORD PTR[((0*4))+rsi] + mov r10d,DWORD PTR[rsi] mov r11d,0ffffffffh xor r11d,edx - lea eax,DWORD PTR[0f4292244h+r10*1+rax] + lea eax,DWORD PTR[((-198630844))+r10*1+rax] or r11d,ebx xor r11d,ecx add eax,r11d - mov r10d,DWORD PTR[((7*4))+rsi] + mov r10d,DWORD PTR[28+rsi] mov r11d,0ffffffffh rol eax,6 xor r11d,ecx add eax,ebx - lea edx,DWORD PTR[0432aff97h+r10*1+rdx] + lea edx,DWORD PTR[1126891415+r10*1+rdx] or r11d,eax xor r11d,ebx add edx,r11d - mov r10d,DWORD PTR[((14*4))+rsi] + mov r10d,DWORD PTR[56+rsi] mov r11d,0ffffffffh rol edx,10 xor r11d,ebx add edx,eax - lea ecx,DWORD PTR[0ab9423a7h+r10*1+rcx] + lea ecx,DWORD PTR[((-1416354905))+r10*1+rcx] or r11d,edx xor r11d,eax add ecx,r11d - mov r10d,DWORD PTR[((5*4))+rsi] + mov r10d,DWORD PTR[20+rsi] mov r11d,0ffffffffh rol ecx,15 xor r11d,eax add ecx,edx - lea ebx,DWORD PTR[0fc93a039h+r10*1+rbx] + lea ebx,DWORD PTR[((-57434055))+r10*1+rbx] or r11d,ecx xor r11d,edx add ebx,r11d - mov r10d,DWORD PTR[((12*4))+rsi] + mov r10d,DWORD PTR[48+rsi] mov r11d,0ffffffffh rol ebx,21 xor r11d,edx add ebx,ecx - lea eax,DWORD PTR[0655b59c3h+r10*1+rax] + lea eax,DWORD PTR[1700485571+r10*1+rax] or r11d,ebx xor r11d,ecx add eax,r11d - mov r10d,DWORD PTR[((3*4))+rsi] + mov r10d,DWORD PTR[12+rsi] mov r11d,0ffffffffh rol eax,6 xor r11d,ecx add eax,ebx - lea edx,DWORD PTR[08f0ccc92h+r10*1+rdx] + lea edx,DWORD PTR[((-1894986606))+r10*1+rdx] or r11d,eax xor r11d,ebx add edx,r11d - mov r10d,DWORD PTR[((10*4))+rsi] + mov r10d,DWORD PTR[40+rsi] mov r11d,0ffffffffh rol edx,10 xor r11d,ebx add edx,eax - lea ecx,DWORD PTR[0ffeff47dh+r10*1+rcx] + lea ecx,DWORD PTR[((-1051523))+r10*1+rcx] or r11d,edx xor r11d,eax add ecx,r11d - mov r10d,DWORD PTR[((1*4))+rsi] + mov r10d,DWORD PTR[4+rsi] mov r11d,0ffffffffh rol ecx,15 xor r11d,eax add ecx,edx - lea ebx,DWORD PTR[085845dd1h+r10*1+rbx] + lea ebx,DWORD PTR[((-2054922799))+r10*1+rbx] or r11d,ecx xor r11d,edx add ebx,r11d - mov r10d,DWORD PTR[((8*4))+rsi] + mov r10d,DWORD PTR[32+rsi] mov r11d,0ffffffffh rol ebx,21 xor r11d,edx add ebx,ecx - lea eax,DWORD PTR[06fa87e4fh+r10*1+rax] + lea eax,DWORD PTR[1873313359+r10*1+rax] or r11d,ebx xor r11d,ecx add eax,r11d - mov r10d,DWORD PTR[((15*4))+rsi] + mov r10d,DWORD PTR[60+rsi] mov r11d,0ffffffffh rol eax,6 xor 
r11d,ecx add eax,ebx - lea edx,DWORD PTR[0fe2ce6e0h+r10*1+rdx] + lea edx,DWORD PTR[((-30611744))+r10*1+rdx] or r11d,eax xor r11d,ebx add edx,r11d - mov r10d,DWORD PTR[((6*4))+rsi] + mov r10d,DWORD PTR[24+rsi] mov r11d,0ffffffffh rol edx,10 xor r11d,ebx add edx,eax - lea ecx,DWORD PTR[0a3014314h+r10*1+rcx] + lea ecx,DWORD PTR[((-1560198380))+r10*1+rcx] or r11d,edx xor r11d,eax add ecx,r11d - mov r10d,DWORD PTR[((13*4))+rsi] + mov r10d,DWORD PTR[52+rsi] mov r11d,0ffffffffh rol ecx,15 xor r11d,eax add ecx,edx - lea ebx,DWORD PTR[04e0811a1h+r10*1+rbx] + lea ebx,DWORD PTR[1309151649+r10*1+rbx] or r11d,ecx xor r11d,edx add ebx,r11d - mov r10d,DWORD PTR[((4*4))+rsi] + mov r10d,DWORD PTR[16+rsi] mov r11d,0ffffffffh rol ebx,21 xor r11d,edx add ebx,ecx - lea eax,DWORD PTR[0f7537e82h+r10*1+rax] + lea eax,DWORD PTR[((-145523070))+r10*1+rax] or r11d,ebx xor r11d,ecx add eax,r11d - mov r10d,DWORD PTR[((11*4))+rsi] + mov r10d,DWORD PTR[44+rsi] mov r11d,0ffffffffh rol eax,6 xor r11d,ecx add eax,ebx - lea edx,DWORD PTR[0bd3af235h+r10*1+rdx] + lea edx,DWORD PTR[((-1120210379))+r10*1+rdx] or r11d,eax xor r11d,ebx add edx,r11d - mov r10d,DWORD PTR[((2*4))+rsi] + mov r10d,DWORD PTR[8+rsi] mov r11d,0ffffffffh rol edx,10 xor r11d,ebx add edx,eax - lea ecx,DWORD PTR[02ad7d2bbh+r10*1+rcx] + lea ecx,DWORD PTR[718787259+r10*1+rcx] or r11d,edx xor r11d,eax add ecx,r11d - mov r10d,DWORD PTR[((9*4))+rsi] + mov r10d,DWORD PTR[36+rsi] mov r11d,0ffffffffh rol ecx,15 xor r11d,eax add ecx,edx - lea ebx,DWORD PTR[0eb86d391h+r10*1+rbx] + lea ebx,DWORD PTR[((-343485551))+r10*1+rbx] or r11d,ecx xor r11d,edx add ebx,r11d - mov r10d,DWORD PTR[((0*4))+rsi] + mov r10d,DWORD PTR[rsi] mov r11d,0ffffffffh rol ebx,21 xor r11d,edx @@ -664,10 +664,10 @@ $L$loop:: $L$end:: - mov DWORD PTR[((0*4))+rbp],eax - mov DWORD PTR[((1*4))+rbp],ebx - mov DWORD PTR[((2*4))+rbp],ecx - mov DWORD PTR[((3*4))+rbp],edx + mov DWORD PTR[rbp],eax + mov DWORD PTR[4+rbp],ebx + mov DWORD PTR[8+rbp],ecx + mov DWORD PTR[12+rbp],edx mov r15,QWORD PTR[rsp] mov r14,QWORD PTR[8+rsp] diff --git a/deps/openssl/asm/x64-win32-masm/rc4/rc4-md5-x86_64.asm b/deps/openssl/asm/x64-win32-masm/rc4/rc4-md5-x86_64.asm new file mode 100644 index 0000000000..4af838e3e9 --- /dev/null +++ b/deps/openssl/asm/x64-win32-masm/rc4/rc4-md5-x86_64.asm @@ -0,0 +1,1375 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(64) 'CODE' +ALIGN 16 + +PUBLIC rc4_md5_enc + +rc4_md5_enc PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rc4_md5_enc:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + cmp r9,0 + je $L$abort + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + sub rsp,40 +$L$body:: + mov r11,rcx + mov r12,r9 + mov r13,rsi + mov r14,rdx + mov r15,r8 + xor rbp,rbp + xor rcx,rcx + + lea rdi,QWORD PTR[8+rdi] + mov bpl,BYTE PTR[((-8))+rdi] + mov cl,BYTE PTR[((-4))+rdi] + + inc bpl + sub r14,r13 + mov eax,DWORD PTR[rbp*4+rdi] + add cl,al + lea rsi,QWORD PTR[rbp*4+rdi] + shl r12,6 + add r12,r15 + mov QWORD PTR[16+rsp],r12 + + mov QWORD PTR[24+rsp],r11 + mov r8d,DWORD PTR[r11] + mov r9d,DWORD PTR[4+r11] + mov r10d,DWORD PTR[8+r11] + mov r11d,DWORD PTR[12+r11] + jmp $L$oop + +ALIGN 16 +$L$oop:: + mov DWORD PTR[rsp],r8d + mov DWORD PTR[4+rsp],r9d + mov DWORD PTR[8+rsp],r10d + mov r12d,r11d + mov DWORD PTR[12+rsp],r11d + pxor xmm0,xmm0 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD PTR[r15] + add al,dl + mov 
ebx,DWORD PTR[4+rsi] + add r8d,3614090360 + xor r12d,r11d + movzx eax,al + mov DWORD PTR[rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + movd xmm0,DWORD PTR[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD PTR[4+r15] + add bl,dl + mov eax,DWORD PTR[8+rsi] + add r11d,3905402710 + xor r12d,r10d + movzx ebx,bl + mov DWORD PTR[4+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + movd xmm1,DWORD PTR[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD PTR[8+r15] + add al,dl + mov ebx,DWORD PTR[12+rsi] + add r10d,606105819 + xor r12d,r9d + movzx eax,al + mov DWORD PTR[8+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD PTR[12+r15] + add bl,dl + mov eax,DWORD PTR[16+rsi] + add r9d,3250441966 + xor r12d,r8d + movzx ebx,bl + mov DWORD PTR[12+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD PTR[16+r15] + add al,dl + mov ebx,DWORD PTR[20+rsi] + add r8d,4118548399 + xor r12d,r11d + movzx eax,al + mov DWORD PTR[16+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD PTR[20+r15] + add bl,dl + mov eax,DWORD PTR[24+rsi] + add r11d,1200080426 + xor r12d,r10d + movzx ebx,bl + mov DWORD PTR[20+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD PTR[24+r15] + add al,dl + mov ebx,DWORD PTR[28+rsi] + add r10d,2821735955 + xor r12d,r9d + movzx eax,al + mov DWORD PTR[24+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD PTR[28+r15] + add bl,dl + mov eax,DWORD PTR[32+rsi] + add r9d,4249261313 + xor r12d,r8d + movzx ebx,bl + mov DWORD PTR[28+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD PTR[32+r15] + add al,dl + mov ebx,DWORD PTR[36+rsi] + add r8d,1770035416 + xor r12d,r11d + movzx eax,al + mov DWORD PTR[32+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD PTR[36+r15] + add bl,dl + mov eax,DWORD PTR[40+rsi] + add r11d,2336552879 + xor r12d,r10d + movzx ebx,bl + mov DWORD PTR[36+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD PTR[40+r15] + add al,dl + mov ebx,DWORD PTR[44+rsi] + add r10d,4294925233 + 
xor r12d,r9d + movzx eax,al + mov DWORD PTR[40+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD PTR[44+r15] + add bl,dl + mov eax,DWORD PTR[48+rsi] + add r9d,2304563134 + xor r12d,r8d + movzx ebx,bl + mov DWORD PTR[44+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD PTR[48+r15] + add al,dl + mov ebx,DWORD PTR[52+rsi] + add r8d,1804603682 + xor r12d,r11d + movzx eax,al + mov DWORD PTR[48+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD PTR[52+r15] + add bl,dl + mov eax,DWORD PTR[56+rsi] + add r11d,4254626195 + xor r12d,r10d + movzx ebx,bl + mov DWORD PTR[52+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD PTR[56+r15] + add al,dl + mov ebx,DWORD PTR[60+rsi] + add r10d,2792965006 + xor r12d,r9d + movzx eax,al + mov DWORD PTR[56+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm2,XMMWORD PTR[r13] + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD PTR[60+r15] + add bl,dl + mov eax,DWORD PTR[64+rsi] + add r9d,1236535329 + xor r12d,r8d + movzx ebx,bl + mov DWORD PTR[60+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r10d + pinsrw xmm1,WORD PTR[rbx*4+rdi],7 + + add r9d,r10d + psllq xmm1,8 + pxor xmm2,xmm0 + pxor xmm2,xmm1 + pxor xmm0,xmm0 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD PTR[4+r15] + add al,dl + mov ebx,DWORD PTR[68+rsi] + add r8d,4129170786 + xor r12d,r10d + movzx eax,al + mov DWORD PTR[64+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + movd xmm0,DWORD PTR[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD PTR[24+r15] + add bl,dl + mov eax,DWORD PTR[72+rsi] + add r11d,3225465664 + xor r12d,r9d + movzx ebx,bl + mov DWORD PTR[68+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + movd xmm1,DWORD PTR[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD PTR[44+r15] + add al,dl + mov ebx,DWORD PTR[76+rsi] + add r10d,643717713 + xor r12d,r8d + movzx eax,al + mov DWORD PTR[72+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD PTR[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD PTR[r15] + add bl,dl + mov eax,DWORD PTR[80+rsi] + add r9d,3921069994 + xor r12d,r11d + movzx ebx,bl + mov DWORD PTR[76+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r10d + pinsrw xmm1,WORD PTR[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD PTR[20+r15] + add 
al,dl + mov ebx,DWORD PTR[84+rsi] + add r8d,3593408605 + xor r12d,r10d + movzx eax,al + mov DWORD PTR[80+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + pinsrw xmm0,WORD PTR[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD PTR[40+r15] + add bl,dl + mov eax,DWORD PTR[88+rsi] + add r11d,38016083 + xor r12d,r9d + movzx ebx,bl + mov DWORD PTR[84+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + pinsrw xmm1,WORD PTR[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD PTR[60+r15] + add al,dl + mov ebx,DWORD PTR[92+rsi] + add r10d,3634488961 + xor r12d,r8d + movzx eax,al + mov DWORD PTR[88+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD PTR[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD PTR[16+r15] + add bl,dl + mov eax,DWORD PTR[96+rsi] + add r9d,3889429448 + xor r12d,r11d + movzx ebx,bl + mov DWORD PTR[92+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r10d + pinsrw xmm1,WORD PTR[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD PTR[36+r15] + add al,dl + mov ebx,DWORD PTR[100+rsi] + add r8d,568446438 + xor r12d,r10d + movzx eax,al + mov DWORD PTR[96+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + pinsrw xmm0,WORD PTR[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD PTR[56+r15] + add bl,dl + mov eax,DWORD PTR[104+rsi] + add r11d,3275163606 + xor r12d,r9d + movzx ebx,bl + mov DWORD PTR[100+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + pinsrw xmm1,WORD PTR[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD PTR[12+r15] + add al,dl + mov ebx,DWORD PTR[108+rsi] + add r10d,4107603335 + xor r12d,r8d + movzx eax,al + mov DWORD PTR[104+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD PTR[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD PTR[32+r15] + add bl,dl + mov eax,DWORD PTR[112+rsi] + add r9d,1163531501 + xor r12d,r11d + movzx ebx,bl + mov DWORD PTR[108+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r10d + pinsrw xmm1,WORD PTR[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD PTR[52+r15] + add al,dl + mov ebx,DWORD PTR[116+rsi] + add r8d,2850285829 + xor r12d,r10d + movzx eax,al + mov DWORD PTR[112+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + pinsrw xmm0,WORD PTR[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD PTR[8+r15] + add bl,dl + mov eax,DWORD PTR[120+rsi] + add r11d,4243563512 + xor r12d,r9d + movzx ebx,bl + mov DWORD PTR[116+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + pinsrw xmm1,WORD PTR[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD PTR[28+r15] + add al,dl + mov ebx,DWORD PTR[124+rsi] + add 
r10d,1735328473 + xor r12d,r8d + movzx eax,al + mov DWORD PTR[120+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD PTR[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm3,XMMWORD PTR[16+r13] + add bpl,32 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD PTR[48+r15] + add bl,dl + mov eax,DWORD PTR[rbp*4+rdi] + add r9d,2368359562 + xor r12d,r11d + movzx ebx,bl + mov DWORD PTR[124+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],7 + + add r9d,r10d + mov rsi,rcx + xor rcx,rcx + mov cl,sil + lea rsi,QWORD PTR[rbp*4+rdi] + psllq xmm1,8 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + pxor xmm0,xmm0 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD PTR[20+r15] + add al,dl + mov ebx,DWORD PTR[4+rsi] + add r8d,4294588738 + movzx eax,al + add r8d,r12d + mov DWORD PTR[rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + movd xmm0,DWORD PTR[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD PTR[32+r15] + add bl,dl + mov eax,DWORD PTR[8+rsi] + add r11d,2272392833 + movzx ebx,bl + add r11d,r12d + mov DWORD PTR[4+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + movd xmm1,DWORD PTR[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD PTR[44+r15] + add al,dl + mov ebx,DWORD PTR[12+rsi] + add r10d,1839030562 + movzx eax,al + add r10d,r12d + mov DWORD PTR[8+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD PTR[56+r15] + add bl,dl + mov eax,DWORD PTR[16+rsi] + add r9d,4259657740 + movzx ebx,bl + add r9d,r12d + mov DWORD PTR[12+rsi],edx + add cl,al + rol r9d,23 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD PTR[4+r15] + add al,dl + mov ebx,DWORD PTR[20+rsi] + add r8d,2763975236 + movzx eax,al + add r8d,r12d + mov DWORD PTR[16+rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD PTR[16+r15] + add bl,dl + mov eax,DWORD PTR[24+rsi] + add r11d,1272893353 + movzx ebx,bl + add r11d,r12d + mov DWORD PTR[20+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD PTR[28+r15] + add al,dl + mov ebx,DWORD PTR[28+rsi] + add r10d,4139469664 + movzx eax,al + add r10d,r12d + mov DWORD PTR[24+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD PTR[40+r15] + add bl,dl + mov eax,DWORD PTR[32+rsi] + add r9d,3200236656 + movzx ebx,bl + add r9d,r12d + mov DWORD PTR[28+rsi],edx + add cl,al + rol r9d,23 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD PTR[52+r15] + add al,dl + mov 
ebx,DWORD PTR[36+rsi] + add r8d,681279174 + movzx eax,al + add r8d,r12d + mov DWORD PTR[32+rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD PTR[r15] + add bl,dl + mov eax,DWORD PTR[40+rsi] + add r11d,3936430074 + movzx ebx,bl + add r11d,r12d + mov DWORD PTR[36+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD PTR[12+r15] + add al,dl + mov ebx,DWORD PTR[44+rsi] + add r10d,3572445317 + movzx eax,al + add r10d,r12d + mov DWORD PTR[40+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD PTR[24+r15] + add bl,dl + mov eax,DWORD PTR[48+rsi] + add r9d,76029189 + movzx ebx,bl + add r9d,r12d + mov DWORD PTR[44+rsi],edx + add cl,al + rol r9d,23 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD PTR[36+r15] + add al,dl + mov ebx,DWORD PTR[52+rsi] + add r8d,3654602809 + movzx eax,al + add r8d,r12d + mov DWORD PTR[48+rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD PTR[48+r15] + add bl,dl + mov eax,DWORD PTR[56+rsi] + add r11d,3873151461 + movzx ebx,bl + add r11d,r12d + mov DWORD PTR[52+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD PTR[60+r15] + add al,dl + mov ebx,DWORD PTR[60+rsi] + add r10d,530742520 + movzx eax,al + add r10d,r12d + mov DWORD PTR[56+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm4,XMMWORD PTR[32+r13] + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD PTR[8+r15] + add bl,dl + mov eax,DWORD PTR[64+rsi] + add r9d,3299628645 + movzx ebx,bl + add r9d,r12d + mov DWORD PTR[60+rsi],edx + add cl,al + rol r9d,23 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],7 + + add r9d,r10d + psllq xmm1,8 + pxor xmm4,xmm0 + pxor xmm4,xmm1 + pxor xmm0,xmm0 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD PTR[r15] + add al,dl + mov ebx,DWORD PTR[68+rsi] + add r8d,4096336452 + movzx eax,al + xor r12d,r10d + mov DWORD PTR[64+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + movd xmm0,DWORD PTR[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD PTR[28+r15] + add bl,dl + mov eax,DWORD PTR[72+rsi] + add r11d,1126891415 + movzx ebx,bl + xor r12d,r9d + mov DWORD PTR[68+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + movd xmm1,DWORD PTR[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD PTR[56+r15] + add al,dl + mov ebx,DWORD PTR[76+rsi] + add r10d,2878612391 + movzx eax,al + xor r12d,r8d + mov 
DWORD PTR[72+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD PTR[20+r15] + add bl,dl + mov eax,DWORD PTR[80+rsi] + add r9d,4237533241 + movzx ebx,bl + xor r12d,r11d + mov DWORD PTR[76+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD PTR[48+r15] + add al,dl + mov ebx,DWORD PTR[84+rsi] + add r8d,1700485571 + movzx eax,al + xor r12d,r10d + mov DWORD PTR[80+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD PTR[12+r15] + add bl,dl + mov eax,DWORD PTR[88+rsi] + add r11d,2399980690 + movzx ebx,bl + xor r12d,r9d + mov DWORD PTR[84+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD PTR[40+r15] + add al,dl + mov ebx,DWORD PTR[92+rsi] + add r10d,4293915773 + movzx eax,al + xor r12d,r8d + mov DWORD PTR[88+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD PTR[4+r15] + add bl,dl + mov eax,DWORD PTR[96+rsi] + add r9d,2240044497 + movzx ebx,bl + xor r12d,r11d + mov DWORD PTR[92+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD PTR[32+r15] + add al,dl + mov ebx,DWORD PTR[100+rsi] + add r8d,1873313359 + movzx eax,al + xor r12d,r10d + mov DWORD PTR[96+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD PTR[60+r15] + add bl,dl + mov eax,DWORD PTR[104+rsi] + add r11d,4264355552 + movzx ebx,bl + xor r12d,r9d + mov DWORD PTR[100+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD PTR[24+r15] + add al,dl + mov ebx,DWORD PTR[108+rsi] + add r10d,2734768916 + movzx eax,al + xor r12d,r8d + mov DWORD PTR[104+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD PTR[52+r15] + add bl,dl + mov eax,DWORD PTR[112+rsi] + add r9d,1309151649 + movzx ebx,bl + xor r12d,r11d + mov DWORD PTR[108+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD PTR[16+r15] + add al,dl + mov ebx,DWORD PTR[116+rsi] + add r8d,4149444226 + movzx eax,al + xor r12d,r10d + mov DWORD PTR[112+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + 
mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD PTR[44+r15] + add bl,dl + mov eax,DWORD PTR[120+rsi] + add r11d,3174756917 + movzx ebx,bl + xor r12d,r9d + mov DWORD PTR[116+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD PTR[8+r15] + add al,dl + mov ebx,DWORD PTR[124+rsi] + add r10d,718787259 + movzx eax,al + xor r12d,r8d + mov DWORD PTR[120+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm5,XMMWORD PTR[48+r13] + add bpl,32 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD PTR[36+r15] + add bl,dl + mov eax,DWORD PTR[rbp*4+rdi] + add r9d,3951481745 + movzx ebx,bl + xor r12d,r11d + mov DWORD PTR[124+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],7 + + add r9d,r10d + mov rsi,rbp + xor rbp,rbp + mov bpl,sil + mov rsi,rcx + xor rcx,rcx + mov cl,sil + lea rsi,QWORD PTR[rbp*4+rdi] + psllq xmm1,8 + pxor xmm5,xmm0 + pxor xmm5,xmm1 + add r8d,DWORD PTR[rsp] + add r9d,DWORD PTR[4+rsp] + add r10d,DWORD PTR[8+rsp] + add r11d,DWORD PTR[12+rsp] + + movdqu XMMWORD PTR[r13*1+r14],xmm2 + movdqu XMMWORD PTR[16+r13*1+r14],xmm3 + movdqu XMMWORD PTR[32+r13*1+r14],xmm4 + movdqu XMMWORD PTR[48+r13*1+r14],xmm5 + lea r15,QWORD PTR[64+r15] + lea r13,QWORD PTR[64+r13] + cmp r15,QWORD PTR[16+rsp] + jb $L$oop + + mov r12,QWORD PTR[24+rsp] + sub cl,al + mov DWORD PTR[r12],r8d + mov DWORD PTR[4+r12],r9d + mov DWORD PTR[8+r12],r10d + mov DWORD PTR[12+r12],r11d + sub bpl,1 + mov DWORD PTR[((-8))+rdi],ebp + mov DWORD PTR[((-4))+rdi],ecx + + mov r15,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r13,QWORD PTR[56+rsp] + mov r12,QWORD PTR[64+rsp] + mov rbp,QWORD PTR[72+rsp] + mov rbx,QWORD PTR[80+rsp] + lea rsp,QWORD PTR[88+rsp] +$L$epilogue:: +$L$abort:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rc4_md5_enc:: +rc4_md5_enc ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$body] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jae $L$in_prologue + + mov r15,QWORD PTR[40+rax] + mov r14,QWORD PTR[48+rax] + mov r13,QWORD PTR[56+rax] + mov r12,QWORD PTR[64+rax] + mov rbp,QWORD PTR[72+rax] + mov rbx,QWORD PTR[80+rax] + lea rax,QWORD PTR[88+rax] + + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD 
PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_rc4_md5_enc + DD imagerel $L$SEH_end_rc4_md5_enc + DD imagerel $L$SEH_info_rc4_md5_enc + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_rc4_md5_enc:: +DB 9,0,0,0 + DD imagerel se_handler + +.xdata ENDS +END diff --git a/deps/openssl/asm/x64-win32-masm/rc4/rc4-x86_64.asm b/deps/openssl/asm/x64-win32-masm/rc4/rc4-x86_64.asm index f508fa6679..aea304fbad 100644 --- a/deps/openssl/asm/x64-win32-masm/rc4/rc4-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/rc4/rc4-x86_64.asm @@ -1,5 +1,6 @@ OPTION DOTNAME .text$ SEGMENT ALIGN(64) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR PUBLIC RC4 @@ -24,316 +25,511 @@ $L$entry:: push r12 push r13 $L$prologue:: + mov r11,rsi + mov r12,rdx + mov r13,rcx + xor r10,r10 + xor rcx,rcx - add rdi,8 - mov r8d,DWORD PTR[((-8))+rdi] - mov r12d,DWORD PTR[((-4))+rdi] + lea rdi,QWORD PTR[8+rdi] + mov r10b,BYTE PTR[((-8))+rdi] + mov cl,BYTE PTR[((-4))+rdi] cmp DWORD PTR[256+rdi],-1 je $L$RC4_CHAR - inc r8b - mov r9d,DWORD PTR[r8*4+rdi] - test rsi,-8 - jz $L$loop1 - jmp $L$loop8 -ALIGN 16 -$L$loop8:: - add r12b,r9b - mov r10,r8 - mov r13d,DWORD PTR[r12*4+rdi] - ror rax,8 - inc r10b - mov r11d,DWORD PTR[r10*4+rdi] - cmp r12,r10 - mov DWORD PTR[r12*4+rdi],r9d - cmove r11,r9 - mov DWORD PTR[r8*4+rdi],r13d - add r13b,r9b - mov al,BYTE PTR[r13*4+rdi] - add r12b,r11b - mov r8,r10 - mov r13d,DWORD PTR[r12*4+rdi] - ror rax,8 - inc r8b - mov r9d,DWORD PTR[r8*4+rdi] - cmp r12,r8 - mov DWORD PTR[r12*4+rdi],r11d - cmove r9,r11 - mov DWORD PTR[r10*4+rdi],r13d - add r13b,r11b - mov al,BYTE PTR[r13*4+rdi] - add r12b,r9b - mov r10,r8 - mov r13d,DWORD PTR[r12*4+rdi] - ror rax,8 + mov r8d,DWORD PTR[OPENSSL_ia32cap_P] + xor rbx,rbx inc r10b - mov r11d,DWORD PTR[r10*4+rdi] - cmp r12,r10 - mov DWORD PTR[r12*4+rdi],r9d - cmove r11,r9 - mov DWORD PTR[r8*4+rdi],r13d - add r13b,r9b - mov al,BYTE PTR[r13*4+rdi] - add r12b,r11b - mov r8,r10 - mov r13d,DWORD PTR[r12*4+rdi] - ror rax,8 - inc r8b - mov r9d,DWORD PTR[r8*4+rdi] - cmp r12,r8 - mov DWORD PTR[r12*4+rdi],r11d - cmove r9,r11 - mov DWORD PTR[r10*4+rdi],r13d - add r13b,r11b - mov al,BYTE PTR[r13*4+rdi] - add r12b,r9b - mov r10,r8 - mov r13d,DWORD PTR[r12*4+rdi] - ror rax,8 + sub rbx,r10 + sub r13,r12 + mov eax,DWORD PTR[r10*4+rdi] + test r11,-16 + jz $L$loop1 + bt r8d,30 + jc $L$intel + and rbx,7 + lea rsi,QWORD PTR[1+r10] + jz $L$oop8 + sub r11,rbx +$L$oop8_warmup:: + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov DWORD PTR[r10*4+rdi],edx + add al,dl inc r10b - mov r11d,DWORD PTR[r10*4+rdi] - cmp r12,r10 - mov DWORD PTR[r12*4+rdi],r9d - cmove r11,r9 - mov DWORD PTR[r8*4+rdi],r13d - add r13b,r9b - mov al,BYTE PTR[r13*4+rdi] - add r12b,r11b - mov r8,r10 - mov r13d,DWORD PTR[r12*4+rdi] - ror rax,8 - inc r8b - mov r9d,DWORD PTR[r8*4+rdi] - cmp r12,r8 - mov DWORD PTR[r12*4+rdi],r11d - cmove r9,r11 - mov DWORD PTR[r10*4+rdi],r13d - add r13b,r11b - mov al,BYTE PTR[r13*4+rdi] - add r12b,r9b - mov r10,r8 - mov r13d,DWORD PTR[r12*4+rdi] - ror rax,8 + mov edx,DWORD PTR[rax*4+rdi] + mov eax,DWORD PTR[r10*4+rdi] + xor dl,BYTE PTR[r12] + mov BYTE PTR[r12*1+r13],dl + lea r12,QWORD PTR[1+r12] + dec rbx + jnz $L$oop8_warmup + + lea rsi,QWORD PTR[1+r10] + jmp $L$oop8 +ALIGN 16 +$L$oop8:: + add 
cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov ebx,DWORD PTR[rsi*4+rdi] + ror r8,8 + mov DWORD PTR[r10*4+rdi],edx + add dl,al + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,bl + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + mov eax,DWORD PTR[4+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[4+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov ebx,DWORD PTR[8+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[8+r10*4+rdi],edx + add dl,al + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,bl + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + mov eax,DWORD PTR[12+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[12+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov ebx,DWORD PTR[16+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[16+r10*4+rdi],edx + add dl,al + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,bl + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + mov eax,DWORD PTR[20+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[20+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov ebx,DWORD PTR[24+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[24+r10*4+rdi],edx + add dl,al + mov r8b,BYTE PTR[rdx*4+rdi] + add sil,8 + add cl,bl + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + mov eax,DWORD PTR[((-4))+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[28+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE PTR[rdx*4+rdi] + add r10b,8 + ror r8,8 + sub r11,8 + + xor r8,QWORD PTR[r12] + mov QWORD PTR[r12*1+r13],r8 + lea r12,QWORD PTR[8+r12] + + test r11,-8 + jnz $L$oop8 + cmp r11,0 + jne $L$loop1 + jmp $L$exit + +ALIGN 16 +$L$intel:: + test r11,-32 + jz $L$loop1 + and rbx,15 + jz $L$oop16_is_hot + sub r11,rbx +$L$oop16_warmup:: + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov DWORD PTR[r10*4+rdi],edx + add al,dl inc r10b - mov r11d,DWORD PTR[r10*4+rdi] - cmp r12,r10 - mov DWORD PTR[r12*4+rdi],r9d - cmove r11,r9 - mov DWORD PTR[r8*4+rdi],r13d - add r13b,r9b - mov al,BYTE PTR[r13*4+rdi] - add r12b,r11b - mov r8,r10 - mov r13d,DWORD PTR[r12*4+rdi] - ror rax,8 - inc r8b - mov r9d,DWORD PTR[r8*4+rdi] - cmp r12,r8 - mov DWORD PTR[r12*4+rdi],r11d - cmove r9,r11 - mov DWORD PTR[r10*4+rdi],r13d - add r13b,r11b - mov al,BYTE PTR[r13*4+rdi] - ror rax,8 - sub rsi,8 - - xor rax,QWORD PTR[rdx] - add rdx,8 - mov QWORD PTR[rcx],rax - add rcx,8 - - test rsi,-8 - jnz $L$loop8 - cmp rsi,0 + mov edx,DWORD PTR[rax*4+rdi] + mov eax,DWORD PTR[r10*4+rdi] + xor dl,BYTE PTR[r12] + mov BYTE PTR[r12*1+r13],dl + lea r12,QWORD PTR[1+r12] + dec rbx + jnz $L$oop16_warmup + + mov rbx,rcx + xor rcx,rcx + mov cl,bl + +$L$oop16_is_hot:: + lea rsi,QWORD PTR[r10*4+rdi] + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + pxor xmm0,xmm0 + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[4+rsi] + movzx eax,al + mov DWORD PTR[rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],0 + jmp $L$oop16_enter +ALIGN 16 +$L$oop16:: + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + pxor xmm2,xmm0 + psllq xmm1,8 + pxor xmm0,xmm0 + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[4+rsi] + movzx eax,al + mov DWORD PTR[rsi],edx + pxor xmm2,xmm1 + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],0 + movdqu XMMWORD PTR[r12*1+r13],xmm2 + lea r12,QWORD PTR[16+r12] +$L$oop16_enter:: + mov edx,DWORD PTR[rcx*4+rdi] + pxor xmm1,xmm1 + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[8+rsi] + 
movzx ebx,bl + mov DWORD PTR[4+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],0 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[12+rsi] + movzx eax,al + mov DWORD PTR[8+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],1 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[16+rsi] + movzx ebx,bl + mov DWORD PTR[12+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],1 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[20+rsi] + movzx eax,al + mov DWORD PTR[16+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],2 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[24+rsi] + movzx ebx,bl + mov DWORD PTR[20+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],2 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[28+rsi] + movzx eax,al + mov DWORD PTR[24+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],3 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[32+rsi] + movzx ebx,bl + mov DWORD PTR[28+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],3 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[36+rsi] + movzx eax,al + mov DWORD PTR[32+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],4 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[40+rsi] + movzx ebx,bl + mov DWORD PTR[36+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],4 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[44+rsi] + movzx eax,al + mov DWORD PTR[40+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],5 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[48+rsi] + movzx ebx,bl + mov DWORD PTR[44+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],5 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[52+rsi] + movzx eax,al + mov DWORD PTR[48+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],6 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[56+rsi] + movzx ebx,bl + mov DWORD PTR[52+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],6 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[60+rsi] + movzx eax,al + mov DWORD PTR[56+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],7 + add r10b,16 + movdqu xmm2,XMMWORD PTR[r12] + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + movzx ebx,bl + mov DWORD PTR[60+rsi],edx + lea rsi,QWORD PTR[r10*4+rdi] + pinsrw xmm1,WORD PTR[rbx*4+rdi],7 + mov eax,DWORD PTR[rsi] + mov rbx,rcx + xor rcx,rcx + sub r11,16 + mov cl,bl + test r11,-16 + jnz $L$oop16 + + psllq xmm1,8 + pxor xmm2,xmm0 + pxor xmm2,xmm1 + movdqu XMMWORD PTR[r12*1+r13],xmm2 + lea r12,QWORD PTR[16+r12] + + cmp r11,0 jne $L$loop1 jmp $L$exit ALIGN 16 $L$loop1:: - add r12b,r9b - mov r13d,DWORD PTR[r12*4+rdi] - mov DWORD PTR[r12*4+rdi],r9d - mov DWORD PTR[r8*4+rdi],r13d - add r9b,r13b - inc r8b - mov r13d,DWORD PTR[r9*4+rdi] - mov r9d,DWORD PTR[r8*4+rdi] - xor r13b,BYTE PTR[rdx] - inc rdx - mov BYTE PTR[rcx],r13b - inc rcx - dec rsi + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov DWORD PTR[r10*4+rdi],edx + add al,dl + inc r10b + mov edx,DWORD PTR[rax*4+rdi] + mov 
eax,DWORD PTR[r10*4+rdi] + xor dl,BYTE PTR[r12] + mov BYTE PTR[r12*1+r13],dl + lea r12,QWORD PTR[1+r12] + dec r11 jnz $L$loop1 jmp $L$exit ALIGN 16 $L$RC4_CHAR:: - add r8b,1 - movzx r9d,BYTE PTR[r8*1+rdi] - test rsi,-8 + add r10b,1 + movzx eax,BYTE PTR[r10*1+rdi] + test r11,-8 jz $L$cloop1 - cmp DWORD PTR[260+rdi],0 - jnz $L$cloop1 jmp $L$cloop8 ALIGN 16 $L$cloop8:: - mov eax,DWORD PTR[rdx] - mov ebx,DWORD PTR[4+rdx] - add r12b,r9b - lea r10,QWORD PTR[1+r8] - movzx r13d,BYTE PTR[r12*1+rdi] - movzx r10d,r10b - movzx r11d,BYTE PTR[r10*1+rdi] - mov BYTE PTR[r12*1+rdi],r9b - cmp r12,r10 - mov BYTE PTR[r8*1+rdi],r13b + mov r8d,DWORD PTR[r12] + mov r9d,DWORD PTR[4+r12] + add cl,al + lea rsi,QWORD PTR[1+r10] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE PTR[rsi*1+rdi] + mov BYTE PTR[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE PTR[r10*1+rdi],dl jne $L$cmov0 - mov r11,r9 + mov rbx,rax $L$cmov0:: - add r13b,r9b - xor al,BYTE PTR[r13*1+rdi] - ror eax,8 - add r12b,r11b - lea r8,QWORD PTR[1+r10] - movzx r13d,BYTE PTR[r12*1+rdi] - movzx r8d,r8b - movzx r9d,BYTE PTR[r8*1+rdi] - mov BYTE PTR[r12*1+rdi],r11b - cmp r12,r8 - mov BYTE PTR[r10*1+rdi],r13b + add dl,al + xor r8b,BYTE PTR[rdx*1+rdi] + ror r8d,8 + add cl,bl + lea r10,QWORD PTR[1+rsi] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE PTR[r10*1+rdi] + mov BYTE PTR[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE PTR[rsi*1+rdi],dl jne $L$cmov1 - mov r9,r11 + mov rax,rbx $L$cmov1:: - add r13b,r11b - xor al,BYTE PTR[r13*1+rdi] - ror eax,8 - add r12b,r9b - lea r10,QWORD PTR[1+r8] - movzx r13d,BYTE PTR[r12*1+rdi] - movzx r10d,r10b - movzx r11d,BYTE PTR[r10*1+rdi] - mov BYTE PTR[r12*1+rdi],r9b - cmp r12,r10 - mov BYTE PTR[r8*1+rdi],r13b + add dl,bl + xor r8b,BYTE PTR[rdx*1+rdi] + ror r8d,8 + add cl,al + lea rsi,QWORD PTR[1+r10] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE PTR[rsi*1+rdi] + mov BYTE PTR[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE PTR[r10*1+rdi],dl jne $L$cmov2 - mov r11,r9 + mov rbx,rax $L$cmov2:: - add r13b,r9b - xor al,BYTE PTR[r13*1+rdi] - ror eax,8 - add r12b,r11b - lea r8,QWORD PTR[1+r10] - movzx r13d,BYTE PTR[r12*1+rdi] - movzx r8d,r8b - movzx r9d,BYTE PTR[r8*1+rdi] - mov BYTE PTR[r12*1+rdi],r11b - cmp r12,r8 - mov BYTE PTR[r10*1+rdi],r13b + add dl,al + xor r8b,BYTE PTR[rdx*1+rdi] + ror r8d,8 + add cl,bl + lea r10,QWORD PTR[1+rsi] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE PTR[r10*1+rdi] + mov BYTE PTR[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE PTR[rsi*1+rdi],dl jne $L$cmov3 - mov r9,r11 + mov rax,rbx $L$cmov3:: - add r13b,r11b - xor al,BYTE PTR[r13*1+rdi] - ror eax,8 - add r12b,r9b - lea r10,QWORD PTR[1+r8] - movzx r13d,BYTE PTR[r12*1+rdi] - movzx r10d,r10b - movzx r11d,BYTE PTR[r10*1+rdi] - mov BYTE PTR[r12*1+rdi],r9b - cmp r12,r10 - mov BYTE PTR[r8*1+rdi],r13b + add dl,bl + xor r8b,BYTE PTR[rdx*1+rdi] + ror r8d,8 + add cl,al + lea rsi,QWORD PTR[1+r10] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE PTR[rsi*1+rdi] + mov BYTE PTR[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE PTR[r10*1+rdi],dl jne $L$cmov4 - mov r11,r9 + mov rbx,rax $L$cmov4:: - add r13b,r9b - xor bl,BYTE PTR[r13*1+rdi] - ror ebx,8 - add r12b,r11b - lea r8,QWORD PTR[1+r10] - movzx r13d,BYTE PTR[r12*1+rdi] - movzx r8d,r8b - movzx r9d,BYTE PTR[r8*1+rdi] - mov BYTE PTR[r12*1+rdi],r11b - cmp r12,r8 - mov BYTE PTR[r10*1+rdi],r13b + add dl,al + xor r9b,BYTE PTR[rdx*1+rdi] + ror r9d,8 + add cl,bl + lea r10,QWORD PTR[1+rsi] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE PTR[r10*1+rdi] + mov 
BYTE PTR[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE PTR[rsi*1+rdi],dl jne $L$cmov5 - mov r9,r11 + mov rax,rbx $L$cmov5:: - add r13b,r11b - xor bl,BYTE PTR[r13*1+rdi] - ror ebx,8 - add r12b,r9b - lea r10,QWORD PTR[1+r8] - movzx r13d,BYTE PTR[r12*1+rdi] - movzx r10d,r10b - movzx r11d,BYTE PTR[r10*1+rdi] - mov BYTE PTR[r12*1+rdi],r9b - cmp r12,r10 - mov BYTE PTR[r8*1+rdi],r13b + add dl,bl + xor r9b,BYTE PTR[rdx*1+rdi] + ror r9d,8 + add cl,al + lea rsi,QWORD PTR[1+r10] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE PTR[rsi*1+rdi] + mov BYTE PTR[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE PTR[r10*1+rdi],dl jne $L$cmov6 - mov r11,r9 + mov rbx,rax $L$cmov6:: - add r13b,r9b - xor bl,BYTE PTR[r13*1+rdi] - ror ebx,8 - add r12b,r11b - lea r8,QWORD PTR[1+r10] - movzx r13d,BYTE PTR[r12*1+rdi] - movzx r8d,r8b - movzx r9d,BYTE PTR[r8*1+rdi] - mov BYTE PTR[r12*1+rdi],r11b - cmp r12,r8 - mov BYTE PTR[r10*1+rdi],r13b + add dl,al + xor r9b,BYTE PTR[rdx*1+rdi] + ror r9d,8 + add cl,bl + lea r10,QWORD PTR[1+rsi] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE PTR[r10*1+rdi] + mov BYTE PTR[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE PTR[rsi*1+rdi],dl jne $L$cmov7 - mov r9,r11 + mov rax,rbx $L$cmov7:: - add r13b,r11b - xor bl,BYTE PTR[r13*1+rdi] - ror ebx,8 - lea rsi,QWORD PTR[((-8))+rsi] - mov DWORD PTR[rcx],eax - lea rdx,QWORD PTR[8+rdx] - mov DWORD PTR[4+rcx],ebx - lea rcx,QWORD PTR[8+rcx] - - test rsi,-8 + add dl,bl + xor r9b,BYTE PTR[rdx*1+rdi] + ror r9d,8 + lea r11,QWORD PTR[((-8))+r11] + mov DWORD PTR[r13],r8d + lea r12,QWORD PTR[8+r12] + mov DWORD PTR[4+r13],r9d + lea r13,QWORD PTR[8+r13] + + test r11,-8 jnz $L$cloop8 - cmp rsi,0 + cmp r11,0 jne $L$cloop1 jmp $L$exit ALIGN 16 $L$cloop1:: - add r12b,r9b - movzx r13d,BYTE PTR[r12*1+rdi] - mov BYTE PTR[r12*1+rdi],r9b - mov BYTE PTR[r8*1+rdi],r13b - add r13b,r9b - add r8b,1 - movzx r13d,r13b - movzx r8d,r8b - movzx r13d,BYTE PTR[r13*1+rdi] - movzx r9d,BYTE PTR[r8*1+rdi] - xor r13b,BYTE PTR[rdx] - lea rdx,QWORD PTR[1+rdx] - mov BYTE PTR[rcx],r13b - lea rcx,QWORD PTR[1+rcx] - sub rsi,1 + add cl,al + movzx ecx,cl + movzx edx,BYTE PTR[rcx*1+rdi] + mov BYTE PTR[rcx*1+rdi],al + mov BYTE PTR[r10*1+rdi],dl + add dl,al + add r10b,1 + movzx edx,dl + movzx r10d,r10b + movzx edx,BYTE PTR[rdx*1+rdi] + movzx eax,BYTE PTR[r10*1+rdi] + xor dl,BYTE PTR[r12] + lea r12,QWORD PTR[1+r12] + mov BYTE PTR[r13],dl + lea r13,QWORD PTR[1+r13] + sub r11,1 jnz $L$cloop1 jmp $L$exit ALIGN 16 $L$exit:: - sub r8b,1 - mov DWORD PTR[((-8))+rdi],r8d - mov DWORD PTR[((-4))+rdi],r12d + sub r10b,1 + mov DWORD PTR[((-8))+rdi],r10d + mov DWORD PTR[((-4))+rdi],ecx mov r13,QWORD PTR[rsp] mov r12,QWORD PTR[8+rsp] @@ -345,15 +541,14 @@ $L$epilogue:: DB 0F3h,0C3h ;repret $L$SEH_end_RC4:: RC4 ENDP -EXTERN OPENSSL_ia32cap_P:NEAR -PUBLIC RC4_set_key +PUBLIC private_RC4_set_key ALIGN 16 -RC4_set_key PROC PUBLIC +private_RC4_set_key PROC PUBLIC mov QWORD PTR[8+rsp],rdi ;WIN64 prologue mov QWORD PTR[16+rsp],rsi mov rax,rsp -$L$SEH_begin_RC4_set_key:: +$L$SEH_begin_private_RC4_set_key:: mov rdi,rcx mov rsi,rdx mov rdx,r8 @@ -370,11 +565,8 @@ $L$SEH_begin_RC4_set_key:: mov r8d,DWORD PTR[OPENSSL_ia32cap_P] bt r8d,20 - jnc $L$w1stloop - bt r8d,30 - setc r9b - mov DWORD PTR[260+rdi],r9d - jmp $L$c1stloop + jc $L$c1stloop + jmp $L$w1stloop ALIGN 16 $L$w1stloop:: @@ -430,8 +622,8 @@ $L$exit_key:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_RC4_set_key:: -RC4_set_key ENDP +$L$SEH_end_private_RC4_set_key:: +private_RC4_set_key ENDP PUBLIC 
RC4_options @@ -440,18 +632,20 @@ RC4_options PROC PUBLIC lea rax,QWORD PTR[$L$opts] mov edx,DWORD PTR[OPENSSL_ia32cap_P] bt edx,20 - jnc $L$done - add rax,12 + jc $L$8xchar bt edx,30 jnc $L$done - add rax,13 + add rax,25 + DB 0F3h,0C3h ;repret +$L$8xchar:: + add rax,12 $L$done:: DB 0F3h,0C3h ;repret ALIGN 64 $L$opts:: DB 114,99,52,40,56,120,44,105,110,116,41,0 DB 114,99,52,40,56,120,44,99,104,97,114,41,0 -DB 114,99,52,40,49,120,44,99,104,97,114,41,0 +DB 114,99,52,40,49,54,120,44,105,110,116,41,0 DB 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32 DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 @@ -568,9 +762,9 @@ ALIGN 4 DD imagerel $L$SEH_end_RC4 DD imagerel $L$SEH_info_RC4 - DD imagerel $L$SEH_begin_RC4_set_key - DD imagerel $L$SEH_end_RC4_set_key - DD imagerel $L$SEH_info_RC4_set_key + DD imagerel $L$SEH_begin_private_RC4_set_key + DD imagerel $L$SEH_end_private_RC4_set_key + DD imagerel $L$SEH_info_private_RC4_set_key .pdata ENDS .xdata SEGMENT READONLY ALIGN(8) @@ -578,7 +772,7 @@ ALIGN 8 $L$SEH_info_RC4:: DB 9,0,0,0 DD imagerel stream_se_handler -$L$SEH_info_RC4_set_key:: +$L$SEH_info_private_RC4_set_key:: DB 9,0,0,0 DD imagerel key_se_handler diff --git a/deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm b/deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm index 9323f2b26a..9589f7fa08 100644 --- a/deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm @@ -1,5 +1,7 @@ OPTION DOTNAME .text$ SEGMENT ALIGN(64) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR + PUBLIC sha1_block_data_order ALIGN 16 @@ -13,9 +15,18 @@ $L$SEH_begin_sha1_block_data_order:: mov rdx,r8 + mov r9d,DWORD PTR[((OPENSSL_ia32cap_P+0))] + mov r8d,DWORD PTR[((OPENSSL_ia32cap_P+4))] + test r8d,512 + jz $L$ialu + jmp _ssse3_shortcut + +ALIGN 16 +$L$ialu:: push rbx push rbp push r12 + push r13 mov r11,rsp mov r8,rdi sub rsp,72 @@ -25,1278 +36,2499 @@ $L$SEH_begin_sha1_block_data_order:: mov QWORD PTR[64+rsp],r11 $L$prologue:: - mov edx,DWORD PTR[r8] - mov esi,DWORD PTR[4+r8] - mov edi,DWORD PTR[8+r8] - mov ebp,DWORD PTR[12+r8] - mov r11d,DWORD PTR[16+r8] -ALIGN 4 + mov esi,DWORD PTR[r8] + mov edi,DWORD PTR[4+r8] + mov r11d,DWORD PTR[8+r8] + mov r12d,DWORD PTR[12+r8] + mov r13d,DWORD PTR[16+r8] + jmp $L$loop + +ALIGN 16 $L$loop:: - mov eax,DWORD PTR[r9] - bswap eax - mov DWORD PTR[rsp],eax - lea r12d,DWORD PTR[05a827999h+r11*1+rax] - mov ebx,edi - mov eax,DWORD PTR[4+r9] - mov r11d,edx - xor ebx,ebp - bswap eax - rol r11d,5 - and ebx,esi - mov DWORD PTR[4+rsp],eax - add r12d,r11d - xor ebx,ebp + mov edx,DWORD PTR[r9] + bswap edx + mov DWORD PTR[rsp],edx + mov eax,r11d + mov ebp,DWORD PTR[4+r9] + mov ecx,esi + xor eax,r12d + bswap ebp + rol ecx,5 + lea r13d,DWORD PTR[1518500249+r13*1+rdx] + and eax,edi + mov DWORD PTR[4+rsp],ebp + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov eax,edi + mov edx,DWORD PTR[8+r9] + mov ecx,r13d + xor eax,r11d + bswap edx + rol ecx,5 + lea r12d,DWORD PTR[1518500249+r12*1+rbp] + and eax,esi + mov DWORD PTR[8+rsp],edx + add r12d,ecx + xor eax,r11d rol esi,30 - add r12d,ebx - lea r11d,DWORD PTR[05a827999h+rbp*1+rax] - mov ebx,esi - mov eax,DWORD PTR[8+r9] - mov ebp,r12d - xor ebx,edi - bswap eax - rol ebp,5 - and ebx,edx - mov DWORD PTR[8+rsp],eax - add r11d,ebp - xor ebx,edi - rol edx,30 - add r11d,ebx - lea ebp,DWORD PTR[05a827999h+rdi*1+rax] - mov ebx,edx - mov eax,DWORD PTR[12+r9] - mov edi,r11d - xor ebx,esi - bswap eax - rol edi,5 - and ebx,r12d - mov DWORD 
PTR[12+rsp],eax - add ebp,edi - xor ebx,esi + add r12d,eax + mov eax,esi + mov ebp,DWORD PTR[12+r9] + mov ecx,r12d + xor eax,edi + bswap ebp + rol ecx,5 + lea r11d,DWORD PTR[1518500249+r11*1+rdx] + and eax,r13d + mov DWORD PTR[12+rsp],ebp + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov eax,r13d + mov edx,DWORD PTR[16+r9] + mov ecx,r11d + xor eax,esi + bswap edx + rol ecx,5 + lea edi,DWORD PTR[1518500249+rdi*1+rbp] + and eax,r12d + mov DWORD PTR[16+rsp],edx + add edi,ecx + xor eax,esi rol r12d,30 - add ebp,ebx - lea edi,DWORD PTR[05a827999h+rsi*1+rax] - mov ebx,r12d - mov eax,DWORD PTR[16+r9] - mov esi,ebp - xor ebx,edx - bswap eax - rol esi,5 - and ebx,r11d - mov DWORD PTR[16+rsp],eax - add edi,esi - xor ebx,edx + add edi,eax + mov eax,r12d + mov ebp,DWORD PTR[20+r9] + mov ecx,edi + xor eax,r13d + bswap ebp + rol ecx,5 + lea esi,DWORD PTR[1518500249+rsi*1+rdx] + and eax,r11d + mov DWORD PTR[20+rsp],ebp + add esi,ecx + xor eax,r13d rol r11d,30 - add edi,ebx - lea esi,DWORD PTR[05a827999h+rdx*1+rax] + add esi,eax + mov eax,r11d + mov edx,DWORD PTR[24+r9] + mov ecx,esi + xor eax,r12d + bswap edx + rol ecx,5 + lea r13d,DWORD PTR[1518500249+r13*1+rbp] + and eax,edi + mov DWORD PTR[24+rsp],edx + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov eax,edi + mov ebp,DWORD PTR[28+r9] + mov ecx,r13d + xor eax,r11d + bswap ebp + rol ecx,5 + lea r12d,DWORD PTR[1518500249+r12*1+rdx] + and eax,esi + mov DWORD PTR[28+rsp],ebp + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov eax,esi + mov edx,DWORD PTR[32+r9] + mov ecx,r12d + xor eax,edi + bswap edx + rol ecx,5 + lea r11d,DWORD PTR[1518500249+r11*1+rbp] + and eax,r13d + mov DWORD PTR[32+rsp],edx + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov eax,r13d + mov ebp,DWORD PTR[36+r9] + mov ecx,r11d + xor eax,esi + bswap ebp + rol ecx,5 + lea edi,DWORD PTR[1518500249+rdi*1+rdx] + and eax,r12d + mov DWORD PTR[36+rsp],ebp + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov eax,r12d + mov edx,DWORD PTR[40+r9] + mov ecx,edi + xor eax,r13d + bswap edx + rol ecx,5 + lea esi,DWORD PTR[1518500249+rsi*1+rbp] + and eax,r11d + mov DWORD PTR[40+rsp],edx + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + mov eax,r11d + mov ebp,DWORD PTR[44+r9] + mov ecx,esi + xor eax,r12d + bswap ebp + rol ecx,5 + lea r13d,DWORD PTR[1518500249+r13*1+rdx] + and eax,edi + mov DWORD PTR[44+rsp],ebp + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov eax,edi + mov edx,DWORD PTR[48+r9] + mov ecx,r13d + xor eax,r11d + bswap edx + rol ecx,5 + lea r12d,DWORD PTR[1518500249+r12*1+rbp] + and eax,esi + mov DWORD PTR[48+rsp],edx + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov eax,esi + mov ebp,DWORD PTR[52+r9] + mov ecx,r12d + xor eax,edi + bswap ebp + rol ecx,5 + lea r11d,DWORD PTR[1518500249+r11*1+rdx] + and eax,r13d + mov DWORD PTR[52+rsp],ebp + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov eax,r13d + mov edx,DWORD PTR[56+r9] + mov ecx,r11d + xor eax,esi + bswap edx + rol ecx,5 + lea edi,DWORD PTR[1518500249+rdi*1+rbp] + and eax,r12d + mov DWORD PTR[56+rsp],edx + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov eax,r12d + mov ebp,DWORD PTR[60+r9] + mov ecx,edi + xor eax,r13d + bswap ebp + rol ecx,5 + lea esi,DWORD PTR[1518500249+rsi*1+rdx] + and eax,r11d + mov DWORD PTR[60+rsp],ebp + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + mov edx,DWORD PTR[rsp] + mov eax,r11d + mov ecx,esi + xor edx,DWORD PTR[8+rsp] + xor eax,r12d + rol ecx,5 + xor edx,DWORD PTR[32+rsp] + 
and eax,edi + lea r13d,DWORD PTR[1518500249+r13*1+rbp] + xor edx,DWORD PTR[52+rsp] + xor eax,r12d + rol edx,1 + add r13d,ecx + rol edi,30 + mov DWORD PTR[rsp],edx + add r13d,eax + mov ebp,DWORD PTR[4+rsp] + mov eax,edi + mov ecx,r13d + xor ebp,DWORD PTR[12+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD PTR[36+rsp] + and eax,esi + lea r12d,DWORD PTR[1518500249+r12*1+rdx] + xor ebp,DWORD PTR[56+rsp] + xor eax,r11d + rol ebp,1 + add r12d,ecx + rol esi,30 + mov DWORD PTR[4+rsp],ebp + add r12d,eax + mov edx,DWORD PTR[8+rsp] + mov eax,esi + mov ecx,r12d + xor edx,DWORD PTR[16+rsp] + xor eax,edi + rol ecx,5 + xor edx,DWORD PTR[40+rsp] + and eax,r13d + lea r11d,DWORD PTR[1518500249+r11*1+rbp] + xor edx,DWORD PTR[60+rsp] + xor eax,edi + rol edx,1 + add r11d,ecx + rol r13d,30 + mov DWORD PTR[8+rsp],edx + add r11d,eax + mov ebp,DWORD PTR[12+rsp] + mov eax,r13d + mov ecx,r11d + xor ebp,DWORD PTR[20+rsp] + xor eax,esi + rol ecx,5 + xor ebp,DWORD PTR[44+rsp] + and eax,r12d + lea edi,DWORD PTR[1518500249+rdi*1+rdx] + xor ebp,DWORD PTR[rsp] + xor eax,esi + rol ebp,1 + add edi,ecx + rol r12d,30 + mov DWORD PTR[12+rsp],ebp + add edi,eax + mov edx,DWORD PTR[16+rsp] + mov eax,r12d + mov ecx,edi + xor edx,DWORD PTR[24+rsp] + xor eax,r13d + rol ecx,5 + xor edx,DWORD PTR[48+rsp] + and eax,r11d + lea esi,DWORD PTR[1518500249+rsi*1+rbp] + xor edx,DWORD PTR[4+rsp] + xor eax,r13d + rol edx,1 + add esi,ecx + rol r11d,30 + mov DWORD PTR[16+rsp],edx + add esi,eax + mov ebp,DWORD PTR[20+rsp] + mov eax,r11d + mov ecx,esi + xor ebp,DWORD PTR[28+rsp] + xor eax,edi + rol ecx,5 + lea r13d,DWORD PTR[1859775393+r13*1+rdx] + xor ebp,DWORD PTR[52+rsp] + xor eax,r12d + add r13d,ecx + xor ebp,DWORD PTR[8+rsp] + rol edi,30 + add r13d,eax + rol ebp,1 + mov DWORD PTR[20+rsp],ebp + mov edx,DWORD PTR[24+rsp] + mov eax,edi + mov ecx,r13d + xor edx,DWORD PTR[32+rsp] + xor eax,esi + rol ecx,5 + lea r12d,DWORD PTR[1859775393+r12*1+rbp] + xor edx,DWORD PTR[56+rsp] + xor eax,r11d + add r12d,ecx + xor edx,DWORD PTR[12+rsp] + rol esi,30 + add r12d,eax + rol edx,1 + mov DWORD PTR[24+rsp],edx + mov ebp,DWORD PTR[28+rsp] + mov eax,esi + mov ecx,r12d + xor ebp,DWORD PTR[36+rsp] + xor eax,r13d + rol ecx,5 + lea r11d,DWORD PTR[1859775393+r11*1+rdx] + xor ebp,DWORD PTR[60+rsp] + xor eax,edi + add r11d,ecx + xor ebp,DWORD PTR[16+rsp] + rol r13d,30 + add r11d,eax + rol ebp,1 + mov DWORD PTR[28+rsp],ebp + mov edx,DWORD PTR[32+rsp] + mov eax,r13d + mov ecx,r11d + xor edx,DWORD PTR[40+rsp] + xor eax,r12d + rol ecx,5 + lea edi,DWORD PTR[1859775393+rdi*1+rbp] + xor edx,DWORD PTR[rsp] + xor eax,esi + add edi,ecx + xor edx,DWORD PTR[20+rsp] + rol r12d,30 + add edi,eax + rol edx,1 + mov DWORD PTR[32+rsp],edx + mov ebp,DWORD PTR[36+rsp] + mov eax,r12d + mov ecx,edi + xor ebp,DWORD PTR[44+rsp] + xor eax,r11d + rol ecx,5 + lea esi,DWORD PTR[1859775393+rsi*1+rdx] + xor ebp,DWORD PTR[4+rsp] + xor eax,r13d + add esi,ecx + xor ebp,DWORD PTR[24+rsp] + rol r11d,30 + add esi,eax + rol ebp,1 + mov DWORD PTR[36+rsp],ebp + mov edx,DWORD PTR[40+rsp] + mov eax,r11d + mov ecx,esi + xor edx,DWORD PTR[48+rsp] + xor eax,edi + rol ecx,5 + lea r13d,DWORD PTR[1859775393+r13*1+rbp] + xor edx,DWORD PTR[8+rsp] + xor eax,r12d + add r13d,ecx + xor edx,DWORD PTR[28+rsp] + rol edi,30 + add r13d,eax + rol edx,1 + mov DWORD PTR[40+rsp],edx + mov ebp,DWORD PTR[44+rsp] + mov eax,edi + mov ecx,r13d + xor ebp,DWORD PTR[52+rsp] + xor eax,esi + rol ecx,5 + lea r12d,DWORD PTR[1859775393+r12*1+rdx] + xor ebp,DWORD PTR[12+rsp] + xor eax,r11d + add r12d,ecx + xor ebp,DWORD PTR[32+rsp] + rol esi,30 + 
add r12d,eax + rol ebp,1 + mov DWORD PTR[44+rsp],ebp + mov edx,DWORD PTR[48+rsp] + mov eax,esi + mov ecx,r12d + xor edx,DWORD PTR[56+rsp] + xor eax,r13d + rol ecx,5 + lea r11d,DWORD PTR[1859775393+r11*1+rbp] + xor edx,DWORD PTR[16+rsp] + xor eax,edi + add r11d,ecx + xor edx,DWORD PTR[36+rsp] + rol r13d,30 + add r11d,eax + rol edx,1 + mov DWORD PTR[48+rsp],edx + mov ebp,DWORD PTR[52+rsp] + mov eax,r13d + mov ecx,r11d + xor ebp,DWORD PTR[60+rsp] + xor eax,r12d + rol ecx,5 + lea edi,DWORD PTR[1859775393+rdi*1+rdx] + xor ebp,DWORD PTR[20+rsp] + xor eax,esi + add edi,ecx + xor ebp,DWORD PTR[40+rsp] + rol r12d,30 + add edi,eax + rol ebp,1 + mov DWORD PTR[52+rsp],ebp + mov edx,DWORD PTR[56+rsp] + mov eax,r12d + mov ecx,edi + xor edx,DWORD PTR[rsp] + xor eax,r11d + rol ecx,5 + lea esi,DWORD PTR[1859775393+rsi*1+rbp] + xor edx,DWORD PTR[24+rsp] + xor eax,r13d + add esi,ecx + xor edx,DWORD PTR[44+rsp] + rol r11d,30 + add esi,eax + rol edx,1 + mov DWORD PTR[56+rsp],edx + mov ebp,DWORD PTR[60+rsp] + mov eax,r11d + mov ecx,esi + xor ebp,DWORD PTR[4+rsp] + xor eax,edi + rol ecx,5 + lea r13d,DWORD PTR[1859775393+r13*1+rdx] + xor ebp,DWORD PTR[28+rsp] + xor eax,r12d + add r13d,ecx + xor ebp,DWORD PTR[48+rsp] + rol edi,30 + add r13d,eax + rol ebp,1 + mov DWORD PTR[60+rsp],ebp + mov edx,DWORD PTR[rsp] + mov eax,edi + mov ecx,r13d + xor edx,DWORD PTR[8+rsp] + xor eax,esi + rol ecx,5 + lea r12d,DWORD PTR[1859775393+r12*1+rbp] + xor edx,DWORD PTR[32+rsp] + xor eax,r11d + add r12d,ecx + xor edx,DWORD PTR[52+rsp] + rol esi,30 + add r12d,eax + rol edx,1 + mov DWORD PTR[rsp],edx + mov ebp,DWORD PTR[4+rsp] + mov eax,esi + mov ecx,r12d + xor ebp,DWORD PTR[12+rsp] + xor eax,r13d + rol ecx,5 + lea r11d,DWORD PTR[1859775393+r11*1+rdx] + xor ebp,DWORD PTR[36+rsp] + xor eax,edi + add r11d,ecx + xor ebp,DWORD PTR[56+rsp] + rol r13d,30 + add r11d,eax + rol ebp,1 + mov DWORD PTR[4+rsp],ebp + mov edx,DWORD PTR[8+rsp] + mov eax,r13d + mov ecx,r11d + xor edx,DWORD PTR[16+rsp] + xor eax,r12d + rol ecx,5 + lea edi,DWORD PTR[1859775393+rdi*1+rbp] + xor edx,DWORD PTR[40+rsp] + xor eax,esi + add edi,ecx + xor edx,DWORD PTR[60+rsp] + rol r12d,30 + add edi,eax + rol edx,1 + mov DWORD PTR[8+rsp],edx + mov ebp,DWORD PTR[12+rsp] + mov eax,r12d + mov ecx,edi + xor ebp,DWORD PTR[20+rsp] + xor eax,r11d + rol ecx,5 + lea esi,DWORD PTR[1859775393+rsi*1+rdx] + xor ebp,DWORD PTR[44+rsp] + xor eax,r13d + add esi,ecx + xor ebp,DWORD PTR[rsp] + rol r11d,30 + add esi,eax + rol ebp,1 + mov DWORD PTR[12+rsp],ebp + mov edx,DWORD PTR[16+rsp] + mov eax,r11d + mov ecx,esi + xor edx,DWORD PTR[24+rsp] + xor eax,edi + rol ecx,5 + lea r13d,DWORD PTR[1859775393+r13*1+rbp] + xor edx,DWORD PTR[48+rsp] + xor eax,r12d + add r13d,ecx + xor edx,DWORD PTR[4+rsp] + rol edi,30 + add r13d,eax + rol edx,1 + mov DWORD PTR[16+rsp],edx + mov ebp,DWORD PTR[20+rsp] + mov eax,edi + mov ecx,r13d + xor ebp,DWORD PTR[28+rsp] + xor eax,esi + rol ecx,5 + lea r12d,DWORD PTR[1859775393+r12*1+rdx] + xor ebp,DWORD PTR[52+rsp] + xor eax,r11d + add r12d,ecx + xor ebp,DWORD PTR[8+rsp] + rol esi,30 + add r12d,eax + rol ebp,1 + mov DWORD PTR[20+rsp],ebp + mov edx,DWORD PTR[24+rsp] + mov eax,esi + mov ecx,r12d + xor edx,DWORD PTR[32+rsp] + xor eax,r13d + rol ecx,5 + lea r11d,DWORD PTR[1859775393+r11*1+rbp] + xor edx,DWORD PTR[56+rsp] + xor eax,edi + add r11d,ecx + xor edx,DWORD PTR[12+rsp] + rol r13d,30 + add r11d,eax + rol edx,1 + mov DWORD PTR[24+rsp],edx + mov ebp,DWORD PTR[28+rsp] + mov eax,r13d + mov ecx,r11d + xor ebp,DWORD PTR[36+rsp] + xor eax,r12d + rol ecx,5 + lea edi,DWORD 
PTR[1859775393+rdi*1+rdx] + xor ebp,DWORD PTR[60+rsp] + xor eax,esi + add edi,ecx + xor ebp,DWORD PTR[16+rsp] + rol r12d,30 + add edi,eax + rol ebp,1 + mov DWORD PTR[28+rsp],ebp + mov edx,DWORD PTR[32+rsp] + mov eax,r12d + mov ecx,edi + xor edx,DWORD PTR[40+rsp] + xor eax,r11d + rol ecx,5 + lea esi,DWORD PTR[1859775393+rsi*1+rbp] + xor edx,DWORD PTR[rsp] + xor eax,r13d + add esi,ecx + xor edx,DWORD PTR[20+rsp] + rol r11d,30 + add esi,eax + rol edx,1 + mov DWORD PTR[32+rsp],edx + mov ebp,DWORD PTR[36+rsp] + mov eax,r11d mov ebx,r11d - mov eax,DWORD PTR[20+r9] - mov edx,edi - xor ebx,r12d - bswap eax - rol edx,5 - and ebx,ebp - mov DWORD PTR[20+rsp],eax - add esi,edx + xor ebp,DWORD PTR[44+rsp] + and eax,r12d + mov ecx,esi + xor ebp,DWORD PTR[4+rsp] xor ebx,r12d - rol ebp,30 - add esi,ebx - lea edx,DWORD PTR[05a827999h+r12*1+rax] - mov ebx,ebp - mov eax,DWORD PTR[24+r9] - mov r12d,esi - xor ebx,r11d - bswap eax - rol r12d,5 + lea r13d,DWORD PTR[((-1894007588))+r13*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[24+rsp] + add r13d,eax and ebx,edi - mov DWORD PTR[24+rsp],eax - add edx,r12d - xor ebx,r11d + rol ebp,1 + add r13d,ebx rol edi,30 - add edx,ebx - lea r12d,DWORD PTR[05a827999h+r11*1+rax] + mov DWORD PTR[36+rsp],ebp + add r13d,ecx + mov edx,DWORD PTR[40+rsp] + mov eax,edi mov ebx,edi - mov eax,DWORD PTR[28+r9] - mov r11d,edx - xor ebx,ebp - bswap eax - rol r11d,5 + xor edx,DWORD PTR[48+rsp] + and eax,r11d + mov ecx,r13d + xor edx,DWORD PTR[8+rsp] + xor ebx,r11d + lea r12d,DWORD PTR[((-1894007588))+r12*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[28+rsp] + add r12d,eax and ebx,esi - mov DWORD PTR[28+rsp],eax - add r12d,r11d - xor ebx,ebp - rol esi,30 + rol edx,1 add r12d,ebx - lea r11d,DWORD PTR[05a827999h+rbp*1+rax] + rol esi,30 + mov DWORD PTR[40+rsp],edx + add r12d,ecx + mov ebp,DWORD PTR[44+rsp] + mov eax,esi mov ebx,esi - mov eax,DWORD PTR[32+r9] - mov ebp,r12d - xor ebx,edi - bswap eax - rol ebp,5 - and ebx,edx - mov DWORD PTR[32+rsp],eax - add r11d,ebp + xor ebp,DWORD PTR[52+rsp] + and eax,edi + mov ecx,r12d + xor ebp,DWORD PTR[12+rsp] xor ebx,edi - rol edx,30 + lea r11d,DWORD PTR[((-1894007588))+r11*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[32+rsp] + add r11d,eax + and ebx,r13d + rol ebp,1 add r11d,ebx - lea ebp,DWORD PTR[05a827999h+rdi*1+rax] - mov ebx,edx - mov eax,DWORD PTR[36+r9] - mov edi,r11d + rol r13d,30 + mov DWORD PTR[44+rsp],ebp + add r11d,ecx + mov edx,DWORD PTR[48+rsp] + mov eax,r13d + mov ebx,r13d + xor edx,DWORD PTR[56+rsp] + and eax,esi + mov ecx,r11d + xor edx,DWORD PTR[16+rsp] xor ebx,esi - bswap eax - rol edi,5 + lea edi,DWORD PTR[((-1894007588))+rdi*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[36+rsp] + add edi,eax and ebx,r12d - mov DWORD PTR[36+rsp],eax - add ebp,edi - xor ebx,esi + rol edx,1 + add edi,ebx rol r12d,30 - add ebp,ebx - lea edi,DWORD PTR[05a827999h+rsi*1+rax] + mov DWORD PTR[48+rsp],edx + add edi,ecx + mov ebp,DWORD PTR[52+rsp] + mov eax,r12d mov ebx,r12d - mov eax,DWORD PTR[40+r9] - mov esi,ebp - xor ebx,edx - bswap eax - rol esi,5 + xor ebp,DWORD PTR[60+rsp] + and eax,r13d + mov ecx,edi + xor ebp,DWORD PTR[20+rsp] + xor ebx,r13d + lea esi,DWORD PTR[((-1894007588))+rsi*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[40+rsp] + add esi,eax and ebx,r11d - mov DWORD PTR[40+rsp],eax - add edi,esi - xor ebx,edx + rol ebp,1 + add esi,ebx rol r11d,30 - add edi,ebx - lea esi,DWORD PTR[05a827999h+rdx*1+rax] + mov DWORD PTR[52+rsp],ebp + add esi,ecx + mov edx,DWORD PTR[56+rsp] + mov eax,r11d mov ebx,r11d - mov eax,DWORD PTR[44+r9] - mov edx,edi - xor ebx,r12d - bswap eax - rol edx,5 - 
and ebx,ebp - mov DWORD PTR[44+rsp],eax - add esi,edx + xor edx,DWORD PTR[rsp] + and eax,r12d + mov ecx,esi + xor edx,DWORD PTR[24+rsp] xor ebx,r12d - rol ebp,30 - add esi,ebx - lea edx,DWORD PTR[05a827999h+r12*1+rax] - mov ebx,ebp - mov eax,DWORD PTR[48+r9] - mov r12d,esi - xor ebx,r11d - bswap eax - rol r12d,5 + lea r13d,DWORD PTR[((-1894007588))+r13*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[44+rsp] + add r13d,eax and ebx,edi - mov DWORD PTR[48+rsp],eax - add edx,r12d - xor ebx,r11d + rol edx,1 + add r13d,ebx rol edi,30 - add edx,ebx - lea r12d,DWORD PTR[05a827999h+r11*1+rax] + mov DWORD PTR[56+rsp],edx + add r13d,ecx + mov ebp,DWORD PTR[60+rsp] + mov eax,edi mov ebx,edi - mov eax,DWORD PTR[52+r9] - mov r11d,edx - xor ebx,ebp - bswap eax - rol r11d,5 + xor ebp,DWORD PTR[4+rsp] + and eax,r11d + mov ecx,r13d + xor ebp,DWORD PTR[28+rsp] + xor ebx,r11d + lea r12d,DWORD PTR[((-1894007588))+r12*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[48+rsp] + add r12d,eax and ebx,esi - mov DWORD PTR[52+rsp],eax - add r12d,r11d - xor ebx,ebp - rol esi,30 + rol ebp,1 add r12d,ebx - lea r11d,DWORD PTR[05a827999h+rbp*1+rax] + rol esi,30 + mov DWORD PTR[60+rsp],ebp + add r12d,ecx + mov edx,DWORD PTR[rsp] + mov eax,esi mov ebx,esi - mov eax,DWORD PTR[56+r9] - mov ebp,r12d - xor ebx,edi - bswap eax - rol ebp,5 - and ebx,edx - mov DWORD PTR[56+rsp],eax - add r11d,ebp + xor edx,DWORD PTR[8+rsp] + and eax,edi + mov ecx,r12d + xor edx,DWORD PTR[32+rsp] xor ebx,edi - rol edx,30 + lea r11d,DWORD PTR[((-1894007588))+r11*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[52+rsp] + add r11d,eax + and ebx,r13d + rol edx,1 add r11d,ebx - lea ebp,DWORD PTR[05a827999h+rdi*1+rax] - mov ebx,edx - mov eax,DWORD PTR[60+r9] - mov edi,r11d + rol r13d,30 + mov DWORD PTR[rsp],edx + add r11d,ecx + mov ebp,DWORD PTR[4+rsp] + mov eax,r13d + mov ebx,r13d + xor ebp,DWORD PTR[12+rsp] + and eax,esi + mov ecx,r11d + xor ebp,DWORD PTR[36+rsp] xor ebx,esi - bswap eax - rol edi,5 + lea edi,DWORD PTR[((-1894007588))+rdi*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[56+rsp] + add edi,eax and ebx,r12d - mov DWORD PTR[60+rsp],eax - add ebp,edi - xor ebx,esi + rol ebp,1 + add edi,ebx rol r12d,30 - add ebp,ebx - lea edi,DWORD PTR[05a827999h+rsi*1+rax] - mov eax,DWORD PTR[rsp] + mov DWORD PTR[4+rsp],ebp + add edi,ecx + mov edx,DWORD PTR[8+rsp] + mov eax,r12d mov ebx,r12d - mov esi,ebp - xor eax,DWORD PTR[8+rsp] - xor ebx,edx - rol esi,5 - xor eax,DWORD PTR[32+rsp] + xor edx,DWORD PTR[16+rsp] + and eax,r13d + mov ecx,edi + xor edx,DWORD PTR[40+rsp] + xor ebx,r13d + lea esi,DWORD PTR[((-1894007588))+rsi*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[60+rsp] + add esi,eax and ebx,r11d - add edi,esi - xor eax,DWORD PTR[52+rsp] - xor ebx,edx + rol edx,1 + add esi,ebx rol r11d,30 - add edi,ebx - rol eax,1 - mov DWORD PTR[rsp],eax - lea esi,DWORD PTR[05a827999h+rdx*1+rax] - mov eax,DWORD PTR[4+rsp] + mov DWORD PTR[8+rsp],edx + add esi,ecx + mov ebp,DWORD PTR[12+rsp] + mov eax,r11d mov ebx,r11d - mov edx,edi - xor eax,DWORD PTR[12+rsp] - xor ebx,r12d - rol edx,5 - xor eax,DWORD PTR[36+rsp] - and ebx,ebp - add esi,edx - xor eax,DWORD PTR[56+rsp] + xor ebp,DWORD PTR[20+rsp] + and eax,r12d + mov ecx,esi + xor ebp,DWORD PTR[44+rsp] xor ebx,r12d - rol ebp,30 - add esi,ebx - rol eax,1 - mov DWORD PTR[4+rsp],eax - lea edx,DWORD PTR[05a827999h+r12*1+rax] - mov eax,DWORD PTR[8+rsp] - mov ebx,ebp - mov r12d,esi - xor eax,DWORD PTR[16+rsp] - xor ebx,r11d - rol r12d,5 - xor eax,DWORD PTR[40+rsp] + lea r13d,DWORD PTR[((-1894007588))+r13*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[rsp] + add r13d,eax and 
ebx,edi - add edx,r12d - xor eax,DWORD PTR[60+rsp] - xor ebx,r11d + rol ebp,1 + add r13d,ebx rol edi,30 - add edx,ebx - rol eax,1 - mov DWORD PTR[8+rsp],eax - lea r12d,DWORD PTR[05a827999h+r11*1+rax] - mov eax,DWORD PTR[12+rsp] + mov DWORD PTR[12+rsp],ebp + add r13d,ecx + mov edx,DWORD PTR[16+rsp] + mov eax,edi mov ebx,edi - mov r11d,edx - xor eax,DWORD PTR[20+rsp] - xor ebx,ebp - rol r11d,5 - xor eax,DWORD PTR[44+rsp] + xor edx,DWORD PTR[24+rsp] + and eax,r11d + mov ecx,r13d + xor edx,DWORD PTR[48+rsp] + xor ebx,r11d + lea r12d,DWORD PTR[((-1894007588))+r12*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[4+rsp] + add r12d,eax and ebx,esi - add r12d,r11d - xor eax,DWORD PTR[rsp] - xor ebx,ebp - rol esi,30 + rol edx,1 add r12d,ebx - rol eax,1 - mov DWORD PTR[12+rsp],eax - lea r11d,DWORD PTR[05a827999h+rbp*1+rax] - mov eax,DWORD PTR[16+rsp] + rol esi,30 + mov DWORD PTR[16+rsp],edx + add r12d,ecx + mov ebp,DWORD PTR[20+rsp] + mov eax,esi mov ebx,esi - mov ebp,r12d - xor eax,DWORD PTR[24+rsp] - xor ebx,edi - rol ebp,5 - xor eax,DWORD PTR[48+rsp] - and ebx,edx - add r11d,ebp - xor eax,DWORD PTR[4+rsp] + xor ebp,DWORD PTR[28+rsp] + and eax,edi + mov ecx,r12d + xor ebp,DWORD PTR[52+rsp] xor ebx,edi - rol edx,30 + lea r11d,DWORD PTR[((-1894007588))+r11*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[8+rsp] + add r11d,eax + and ebx,r13d + rol ebp,1 add r11d,ebx - rol eax,1 - mov DWORD PTR[16+rsp],eax - lea ebp,DWORD PTR[1859775393+rdi*1+rax] - mov eax,DWORD PTR[20+rsp] - mov ebx,edx - mov edi,r11d - xor eax,DWORD PTR[28+rsp] - xor ebx,r12d - rol edi,5 - xor eax,DWORD PTR[52+rsp] + rol r13d,30 + mov DWORD PTR[20+rsp],ebp + add r11d,ecx + mov edx,DWORD PTR[24+rsp] + mov eax,r13d + mov ebx,r13d + xor edx,DWORD PTR[32+rsp] + and eax,esi + mov ecx,r11d + xor edx,DWORD PTR[56+rsp] xor ebx,esi - add ebp,edi - xor eax,DWORD PTR[8+rsp] + lea edi,DWORD PTR[((-1894007588))+rdi*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[12+rsp] + add edi,eax + and ebx,r12d + rol edx,1 + add edi,ebx rol r12d,30 - add ebp,ebx - rol eax,1 - mov DWORD PTR[20+rsp],eax - lea edi,DWORD PTR[1859775393+rsi*1+rax] - mov eax,DWORD PTR[24+rsp] + mov DWORD PTR[24+rsp],edx + add edi,ecx + mov ebp,DWORD PTR[28+rsp] + mov eax,r12d mov ebx,r12d - mov esi,ebp - xor eax,DWORD PTR[32+rsp] - xor ebx,r11d - rol esi,5 - xor eax,DWORD PTR[56+rsp] - xor ebx,edx - add edi,esi - xor eax,DWORD PTR[12+rsp] + xor ebp,DWORD PTR[36+rsp] + and eax,r13d + mov ecx,edi + xor ebp,DWORD PTR[60+rsp] + xor ebx,r13d + lea esi,DWORD PTR[((-1894007588))+rsi*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[16+rsp] + add esi,eax + and ebx,r11d + rol ebp,1 + add esi,ebx rol r11d,30 - add edi,ebx - rol eax,1 - mov DWORD PTR[24+rsp],eax - lea esi,DWORD PTR[1859775393+rdx*1+rax] - mov eax,DWORD PTR[28+rsp] + mov DWORD PTR[28+rsp],ebp + add esi,ecx + mov edx,DWORD PTR[32+rsp] + mov eax,r11d mov ebx,r11d - mov edx,edi - xor eax,DWORD PTR[36+rsp] - xor ebx,ebp - rol edx,5 - xor eax,DWORD PTR[60+rsp] + xor edx,DWORD PTR[40+rsp] + and eax,r12d + mov ecx,esi + xor edx,DWORD PTR[rsp] xor ebx,r12d - add esi,edx - xor eax,DWORD PTR[16+rsp] - rol ebp,30 - add esi,ebx - rol eax,1 - mov DWORD PTR[28+rsp],eax - lea edx,DWORD PTR[1859775393+r12*1+rax] - mov eax,DWORD PTR[32+rsp] - mov ebx,ebp - mov r12d,esi - xor eax,DWORD PTR[40+rsp] - xor ebx,edi - rol r12d,5 - xor eax,DWORD PTR[rsp] - xor ebx,r11d - add edx,r12d - xor eax,DWORD PTR[20+rsp] + lea r13d,DWORD PTR[((-1894007588))+r13*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[20+rsp] + add r13d,eax + and ebx,edi + rol edx,1 + add r13d,ebx rol edi,30 - add edx,ebx - rol eax,1 
- mov DWORD PTR[32+rsp],eax - lea r12d,DWORD PTR[1859775393+r11*1+rax] - mov eax,DWORD PTR[36+rsp] + mov DWORD PTR[32+rsp],edx + add r13d,ecx + mov ebp,DWORD PTR[36+rsp] + mov eax,edi mov ebx,edi - mov r11d,edx - xor eax,DWORD PTR[44+rsp] - xor ebx,esi - rol r11d,5 - xor eax,DWORD PTR[4+rsp] - xor ebx,ebp - add r12d,r11d - xor eax,DWORD PTR[24+rsp] - rol esi,30 + xor ebp,DWORD PTR[44+rsp] + and eax,r11d + mov ecx,r13d + xor ebp,DWORD PTR[4+rsp] + xor ebx,r11d + lea r12d,DWORD PTR[((-1894007588))+r12*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[24+rsp] + add r12d,eax + and ebx,esi + rol ebp,1 add r12d,ebx - rol eax,1 - mov DWORD PTR[36+rsp],eax - lea r11d,DWORD PTR[1859775393+rbp*1+rax] - mov eax,DWORD PTR[40+rsp] + rol esi,30 + mov DWORD PTR[36+rsp],ebp + add r12d,ecx + mov edx,DWORD PTR[40+rsp] + mov eax,esi mov ebx,esi - mov ebp,r12d - xor eax,DWORD PTR[48+rsp] - xor ebx,edx - rol ebp,5 - xor eax,DWORD PTR[8+rsp] + xor edx,DWORD PTR[48+rsp] + and eax,edi + mov ecx,r12d + xor edx,DWORD PTR[8+rsp] xor ebx,edi - add r11d,ebp - xor eax,DWORD PTR[28+rsp] - rol edx,30 + lea r11d,DWORD PTR[((-1894007588))+r11*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[28+rsp] + add r11d,eax + and ebx,r13d + rol edx,1 add r11d,ebx - rol eax,1 - mov DWORD PTR[40+rsp],eax - lea ebp,DWORD PTR[1859775393+rdi*1+rax] - mov eax,DWORD PTR[44+rsp] - mov ebx,edx - mov edi,r11d - xor eax,DWORD PTR[52+rsp] - xor ebx,r12d - rol edi,5 - xor eax,DWORD PTR[12+rsp] + rol r13d,30 + mov DWORD PTR[40+rsp],edx + add r11d,ecx + mov ebp,DWORD PTR[44+rsp] + mov eax,r13d + mov ebx,r13d + xor ebp,DWORD PTR[52+rsp] + and eax,esi + mov ecx,r11d + xor ebp,DWORD PTR[12+rsp] xor ebx,esi - add ebp,edi - xor eax,DWORD PTR[32+rsp] + lea edi,DWORD PTR[((-1894007588))+rdi*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[32+rsp] + add edi,eax + and ebx,r12d + rol ebp,1 + add edi,ebx rol r12d,30 - add ebp,ebx - rol eax,1 - mov DWORD PTR[44+rsp],eax - lea edi,DWORD PTR[1859775393+rsi*1+rax] - mov eax,DWORD PTR[48+rsp] + mov DWORD PTR[44+rsp],ebp + add edi,ecx + mov edx,DWORD PTR[48+rsp] + mov eax,r12d mov ebx,r12d - mov esi,ebp - xor eax,DWORD PTR[56+rsp] - xor ebx,r11d - rol esi,5 - xor eax,DWORD PTR[16+rsp] - xor ebx,edx - add edi,esi - xor eax,DWORD PTR[36+rsp] - rol r11d,30 - add edi,ebx - rol eax,1 - mov DWORD PTR[48+rsp],eax - lea esi,DWORD PTR[1859775393+rdx*1+rax] - mov eax,DWORD PTR[52+rsp] - mov ebx,r11d - mov edx,edi - xor eax,DWORD PTR[60+rsp] - xor ebx,ebp - rol edx,5 - xor eax,DWORD PTR[20+rsp] - xor ebx,r12d - add esi,edx - xor eax,DWORD PTR[40+rsp] - rol ebp,30 + xor edx,DWORD PTR[56+rsp] + and eax,r13d + mov ecx,edi + xor edx,DWORD PTR[16+rsp] + xor ebx,r13d + lea esi,DWORD PTR[((-1894007588))+rsi*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[36+rsp] + add esi,eax + and ebx,r11d + rol edx,1 add esi,ebx - rol eax,1 - mov DWORD PTR[52+rsp],eax - lea edx,DWORD PTR[1859775393+r12*1+rax] - mov eax,DWORD PTR[56+rsp] - mov ebx,ebp - mov r12d,esi - xor eax,DWORD PTR[rsp] - xor ebx,edi - rol r12d,5 - xor eax,DWORD PTR[24+rsp] - xor ebx,r11d - add edx,r12d - xor eax,DWORD PTR[44+rsp] + rol r11d,30 + mov DWORD PTR[48+rsp],edx + add esi,ecx + mov ebp,DWORD PTR[52+rsp] + mov eax,r11d + mov ecx,esi + xor ebp,DWORD PTR[60+rsp] + xor eax,edi + rol ecx,5 + lea r13d,DWORD PTR[((-899497514))+r13*1+rdx] + xor ebp,DWORD PTR[20+rsp] + xor eax,r12d + add r13d,ecx + xor ebp,DWORD PTR[40+rsp] rol edi,30 - add edx,ebx - rol eax,1 - mov DWORD PTR[56+rsp],eax - lea r12d,DWORD PTR[1859775393+r11*1+rax] - mov eax,DWORD PTR[60+rsp] - mov ebx,edi - mov r11d,edx - xor eax,DWORD PTR[4+rsp] - 
xor ebx,esi - rol r11d,5 - xor eax,DWORD PTR[28+rsp] - xor ebx,ebp - add r12d,r11d - xor eax,DWORD PTR[48+rsp] + add r13d,eax + rol ebp,1 + mov DWORD PTR[52+rsp],ebp + mov edx,DWORD PTR[56+rsp] + mov eax,edi + mov ecx,r13d + xor edx,DWORD PTR[rsp] + xor eax,esi + rol ecx,5 + lea r12d,DWORD PTR[((-899497514))+r12*1+rbp] + xor edx,DWORD PTR[24+rsp] + xor eax,r11d + add r12d,ecx + xor edx,DWORD PTR[44+rsp] rol esi,30 - add r12d,ebx - rol eax,1 - mov DWORD PTR[60+rsp],eax - lea r11d,DWORD PTR[1859775393+rbp*1+rax] - mov eax,DWORD PTR[rsp] - mov ebx,esi - mov ebp,r12d - xor eax,DWORD PTR[8+rsp] - xor ebx,edx - rol ebp,5 - xor eax,DWORD PTR[32+rsp] - xor ebx,edi - add r11d,ebp - xor eax,DWORD PTR[52+rsp] - rol edx,30 - add r11d,ebx - rol eax,1 - mov DWORD PTR[rsp],eax - lea ebp,DWORD PTR[1859775393+rdi*1+rax] - mov eax,DWORD PTR[4+rsp] - mov ebx,edx - mov edi,r11d - xor eax,DWORD PTR[12+rsp] - xor ebx,r12d - rol edi,5 - xor eax,DWORD PTR[36+rsp] - xor ebx,esi - add ebp,edi - xor eax,DWORD PTR[56+rsp] + add r12d,eax + rol edx,1 + mov DWORD PTR[56+rsp],edx + mov ebp,DWORD PTR[60+rsp] + mov eax,esi + mov ecx,r12d + xor ebp,DWORD PTR[4+rsp] + xor eax,r13d + rol ecx,5 + lea r11d,DWORD PTR[((-899497514))+r11*1+rdx] + xor ebp,DWORD PTR[28+rsp] + xor eax,edi + add r11d,ecx + xor ebp,DWORD PTR[48+rsp] + rol r13d,30 + add r11d,eax + rol ebp,1 + mov DWORD PTR[60+rsp],ebp + mov edx,DWORD PTR[rsp] + mov eax,r13d + mov ecx,r11d + xor edx,DWORD PTR[8+rsp] + xor eax,r12d + rol ecx,5 + lea edi,DWORD PTR[((-899497514))+rdi*1+rbp] + xor edx,DWORD PTR[32+rsp] + xor eax,esi + add edi,ecx + xor edx,DWORD PTR[52+rsp] rol r12d,30 - add ebp,ebx - rol eax,1 - mov DWORD PTR[4+rsp],eax - lea edi,DWORD PTR[1859775393+rsi*1+rax] - mov eax,DWORD PTR[8+rsp] - mov ebx,r12d - mov esi,ebp - xor eax,DWORD PTR[16+rsp] - xor ebx,r11d - rol esi,5 - xor eax,DWORD PTR[40+rsp] - xor ebx,edx - add edi,esi - xor eax,DWORD PTR[60+rsp] + add edi,eax + rol edx,1 + mov DWORD PTR[rsp],edx + mov ebp,DWORD PTR[4+rsp] + mov eax,r12d + mov ecx,edi + xor ebp,DWORD PTR[12+rsp] + xor eax,r11d + rol ecx,5 + lea esi,DWORD PTR[((-899497514))+rsi*1+rdx] + xor ebp,DWORD PTR[36+rsp] + xor eax,r13d + add esi,ecx + xor ebp,DWORD PTR[56+rsp] rol r11d,30 - add edi,ebx - rol eax,1 - mov DWORD PTR[8+rsp],eax - lea esi,DWORD PTR[1859775393+rdx*1+rax] - mov eax,DWORD PTR[12+rsp] - mov ebx,r11d - mov edx,edi - xor eax,DWORD PTR[20+rsp] - xor ebx,ebp - rol edx,5 - xor eax,DWORD PTR[44+rsp] - xor ebx,r12d - add esi,edx - xor eax,DWORD PTR[rsp] - rol ebp,30 - add esi,ebx - rol eax,1 - mov DWORD PTR[12+rsp],eax - lea edx,DWORD PTR[1859775393+r12*1+rax] - mov eax,DWORD PTR[16+rsp] - mov ebx,ebp - mov r12d,esi - xor eax,DWORD PTR[24+rsp] - xor ebx,edi - rol r12d,5 - xor eax,DWORD PTR[48+rsp] - xor ebx,r11d - add edx,r12d - xor eax,DWORD PTR[4+rsp] + add esi,eax + rol ebp,1 + mov DWORD PTR[4+rsp],ebp + mov edx,DWORD PTR[8+rsp] + mov eax,r11d + mov ecx,esi + xor edx,DWORD PTR[16+rsp] + xor eax,edi + rol ecx,5 + lea r13d,DWORD PTR[((-899497514))+r13*1+rbp] + xor edx,DWORD PTR[40+rsp] + xor eax,r12d + add r13d,ecx + xor edx,DWORD PTR[60+rsp] rol edi,30 - add edx,ebx - rol eax,1 - mov DWORD PTR[16+rsp],eax - lea r12d,DWORD PTR[1859775393+r11*1+rax] - mov eax,DWORD PTR[20+rsp] - mov ebx,edi - mov r11d,edx - xor eax,DWORD PTR[28+rsp] - xor ebx,esi - rol r11d,5 - xor eax,DWORD PTR[52+rsp] - xor ebx,ebp - add r12d,r11d - xor eax,DWORD PTR[8+rsp] + add r13d,eax + rol edx,1 + mov DWORD PTR[8+rsp],edx + mov ebp,DWORD PTR[12+rsp] + mov eax,edi + mov ecx,r13d + xor ebp,DWORD 
PTR[20+rsp] + xor eax,esi + rol ecx,5 + lea r12d,DWORD PTR[((-899497514))+r12*1+rdx] + xor ebp,DWORD PTR[44+rsp] + xor eax,r11d + add r12d,ecx + xor ebp,DWORD PTR[rsp] rol esi,30 - add r12d,ebx - rol eax,1 - mov DWORD PTR[20+rsp],eax - lea r11d,DWORD PTR[1859775393+rbp*1+rax] - mov eax,DWORD PTR[24+rsp] - mov ebx,esi - mov ebp,r12d - xor eax,DWORD PTR[32+rsp] - xor ebx,edx - rol ebp,5 - xor eax,DWORD PTR[56+rsp] - xor ebx,edi - add r11d,ebp - xor eax,DWORD PTR[12+rsp] - rol edx,30 - add r11d,ebx - rol eax,1 - mov DWORD PTR[24+rsp],eax - lea ebp,DWORD PTR[1859775393+rdi*1+rax] - mov eax,DWORD PTR[28+rsp] - mov ebx,edx - mov edi,r11d - xor eax,DWORD PTR[36+rsp] - xor ebx,r12d - rol edi,5 - xor eax,DWORD PTR[60+rsp] - xor ebx,esi - add ebp,edi - xor eax,DWORD PTR[16+rsp] + add r12d,eax + rol ebp,1 + mov DWORD PTR[12+rsp],ebp + mov edx,DWORD PTR[16+rsp] + mov eax,esi + mov ecx,r12d + xor edx,DWORD PTR[24+rsp] + xor eax,r13d + rol ecx,5 + lea r11d,DWORD PTR[((-899497514))+r11*1+rbp] + xor edx,DWORD PTR[48+rsp] + xor eax,edi + add r11d,ecx + xor edx,DWORD PTR[4+rsp] + rol r13d,30 + add r11d,eax + rol edx,1 + mov DWORD PTR[16+rsp],edx + mov ebp,DWORD PTR[20+rsp] + mov eax,r13d + mov ecx,r11d + xor ebp,DWORD PTR[28+rsp] + xor eax,r12d + rol ecx,5 + lea edi,DWORD PTR[((-899497514))+rdi*1+rdx] + xor ebp,DWORD PTR[52+rsp] + xor eax,esi + add edi,ecx + xor ebp,DWORD PTR[8+rsp] rol r12d,30 - add ebp,ebx - rol eax,1 - mov DWORD PTR[28+rsp],eax - lea edi,DWORD PTR[1859775393+rsi*1+rax] - mov eax,DWORD PTR[32+rsp] - mov ebx,r12d - mov esi,ebp - xor eax,DWORD PTR[40+rsp] - xor ebx,r11d - rol esi,5 - xor eax,DWORD PTR[rsp] - xor ebx,edx - add edi,esi - xor eax,DWORD PTR[20+rsp] - rol r11d,30 - add edi,ebx - rol eax,1 - mov DWORD PTR[32+rsp],eax - lea esi,DWORD PTR[08f1bbcdch+rdx*1+rax] - mov eax,DWORD PTR[36+rsp] - mov ebx,ebp - mov ecx,ebp - xor eax,DWORD PTR[44+rsp] - mov edx,edi - and ebx,r11d - xor eax,DWORD PTR[4+rsp] - or ecx,r11d - rol edx,5 - xor eax,DWORD PTR[24+rsp] - and ecx,r12d - add esi,edx - rol eax,1 - or ebx,ecx - rol ebp,30 - mov DWORD PTR[36+rsp],eax - add esi,ebx - lea edx,DWORD PTR[08f1bbcdch+r12*1+rax] - mov eax,DWORD PTR[40+rsp] - mov ebx,edi + add edi,eax + rol ebp,1 + mov DWORD PTR[20+rsp],ebp + mov edx,DWORD PTR[24+rsp] + mov eax,r12d mov ecx,edi - xor eax,DWORD PTR[48+rsp] - mov r12d,esi - and ebx,ebp - xor eax,DWORD PTR[8+rsp] - or ecx,ebp - rol r12d,5 - xor eax,DWORD PTR[28+rsp] - and ecx,r11d - add edx,r12d - rol eax,1 - or ebx,ecx - rol edi,30 - mov DWORD PTR[40+rsp],eax - add edx,ebx - lea r12d,DWORD PTR[08f1bbcdch+r11*1+rax] - mov eax,DWORD PTR[44+rsp] - mov ebx,esi + xor edx,DWORD PTR[32+rsp] + xor eax,r11d + rol ecx,5 + lea esi,DWORD PTR[((-899497514))+rsi*1+rbp] + xor edx,DWORD PTR[56+rsp] + xor eax,r13d + add esi,ecx + xor edx,DWORD PTR[12+rsp] + rol r11d,30 + add esi,eax + rol edx,1 + mov DWORD PTR[24+rsp],edx + mov ebp,DWORD PTR[28+rsp] + mov eax,r11d mov ecx,esi - xor eax,DWORD PTR[52+rsp] - mov r11d,edx - and ebx,edi - xor eax,DWORD PTR[12+rsp] - or ecx,edi - rol r11d,5 - xor eax,DWORD PTR[32+rsp] - and ecx,ebp - add r12d,r11d - rol eax,1 - or ebx,ecx + xor ebp,DWORD PTR[36+rsp] + xor eax,edi + rol ecx,5 + lea r13d,DWORD PTR[((-899497514))+r13*1+rdx] + xor ebp,DWORD PTR[60+rsp] + xor eax,r12d + add r13d,ecx + xor ebp,DWORD PTR[16+rsp] + rol edi,30 + add r13d,eax + rol ebp,1 + mov DWORD PTR[28+rsp],ebp + mov edx,DWORD PTR[32+rsp] + mov eax,edi + mov ecx,r13d + xor edx,DWORD PTR[40+rsp] + xor eax,esi + rol ecx,5 + lea r12d,DWORD PTR[((-899497514))+r12*1+rbp] + xor 
edx,DWORD PTR[rsp] + xor eax,r11d + add r12d,ecx + xor edx,DWORD PTR[20+rsp] rol esi,30 - mov DWORD PTR[44+rsp],eax - add r12d,ebx - lea r11d,DWORD PTR[08f1bbcdch+rbp*1+rax] - mov eax,DWORD PTR[48+rsp] - mov ebx,edx - mov ecx,edx - xor eax,DWORD PTR[56+rsp] - mov ebp,r12d - and ebx,esi - xor eax,DWORD PTR[16+rsp] - or ecx,esi - rol ebp,5 - xor eax,DWORD PTR[36+rsp] - and ecx,edi - add r11d,ebp - rol eax,1 - or ebx,ecx - rol edx,30 - mov DWORD PTR[48+rsp],eax - add r11d,ebx - lea ebp,DWORD PTR[08f1bbcdch+rdi*1+rax] - mov eax,DWORD PTR[52+rsp] - mov ebx,r12d + add r12d,eax + rol edx,1 + mov DWORD PTR[32+rsp],edx + mov ebp,DWORD PTR[36+rsp] + mov eax,esi mov ecx,r12d - xor eax,DWORD PTR[60+rsp] - mov edi,r11d - and ebx,edx - xor eax,DWORD PTR[20+rsp] - or ecx,edx - rol edi,5 - xor eax,DWORD PTR[40+rsp] - and ecx,esi - add ebp,edi - rol eax,1 - or ebx,ecx - rol r12d,30 - mov DWORD PTR[52+rsp],eax - add ebp,ebx - lea edi,DWORD PTR[08f1bbcdch+rsi*1+rax] - mov eax,DWORD PTR[56+rsp] - mov ebx,r11d + xor ebp,DWORD PTR[44+rsp] + xor eax,r13d + rol ecx,5 + lea r11d,DWORD PTR[((-899497514))+r11*1+rdx] + xor ebp,DWORD PTR[4+rsp] + xor eax,edi + add r11d,ecx + xor ebp,DWORD PTR[24+rsp] + rol r13d,30 + add r11d,eax + rol ebp,1 + mov DWORD PTR[36+rsp],ebp + mov edx,DWORD PTR[40+rsp] + mov eax,r13d mov ecx,r11d - xor eax,DWORD PTR[rsp] - mov esi,ebp - and ebx,r12d - xor eax,DWORD PTR[24+rsp] - or ecx,r12d - rol esi,5 - xor eax,DWORD PTR[44+rsp] - and ecx,edx - add edi,esi - rol eax,1 - or ebx,ecx - rol r11d,30 - mov DWORD PTR[56+rsp],eax - add edi,ebx - lea esi,DWORD PTR[08f1bbcdch+rdx*1+rax] - mov eax,DWORD PTR[60+rsp] - mov ebx,ebp - mov ecx,ebp - xor eax,DWORD PTR[4+rsp] - mov edx,edi - and ebx,r11d - xor eax,DWORD PTR[28+rsp] - or ecx,r11d - rol edx,5 - xor eax,DWORD PTR[48+rsp] - and ecx,r12d - add esi,edx - rol eax,1 - or ebx,ecx - rol ebp,30 - mov DWORD PTR[60+rsp],eax - add esi,ebx - lea edx,DWORD PTR[08f1bbcdch+r12*1+rax] - mov eax,DWORD PTR[rsp] - mov ebx,edi + xor edx,DWORD PTR[48+rsp] + xor eax,r12d + rol ecx,5 + lea edi,DWORD PTR[((-899497514))+rdi*1+rbp] + xor edx,DWORD PTR[8+rsp] + xor eax,esi + add edi,ecx + xor edx,DWORD PTR[28+rsp] + rol r12d,30 + add edi,eax + rol edx,1 + mov DWORD PTR[40+rsp],edx + mov ebp,DWORD PTR[44+rsp] + mov eax,r12d mov ecx,edi - xor eax,DWORD PTR[8+rsp] - mov r12d,esi - and ebx,ebp - xor eax,DWORD PTR[32+rsp] - or ecx,ebp - rol r12d,5 - xor eax,DWORD PTR[52+rsp] - and ecx,r11d - add edx,r12d - rol eax,1 - or ebx,ecx - rol edi,30 - mov DWORD PTR[rsp],eax - add edx,ebx - lea r12d,DWORD PTR[08f1bbcdch+r11*1+rax] - mov eax,DWORD PTR[4+rsp] - mov ebx,esi + xor ebp,DWORD PTR[52+rsp] + xor eax,r11d + rol ecx,5 + lea esi,DWORD PTR[((-899497514))+rsi*1+rdx] + xor ebp,DWORD PTR[12+rsp] + xor eax,r13d + add esi,ecx + xor ebp,DWORD PTR[32+rsp] + rol r11d,30 + add esi,eax + rol ebp,1 + mov DWORD PTR[44+rsp],ebp + mov edx,DWORD PTR[48+rsp] + mov eax,r11d mov ecx,esi - xor eax,DWORD PTR[12+rsp] - mov r11d,edx - and ebx,edi - xor eax,DWORD PTR[36+rsp] - or ecx,edi - rol r11d,5 - xor eax,DWORD PTR[56+rsp] - and ecx,ebp - add r12d,r11d - rol eax,1 - or ebx,ecx + xor edx,DWORD PTR[56+rsp] + xor eax,edi + rol ecx,5 + lea r13d,DWORD PTR[((-899497514))+r13*1+rbp] + xor edx,DWORD PTR[16+rsp] + xor eax,r12d + add r13d,ecx + xor edx,DWORD PTR[36+rsp] + rol edi,30 + add r13d,eax + rol edx,1 + mov DWORD PTR[48+rsp],edx + mov ebp,DWORD PTR[52+rsp] + mov eax,edi + mov ecx,r13d + xor ebp,DWORD PTR[60+rsp] + xor eax,esi + rol ecx,5 + lea r12d,DWORD PTR[((-899497514))+r12*1+rdx] + xor ebp,DWORD 
PTR[20+rsp] + xor eax,r11d + add r12d,ecx + xor ebp,DWORD PTR[40+rsp] rol esi,30 - mov DWORD PTR[4+rsp],eax - add r12d,ebx - lea r11d,DWORD PTR[08f1bbcdch+rbp*1+rax] - mov eax,DWORD PTR[8+rsp] - mov ebx,edx - mov ecx,edx - xor eax,DWORD PTR[16+rsp] - mov ebp,r12d - and ebx,esi - xor eax,DWORD PTR[40+rsp] - or ecx,esi - rol ebp,5 - xor eax,DWORD PTR[60+rsp] - and ecx,edi - add r11d,ebp - rol eax,1 - or ebx,ecx - rol edx,30 - mov DWORD PTR[8+rsp],eax - add r11d,ebx - lea ebp,DWORD PTR[08f1bbcdch+rdi*1+rax] - mov eax,DWORD PTR[12+rsp] - mov ebx,r12d + add r12d,eax + rol ebp,1 + mov edx,DWORD PTR[56+rsp] + mov eax,esi mov ecx,r12d - xor eax,DWORD PTR[20+rsp] - mov edi,r11d - and ebx,edx - xor eax,DWORD PTR[44+rsp] - or ecx,edx - rol edi,5 - xor eax,DWORD PTR[rsp] - and ecx,esi - add ebp,edi - rol eax,1 - or ebx,ecx - rol r12d,30 - mov DWORD PTR[12+rsp],eax - add ebp,ebx - lea edi,DWORD PTR[08f1bbcdch+rsi*1+rax] - mov eax,DWORD PTR[16+rsp] - mov ebx,r11d + xor edx,DWORD PTR[rsp] + xor eax,r13d + rol ecx,5 + lea r11d,DWORD PTR[((-899497514))+r11*1+rbp] + xor edx,DWORD PTR[24+rsp] + xor eax,edi + add r11d,ecx + xor edx,DWORD PTR[44+rsp] + rol r13d,30 + add r11d,eax + rol edx,1 + mov ebp,DWORD PTR[60+rsp] + mov eax,r13d mov ecx,r11d - xor eax,DWORD PTR[24+rsp] - mov esi,ebp - and ebx,r12d - xor eax,DWORD PTR[48+rsp] - or ecx,r12d - rol esi,5 - xor eax,DWORD PTR[4+rsp] - and ecx,edx - add edi,esi - rol eax,1 - or ebx,ecx + xor ebp,DWORD PTR[4+rsp] + xor eax,r12d + rol ecx,5 + lea edi,DWORD PTR[((-899497514))+rdi*1+rdx] + xor ebp,DWORD PTR[28+rsp] + xor eax,esi + add edi,ecx + xor ebp,DWORD PTR[48+rsp] + rol r12d,30 + add edi,eax + rol ebp,1 + mov eax,r12d + mov ecx,edi + xor eax,r11d + lea esi,DWORD PTR[((-899497514))+rsi*1+rbp] + rol ecx,5 + xor eax,r13d + add esi,ecx rol r11d,30 - mov DWORD PTR[16+rsp],eax - add edi,ebx - lea esi,DWORD PTR[08f1bbcdch+rdx*1+rax] - mov eax,DWORD PTR[20+rsp] - mov ebx,ebp - mov ecx,ebp - xor eax,DWORD PTR[28+rsp] - mov edx,edi - and ebx,r11d - xor eax,DWORD PTR[52+rsp] - or ecx,r11d + add esi,eax + add esi,DWORD PTR[r8] + add edi,DWORD PTR[4+r8] + add r11d,DWORD PTR[8+r8] + add r12d,DWORD PTR[12+r8] + add r13d,DWORD PTR[16+r8] + mov DWORD PTR[r8],esi + mov DWORD PTR[4+r8],edi + mov DWORD PTR[8+r8],r11d + mov DWORD PTR[12+r8],r12d + mov DWORD PTR[16+r8],r13d + + sub r10,1 + lea r9,QWORD PTR[64+r9] + jnz $L$loop + + mov rsi,QWORD PTR[64+rsp] + mov r13,QWORD PTR[rsi] + mov r12,QWORD PTR[8+rsi] + mov rbp,QWORD PTR[16+rsi] + mov rbx,QWORD PTR[24+rsi] + lea rsp,QWORD PTR[32+rsi] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_block_data_order:: +sha1_block_data_order ENDP + +ALIGN 16 +sha1_block_data_order_ssse3 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_ssse3:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_ssse3_shortcut:: + push rbx + push rbp + push r12 + lea rsp,QWORD PTR[((-144))+rsp] + movaps XMMWORD PTR[(64+0)+rsp],xmm6 + movaps XMMWORD PTR[(64+16)+rsp],xmm7 + movaps XMMWORD PTR[(64+32)+rsp],xmm8 + movaps XMMWORD PTR[(64+48)+rsp],xmm9 + movaps XMMWORD PTR[(64+64)+rsp],xmm10 +$L$prologue_ssse3:: + mov r8,rdi + mov r9,rsi + mov r10,rdx + + shl r10,6 + add r10,r9 + lea r11,QWORD PTR[K_XX_XX] + + mov eax,DWORD PTR[r8] + mov ebx,DWORD PTR[4+r8] + mov ecx,DWORD PTR[8+r8] + mov edx,DWORD PTR[12+r8] + mov esi,ebx + mov ebp,DWORD PTR[16+r8] + + movdqa xmm6,XMMWORD PTR[64+r11] + movdqa xmm9,XMMWORD PTR[r11] 
+ movdqu xmm0,XMMWORD PTR[r9] + movdqu xmm1,XMMWORD PTR[16+r9] + movdqu xmm2,XMMWORD PTR[32+r9] + movdqu xmm3,XMMWORD PTR[48+r9] +DB 102,15,56,0,198 + add r9,64 +DB 102,15,56,0,206 +DB 102,15,56,0,214 +DB 102,15,56,0,222 + paddd xmm0,xmm9 + paddd xmm1,xmm9 + paddd xmm2,xmm9 + movdqa XMMWORD PTR[rsp],xmm0 + psubd xmm0,xmm9 + movdqa XMMWORD PTR[16+rsp],xmm1 + psubd xmm1,xmm9 + movdqa XMMWORD PTR[32+rsp],xmm2 + psubd xmm2,xmm9 + jmp $L$oop_ssse3 +ALIGN 16 +$L$oop_ssse3:: + movdqa xmm4,xmm1 + add ebp,DWORD PTR[rsp] + xor ecx,edx + movdqa xmm8,xmm3 +DB 102,15,58,15,224,8 + mov edi,eax + rol eax,5 + paddd xmm9,xmm3 + and esi,ecx + xor ecx,edx + psrldq xmm8,4 + xor esi,edx + add ebp,eax + pxor xmm4,xmm0 + ror ebx,2 + add ebp,esi + pxor xmm8,xmm2 + add edx,DWORD PTR[4+rsp] + xor ebx,ecx + mov esi,ebp + rol ebp,5 + pxor xmm4,xmm8 + and edi,ebx + xor ebx,ecx + movdqa XMMWORD PTR[48+rsp],xmm9 + xor edi,ecx + add edx,ebp + movdqa xmm10,xmm4 + movdqa xmm8,xmm4 + ror eax,7 + add edx,edi + add ecx,DWORD PTR[8+rsp] + xor eax,ebx + pslldq xmm10,12 + paddd xmm4,xmm4 + mov edi,edx rol edx,5 - xor eax,DWORD PTR[8+rsp] - and ecx,r12d - add esi,edx - rol eax,1 - or ebx,ecx - rol ebp,30 - mov DWORD PTR[20+rsp],eax - add esi,ebx - lea edx,DWORD PTR[08f1bbcdch+r12*1+rax] - mov eax,DWORD PTR[24+rsp] - mov ebx,edi - mov ecx,edi - xor eax,DWORD PTR[32+rsp] - mov r12d,esi - and ebx,ebp - xor eax,DWORD PTR[56+rsp] - or ecx,ebp - rol r12d,5 - xor eax,DWORD PTR[12+rsp] - and ecx,r11d - add edx,r12d - rol eax,1 - or ebx,ecx - rol edi,30 - mov DWORD PTR[24+rsp],eax - add edx,ebx - lea r12d,DWORD PTR[08f1bbcdch+r11*1+rax] - mov eax,DWORD PTR[28+rsp] - mov ebx,esi - mov ecx,esi - xor eax,DWORD PTR[36+rsp] - mov r11d,edx - and ebx,edi - xor eax,DWORD PTR[60+rsp] - or ecx,edi - rol r11d,5 - xor eax,DWORD PTR[16+rsp] - and ecx,ebp - add r12d,r11d - rol eax,1 - or ebx,ecx - rol esi,30 - mov DWORD PTR[28+rsp],eax - add r12d,ebx - lea r11d,DWORD PTR[08f1bbcdch+rbp*1+rax] - mov eax,DWORD PTR[32+rsp] - mov ebx,edx - mov ecx,edx - xor eax,DWORD PTR[40+rsp] - mov ebp,r12d - and ebx,esi - xor eax,DWORD PTR[rsp] - or ecx,esi + and esi,eax + xor eax,ebx + psrld xmm8,31 + xor esi,ebx + add ecx,edx + movdqa xmm9,xmm10 + ror ebp,7 + add ecx,esi + psrld xmm10,30 + por xmm4,xmm8 + add ebx,DWORD PTR[12+rsp] + xor ebp,eax + mov esi,ecx + rol ecx,5 + pslld xmm9,2 + pxor xmm4,xmm10 + and edi,ebp + xor ebp,eax + movdqa xmm10,XMMWORD PTR[r11] + xor edi,eax + add ebx,ecx + pxor xmm4,xmm9 + ror edx,7 + add ebx,edi + movdqa xmm5,xmm2 + add eax,DWORD PTR[16+rsp] + xor edx,ebp + movdqa xmm9,xmm4 +DB 102,15,58,15,233,8 + mov edi,ebx + rol ebx,5 + paddd xmm10,xmm4 + and esi,edx + xor edx,ebp + psrldq xmm9,4 + xor esi,ebp + add eax,ebx + pxor xmm5,xmm1 + ror ecx,7 + add eax,esi + pxor xmm9,xmm3 + add ebp,DWORD PTR[20+rsp] + xor ecx,edx + mov esi,eax + rol eax,5 + pxor xmm5,xmm9 + and edi,ecx + xor ecx,edx + movdqa XMMWORD PTR[rsp],xmm10 + xor edi,edx + add ebp,eax + movdqa xmm8,xmm5 + movdqa xmm9,xmm5 + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[24+rsp] + xor ebx,ecx + pslldq xmm8,12 + paddd xmm5,xmm5 + mov edi,ebp rol ebp,5 - xor eax,DWORD PTR[20+rsp] - and ecx,edi - add r11d,ebp - rol eax,1 - or ebx,ecx - rol edx,30 - mov DWORD PTR[32+rsp],eax - add r11d,ebx - lea ebp,DWORD PTR[08f1bbcdch+rdi*1+rax] - mov eax,DWORD PTR[36+rsp] - mov ebx,r12d - mov ecx,r12d - xor eax,DWORD PTR[44+rsp] - mov edi,r11d - and ebx,edx - xor eax,DWORD PTR[4+rsp] - or ecx,edx - rol edi,5 - xor eax,DWORD PTR[24+rsp] - and ecx,esi + and esi,ebx + xor ebx,ecx + psrld xmm9,31 + xor 
esi,ecx + add edx,ebp + movdqa xmm10,xmm8 + ror eax,7 + add edx,esi + psrld xmm8,30 + por xmm5,xmm9 + add ecx,DWORD PTR[28+rsp] + xor eax,ebx + mov esi,edx + rol edx,5 + pslld xmm10,2 + pxor xmm5,xmm8 + and edi,eax + xor eax,ebx + movdqa xmm8,XMMWORD PTR[16+r11] + xor edi,ebx + add ecx,edx + pxor xmm5,xmm10 + ror ebp,7 + add ecx,edi + movdqa xmm6,xmm3 + add ebx,DWORD PTR[32+rsp] + xor ebp,eax + movdqa xmm10,xmm5 +DB 102,15,58,15,242,8 + mov edi,ecx + rol ecx,5 + paddd xmm8,xmm5 + and esi,ebp + xor ebp,eax + psrldq xmm10,4 + xor esi,eax + add ebx,ecx + pxor xmm6,xmm2 + ror edx,7 + add ebx,esi + pxor xmm10,xmm4 + add eax,DWORD PTR[36+rsp] + xor edx,ebp + mov esi,ebx + rol ebx,5 + pxor xmm6,xmm10 + and edi,edx + xor edx,ebp + movdqa XMMWORD PTR[16+rsp],xmm8 + xor edi,ebp + add eax,ebx + movdqa xmm9,xmm6 + movdqa xmm10,xmm6 + ror ecx,7 + add eax,edi + add ebp,DWORD PTR[40+rsp] + xor ecx,edx + pslldq xmm9,12 + paddd xmm6,xmm6 + mov edi,eax + rol eax,5 + and esi,ecx + xor ecx,edx + psrld xmm10,31 + xor esi,edx + add ebp,eax + movdqa xmm8,xmm9 + ror ebx,7 + add ebp,esi + psrld xmm9,30 + por xmm6,xmm10 + add edx,DWORD PTR[44+rsp] + xor ebx,ecx + mov esi,ebp + rol ebp,5 + pslld xmm8,2 + pxor xmm6,xmm9 + and edi,ebx + xor ebx,ecx + movdqa xmm9,XMMWORD PTR[16+r11] + xor edi,ecx + add edx,ebp + pxor xmm6,xmm8 + ror eax,7 + add edx,edi + movdqa xmm7,xmm4 + add ecx,DWORD PTR[48+rsp] + xor eax,ebx + movdqa xmm8,xmm6 +DB 102,15,58,15,251,8 + mov edi,edx + rol edx,5 + paddd xmm9,xmm6 + and esi,eax + xor eax,ebx + psrldq xmm8,4 + xor esi,ebx + add ecx,edx + pxor xmm7,xmm3 + ror ebp,7 + add ecx,esi + pxor xmm8,xmm5 + add ebx,DWORD PTR[52+rsp] + xor ebp,eax + mov esi,ecx + rol ecx,5 + pxor xmm7,xmm8 + and edi,ebp + xor ebp,eax + movdqa XMMWORD PTR[32+rsp],xmm9 + xor edi,eax + add ebx,ecx + movdqa xmm10,xmm7 + movdqa xmm8,xmm7 + ror edx,7 + add ebx,edi + add eax,DWORD PTR[56+rsp] + xor edx,ebp + pslldq xmm10,12 + paddd xmm7,xmm7 + mov edi,ebx + rol ebx,5 + and esi,edx + xor edx,ebp + psrld xmm8,31 + xor esi,ebp + add eax,ebx + movdqa xmm9,xmm10 + ror ecx,7 + add eax,esi + psrld xmm10,30 + por xmm7,xmm8 + add ebp,DWORD PTR[60+rsp] + xor ecx,edx + mov esi,eax + rol eax,5 + pslld xmm9,2 + pxor xmm7,xmm10 + and edi,ecx + xor ecx,edx + movdqa xmm10,XMMWORD PTR[16+r11] + xor edi,edx + add ebp,eax + pxor xmm7,xmm9 + ror ebx,7 add ebp,edi - rol eax,1 - or ebx,ecx - rol r12d,30 - mov DWORD PTR[36+rsp],eax - add ebp,ebx - lea edi,DWORD PTR[08f1bbcdch+rsi*1+rax] - mov eax,DWORD PTR[40+rsp] - mov ebx,r11d - mov ecx,r11d - xor eax,DWORD PTR[48+rsp] + movdqa xmm9,xmm7 + add edx,DWORD PTR[rsp] + pxor xmm0,xmm4 +DB 102,68,15,58,15,206,8 + xor ebx,ecx + mov edi,ebp + rol ebp,5 + pxor xmm0,xmm1 + and esi,ebx + xor ebx,ecx + movdqa xmm8,xmm10 + paddd xmm10,xmm7 + xor esi,ecx + add edx,ebp + pxor xmm0,xmm9 + ror eax,7 + add edx,esi + add ecx,DWORD PTR[4+rsp] + xor eax,ebx + movdqa xmm9,xmm0 + movdqa XMMWORD PTR[48+rsp],xmm10 + mov esi,edx + rol edx,5 + and edi,eax + xor eax,ebx + pslld xmm0,2 + xor edi,ebx + add ecx,edx + psrld xmm9,30 + ror ebp,7 + add ecx,edi + add ebx,DWORD PTR[8+rsp] + xor ebp,eax + mov edi,ecx + rol ecx,5 + por xmm0,xmm9 + and esi,ebp + xor ebp,eax + movdqa xmm10,xmm0 + xor esi,eax + add ebx,ecx + ror edx,7 + add ebx,esi + add eax,DWORD PTR[12+rsp] + xor edx,ebp + mov esi,ebx + rol ebx,5 + and edi,edx + xor edx,ebp + xor edi,ebp + add eax,ebx + ror ecx,7 + add eax,edi + add ebp,DWORD PTR[16+rsp] + pxor xmm1,xmm5 +DB 102,68,15,58,15,215,8 + xor esi,edx + mov edi,eax + rol eax,5 + pxor xmm1,xmm2 + xor esi,ecx 
+ add ebp,eax + movdqa xmm9,xmm8 + paddd xmm8,xmm0 + ror ebx,7 + add ebp,esi + pxor xmm1,xmm10 + add edx,DWORD PTR[20+rsp] + xor edi,ecx mov esi,ebp - and ebx,r12d - xor eax,DWORD PTR[8+rsp] - or ecx,r12d - rol esi,5 - xor eax,DWORD PTR[28+rsp] - and ecx,edx - add edi,esi - rol eax,1 - or ebx,ecx - rol r11d,30 - mov DWORD PTR[40+rsp],eax - add edi,ebx - lea esi,DWORD PTR[08f1bbcdch+rdx*1+rax] - mov eax,DWORD PTR[44+rsp] - mov ebx,ebp - mov ecx,ebp - xor eax,DWORD PTR[52+rsp] - mov edx,edi - and ebx,r11d - xor eax,DWORD PTR[12+rsp] - or ecx,r11d + rol ebp,5 + movdqa xmm10,xmm1 + movdqa XMMWORD PTR[rsp],xmm8 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + pslld xmm1,2 + add ecx,DWORD PTR[24+rsp] + xor esi,ebx + psrld xmm10,30 + mov edi,edx rol edx,5 - xor eax,DWORD PTR[32+rsp] - and ecx,r12d - add esi,edx - rol eax,1 - or ebx,ecx - rol ebp,30 - mov DWORD PTR[44+rsp],eax - add esi,ebx - lea edx,DWORD PTR[08f1bbcdch+r12*1+rax] - mov eax,DWORD PTR[48+rsp] - mov ebx,edi - mov ecx,edi - xor eax,DWORD PTR[56+rsp] - mov r12d,esi - and ebx,ebp - xor eax,DWORD PTR[16+rsp] - or ecx,ebp - rol r12d,5 - xor eax,DWORD PTR[36+rsp] - and ecx,r11d - add edx,r12d - rol eax,1 - or ebx,ecx - rol edi,30 - mov DWORD PTR[48+rsp],eax - add edx,ebx - lea r12d,DWORD PTR[3395469782+r11*1+rax] - mov eax,DWORD PTR[52+rsp] - mov ebx,edi - mov r11d,edx - xor eax,DWORD PTR[60+rsp] - xor ebx,esi - rol r11d,5 - xor eax,DWORD PTR[20+rsp] - xor ebx,ebp - add r12d,r11d - xor eax,DWORD PTR[40+rsp] - rol esi,30 - add r12d,ebx - rol eax,1 - mov DWORD PTR[52+rsp],eax - lea r11d,DWORD PTR[3395469782+rbp*1+rax] - mov eax,DWORD PTR[56+rsp] - mov ebx,esi - mov ebp,r12d - xor eax,DWORD PTR[rsp] - xor ebx,edx + xor esi,eax + add ecx,edx + ror ebp,7 + add ecx,esi + por xmm1,xmm10 + add ebx,DWORD PTR[28+rsp] + xor edi,eax + movdqa xmm8,xmm1 + mov esi,ecx + rol ecx,5 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + add eax,DWORD PTR[32+rsp] + pxor xmm2,xmm6 +DB 102,68,15,58,15,192,8 + xor esi,ebp + mov edi,ebx + rol ebx,5 + pxor xmm2,xmm3 + xor esi,edx + add eax,ebx + movdqa xmm10,XMMWORD PTR[32+r11] + paddd xmm9,xmm1 + ror ecx,7 + add eax,esi + pxor xmm2,xmm8 + add ebp,DWORD PTR[36+rsp] + xor edi,edx + mov esi,eax + rol eax,5 + movdqa xmm8,xmm2 + movdqa XMMWORD PTR[16+rsp],xmm9 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + pslld xmm2,2 + add edx,DWORD PTR[40+rsp] + xor esi,ecx + psrld xmm8,30 + mov edi,ebp rol ebp,5 - xor eax,DWORD PTR[24+rsp] - xor ebx,edi - add r11d,ebp - xor eax,DWORD PTR[44+rsp] - rol edx,30 - add r11d,ebx - rol eax,1 - mov DWORD PTR[56+rsp],eax - lea ebp,DWORD PTR[3395469782+rdi*1+rax] - mov eax,DWORD PTR[60+rsp] - mov ebx,edx - mov edi,r11d - xor eax,DWORD PTR[4+rsp] - xor ebx,r12d - rol edi,5 - xor eax,DWORD PTR[28+rsp] - xor ebx,esi + xor esi,ebx + add edx,ebp + ror eax,7 + add edx,esi + por xmm2,xmm8 + add ecx,DWORD PTR[44+rsp] + xor edi,ebx + movdqa xmm9,xmm2 + mov esi,edx + rol edx,5 + xor edi,eax + add ecx,edx + ror ebp,7 + add ecx,edi + add ebx,DWORD PTR[48+rsp] + pxor xmm3,xmm7 +DB 102,68,15,58,15,201,8 + xor esi,eax + mov edi,ecx + rol ecx,5 + pxor xmm3,xmm4 + xor esi,ebp + add ebx,ecx + movdqa xmm8,xmm10 + paddd xmm10,xmm2 + ror edx,7 + add ebx,esi + pxor xmm3,xmm9 + add eax,DWORD PTR[52+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + movdqa xmm9,xmm3 + movdqa XMMWORD PTR[32+rsp],xmm10 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + pslld xmm3,2 + add ebp,DWORD PTR[56+rsp] + xor esi,edx + psrld xmm9,30 + mov edi,eax + rol eax,5 + xor esi,ecx + add ebp,eax + ror ebx,7 + add 
ebp,esi + por xmm3,xmm9 + add edx,DWORD PTR[60+rsp] + xor edi,ecx + movdqa xmm10,xmm3 + mov esi,ebp + rol ebp,5 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + add ecx,DWORD PTR[rsp] + pxor xmm4,xmm0 +DB 102,68,15,58,15,210,8 + xor esi,ebx + mov edi,edx + rol edx,5 + pxor xmm4,xmm5 + xor esi,eax + add ecx,edx + movdqa xmm9,xmm8 + paddd xmm8,xmm3 + ror ebp,7 + add ecx,esi + pxor xmm4,xmm10 + add ebx,DWORD PTR[4+rsp] + xor edi,eax + mov esi,ecx + rol ecx,5 + movdqa xmm10,xmm4 + movdqa XMMWORD PTR[48+rsp],xmm8 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + pslld xmm4,2 + add eax,DWORD PTR[8+rsp] + xor esi,ebp + psrld xmm10,30 + mov edi,ebx + rol ebx,5 + xor esi,edx + add eax,ebx + ror ecx,7 + add eax,esi + por xmm4,xmm10 + add ebp,DWORD PTR[12+rsp] + xor edi,edx + movdqa xmm8,xmm4 + mov esi,eax + rol eax,5 + xor edi,ecx + add ebp,eax + ror ebx,7 add ebp,edi - xor eax,DWORD PTR[48+rsp] - rol r12d,30 - add ebp,ebx - rol eax,1 - mov DWORD PTR[60+rsp],eax - lea edi,DWORD PTR[3395469782+rsi*1+rax] - mov eax,DWORD PTR[rsp] - mov ebx,r12d + add edx,DWORD PTR[16+rsp] + pxor xmm5,xmm1 +DB 102,68,15,58,15,195,8 + xor esi,ecx + mov edi,ebp + rol ebp,5 + pxor xmm5,xmm6 + xor esi,ebx + add edx,ebp + movdqa xmm10,xmm9 + paddd xmm9,xmm4 + ror eax,7 + add edx,esi + pxor xmm5,xmm8 + add ecx,DWORD PTR[20+rsp] + xor edi,ebx + mov esi,edx + rol edx,5 + movdqa xmm8,xmm5 + movdqa XMMWORD PTR[rsp],xmm9 + xor edi,eax + add ecx,edx + ror ebp,7 + add ecx,edi + pslld xmm5,2 + add ebx,DWORD PTR[24+rsp] + xor esi,eax + psrld xmm8,30 + mov edi,ecx + rol ecx,5 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + por xmm5,xmm8 + add eax,DWORD PTR[28+rsp] + xor edi,ebp + movdqa xmm9,xmm5 + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + mov edi,ecx + pxor xmm6,xmm2 +DB 102,68,15,58,15,204,8 + xor ecx,edx + add ebp,DWORD PTR[32+rsp] + and edi,edx + pxor xmm6,xmm7 + and esi,ecx + ror ebx,7 + movdqa xmm8,xmm10 + paddd xmm10,xmm5 + add ebp,edi + mov edi,eax + pxor xmm6,xmm9 + rol eax,5 + add ebp,esi + xor ecx,edx + add ebp,eax + movdqa xmm9,xmm6 + movdqa XMMWORD PTR[16+rsp],xmm10 + mov esi,ebx + xor ebx,ecx + add edx,DWORD PTR[36+rsp] + and esi,ecx + pslld xmm6,2 + and edi,ebx + ror eax,7 + psrld xmm9,30 + add edx,esi mov esi,ebp - xor eax,DWORD PTR[8+rsp] - xor ebx,r11d - rol esi,5 - xor eax,DWORD PTR[32+rsp] - xor ebx,edx - add edi,esi - xor eax,DWORD PTR[52+rsp] - rol r11d,30 - add edi,ebx - rol eax,1 - mov DWORD PTR[rsp],eax - lea esi,DWORD PTR[3395469782+rdx*1+rax] - mov eax,DWORD PTR[4+rsp] - mov ebx,r11d - mov edx,edi - xor eax,DWORD PTR[12+rsp] - xor ebx,ebp + rol ebp,5 + add edx,edi + xor ebx,ecx + add edx,ebp + por xmm6,xmm9 + mov edi,eax + xor eax,ebx + movdqa xmm10,xmm6 + add ecx,DWORD PTR[40+rsp] + and edi,ebx + and esi,eax + ror ebp,7 + add ecx,edi + mov edi,edx rol edx,5 - xor eax,DWORD PTR[36+rsp] - xor ebx,r12d - add esi,edx - xor eax,DWORD PTR[56+rsp] - rol ebp,30 - add esi,ebx - rol eax,1 - mov DWORD PTR[4+rsp],eax - lea edx,DWORD PTR[3395469782+r12*1+rax] - mov eax,DWORD PTR[8+rsp] - mov ebx,ebp - mov r12d,esi - xor eax,DWORD PTR[16+rsp] - xor ebx,edi - rol r12d,5 - xor eax,DWORD PTR[40+rsp] - xor ebx,r11d - add edx,r12d - xor eax,DWORD PTR[60+rsp] - rol edi,30 - add edx,ebx - rol eax,1 - mov DWORD PTR[8+rsp],eax - lea r12d,DWORD PTR[3395469782+r11*1+rax] - mov eax,DWORD PTR[12+rsp] - mov ebx,edi - mov r11d,edx - xor eax,DWORD PTR[20+rsp] - xor ebx,esi - rol r11d,5 - xor eax,DWORD PTR[44+rsp] - xor ebx,ebp - add r12d,r11d - xor eax,DWORD PTR[rsp] - rol 
esi,30 - add r12d,ebx - rol eax,1 - mov DWORD PTR[12+rsp],eax - lea r11d,DWORD PTR[3395469782+rbp*1+rax] - mov eax,DWORD PTR[16+rsp] - mov ebx,esi - mov ebp,r12d - xor eax,DWORD PTR[24+rsp] - xor ebx,edx + add ecx,esi + xor eax,ebx + add ecx,edx + mov esi,ebp + xor ebp,eax + add ebx,DWORD PTR[44+rsp] + and esi,eax + and edi,ebp + ror edx,7 + add ebx,esi + mov esi,ecx + rol ecx,5 + add ebx,edi + xor ebp,eax + add ebx,ecx + mov edi,edx + pxor xmm7,xmm3 +DB 102,68,15,58,15,213,8 + xor edx,ebp + add eax,DWORD PTR[48+rsp] + and edi,ebp + pxor xmm7,xmm0 + and esi,edx + ror ecx,7 + movdqa xmm9,XMMWORD PTR[48+r11] + paddd xmm8,xmm6 + add eax,edi + mov edi,ebx + pxor xmm7,xmm10 + rol ebx,5 + add eax,esi + xor edx,ebp + add eax,ebx + movdqa xmm10,xmm7 + movdqa XMMWORD PTR[32+rsp],xmm8 + mov esi,ecx + xor ecx,edx + add ebp,DWORD PTR[52+rsp] + and esi,edx + pslld xmm7,2 + and edi,ecx + ror ebx,7 + psrld xmm10,30 + add ebp,esi + mov esi,eax + rol eax,5 + add ebp,edi + xor ecx,edx + add ebp,eax + por xmm7,xmm10 + mov edi,ebx + xor ebx,ecx + movdqa xmm8,xmm7 + add edx,DWORD PTR[56+rsp] + and edi,ecx + and esi,ebx + ror eax,7 + add edx,edi + mov edi,ebp rol ebp,5 - xor eax,DWORD PTR[48+rsp] - xor ebx,edi - add r11d,ebp - xor eax,DWORD PTR[4+rsp] - rol edx,30 - add r11d,ebx - rol eax,1 - mov DWORD PTR[16+rsp],eax - lea ebp,DWORD PTR[3395469782+rdi*1+rax] - mov eax,DWORD PTR[20+rsp] - mov ebx,edx - mov edi,r11d - xor eax,DWORD PTR[28+rsp] - xor ebx,r12d - rol edi,5 - xor eax,DWORD PTR[52+rsp] - xor ebx,esi + add edx,esi + xor ebx,ecx + add edx,ebp + mov esi,eax + xor eax,ebx + add ecx,DWORD PTR[60+rsp] + and esi,ebx + and edi,eax + ror ebp,7 + add ecx,esi + mov esi,edx + rol edx,5 + add ecx,edi + xor eax,ebx + add ecx,edx + mov edi,ebp + pxor xmm0,xmm4 +DB 102,68,15,58,15,198,8 + xor ebp,eax + add ebx,DWORD PTR[rsp] + and edi,eax + pxor xmm0,xmm1 + and esi,ebp + ror edx,7 + movdqa xmm10,xmm9 + paddd xmm9,xmm7 + add ebx,edi + mov edi,ecx + pxor xmm0,xmm8 + rol ecx,5 + add ebx,esi + xor ebp,eax + add ebx,ecx + movdqa xmm8,xmm0 + movdqa XMMWORD PTR[48+rsp],xmm9 + mov esi,edx + xor edx,ebp + add eax,DWORD PTR[4+rsp] + and esi,ebp + pslld xmm0,2 + and edi,edx + ror ecx,7 + psrld xmm8,30 + add eax,esi + mov esi,ebx + rol ebx,5 + add eax,edi + xor edx,ebp + add eax,ebx + por xmm0,xmm8 + mov edi,ecx + xor ecx,edx + movdqa xmm9,xmm0 + add ebp,DWORD PTR[8+rsp] + and edi,edx + and esi,ecx + ror ebx,7 add ebp,edi - xor eax,DWORD PTR[8+rsp] - rol r12d,30 - add ebp,ebx - rol eax,1 - mov DWORD PTR[20+rsp],eax - lea edi,DWORD PTR[3395469782+rsi*1+rax] - mov eax,DWORD PTR[24+rsp] - mov ebx,r12d + mov edi,eax + rol eax,5 + add ebp,esi + xor ecx,edx + add ebp,eax + mov esi,ebx + xor ebx,ecx + add edx,DWORD PTR[12+rsp] + and esi,ecx + and edi,ebx + ror eax,7 + add edx,esi mov esi,ebp - xor eax,DWORD PTR[32+rsp] - xor ebx,r11d - rol esi,5 - xor eax,DWORD PTR[56+rsp] - xor ebx,edx - add edi,esi - xor eax,DWORD PTR[12+rsp] - rol r11d,30 - add edi,ebx - rol eax,1 - mov DWORD PTR[24+rsp],eax - lea esi,DWORD PTR[3395469782+rdx*1+rax] - mov eax,DWORD PTR[28+rsp] - mov ebx,r11d - mov edx,edi - xor eax,DWORD PTR[36+rsp] - xor ebx,ebp + rol ebp,5 + add edx,edi + xor ebx,ecx + add edx,ebp + mov edi,eax + pxor xmm1,xmm5 +DB 102,68,15,58,15,207,8 + xor eax,ebx + add ecx,DWORD PTR[16+rsp] + and edi,ebx + pxor xmm1,xmm2 + and esi,eax + ror ebp,7 + movdqa xmm8,xmm10 + paddd xmm10,xmm0 + add ecx,edi + mov edi,edx + pxor xmm1,xmm9 rol edx,5 - xor eax,DWORD PTR[60+rsp] - xor ebx,r12d - add esi,edx - xor eax,DWORD PTR[16+rsp] - rol ebp,30 - add 
esi,ebx - rol eax,1 - mov DWORD PTR[28+rsp],eax - lea edx,DWORD PTR[3395469782+r12*1+rax] - mov eax,DWORD PTR[32+rsp] - mov ebx,ebp - mov r12d,esi - xor eax,DWORD PTR[40+rsp] - xor ebx,edi - rol r12d,5 - xor eax,DWORD PTR[rsp] - xor ebx,r11d - add edx,r12d - xor eax,DWORD PTR[20+rsp] - rol edi,30 - add edx,ebx - rol eax,1 - mov DWORD PTR[32+rsp],eax - lea r12d,DWORD PTR[3395469782+r11*1+rax] - mov eax,DWORD PTR[36+rsp] - mov ebx,edi - mov r11d,edx - xor eax,DWORD PTR[44+rsp] - xor ebx,esi - rol r11d,5 - xor eax,DWORD PTR[4+rsp] - xor ebx,ebp - add r12d,r11d - xor eax,DWORD PTR[24+rsp] - rol esi,30 - add r12d,ebx - rol eax,1 - mov DWORD PTR[36+rsp],eax - lea r11d,DWORD PTR[3395469782+rbp*1+rax] - mov eax,DWORD PTR[40+rsp] - mov ebx,esi - mov ebp,r12d - xor eax,DWORD PTR[48+rsp] - xor ebx,edx + add ecx,esi + xor eax,ebx + add ecx,edx + movdqa xmm9,xmm1 + movdqa XMMWORD PTR[rsp],xmm10 + mov esi,ebp + xor ebp,eax + add ebx,DWORD PTR[20+rsp] + and esi,eax + pslld xmm1,2 + and edi,ebp + ror edx,7 + psrld xmm9,30 + add ebx,esi + mov esi,ecx + rol ecx,5 + add ebx,edi + xor ebp,eax + add ebx,ecx + por xmm1,xmm9 + mov edi,edx + xor edx,ebp + movdqa xmm10,xmm1 + add eax,DWORD PTR[24+rsp] + and edi,ebp + and esi,edx + ror ecx,7 + add eax,edi + mov edi,ebx + rol ebx,5 + add eax,esi + xor edx,ebp + add eax,ebx + mov esi,ecx + xor ecx,edx + add ebp,DWORD PTR[28+rsp] + and esi,edx + and edi,ecx + ror ebx,7 + add ebp,esi + mov esi,eax + rol eax,5 + add ebp,edi + xor ecx,edx + add ebp,eax + mov edi,ebx + pxor xmm2,xmm6 +DB 102,68,15,58,15,208,8 + xor ebx,ecx + add edx,DWORD PTR[32+rsp] + and edi,ecx + pxor xmm2,xmm3 + and esi,ebx + ror eax,7 + movdqa xmm9,xmm8 + paddd xmm8,xmm1 + add edx,edi + mov edi,ebp + pxor xmm2,xmm10 rol ebp,5 - xor eax,DWORD PTR[8+rsp] - xor ebx,edi - add r11d,ebp - xor eax,DWORD PTR[28+rsp] - rol edx,30 - add r11d,ebx - rol eax,1 - mov DWORD PTR[40+rsp],eax - lea ebp,DWORD PTR[3395469782+rdi*1+rax] - mov eax,DWORD PTR[44+rsp] - mov ebx,edx - mov edi,r11d - xor eax,DWORD PTR[52+rsp] - xor ebx,r12d - rol edi,5 - xor eax,DWORD PTR[12+rsp] - xor ebx,esi + add edx,esi + xor ebx,ecx + add edx,ebp + movdqa xmm10,xmm2 + movdqa XMMWORD PTR[16+rsp],xmm8 + mov esi,eax + xor eax,ebx + add ecx,DWORD PTR[36+rsp] + and esi,ebx + pslld xmm2,2 + and edi,eax + ror ebp,7 + psrld xmm10,30 + add ecx,esi + mov esi,edx + rol edx,5 + add ecx,edi + xor eax,ebx + add ecx,edx + por xmm2,xmm10 + mov edi,ebp + xor ebp,eax + movdqa xmm8,xmm2 + add ebx,DWORD PTR[40+rsp] + and edi,eax + and esi,ebp + ror edx,7 + add ebx,edi + mov edi,ecx + rol ecx,5 + add ebx,esi + xor ebp,eax + add ebx,ecx + mov esi,edx + xor edx,ebp + add eax,DWORD PTR[44+rsp] + and esi,ebp + and edi,edx + ror ecx,7 + add eax,esi + mov esi,ebx + rol ebx,5 + add eax,edi + xor edx,ebp + add eax,ebx + add ebp,DWORD PTR[48+rsp] + pxor xmm3,xmm7 +DB 102,68,15,58,15,193,8 + xor esi,edx + mov edi,eax + rol eax,5 + pxor xmm3,xmm4 + xor esi,ecx + add ebp,eax + movdqa xmm10,xmm9 + paddd xmm9,xmm2 + ror ebx,7 + add ebp,esi + pxor xmm3,xmm8 + add edx,DWORD PTR[52+rsp] + xor edi,ecx + mov esi,ebp + rol ebp,5 + movdqa xmm8,xmm3 + movdqa XMMWORD PTR[32+rsp],xmm9 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + pslld xmm3,2 + add ecx,DWORD PTR[56+rsp] + xor esi,ebx + psrld xmm8,30 + mov edi,edx + rol edx,5 + xor esi,eax + add ecx,edx + ror ebp,7 + add ecx,esi + por xmm3,xmm8 + add ebx,DWORD PTR[60+rsp] + xor edi,eax + mov esi,ecx + rol ecx,5 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + add eax,DWORD PTR[rsp] + paddd xmm10,xmm3 + xor esi,ebp + 
mov edi,ebx + rol ebx,5 + xor esi,edx + movdqa XMMWORD PTR[48+rsp],xmm10 + add eax,ebx + ror ecx,7 + add eax,esi + add ebp,DWORD PTR[4+rsp] + xor edi,edx + mov esi,eax + rol eax,5 + xor edi,ecx + add ebp,eax + ror ebx,7 add ebp,edi - xor eax,DWORD PTR[32+rsp] - rol r12d,30 - add ebp,ebx - rol eax,1 - mov DWORD PTR[44+rsp],eax - lea edi,DWORD PTR[3395469782+rsi*1+rax] - mov eax,DWORD PTR[48+rsp] - mov ebx,r12d + add edx,DWORD PTR[8+rsp] + xor esi,ecx + mov edi,ebp + rol ebp,5 + xor esi,ebx + add edx,ebp + ror eax,7 + add edx,esi + add ecx,DWORD PTR[12+rsp] + xor edi,ebx + mov esi,edx + rol edx,5 + xor edi,eax + add ecx,edx + ror ebp,7 + add ecx,edi + cmp r9,r10 + je $L$done_ssse3 + movdqa xmm6,XMMWORD PTR[64+r11] + movdqa xmm9,XMMWORD PTR[r11] + movdqu xmm0,XMMWORD PTR[r9] + movdqu xmm1,XMMWORD PTR[16+r9] + movdqu xmm2,XMMWORD PTR[32+r9] + movdqu xmm3,XMMWORD PTR[48+r9] +DB 102,15,56,0,198 + add r9,64 + add ebx,DWORD PTR[16+rsp] + xor esi,eax +DB 102,15,56,0,206 + mov edi,ecx + rol ecx,5 + paddd xmm0,xmm9 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + movdqa XMMWORD PTR[rsp],xmm0 + add eax,DWORD PTR[20+rsp] + xor edi,ebp + psubd xmm0,xmm9 + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + add ebp,DWORD PTR[24+rsp] + xor esi,edx + mov edi,eax + rol eax,5 + xor esi,ecx + add ebp,eax + ror ebx,7 + add ebp,esi + add edx,DWORD PTR[28+rsp] + xor edi,ecx mov esi,ebp - xor eax,DWORD PTR[56+rsp] - xor ebx,r11d - rol esi,5 - xor eax,DWORD PTR[16+rsp] - xor ebx,edx - add edi,esi - xor eax,DWORD PTR[36+rsp] - rol r11d,30 - add edi,ebx - rol eax,1 - mov DWORD PTR[48+rsp],eax - lea esi,DWORD PTR[3395469782+rdx*1+rax] - mov eax,DWORD PTR[52+rsp] - mov ebx,r11d - mov edx,edi - xor eax,DWORD PTR[60+rsp] - xor ebx,ebp + rol ebp,5 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + add ecx,DWORD PTR[32+rsp] + xor esi,ebx +DB 102,15,56,0,214 + mov edi,edx rol edx,5 - xor eax,DWORD PTR[20+rsp] - xor ebx,r12d - add esi,edx - xor eax,DWORD PTR[40+rsp] - rol ebp,30 - add esi,ebx - rol eax,1 - lea edx,DWORD PTR[3395469782+r12*1+rax] - mov eax,DWORD PTR[56+rsp] - mov ebx,ebp - mov r12d,esi - xor eax,DWORD PTR[rsp] - xor ebx,edi - rol r12d,5 - xor eax,DWORD PTR[24+rsp] - xor ebx,r11d - add edx,r12d - xor eax,DWORD PTR[44+rsp] - rol edi,30 - add edx,ebx - rol eax,1 - lea r12d,DWORD PTR[3395469782+r11*1+rax] - mov eax,DWORD PTR[60+rsp] - mov ebx,edi - mov r11d,edx - xor eax,DWORD PTR[4+rsp] - xor ebx,esi - rol r11d,5 - xor eax,DWORD PTR[28+rsp] - xor ebx,ebp - add r12d,r11d - xor eax,DWORD PTR[48+rsp] - rol esi,30 - add r12d,ebx - rol eax,1 - lea r11d,DWORD PTR[3395469782+rbp*1+rax] - mov ebx,esi - mov ebp,r12d - xor ebx,edx + paddd xmm1,xmm9 + xor esi,eax + add ecx,edx + ror ebp,7 + add ecx,esi + movdqa XMMWORD PTR[16+rsp],xmm1 + add ebx,DWORD PTR[36+rsp] + xor edi,eax + psubd xmm1,xmm9 + mov esi,ecx + rol ecx,5 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + add eax,DWORD PTR[40+rsp] + xor esi,ebp + mov edi,ebx + rol ebx,5 + xor esi,edx + add eax,ebx + ror ecx,7 + add eax,esi + add ebp,DWORD PTR[44+rsp] + xor edi,edx + mov esi,eax + rol eax,5 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[48+rsp] + xor esi,ecx +DB 102,15,56,0,222 + mov edi,ebp rol ebp,5 - xor ebx,edi - add r11d,ebp - rol edx,30 - add r11d,ebx - add r11d,DWORD PTR[r8] - add r12d,DWORD PTR[4+r8] - add edx,DWORD PTR[8+r8] - add esi,DWORD PTR[12+r8] - add edi,DWORD PTR[16+r8] - mov DWORD PTR[r8],r11d - mov DWORD PTR[4+r8],r12d - mov DWORD PTR[8+r8],edx - mov DWORD 
PTR[12+r8],esi - mov DWORD PTR[16+r8],edi - - xchg edx,r11d - xchg esi,r12d - xchg edi,r11d - xchg ebp,r12d + paddd xmm2,xmm9 + xor esi,ebx + add edx,ebp + ror eax,7 + add edx,esi + movdqa XMMWORD PTR[32+rsp],xmm2 + add ecx,DWORD PTR[52+rsp] + xor edi,ebx + psubd xmm2,xmm9 + mov esi,edx + rol edx,5 + xor edi,eax + add ecx,edx + ror ebp,7 + add ecx,edi + add ebx,DWORD PTR[56+rsp] + xor esi,eax + mov edi,ecx + rol ecx,5 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + add eax,DWORD PTR[60+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + add eax,DWORD PTR[r8] + add esi,DWORD PTR[4+r8] + add ecx,DWORD PTR[8+r8] + add edx,DWORD PTR[12+r8] + mov DWORD PTR[r8],eax + add ebp,DWORD PTR[16+r8] + mov DWORD PTR[4+r8],esi + mov ebx,esi + mov DWORD PTR[8+r8],ecx + mov DWORD PTR[12+r8],edx + mov DWORD PTR[16+r8],ebp + jmp $L$oop_ssse3 - lea r9,QWORD PTR[64+r9] - sub r10,1 - jnz $L$loop - mov rsi,QWORD PTR[64+rsp] +ALIGN 16 +$L$done_ssse3:: + add ebx,DWORD PTR[16+rsp] + xor esi,eax + mov edi,ecx + rol ecx,5 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + add eax,DWORD PTR[20+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + add ebp,DWORD PTR[24+rsp] + xor esi,edx + mov edi,eax + rol eax,5 + xor esi,ecx + add ebp,eax + ror ebx,7 + add ebp,esi + add edx,DWORD PTR[28+rsp] + xor edi,ecx + mov esi,ebp + rol ebp,5 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + add ecx,DWORD PTR[32+rsp] + xor esi,ebx + mov edi,edx + rol edx,5 + xor esi,eax + add ecx,edx + ror ebp,7 + add ecx,esi + add ebx,DWORD PTR[36+rsp] + xor edi,eax + mov esi,ecx + rol ecx,5 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + add eax,DWORD PTR[40+rsp] + xor esi,ebp + mov edi,ebx + rol ebx,5 + xor esi,edx + add eax,ebx + ror ecx,7 + add eax,esi + add ebp,DWORD PTR[44+rsp] + xor edi,edx + mov esi,eax + rol eax,5 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[48+rsp] + xor esi,ecx + mov edi,ebp + rol ebp,5 + xor esi,ebx + add edx,ebp + ror eax,7 + add edx,esi + add ecx,DWORD PTR[52+rsp] + xor edi,ebx + mov esi,edx + rol edx,5 + xor edi,eax + add ecx,edx + ror ebp,7 + add ecx,edi + add ebx,DWORD PTR[56+rsp] + xor esi,eax + mov edi,ecx + rol ecx,5 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + add eax,DWORD PTR[60+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + add eax,DWORD PTR[r8] + add esi,DWORD PTR[4+r8] + add ecx,DWORD PTR[8+r8] + mov DWORD PTR[r8],eax + add edx,DWORD PTR[12+r8] + mov DWORD PTR[4+r8],esi + add ebp,DWORD PTR[16+r8] + mov DWORD PTR[8+r8],ecx + mov DWORD PTR[12+r8],edx + mov DWORD PTR[16+r8],ebp + movaps xmm6,XMMWORD PTR[((64+0))+rsp] + movaps xmm7,XMMWORD PTR[((64+16))+rsp] + movaps xmm8,XMMWORD PTR[((64+32))+rsp] + movaps xmm9,XMMWORD PTR[((64+48))+rsp] + movaps xmm10,XMMWORD PTR[((64+64))+rsp] + lea rsi,QWORD PTR[144+rsp] mov r12,QWORD PTR[rsi] mov rbp,QWORD PTR[8+rsi] mov rbx,QWORD PTR[16+rsi] lea rsp,QWORD PTR[24+rsi] -$L$epilogue:: +$L$epilogue_ssse3:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_sha1_block_data_order:: -sha1_block_data_order ENDP +$L$SEH_end_sha1_block_data_order_ssse3:: +sha1_block_data_order_ssse3 ENDP +ALIGN 64 +K_XX_XX:: + DD 05a827999h,05a827999h,05a827999h,05a827999h + + DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h + + DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch + + DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h + + DD 
000010203h,004050607h,008090a0bh,00c0d0e0fh + DB 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 DB 102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44 DB 32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60 DB 97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114 DB 103,62,0 -ALIGN 16 +ALIGN 64 EXTERN __imp_RtlVirtualUnwind:NEAR ALIGN 16 @@ -1317,16 +2549,67 @@ se_handler PROC PRIVATE lea r10,QWORD PTR[$L$prologue] cmp rbx,r10 - jb $L$in_prologue + jb $L$common_seh_tail mov rax,QWORD PTR[152+r8] lea r10,QWORD PTR[$L$epilogue] cmp rbx,r10 - jae $L$in_prologue + jae $L$common_seh_tail mov rax,QWORD PTR[64+rax] - lea rax,QWORD PTR[24+rax] + lea rax,QWORD PTR[32+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + + jmp $L$common_seh_tail +se_handler ENDP + + +ALIGN 16 +ssse3_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[64+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,10 + DD 0a548f3fch + + lea rax,QWORD PTR[168+rax] mov rbx,QWORD PTR[((-8))+rax] mov rbp,QWORD PTR[((-16))+rax] @@ -1335,7 +2618,7 @@ se_handler PROC PRIVATE mov QWORD PTR[160+r8],rbp mov QWORD PTR[216+r8],r12 -$L$in_prologue:: +$L$common_seh_tail:: mov rdi,QWORD PTR[8+rax] mov rsi,QWORD PTR[16+rax] mov QWORD PTR[152+r8],rax @@ -1374,7 +2657,7 @@ $L$in_prologue:: pop rdi pop rsi DB 0F3h,0C3h ;repret -se_handler ENDP +ssse3_handler ENDP .text$ ENDS .pdata SEGMENT READONLY ALIGN(4) @@ -1382,13 +2665,20 @@ ALIGN 4 DD imagerel $L$SEH_begin_sha1_block_data_order DD imagerel $L$SEH_end_sha1_block_data_order DD imagerel $L$SEH_info_sha1_block_data_order - + DD imagerel $L$SEH_begin_sha1_block_data_order_ssse3 + DD imagerel $L$SEH_end_sha1_block_data_order_ssse3 + DD imagerel $L$SEH_info_sha1_block_data_order_ssse3 .pdata ENDS .xdata SEGMENT READONLY ALIGN(8) ALIGN 8 $L$SEH_info_sha1_block_data_order:: DB 9,0,0,0 DD imagerel se_handler +$L$SEH_info_sha1_block_data_order_ssse3:: +DB 9,0,0,0 + DD imagerel ssse3_handler + DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3 + .xdata ENDS END diff --git a/deps/openssl/asm/x64-win32-masm/sha/sha512-x86_64.asm b/deps/openssl/asm/x64-win32-masm/sha/sha512-x86_64.asm index 5ea4a6327a..f685c2fdfc 100644 --- a/deps/openssl/asm/x64-win32-masm/sha/sha512-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/sha/sha512-x86_64.asm @@ -26,1930 +26,1738 @@ $L$SEH_begin_sha256_block_data_order:: sub rsp,16*4+4*8 lea rdx,QWORD PTR[rdx*4+rsi] and rsp,-64 - mov QWORD PTR[((16*4+0*8))+rsp],rdi - mov QWORD PTR[((16*4+1*8))+rsp],rsi - mov QWORD PTR[((16*4+2*8))+rsp],rdx - mov QWORD PTR[((16*4+3*8))+rsp],r11 + mov QWORD PTR[((64+0))+rsp],rdi + mov QWORD PTR[((64+8))+rsp],rsi + mov QWORD PTR[((64+16))+rsp],rdx + mov QWORD PTR[((64+24))+rsp],r11 $L$prologue:: lea rbp,QWORD PTR[K256] - mov eax,DWORD PTR[((4*0))+rdi] - mov ebx,DWORD PTR[((4*1))+rdi] - mov ecx,DWORD PTR[((4*2))+rdi] - mov edx,DWORD PTR[((4*3))+rdi] - mov r8d,DWORD PTR[((4*4))+rdi] - mov r9d,DWORD 
PTR[((4*5))+rdi] - mov r10d,DWORD PTR[((4*6))+rdi] - mov r11d,DWORD PTR[((4*7))+rdi] + mov eax,DWORD PTR[rdi] + mov ebx,DWORD PTR[4+rdi] + mov ecx,DWORD PTR[8+rdi] + mov edx,DWORD PTR[12+rdi] + mov r8d,DWORD PTR[16+rdi] + mov r9d,DWORD PTR[20+rdi] + mov r10d,DWORD PTR[24+rdi] + mov r11d,DWORD PTR[28+rdi] jmp $L$loop ALIGN 16 $L$loop:: xor rdi,rdi - mov r12d,DWORD PTR[((4*0))+rsi] - bswap r12d + mov r12d,DWORD PTR[rsi] mov r13d,r8d - mov r14d,r8d + mov r14d,eax + bswap r12d + ror r13d,14 mov r15d,r9d + mov DWORD PTR[rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,r8d xor r15d,r10d - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,r11d + xor r14d,eax + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,r8d - mov DWORD PTR[rsp],r12d + mov r11d,ebx - xor r13d,r14d + ror r14d,11 + xor r13d,r8d xor r15d,r10d - add r12d,r11d - - mov r11d,eax - add r12d,r13d + xor r11d,ecx + xor r14d,eax add r12d,r15d - mov r13d,eax - mov r14d,eax + mov r15d,ebx - ror r11d,2 - ror r13d,13 - mov r15d,eax - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and r11d,eax + and r15d,ecx - xor r11d,r13d - ror r13d,9 - or r14d,ecx + ror r14d,2 + add r12d,r13d + add r11d,r15d - xor r11d,r13d - and r15d,ecx add edx,r12d - - and r14d,ebx add r11d,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add r11d,r14d - mov r12d,DWORD PTR[((4*1))+rsi] - bswap r12d + + mov r12d,DWORD PTR[4+rsi] mov r13d,edx - mov r14d,edx + mov r14d,r11d + bswap r12d + ror r13d,14 mov r15d,r8d + mov DWORD PTR[4+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,edx xor r15d,r9d - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,r10d + xor r14d,r11d + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,edx - mov DWORD PTR[4+rsp],r12d + mov r10d,eax - xor r13d,r14d + ror r14d,11 + xor r13d,edx xor r15d,r9d - add r12d,r10d - - mov r10d,r11d - add r12d,r13d + xor r10d,ebx + xor r14d,r11d add r12d,r15d - mov r13d,r11d - mov r14d,r11d + mov r15d,eax - ror r10d,2 - ror r13d,13 - mov r15d,r11d - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and r10d,r11d + and r15d,ebx - xor r10d,r13d - ror r13d,9 - or r14d,ebx + ror r14d,2 + add r12d,r13d + add r10d,r15d - xor r10d,r13d - and r15d,ebx add ecx,r12d - - and r14d,eax add r10d,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add r10d,r14d - mov r12d,DWORD PTR[((4*2))+rsi] - bswap r12d + + mov r12d,DWORD PTR[8+rsi] mov r13d,ecx - mov r14d,ecx + mov r14d,r10d + bswap r12d + ror r13d,14 mov r15d,edx + mov DWORD PTR[8+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,ecx xor r15d,r8d - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,r9d + xor r14d,r10d + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,ecx - mov DWORD PTR[8+rsp],r12d + mov r9d,r11d - xor r13d,r14d + ror r14d,11 + xor r13d,ecx xor r15d,r8d - add r12d,r9d - - mov r9d,r10d - add r12d,r13d + xor r9d,eax + xor r14d,r10d add r12d,r15d - mov r13d,r10d - mov r14d,r10d + mov r15d,r11d - ror r9d,2 - ror r13d,13 - mov r15d,r10d - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and r9d,r10d + and r15d,eax - xor r9d,r13d - ror r13d,9 - or r14d,eax + ror r14d,2 + add r12d,r13d + add r9d,r15d - xor r9d,r13d - and r15d,eax add ebx,r12d - - and r14d,r11d add r9d,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add r9d,r14d - mov r12d,DWORD PTR[((4*3))+rsi] - bswap r12d + + mov r12d,DWORD PTR[12+rsi] mov r13d,ebx - mov r14d,ebx + mov r14d,r9d + bswap r12d + ror r13d,14 mov r15d,ecx + mov DWORD PTR[12+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,ebx xor r15d,edx - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,r8d + xor r14d,r9d + + add 
r12d,DWORD PTR[rdi*4+rbp] and r15d,ebx - mov DWORD PTR[12+rsp],r12d + mov r8d,r10d - xor r13d,r14d + ror r14d,11 + xor r13d,ebx xor r15d,edx - add r12d,r8d - - mov r8d,r9d - add r12d,r13d + xor r8d,r11d + xor r14d,r9d add r12d,r15d - mov r13d,r9d - mov r14d,r9d + mov r15d,r10d - ror r8d,2 - ror r13d,13 - mov r15d,r9d - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and r8d,r9d + and r15d,r11d - xor r8d,r13d - ror r13d,9 - or r14d,r11d + ror r14d,2 + add r12d,r13d + add r8d,r15d - xor r8d,r13d - and r15d,r11d add eax,r12d - - and r14d,r10d add r8d,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add r8d,r14d - mov r12d,DWORD PTR[((4*4))+rsi] - bswap r12d + + mov r12d,DWORD PTR[16+rsi] mov r13d,eax - mov r14d,eax + mov r14d,r8d + bswap r12d + ror r13d,14 mov r15d,ebx + mov DWORD PTR[16+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,eax xor r15d,ecx - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,edx + xor r14d,r8d + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,eax - mov DWORD PTR[16+rsp],r12d + mov edx,r9d - xor r13d,r14d + ror r14d,11 + xor r13d,eax xor r15d,ecx - add r12d,edx - - mov edx,r8d - add r12d,r13d + xor edx,r10d + xor r14d,r8d add r12d,r15d - mov r13d,r8d - mov r14d,r8d + mov r15d,r9d - ror edx,2 - ror r13d,13 - mov r15d,r8d - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and edx,r8d + and r15d,r10d - xor edx,r13d - ror r13d,9 - or r14d,r10d + ror r14d,2 + add r12d,r13d + add edx,r15d - xor edx,r13d - and r15d,r10d add r11d,r12d - - and r14d,r9d add edx,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add edx,r14d - mov r12d,DWORD PTR[((4*5))+rsi] - bswap r12d + + mov r12d,DWORD PTR[20+rsi] mov r13d,r11d - mov r14d,r11d + mov r14d,edx + bswap r12d + ror r13d,14 mov r15d,eax + mov DWORD PTR[20+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,r11d xor r15d,ebx - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,ecx + xor r14d,edx + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,r11d - mov DWORD PTR[20+rsp],r12d + mov ecx,r8d - xor r13d,r14d + ror r14d,11 + xor r13d,r11d xor r15d,ebx - add r12d,ecx - - mov ecx,edx - add r12d,r13d + xor ecx,r9d + xor r14d,edx add r12d,r15d - mov r13d,edx - mov r14d,edx + mov r15d,r8d - ror ecx,2 - ror r13d,13 - mov r15d,edx - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and ecx,edx + and r15d,r9d - xor ecx,r13d - ror r13d,9 - or r14d,r9d + ror r14d,2 + add r12d,r13d + add ecx,r15d - xor ecx,r13d - and r15d,r9d add r10d,r12d - - and r14d,r8d add ecx,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add ecx,r14d - mov r12d,DWORD PTR[((4*6))+rsi] - bswap r12d + + mov r12d,DWORD PTR[24+rsi] mov r13d,r10d - mov r14d,r10d + mov r14d,ecx + bswap r12d + ror r13d,14 mov r15d,r11d + mov DWORD PTR[24+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,r10d xor r15d,eax - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,ebx + xor r14d,ecx + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,r10d - mov DWORD PTR[24+rsp],r12d + mov ebx,edx - xor r13d,r14d + ror r14d,11 + xor r13d,r10d xor r15d,eax - add r12d,ebx - - mov ebx,ecx - add r12d,r13d + xor ebx,r8d + xor r14d,ecx add r12d,r15d - mov r13d,ecx - mov r14d,ecx + mov r15d,edx - ror ebx,2 - ror r13d,13 - mov r15d,ecx - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and ebx,ecx + and r15d,r8d - xor ebx,r13d - ror r13d,9 - or r14d,r8d + ror r14d,2 + add r12d,r13d + add ebx,r15d - xor ebx,r13d - and r15d,r8d add r9d,r12d - - and r14d,edx add ebx,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add ebx,r14d - mov r12d,DWORD PTR[((4*7))+rsi] - bswap r12d + + mov r12d,DWORD PTR[28+rsi] mov 
r13d,r9d - mov r14d,r9d + mov r14d,ebx + bswap r12d + ror r13d,14 mov r15d,r10d + mov DWORD PTR[28+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,r9d xor r15d,r11d - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,eax + xor r14d,ebx + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,r9d - mov DWORD PTR[28+rsp],r12d + mov eax,ecx - xor r13d,r14d + ror r14d,11 + xor r13d,r9d xor r15d,r11d - add r12d,eax - - mov eax,ebx - add r12d,r13d + xor eax,edx + xor r14d,ebx add r12d,r15d - mov r13d,ebx - mov r14d,ebx + mov r15d,ecx - ror eax,2 - ror r13d,13 - mov r15d,ebx - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and eax,ebx + and r15d,edx - xor eax,r13d - ror r13d,9 - or r14d,edx + ror r14d,2 + add r12d,r13d + add eax,r15d - xor eax,r13d - and r15d,edx add r8d,r12d - - and r14d,ecx add eax,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add eax,r14d - mov r12d,DWORD PTR[((4*8))+rsi] - bswap r12d + + mov r12d,DWORD PTR[32+rsi] mov r13d,r8d - mov r14d,r8d + mov r14d,eax + bswap r12d + ror r13d,14 mov r15d,r9d + mov DWORD PTR[32+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,r8d xor r15d,r10d - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,r11d + xor r14d,eax + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,r8d - mov DWORD PTR[32+rsp],r12d + mov r11d,ebx - xor r13d,r14d + ror r14d,11 + xor r13d,r8d xor r15d,r10d - add r12d,r11d - - mov r11d,eax - add r12d,r13d + xor r11d,ecx + xor r14d,eax add r12d,r15d - mov r13d,eax - mov r14d,eax + mov r15d,ebx - ror r11d,2 - ror r13d,13 - mov r15d,eax - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and r11d,eax + and r15d,ecx - xor r11d,r13d - ror r13d,9 - or r14d,ecx + ror r14d,2 + add r12d,r13d + add r11d,r15d - xor r11d,r13d - and r15d,ecx add edx,r12d - - and r14d,ebx add r11d,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add r11d,r14d - mov r12d,DWORD PTR[((4*9))+rsi] - bswap r12d + + mov r12d,DWORD PTR[36+rsi] mov r13d,edx - mov r14d,edx + mov r14d,r11d + bswap r12d + ror r13d,14 mov r15d,r8d - - ror r13d,6 - ror r14d,11 - xor r15d,r9d - - xor r13d,r14d - ror r14d,14 - and r15d,edx mov DWORD PTR[36+rsp],r12d - xor r13d,r14d + ror r14d,9 + xor r13d,edx xor r15d,r9d - add r12d,r10d - - mov r10d,r11d - add r12d,r13d - add r12d,r15d - mov r13d,r11d - mov r14d,r11d + ror r13d,5 + add r12d,r10d + xor r14d,r11d - ror r10d,2 - ror r13d,13 - mov r15d,r11d add r12d,DWORD PTR[rdi*4+rbp] + and r15d,edx + mov r10d,eax + + ror r14d,11 + xor r13d,edx + xor r15d,r9d - xor r10d,r13d - ror r13d,9 - or r14d,ebx + xor r10d,ebx + xor r14d,r11d + add r12d,r15d + mov r15d,eax - xor r10d,r13d + ror r13d,6 + and r10d,r11d and r15d,ebx - add ecx,r12d - and r14d,eax - add r10d,r12d + ror r14d,2 + add r12d,r13d + add r10d,r15d - or r14d,r15d + add ecx,r12d + add r10d,r12d lea rdi,QWORD PTR[1+rdi] - add r10d,r14d - mov r12d,DWORD PTR[((4*10))+rsi] - bswap r12d + + mov r12d,DWORD PTR[40+rsi] mov r13d,ecx - mov r14d,ecx + mov r14d,r10d + bswap r12d + ror r13d,14 mov r15d,edx + mov DWORD PTR[40+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,ecx xor r15d,r8d - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,r9d + xor r14d,r10d + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,ecx - mov DWORD PTR[40+rsp],r12d + mov r9d,r11d - xor r13d,r14d + ror r14d,11 + xor r13d,ecx xor r15d,r8d - add r12d,r9d - - mov r9d,r10d - add r12d,r13d + xor r9d,eax + xor r14d,r10d add r12d,r15d - mov r13d,r10d - mov r14d,r10d + mov r15d,r11d - ror r9d,2 - ror r13d,13 - mov r15d,r10d - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and r9d,r10d + and r15d,eax - xor r9d,r13d - 
ror r13d,9 - or r14d,eax + ror r14d,2 + add r12d,r13d + add r9d,r15d - xor r9d,r13d - and r15d,eax add ebx,r12d - - and r14d,r11d add r9d,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add r9d,r14d - mov r12d,DWORD PTR[((4*11))+rsi] - bswap r12d + + mov r12d,DWORD PTR[44+rsi] mov r13d,ebx - mov r14d,ebx + mov r14d,r9d + bswap r12d + ror r13d,14 mov r15d,ecx + mov DWORD PTR[44+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,ebx xor r15d,edx - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,r8d + xor r14d,r9d + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,ebx - mov DWORD PTR[44+rsp],r12d + mov r8d,r10d - xor r13d,r14d + ror r14d,11 + xor r13d,ebx xor r15d,edx - add r12d,r8d - - mov r8d,r9d - add r12d,r13d + xor r8d,r11d + xor r14d,r9d add r12d,r15d - mov r13d,r9d - mov r14d,r9d + mov r15d,r10d - ror r8d,2 - ror r13d,13 - mov r15d,r9d - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and r8d,r9d + and r15d,r11d - xor r8d,r13d - ror r13d,9 - or r14d,r11d + ror r14d,2 + add r12d,r13d + add r8d,r15d - xor r8d,r13d - and r15d,r11d add eax,r12d - - and r14d,r10d add r8d,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add r8d,r14d - mov r12d,DWORD PTR[((4*12))+rsi] - bswap r12d + + mov r12d,DWORD PTR[48+rsi] mov r13d,eax - mov r14d,eax + mov r14d,r8d + bswap r12d + ror r13d,14 mov r15d,ebx + mov DWORD PTR[48+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,eax xor r15d,ecx - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,edx + xor r14d,r8d + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,eax - mov DWORD PTR[48+rsp],r12d + mov edx,r9d - xor r13d,r14d + ror r14d,11 + xor r13d,eax xor r15d,ecx - add r12d,edx - - mov edx,r8d - add r12d,r13d + xor edx,r10d + xor r14d,r8d add r12d,r15d - mov r13d,r8d - mov r14d,r8d + mov r15d,r9d - ror edx,2 - ror r13d,13 - mov r15d,r8d - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and edx,r8d + and r15d,r10d - xor edx,r13d - ror r13d,9 - or r14d,r10d + ror r14d,2 + add r12d,r13d + add edx,r15d - xor edx,r13d - and r15d,r10d add r11d,r12d - - and r14d,r9d add edx,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add edx,r14d - mov r12d,DWORD PTR[((4*13))+rsi] - bswap r12d + + mov r12d,DWORD PTR[52+rsi] mov r13d,r11d - mov r14d,r11d + mov r14d,edx + bswap r12d + ror r13d,14 mov r15d,eax + mov DWORD PTR[52+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,r11d xor r15d,ebx - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,ecx + xor r14d,edx + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,r11d - mov DWORD PTR[52+rsp],r12d + mov ecx,r8d - xor r13d,r14d + ror r14d,11 + xor r13d,r11d xor r15d,ebx - add r12d,ecx - - mov ecx,edx - add r12d,r13d + xor ecx,r9d + xor r14d,edx add r12d,r15d - mov r13d,edx - mov r14d,edx + mov r15d,r8d - ror ecx,2 - ror r13d,13 - mov r15d,edx - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and ecx,edx + and r15d,r9d - xor ecx,r13d - ror r13d,9 - or r14d,r9d + ror r14d,2 + add r12d,r13d + add ecx,r15d - xor ecx,r13d - and r15d,r9d add r10d,r12d - - and r14d,r8d add ecx,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add ecx,r14d - mov r12d,DWORD PTR[((4*14))+rsi] - bswap r12d + + mov r12d,DWORD PTR[56+rsi] mov r13d,r10d - mov r14d,r10d + mov r14d,ecx + bswap r12d + ror r13d,14 mov r15d,r11d + mov DWORD PTR[56+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,r10d xor r15d,eax - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,ebx + xor r14d,ecx + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,r10d - mov DWORD PTR[56+rsp],r12d + mov ebx,edx - xor r13d,r14d + ror r14d,11 + xor r13d,r10d xor r15d,eax - add 
r12d,ebx - - mov ebx,ecx - add r12d,r13d + xor ebx,r8d + xor r14d,ecx add r12d,r15d - mov r13d,ecx - mov r14d,ecx + mov r15d,edx - ror ebx,2 - ror r13d,13 - mov r15d,ecx - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and ebx,ecx + and r15d,r8d - xor ebx,r13d - ror r13d,9 - or r14d,r8d + ror r14d,2 + add r12d,r13d + add ebx,r15d - xor ebx,r13d - and r15d,r8d add r9d,r12d - - and r14d,edx add ebx,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add ebx,r14d - mov r12d,DWORD PTR[((4*15))+rsi] - bswap r12d + + mov r12d,DWORD PTR[60+rsi] mov r13d,r9d - mov r14d,r9d + mov r14d,ebx + bswap r12d + ror r13d,14 mov r15d,r10d + mov DWORD PTR[60+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,r9d xor r15d,r11d - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,eax + xor r14d,ebx + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,r9d - mov DWORD PTR[60+rsp],r12d + mov eax,ecx - xor r13d,r14d + ror r14d,11 + xor r13d,r9d xor r15d,r11d - add r12d,eax - - mov eax,ebx - add r12d,r13d + xor eax,edx + xor r14d,ebx add r12d,r15d - mov r13d,ebx - mov r14d,ebx + mov r15d,ecx - ror eax,2 - ror r13d,13 - mov r15d,ebx - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and eax,ebx + and r15d,edx - xor eax,r13d - ror r13d,9 - or r14d,edx + ror r14d,2 + add r12d,r13d + add eax,r15d - xor eax,r13d - and r15d,edx add r8d,r12d - - and r14d,ecx add eax,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add eax,r14d + jmp $L$rounds_16_xx ALIGN 16 $L$rounds_16_xx:: mov r13d,DWORD PTR[4+rsp] - mov r12d,DWORD PTR[56+rsp] - - mov r15d,r13d + mov r14d,DWORD PTR[56+rsp] + mov r12d,r13d + mov r15d,r14d + ror r12d,11 + xor r12d,r13d shr r13d,3 - ror r15d,7 - xor r13d,r15d - ror r15d,11 - - xor r13d,r15d - mov r14d,r12d - - shr r12d,10 - ror r14d,17 - - xor r12d,r14d - ror r14d,2 + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[36+rsp] - xor r12d,r14d + ror r15d,2 + xor r15d,r14d + shr r14d,10 + ror r15d,17 add r12d,r13d - - add r12d,DWORD PTR[36+rsp] + xor r14d,r15d add r12d,DWORD PTR[rsp] mov r13d,r8d - mov r14d,r8d + add r12d,r14d + mov r14d,eax + ror r13d,14 mov r15d,r9d + mov DWORD PTR[rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,r8d xor r15d,r10d - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,r11d + xor r14d,eax + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,r8d - mov DWORD PTR[rsp],r12d + mov r11d,ebx - xor r13d,r14d + ror r14d,11 + xor r13d,r8d xor r15d,r10d - add r12d,r11d - - mov r11d,eax - add r12d,r13d + xor r11d,ecx + xor r14d,eax add r12d,r15d - mov r13d,eax - mov r14d,eax + mov r15d,ebx - ror r11d,2 - ror r13d,13 - mov r15d,eax - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and r11d,eax + and r15d,ecx - xor r11d,r13d - ror r13d,9 - or r14d,ecx + ror r14d,2 + add r12d,r13d + add r11d,r15d - xor r11d,r13d - and r15d,ecx add edx,r12d - - and r14d,ebx add r11d,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add r11d,r14d - mov r13d,DWORD PTR[8+rsp] - mov r12d,DWORD PTR[60+rsp] - mov r15d,r13d + mov r13d,DWORD PTR[8+rsp] + mov r14d,DWORD PTR[60+rsp] + mov r12d,r13d + mov r15d,r14d + ror r12d,11 + xor r12d,r13d shr r13d,3 - ror r15d,7 - xor r13d,r15d - ror r15d,11 - - xor r13d,r15d - mov r14d,r12d - - shr r12d,10 - ror r14d,17 - - xor r12d,r14d - ror r14d,2 + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[40+rsp] - xor r12d,r14d + ror r15d,2 + xor r15d,r14d + shr r14d,10 + ror r15d,17 add r12d,r13d - - add r12d,DWORD PTR[40+rsp] + xor r14d,r15d add r12d,DWORD PTR[4+rsp] mov r13d,edx - mov r14d,edx + add r12d,r14d + mov r14d,r11d + ror r13d,14 mov r15d,r8d + mov DWORD PTR[4+rsp],r12d - ror r13d,6 - 
ror r14d,11 + ror r14d,9 + xor r13d,edx xor r15d,r9d - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,r10d + xor r14d,r11d + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,edx - mov DWORD PTR[4+rsp],r12d + mov r10d,eax - xor r13d,r14d + ror r14d,11 + xor r13d,edx xor r15d,r9d - add r12d,r10d - - mov r10d,r11d - add r12d,r13d + xor r10d,ebx + xor r14d,r11d add r12d,r15d - mov r13d,r11d - mov r14d,r11d + mov r15d,eax - ror r10d,2 - ror r13d,13 - mov r15d,r11d - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and r10d,r11d + and r15d,ebx - xor r10d,r13d - ror r13d,9 - or r14d,ebx + ror r14d,2 + add r12d,r13d + add r10d,r15d - xor r10d,r13d - and r15d,ebx add ecx,r12d - - and r14d,eax add r10d,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add r10d,r14d - mov r13d,DWORD PTR[12+rsp] - mov r12d,DWORD PTR[rsp] - mov r15d,r13d + mov r13d,DWORD PTR[12+rsp] + mov r14d,DWORD PTR[rsp] + mov r12d,r13d + mov r15d,r14d + ror r12d,11 + xor r12d,r13d shr r13d,3 - ror r15d,7 - xor r13d,r15d - ror r15d,11 - - xor r13d,r15d - mov r14d,r12d - - shr r12d,10 - ror r14d,17 - - xor r12d,r14d - ror r14d,2 + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[44+rsp] - xor r12d,r14d + ror r15d,2 + xor r15d,r14d + shr r14d,10 + ror r15d,17 add r12d,r13d - - add r12d,DWORD PTR[44+rsp] + xor r14d,r15d add r12d,DWORD PTR[8+rsp] mov r13d,ecx - mov r14d,ecx + add r12d,r14d + mov r14d,r10d + ror r13d,14 mov r15d,edx + mov DWORD PTR[8+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,ecx xor r15d,r8d - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,r9d + xor r14d,r10d + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,ecx - mov DWORD PTR[8+rsp],r12d + mov r9d,r11d - xor r13d,r14d + ror r14d,11 + xor r13d,ecx xor r15d,r8d - add r12d,r9d - - mov r9d,r10d - add r12d,r13d + xor r9d,eax + xor r14d,r10d add r12d,r15d - mov r13d,r10d - mov r14d,r10d + mov r15d,r11d - ror r9d,2 - ror r13d,13 - mov r15d,r10d - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and r9d,r10d + and r15d,eax - xor r9d,r13d - ror r13d,9 - or r14d,eax + ror r14d,2 + add r12d,r13d + add r9d,r15d - xor r9d,r13d - and r15d,eax add ebx,r12d - - and r14d,r11d add r9d,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add r9d,r14d - mov r13d,DWORD PTR[16+rsp] - mov r12d,DWORD PTR[4+rsp] - mov r15d,r13d + mov r13d,DWORD PTR[16+rsp] + mov r14d,DWORD PTR[4+rsp] + mov r12d,r13d + mov r15d,r14d + ror r12d,11 + xor r12d,r13d shr r13d,3 - ror r15d,7 - xor r13d,r15d - ror r15d,11 - - xor r13d,r15d - mov r14d,r12d - - shr r12d,10 - ror r14d,17 - - xor r12d,r14d - ror r14d,2 + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[48+rsp] - xor r12d,r14d + ror r15d,2 + xor r15d,r14d + shr r14d,10 + ror r15d,17 add r12d,r13d - - add r12d,DWORD PTR[48+rsp] + xor r14d,r15d add r12d,DWORD PTR[12+rsp] mov r13d,ebx - mov r14d,ebx + add r12d,r14d + mov r14d,r9d + ror r13d,14 mov r15d,ecx + mov DWORD PTR[12+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,ebx xor r15d,edx - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,r8d + xor r14d,r9d + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,ebx - mov DWORD PTR[12+rsp],r12d + mov r8d,r10d - xor r13d,r14d + ror r14d,11 + xor r13d,ebx xor r15d,edx - add r12d,r8d - - mov r8d,r9d - add r12d,r13d + xor r8d,r11d + xor r14d,r9d add r12d,r15d - mov r13d,r9d - mov r14d,r9d + mov r15d,r10d - ror r8d,2 - ror r13d,13 - mov r15d,r9d - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and r8d,r9d + and r15d,r11d - xor r8d,r13d - ror r13d,9 - or r14d,r11d + ror r14d,2 + add r12d,r13d + add r8d,r15d - xor r8d,r13d - and r15d,r11d add eax,r12d - - and 
r14d,r10d add r8d,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add r8d,r14d - mov r13d,DWORD PTR[20+rsp] - mov r12d,DWORD PTR[8+rsp] - mov r15d,r13d + mov r13d,DWORD PTR[20+rsp] + mov r14d,DWORD PTR[8+rsp] + mov r12d,r13d + mov r15d,r14d + ror r12d,11 + xor r12d,r13d shr r13d,3 - ror r15d,7 - xor r13d,r15d - ror r15d,11 - - xor r13d,r15d - mov r14d,r12d - - shr r12d,10 - ror r14d,17 - - xor r12d,r14d - ror r14d,2 + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[52+rsp] - xor r12d,r14d + ror r15d,2 + xor r15d,r14d + shr r14d,10 + ror r15d,17 add r12d,r13d - - add r12d,DWORD PTR[52+rsp] + xor r14d,r15d add r12d,DWORD PTR[16+rsp] mov r13d,eax - mov r14d,eax + add r12d,r14d + mov r14d,r8d + ror r13d,14 mov r15d,ebx + mov DWORD PTR[16+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,eax xor r15d,ecx - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,edx + xor r14d,r8d + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,eax - mov DWORD PTR[16+rsp],r12d + mov edx,r9d - xor r13d,r14d + ror r14d,11 + xor r13d,eax xor r15d,ecx - add r12d,edx - - mov edx,r8d - add r12d,r13d + xor edx,r10d + xor r14d,r8d add r12d,r15d - mov r13d,r8d - mov r14d,r8d + mov r15d,r9d - ror edx,2 - ror r13d,13 - mov r15d,r8d - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and edx,r8d + and r15d,r10d - xor edx,r13d - ror r13d,9 - or r14d,r10d + ror r14d,2 + add r12d,r13d + add edx,r15d - xor edx,r13d - and r15d,r10d add r11d,r12d - - and r14d,r9d add edx,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add edx,r14d - mov r13d,DWORD PTR[24+rsp] - mov r12d,DWORD PTR[12+rsp] - mov r15d,r13d + mov r13d,DWORD PTR[24+rsp] + mov r14d,DWORD PTR[12+rsp] + mov r12d,r13d + mov r15d,r14d + ror r12d,11 + xor r12d,r13d shr r13d,3 - ror r15d,7 - xor r13d,r15d - ror r15d,11 - - xor r13d,r15d - mov r14d,r12d - - shr r12d,10 - ror r14d,17 - - xor r12d,r14d - ror r14d,2 + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[56+rsp] - xor r12d,r14d + ror r15d,2 + xor r15d,r14d + shr r14d,10 + ror r15d,17 add r12d,r13d - - add r12d,DWORD PTR[56+rsp] + xor r14d,r15d add r12d,DWORD PTR[20+rsp] mov r13d,r11d - mov r14d,r11d + add r12d,r14d + mov r14d,edx + ror r13d,14 mov r15d,eax + mov DWORD PTR[20+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,r11d xor r15d,ebx - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,ecx + xor r14d,edx + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,r11d - mov DWORD PTR[20+rsp],r12d + mov ecx,r8d - xor r13d,r14d + ror r14d,11 + xor r13d,r11d xor r15d,ebx - add r12d,ecx - - mov ecx,edx - add r12d,r13d + xor ecx,r9d + xor r14d,edx add r12d,r15d - mov r13d,edx - mov r14d,edx + mov r15d,r8d - ror ecx,2 - ror r13d,13 - mov r15d,edx - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and ecx,edx + and r15d,r9d - xor ecx,r13d - ror r13d,9 - or r14d,r9d + ror r14d,2 + add r12d,r13d + add ecx,r15d - xor ecx,r13d - and r15d,r9d add r10d,r12d - - and r14d,r8d add ecx,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add ecx,r14d - mov r13d,DWORD PTR[28+rsp] - mov r12d,DWORD PTR[16+rsp] - mov r15d,r13d + mov r13d,DWORD PTR[28+rsp] + mov r14d,DWORD PTR[16+rsp] + mov r12d,r13d + mov r15d,r14d + ror r12d,11 + xor r12d,r13d shr r13d,3 - ror r15d,7 - xor r13d,r15d - ror r15d,11 - - xor r13d,r15d - mov r14d,r12d - - shr r12d,10 - ror r14d,17 - - xor r12d,r14d - ror r14d,2 + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[60+rsp] - xor r12d,r14d + ror r15d,2 + xor r15d,r14d + shr r14d,10 + ror r15d,17 add r12d,r13d - - add r12d,DWORD PTR[60+rsp] + xor r14d,r15d add r12d,DWORD PTR[24+rsp] mov r13d,r10d - mov r14d,r10d + add 
r12d,r14d + mov r14d,ecx + ror r13d,14 mov r15d,r11d + mov DWORD PTR[24+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,r10d xor r15d,eax - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,ebx + xor r14d,ecx + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,r10d - mov DWORD PTR[24+rsp],r12d + mov ebx,edx - xor r13d,r14d + ror r14d,11 + xor r13d,r10d xor r15d,eax - add r12d,ebx - - mov ebx,ecx - add r12d,r13d + xor ebx,r8d + xor r14d,ecx add r12d,r15d - mov r13d,ecx - mov r14d,ecx + mov r15d,edx - ror ebx,2 - ror r13d,13 - mov r15d,ecx - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and ebx,ecx + and r15d,r8d - xor ebx,r13d - ror r13d,9 - or r14d,r8d + ror r14d,2 + add r12d,r13d + add ebx,r15d - xor ebx,r13d - and r15d,r8d add r9d,r12d - - and r14d,edx add ebx,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add ebx,r14d - mov r13d,DWORD PTR[32+rsp] - mov r12d,DWORD PTR[20+rsp] - mov r15d,r13d + mov r13d,DWORD PTR[32+rsp] + mov r14d,DWORD PTR[20+rsp] + mov r12d,r13d + mov r15d,r14d + ror r12d,11 + xor r12d,r13d shr r13d,3 - ror r15d,7 - xor r13d,r15d - ror r15d,11 - - xor r13d,r15d - mov r14d,r12d - - shr r12d,10 - ror r14d,17 - - xor r12d,r14d - ror r14d,2 + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[rsp] - xor r12d,r14d + ror r15d,2 + xor r15d,r14d + shr r14d,10 + ror r15d,17 add r12d,r13d - - add r12d,DWORD PTR[rsp] + xor r14d,r15d add r12d,DWORD PTR[28+rsp] mov r13d,r9d - mov r14d,r9d + add r12d,r14d + mov r14d,ebx + ror r13d,14 mov r15d,r10d + mov DWORD PTR[28+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,r9d xor r15d,r11d - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,eax + xor r14d,ebx + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,r9d - mov DWORD PTR[28+rsp],r12d + mov eax,ecx - xor r13d,r14d + ror r14d,11 + xor r13d,r9d xor r15d,r11d - add r12d,eax - - mov eax,ebx - add r12d,r13d + xor eax,edx + xor r14d,ebx add r12d,r15d - mov r13d,ebx - mov r14d,ebx + mov r15d,ecx - ror eax,2 - ror r13d,13 - mov r15d,ebx - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and eax,ebx + and r15d,edx - xor eax,r13d - ror r13d,9 - or r14d,edx + ror r14d,2 + add r12d,r13d + add eax,r15d - xor eax,r13d - and r15d,edx add r8d,r12d - - and r14d,ecx add eax,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add eax,r14d - mov r13d,DWORD PTR[36+rsp] - mov r12d,DWORD PTR[24+rsp] - mov r15d,r13d + mov r13d,DWORD PTR[36+rsp] + mov r14d,DWORD PTR[24+rsp] + mov r12d,r13d + mov r15d,r14d + ror r12d,11 + xor r12d,r13d shr r13d,3 - ror r15d,7 - xor r13d,r15d - ror r15d,11 - - xor r13d,r15d - mov r14d,r12d - - shr r12d,10 - ror r14d,17 - - xor r12d,r14d - ror r14d,2 + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[4+rsp] - xor r12d,r14d + ror r15d,2 + xor r15d,r14d + shr r14d,10 + ror r15d,17 add r12d,r13d - - add r12d,DWORD PTR[4+rsp] + xor r14d,r15d add r12d,DWORD PTR[32+rsp] mov r13d,r8d - mov r14d,r8d + add r12d,r14d + mov r14d,eax + ror r13d,14 mov r15d,r9d + mov DWORD PTR[32+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,r8d xor r15d,r10d - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,r11d + xor r14d,eax + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,r8d - mov DWORD PTR[32+rsp],r12d + mov r11d,ebx - xor r13d,r14d + ror r14d,11 + xor r13d,r8d xor r15d,r10d - add r12d,r11d - - mov r11d,eax - add r12d,r13d + xor r11d,ecx + xor r14d,eax add r12d,r15d - mov r13d,eax - mov r14d,eax - - ror r11d,2 - ror r13d,13 - mov r15d,eax - add r12d,DWORD PTR[rdi*4+rbp] - - xor r11d,r13d - ror r13d,9 - or r14d,ecx + mov r15d,ebx - xor r11d,r13d + ror r13d,6 + and r11d,eax and r15d,ecx - 
add edx,r12d - and r14d,ebx + ror r14d,2 + add r12d,r13d + add r11d,r15d + + add edx,r12d add r11d,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add r11d,r14d - mov r13d,DWORD PTR[40+rsp] - mov r12d,DWORD PTR[28+rsp] - mov r15d,r13d + mov r13d,DWORD PTR[40+rsp] + mov r14d,DWORD PTR[28+rsp] + mov r12d,r13d + mov r15d,r14d + ror r12d,11 + xor r12d,r13d shr r13d,3 - ror r15d,7 - - xor r13d,r15d - ror r15d,11 - - xor r13d,r15d - mov r14d,r12d - shr r12d,10 - ror r14d,17 - - xor r12d,r14d - ror r14d,2 + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[8+rsp] - xor r12d,r14d + ror r15d,2 + xor r15d,r14d + shr r14d,10 + ror r15d,17 add r12d,r13d - - add r12d,DWORD PTR[8+rsp] + xor r14d,r15d add r12d,DWORD PTR[36+rsp] mov r13d,edx - mov r14d,edx + add r12d,r14d + mov r14d,r11d + ror r13d,14 mov r15d,r8d + mov DWORD PTR[36+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,edx xor r15d,r9d - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,r10d + xor r14d,r11d + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,edx - mov DWORD PTR[36+rsp],r12d + mov r10d,eax - xor r13d,r14d + ror r14d,11 + xor r13d,edx xor r15d,r9d - add r12d,r10d - - mov r10d,r11d - add r12d,r13d + xor r10d,ebx + xor r14d,r11d add r12d,r15d - mov r13d,r11d - mov r14d,r11d + mov r15d,eax - ror r10d,2 - ror r13d,13 - mov r15d,r11d - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and r10d,r11d + and r15d,ebx - xor r10d,r13d - ror r13d,9 - or r14d,ebx + ror r14d,2 + add r12d,r13d + add r10d,r15d - xor r10d,r13d - and r15d,ebx add ecx,r12d - - and r14d,eax add r10d,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add r10d,r14d - mov r13d,DWORD PTR[44+rsp] - mov r12d,DWORD PTR[32+rsp] - mov r15d,r13d + mov r13d,DWORD PTR[44+rsp] + mov r14d,DWORD PTR[32+rsp] + mov r12d,r13d + mov r15d,r14d + ror r12d,11 + xor r12d,r13d shr r13d,3 - ror r15d,7 - - xor r13d,r15d - ror r15d,11 - - xor r13d,r15d - mov r14d,r12d - shr r12d,10 - ror r14d,17 - - xor r12d,r14d - ror r14d,2 + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[12+rsp] - xor r12d,r14d + ror r15d,2 + xor r15d,r14d + shr r14d,10 + ror r15d,17 add r12d,r13d - - add r12d,DWORD PTR[12+rsp] + xor r14d,r15d add r12d,DWORD PTR[40+rsp] mov r13d,ecx - mov r14d,ecx + add r12d,r14d + mov r14d,r10d + ror r13d,14 mov r15d,edx + mov DWORD PTR[40+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,ecx xor r15d,r8d - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,r9d + xor r14d,r10d + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,ecx - mov DWORD PTR[40+rsp],r12d + mov r9d,r11d - xor r13d,r14d + ror r14d,11 + xor r13d,ecx xor r15d,r8d - add r12d,r9d - - mov r9d,r10d - add r12d,r13d + xor r9d,eax + xor r14d,r10d add r12d,r15d - mov r13d,r10d - mov r14d,r10d + mov r15d,r11d - ror r9d,2 - ror r13d,13 - mov r15d,r10d - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and r9d,r10d + and r15d,eax - xor r9d,r13d - ror r13d,9 - or r14d,eax + ror r14d,2 + add r12d,r13d + add r9d,r15d - xor r9d,r13d - and r15d,eax add ebx,r12d - - and r14d,r11d add r9d,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add r9d,r14d - mov r13d,DWORD PTR[48+rsp] - mov r12d,DWORD PTR[36+rsp] - mov r15d,r13d + mov r13d,DWORD PTR[48+rsp] + mov r14d,DWORD PTR[36+rsp] + mov r12d,r13d + mov r15d,r14d + ror r12d,11 + xor r12d,r13d shr r13d,3 - ror r15d,7 - - xor r13d,r15d - ror r15d,11 - - xor r13d,r15d - mov r14d,r12d - shr r12d,10 - ror r14d,17 - - xor r12d,r14d - ror r14d,2 + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[16+rsp] - xor r12d,r14d + ror r15d,2 + xor r15d,r14d + shr r14d,10 + ror r15d,17 add r12d,r13d - - add 
r12d,DWORD PTR[16+rsp] + xor r14d,r15d add r12d,DWORD PTR[44+rsp] mov r13d,ebx - mov r14d,ebx + add r12d,r14d + mov r14d,r9d + ror r13d,14 mov r15d,ecx + mov DWORD PTR[44+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,ebx xor r15d,edx - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,r8d + xor r14d,r9d + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,ebx - mov DWORD PTR[44+rsp],r12d + mov r8d,r10d - xor r13d,r14d + ror r14d,11 + xor r13d,ebx xor r15d,edx - add r12d,r8d - - mov r8d,r9d - add r12d,r13d + xor r8d,r11d + xor r14d,r9d add r12d,r15d - mov r13d,r9d - mov r14d,r9d + mov r15d,r10d - ror r8d,2 - ror r13d,13 - mov r15d,r9d - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and r8d,r9d + and r15d,r11d - xor r8d,r13d - ror r13d,9 - or r14d,r11d + ror r14d,2 + add r12d,r13d + add r8d,r15d - xor r8d,r13d - and r15d,r11d add eax,r12d - - and r14d,r10d add r8d,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add r8d,r14d - mov r13d,DWORD PTR[52+rsp] - mov r12d,DWORD PTR[40+rsp] - mov r15d,r13d + mov r13d,DWORD PTR[52+rsp] + mov r14d,DWORD PTR[40+rsp] + mov r12d,r13d + mov r15d,r14d + ror r12d,11 + xor r12d,r13d shr r13d,3 - ror r15d,7 - - xor r13d,r15d - ror r15d,11 - - xor r13d,r15d - mov r14d,r12d - shr r12d,10 - ror r14d,17 - - xor r12d,r14d - ror r14d,2 + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[20+rsp] - xor r12d,r14d + ror r15d,2 + xor r15d,r14d + shr r14d,10 + ror r15d,17 add r12d,r13d - - add r12d,DWORD PTR[20+rsp] + xor r14d,r15d add r12d,DWORD PTR[48+rsp] mov r13d,eax - mov r14d,eax + add r12d,r14d + mov r14d,r8d + ror r13d,14 mov r15d,ebx + mov DWORD PTR[48+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,eax xor r15d,ecx - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,edx + xor r14d,r8d + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,eax - mov DWORD PTR[48+rsp],r12d + mov edx,r9d - xor r13d,r14d + ror r14d,11 + xor r13d,eax xor r15d,ecx - add r12d,edx - - mov edx,r8d - add r12d,r13d + xor edx,r10d + xor r14d,r8d add r12d,r15d - mov r13d,r8d - mov r14d,r8d + mov r15d,r9d - ror edx,2 - ror r13d,13 - mov r15d,r8d - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and edx,r8d + and r15d,r10d - xor edx,r13d - ror r13d,9 - or r14d,r10d + ror r14d,2 + add r12d,r13d + add edx,r15d - xor edx,r13d - and r15d,r10d add r11d,r12d - - and r14d,r9d add edx,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add edx,r14d - mov r13d,DWORD PTR[56+rsp] - mov r12d,DWORD PTR[44+rsp] - mov r15d,r13d + mov r13d,DWORD PTR[56+rsp] + mov r14d,DWORD PTR[44+rsp] + mov r12d,r13d + mov r15d,r14d + ror r12d,11 + xor r12d,r13d shr r13d,3 - ror r15d,7 - - xor r13d,r15d - ror r15d,11 - - xor r13d,r15d - mov r14d,r12d - shr r12d,10 - ror r14d,17 - - xor r12d,r14d - ror r14d,2 + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[24+rsp] - xor r12d,r14d + ror r15d,2 + xor r15d,r14d + shr r14d,10 + ror r15d,17 add r12d,r13d - - add r12d,DWORD PTR[24+rsp] + xor r14d,r15d add r12d,DWORD PTR[52+rsp] mov r13d,r11d - mov r14d,r11d + add r12d,r14d + mov r14d,edx + ror r13d,14 mov r15d,eax + mov DWORD PTR[52+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,r11d xor r15d,ebx - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,ecx + xor r14d,edx + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,r11d - mov DWORD PTR[52+rsp],r12d + mov ecx,r8d - xor r13d,r14d + ror r14d,11 + xor r13d,r11d xor r15d,ebx - add r12d,ecx - - mov ecx,edx - add r12d,r13d + xor ecx,r9d + xor r14d,edx add r12d,r15d - mov r13d,edx - mov r14d,edx + mov r15d,r8d - ror ecx,2 - ror r13d,13 - mov r15d,edx - add r12d,DWORD 
PTR[rdi*4+rbp] + ror r13d,6 + and ecx,edx + and r15d,r9d - xor ecx,r13d - ror r13d,9 - or r14d,r9d + ror r14d,2 + add r12d,r13d + add ecx,r15d - xor ecx,r13d - and r15d,r9d add r10d,r12d - - and r14d,r8d add ecx,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add ecx,r14d - mov r13d,DWORD PTR[60+rsp] - mov r12d,DWORD PTR[48+rsp] - mov r15d,r13d + mov r13d,DWORD PTR[60+rsp] + mov r14d,DWORD PTR[48+rsp] + mov r12d,r13d + mov r15d,r14d + ror r12d,11 + xor r12d,r13d shr r13d,3 - ror r15d,7 - - xor r13d,r15d - ror r15d,11 - - xor r13d,r15d - mov r14d,r12d - shr r12d,10 - ror r14d,17 - - xor r12d,r14d - ror r14d,2 + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[28+rsp] - xor r12d,r14d + ror r15d,2 + xor r15d,r14d + shr r14d,10 + ror r15d,17 add r12d,r13d - - add r12d,DWORD PTR[28+rsp] + xor r14d,r15d add r12d,DWORD PTR[56+rsp] mov r13d,r10d - mov r14d,r10d + add r12d,r14d + mov r14d,ecx + ror r13d,14 mov r15d,r11d + mov DWORD PTR[56+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,r10d xor r15d,eax - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,ebx + xor r14d,ecx + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,r10d - mov DWORD PTR[56+rsp],r12d + mov ebx,edx - xor r13d,r14d + ror r14d,11 + xor r13d,r10d xor r15d,eax - add r12d,ebx - - mov ebx,ecx - add r12d,r13d + xor ebx,r8d + xor r14d,ecx add r12d,r15d - mov r13d,ecx - mov r14d,ecx + mov r15d,edx - ror ebx,2 - ror r13d,13 - mov r15d,ecx - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and ebx,ecx + and r15d,r8d - xor ebx,r13d - ror r13d,9 - or r14d,r8d + ror r14d,2 + add r12d,r13d + add ebx,r15d - xor ebx,r13d - and r15d,r8d add r9d,r12d - - and r14d,edx add ebx,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add ebx,r14d - mov r13d,DWORD PTR[rsp] - mov r12d,DWORD PTR[52+rsp] - mov r15d,r13d + mov r13d,DWORD PTR[rsp] + mov r14d,DWORD PTR[52+rsp] + mov r12d,r13d + mov r15d,r14d + ror r12d,11 + xor r12d,r13d shr r13d,3 - ror r15d,7 - - xor r13d,r15d - ror r15d,11 - - xor r13d,r15d - mov r14d,r12d - shr r12d,10 - ror r14d,17 - - xor r12d,r14d - ror r14d,2 + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[32+rsp] - xor r12d,r14d + ror r15d,2 + xor r15d,r14d + shr r14d,10 + ror r15d,17 add r12d,r13d - - add r12d,DWORD PTR[32+rsp] + xor r14d,r15d add r12d,DWORD PTR[60+rsp] mov r13d,r9d - mov r14d,r9d + add r12d,r14d + mov r14d,ebx + ror r13d,14 mov r15d,r10d + mov DWORD PTR[60+rsp],r12d - ror r13d,6 - ror r14d,11 + ror r14d,9 + xor r13d,r9d xor r15d,r11d - xor r13d,r14d - ror r14d,14 + ror r13d,5 + add r12d,eax + xor r14d,ebx + + add r12d,DWORD PTR[rdi*4+rbp] and r15d,r9d - mov DWORD PTR[60+rsp],r12d + mov eax,ecx - xor r13d,r14d + ror r14d,11 + xor r13d,r9d xor r15d,r11d - add r12d,eax - - mov eax,ebx - add r12d,r13d + xor eax,edx + xor r14d,ebx add r12d,r15d - mov r13d,ebx - mov r14d,ebx + mov r15d,ecx - ror eax,2 - ror r13d,13 - mov r15d,ebx - add r12d,DWORD PTR[rdi*4+rbp] + ror r13d,6 + and eax,ebx + and r15d,edx - xor eax,r13d - ror r13d,9 - or r14d,edx + ror r14d,2 + add r12d,r13d + add eax,r15d - xor eax,r13d - and r15d,edx add r8d,r12d - - and r14d,ecx add eax,r12d - - or r14d,r15d lea rdi,QWORD PTR[1+rdi] - add eax,r14d + cmp rdi,64 jb $L$rounds_16_xx - mov rdi,QWORD PTR[((16*4+0*8))+rsp] - lea rsi,QWORD PTR[((16*4))+rsi] - - add eax,DWORD PTR[((4*0))+rdi] - add ebx,DWORD PTR[((4*1))+rdi] - add ecx,DWORD PTR[((4*2))+rdi] - add edx,DWORD PTR[((4*3))+rdi] - add r8d,DWORD PTR[((4*4))+rdi] - add r9d,DWORD PTR[((4*5))+rdi] - add r10d,DWORD PTR[((4*6))+rdi] - add r11d,DWORD PTR[((4*7))+rdi] - - cmp rsi,QWORD PTR[((16*4+2*8))+rsp] - 
- mov DWORD PTR[((4*0))+rdi],eax - mov DWORD PTR[((4*1))+rdi],ebx - mov DWORD PTR[((4*2))+rdi],ecx - mov DWORD PTR[((4*3))+rdi],edx - mov DWORD PTR[((4*4))+rdi],r8d - mov DWORD PTR[((4*5))+rdi],r9d - mov DWORD PTR[((4*6))+rdi],r10d - mov DWORD PTR[((4*7))+rdi],r11d + mov rdi,QWORD PTR[((64+0))+rsp] + lea rsi,QWORD PTR[64+rsi] + + add eax,DWORD PTR[rdi] + add ebx,DWORD PTR[4+rdi] + add ecx,DWORD PTR[8+rdi] + add edx,DWORD PTR[12+rdi] + add r8d,DWORD PTR[16+rdi] + add r9d,DWORD PTR[20+rdi] + add r10d,DWORD PTR[24+rdi] + add r11d,DWORD PTR[28+rdi] + + cmp rsi,QWORD PTR[((64+16))+rsp] + + mov DWORD PTR[rdi],eax + mov DWORD PTR[4+rdi],ebx + mov DWORD PTR[8+rdi],ecx + mov DWORD PTR[12+rdi],edx + mov DWORD PTR[16+rdi],r8d + mov DWORD PTR[20+rdi],r9d + mov DWORD PTR[24+rdi],r10d + mov DWORD PTR[28+rdi],r11d jb $L$loop - mov rsi,QWORD PTR[((16*4+3*8))+rsp] + mov rsi,QWORD PTR[((64+24))+rsp] mov r15,QWORD PTR[rsi] mov r14,QWORD PTR[8+rsi] mov r13,QWORD PTR[16+rsi] @@ -2010,7 +1818,7 @@ se_handler PROC PRIVATE cmp rbx,r10 jae $L$in_prologue - mov rax,QWORD PTR[((16*4+3*8))+rax] + mov rax,QWORD PTR[((64+24))+rax] lea rax,QWORD PTR[48+rax] mov rbx,QWORD PTR[((-8))+rax] diff --git a/deps/openssl/asm/x64-win32-masm/whrlpool/wp-x86_64.asm b/deps/openssl/asm/x64-win32-masm/whrlpool/wp-x86_64.asm index 25337b2440..42b524dc8f 100644 --- a/deps/openssl/asm/x64-win32-masm/whrlpool/wp-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/whrlpool/wp-x86_64.asm @@ -37,39 +37,39 @@ $L$prologue:: xor rcx,rcx xor rdx,rdx - mov r8,QWORD PTR[((0*8))+rdi] - mov r9,QWORD PTR[((1*8))+rdi] - mov r10,QWORD PTR[((2*8))+rdi] - mov r11,QWORD PTR[((3*8))+rdi] - mov r12,QWORD PTR[((4*8))+rdi] - mov r13,QWORD PTR[((5*8))+rdi] - mov r14,QWORD PTR[((6*8))+rdi] - mov r15,QWORD PTR[((7*8))+rdi] + mov r8,QWORD PTR[rdi] + mov r9,QWORD PTR[8+rdi] + mov r10,QWORD PTR[16+rdi] + mov r11,QWORD PTR[24+rdi] + mov r12,QWORD PTR[32+rdi] + mov r13,QWORD PTR[40+rdi] + mov r14,QWORD PTR[48+rdi] + mov r15,QWORD PTR[56+rdi] $L$outerloop:: - mov QWORD PTR[((0*8))+rsp],r8 - mov QWORD PTR[((1*8))+rsp],r9 - mov QWORD PTR[((2*8))+rsp],r10 - mov QWORD PTR[((3*8))+rsp],r11 - mov QWORD PTR[((4*8))+rsp],r12 - mov QWORD PTR[((5*8))+rsp],r13 - mov QWORD PTR[((6*8))+rsp],r14 - mov QWORD PTR[((7*8))+rsp],r15 - xor r8,QWORD PTR[((0*8))+rsi] - xor r9,QWORD PTR[((1*8))+rsi] - xor r10,QWORD PTR[((2*8))+rsi] - xor r11,QWORD PTR[((3*8))+rsi] - xor r12,QWORD PTR[((4*8))+rsi] - xor r13,QWORD PTR[((5*8))+rsi] - xor r14,QWORD PTR[((6*8))+rsi] - xor r15,QWORD PTR[((7*8))+rsi] - mov QWORD PTR[((64+0*8))+rsp],r8 - mov QWORD PTR[((64+1*8))+rsp],r9 - mov QWORD PTR[((64+2*8))+rsp],r10 - mov QWORD PTR[((64+3*8))+rsp],r11 - mov QWORD PTR[((64+4*8))+rsp],r12 - mov QWORD PTR[((64+5*8))+rsp],r13 - mov QWORD PTR[((64+6*8))+rsp],r14 - mov QWORD PTR[((64+7*8))+rsp],r15 + mov QWORD PTR[rsp],r8 + mov QWORD PTR[8+rsp],r9 + mov QWORD PTR[16+rsp],r10 + mov QWORD PTR[24+rsp],r11 + mov QWORD PTR[32+rsp],r12 + mov QWORD PTR[40+rsp],r13 + mov QWORD PTR[48+rsp],r14 + mov QWORD PTR[56+rsp],r15 + xor r8,QWORD PTR[rsi] + xor r9,QWORD PTR[8+rsi] + xor r10,QWORD PTR[16+rsi] + xor r11,QWORD PTR[24+rsi] + xor r12,QWORD PTR[32+rsi] + xor r13,QWORD PTR[40+rsi] + xor r14,QWORD PTR[48+rsi] + xor r15,QWORD PTR[56+rsi] + mov QWORD PTR[((64+0))+rsp],r8 + mov QWORD PTR[((64+8))+rsp],r9 + mov QWORD PTR[((64+16))+rsp],r10 + mov QWORD PTR[((64+24))+rsp],r11 + mov QWORD PTR[((64+32))+rsp],r12 + mov QWORD PTR[((64+40))+rsp],r13 + mov QWORD PTR[((64+48))+rsp],r14 + mov QWORD PTR[((64+56))+rsp],r15 xor rsi,rsi mov QWORD 
PTR[24+rbx],rsi ALIGN 16 @@ -86,7 +86,7 @@ $L$round:: mov r9,QWORD PTR[7+rdi*8+rbp] mov cl,al mov dl,ah - mov eax,DWORD PTR[((0*8+8))+rsp] + mov eax,DWORD PTR[((0+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] mov r10,QWORD PTR[6+rsi*8+rbp] @@ -100,7 +100,7 @@ $L$round:: mov r13,QWORD PTR[3+rdi*8+rbp] mov cl,bl mov dl,bh - mov ebx,DWORD PTR[((0*8+8+4))+rsp] + mov ebx,DWORD PTR[((0+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] mov r14,QWORD PTR[2+rsi*8+rbp] @@ -114,7 +114,7 @@ $L$round:: xor r10,QWORD PTR[7+rdi*8+rbp] mov cl,al mov dl,ah - mov eax,DWORD PTR[((1*8+8))+rsp] + mov eax,DWORD PTR[((8+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r11,QWORD PTR[6+rsi*8+rbp] @@ -128,7 +128,7 @@ $L$round:: xor r14,QWORD PTR[3+rdi*8+rbp] mov cl,bl mov dl,bh - mov ebx,DWORD PTR[((1*8+8+4))+rsp] + mov ebx,DWORD PTR[((8+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r15,QWORD PTR[2+rsi*8+rbp] @@ -142,7 +142,7 @@ $L$round:: xor r11,QWORD PTR[7+rdi*8+rbp] mov cl,al mov dl,ah - mov eax,DWORD PTR[((2*8+8))+rsp] + mov eax,DWORD PTR[((16+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r12,QWORD PTR[6+rsi*8+rbp] @@ -156,7 +156,7 @@ $L$round:: xor r15,QWORD PTR[3+rdi*8+rbp] mov cl,bl mov dl,bh - mov ebx,DWORD PTR[((2*8+8+4))+rsp] + mov ebx,DWORD PTR[((16+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r8,QWORD PTR[2+rsi*8+rbp] @@ -170,7 +170,7 @@ $L$round:: xor r12,QWORD PTR[7+rdi*8+rbp] mov cl,al mov dl,ah - mov eax,DWORD PTR[((3*8+8))+rsp] + mov eax,DWORD PTR[((24+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r13,QWORD PTR[6+rsi*8+rbp] @@ -184,7 +184,7 @@ $L$round:: xor r8,QWORD PTR[3+rdi*8+rbp] mov cl,bl mov dl,bh - mov ebx,DWORD PTR[((3*8+8+4))+rsp] + mov ebx,DWORD PTR[((24+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r9,QWORD PTR[2+rsi*8+rbp] @@ -198,7 +198,7 @@ $L$round:: xor r13,QWORD PTR[7+rdi*8+rbp] mov cl,al mov dl,ah - mov eax,DWORD PTR[((4*8+8))+rsp] + mov eax,DWORD PTR[((32+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r14,QWORD PTR[6+rsi*8+rbp] @@ -212,7 +212,7 @@ $L$round:: xor r9,QWORD PTR[3+rdi*8+rbp] mov cl,bl mov dl,bh - mov ebx,DWORD PTR[((4*8+8+4))+rsp] + mov ebx,DWORD PTR[((32+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r10,QWORD PTR[2+rsi*8+rbp] @@ -226,7 +226,7 @@ $L$round:: xor r14,QWORD PTR[7+rdi*8+rbp] mov cl,al mov dl,ah - mov eax,DWORD PTR[((5*8+8))+rsp] + mov eax,DWORD PTR[((40+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r15,QWORD PTR[6+rsi*8+rbp] @@ -240,7 +240,7 @@ $L$round:: xor r10,QWORD PTR[3+rdi*8+rbp] mov cl,bl mov dl,bh - mov ebx,DWORD PTR[((5*8+8+4))+rsp] + mov ebx,DWORD PTR[((40+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r11,QWORD PTR[2+rsi*8+rbp] @@ -254,7 +254,7 @@ $L$round:: xor r15,QWORD PTR[7+rdi*8+rbp] mov cl,al mov dl,ah - mov eax,DWORD PTR[((6*8+8))+rsp] + mov eax,DWORD PTR[((48+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r8,QWORD PTR[6+rsi*8+rbp] @@ -268,7 +268,7 @@ $L$round:: xor r11,QWORD PTR[3+rdi*8+rbp] mov cl,bl mov dl,bh - mov ebx,DWORD PTR[((6*8+8+4))+rsp] + mov ebx,DWORD PTR[((48+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r12,QWORD PTR[2+rsi*8+rbp] @@ -282,7 +282,7 @@ $L$round:: xor r8,QWORD PTR[7+rdi*8+rbp] mov cl,al mov dl,ah - mov eax,DWORD PTR[((7*8+8))+rsp] + mov eax,DWORD PTR[((56+8))+rsp] lea 
rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r9,QWORD PTR[6+rsi*8+rbp] @@ -296,19 +296,19 @@ $L$round:: xor r12,QWORD PTR[3+rdi*8+rbp] mov cl,bl mov dl,bh - mov ebx,DWORD PTR[((7*8+8+4))+rsp] + mov ebx,DWORD PTR[((56+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r13,QWORD PTR[2+rsi*8+rbp] xor r14,QWORD PTR[1+rdi*8+rbp] - mov QWORD PTR[((0*8))+rsp],r8 - mov QWORD PTR[((1*8))+rsp],r9 - mov QWORD PTR[((2*8))+rsp],r10 - mov QWORD PTR[((3*8))+rsp],r11 - mov QWORD PTR[((4*8))+rsp],r12 - mov QWORD PTR[((5*8))+rsp],r13 - mov QWORD PTR[((6*8))+rsp],r14 - mov QWORD PTR[((7*8))+rsp],r15 + mov QWORD PTR[rsp],r8 + mov QWORD PTR[8+rsp],r9 + mov QWORD PTR[16+rsp],r10 + mov QWORD PTR[24+rsp],r11 + mov QWORD PTR[32+rsp],r12 + mov QWORD PTR[40+rsp],r13 + mov QWORD PTR[48+rsp],r14 + mov QWORD PTR[56+rsp],r15 mov cl,al mov dl,ah lea rsi,QWORD PTR[rcx*1+rcx] @@ -318,7 +318,7 @@ $L$round:: xor r9,QWORD PTR[7+rdi*8+rbp] mov cl,al mov dl,ah - mov eax,DWORD PTR[((64+0*8+8))+rsp] + mov eax,DWORD PTR[((64+0+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r10,QWORD PTR[6+rsi*8+rbp] @@ -332,7 +332,7 @@ $L$round:: xor r13,QWORD PTR[3+rdi*8+rbp] mov cl,bl mov dl,bh - mov ebx,DWORD PTR[((64+0*8+8+4))+rsp] + mov ebx,DWORD PTR[((64+0+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r14,QWORD PTR[2+rsi*8+rbp] @@ -346,7 +346,7 @@ $L$round:: xor r10,QWORD PTR[7+rdi*8+rbp] mov cl,al mov dl,ah - mov eax,DWORD PTR[((64+1*8+8))+rsp] + mov eax,DWORD PTR[((64+8+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r11,QWORD PTR[6+rsi*8+rbp] @@ -360,7 +360,7 @@ $L$round:: xor r14,QWORD PTR[3+rdi*8+rbp] mov cl,bl mov dl,bh - mov ebx,DWORD PTR[((64+1*8+8+4))+rsp] + mov ebx,DWORD PTR[((64+8+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r15,QWORD PTR[2+rsi*8+rbp] @@ -374,7 +374,7 @@ $L$round:: xor r11,QWORD PTR[7+rdi*8+rbp] mov cl,al mov dl,ah - mov eax,DWORD PTR[((64+2*8+8))+rsp] + mov eax,DWORD PTR[((64+16+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r12,QWORD PTR[6+rsi*8+rbp] @@ -388,7 +388,7 @@ $L$round:: xor r15,QWORD PTR[3+rdi*8+rbp] mov cl,bl mov dl,bh - mov ebx,DWORD PTR[((64+2*8+8+4))+rsp] + mov ebx,DWORD PTR[((64+16+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r8,QWORD PTR[2+rsi*8+rbp] @@ -402,7 +402,7 @@ $L$round:: xor r12,QWORD PTR[7+rdi*8+rbp] mov cl,al mov dl,ah - mov eax,DWORD PTR[((64+3*8+8))+rsp] + mov eax,DWORD PTR[((64+24+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r13,QWORD PTR[6+rsi*8+rbp] @@ -416,7 +416,7 @@ $L$round:: xor r8,QWORD PTR[3+rdi*8+rbp] mov cl,bl mov dl,bh - mov ebx,DWORD PTR[((64+3*8+8+4))+rsp] + mov ebx,DWORD PTR[((64+24+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r9,QWORD PTR[2+rsi*8+rbp] @@ -430,7 +430,7 @@ $L$round:: xor r13,QWORD PTR[7+rdi*8+rbp] mov cl,al mov dl,ah - mov eax,DWORD PTR[((64+4*8+8))+rsp] + mov eax,DWORD PTR[((64+32+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r14,QWORD PTR[6+rsi*8+rbp] @@ -444,7 +444,7 @@ $L$round:: xor r9,QWORD PTR[3+rdi*8+rbp] mov cl,bl mov dl,bh - mov ebx,DWORD PTR[((64+4*8+8+4))+rsp] + mov ebx,DWORD PTR[((64+32+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r10,QWORD PTR[2+rsi*8+rbp] @@ -458,7 +458,7 @@ $L$round:: xor r14,QWORD PTR[7+rdi*8+rbp] mov cl,al mov dl,ah - mov eax,DWORD PTR[((64+5*8+8))+rsp] + mov eax,DWORD PTR[((64+40+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea 
rdi,QWORD PTR[rdx*1+rdx] xor r15,QWORD PTR[6+rsi*8+rbp] @@ -472,7 +472,7 @@ $L$round:: xor r10,QWORD PTR[3+rdi*8+rbp] mov cl,bl mov dl,bh - mov ebx,DWORD PTR[((64+5*8+8+4))+rsp] + mov ebx,DWORD PTR[((64+40+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r11,QWORD PTR[2+rsi*8+rbp] @@ -486,7 +486,7 @@ $L$round:: xor r15,QWORD PTR[7+rdi*8+rbp] mov cl,al mov dl,ah - mov eax,DWORD PTR[((64+6*8+8))+rsp] + mov eax,DWORD PTR[((64+48+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r8,QWORD PTR[6+rsi*8+rbp] @@ -500,7 +500,7 @@ $L$round:: xor r11,QWORD PTR[3+rdi*8+rbp] mov cl,bl mov dl,bh - mov ebx,DWORD PTR[((64+6*8+8+4))+rsp] + mov ebx,DWORD PTR[((64+48+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] lea rdi,QWORD PTR[rdx*1+rdx] xor r12,QWORD PTR[2+rsi*8+rbp] @@ -540,44 +540,44 @@ $L$round:: je $L$roundsdone mov QWORD PTR[24+rbx],rsi - mov QWORD PTR[((64+0*8))+rsp],r8 - mov QWORD PTR[((64+1*8))+rsp],r9 - mov QWORD PTR[((64+2*8))+rsp],r10 - mov QWORD PTR[((64+3*8))+rsp],r11 - mov QWORD PTR[((64+4*8))+rsp],r12 - mov QWORD PTR[((64+5*8))+rsp],r13 - mov QWORD PTR[((64+6*8))+rsp],r14 - mov QWORD PTR[((64+7*8))+rsp],r15 + mov QWORD PTR[((64+0))+rsp],r8 + mov QWORD PTR[((64+8))+rsp],r9 + mov QWORD PTR[((64+16))+rsp],r10 + mov QWORD PTR[((64+24))+rsp],r11 + mov QWORD PTR[((64+32))+rsp],r12 + mov QWORD PTR[((64+40))+rsp],r13 + mov QWORD PTR[((64+48))+rsp],r14 + mov QWORD PTR[((64+56))+rsp],r15 jmp $L$round ALIGN 16 $L$roundsdone:: mov rdi,QWORD PTR[rbx] mov rsi,QWORD PTR[8+rbx] mov rax,QWORD PTR[16+rbx] - xor r8,QWORD PTR[((0*8))+rsi] - xor r9,QWORD PTR[((1*8))+rsi] - xor r10,QWORD PTR[((2*8))+rsi] - xor r11,QWORD PTR[((3*8))+rsi] - xor r12,QWORD PTR[((4*8))+rsi] - xor r13,QWORD PTR[((5*8))+rsi] - xor r14,QWORD PTR[((6*8))+rsi] - xor r15,QWORD PTR[((7*8))+rsi] - xor r8,QWORD PTR[((0*8))+rdi] - xor r9,QWORD PTR[((1*8))+rdi] - xor r10,QWORD PTR[((2*8))+rdi] - xor r11,QWORD PTR[((3*8))+rdi] - xor r12,QWORD PTR[((4*8))+rdi] - xor r13,QWORD PTR[((5*8))+rdi] - xor r14,QWORD PTR[((6*8))+rdi] - xor r15,QWORD PTR[((7*8))+rdi] - mov QWORD PTR[((0*8))+rdi],r8 - mov QWORD PTR[((1*8))+rdi],r9 - mov QWORD PTR[((2*8))+rdi],r10 - mov QWORD PTR[((3*8))+rdi],r11 - mov QWORD PTR[((4*8))+rdi],r12 - mov QWORD PTR[((5*8))+rdi],r13 - mov QWORD PTR[((6*8))+rdi],r14 - mov QWORD PTR[((7*8))+rdi],r15 + xor r8,QWORD PTR[rsi] + xor r9,QWORD PTR[8+rsi] + xor r10,QWORD PTR[16+rsi] + xor r11,QWORD PTR[24+rsi] + xor r12,QWORD PTR[32+rsi] + xor r13,QWORD PTR[40+rsi] + xor r14,QWORD PTR[48+rsi] + xor r15,QWORD PTR[56+rsi] + xor r8,QWORD PTR[rdi] + xor r9,QWORD PTR[8+rdi] + xor r10,QWORD PTR[16+rdi] + xor r11,QWORD PTR[24+rdi] + xor r12,QWORD PTR[32+rdi] + xor r13,QWORD PTR[40+rdi] + xor r14,QWORD PTR[48+rdi] + xor r15,QWORD PTR[56+rdi] + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 lea rsi,QWORD PTR[64+rsi] sub rax,1 jz $L$alldone diff --git a/deps/openssl/asm/x64-win32-masm/x86_64cpuid.asm b/deps/openssl/asm/x64-win32-masm/x86_64cpuid.asm index cdf7f90ca0..497160cbcf 100644 --- a/deps/openssl/asm/x64-win32-masm/x86_64cpuid.asm +++ b/deps/openssl/asm/x64-win32-masm/x86_64cpuid.asm @@ -1,9 +1,15 @@ OPTION DOTNAME EXTERN OPENSSL_cpuid_setup:NEAR + .CRT$XCU SEGMENT READONLY ALIGN(8) DQ OPENSSL_cpuid_setup + .CRT$XCU ENDS +_DATA SEGMENT +COMM OPENSSL_ia32cap_P:DWORD:2 + +_DATA ENDS .text$ SEGMENT ALIGN(64) 'CODE' PUBLIC OPENSSL_atomic_add 
@@ -68,7 +74,15 @@ OPENSSL_ia32_cpuid PROC PUBLIC mov eax,080000000h cpuid - cmp eax,080000008h + cmp eax,080000001h + jb $L$intel + mov r10d,eax + mov eax,080000001h + cpuid + or r9d,ecx + and r9d,000000801h + + cmp r10d,080000008h jb $L$intel mov eax,080000008h @@ -79,12 +93,12 @@ OPENSSL_ia32_cpuid PROC PUBLIC mov eax,1 cpuid bt edx,28 - jnc $L$done + jnc $L$generic shr ebx,16 cmp bl,r10b - ja $L$done + ja $L$generic and edx,0efffffffh - jmp $L$done + jmp $L$generic $L$intel:: cmp r11d,4 @@ -101,30 +115,48 @@ $L$intel:: $L$nocacheinfo:: mov eax,1 cpuid + and edx,0bfefffffh cmp r9d,0 jne $L$notintel - or edx,000100000h + or edx,040000000h and ah,15 cmp ah,15 - je $L$notintel - or edx,040000000h + jne $L$notintel + or edx,000100000h $L$notintel:: bt edx,28 - jnc $L$done + jnc $L$generic and edx,0efffffffh cmp r10d,0 - je $L$done + je $L$generic or edx,010000000h shr ebx,16 cmp bl,1 - ja $L$done + ja $L$generic and edx,0efffffffh +$L$generic:: + and r9d,000000800h + and ecx,0fffff7ffh + or r9d,ecx + + mov r10d,edx + bt r9d,27 + jnc $L$clear_avx + xor ecx,ecx +DB 00fh,001h,0d0h + + and eax,6 + cmp eax,6 + je $L$done +$L$clear_avx:: + mov eax,0efffe7ffh + and r9d,eax $L$done:: - shl rcx,32 - mov eax,edx + shl r9,32 + mov eax,r10d mov rbx,r8 - or rax,rcx + or rax,r9 DB 0F3h,0C3h ;repret OPENSSL_ia32_cpuid ENDP @@ -181,6 +213,20 @@ OPENSSL_wipe_cpu PROC PUBLIC lea rax,QWORD PTR[8+rsp] DB 0F3h,0C3h ;repret OPENSSL_wipe_cpu ENDP +PUBLIC OPENSSL_ia32_rdrand + +ALIGN 16 +OPENSSL_ia32_rdrand PROC PUBLIC + mov ecx,8 +$L$oop_rdrand:: +DB 72,15,199,240 + jc $L$break_rdrand + loop $L$oop_rdrand +$L$break_rdrand:: + cmp rax,0 + cmove rax,rcx + DB 0F3h,0C3h ;repret +OPENSSL_ia32_rdrand ENDP .text$ ENDS END diff --git a/deps/openssl/asm/x64-win32-masm/x86_64cpuid.asm.orig b/deps/openssl/asm/x64-win32-masm/x86_64cpuid.asm.orig new file mode 100644 index 0000000000..cf970738b0 --- /dev/null +++ b/deps/openssl/asm/x64-win32-masm/x86_64cpuid.asm.orig @@ -0,0 +1,235 @@ +OPTION DOTNAME +EXTERN OPENSSL_cpuid_setup:NEAR +<<<<<<< HEAD +======= + +>>>>>>> openssl: regenerate asm files for openssl 1.0.1 +.CRT$XCU SEGMENT READONLY ALIGN(8) + DQ OPENSSL_cpuid_setup + + +.CRT$XCU ENDS +_DATA SEGMENT +COMM OPENSSL_ia32cap_P:DWORD:2 + +_DATA ENDS +.text$ SEGMENT ALIGN(64) 'CODE' + +PUBLIC OPENSSL_atomic_add + +ALIGN 16 +OPENSSL_atomic_add PROC PUBLIC + mov eax,DWORD PTR[rcx] +$L$spin:: lea r8,QWORD PTR[rax*1+rdx] +DB 0f0h + + cmpxchg DWORD PTR[rcx],r8d + jne $L$spin + mov eax,r8d +DB 048h,098h + + DB 0F3h,0C3h ;repret +OPENSSL_atomic_add ENDP + +PUBLIC OPENSSL_rdtsc + +ALIGN 16 +OPENSSL_rdtsc PROC PUBLIC + rdtsc + shl rdx,32 + or rax,rdx + DB 0F3h,0C3h ;repret +OPENSSL_rdtsc ENDP + +PUBLIC OPENSSL_ia32_cpuid + +ALIGN 16 +OPENSSL_ia32_cpuid PROC PUBLIC + mov r8,rbx + + xor eax,eax + cpuid + mov r11d,eax + + xor eax,eax + cmp ebx,0756e6547h + setne al + mov r9d,eax + cmp edx,049656e69h + setne al + or r9d,eax + cmp ecx,06c65746eh + setne al + or r9d,eax + jz $L$intel + + cmp ebx,068747541h + setne al + mov r10d,eax + cmp edx,069746E65h + setne al + or r10d,eax + cmp ecx,0444D4163h + setne al + or r10d,eax + jnz $L$intel + + + mov eax,080000000h + cpuid + cmp eax,080000001h + jb $L$intel + mov r10d,eax + mov eax,080000001h + cpuid + or r9d,ecx + and r9d,000000801h + + cmp r10d,080000008h + jb $L$intel + + mov eax,080000008h + cpuid + movzx r10,cl + inc r10 + + mov eax,1 + cpuid + bt edx,28 + jnc $L$generic + shr ebx,16 + cmp bl,r10b + ja $L$generic + and edx,0efffffffh + jmp $L$generic + +$L$intel:: + cmp r11d,4 + mov 
r10d,-1 + jb $L$nocacheinfo + + mov eax,4 + mov ecx,0 + cpuid + mov r10d,eax + shr r10d,14 + and r10d,0fffh + +$L$nocacheinfo:: + mov eax,1 + cpuid + and edx,0bfefffffh + cmp r9d,0 + jne $L$notintel + or edx,040000000h + and ah,15 + cmp ah,15 + jne $L$notintel + or edx,000100000h +$L$notintel:: + bt edx,28 + jnc $L$generic + and edx,0efffffffh + cmp r10d,0 + je $L$generic + + or edx,010000000h + shr ebx,16 + cmp bl,1 + ja $L$generic + and edx,0efffffffh +$L$generic:: + and r9d,000000800h + and ecx,0fffff7ffh + or r9d,ecx + + mov r10d,edx + bt r9d,27 + jnc $L$clear_avx + xor ecx,ecx +DB 00fh,001h,0d0h + + and eax,6 + cmp eax,6 + je $L$done +$L$clear_avx:: + mov eax,0efffe7ffh + and r9d,eax +$L$done:: + shl r9,32 + mov eax,r10d + mov rbx,r8 + or rax,r9 + DB 0F3h,0C3h ;repret +OPENSSL_ia32_cpuid ENDP + +PUBLIC OPENSSL_cleanse + +ALIGN 16 +OPENSSL_cleanse PROC PUBLIC + xor rax,rax + cmp rdx,15 + jae $L$ot + cmp rdx,0 + je $L$ret +$L$ittle:: + mov BYTE PTR[rcx],al + sub rdx,1 + lea rcx,QWORD PTR[1+rcx] + jnz $L$ittle +$L$ret:: + DB 0F3h,0C3h ;repret +ALIGN 16 +$L$ot:: + test rcx,7 + jz $L$aligned + mov BYTE PTR[rcx],al + lea rdx,QWORD PTR[((-1))+rdx] + lea rcx,QWORD PTR[1+rcx] + jmp $L$ot +$L$aligned:: + mov QWORD PTR[rcx],rax + lea rdx,QWORD PTR[((-8))+rdx] + test rdx,-8 + lea rcx,QWORD PTR[8+rcx] + jnz $L$aligned + cmp rdx,0 + jne $L$ittle + DB 0F3h,0C3h ;repret +OPENSSL_cleanse ENDP +PUBLIC OPENSSL_wipe_cpu + +ALIGN 16 +OPENSSL_wipe_cpu PROC PUBLIC + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + xor rcx,rcx + xor rdx,rdx + xor r8,r8 + xor r9,r9 + xor r10,r10 + xor r11,r11 + lea rax,QWORD PTR[8+rsp] + DB 0F3h,0C3h ;repret +OPENSSL_wipe_cpu ENDP +PUBLIC OPENSSL_ia32_rdrand + +ALIGN 16 +OPENSSL_ia32_rdrand PROC PUBLIC + mov ecx,8 +$L$oop_rdrand:: +DB 72,15,199,240 + jc $L$break_rdrand + loop $L$oop_rdrand +$L$break_rdrand:: + cmp rax,0 + cmove rax,rcx + DB 0F3h,0C3h ;repret +OPENSSL_ia32_rdrand ENDP + +.text$ ENDS +END diff --git a/deps/openssl/asm/x86-elf-gas/aes/aes-586.s b/deps/openssl/asm/x86-elf-gas/aes/aes-586.s index 34c90a068b..f586d3df60 100644 --- a/deps/openssl/asm/x86-elf-gas/aes/aes-586.s +++ b/deps/openssl/asm/x86-elf-gas/aes/aes-586.s @@ -2986,19 +2986,19 @@ _x86_AES_set_encrypt_key: popl %ebp ret .size _x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key -.globl AES_set_encrypt_key -.type AES_set_encrypt_key,@function +.globl private_AES_set_encrypt_key +.type private_AES_set_encrypt_key,@function .align 16 -AES_set_encrypt_key: -.L_AES_set_encrypt_key_begin: +private_AES_set_encrypt_key: +.L_private_AES_set_encrypt_key_begin: call _x86_AES_set_encrypt_key ret -.size AES_set_encrypt_key,.-.L_AES_set_encrypt_key_begin -.globl AES_set_decrypt_key -.type AES_set_decrypt_key,@function +.size private_AES_set_encrypt_key,.-.L_private_AES_set_encrypt_key_begin +.globl private_AES_set_decrypt_key +.type private_AES_set_decrypt_key,@function .align 16 -AES_set_decrypt_key: -.L_AES_set_decrypt_key_begin: +private_AES_set_decrypt_key: +.L_private_AES_set_decrypt_key_begin: call _x86_AES_set_encrypt_key cmpl $0,%eax je .L054proceed @@ -3227,8 +3227,8 @@ AES_set_decrypt_key: popl %ebx popl %ebp ret -.size AES_set_decrypt_key,.-.L_AES_set_decrypt_key_begin +.size private_AES_set_decrypt_key,.-.L_private_AES_set_decrypt_key_begin .byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 .byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 .byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.comm OPENSSL_ia32cap_P,4,4 +.comm 
OPENSSL_ia32cap_P,8,4 diff --git a/deps/openssl/asm/x86-elf-gas/aes/aesni-x86.s b/deps/openssl/asm/x86-elf-gas/aes/aesni-x86.s new file mode 100644 index 0000000000..d9f2688558 --- /dev/null +++ b/deps/openssl/asm/x86-elf-gas/aes/aesni-x86.s @@ -0,0 +1,2143 @@ +.file "../openssl/crypto/aes/asm/aesni-x86.s" +.text +.globl aesni_encrypt +.type aesni_encrypt,@function +.align 16 +aesni_encrypt: +.L_aesni_encrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L000enc1_loop_1: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L000enc1_loop_1 +.byte 102,15,56,221,209 + movups %xmm2,(%eax) + ret +.size aesni_encrypt,.-.L_aesni_encrypt_begin +.globl aesni_decrypt +.type aesni_decrypt,@function +.align 16 +aesni_decrypt: +.L_aesni_decrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L001dec1_loop_2: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L001dec1_loop_2 +.byte 102,15,56,223,209 + movups %xmm2,(%eax) + ret +.size aesni_decrypt,.-.L_aesni_decrypt_begin +.type _aesni_encrypt3,@function +.align 16 +_aesni_encrypt3: + movups (%edx),%xmm0 + shrl $1,%ecx + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups (%edx),%xmm0 +.L002enc3_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %ecx +.byte 102,15,56,220,225 + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leal 32(%edx),%edx +.byte 102,15,56,220,224 + movups (%edx),%xmm0 + jnz .L002enc3_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + ret +.size _aesni_encrypt3,.-_aesni_encrypt3 +.type _aesni_decrypt3,@function +.align 16 +_aesni_decrypt3: + movups (%edx),%xmm0 + shrl $1,%ecx + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups (%edx),%xmm0 +.L003dec3_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %ecx +.byte 102,15,56,222,225 + movups 16(%edx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leal 32(%edx),%edx +.byte 102,15,56,222,224 + movups (%edx),%xmm0 + jnz .L003dec3_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + ret +.size _aesni_decrypt3,.-_aesni_decrypt3 +.type _aesni_encrypt4,@function +.align 16 +_aesni_encrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shrl $1,%ecx + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups (%edx),%xmm0 +.L004enc4_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leal 32(%edx),%edx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups (%edx),%xmm0 + jnz .L004enc4_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + ret +.size _aesni_encrypt4,.-_aesni_encrypt4 +.type 
_aesni_decrypt4,@function +.align 16 +_aesni_decrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shrl $1,%ecx + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups (%edx),%xmm0 +.L005dec4_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %ecx +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 16(%edx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leal 32(%edx),%edx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups (%edx),%xmm0 + jnz .L005dec4_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + ret +.size _aesni_decrypt4,.-_aesni_decrypt4 +.type _aesni_encrypt6,@function +.align 16 +_aesni_encrypt6: + movups (%edx),%xmm0 + shrl $1,%ecx + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 + decl %ecx +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,241 + movups (%edx),%xmm0 +.byte 102,15,56,220,249 + jmp .L_aesni_encrypt6_enter +.align 16 +.L006enc6_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.align 16 +.L_aesni_encrypt6_enter: + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leal 32(%edx),%edx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups (%edx),%xmm0 + jnz .L006enc6_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + ret +.size _aesni_encrypt6,.-_aesni_encrypt6 +.type _aesni_decrypt6,@function +.align 16 +_aesni_decrypt6: + movups (%edx),%xmm0 + shrl $1,%ecx + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + decl %ecx +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,222,241 + movups (%edx),%xmm0 +.byte 102,15,56,222,249 + jmp .L_aesni_decrypt6_enter +.align 16 +.L007dec6_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %ecx +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.align 16 +.L_aesni_decrypt6_enter: + movups 16(%edx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leal 32(%edx),%edx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups (%edx),%xmm0 + jnz .L007dec6_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + ret +.size _aesni_decrypt6,.-_aesni_decrypt6 +.globl aesni_ecb_encrypt +.type aesni_ecb_encrypt,@function +.align 16 +aesni_ecb_encrypt: +.L_aesni_ecb_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 
20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + andl $-16,%eax + jz .L008ecb_ret + movl 240(%edx),%ecx + testl %ebx,%ebx + jz .L009ecb_decrypt + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb .L010ecb_enc_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp .L011ecb_enc_loop6_enter +.align 16 +.L012ecb_enc_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +.L011ecb_enc_loop6_enter: + call _aesni_encrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc .L012ecb_enc_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz .L008ecb_ret +.L010ecb_enc_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb .L013ecb_enc_one + movups 16(%esi),%xmm3 + je .L014ecb_enc_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb .L015ecb_enc_three + movups 48(%esi),%xmm5 + je .L016ecb_enc_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L008ecb_ret +.align 16 +.L013ecb_enc_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L017enc1_loop_3: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L017enc1_loop_3 +.byte 102,15,56,221,209 + movups %xmm2,(%edi) + jmp .L008ecb_ret +.align 16 +.L014ecb_enc_two: + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L008ecb_ret +.align 16 +.L015ecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L008ecb_ret +.align 16 +.L016ecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + jmp .L008ecb_ret +.align 16 +.L009ecb_decrypt: + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb .L018ecb_dec_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp .L019ecb_dec_loop6_enter +.align 16 +.L020ecb_dec_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +.L019ecb_dec_loop6_enter: + call _aesni_decrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc .L020ecb_dec_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz .L008ecb_ret +.L018ecb_dec_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb .L021ecb_dec_one + movups 16(%esi),%xmm3 + je .L022ecb_dec_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb .L023ecb_dec_three + movups 
48(%esi),%xmm5 + je .L024ecb_dec_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L008ecb_ret +.align 16 +.L021ecb_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L025dec1_loop_4: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L025dec1_loop_4 +.byte 102,15,56,223,209 + movups %xmm2,(%edi) + jmp .L008ecb_ret +.align 16 +.L022ecb_dec_two: + xorps %xmm4,%xmm4 + call _aesni_decrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L008ecb_ret +.align 16 +.L023ecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L008ecb_ret +.align 16 +.L024ecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +.L008ecb_ret: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin +.globl aesni_ccm64_encrypt_blocks +.type aesni_ccm64_encrypt_blocks,@function +.align 16 +aesni_ccm64_encrypt_blocks: +.L_aesni_ccm64_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + shrl $1,%ecx + leal (%edx),%ebp + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + movl %ecx,%ebx +.byte 102,15,56,0,253 +.L026ccm64_enc_outer: + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups (%esi),%xmm6 + xorps %xmm0,%xmm2 + movups 16(%ebp),%xmm1 + xorps %xmm6,%xmm0 + leal 32(%ebp),%edx + xorps %xmm0,%xmm3 + movups (%edx),%xmm0 +.L027ccm64_enc2_loop: +.byte 102,15,56,220,209 + decl %ecx +.byte 102,15,56,220,217 + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 + leal 32(%edx),%edx +.byte 102,15,56,220,216 + movups (%edx),%xmm0 + jnz .L027ccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq 16(%esp),%xmm7 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + decl %eax + leal 16(%esi),%esi + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) + leal 16(%edi),%edi +.byte 102,15,56,0,213 + jnz .L026ccm64_enc_outer + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin +.globl aesni_ccm64_decrypt_blocks +.type aesni_ccm64_decrypt_blocks,@function +.align 16 +aesni_ccm64_decrypt_blocks: +.L_aesni_ccm64_decrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 
+ movl %edx,%ebp + movl %ecx,%ebx +.byte 102,15,56,0,253 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L028enc1_loop_5: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L028enc1_loop_5 +.byte 102,15,56,221,209 + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 + leal 16(%esi),%esi + jmp .L029ccm64_dec_outer +.align 16 +.L029ccm64_dec_outer: + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movl %ebx,%ecx + movups %xmm6,(%edi) + leal 16(%edi),%edi +.byte 102,15,56,0,213 + subl $1,%eax + jz .L030ccm64_dec_break + movups (%ebp),%xmm0 + shrl $1,%ecx + movups 16(%ebp),%xmm1 + xorps %xmm0,%xmm6 + leal 32(%ebp),%edx + xorps %xmm0,%xmm2 + xorps %xmm6,%xmm3 + movups (%edx),%xmm0 +.L031ccm64_dec2_loop: +.byte 102,15,56,220,209 + decl %ecx +.byte 102,15,56,220,217 + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 + leal 32(%edx),%edx +.byte 102,15,56,220,216 + movups (%edx),%xmm0 + jnz .L031ccm64_dec2_loop + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + leal 16(%esi),%esi +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + jmp .L029ccm64_dec_outer +.align 16 +.L030ccm64_dec_break: + movl %ebp,%edx + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm6 + leal 32(%edx),%edx + xorps %xmm6,%xmm3 +.L032enc1_loop_6: +.byte 102,15,56,220,217 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L032enc1_loop_6 +.byte 102,15,56,221,217 + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin +.globl aesni_ctr32_encrypt_blocks +.type aesni_ctr32_encrypt_blocks,@function +.align 16 +aesni_ctr32_encrypt_blocks: +.L_aesni_ctr32_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl %esp,%ebp + subl $88,%esp + andl $-16,%esp + movl %ebp,80(%esp) + cmpl $1,%eax + je .L033ctr32_one_shortcut + movdqu (%ebx),%xmm7 + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $6,%ecx + xorl %ebp,%ebp + movl %ecx,16(%esp) + movl %ecx,20(%esp) + movl %ecx,24(%esp) + movl %ebp,28(%esp) +.byte 102,15,58,22,251,3 +.byte 102,15,58,34,253,3 + movl 240(%edx),%ecx + bswap %ebx + pxor %xmm1,%xmm1 + pxor %xmm0,%xmm0 + movdqa (%esp),%xmm2 +.byte 102,15,58,34,203,0 + leal 3(%ebx),%ebp +.byte 102,15,58,34,197,0 + incl %ebx +.byte 102,15,58,34,203,1 + incl %ebp +.byte 102,15,58,34,197,1 + incl %ebx +.byte 102,15,58,34,203,2 + incl %ebp +.byte 102,15,58,34,197,2 + movdqa %xmm1,48(%esp) +.byte 102,15,56,0,202 + movdqa %xmm0,64(%esp) +.byte 102,15,56,0,194 + pshufd $192,%xmm1,%xmm2 + pshufd $128,%xmm1,%xmm3 + cmpl $6,%eax + jb .L034ctr32_tail + movdqa %xmm7,32(%esp) + shrl $1,%ecx + movl %edx,%ebp + movl %ecx,%ebx + subl $6,%eax + jmp .L035ctr32_loop6 +.align 16 +.L035ctr32_loop6: + pshufd $64,%xmm1,%xmm4 + movdqa 32(%esp),%xmm1 + pshufd $192,%xmm0,%xmm5 + por %xmm1,%xmm2 + pshufd $128,%xmm0,%xmm6 + por %xmm1,%xmm3 + pshufd $64,%xmm0,%xmm7 + por %xmm1,%xmm4 + por %xmm1,%xmm5 + por %xmm1,%xmm6 + por %xmm1,%xmm7 + movups (%ebp),%xmm0 + movups 16(%ebp),%xmm1 + leal 32(%ebp),%edx + decl %ecx + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 
+.byte 102,15,56,220,241 + movups (%edx),%xmm0 +.byte 102,15,56,220,249 + call .L_aesni_encrypt6_enter + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups %xmm2,(%edi) + movdqa 16(%esp),%xmm0 + xorps %xmm1,%xmm4 + movdqa 48(%esp),%xmm1 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + paddd %xmm0,%xmm1 + paddd 64(%esp),%xmm0 + movdqa (%esp),%xmm2 + movups 48(%esi),%xmm3 + movups 64(%esi),%xmm4 + xorps %xmm3,%xmm5 + movups 80(%esi),%xmm3 + leal 96(%esi),%esi + movdqa %xmm1,48(%esp) +.byte 102,15,56,0,202 + xorps %xmm4,%xmm6 + movups %xmm5,48(%edi) + xorps %xmm3,%xmm7 + movdqa %xmm0,64(%esp) +.byte 102,15,56,0,194 + movups %xmm6,64(%edi) + pshufd $192,%xmm1,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movl %ebx,%ecx + pshufd $128,%xmm1,%xmm3 + subl $6,%eax + jnc .L035ctr32_loop6 + addl $6,%eax + jz .L036ctr32_ret + movl %ebp,%edx + leal 1(,%ecx,2),%ecx + movdqa 32(%esp),%xmm7 +.L034ctr32_tail: + por %xmm7,%xmm2 + cmpl $2,%eax + jb .L037ctr32_one + pshufd $64,%xmm1,%xmm4 + por %xmm7,%xmm3 + je .L038ctr32_two + pshufd $192,%xmm0,%xmm5 + por %xmm7,%xmm4 + cmpl $4,%eax + jb .L039ctr32_three + pshufd $128,%xmm0,%xmm6 + por %xmm7,%xmm5 + je .L040ctr32_four + por %xmm7,%xmm6 + call _aesni_encrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm4 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm5 + movups %xmm2,(%edi) + xorps %xmm1,%xmm6 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L036ctr32_ret +.align 16 +.L033ctr32_one_shortcut: + movups (%ebx),%xmm2 + movl 240(%edx),%ecx +.L037ctr32_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L041enc1_loop_7: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L041enc1_loop_7 +.byte 102,15,56,221,209 + movups (%esi),%xmm6 + xorps %xmm2,%xmm6 + movups %xmm6,(%edi) + jmp .L036ctr32_ret +.align 16 +.L038ctr32_two: + call _aesni_encrypt3 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L036ctr32_ret +.align 16 +.L039ctr32_three: + call _aesni_encrypt3 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + movups 32(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L036ctr32_ret +.align 16 +.L040ctr32_four: + call _aesni_encrypt4 + movups (%esi),%xmm6 + movups 16(%esi),%xmm7 + movups 32(%esi),%xmm1 + xorps %xmm6,%xmm2 + movups 48(%esi),%xmm0 + xorps %xmm7,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +.L036ctr32_ret: + movl 80(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin +.globl aesni_xts_encrypt +.type aesni_xts_encrypt,@function +.align 16 +aesni_xts_encrypt: +.L_aesni_xts_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L042enc1_loop_8: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L042enc1_loop_8 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi 
+ movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + movl 240(%edx),%ecx + andl $-16,%esp + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + movl %edx,%ebp + movl %ecx,%ebx + subl $96,%eax + jc .L043xts_enc_short + shrl $1,%ecx + movl %ecx,%ebx + jmp .L044xts_enc_loop6 +.align 16 +.L044xts_enc_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + leal 32(%ebp),%edx + pxor 16(%esp),%xmm3 +.byte 102,15,56,220,209 + pxor 32(%esp),%xmm4 +.byte 102,15,56,220,217 + pxor 48(%esp),%xmm5 + decl %ecx +.byte 102,15,56,220,225 + pxor 64(%esp),%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,241 + movups (%edx),%xmm0 +.byte 102,15,56,220,249 + call .L_aesni_encrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + movl %ebx,%ecx + pxor %xmm2,%xmm1 + subl $96,%eax + jnc .L044xts_enc_loop6 + leal 1(,%ecx,2),%ecx + movl %ebp,%edx + movl %ecx,%ebx +.L043xts_enc_short: + addl $96,%eax + jz .L045xts_enc_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb .L046xts_enc_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je .L047xts_enc_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb .L048xts_enc_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je .L049xts_enc_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 
48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call _aesni_encrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp .L050xts_enc_done +.align 16 +.L046xts_enc_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L051enc1_loop_9: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L051enc1_loop_9 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp .L050xts_enc_done +.align 16 +.L047xts_enc_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L050xts_enc_done +.align 16 +.L048xts_enc_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call _aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp .L050xts_enc_done +.align 16 +.L049xts_enc_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call _aesni_encrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L050xts_enc_done +.align 16 +.L045xts_enc_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz .L052xts_enc_ret + movdqa %xmm1,%xmm5 + movl %eax,112(%esp) + jmp .L053xts_enc_steal +.align 16 +.L050xts_enc_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz .L052xts_enc_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm5 + paddq %xmm1,%xmm1 + pand 96(%esp),%xmm5 + pxor %xmm1,%xmm5 +.L053xts_enc_steal: + movzbl (%esi),%ecx + movzbl -16(%edi),%edx + leal 1(%esi),%esi + movb %cl,-16(%edi) + movb %dl,(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz .L053xts_enc_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups -16(%edi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L054enc1_loop_10: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L054enc1_loop_10 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,-16(%edi) +.L052xts_enc_ret: + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin +.globl aesni_xts_decrypt +.type aesni_xts_decrypt,@function +.align 16 +aesni_xts_decrypt: +.L_aesni_xts_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + 
xorps %xmm0,%xmm2 +.L055enc1_loop_11: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L055enc1_loop_11 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + andl $-16,%esp + xorl %ebx,%ebx + testl $15,%eax + setnz %bl + shll $4,%ebx + subl %ebx,%eax + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ecx,%ebx + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + subl $96,%eax + jc .L056xts_dec_short + shrl $1,%ecx + movl %ecx,%ebx + jmp .L057xts_dec_loop6 +.align 16 +.L057xts_dec_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + leal 32(%ebp),%edx + pxor 16(%esp),%xmm3 +.byte 102,15,56,222,209 + pxor 32(%esp),%xmm4 +.byte 102,15,56,222,217 + pxor 48(%esp),%xmm5 + decl %ecx +.byte 102,15,56,222,225 + pxor 64(%esp),%xmm6 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,222,241 + movups (%edx),%xmm0 +.byte 102,15,56,222,249 + call .L_aesni_decrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + movl %ebx,%ecx + pxor %xmm2,%xmm1 + subl $96,%eax + jnc .L057xts_dec_loop6 + leal 1(,%ecx,2),%ecx + movl %ebp,%edx + movl %ecx,%ebx +.L056xts_dec_short: + addl $96,%eax + jz .L058xts_dec_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb .L059xts_dec_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je .L060xts_dec_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb .L061xts_dec_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je .L062xts_dec_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa 
%xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call _aesni_decrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp .L063xts_dec_done +.align 16 +.L059xts_dec_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L064dec1_loop_12: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L064dec1_loop_12 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp .L063xts_dec_done +.align 16 +.L060xts_dec_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call _aesni_decrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L063xts_dec_done +.align 16 +.L061xts_dec_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call _aesni_decrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp .L063xts_dec_done +.align 16 +.L062xts_dec_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call _aesni_decrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L063xts_dec_done +.align 16 +.L058xts_dec_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz .L065xts_dec_ret + movl %eax,112(%esp) + jmp .L066xts_dec_only_one_more +.align 16 +.L063xts_dec_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz .L065xts_dec_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 +.L066xts_dec_only_one_more: + pshufd $19,%xmm0,%xmm5 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm5 + pxor %xmm1,%xmm5 + movl %ebp,%edx + movl %ebx,%ecx + movups (%esi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L067dec1_loop_13: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L067dec1_loop_13 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) +.L068xts_dec_steal: + movzbl 16(%esi),%ecx + movzbl (%edi),%edx + leal 1(%esi),%esi + movb %cl,(%edi) + movb %dl,16(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz .L068xts_dec_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + 
movups (%edi),%xmm2 + xorps %xmm6,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L069dec1_loop_14: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L069dec1_loop_14 +.byte 102,15,56,223,209 + xorps %xmm6,%xmm2 + movups %xmm2,(%edi) +.L065xts_dec_ret: + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin +.globl aesni_cbc_encrypt +.type aesni_cbc_encrypt,@function +.align 16 +aesni_cbc_encrypt: +.L_aesni_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl %esp,%ebx + movl 24(%esp),%edi + subl $24,%ebx + movl 28(%esp),%eax + andl $-16,%ebx + movl 32(%esp),%edx + movl 36(%esp),%ebp + testl %eax,%eax + jz .L070cbc_abort + cmpl $0,40(%esp) + xchgl %esp,%ebx + movups (%ebp),%xmm7 + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ebx,16(%esp) + movl %ecx,%ebx + je .L071cbc_decrypt + movaps %xmm7,%xmm2 + cmpl $16,%eax + jb .L072cbc_enc_tail + subl $16,%eax + jmp .L073cbc_enc_loop +.align 16 +.L073cbc_enc_loop: + movups (%esi),%xmm7 + leal 16(%esi),%esi + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm7 + leal 32(%edx),%edx + xorps %xmm7,%xmm2 +.L074enc1_loop_15: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L074enc1_loop_15 +.byte 102,15,56,221,209 + movl %ebx,%ecx + movl %ebp,%edx + movups %xmm2,(%edi) + leal 16(%edi),%edi + subl $16,%eax + jnc .L073cbc_enc_loop + addl $16,%eax + jnz .L072cbc_enc_tail + movaps %xmm2,%xmm7 + jmp .L075cbc_ret +.L072cbc_enc_tail: + movl %eax,%ecx +.long 2767451785 + movl $16,%ecx + subl %eax,%ecx + xorl %eax,%eax +.long 2868115081 + leal -16(%edi),%edi + movl %ebx,%ecx + movl %edi,%esi + movl %ebp,%edx + jmp .L073cbc_enc_loop +.align 16 +.L071cbc_decrypt: + cmpl $80,%eax + jbe .L076cbc_dec_tail + movaps %xmm7,(%esp) + subl $80,%eax + jmp .L077cbc_dec_loop6_enter +.align 16 +.L078cbc_dec_loop6: + movaps %xmm0,(%esp) + movups %xmm7,(%edi) + leal 16(%edi),%edi +.L077cbc_dec_loop6_enter: + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + call _aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%esi),%xmm0 + xorps %xmm1,%xmm7 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 96(%esi),%esi + movups %xmm4,32(%edi) + movl %ebx,%ecx + movups %xmm5,48(%edi) + movl %ebp,%edx + movups %xmm6,64(%edi) + leal 80(%edi),%edi + subl $96,%eax + ja .L078cbc_dec_loop6 + movaps %xmm7,%xmm2 + movaps %xmm0,%xmm7 + addl $80,%eax + jle .L079cbc_dec_tail_collected + movups %xmm2,(%edi) + leal 16(%edi),%edi +.L076cbc_dec_tail: + movups (%esi),%xmm2 + movaps %xmm2,%xmm6 + cmpl $16,%eax + jbe .L080cbc_dec_one + movups 16(%esi),%xmm3 + movaps %xmm3,%xmm5 + cmpl $32,%eax + jbe .L081cbc_dec_two + movups 32(%esi),%xmm4 + cmpl $48,%eax + jbe .L082cbc_dec_three + movups 48(%esi),%xmm5 + cmpl $64,%eax + jbe .L083cbc_dec_four + movups 64(%esi),%xmm6 + movaps %xmm7,(%esp) + movups (%esi),%xmm2 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm7 + xorps %xmm0,%xmm6 + movups 
%xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movaps %xmm6,%xmm2 + subl $80,%eax + jmp .L079cbc_dec_tail_collected +.align 16 +.L080cbc_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L084dec1_loop_16: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L084dec1_loop_16 +.byte 102,15,56,223,209 + xorps %xmm7,%xmm2 + movaps %xmm6,%xmm7 + subl $16,%eax + jmp .L079cbc_dec_tail_collected +.align 16 +.L081cbc_dec_two: + xorps %xmm4,%xmm4 + call _aesni_decrypt3 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movaps %xmm3,%xmm2 + leal 16(%edi),%edi + movaps %xmm5,%xmm7 + subl $32,%eax + jmp .L079cbc_dec_tail_collected +.align 16 +.L082cbc_dec_three: + call _aesni_decrypt3 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm5,%xmm4 + movups %xmm2,(%edi) + movaps %xmm4,%xmm2 + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movups 32(%esi),%xmm7 + subl $48,%eax + jmp .L079cbc_dec_tail_collected +.align 16 +.L083cbc_dec_four: + call _aesni_decrypt4 + movups 16(%esi),%xmm1 + movups 32(%esi),%xmm0 + xorps %xmm7,%xmm2 + movups 48(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movaps %xmm5,%xmm2 + subl $64,%eax +.L079cbc_dec_tail_collected: + andl $15,%eax + jnz .L085cbc_dec_tail_partial + movups %xmm2,(%edi) + jmp .L075cbc_ret +.align 16 +.L085cbc_dec_tail_partial: + movaps %xmm2,(%esp) + movl $16,%ecx + movl %esp,%esi + subl %eax,%ecx +.long 2767451785 +.L075cbc_ret: + movl 16(%esp),%esp + movl 36(%esp),%ebp + movups %xmm7,(%ebp) +.L070cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin +.type _aesni_set_encrypt_key,@function +.align 16 +_aesni_set_encrypt_key: + testl %eax,%eax + jz .L086bad_pointer + testl %edx,%edx + jz .L086bad_pointer + movups (%eax),%xmm0 + xorps %xmm4,%xmm4 + leal 16(%edx),%edx + cmpl $256,%ecx + je .L08714rounds + cmpl $192,%ecx + je .L08812rounds + cmpl $128,%ecx + jne .L089bad_keybits +.align 16 +.L09010rounds: + movl $9,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,200,1 + call .L091key_128_cold +.byte 102,15,58,223,200,2 + call .L092key_128 +.byte 102,15,58,223,200,4 + call .L092key_128 +.byte 102,15,58,223,200,8 + call .L092key_128 +.byte 102,15,58,223,200,16 + call .L092key_128 +.byte 102,15,58,223,200,32 + call .L092key_128 +.byte 102,15,58,223,200,64 + call .L092key_128 +.byte 102,15,58,223,200,128 + call .L092key_128 +.byte 102,15,58,223,200,27 + call .L092key_128 +.byte 102,15,58,223,200,54 + call .L092key_128 + movups %xmm0,(%edx) + movl %ecx,80(%edx) + xorl %eax,%eax + ret +.align 16 +.L092key_128: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.L091key_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 16 +.L08812rounds: + movq 16(%eax),%xmm2 + movl $11,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,202,1 + call .L093key_192a_cold +.byte 102,15,58,223,202,2 + call .L094key_192b +.byte 102,15,58,223,202,4 + call .L095key_192a +.byte 102,15,58,223,202,8 + call .L094key_192b +.byte 102,15,58,223,202,16 + call .L095key_192a +.byte 102,15,58,223,202,32 + call .L094key_192b +.byte 102,15,58,223,202,64 + call .L095key_192a +.byte 102,15,58,223,202,128 + call .L094key_192b + movups %xmm0,(%edx) + movl %ecx,48(%edx) + 
xorl %eax,%eax + ret +.align 16 +.L095key_192a: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.align 16 +.L093key_192a_cold: + movaps %xmm2,%xmm5 +.L096key_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + ret +.align 16 +.L094key_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%edx) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%edx) + leal 32(%edx),%edx + jmp .L096key_192b_warm +.align 16 +.L08714rounds: + movups 16(%eax),%xmm2 + movl $13,%ecx + leal 16(%edx),%edx + movups %xmm0,-32(%edx) + movups %xmm2,-16(%edx) +.byte 102,15,58,223,202,1 + call .L097key_256a_cold +.byte 102,15,58,223,200,1 + call .L098key_256b +.byte 102,15,58,223,202,2 + call .L099key_256a +.byte 102,15,58,223,200,2 + call .L098key_256b +.byte 102,15,58,223,202,4 + call .L099key_256a +.byte 102,15,58,223,200,4 + call .L098key_256b +.byte 102,15,58,223,202,8 + call .L099key_256a +.byte 102,15,58,223,200,8 + call .L098key_256b +.byte 102,15,58,223,202,16 + call .L099key_256a +.byte 102,15,58,223,200,16 + call .L098key_256b +.byte 102,15,58,223,202,32 + call .L099key_256a +.byte 102,15,58,223,200,32 + call .L098key_256b +.byte 102,15,58,223,202,64 + call .L099key_256a + movups %xmm0,(%edx) + movl %ecx,16(%edx) + xorl %eax,%eax + ret +.align 16 +.L099key_256a: + movups %xmm2,(%edx) + leal 16(%edx),%edx +.L097key_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 16 +.L098key_256b: + movups %xmm0,(%edx) + leal 16(%edx),%edx + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + ret +.align 4 +.L086bad_pointer: + movl $-1,%eax + ret +.align 4 +.L089bad_keybits: + movl $-2,%eax + ret +.size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key +.globl aesni_set_encrypt_key +.type aesni_set_encrypt_key,@function +.align 16 +aesni_set_encrypt_key: +.L_aesni_set_encrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call _aesni_set_encrypt_key + ret +.size aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin +.globl aesni_set_decrypt_key +.type aesni_set_decrypt_key,@function +.align 16 +aesni_set_decrypt_key: +.L_aesni_set_decrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call _aesni_set_encrypt_key + movl 12(%esp),%edx + shll $4,%ecx + testl %eax,%eax + jnz .L100dec_key_ret + leal 16(%edx,%ecx,1),%eax + movups (%edx),%xmm0 + movups (%eax),%xmm1 + movups %xmm0,(%eax) + movups %xmm1,(%edx) + leal 16(%edx),%edx + leal -16(%eax),%eax +.L101dec_key_inverse: + movups (%edx),%xmm0 + movups (%eax),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leal 16(%edx),%edx + leal -16(%eax),%eax + movups %xmm0,16(%eax) + movups %xmm1,-16(%edx) + cmpl %edx,%eax + ja .L101dec_key_inverse + movups (%edx),%xmm0 +.byte 102,15,56,219,192 + movups %xmm0,(%edx) + xorl %eax,%eax +.L100dec_key_ret: + ret +.size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +.byte 115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm/x86-elf-gas/camellia/cmll-x86.s 
b/deps/openssl/asm/x86-elf-gas/camellia/cmll-x86.s index a896314cfe..5c87910e34 100644 --- a/deps/openssl/asm/x86-elf-gas/camellia/cmll-x86.s +++ b/deps/openssl/asm/x86-elf-gas/camellia/cmll-x86.s @@ -1537,11 +1537,11 @@ Camellia_Ekeygen: popl %ebp ret .size Camellia_Ekeygen,.-.L_Camellia_Ekeygen_begin -.globl Camellia_set_key -.type Camellia_set_key,@function +.globl private_Camellia_set_key +.type private_Camellia_set_key,@function .align 16 -Camellia_set_key: -.L_Camellia_set_key_begin: +private_Camellia_set_key: +.L_private_Camellia_set_key_begin: pushl %ebx movl 8(%esp),%ecx movl 12(%esp),%ebx @@ -1571,7 +1571,7 @@ Camellia_set_key: .L014done: popl %ebx ret -.size Camellia_set_key,.-.L_Camellia_set_key_begin +.size private_Camellia_set_key,.-.L_private_Camellia_set_key_begin .align 64 .LCamellia_SIGMA: .long 2694735487,1003262091,3061508184,1286239154,3337565999,3914302142,1426019237,4057165596,283453434,3731369245,2958461122,3018244605,0,0,0,0 diff --git a/deps/openssl/asm/x86-elf-gas/rc4/rc4-586.s b/deps/openssl/asm/x86-elf-gas/rc4/rc4-586.s index 9ba94e4b1a..513ce6a58b 100644 --- a/deps/openssl/asm/x86-elf-gas/rc4/rc4-586.s +++ b/deps/openssl/asm/x86-elf-gas/rc4/rc4-586.s @@ -29,11 +29,146 @@ RC4: movl (%edi,%eax,4),%ecx andl $-4,%edx jz .L002loop1 - leal -4(%esi,%edx,1),%edx - movl %edx,28(%esp) + testl $-8,%edx movl %ebp,32(%esp) + jz .L003go4loop4 + leal OPENSSL_ia32cap_P,%ebp + btl $26,(%ebp) + jnc .L003go4loop4 + movl 32(%esp),%ebp + andl $-8,%edx + leal -8(%esi,%edx,1),%edx + movl %edx,-4(%edi) + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + movq (%esi),%mm0 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm2 + jmp .L004loop_mmx_enter +.align 16 +.L005loop_mmx: + addb %cl,%bl + psllq $56,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movq (%esi),%mm0 + movq %mm2,-8(%ebp,%esi,1) + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm2 +.L004loop_mmx_enter: + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm0,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $8,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $16,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $24,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $32,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $40,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + 
movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $48,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + movl %ebx,%edx + xorl %ebx,%ebx + movb %dl,%bl + cmpl -4(%edi),%esi + leal 8(%esi),%esi + jb .L005loop_mmx + psllq $56,%mm1 + pxor %mm1,%mm2 + movq %mm2,-8(%ebp,%esi,1) + emms + cmpl 24(%esp),%esi + je .L006done + jmp .L002loop1 .align 16 -.L003loop4: +.L003go4loop4: + leal -4(%esi,%edx,1),%edx + movl %edx,28(%esp) +.L007loop4: addb %cl,%bl movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) @@ -79,9 +214,9 @@ RC4: movl %ebp,(%ecx,%esi,1) leal 4(%esi),%esi movl (%edi,%eax,4),%ecx - jb .L003loop4 + jb .L007loop4 cmpl 24(%esp),%esi - je .L004done + je .L006done movl 32(%esp),%ebp .align 16 .L002loop1: @@ -99,11 +234,11 @@ RC4: cmpl 24(%esp),%esi movb %dl,-1(%ebp,%esi,1) jb .L002loop1 - jmp .L004done + jmp .L006done .align 16 .L001RC4_CHAR: movzbl (%edi,%eax,1),%ecx -.L005cloop1: +.L008cloop1: addb %cl,%bl movzbl (%edi,%ebx,1),%edx movb %cl,(%edi,%ebx,1) @@ -116,10 +251,10 @@ RC4: movzbl (%edi,%eax,1),%ecx cmpl 24(%esp),%esi movb %dl,-1(%ebp,%esi,1) - jb .L005cloop1 -.L004done: + jb .L008cloop1 +.L006done: decb %al - movb %bl,-4(%edi) + movl %ebx,-4(%edi) movb %al,-8(%edi) .L000abort: popl %edi @@ -128,11 +263,11 @@ RC4: popl %ebp ret .size RC4,.-.L_RC4_begin -.globl RC4_set_key -.type RC4_set_key,@function +.globl private_RC4_set_key +.type private_RC4_set_key,@function .align 16 -RC4_set_key: -.L_RC4_set_key_begin: +private_RC4_set_key: +.L_private_RC4_set_key_begin: pushl %ebp pushl %ebx pushl %esi @@ -147,53 +282,53 @@ RC4_set_key: xorl %eax,%eax movl %ebp,-4(%edi) btl $20,(%edx) - jc .L006c1stloop + jc .L009c1stloop .align 16 -.L007w1stloop: +.L010w1stloop: movl %eax,(%edi,%eax,4) addb $1,%al - jnc .L007w1stloop + jnc .L010w1stloop xorl %ecx,%ecx xorl %edx,%edx .align 16 -.L008w2ndloop: +.L011w2ndloop: movl (%edi,%ecx,4),%eax addb (%esi,%ebp,1),%dl addb %al,%dl addl $1,%ebp movl (%edi,%edx,4),%ebx - jnz .L009wnowrap + jnz .L012wnowrap movl -4(%edi),%ebp -.L009wnowrap: +.L012wnowrap: movl %eax,(%edi,%edx,4) movl %ebx,(%edi,%ecx,4) addb $1,%cl - jnc .L008w2ndloop - jmp .L010exit + jnc .L011w2ndloop + jmp .L013exit .align 16 -.L006c1stloop: +.L009c1stloop: movb %al,(%edi,%eax,1) addb $1,%al - jnc .L006c1stloop + jnc .L009c1stloop xorl %ecx,%ecx xorl %edx,%edx xorl %ebx,%ebx .align 16 -.L011c2ndloop: +.L014c2ndloop: movb (%edi,%ecx,1),%al addb (%esi,%ebp,1),%dl addb %al,%dl addl $1,%ebp movb (%edi,%edx,1),%bl - jnz .L012cnowrap + jnz .L015cnowrap movl -4(%edi),%ebp -.L012cnowrap: +.L015cnowrap: movb %al,(%edi,%edx,1) movb %bl,(%edi,%ecx,1) addb $1,%cl - jnc .L011c2ndloop + jnc .L014c2ndloop movl $-1,256(%edi) -.L010exit: +.L013exit: xorl %eax,%eax movl %eax,-8(%edi) movl %eax,-4(%edi) @@ -202,29 +337,36 @@ RC4_set_key: popl %ebx popl %ebp ret -.size RC4_set_key,.-.L_RC4_set_key_begin +.size private_RC4_set_key,.-.L_private_RC4_set_key_begin .globl RC4_options .type RC4_options,@function .align 16 RC4_options: .L_RC4_options_begin: - call .L013pic_point -.L013pic_point: + call .L016pic_point +.L016pic_point: popl %eax - leal .L014opts-.L013pic_point(%eax),%eax + leal .L017opts-.L016pic_point(%eax),%eax leal OPENSSL_ia32cap_P,%edx - btl $20,(%edx) - jnc .L015skip + movl (%edx),%edx + btl $20,%edx + jc .L0181xchar + btl $26,%edx + jnc .L019ret + addl $25,%eax + ret +.L0181xchar: addl $12,%eax -.L015skip: +.L019ret: ret .align 64 
-.L014opts: +.L017opts: .byte 114,99,52,40,52,120,44,105,110,116,41,0 .byte 114,99,52,40,49,120,44,99,104,97,114,41,0 +.byte 114,99,52,40,56,120,44,109,109,120,41,0 .byte 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89 .byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 .byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 .size RC4_options,.-.L_RC4_options_begin -.comm OPENSSL_ia32cap_P,4,4 +.comm OPENSSL_ia32cap_P,8,4 diff --git a/deps/openssl/asm/x86-elf-gas/sha/sha1-586.s b/deps/openssl/asm/x86-elf-gas/sha/sha1-586.s index cccb1aba85..e77f65412f 100644 --- a/deps/openssl/asm/x86-elf-gas/sha/sha1-586.s +++ b/deps/openssl/asm/x86-elf-gas/sha/sha1-586.s @@ -12,11 +12,12 @@ sha1_block_data_order: movl 20(%esp),%ebp movl 24(%esp),%esi movl 28(%esp),%eax - subl $64,%esp + subl $76,%esp shll $6,%eax addl %esi,%eax - movl %eax,92(%esp) + movl %eax,104(%esp) movl 16(%ebp),%edi + jmp .L000loop .align 16 .L000loop: movl (%esi),%eax @@ -67,7 +68,7 @@ sha1_block_data_order: movl %ebx,52(%esp) movl %ecx,56(%esp) movl %edx,60(%esp) - movl %esi,88(%esp) + movl %esi,100(%esp) movl (%ebp),%eax movl 4(%ebp),%ebx movl 8(%ebp),%ecx @@ -78,10 +79,10 @@ sha1_block_data_order: roll $5,%ebp xorl %edx,%esi addl %edi,%ebp - andl %ebx,%esi movl (%esp),%edi - xorl %edx,%esi + andl %ebx,%esi rorl $2,%ebx + xorl %edx,%esi leal 1518500249(%ebp,%edi,1),%ebp addl %esi,%ebp @@ -90,10 +91,10 @@ sha1_block_data_order: roll $5,%ebp xorl %ecx,%edi addl %edx,%ebp - andl %eax,%edi movl 4(%esp),%edx - xorl %ecx,%edi + andl %eax,%edi rorl $2,%eax + xorl %ecx,%edi leal 1518500249(%ebp,%edx,1),%ebp addl %edi,%ebp @@ -102,10 +103,10 @@ sha1_block_data_order: roll $5,%ebp xorl %ebx,%edx addl %ecx,%ebp - andl %esi,%edx movl 8(%esp),%ecx - xorl %ebx,%edx + andl %esi,%edx rorl $2,%esi + xorl %ebx,%edx leal 1518500249(%ebp,%ecx,1),%ebp addl %edx,%ebp @@ -114,10 +115,10 @@ sha1_block_data_order: roll $5,%ebp xorl %eax,%ecx addl %ebx,%ebp - andl %edi,%ecx movl 12(%esp),%ebx - xorl %eax,%ecx + andl %edi,%ecx rorl $2,%edi + xorl %eax,%ecx leal 1518500249(%ebp,%ebx,1),%ebp addl %ecx,%ebp @@ -126,10 +127,10 @@ sha1_block_data_order: roll $5,%ebp xorl %esi,%ebx addl %eax,%ebp - andl %edx,%ebx movl 16(%esp),%eax - xorl %esi,%ebx + andl %edx,%ebx rorl $2,%edx + xorl %esi,%ebx leal 1518500249(%ebp,%eax,1),%ebp addl %ebx,%ebp @@ -138,10 +139,10 @@ sha1_block_data_order: roll $5,%ebp xorl %edi,%eax addl %esi,%ebp - andl %ecx,%eax movl 20(%esp),%esi - xorl %edi,%eax + andl %ecx,%eax rorl $2,%ecx + xorl %edi,%eax leal 1518500249(%ebp,%esi,1),%ebp addl %eax,%ebp @@ -150,10 +151,10 @@ sha1_block_data_order: roll $5,%ebp xorl %edx,%esi addl %edi,%ebp - andl %ebx,%esi movl 24(%esp),%edi - xorl %edx,%esi + andl %ebx,%esi rorl $2,%ebx + xorl %edx,%esi leal 1518500249(%ebp,%edi,1),%ebp addl %esi,%ebp @@ -162,10 +163,10 @@ sha1_block_data_order: roll $5,%ebp xorl %ecx,%edi addl %edx,%ebp - andl %eax,%edi movl 28(%esp),%edx - xorl %ecx,%edi + andl %eax,%edi rorl $2,%eax + xorl %ecx,%edi leal 1518500249(%ebp,%edx,1),%ebp addl %edi,%ebp @@ -174,10 +175,10 @@ sha1_block_data_order: roll $5,%ebp xorl %ebx,%edx addl %ecx,%ebp - andl %esi,%edx movl 32(%esp),%ecx - xorl %ebx,%edx + andl %esi,%edx rorl $2,%esi + xorl %ebx,%edx leal 1518500249(%ebp,%ecx,1),%ebp addl %edx,%ebp @@ -186,10 +187,10 @@ sha1_block_data_order: roll $5,%ebp xorl %eax,%ecx addl %ebx,%ebp - andl %edi,%ecx movl 36(%esp),%ebx - xorl %eax,%ecx + andl %edi,%ecx rorl $2,%edi + xorl %eax,%ecx leal 1518500249(%ebp,%ebx,1),%ebp addl %ecx,%ebp @@ -198,10 +199,10 @@ 
sha1_block_data_order: roll $5,%ebp xorl %esi,%ebx addl %eax,%ebp - andl %edx,%ebx movl 40(%esp),%eax - xorl %esi,%ebx + andl %edx,%ebx rorl $2,%edx + xorl %esi,%ebx leal 1518500249(%ebp,%eax,1),%ebp addl %ebx,%ebp @@ -210,10 +211,10 @@ sha1_block_data_order: roll $5,%ebp xorl %edi,%eax addl %esi,%ebp - andl %ecx,%eax movl 44(%esp),%esi - xorl %edi,%eax + andl %ecx,%eax rorl $2,%ecx + xorl %edi,%eax leal 1518500249(%ebp,%esi,1),%ebp addl %eax,%ebp @@ -222,10 +223,10 @@ sha1_block_data_order: roll $5,%ebp xorl %edx,%esi addl %edi,%ebp - andl %ebx,%esi movl 48(%esp),%edi - xorl %edx,%esi + andl %ebx,%esi rorl $2,%ebx + xorl %edx,%esi leal 1518500249(%ebp,%edi,1),%ebp addl %esi,%ebp @@ -234,10 +235,10 @@ sha1_block_data_order: roll $5,%ebp xorl %ecx,%edi addl %edx,%ebp - andl %eax,%edi movl 52(%esp),%edx - xorl %ecx,%edi + andl %eax,%edi rorl $2,%eax + xorl %ecx,%edi leal 1518500249(%ebp,%edx,1),%ebp addl %edi,%ebp @@ -246,10 +247,10 @@ sha1_block_data_order: roll $5,%ebp xorl %ebx,%edx addl %ecx,%ebp - andl %esi,%edx movl 56(%esp),%ecx - xorl %ebx,%edx + andl %esi,%edx rorl $2,%esi + xorl %ebx,%edx leal 1518500249(%ebp,%ecx,1),%ebp addl %edx,%ebp @@ -258,1162 +259,1099 @@ sha1_block_data_order: roll $5,%ebp xorl %eax,%ecx addl %ebx,%ebp - andl %edi,%ecx movl 60(%esp),%ebx - xorl %eax,%ecx + andl %edi,%ecx rorl $2,%edi + xorl %eax,%ecx leal 1518500249(%ebp,%ebx,1),%ebp + movl (%esp),%ebx addl %ebp,%ecx - movl (%esp),%ebx movl %edi,%ebp xorl 8(%esp),%ebx xorl %esi,%ebp xorl 32(%esp),%ebx andl %edx,%ebp - rorl $2,%edx xorl 52(%esp),%ebx roll $1,%ebx xorl %esi,%ebp + addl %ebp,%eax + movl %ecx,%ebp + rorl $2,%edx movl %ebx,(%esp) + roll $5,%ebp leal 1518500249(%ebx,%eax,1),%ebx - movl %ecx,%eax - roll $5,%eax + movl 4(%esp),%eax addl %ebp,%ebx - addl %eax,%ebx - movl 4(%esp),%eax movl %edx,%ebp xorl 12(%esp),%eax xorl %edi,%ebp xorl 36(%esp),%eax andl %ecx,%ebp - rorl $2,%ecx xorl 56(%esp),%eax roll $1,%eax xorl %edi,%ebp + addl %ebp,%esi + movl %ebx,%ebp + rorl $2,%ecx movl %eax,4(%esp) + roll $5,%ebp leal 1518500249(%eax,%esi,1),%eax - movl %ebx,%esi - roll $5,%esi + movl 8(%esp),%esi addl %ebp,%eax - addl %esi,%eax - movl 8(%esp),%esi movl %ecx,%ebp xorl 16(%esp),%esi xorl %edx,%ebp xorl 40(%esp),%esi andl %ebx,%ebp - rorl $2,%ebx xorl 60(%esp),%esi roll $1,%esi xorl %edx,%ebp + addl %ebp,%edi + movl %eax,%ebp + rorl $2,%ebx movl %esi,8(%esp) + roll $5,%ebp leal 1518500249(%esi,%edi,1),%esi - movl %eax,%edi - roll $5,%edi + movl 12(%esp),%edi addl %ebp,%esi - addl %edi,%esi - movl 12(%esp),%edi movl %ebx,%ebp xorl 20(%esp),%edi xorl %ecx,%ebp xorl 44(%esp),%edi andl %eax,%ebp - rorl $2,%eax xorl (%esp),%edi roll $1,%edi xorl %ecx,%ebp + addl %ebp,%edx + movl %esi,%ebp + rorl $2,%eax movl %edi,12(%esp) + roll $5,%ebp leal 1518500249(%edi,%edx,1),%edi - movl %esi,%edx - roll $5,%edx + movl 16(%esp),%edx addl %ebp,%edi - addl %edx,%edi movl %esi,%ebp - movl 16(%esp),%edx - rorl $2,%esi xorl 24(%esp),%edx xorl %eax,%ebp xorl 48(%esp),%edx xorl %ebx,%ebp xorl 4(%esp),%edx roll $1,%edx - addl %ecx,%ebp + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp movl %edx,16(%esp) - movl %edi,%ecx - roll $5,%ecx - leal 1859775393(%edx,%ebp,1),%edx - addl %ecx,%edx + leal 1859775393(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx movl %edi,%ebp - movl 20(%esp),%ecx - rorl $2,%edi xorl 28(%esp),%ecx xorl %esi,%ebp xorl 52(%esp),%ecx xorl %eax,%ebp xorl 8(%esp),%ecx roll $1,%ecx - addl %ebx,%ebp + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp movl %ecx,20(%esp) - movl 
%edx,%ebx - roll $5,%ebx - leal 1859775393(%ecx,%ebp,1),%ecx - addl %ebx,%ecx + leal 1859775393(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx movl %edx,%ebp - movl 24(%esp),%ebx - rorl $2,%edx xorl 32(%esp),%ebx xorl %edi,%ebp xorl 56(%esp),%ebx xorl %esi,%ebp xorl 12(%esp),%ebx roll $1,%ebx - addl %eax,%ebp + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp movl %ebx,24(%esp) - movl %ecx,%eax - roll $5,%eax - leal 1859775393(%ebx,%ebp,1),%ebx - addl %eax,%ebx + leal 1859775393(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx movl %ecx,%ebp - movl 28(%esp),%eax - rorl $2,%ecx xorl 36(%esp),%eax xorl %edx,%ebp xorl 60(%esp),%eax xorl %edi,%ebp xorl 16(%esp),%eax roll $1,%eax - addl %esi,%ebp + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp movl %eax,28(%esp) - movl %ebx,%esi - roll $5,%esi - leal 1859775393(%eax,%ebp,1),%eax - addl %esi,%eax + leal 1859775393(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax movl %ebx,%ebp - movl 32(%esp),%esi - rorl $2,%ebx xorl 40(%esp),%esi xorl %ecx,%ebp xorl (%esp),%esi xorl %edx,%ebp xorl 20(%esp),%esi roll $1,%esi - addl %edi,%ebp + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp movl %esi,32(%esp) - movl %eax,%edi - roll $5,%edi - leal 1859775393(%esi,%ebp,1),%esi - addl %edi,%esi + leal 1859775393(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi movl %eax,%ebp - movl 36(%esp),%edi - rorl $2,%eax xorl 44(%esp),%edi xorl %ebx,%ebp xorl 4(%esp),%edi xorl %ecx,%ebp xorl 24(%esp),%edi roll $1,%edi - addl %edx,%ebp + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp movl %edi,36(%esp) - movl %esi,%edx - roll $5,%edx - leal 1859775393(%edi,%ebp,1),%edi - addl %edx,%edi + leal 1859775393(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi movl %esi,%ebp - movl 40(%esp),%edx - rorl $2,%esi xorl 48(%esp),%edx xorl %eax,%ebp xorl 8(%esp),%edx xorl %ebx,%ebp xorl 28(%esp),%edx roll $1,%edx - addl %ecx,%ebp + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp movl %edx,40(%esp) - movl %edi,%ecx - roll $5,%ecx - leal 1859775393(%edx,%ebp,1),%edx - addl %ecx,%edx + leal 1859775393(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx movl %edi,%ebp - movl 44(%esp),%ecx - rorl $2,%edi xorl 52(%esp),%ecx xorl %esi,%ebp xorl 12(%esp),%ecx xorl %eax,%ebp xorl 32(%esp),%ecx roll $1,%ecx - addl %ebx,%ebp + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp movl %ecx,44(%esp) - movl %edx,%ebx - roll $5,%ebx - leal 1859775393(%ecx,%ebp,1),%ecx - addl %ebx,%ecx + leal 1859775393(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx movl %edx,%ebp - movl 48(%esp),%ebx - rorl $2,%edx xorl 56(%esp),%ebx xorl %edi,%ebp xorl 16(%esp),%ebx xorl %esi,%ebp xorl 36(%esp),%ebx roll $1,%ebx - addl %eax,%ebp + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp movl %ebx,48(%esp) - movl %ecx,%eax - roll $5,%eax - leal 1859775393(%ebx,%ebp,1),%ebx - addl %eax,%ebx + leal 1859775393(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx movl %ecx,%ebp - movl 52(%esp),%eax - rorl $2,%ecx xorl 60(%esp),%eax xorl %edx,%ebp xorl 20(%esp),%eax xorl %edi,%ebp xorl 40(%esp),%eax roll $1,%eax - addl %esi,%ebp + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp movl %eax,52(%esp) - movl %ebx,%esi - roll $5,%esi - leal 1859775393(%eax,%ebp,1),%eax - addl %esi,%eax + leal 1859775393(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax movl %ebx,%ebp - movl 56(%esp),%esi - rorl $2,%ebx xorl (%esp),%esi xorl %ecx,%ebp xorl 24(%esp),%esi xorl %edx,%ebp xorl 
44(%esp),%esi roll $1,%esi - addl %edi,%ebp + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp movl %esi,56(%esp) - movl %eax,%edi - roll $5,%edi - leal 1859775393(%esi,%ebp,1),%esi - addl %edi,%esi + leal 1859775393(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi movl %eax,%ebp - movl 60(%esp),%edi - rorl $2,%eax xorl 4(%esp),%edi xorl %ebx,%ebp xorl 28(%esp),%edi xorl %ecx,%ebp xorl 48(%esp),%edi roll $1,%edi - addl %edx,%ebp + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp movl %edi,60(%esp) - movl %esi,%edx - roll $5,%edx - leal 1859775393(%edi,%ebp,1),%edi - addl %edx,%edi + leal 1859775393(%edi,%edx,1),%edi + movl (%esp),%edx + addl %ebp,%edi movl %esi,%ebp - movl (%esp),%edx - rorl $2,%esi xorl 8(%esp),%edx xorl %eax,%ebp xorl 32(%esp),%edx xorl %ebx,%ebp xorl 52(%esp),%edx roll $1,%edx - addl %ecx,%ebp + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp movl %edx,(%esp) - movl %edi,%ecx - roll $5,%ecx - leal 1859775393(%edx,%ebp,1),%edx - addl %ecx,%edx + leal 1859775393(%edx,%ecx,1),%edx + movl 4(%esp),%ecx + addl %ebp,%edx movl %edi,%ebp - movl 4(%esp),%ecx - rorl $2,%edi xorl 12(%esp),%ecx xorl %esi,%ebp xorl 36(%esp),%ecx xorl %eax,%ebp xorl 56(%esp),%ecx roll $1,%ecx - addl %ebx,%ebp + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp movl %ecx,4(%esp) - movl %edx,%ebx - roll $5,%ebx - leal 1859775393(%ecx,%ebp,1),%ecx - addl %ebx,%ecx + leal 1859775393(%ecx,%ebx,1),%ecx + movl 8(%esp),%ebx + addl %ebp,%ecx movl %edx,%ebp - movl 8(%esp),%ebx - rorl $2,%edx xorl 16(%esp),%ebx xorl %edi,%ebp xorl 40(%esp),%ebx xorl %esi,%ebp xorl 60(%esp),%ebx roll $1,%ebx - addl %eax,%ebp + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp movl %ebx,8(%esp) - movl %ecx,%eax - roll $5,%eax - leal 1859775393(%ebx,%ebp,1),%ebx - addl %eax,%ebx + leal 1859775393(%ebx,%eax,1),%ebx + movl 12(%esp),%eax + addl %ebp,%ebx movl %ecx,%ebp - movl 12(%esp),%eax - rorl $2,%ecx xorl 20(%esp),%eax xorl %edx,%ebp xorl 44(%esp),%eax xorl %edi,%ebp xorl (%esp),%eax roll $1,%eax - addl %esi,%ebp + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp movl %eax,12(%esp) - movl %ebx,%esi - roll $5,%esi - leal 1859775393(%eax,%ebp,1),%eax - addl %esi,%eax + leal 1859775393(%eax,%esi,1),%eax + movl 16(%esp),%esi + addl %ebp,%eax movl %ebx,%ebp - movl 16(%esp),%esi - rorl $2,%ebx xorl 24(%esp),%esi xorl %ecx,%ebp xorl 48(%esp),%esi xorl %edx,%ebp xorl 4(%esp),%esi roll $1,%esi - addl %edi,%ebp + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp movl %esi,16(%esp) - movl %eax,%edi - roll $5,%edi - leal 1859775393(%esi,%ebp,1),%esi - addl %edi,%esi + leal 1859775393(%esi,%edi,1),%esi + movl 20(%esp),%edi + addl %ebp,%esi movl %eax,%ebp - movl 20(%esp),%edi - rorl $2,%eax xorl 28(%esp),%edi xorl %ebx,%ebp xorl 52(%esp),%edi xorl %ecx,%ebp xorl 8(%esp),%edi roll $1,%edi - addl %edx,%ebp + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp movl %edi,20(%esp) - movl %esi,%edx - roll $5,%edx - leal 1859775393(%edi,%ebp,1),%edi - addl %edx,%edi + leal 1859775393(%edi,%edx,1),%edi + movl 24(%esp),%edx + addl %ebp,%edi movl %esi,%ebp - movl 24(%esp),%edx - rorl $2,%esi xorl 32(%esp),%edx xorl %eax,%ebp xorl 56(%esp),%edx xorl %ebx,%ebp xorl 12(%esp),%edx roll $1,%edx - addl %ecx,%ebp + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp movl %edx,24(%esp) - movl %edi,%ecx - roll $5,%ecx - leal 1859775393(%edx,%ebp,1),%edx - addl %ecx,%edx + leal 1859775393(%edx,%ecx,1),%edx + movl 28(%esp),%ecx + addl %ebp,%edx movl 
%edi,%ebp - movl 28(%esp),%ecx - rorl $2,%edi xorl 36(%esp),%ecx xorl %esi,%ebp xorl 60(%esp),%ecx xorl %eax,%ebp xorl 16(%esp),%ecx roll $1,%ecx - addl %ebx,%ebp + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp movl %ecx,28(%esp) - movl %edx,%ebx - roll $5,%ebx - leal 1859775393(%ecx,%ebp,1),%ecx - addl %ebx,%ecx - + leal 1859775393(%ecx,%ebx,1),%ecx movl 32(%esp),%ebx - movl 40(%esp),%ebp - xorl %ebp,%ebx - movl (%esp),%ebp - xorl %ebp,%ebx - movl 20(%esp),%ebp - xorl %ebp,%ebx - movl %edx,%ebp + addl %ebp,%ecx + + movl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl (%esp),%ebx + andl %edx,%ebp + xorl 20(%esp),%ebx roll $1,%ebx - orl %edi,%ebp - movl %ebx,32(%esp) - andl %esi,%ebp - leal 2400959708(%ebx,%eax,1),%ebx - movl %edx,%eax + addl %eax,%ebp rorl $2,%edx - andl %edi,%eax - orl %eax,%ebp movl %ecx,%eax roll $5,%eax - addl %ebp,%ebx + movl %ebx,32(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp addl %eax,%ebx - + andl %esi,%ebp movl 36(%esp),%eax - movl 44(%esp),%ebp - xorl %ebp,%eax - movl 4(%esp),%ebp - xorl %ebp,%eax - movl 24(%esp),%ebp - xorl %ebp,%eax - movl %ecx,%ebp + addl %ebp,%ebx + + movl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl 4(%esp),%eax + andl %ecx,%ebp + xorl 24(%esp),%eax roll $1,%eax - orl %edx,%ebp - movl %eax,36(%esp) - andl %edi,%ebp - leal 2400959708(%eax,%esi,1),%eax - movl %ecx,%esi + addl %esi,%ebp rorl $2,%ecx - andl %edx,%esi - orl %esi,%ebp movl %ebx,%esi roll $5,%esi - addl %ebp,%eax + movl %eax,36(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp addl %esi,%eax - + andl %edi,%ebp movl 40(%esp),%esi - movl 48(%esp),%ebp - xorl %ebp,%esi - movl 8(%esp),%ebp - xorl %ebp,%esi - movl 28(%esp),%ebp - xorl %ebp,%esi - movl %ebx,%ebp + addl %ebp,%eax + + movl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 8(%esp),%esi + andl %ebx,%ebp + xorl 28(%esp),%esi roll $1,%esi - orl %ecx,%ebp - movl %esi,40(%esp) - andl %edx,%ebp - leal 2400959708(%esi,%edi,1),%esi - movl %ebx,%edi + addl %edi,%ebp rorl $2,%ebx - andl %ecx,%edi - orl %edi,%ebp movl %eax,%edi roll $5,%edi - addl %ebp,%esi + movl %esi,40(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp addl %edi,%esi - + andl %edx,%ebp movl 44(%esp),%edi - movl 52(%esp),%ebp - xorl %ebp,%edi - movl 12(%esp),%ebp - xorl %ebp,%edi - movl 32(%esp),%ebp - xorl %ebp,%edi - movl %eax,%ebp + addl %ebp,%esi + + movl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 12(%esp),%edi + andl %eax,%ebp + xorl 32(%esp),%edi roll $1,%edi - orl %ebx,%ebp - movl %edi,44(%esp) - andl %ecx,%ebp - leal 2400959708(%edi,%edx,1),%edi - movl %eax,%edx + addl %edx,%ebp rorl $2,%eax - andl %ebx,%edx - orl %edx,%ebp movl %esi,%edx roll $5,%edx - addl %ebp,%edi + movl %edi,44(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp addl %edx,%edi - + andl %ecx,%ebp movl 48(%esp),%edx - movl 56(%esp),%ebp - xorl %ebp,%edx - movl 16(%esp),%ebp - xorl %ebp,%edx - movl 36(%esp),%ebp - xorl %ebp,%edx - movl %esi,%ebp + addl %ebp,%edi + + movl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 16(%esp),%edx + andl %esi,%ebp + xorl 36(%esp),%edx roll $1,%edx - orl %eax,%ebp - movl %edx,48(%esp) - andl %ebx,%ebp - leal 2400959708(%edx,%ecx,1),%edx - movl %esi,%ecx + addl %ecx,%ebp rorl $2,%esi - andl %eax,%ecx - orl %ecx,%ebp movl %edi,%ecx roll $5,%ecx - addl %ebp,%edx + movl %edx,48(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp addl %ecx,%edx - + andl %ebx,%ebp movl 52(%esp),%ecx - movl 60(%esp),%ebp - xorl %ebp,%ecx - movl 20(%esp),%ebp - xorl %ebp,%ecx - 
movl 40(%esp),%ebp - xorl %ebp,%ecx - movl %edi,%ebp + addl %ebp,%edx + + movl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 20(%esp),%ecx + andl %edi,%ebp + xorl 40(%esp),%ecx roll $1,%ecx - orl %esi,%ebp - movl %ecx,52(%esp) - andl %eax,%ebp - leal 2400959708(%ecx,%ebx,1),%ecx - movl %edi,%ebx + addl %ebx,%ebp rorl $2,%edi - andl %esi,%ebx - orl %ebx,%ebp movl %edx,%ebx roll $5,%ebx - addl %ebp,%ecx + movl %ecx,52(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp addl %ebx,%ecx - + andl %eax,%ebp movl 56(%esp),%ebx - movl (%esp),%ebp - xorl %ebp,%ebx - movl 24(%esp),%ebp - xorl %ebp,%ebx - movl 44(%esp),%ebp - xorl %ebp,%ebx - movl %edx,%ebp + addl %ebp,%ecx + + movl %edi,%ebp + xorl (%esp),%ebx + xorl %esi,%ebp + xorl 24(%esp),%ebx + andl %edx,%ebp + xorl 44(%esp),%ebx roll $1,%ebx - orl %edi,%ebp - movl %ebx,56(%esp) - andl %esi,%ebp - leal 2400959708(%ebx,%eax,1),%ebx - movl %edx,%eax + addl %eax,%ebp rorl $2,%edx - andl %edi,%eax - orl %eax,%ebp movl %ecx,%eax roll $5,%eax - addl %ebp,%ebx + movl %ebx,56(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp addl %eax,%ebx - + andl %esi,%ebp movl 60(%esp),%eax - movl 4(%esp),%ebp - xorl %ebp,%eax - movl 28(%esp),%ebp - xorl %ebp,%eax - movl 48(%esp),%ebp - xorl %ebp,%eax - movl %ecx,%ebp + addl %ebp,%ebx + + movl %edx,%ebp + xorl 4(%esp),%eax + xorl %edi,%ebp + xorl 28(%esp),%eax + andl %ecx,%ebp + xorl 48(%esp),%eax roll $1,%eax - orl %edx,%ebp - movl %eax,60(%esp) - andl %edi,%ebp - leal 2400959708(%eax,%esi,1),%eax - movl %ecx,%esi + addl %esi,%ebp rorl $2,%ecx - andl %edx,%esi - orl %esi,%ebp movl %ebx,%esi roll $5,%esi - addl %ebp,%eax + movl %eax,60(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp addl %esi,%eax - + andl %edi,%ebp movl (%esp),%esi - movl 8(%esp),%ebp - xorl %ebp,%esi - movl 32(%esp),%ebp - xorl %ebp,%esi - movl 52(%esp),%ebp - xorl %ebp,%esi - movl %ebx,%ebp + addl %ebp,%eax + + movl %ecx,%ebp + xorl 8(%esp),%esi + xorl %edx,%ebp + xorl 32(%esp),%esi + andl %ebx,%ebp + xorl 52(%esp),%esi roll $1,%esi - orl %ecx,%ebp - movl %esi,(%esp) - andl %edx,%ebp - leal 2400959708(%esi,%edi,1),%esi - movl %ebx,%edi + addl %edi,%ebp rorl $2,%ebx - andl %ecx,%edi - orl %edi,%ebp movl %eax,%edi roll $5,%edi - addl %ebp,%esi + movl %esi,(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp addl %edi,%esi - + andl %edx,%ebp movl 4(%esp),%edi - movl 12(%esp),%ebp - xorl %ebp,%edi - movl 36(%esp),%ebp - xorl %ebp,%edi - movl 56(%esp),%ebp - xorl %ebp,%edi - movl %eax,%ebp + addl %ebp,%esi + + movl %ebx,%ebp + xorl 12(%esp),%edi + xorl %ecx,%ebp + xorl 36(%esp),%edi + andl %eax,%ebp + xorl 56(%esp),%edi roll $1,%edi - orl %ebx,%ebp - movl %edi,4(%esp) - andl %ecx,%ebp - leal 2400959708(%edi,%edx,1),%edi - movl %eax,%edx + addl %edx,%ebp rorl $2,%eax - andl %ebx,%edx - orl %edx,%ebp movl %esi,%edx roll $5,%edx - addl %ebp,%edi + movl %edi,4(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp addl %edx,%edi - + andl %ecx,%ebp movl 8(%esp),%edx - movl 16(%esp),%ebp - xorl %ebp,%edx - movl 40(%esp),%ebp - xorl %ebp,%edx - movl 60(%esp),%ebp - xorl %ebp,%edx - movl %esi,%ebp + addl %ebp,%edi + + movl %eax,%ebp + xorl 16(%esp),%edx + xorl %ebx,%ebp + xorl 40(%esp),%edx + andl %esi,%ebp + xorl 60(%esp),%edx roll $1,%edx - orl %eax,%ebp - movl %edx,8(%esp) - andl %ebx,%ebp - leal 2400959708(%edx,%ecx,1),%edx - movl %esi,%ecx + addl %ecx,%ebp rorl $2,%esi - andl %eax,%ecx - orl %ecx,%ebp movl %edi,%ecx roll $5,%ecx - addl %ebp,%edx + movl %edx,8(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl 
%eax,%ebp addl %ecx,%edx - + andl %ebx,%ebp movl 12(%esp),%ecx - movl 20(%esp),%ebp - xorl %ebp,%ecx - movl 44(%esp),%ebp - xorl %ebp,%ecx - movl (%esp),%ebp - xorl %ebp,%ecx - movl %edi,%ebp + addl %ebp,%edx + + movl %esi,%ebp + xorl 20(%esp),%ecx + xorl %eax,%ebp + xorl 44(%esp),%ecx + andl %edi,%ebp + xorl (%esp),%ecx roll $1,%ecx - orl %esi,%ebp - movl %ecx,12(%esp) - andl %eax,%ebp - leal 2400959708(%ecx,%ebx,1),%ecx - movl %edi,%ebx + addl %ebx,%ebp rorl $2,%edi - andl %esi,%ebx - orl %ebx,%ebp movl %edx,%ebx roll $5,%ebx - addl %ebp,%ecx + movl %ecx,12(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp addl %ebx,%ecx - + andl %eax,%ebp movl 16(%esp),%ebx - movl 24(%esp),%ebp - xorl %ebp,%ebx - movl 48(%esp),%ebp - xorl %ebp,%ebx - movl 4(%esp),%ebp - xorl %ebp,%ebx - movl %edx,%ebp + addl %ebp,%ecx + + movl %edi,%ebp + xorl 24(%esp),%ebx + xorl %esi,%ebp + xorl 48(%esp),%ebx + andl %edx,%ebp + xorl 4(%esp),%ebx roll $1,%ebx - orl %edi,%ebp - movl %ebx,16(%esp) - andl %esi,%ebp - leal 2400959708(%ebx,%eax,1),%ebx - movl %edx,%eax + addl %eax,%ebp rorl $2,%edx - andl %edi,%eax - orl %eax,%ebp movl %ecx,%eax roll $5,%eax - addl %ebp,%ebx + movl %ebx,16(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp addl %eax,%ebx - + andl %esi,%ebp movl 20(%esp),%eax - movl 28(%esp),%ebp - xorl %ebp,%eax - movl 52(%esp),%ebp - xorl %ebp,%eax - movl 8(%esp),%ebp - xorl %ebp,%eax - movl %ecx,%ebp + addl %ebp,%ebx + + movl %edx,%ebp + xorl 28(%esp),%eax + xorl %edi,%ebp + xorl 52(%esp),%eax + andl %ecx,%ebp + xorl 8(%esp),%eax roll $1,%eax - orl %edx,%ebp - movl %eax,20(%esp) - andl %edi,%ebp - leal 2400959708(%eax,%esi,1),%eax - movl %ecx,%esi + addl %esi,%ebp rorl $2,%ecx - andl %edx,%esi - orl %esi,%ebp movl %ebx,%esi roll $5,%esi - addl %ebp,%eax + movl %eax,20(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp addl %esi,%eax - + andl %edi,%ebp movl 24(%esp),%esi - movl 32(%esp),%ebp - xorl %ebp,%esi - movl 56(%esp),%ebp - xorl %ebp,%esi - movl 12(%esp),%ebp - xorl %ebp,%esi - movl %ebx,%ebp + addl %ebp,%eax + + movl %ecx,%ebp + xorl 32(%esp),%esi + xorl %edx,%ebp + xorl 56(%esp),%esi + andl %ebx,%ebp + xorl 12(%esp),%esi roll $1,%esi - orl %ecx,%ebp - movl %esi,24(%esp) - andl %edx,%ebp - leal 2400959708(%esi,%edi,1),%esi - movl %ebx,%edi + addl %edi,%ebp rorl $2,%ebx - andl %ecx,%edi - orl %edi,%ebp movl %eax,%edi roll $5,%edi - addl %ebp,%esi + movl %esi,24(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp addl %edi,%esi - + andl %edx,%ebp movl 28(%esp),%edi - movl 36(%esp),%ebp - xorl %ebp,%edi - movl 60(%esp),%ebp - xorl %ebp,%edi - movl 16(%esp),%ebp - xorl %ebp,%edi - movl %eax,%ebp + addl %ebp,%esi + + movl %ebx,%ebp + xorl 36(%esp),%edi + xorl %ecx,%ebp + xorl 60(%esp),%edi + andl %eax,%ebp + xorl 16(%esp),%edi roll $1,%edi - orl %ebx,%ebp - movl %edi,28(%esp) - andl %ecx,%ebp - leal 2400959708(%edi,%edx,1),%edi - movl %eax,%edx + addl %edx,%ebp rorl $2,%eax - andl %ebx,%edx - orl %edx,%ebp movl %esi,%edx roll $5,%edx - addl %ebp,%edi + movl %edi,28(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp addl %edx,%edi - + andl %ecx,%ebp movl 32(%esp),%edx - movl 40(%esp),%ebp - xorl %ebp,%edx - movl (%esp),%ebp - xorl %ebp,%edx - movl 20(%esp),%ebp - xorl %ebp,%edx - movl %esi,%ebp + addl %ebp,%edi + + movl %eax,%ebp + xorl 40(%esp),%edx + xorl %ebx,%ebp + xorl (%esp),%edx + andl %esi,%ebp + xorl 20(%esp),%edx roll $1,%edx - orl %eax,%ebp - movl %edx,32(%esp) - andl %ebx,%ebp - leal 2400959708(%edx,%ecx,1),%edx - movl %esi,%ecx + addl %ecx,%ebp rorl 
$2,%esi - andl %eax,%ecx - orl %ecx,%ebp movl %edi,%ecx roll $5,%ecx - addl %ebp,%edx + movl %edx,32(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp addl %ecx,%edx - + andl %ebx,%ebp movl 36(%esp),%ecx - movl 44(%esp),%ebp - xorl %ebp,%ecx - movl 4(%esp),%ebp - xorl %ebp,%ecx - movl 24(%esp),%ebp - xorl %ebp,%ecx - movl %edi,%ebp + addl %ebp,%edx + + movl %esi,%ebp + xorl 44(%esp),%ecx + xorl %eax,%ebp + xorl 4(%esp),%ecx + andl %edi,%ebp + xorl 24(%esp),%ecx roll $1,%ecx - orl %esi,%ebp - movl %ecx,36(%esp) - andl %eax,%ebp - leal 2400959708(%ecx,%ebx,1),%ecx - movl %edi,%ebx + addl %ebx,%ebp rorl $2,%edi - andl %esi,%ebx - orl %ebx,%ebp movl %edx,%ebx roll $5,%ebx - addl %ebp,%ecx + movl %ecx,36(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp addl %ebx,%ecx - + andl %eax,%ebp movl 40(%esp),%ebx - movl 48(%esp),%ebp - xorl %ebp,%ebx - movl 8(%esp),%ebp - xorl %ebp,%ebx - movl 28(%esp),%ebp - xorl %ebp,%ebx - movl %edx,%ebp + addl %ebp,%ecx + + movl %edi,%ebp + xorl 48(%esp),%ebx + xorl %esi,%ebp + xorl 8(%esp),%ebx + andl %edx,%ebp + xorl 28(%esp),%ebx roll $1,%ebx - orl %edi,%ebp - movl %ebx,40(%esp) - andl %esi,%ebp - leal 2400959708(%ebx,%eax,1),%ebx - movl %edx,%eax + addl %eax,%ebp rorl $2,%edx - andl %edi,%eax - orl %eax,%ebp movl %ecx,%eax roll $5,%eax - addl %ebp,%ebx + movl %ebx,40(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp addl %eax,%ebx - + andl %esi,%ebp movl 44(%esp),%eax - movl 52(%esp),%ebp - xorl %ebp,%eax - movl 12(%esp),%ebp - xorl %ebp,%eax - movl 32(%esp),%ebp - xorl %ebp,%eax - movl %ecx,%ebp + addl %ebp,%ebx + + movl %edx,%ebp + xorl 52(%esp),%eax + xorl %edi,%ebp + xorl 12(%esp),%eax + andl %ecx,%ebp + xorl 32(%esp),%eax roll $1,%eax - orl %edx,%ebp - movl %eax,44(%esp) - andl %edi,%ebp - leal 2400959708(%eax,%esi,1),%eax - movl %ecx,%esi + addl %esi,%ebp rorl $2,%ecx - andl %edx,%esi - orl %esi,%ebp movl %ebx,%esi roll $5,%esi - addl %ebp,%eax + movl %eax,44(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp addl %esi,%eax + andl %edi,%ebp + movl 48(%esp),%esi + addl %ebp,%eax movl %ebx,%ebp - movl 48(%esp),%esi - rorl $2,%ebx xorl 56(%esp),%esi xorl %ecx,%ebp xorl 16(%esp),%esi xorl %edx,%ebp xorl 36(%esp),%esi roll $1,%esi - addl %edi,%ebp + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp movl %esi,48(%esp) - movl %eax,%edi - roll $5,%edi - leal 3395469782(%esi,%ebp,1),%esi - addl %edi,%esi + leal 3395469782(%esi,%edi,1),%esi + movl 52(%esp),%edi + addl %ebp,%esi movl %eax,%ebp - movl 52(%esp),%edi - rorl $2,%eax xorl 60(%esp),%edi xorl %ebx,%ebp xorl 20(%esp),%edi xorl %ecx,%ebp xorl 40(%esp),%edi roll $1,%edi - addl %edx,%ebp + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp movl %edi,52(%esp) - movl %esi,%edx - roll $5,%edx - leal 3395469782(%edi,%ebp,1),%edi - addl %edx,%edi + leal 3395469782(%edi,%edx,1),%edi + movl 56(%esp),%edx + addl %ebp,%edi movl %esi,%ebp - movl 56(%esp),%edx - rorl $2,%esi xorl (%esp),%edx xorl %eax,%ebp xorl 24(%esp),%edx xorl %ebx,%ebp xorl 44(%esp),%edx roll $1,%edx - addl %ecx,%ebp + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp movl %edx,56(%esp) - movl %edi,%ecx - roll $5,%ecx - leal 3395469782(%edx,%ebp,1),%edx - addl %ecx,%edx + leal 3395469782(%edx,%ecx,1),%edx + movl 60(%esp),%ecx + addl %ebp,%edx movl %edi,%ebp - movl 60(%esp),%ecx - rorl $2,%edi xorl 4(%esp),%ecx xorl %esi,%ebp xorl 28(%esp),%ecx xorl %eax,%ebp xorl 48(%esp),%ecx roll $1,%ecx - addl %ebx,%ebp + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp movl 
%ecx,60(%esp) - movl %edx,%ebx - roll $5,%ebx - leal 3395469782(%ecx,%ebp,1),%ecx - addl %ebx,%ecx + leal 3395469782(%ecx,%ebx,1),%ecx + movl (%esp),%ebx + addl %ebp,%ecx movl %edx,%ebp - movl (%esp),%ebx - rorl $2,%edx xorl 8(%esp),%ebx xorl %edi,%ebp xorl 32(%esp),%ebx xorl %esi,%ebp xorl 52(%esp),%ebx roll $1,%ebx - addl %eax,%ebp + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp movl %ebx,(%esp) - movl %ecx,%eax - roll $5,%eax - leal 3395469782(%ebx,%ebp,1),%ebx - addl %eax,%ebx + leal 3395469782(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx movl %ecx,%ebp - movl 4(%esp),%eax - rorl $2,%ecx xorl 12(%esp),%eax xorl %edx,%ebp xorl 36(%esp),%eax xorl %edi,%ebp xorl 56(%esp),%eax roll $1,%eax - addl %esi,%ebp + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp movl %eax,4(%esp) - movl %ebx,%esi - roll $5,%esi - leal 3395469782(%eax,%ebp,1),%eax - addl %esi,%eax + leal 3395469782(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax movl %ebx,%ebp - movl 8(%esp),%esi - rorl $2,%ebx xorl 16(%esp),%esi xorl %ecx,%ebp xorl 40(%esp),%esi xorl %edx,%ebp xorl 60(%esp),%esi roll $1,%esi - addl %edi,%ebp + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp movl %esi,8(%esp) - movl %eax,%edi - roll $5,%edi - leal 3395469782(%esi,%ebp,1),%esi - addl %edi,%esi + leal 3395469782(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi movl %eax,%ebp - movl 12(%esp),%edi - rorl $2,%eax xorl 20(%esp),%edi xorl %ebx,%ebp xorl 44(%esp),%edi xorl %ecx,%ebp xorl (%esp),%edi roll $1,%edi - addl %edx,%ebp + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp movl %edi,12(%esp) - movl %esi,%edx - roll $5,%edx - leal 3395469782(%edi,%ebp,1),%edi - addl %edx,%edi + leal 3395469782(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi movl %esi,%ebp - movl 16(%esp),%edx - rorl $2,%esi xorl 24(%esp),%edx xorl %eax,%ebp xorl 48(%esp),%edx xorl %ebx,%ebp xorl 4(%esp),%edx roll $1,%edx - addl %ecx,%ebp + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp movl %edx,16(%esp) - movl %edi,%ecx - roll $5,%ecx - leal 3395469782(%edx,%ebp,1),%edx - addl %ecx,%edx + leal 3395469782(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx movl %edi,%ebp - movl 20(%esp),%ecx - rorl $2,%edi xorl 28(%esp),%ecx xorl %esi,%ebp xorl 52(%esp),%ecx xorl %eax,%ebp xorl 8(%esp),%ecx roll $1,%ecx - addl %ebx,%ebp + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp movl %ecx,20(%esp) - movl %edx,%ebx - roll $5,%ebx - leal 3395469782(%ecx,%ebp,1),%ecx - addl %ebx,%ecx + leal 3395469782(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx movl %edx,%ebp - movl 24(%esp),%ebx - rorl $2,%edx xorl 32(%esp),%ebx xorl %edi,%ebp xorl 56(%esp),%ebx xorl %esi,%ebp xorl 12(%esp),%ebx roll $1,%ebx - addl %eax,%ebp + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp movl %ebx,24(%esp) - movl %ecx,%eax - roll $5,%eax - leal 3395469782(%ebx,%ebp,1),%ebx - addl %eax,%ebx + leal 3395469782(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx movl %ecx,%ebp - movl 28(%esp),%eax - rorl $2,%ecx xorl 36(%esp),%eax xorl %edx,%ebp xorl 60(%esp),%eax xorl %edi,%ebp xorl 16(%esp),%eax roll $1,%eax - addl %esi,%ebp + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp movl %eax,28(%esp) - movl %ebx,%esi - roll $5,%esi - leal 3395469782(%eax,%ebp,1),%eax - addl %esi,%eax + leal 3395469782(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax movl %ebx,%ebp - movl 32(%esp),%esi - rorl $2,%ebx xorl 40(%esp),%esi xorl %ecx,%ebp xorl (%esp),%esi xorl %edx,%ebp 
xorl 20(%esp),%esi roll $1,%esi - addl %edi,%ebp + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp movl %esi,32(%esp) - movl %eax,%edi - roll $5,%edi - leal 3395469782(%esi,%ebp,1),%esi - addl %edi,%esi + leal 3395469782(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi movl %eax,%ebp - movl 36(%esp),%edi - rorl $2,%eax xorl 44(%esp),%edi xorl %ebx,%ebp xorl 4(%esp),%edi xorl %ecx,%ebp xorl 24(%esp),%edi roll $1,%edi - addl %edx,%ebp + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp movl %edi,36(%esp) - movl %esi,%edx - roll $5,%edx - leal 3395469782(%edi,%ebp,1),%edi - addl %edx,%edi + leal 3395469782(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi movl %esi,%ebp - movl 40(%esp),%edx - rorl $2,%esi xorl 48(%esp),%edx xorl %eax,%ebp xorl 8(%esp),%edx xorl %ebx,%ebp xorl 28(%esp),%edx roll $1,%edx - addl %ecx,%ebp + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp movl %edx,40(%esp) - movl %edi,%ecx - roll $5,%ecx - leal 3395469782(%edx,%ebp,1),%edx - addl %ecx,%edx + leal 3395469782(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx movl %edi,%ebp - movl 44(%esp),%ecx - rorl $2,%edi xorl 52(%esp),%ecx xorl %esi,%ebp xorl 12(%esp),%ecx xorl %eax,%ebp xorl 32(%esp),%ecx roll $1,%ecx - addl %ebx,%ebp + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp movl %ecx,44(%esp) - movl %edx,%ebx - roll $5,%ebx - leal 3395469782(%ecx,%ebp,1),%ecx - addl %ebx,%ecx + leal 3395469782(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx movl %edx,%ebp - movl 48(%esp),%ebx - rorl $2,%edx xorl 56(%esp),%ebx xorl %edi,%ebp xorl 16(%esp),%ebx xorl %esi,%ebp xorl 36(%esp),%ebx roll $1,%ebx - addl %eax,%ebp + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp movl %ebx,48(%esp) - movl %ecx,%eax - roll $5,%eax - leal 3395469782(%ebx,%ebp,1),%ebx - addl %eax,%ebx + leal 3395469782(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx movl %ecx,%ebp - movl 52(%esp),%eax - rorl $2,%ecx xorl 60(%esp),%eax xorl %edx,%ebp xorl 20(%esp),%eax xorl %edi,%ebp xorl 40(%esp),%eax roll $1,%eax - addl %esi,%ebp - movl %eax,52(%esp) - movl %ebx,%esi - roll $5,%esi - leal 3395469782(%eax,%ebp,1),%eax - addl %esi,%eax - + addl %ebp,%esi + rorl $2,%ecx movl %ebx,%ebp + roll $5,%ebp + leal 3395469782(%eax,%esi,1),%eax movl 56(%esp),%esi - rorl $2,%ebx + addl %ebp,%eax + + movl %ebx,%ebp xorl (%esp),%esi xorl %ecx,%ebp xorl 24(%esp),%esi xorl %edx,%ebp xorl 44(%esp),%esi roll $1,%esi - addl %edi,%ebp - movl %esi,56(%esp) - movl %eax,%edi - roll $5,%edi - leal 3395469782(%esi,%ebp,1),%esi - addl %edi,%esi - + addl %ebp,%edi + rorl $2,%ebx movl %eax,%ebp + roll $5,%ebp + leal 3395469782(%esi,%edi,1),%esi movl 60(%esp),%edi - rorl $2,%eax + addl %ebp,%esi + + movl %eax,%ebp xorl 4(%esp),%edi xorl %ebx,%ebp xorl 28(%esp),%edi xorl %ecx,%ebp xorl 48(%esp),%edi roll $1,%edi - addl %edx,%ebp - movl %edi,60(%esp) - movl %esi,%edx - roll $5,%edx - leal 3395469782(%edi,%ebp,1),%edi - addl %edx,%edi - movl 84(%esp),%ebp - movl 88(%esp),%edx + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + leal 3395469782(%edi,%edx,1),%edi + addl %ebp,%edi + movl 96(%esp),%ebp + movl 100(%esp),%edx addl (%ebp),%edi addl 4(%ebp),%esi addl 8(%ebp),%eax @@ -1422,14 +1360,14 @@ sha1_block_data_order: movl %edi,(%ebp) addl $64,%edx movl %esi,4(%ebp) - cmpl 92(%esp),%edx + cmpl 104(%esp),%edx movl %eax,8(%ebp) movl %ecx,%edi movl %ebx,12(%ebp) movl %edx,%esi movl %ecx,16(%ebp) jb .L000loop - addl $64,%esp + addl $76,%esp popl %edi popl %esi popl %ebx diff --git 
a/deps/openssl/asm/x86-elf-gas/sha/sha256-586.s b/deps/openssl/asm/x86-elf-gas/sha/sha256-586.s index 973e50d198..77a89514f1 100644 --- a/deps/openssl/asm/x86-elf-gas/sha/sha256-586.s +++ b/deps/openssl/asm/x86-elf-gas/sha/sha256-586.s @@ -96,31 +96,30 @@ sha256_block_data_order: .L00300_15: movl 92(%esp),%ebx movl %edx,%ecx - rorl $6,%ecx - movl %edx,%edi - rorl $11,%edi + rorl $14,%ecx movl 20(%esp),%esi - xorl %edi,%ecx - rorl $14,%edi - xorl %edi,%ecx + xorl %edx,%ecx + rorl $5,%ecx + xorl %edx,%ecx + rorl $6,%ecx movl 24(%esp),%edi addl %ecx,%ebx - movl %edx,16(%esp) xorl %edi,%esi + movl %edx,16(%esp) movl %eax,%ecx andl %edx,%esi movl 12(%esp),%edx xorl %edi,%esi movl %eax,%edi addl %esi,%ebx - rorl $2,%ecx + rorl $9,%ecx addl 28(%esp),%ebx - rorl $13,%edi + xorl %eax,%ecx + rorl $11,%ecx movl 4(%esp),%esi - xorl %edi,%ecx - rorl $9,%edi + xorl %eax,%ecx + rorl $2,%ecx addl %ebx,%edx - xorl %edi,%ecx movl 8(%esp),%edi addl %ecx,%ebx movl %eax,(%esp) @@ -142,48 +141,46 @@ sha256_block_data_order: .L00416_63: movl %ebx,%esi movl 100(%esp),%ecx - shrl $3,%ebx - rorl $7,%esi - xorl %esi,%ebx rorl $11,%esi movl %ecx,%edi + xorl %ebx,%esi + rorl $7,%esi + shrl $3,%ebx + rorl $2,%edi xorl %esi,%ebx - shrl $10,%ecx - movl 156(%esp),%esi + xorl %ecx,%edi rorl $17,%edi - xorl %edi,%ecx - rorl $2,%edi - addl %esi,%ebx + shrl $10,%ecx + addl 156(%esp),%ebx xorl %ecx,%edi - addl %edi,%ebx - movl %edx,%ecx addl 120(%esp),%ebx - rorl $6,%ecx - movl %edx,%edi - rorl $11,%edi + movl %edx,%ecx + addl %edi,%ebx + rorl $14,%ecx movl 20(%esp),%esi - xorl %edi,%ecx - rorl $14,%edi + xorl %edx,%ecx + rorl $5,%ecx movl %ebx,92(%esp) - xorl %edi,%ecx + xorl %edx,%ecx + rorl $6,%ecx movl 24(%esp),%edi addl %ecx,%ebx - movl %edx,16(%esp) xorl %edi,%esi + movl %edx,16(%esp) movl %eax,%ecx andl %edx,%esi movl 12(%esp),%edx xorl %edi,%esi movl %eax,%edi addl %esi,%ebx - rorl $2,%ecx + rorl $9,%ecx addl 28(%esp),%ebx - rorl $13,%edi + xorl %eax,%ecx + rorl $11,%ecx movl 4(%esp),%esi - xorl %edi,%ecx - rorl $9,%edi + xorl %eax,%ecx + rorl $2,%ecx addl %ebx,%edx - xorl %edi,%ecx movl 8(%esp),%edi addl %ecx,%ebx movl %eax,(%esp) diff --git a/deps/openssl/asm/x86-elf-gas/x86cpuid.s b/deps/openssl/asm/x86-elf-gas/x86cpuid.s index 56a92bfcbe..f9cd038059 100644 --- a/deps/openssl/asm/x86-elf-gas/x86cpuid.s +++ b/deps/openssl/asm/x86-elf-gas/x86cpuid.s @@ -19,9 +19,9 @@ OPENSSL_ia32_cpuid: pushfl popl %eax xorl %eax,%ecx - btl $21,%ecx - jnc .L000done xorl %eax,%eax + btl $21,%ecx + jnc .L000nocpuid .byte 0x0f,0xa2 movl %eax,%edi xorl %eax,%eax @@ -47,7 +47,14 @@ OPENSSL_ia32_cpuid: jnz .L001intel movl $2147483648,%eax .byte 0x0f,0xa2 - cmpl $2147483656,%eax + cmpl $2147483649,%eax + jb .L001intel + movl %eax,%esi + movl $2147483649,%eax + .byte 0x0f,0xa2 + orl %ecx,%ebp + andl $2049,%ebp + cmpl $2147483656,%esi jb .L001intel movl $2147483656,%eax .byte 0x0f,0xa2 @@ -56,46 +63,68 @@ OPENSSL_ia32_cpuid: movl $1,%eax .byte 0x0f,0xa2 btl $28,%edx - jnc .L000done + jnc .L002generic shrl $16,%ebx andl $255,%ebx cmpl %esi,%ebx - ja .L000done + ja .L002generic andl $4026531839,%edx - jmp .L000done + jmp .L002generic .L001intel: cmpl $4,%edi movl $-1,%edi - jb .L002nocacheinfo + jb .L003nocacheinfo movl $4,%eax movl $0,%ecx .byte 0x0f,0xa2 movl %eax,%edi shrl $14,%edi andl $4095,%edi -.L002nocacheinfo: +.L003nocacheinfo: movl $1,%eax .byte 0x0f,0xa2 + andl $3220176895,%edx cmpl $0,%ebp - jne .L003notP4 + jne .L004notintel + orl $1073741824,%edx andb $15,%ah cmpb $15,%ah - jne .L003notP4 + jne .L004notintel orl $1048576,%edx 
-.L003notP4: +.L004notintel: btl $28,%edx - jnc .L000done + jnc .L002generic andl $4026531839,%edx cmpl $0,%edi - je .L000done + je .L002generic orl $268435456,%edx shrl $16,%ebx cmpb $1,%bl - ja .L000done + ja .L002generic andl $4026531839,%edx -.L000done: - movl %edx,%eax - movl %ecx,%edx +.L002generic: + andl $2048,%ebp + andl $4294965247,%ecx + movl %edx,%esi + orl %ecx,%ebp + btl $27,%ecx + jnc .L005clear_avx + xorl %ecx,%ecx +.byte 15,1,208 + andl $6,%eax + cmpl $6,%eax + je .L006done + cmpl $2,%eax + je .L005clear_avx +.L007clear_xmm: + andl $4261412861,%ebp + andl $4278190079,%esi +.L005clear_avx: + andl $4026525695,%ebp +.L006done: + movl %esi,%eax + movl %ebp,%edx +.L000nocpuid: popl %edi popl %esi popl %ebx @@ -111,9 +140,9 @@ OPENSSL_rdtsc: xorl %edx,%edx leal OPENSSL_ia32cap_P,%ecx btl $4,(%ecx) - jnc .L004notsc + jnc .L008notsc .byte 0x0f,0x31 -.L004notsc: +.L008notsc: ret .size OPENSSL_rdtsc,.-.L_OPENSSL_rdtsc_begin .globl OPENSSL_instrument_halt @@ -123,14 +152,14 @@ OPENSSL_instrument_halt: .L_OPENSSL_instrument_halt_begin: leal OPENSSL_ia32cap_P,%ecx btl $4,(%ecx) - jnc .L005nohalt + jnc .L009nohalt .long 2421723150 andl $3,%eax - jnz .L005nohalt + jnz .L009nohalt pushfl popl %eax btl $9,%eax - jnc .L005nohalt + jnc .L009nohalt .byte 0x0f,0x31 pushl %edx pushl %eax @@ -140,7 +169,7 @@ OPENSSL_instrument_halt: sbbl 4(%esp),%edx addl $8,%esp ret -.L005nohalt: +.L009nohalt: xorl %eax,%eax xorl %edx,%edx ret @@ -153,21 +182,21 @@ OPENSSL_far_spin: pushfl popl %eax btl $9,%eax - jnc .L006nospin + jnc .L010nospin movl 4(%esp),%eax movl 8(%esp),%ecx .long 2430111262 xorl %eax,%eax movl (%ecx),%edx - jmp .L007spin + jmp .L011spin .align 16 -.L007spin: +.L011spin: incl %eax cmpl (%ecx),%edx - je .L007spin + je .L011spin .long 529567888 ret -.L006nospin: +.L010nospin: xorl %eax,%eax xorl %edx,%edx ret @@ -182,9 +211,9 @@ OPENSSL_wipe_cpu: leal OPENSSL_ia32cap_P,%ecx movl (%ecx),%ecx btl $1,(%ecx) - jnc .L008no_x87 + jnc .L012no_x87 .long 4007259865,4007259865,4007259865,4007259865,2430851995 -.L008no_x87: +.L012no_x87: leal 4(%esp),%eax ret .size OPENSSL_wipe_cpu,.-.L_OPENSSL_wipe_cpu_begin @@ -198,11 +227,11 @@ OPENSSL_atomic_add: pushl %ebx nop movl (%edx),%eax -.L009spin: +.L013spin: leal (%eax,%ecx,1),%ebx nop .long 447811568 - jne .L009spin + jne .L013spin movl %ebx,%eax popl %ebx ret @@ -243,37 +272,49 @@ OPENSSL_cleanse: movl 8(%esp),%ecx xorl %eax,%eax cmpl $7,%ecx - jae .L010lot + jae .L014lot cmpl $0,%ecx - je .L011ret -.L012little: + je .L015ret +.L016little: movb %al,(%edx) subl $1,%ecx leal 1(%edx),%edx - jnz .L012little -.L011ret: + jnz .L016little +.L015ret: ret .align 16 -.L010lot: +.L014lot: testl $3,%edx - jz .L013aligned + jz .L017aligned movb %al,(%edx) leal -1(%ecx),%ecx leal 1(%edx),%edx - jmp .L010lot -.L013aligned: + jmp .L014lot +.L017aligned: movl %eax,(%edx) leal -4(%ecx),%ecx testl $-4,%ecx leal 4(%edx),%edx - jnz .L013aligned + jnz .L017aligned cmpl $0,%ecx - jne .L012little + jne .L016little ret .size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin -.comm OPENSSL_ia32cap_P,4,4 +.globl OPENSSL_ia32_rdrand +.type OPENSSL_ia32_rdrand,@function +.align 16 +OPENSSL_ia32_rdrand: +.L_OPENSSL_ia32_rdrand_begin: + movl $8,%ecx +.L018loop: +.byte 15,199,240 + jc .L019break + loop .L018loop +.L019break: + cmpl $0,%eax + cmovel %ecx,%eax + ret +.size OPENSSL_ia32_rdrand,.-.L_OPENSSL_ia32_rdrand_begin +.comm OPENSSL_ia32cap_P,8,4 .section .init call OPENSSL_cpuid_setup - jmp .Linitalign -.align 16 -.Linitalign: diff --git a/deps/openssl/asm/x86-macosx-gas/aes/aes-586.s 
b/deps/openssl/asm/x86-macosx-gas/aes/aes-586.s index ff56a4bef7..a58ea6f76d 100644 --- a/deps/openssl/asm/x86-macosx-gas/aes/aes-586.s +++ b/deps/openssl/asm/x86-macosx-gas/aes/aes-586.s @@ -975,7 +975,7 @@ L_AES_encrypt_begin: call L004pic_point L004pic_point: popl %ebp - leal _OPENSSL_ia32cap_P,%eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L004pic_point(%ebp),%eax leal LAES_Te-L004pic_point(%ebp),%ebp leal 764(%esp),%ebx subl %ebp,%ebx @@ -2153,7 +2153,7 @@ L_AES_decrypt_begin: call L010pic_point L010pic_point: popl %ebp - leal _OPENSSL_ia32cap_P,%eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010pic_point(%ebp),%eax leal LAES_Td-L010pic_point(%ebp),%ebp leal 764(%esp),%ebx subl %ebp,%ebx @@ -2207,7 +2207,7 @@ L_AES_cbc_encrypt_begin: call L013pic_point L013pic_point: popl %ebp - leal _OPENSSL_ia32cap_P,%eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L013pic_point(%ebp),%eax cmpl $0,40(%esp) leal LAES_Te-L013pic_point(%ebp),%ebp jne L014picked_te @@ -2950,16 +2950,16 @@ L045exit: popl %ebx popl %ebp ret -.globl _AES_set_encrypt_key +.globl _private_AES_set_encrypt_key .align 4 -_AES_set_encrypt_key: -L_AES_set_encrypt_key_begin: +_private_AES_set_encrypt_key: +L_private_AES_set_encrypt_key_begin: call __x86_AES_set_encrypt_key ret -.globl _AES_set_decrypt_key +.globl _private_AES_set_decrypt_key .align 4 -_AES_set_decrypt_key: -L_AES_set_decrypt_key_begin: +_private_AES_set_decrypt_key: +L_private_AES_set_decrypt_key_begin: call __x86_AES_set_encrypt_key cmpl $0,%eax je L054proceed @@ -3191,4 +3191,8 @@ L056permute: .byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 .byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 .byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.comm _OPENSSL_ia32cap_P,4 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,8,2 diff --git a/deps/openssl/asm/x86-macosx-gas/aes/aesni-x86.s b/deps/openssl/asm/x86-macosx-gas/aes/aesni-x86.s new file mode 100644 index 0000000000..183ecad299 --- /dev/null +++ b/deps/openssl/asm/x86-macosx-gas/aes/aesni-x86.s @@ -0,0 +1,2107 @@ +.file "../openssl/crypto/aes/asm/aesni-x86.s" +.text +.globl _aesni_encrypt +.align 4 +_aesni_encrypt: +L_aesni_encrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L000enc1_loop_1: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L000enc1_loop_1 +.byte 102,15,56,221,209 + movups %xmm2,(%eax) + ret +.globl _aesni_decrypt +.align 4 +_aesni_decrypt: +L_aesni_decrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L001dec1_loop_2: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L001dec1_loop_2 +.byte 102,15,56,223,209 + movups %xmm2,(%eax) + ret +.align 4 +__aesni_encrypt3: + movups (%edx),%xmm0 + shrl $1,%ecx + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups (%edx),%xmm0 +L002enc3_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %ecx +.byte 102,15,56,220,225 + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leal 32(%edx),%edx +.byte 102,15,56,220,224 + movups (%edx),%xmm0 + jnz L002enc3_loop +.byte 
102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + ret +.align 4 +__aesni_decrypt3: + movups (%edx),%xmm0 + shrl $1,%ecx + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups (%edx),%xmm0 +L003dec3_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %ecx +.byte 102,15,56,222,225 + movups 16(%edx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leal 32(%edx),%edx +.byte 102,15,56,222,224 + movups (%edx),%xmm0 + jnz L003dec3_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + ret +.align 4 +__aesni_encrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shrl $1,%ecx + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups (%edx),%xmm0 +L004enc4_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leal 32(%edx),%edx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups (%edx),%xmm0 + jnz L004enc4_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + ret +.align 4 +__aesni_decrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shrl $1,%ecx + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups (%edx),%xmm0 +L005dec4_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %ecx +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 16(%edx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leal 32(%edx),%edx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups (%edx),%xmm0 + jnz L005dec4_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + ret +.align 4 +__aesni_encrypt6: + movups (%edx),%xmm0 + shrl $1,%ecx + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 + decl %ecx +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,241 + movups (%edx),%xmm0 +.byte 102,15,56,220,249 + jmp L_aesni_encrypt6_enter +.align 4,0x90 +L006enc6_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.align 4,0x90 +L_aesni_encrypt6_enter: + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leal 32(%edx),%edx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups (%edx),%xmm0 + jnz L006enc6_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + ret +.align 4 +__aesni_decrypt6: + movups (%edx),%xmm0 + shrl $1,%ecx + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + 
xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + decl %ecx +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,222,241 + movups (%edx),%xmm0 +.byte 102,15,56,222,249 + jmp L_aesni_decrypt6_enter +.align 4,0x90 +L007dec6_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %ecx +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.align 4,0x90 +L_aesni_decrypt6_enter: + movups 16(%edx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leal 32(%edx),%edx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups (%edx),%xmm0 + jnz L007dec6_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + ret +.globl _aesni_ecb_encrypt +.align 4 +_aesni_ecb_encrypt: +L_aesni_ecb_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + andl $-16,%eax + jz L008ecb_ret + movl 240(%edx),%ecx + testl %ebx,%ebx + jz L009ecb_decrypt + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb L010ecb_enc_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp L011ecb_enc_loop6_enter +.align 4,0x90 +L012ecb_enc_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +L011ecb_enc_loop6_enter: + call __aesni_encrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc L012ecb_enc_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz L008ecb_ret +L010ecb_enc_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb L013ecb_enc_one + movups 16(%esi),%xmm3 + je L014ecb_enc_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb L015ecb_enc_three + movups 48(%esi),%xmm5 + je L016ecb_enc_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call __aesni_encrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp L008ecb_ret +.align 4,0x90 +L013ecb_enc_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L017enc1_loop_3: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L017enc1_loop_3 +.byte 102,15,56,221,209 + movups %xmm2,(%edi) + jmp L008ecb_ret +.align 4,0x90 +L014ecb_enc_two: + xorps %xmm4,%xmm4 + call __aesni_encrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp L008ecb_ret +.align 4,0x90 +L015ecb_enc_three: + call __aesni_encrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp L008ecb_ret +.align 4,0x90 +L016ecb_enc_four: + call __aesni_encrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups 
%xmm4,32(%edi) + movups %xmm5,48(%edi) + jmp L008ecb_ret +.align 4,0x90 +L009ecb_decrypt: + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb L018ecb_dec_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp L019ecb_dec_loop6_enter +.align 4,0x90 +L020ecb_dec_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +L019ecb_dec_loop6_enter: + call __aesni_decrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc L020ecb_dec_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz L008ecb_ret +L018ecb_dec_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb L021ecb_dec_one + movups 16(%esi),%xmm3 + je L022ecb_dec_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb L023ecb_dec_three + movups 48(%esi),%xmm5 + je L024ecb_dec_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call __aesni_decrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp L008ecb_ret +.align 4,0x90 +L021ecb_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L025dec1_loop_4: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L025dec1_loop_4 +.byte 102,15,56,223,209 + movups %xmm2,(%edi) + jmp L008ecb_ret +.align 4,0x90 +L022ecb_dec_two: + xorps %xmm4,%xmm4 + call __aesni_decrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp L008ecb_ret +.align 4,0x90 +L023ecb_dec_three: + call __aesni_decrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp L008ecb_ret +.align 4,0x90 +L024ecb_dec_four: + call __aesni_decrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +L008ecb_ret: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_ccm64_encrypt_blocks +.align 4 +_aesni_ccm64_encrypt_blocks: +L_aesni_ccm64_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + shrl $1,%ecx + leal (%edx),%ebp + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + movl %ecx,%ebx +.byte 102,15,56,0,253 +L026ccm64_enc_outer: + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups (%esi),%xmm6 + xorps %xmm0,%xmm2 + movups 16(%ebp),%xmm1 + xorps %xmm6,%xmm0 + leal 32(%ebp),%edx + xorps %xmm0,%xmm3 + movups (%edx),%xmm0 +L027ccm64_enc2_loop: +.byte 102,15,56,220,209 + decl %ecx +.byte 102,15,56,220,217 + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 + leal 32(%edx),%edx +.byte 102,15,56,220,216 + movups (%edx),%xmm0 + jnz L027ccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + 
paddq 16(%esp),%xmm7 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + decl %eax + leal 16(%esi),%esi + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) + leal 16(%edi),%edi +.byte 102,15,56,0,213 + jnz L026ccm64_enc_outer + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_ccm64_decrypt_blocks +.align 4 +_aesni_ccm64_decrypt_blocks: +L_aesni_ccm64_decrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + movl %edx,%ebp + movl %ecx,%ebx +.byte 102,15,56,0,253 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L028enc1_loop_5: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L028enc1_loop_5 +.byte 102,15,56,221,209 + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 + leal 16(%esi),%esi + jmp L029ccm64_dec_outer +.align 4,0x90 +L029ccm64_dec_outer: + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movl %ebx,%ecx + movups %xmm6,(%edi) + leal 16(%edi),%edi +.byte 102,15,56,0,213 + subl $1,%eax + jz L030ccm64_dec_break + movups (%ebp),%xmm0 + shrl $1,%ecx + movups 16(%ebp),%xmm1 + xorps %xmm0,%xmm6 + leal 32(%ebp),%edx + xorps %xmm0,%xmm2 + xorps %xmm6,%xmm3 + movups (%edx),%xmm0 +L031ccm64_dec2_loop: +.byte 102,15,56,220,209 + decl %ecx +.byte 102,15,56,220,217 + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 + leal 32(%edx),%edx +.byte 102,15,56,220,216 + movups (%edx),%xmm0 + jnz L031ccm64_dec2_loop + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + leal 16(%esi),%esi +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + jmp L029ccm64_dec_outer +.align 4,0x90 +L030ccm64_dec_break: + movl %ebp,%edx + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm6 + leal 32(%edx),%edx + xorps %xmm6,%xmm3 +L032enc1_loop_6: +.byte 102,15,56,220,217 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L032enc1_loop_6 +.byte 102,15,56,221,217 + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_ctr32_encrypt_blocks +.align 4 +_aesni_ctr32_encrypt_blocks: +L_aesni_ctr32_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl %esp,%ebp + subl $88,%esp + andl $-16,%esp + movl %ebp,80(%esp) + cmpl $1,%eax + je L033ctr32_one_shortcut + movdqu (%ebx),%xmm7 + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $6,%ecx + xorl %ebp,%ebp + movl %ecx,16(%esp) + movl %ecx,20(%esp) + movl %ecx,24(%esp) + movl %ebp,28(%esp) +.byte 102,15,58,22,251,3 +.byte 102,15,58,34,253,3 + movl 240(%edx),%ecx + bswap %ebx + pxor %xmm1,%xmm1 + pxor %xmm0,%xmm0 + movdqa (%esp),%xmm2 +.byte 102,15,58,34,203,0 + leal 3(%ebx),%ebp +.byte 102,15,58,34,197,0 + incl %ebx +.byte 102,15,58,34,203,1 + incl %ebp +.byte 102,15,58,34,197,1 + incl 
%ebx +.byte 102,15,58,34,203,2 + incl %ebp +.byte 102,15,58,34,197,2 + movdqa %xmm1,48(%esp) +.byte 102,15,56,0,202 + movdqa %xmm0,64(%esp) +.byte 102,15,56,0,194 + pshufd $192,%xmm1,%xmm2 + pshufd $128,%xmm1,%xmm3 + cmpl $6,%eax + jb L034ctr32_tail + movdqa %xmm7,32(%esp) + shrl $1,%ecx + movl %edx,%ebp + movl %ecx,%ebx + subl $6,%eax + jmp L035ctr32_loop6 +.align 4,0x90 +L035ctr32_loop6: + pshufd $64,%xmm1,%xmm4 + movdqa 32(%esp),%xmm1 + pshufd $192,%xmm0,%xmm5 + por %xmm1,%xmm2 + pshufd $128,%xmm0,%xmm6 + por %xmm1,%xmm3 + pshufd $64,%xmm0,%xmm7 + por %xmm1,%xmm4 + por %xmm1,%xmm5 + por %xmm1,%xmm6 + por %xmm1,%xmm7 + movups (%ebp),%xmm0 + movups 16(%ebp),%xmm1 + leal 32(%ebp),%edx + decl %ecx + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,241 + movups (%edx),%xmm0 +.byte 102,15,56,220,249 + call L_aesni_encrypt6_enter + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups %xmm2,(%edi) + movdqa 16(%esp),%xmm0 + xorps %xmm1,%xmm4 + movdqa 48(%esp),%xmm1 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + paddd %xmm0,%xmm1 + paddd 64(%esp),%xmm0 + movdqa (%esp),%xmm2 + movups 48(%esi),%xmm3 + movups 64(%esi),%xmm4 + xorps %xmm3,%xmm5 + movups 80(%esi),%xmm3 + leal 96(%esi),%esi + movdqa %xmm1,48(%esp) +.byte 102,15,56,0,202 + xorps %xmm4,%xmm6 + movups %xmm5,48(%edi) + xorps %xmm3,%xmm7 + movdqa %xmm0,64(%esp) +.byte 102,15,56,0,194 + movups %xmm6,64(%edi) + pshufd $192,%xmm1,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movl %ebx,%ecx + pshufd $128,%xmm1,%xmm3 + subl $6,%eax + jnc L035ctr32_loop6 + addl $6,%eax + jz L036ctr32_ret + movl %ebp,%edx + leal 1(,%ecx,2),%ecx + movdqa 32(%esp),%xmm7 +L034ctr32_tail: + por %xmm7,%xmm2 + cmpl $2,%eax + jb L037ctr32_one + pshufd $64,%xmm1,%xmm4 + por %xmm7,%xmm3 + je L038ctr32_two + pshufd $192,%xmm0,%xmm5 + por %xmm7,%xmm4 + cmpl $4,%eax + jb L039ctr32_three + pshufd $128,%xmm0,%xmm6 + por %xmm7,%xmm5 + je L040ctr32_four + por %xmm7,%xmm6 + call __aesni_encrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm4 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm5 + movups %xmm2,(%edi) + xorps %xmm1,%xmm6 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp L036ctr32_ret +.align 4,0x90 +L033ctr32_one_shortcut: + movups (%ebx),%xmm2 + movl 240(%edx),%ecx +L037ctr32_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L041enc1_loop_7: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L041enc1_loop_7 +.byte 102,15,56,221,209 + movups (%esi),%xmm6 + xorps %xmm2,%xmm6 + movups %xmm6,(%edi) + jmp L036ctr32_ret +.align 4,0x90 +L038ctr32_two: + call __aesni_encrypt3 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp L036ctr32_ret +.align 4,0x90 +L039ctr32_three: + call __aesni_encrypt3 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + movups 32(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp L036ctr32_ret +.align 4,0x90 +L040ctr32_four: + call __aesni_encrypt4 + movups (%esi),%xmm6 + movups 16(%esi),%xmm7 
+ movups 32(%esi),%xmm1 + xorps %xmm6,%xmm2 + movups 48(%esi),%xmm0 + xorps %xmm7,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +L036ctr32_ret: + movl 80(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_xts_encrypt +.align 4 +_aesni_xts_encrypt: +L_aesni_xts_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L042enc1_loop_8: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L042enc1_loop_8 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + movl 240(%edx),%ecx + andl $-16,%esp + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + movl %edx,%ebp + movl %ecx,%ebx + subl $96,%eax + jc L043xts_enc_short + shrl $1,%ecx + movl %ecx,%ebx + jmp L044xts_enc_loop6 +.align 4,0x90 +L044xts_enc_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + leal 32(%ebp),%edx + pxor 16(%esp),%xmm3 +.byte 102,15,56,220,209 + pxor 32(%esp),%xmm4 +.byte 102,15,56,220,217 + pxor 48(%esp),%xmm5 + decl %ecx +.byte 102,15,56,220,225 + pxor 64(%esp),%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,241 + movups (%edx),%xmm0 +.byte 102,15,56,220,249 + call L_aesni_encrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + movl %ebx,%ecx + pxor %xmm2,%xmm1 + subl $96,%eax + jnc L044xts_enc_loop6 + leal 1(,%ecx,2),%ecx + movl %ebp,%edx + movl %ecx,%ebx +L043xts_enc_short: + addl $96,%eax + jz L045xts_enc_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb L046xts_enc_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + 
pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je L047xts_enc_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb L048xts_enc_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je L049xts_enc_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call __aesni_encrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp L050xts_enc_done +.align 4,0x90 +L046xts_enc_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L051enc1_loop_9: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L051enc1_loop_9 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp L050xts_enc_done +.align 4,0x90 +L047xts_enc_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm4,%xmm4 + call __aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp L050xts_enc_done +.align 4,0x90 +L048xts_enc_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call __aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp L050xts_enc_done +.align 4,0x90 +L049xts_enc_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call __aesni_encrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp L050xts_enc_done +.align 4,0x90 +L045xts_enc_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz L052xts_enc_ret + movdqa %xmm1,%xmm5 + movl %eax,112(%esp) + jmp L053xts_enc_steal +.align 4,0x90 +L050xts_enc_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz L052xts_enc_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm5 + paddq %xmm1,%xmm1 + pand 96(%esp),%xmm5 + pxor %xmm1,%xmm5 +L053xts_enc_steal: + movzbl (%esi),%ecx + movzbl -16(%edi),%edx + leal 1(%esi),%esi + movb %cl,-16(%edi) + movb %dl,(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz L053xts_enc_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl 
%ebx,%ecx + movups -16(%edi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L054enc1_loop_10: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L054enc1_loop_10 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,-16(%edi) +L052xts_enc_ret: + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_xts_decrypt +.align 4 +_aesni_xts_decrypt: +L_aesni_xts_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L055enc1_loop_11: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L055enc1_loop_11 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + andl $-16,%esp + xorl %ebx,%ebx + testl $15,%eax + setnz %bl + shll $4,%ebx + subl %ebx,%eax + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ecx,%ebx + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + subl $96,%eax + jc L056xts_dec_short + shrl $1,%ecx + movl %ecx,%ebx + jmp L057xts_dec_loop6 +.align 4,0x90 +L057xts_dec_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + leal 32(%ebp),%edx + pxor 16(%esp),%xmm3 +.byte 102,15,56,222,209 + pxor 32(%esp),%xmm4 +.byte 102,15,56,222,217 + pxor 48(%esp),%xmm5 + decl %ecx +.byte 102,15,56,222,225 + pxor 64(%esp),%xmm6 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,222,241 + movups (%edx),%xmm0 +.byte 102,15,56,222,249 + call L_aesni_decrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + movl %ebx,%ecx + pxor %xmm2,%xmm1 + subl $96,%eax + jnc L057xts_dec_loop6 + leal 1(,%ecx,2),%ecx + movl %ebp,%edx + movl %ecx,%ebx 
+L056xts_dec_short: + addl $96,%eax + jz L058xts_dec_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb L059xts_dec_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je L060xts_dec_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb L061xts_dec_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je L062xts_dec_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call __aesni_decrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp L063xts_dec_done +.align 4,0x90 +L059xts_dec_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L064dec1_loop_12: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L064dec1_loop_12 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp L063xts_dec_done +.align 4,0x90 +L060xts_dec_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call __aesni_decrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp L063xts_dec_done +.align 4,0x90 +L061xts_dec_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call __aesni_decrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp L063xts_dec_done +.align 4,0x90 +L062xts_dec_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call __aesni_decrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp L063xts_dec_done +.align 4,0x90 +L058xts_dec_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz L065xts_dec_ret + movl %eax,112(%esp) + jmp L066xts_dec_only_one_more +.align 4,0x90 +L063xts_dec_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz L065xts_dec_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 
+L066xts_dec_only_one_more: + pshufd $19,%xmm0,%xmm5 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm5 + pxor %xmm1,%xmm5 + movl %ebp,%edx + movl %ebx,%ecx + movups (%esi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L067dec1_loop_13: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L067dec1_loop_13 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) +L068xts_dec_steal: + movzbl 16(%esi),%ecx + movzbl (%edi),%edx + leal 1(%esi),%esi + movb %cl,(%edi) + movb %dl,16(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz L068xts_dec_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups (%edi),%xmm2 + xorps %xmm6,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L069dec1_loop_14: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L069dec1_loop_14 +.byte 102,15,56,223,209 + xorps %xmm6,%xmm2 + movups %xmm2,(%edi) +L065xts_dec_ret: + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_cbc_encrypt +.align 4 +_aesni_cbc_encrypt: +L_aesni_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl %esp,%ebx + movl 24(%esp),%edi + subl $24,%ebx + movl 28(%esp),%eax + andl $-16,%ebx + movl 32(%esp),%edx + movl 36(%esp),%ebp + testl %eax,%eax + jz L070cbc_abort + cmpl $0,40(%esp) + xchgl %esp,%ebx + movups (%ebp),%xmm7 + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ebx,16(%esp) + movl %ecx,%ebx + je L071cbc_decrypt + movaps %xmm7,%xmm2 + cmpl $16,%eax + jb L072cbc_enc_tail + subl $16,%eax + jmp L073cbc_enc_loop +.align 4,0x90 +L073cbc_enc_loop: + movups (%esi),%xmm7 + leal 16(%esi),%esi + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm7 + leal 32(%edx),%edx + xorps %xmm7,%xmm2 +L074enc1_loop_15: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L074enc1_loop_15 +.byte 102,15,56,221,209 + movl %ebx,%ecx + movl %ebp,%edx + movups %xmm2,(%edi) + leal 16(%edi),%edi + subl $16,%eax + jnc L073cbc_enc_loop + addl $16,%eax + jnz L072cbc_enc_tail + movaps %xmm2,%xmm7 + jmp L075cbc_ret +L072cbc_enc_tail: + movl %eax,%ecx +.long 2767451785 + movl $16,%ecx + subl %eax,%ecx + xorl %eax,%eax +.long 2868115081 + leal -16(%edi),%edi + movl %ebx,%ecx + movl %edi,%esi + movl %ebp,%edx + jmp L073cbc_enc_loop +.align 4,0x90 +L071cbc_decrypt: + cmpl $80,%eax + jbe L076cbc_dec_tail + movaps %xmm7,(%esp) + subl $80,%eax + jmp L077cbc_dec_loop6_enter +.align 4,0x90 +L078cbc_dec_loop6: + movaps %xmm0,(%esp) + movups %xmm7,(%edi) + leal 16(%edi),%edi +L077cbc_dec_loop6_enter: + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + call __aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%esi),%xmm0 + xorps %xmm1,%xmm7 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 96(%esi),%esi + movups %xmm4,32(%edi) + movl %ebx,%ecx + movups %xmm5,48(%edi) + movl %ebp,%edx + movups %xmm6,64(%edi) + leal 80(%edi),%edi + subl $96,%eax + ja L078cbc_dec_loop6 + movaps %xmm7,%xmm2 + movaps %xmm0,%xmm7 + addl $80,%eax + jle L079cbc_dec_tail_collected + movups %xmm2,(%edi) + leal 16(%edi),%edi +L076cbc_dec_tail: + movups 
(%esi),%xmm2 + movaps %xmm2,%xmm6 + cmpl $16,%eax + jbe L080cbc_dec_one + movups 16(%esi),%xmm3 + movaps %xmm3,%xmm5 + cmpl $32,%eax + jbe L081cbc_dec_two + movups 32(%esi),%xmm4 + cmpl $48,%eax + jbe L082cbc_dec_three + movups 48(%esi),%xmm5 + cmpl $64,%eax + jbe L083cbc_dec_four + movups 64(%esi),%xmm6 + movaps %xmm7,(%esp) + movups (%esi),%xmm2 + xorps %xmm7,%xmm7 + call __aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm7 + xorps %xmm0,%xmm6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movaps %xmm6,%xmm2 + subl $80,%eax + jmp L079cbc_dec_tail_collected +.align 4,0x90 +L080cbc_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L084dec1_loop_16: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L084dec1_loop_16 +.byte 102,15,56,223,209 + xorps %xmm7,%xmm2 + movaps %xmm6,%xmm7 + subl $16,%eax + jmp L079cbc_dec_tail_collected +.align 4,0x90 +L081cbc_dec_two: + xorps %xmm4,%xmm4 + call __aesni_decrypt3 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movaps %xmm3,%xmm2 + leal 16(%edi),%edi + movaps %xmm5,%xmm7 + subl $32,%eax + jmp L079cbc_dec_tail_collected +.align 4,0x90 +L082cbc_dec_three: + call __aesni_decrypt3 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm5,%xmm4 + movups %xmm2,(%edi) + movaps %xmm4,%xmm2 + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movups 32(%esi),%xmm7 + subl $48,%eax + jmp L079cbc_dec_tail_collected +.align 4,0x90 +L083cbc_dec_four: + call __aesni_decrypt4 + movups 16(%esi),%xmm1 + movups 32(%esi),%xmm0 + xorps %xmm7,%xmm2 + movups 48(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movaps %xmm5,%xmm2 + subl $64,%eax +L079cbc_dec_tail_collected: + andl $15,%eax + jnz L085cbc_dec_tail_partial + movups %xmm2,(%edi) + jmp L075cbc_ret +.align 4,0x90 +L085cbc_dec_tail_partial: + movaps %xmm2,(%esp) + movl $16,%ecx + movl %esp,%esi + subl %eax,%ecx +.long 2767451785 +L075cbc_ret: + movl 16(%esp),%esp + movl 36(%esp),%ebp + movups %xmm7,(%ebp) +L070cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4 +__aesni_set_encrypt_key: + testl %eax,%eax + jz L086bad_pointer + testl %edx,%edx + jz L086bad_pointer + movups (%eax),%xmm0 + xorps %xmm4,%xmm4 + leal 16(%edx),%edx + cmpl $256,%ecx + je L08714rounds + cmpl $192,%ecx + je L08812rounds + cmpl $128,%ecx + jne L089bad_keybits +.align 4,0x90 +L09010rounds: + movl $9,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,200,1 + call L091key_128_cold +.byte 102,15,58,223,200,2 + call L092key_128 +.byte 102,15,58,223,200,4 + call L092key_128 +.byte 102,15,58,223,200,8 + call L092key_128 +.byte 102,15,58,223,200,16 + call L092key_128 +.byte 102,15,58,223,200,32 + call L092key_128 +.byte 102,15,58,223,200,64 + call L092key_128 +.byte 102,15,58,223,200,128 + call L092key_128 +.byte 102,15,58,223,200,27 + call L092key_128 +.byte 102,15,58,223,200,54 + call L092key_128 + movups %xmm0,(%edx) + movl %ecx,80(%edx) + xorl %eax,%eax + ret +.align 4,0x90 +L092key_128: + movups %xmm0,(%edx) + leal 16(%edx),%edx +L091key_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret 
+.align 4,0x90 +L08812rounds: + movq 16(%eax),%xmm2 + movl $11,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,202,1 + call L093key_192a_cold +.byte 102,15,58,223,202,2 + call L094key_192b +.byte 102,15,58,223,202,4 + call L095key_192a +.byte 102,15,58,223,202,8 + call L094key_192b +.byte 102,15,58,223,202,16 + call L095key_192a +.byte 102,15,58,223,202,32 + call L094key_192b +.byte 102,15,58,223,202,64 + call L095key_192a +.byte 102,15,58,223,202,128 + call L094key_192b + movups %xmm0,(%edx) + movl %ecx,48(%edx) + xorl %eax,%eax + ret +.align 4,0x90 +L095key_192a: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.align 4,0x90 +L093key_192a_cold: + movaps %xmm2,%xmm5 +L096key_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + ret +.align 4,0x90 +L094key_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%edx) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%edx) + leal 32(%edx),%edx + jmp L096key_192b_warm +.align 4,0x90 +L08714rounds: + movups 16(%eax),%xmm2 + movl $13,%ecx + leal 16(%edx),%edx + movups %xmm0,-32(%edx) + movups %xmm2,-16(%edx) +.byte 102,15,58,223,202,1 + call L097key_256a_cold +.byte 102,15,58,223,200,1 + call L098key_256b +.byte 102,15,58,223,202,2 + call L099key_256a +.byte 102,15,58,223,200,2 + call L098key_256b +.byte 102,15,58,223,202,4 + call L099key_256a +.byte 102,15,58,223,200,4 + call L098key_256b +.byte 102,15,58,223,202,8 + call L099key_256a +.byte 102,15,58,223,200,8 + call L098key_256b +.byte 102,15,58,223,202,16 + call L099key_256a +.byte 102,15,58,223,200,16 + call L098key_256b +.byte 102,15,58,223,202,32 + call L099key_256a +.byte 102,15,58,223,200,32 + call L098key_256b +.byte 102,15,58,223,202,64 + call L099key_256a + movups %xmm0,(%edx) + movl %ecx,16(%edx) + xorl %eax,%eax + ret +.align 4,0x90 +L099key_256a: + movups %xmm2,(%edx) + leal 16(%edx),%edx +L097key_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 4,0x90 +L098key_256b: + movups %xmm0,(%edx) + leal 16(%edx),%edx + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + ret +.align 2,0x90 +L086bad_pointer: + movl $-1,%eax + ret +.align 2,0x90 +L089bad_keybits: + movl $-2,%eax + ret +.globl _aesni_set_encrypt_key +.align 4 +_aesni_set_encrypt_key: +L_aesni_set_encrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call __aesni_set_encrypt_key + ret +.globl _aesni_set_decrypt_key +.align 4 +_aesni_set_decrypt_key: +L_aesni_set_decrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call __aesni_set_encrypt_key + movl 12(%esp),%edx + shll $4,%ecx + testl %eax,%eax + jnz L100dec_key_ret + leal 16(%edx,%ecx,1),%eax + movups (%edx),%xmm0 + movups (%eax),%xmm1 + movups %xmm0,(%eax) + movups %xmm1,(%edx) + leal 16(%edx),%edx + leal -16(%eax),%eax +L101dec_key_inverse: + movups (%edx),%xmm0 + movups (%eax),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leal 16(%edx),%edx + leal -16(%eax),%eax + movups %xmm0,16(%eax) + movups %xmm1,-16(%edx) + cmpl %edx,%eax + ja L101dec_key_inverse + movups (%edx),%xmm0 +.byte 102,15,56,219,192 + movups %xmm0,(%edx) + xorl %eax,%eax +L100dec_key_ret: + ret +.byte 
65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +.byte 115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm/x86-macosx-gas/camellia/cmll-x86.s b/deps/openssl/asm/x86-macosx-gas/camellia/cmll-x86.s index 4d61caa680..2367cee780 100644 --- a/deps/openssl/asm/x86-macosx-gas/camellia/cmll-x86.s +++ b/deps/openssl/asm/x86-macosx-gas/camellia/cmll-x86.s @@ -1519,10 +1519,10 @@ L013done: popl %ebx popl %ebp ret -.globl _Camellia_set_key +.globl _private_Camellia_set_key .align 4 -_Camellia_set_key: -L_Camellia_set_key_begin: +_private_Camellia_set_key: +L_private_Camellia_set_key_begin: pushl %ebx movl 8(%esp),%ecx movl 12(%esp),%ebx diff --git a/deps/openssl/asm/x86-macosx-gas/des/crypt586.s b/deps/openssl/asm/x86-macosx-gas/des/crypt586.s index edb1bb3915..7d0074ec2a 100644 --- a/deps/openssl/asm/x86-macosx-gas/des/crypt586.s +++ b/deps/openssl/asm/x86-macosx-gas/des/crypt586.s @@ -13,11 +13,14 @@ L_fcrypt_body_begin: xorl %edi,%edi xorl %esi,%esi - leal _DES_SPtrans,%edx + call L000PIC_me_up +L000PIC_me_up: + popl %edx + movl L_DES_SPtrans$non_lazy_ptr-L000PIC_me_up(%edx),%edx pushl %edx movl 28(%esp),%ebp pushl $25 -L000start: +L001start: # Round 0 @@ -840,7 +843,7 @@ L000start: movl %esi,%edi movl %eax,%esi movl %ebx,(%esp) - jnz L000start + jnz L001start # FP @@ -889,3 +892,7 @@ L000start: popl %ebx popl %ebp ret +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_DES_SPtrans$non_lazy_ptr: +.indirect_symbol _DES_SPtrans +.long 0 diff --git a/deps/openssl/asm/x86-macosx-gas/rc4/rc4-586.s b/deps/openssl/asm/x86-macosx-gas/rc4/rc4-586.s index a821dc9503..882a02d74c 100644 --- a/deps/openssl/asm/x86-macosx-gas/rc4/rc4-586.s +++ b/deps/openssl/asm/x86-macosx-gas/rc4/rc4-586.s @@ -28,11 +28,149 @@ L_RC4_begin: movl (%edi,%eax,4),%ecx andl $-4,%edx jz L002loop1 - leal -4(%esi,%edx,1),%edx - movl %edx,28(%esp) + testl $-8,%edx movl %ebp,32(%esp) + jz L003go4loop4 + call L004PIC_me_up +L004PIC_me_up: + popl %ebp + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L004PIC_me_up(%ebp),%ebp + btl $26,(%ebp) + jnc L003go4loop4 + movl 32(%esp),%ebp + andl $-8,%edx + leal -8(%esi,%edx,1),%edx + movl %edx,-4(%edi) + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + movq (%esi),%mm0 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm2 + jmp L005loop_mmx_enter .align 4,0x90 -L003loop4: +L006loop_mmx: + addb %cl,%bl + psllq $56,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movq (%esi),%mm0 + movq %mm2,-8(%ebp,%esi,1) + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm2 +L005loop_mmx_enter: + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm0,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $8,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $16,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx 
+ movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $24,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $32,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $40,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $48,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + movl %ebx,%edx + xorl %ebx,%ebx + movb %dl,%bl + cmpl -4(%edi),%esi + leal 8(%esi),%esi + jb L006loop_mmx + psllq $56,%mm1 + pxor %mm1,%mm2 + movq %mm2,-8(%ebp,%esi,1) + emms + cmpl 24(%esp),%esi + je L007done + jmp L002loop1 +.align 4,0x90 +L003go4loop4: + leal -4(%esi,%edx,1),%edx + movl %edx,28(%esp) +L008loop4: addb %cl,%bl movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) @@ -78,9 +216,9 @@ L003loop4: movl %ebp,(%ecx,%esi,1) leal 4(%esi),%esi movl (%edi,%eax,4),%ecx - jb L003loop4 + jb L008loop4 cmpl 24(%esp),%esi - je L004done + je L007done movl 32(%esp),%ebp .align 4,0x90 L002loop1: @@ -98,11 +236,11 @@ L002loop1: cmpl 24(%esp),%esi movb %dl,-1(%ebp,%esi,1) jb L002loop1 - jmp L004done + jmp L007done .align 4,0x90 L001RC4_CHAR: movzbl (%edi,%eax,1),%ecx -L005cloop1: +L009cloop1: addb %cl,%bl movzbl (%edi,%ebx,1),%edx movb %cl,(%edi,%ebx,1) @@ -115,10 +253,10 @@ L005cloop1: movzbl (%edi,%eax,1),%ecx cmpl 24(%esp),%esi movb %dl,-1(%ebp,%esi,1) - jb L005cloop1 -L004done: + jb L009cloop1 +L007done: decb %al - movb %bl,-4(%edi) + movl %ebx,-4(%edi) movb %al,-8(%edi) L000abort: popl %edi @@ -126,10 +264,10 @@ L000abort: popl %ebx popl %ebp ret -.globl _RC4_set_key +.globl _private_RC4_set_key .align 4 -_RC4_set_key: -L_RC4_set_key_begin: +_private_RC4_set_key: +L_private_RC4_set_key_begin: pushl %ebp pushl %ebx pushl %esi @@ -137,60 +275,63 @@ L_RC4_set_key_begin: movl 20(%esp),%edi movl 24(%esp),%ebp movl 28(%esp),%esi - leal _OPENSSL_ia32cap_P,%edx + call L010PIC_me_up +L010PIC_me_up: + popl %edx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010PIC_me_up(%edx),%edx leal 8(%edi),%edi leal (%esi,%ebp,1),%esi negl %ebp xorl %eax,%eax movl %ebp,-4(%edi) btl $20,(%edx) - jc L006c1stloop + jc L011c1stloop .align 4,0x90 -L007w1stloop: +L012w1stloop: movl %eax,(%edi,%eax,4) addb $1,%al - jnc L007w1stloop + jnc L012w1stloop xorl %ecx,%ecx xorl %edx,%edx .align 4,0x90 -L008w2ndloop: +L013w2ndloop: movl (%edi,%ecx,4),%eax addb (%esi,%ebp,1),%dl addb %al,%dl addl $1,%ebp movl (%edi,%edx,4),%ebx - jnz L009wnowrap + jnz L014wnowrap movl -4(%edi),%ebp -L009wnowrap: +L014wnowrap: movl %eax,(%edi,%edx,4) movl %ebx,(%edi,%ecx,4) addb $1,%cl - jnc L008w2ndloop - jmp L010exit + jnc L013w2ndloop + jmp L015exit .align 4,0x90 -L006c1stloop: +L011c1stloop: movb %al,(%edi,%eax,1) addb $1,%al - jnc L006c1stloop + jnc L011c1stloop xorl %ecx,%ecx xorl %edx,%edx xorl %ebx,%ebx .align 4,0x90 -L011c2ndloop: +L016c2ndloop: movb (%edi,%ecx,1),%al addb (%esi,%ebp,1),%dl addb %al,%dl addl $1,%ebp movb (%edi,%edx,1),%bl - jnz L012cnowrap + jnz 
L017cnowrap movl -4(%edi),%ebp -L012cnowrap: +L017cnowrap: movb %al,(%edi,%edx,1) movb %bl,(%edi,%ecx,1) addb $1,%cl - jnc L011c2ndloop + jnc L016c2ndloop movl $-1,256(%edi) -L010exit: +L015exit: xorl %eax,%eax movl %eax,-8(%edi) movl %eax,-4(%edi) @@ -203,22 +344,36 @@ L010exit: .align 4 _RC4_options: L_RC4_options_begin: - call L013pic_point -L013pic_point: + call L018pic_point +L018pic_point: popl %eax - leal L014opts-L013pic_point(%eax),%eax - leal _OPENSSL_ia32cap_P,%edx - btl $20,(%edx) - jnc L015skip + leal L019opts-L018pic_point(%eax),%eax + call L020PIC_me_up +L020PIC_me_up: + popl %edx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L020PIC_me_up(%edx),%edx + movl (%edx),%edx + btl $20,%edx + jc L0211xchar + btl $26,%edx + jnc L022ret + addl $25,%eax + ret +L0211xchar: addl $12,%eax -L015skip: +L022ret: ret .align 6,0x90 -L014opts: +L019opts: .byte 114,99,52,40,52,120,44,105,110,116,41,0 .byte 114,99,52,40,49,120,44,99,104,97,114,41,0 +.byte 114,99,52,40,56,120,44,109,109,120,41,0 .byte 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89 .byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 .byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 6,0x90 -.comm _OPENSSL_ia32cap_P,4 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,8,2 diff --git a/deps/openssl/asm/x86-macosx-gas/sha/sha1-586.s b/deps/openssl/asm/x86-macosx-gas/sha/sha1-586.s index 4f356fe70f..28d95721b8 100644 --- a/deps/openssl/asm/x86-macosx-gas/sha/sha1-586.s +++ b/deps/openssl/asm/x86-macosx-gas/sha/sha1-586.s @@ -11,11 +11,12 @@ L_sha1_block_data_order_begin: movl 20(%esp),%ebp movl 24(%esp),%esi movl 28(%esp),%eax - subl $64,%esp + subl $76,%esp shll $6,%eax addl %esi,%eax - movl %eax,92(%esp) + movl %eax,104(%esp) movl 16(%ebp),%edi + jmp L000loop .align 4,0x90 L000loop: movl (%esi),%eax @@ -66,7 +67,7 @@ L000loop: movl %ebx,52(%esp) movl %ecx,56(%esp) movl %edx,60(%esp) - movl %esi,88(%esp) + movl %esi,100(%esp) movl (%ebp),%eax movl 4(%ebp),%ebx movl 8(%ebp),%ecx @@ -78,10 +79,10 @@ L000loop: roll $5,%ebp xorl %edx,%esi addl %edi,%ebp - andl %ebx,%esi movl (%esp),%edi - xorl %edx,%esi + andl %ebx,%esi rorl $2,%ebx + xorl %edx,%esi leal 1518500249(%ebp,%edi,1),%ebp addl %esi,%ebp # 00_15 1 @@ -91,10 +92,10 @@ L000loop: roll $5,%ebp xorl %ecx,%edi addl %edx,%ebp - andl %eax,%edi movl 4(%esp),%edx - xorl %ecx,%edi + andl %eax,%edi rorl $2,%eax + xorl %ecx,%edi leal 1518500249(%ebp,%edx,1),%ebp addl %edi,%ebp # 00_15 2 @@ -104,10 +105,10 @@ L000loop: roll $5,%ebp xorl %ebx,%edx addl %ecx,%ebp - andl %esi,%edx movl 8(%esp),%ecx - xorl %ebx,%edx + andl %esi,%edx rorl $2,%esi + xorl %ebx,%edx leal 1518500249(%ebp,%ecx,1),%ebp addl %edx,%ebp # 00_15 3 @@ -117,10 +118,10 @@ L000loop: roll $5,%ebp xorl %eax,%ecx addl %ebx,%ebp - andl %edi,%ecx movl 12(%esp),%ebx - xorl %eax,%ecx + andl %edi,%ecx rorl $2,%edi + xorl %eax,%ecx leal 1518500249(%ebp,%ebx,1),%ebp addl %ecx,%ebp # 00_15 4 @@ -130,10 +131,10 @@ L000loop: roll $5,%ebp xorl %esi,%ebx addl %eax,%ebp - andl %edx,%ebx movl 16(%esp),%eax - xorl %esi,%ebx + andl %edx,%ebx rorl $2,%edx + xorl %esi,%ebx leal 1518500249(%ebp,%eax,1),%ebp addl %ebx,%ebp # 00_15 5 @@ -143,10 +144,10 @@ L000loop: roll $5,%ebp xorl %edi,%eax addl %esi,%ebp - andl %ecx,%eax movl 20(%esp),%esi - xorl %edi,%eax + andl %ecx,%eax rorl $2,%ecx + xorl %edi,%eax leal 1518500249(%ebp,%esi,1),%ebp addl %eax,%ebp # 00_15 6 @@ -156,10 +157,10 @@ L000loop: roll $5,%ebp xorl 
%edx,%esi addl %edi,%ebp - andl %ebx,%esi movl 24(%esp),%edi - xorl %edx,%esi + andl %ebx,%esi rorl $2,%ebx + xorl %edx,%esi leal 1518500249(%ebp,%edi,1),%ebp addl %esi,%ebp # 00_15 7 @@ -169,10 +170,10 @@ L000loop: roll $5,%ebp xorl %ecx,%edi addl %edx,%ebp - andl %eax,%edi movl 28(%esp),%edx - xorl %ecx,%edi + andl %eax,%edi rorl $2,%eax + xorl %ecx,%edi leal 1518500249(%ebp,%edx,1),%ebp addl %edi,%ebp # 00_15 8 @@ -182,10 +183,10 @@ L000loop: roll $5,%ebp xorl %ebx,%edx addl %ecx,%ebp - andl %esi,%edx movl 32(%esp),%ecx - xorl %ebx,%edx + andl %esi,%edx rorl $2,%esi + xorl %ebx,%edx leal 1518500249(%ebp,%ecx,1),%ebp addl %edx,%ebp # 00_15 9 @@ -195,10 +196,10 @@ L000loop: roll $5,%ebp xorl %eax,%ecx addl %ebx,%ebp - andl %edi,%ecx movl 36(%esp),%ebx - xorl %eax,%ecx + andl %edi,%ecx rorl $2,%edi + xorl %eax,%ecx leal 1518500249(%ebp,%ebx,1),%ebp addl %ecx,%ebp # 00_15 10 @@ -208,10 +209,10 @@ L000loop: roll $5,%ebp xorl %esi,%ebx addl %eax,%ebp - andl %edx,%ebx movl 40(%esp),%eax - xorl %esi,%ebx + andl %edx,%ebx rorl $2,%edx + xorl %esi,%ebx leal 1518500249(%ebp,%eax,1),%ebp addl %ebx,%ebp # 00_15 11 @@ -221,10 +222,10 @@ L000loop: roll $5,%ebp xorl %edi,%eax addl %esi,%ebp - andl %ecx,%eax movl 44(%esp),%esi - xorl %edi,%eax + andl %ecx,%eax rorl $2,%ecx + xorl %edi,%eax leal 1518500249(%ebp,%esi,1),%ebp addl %eax,%ebp # 00_15 12 @@ -234,10 +235,10 @@ L000loop: roll $5,%ebp xorl %edx,%esi addl %edi,%ebp - andl %ebx,%esi movl 48(%esp),%edi - xorl %edx,%esi + andl %ebx,%esi rorl $2,%ebx + xorl %edx,%esi leal 1518500249(%ebp,%edi,1),%ebp addl %esi,%ebp # 00_15 13 @@ -247,10 +248,10 @@ L000loop: roll $5,%ebp xorl %ecx,%edi addl %edx,%ebp - andl %eax,%edi movl 52(%esp),%edx - xorl %ecx,%edi + andl %eax,%edi rorl $2,%eax + xorl %ecx,%edi leal 1518500249(%ebp,%edx,1),%ebp addl %edi,%ebp # 00_15 14 @@ -260,10 +261,10 @@ L000loop: roll $5,%ebp xorl %ebx,%edx addl %ecx,%ebp - andl %esi,%edx movl 56(%esp),%ecx - xorl %ebx,%edx + andl %esi,%edx rorl $2,%esi + xorl %ebx,%edx leal 1518500249(%ebp,%ecx,1),%ebp addl %edx,%ebp # 00_15 15 @@ -273,1226 +274,1163 @@ L000loop: roll $5,%ebp xorl %eax,%ecx addl %ebx,%ebp - andl %edi,%ecx movl 60(%esp),%ebx - xorl %eax,%ecx + andl %edi,%ecx rorl $2,%edi + xorl %eax,%ecx leal 1518500249(%ebp,%ebx,1),%ebp + movl (%esp),%ebx addl %ebp,%ecx # 16_19 16 - movl (%esp),%ebx movl %edi,%ebp xorl 8(%esp),%ebx xorl %esi,%ebp xorl 32(%esp),%ebx andl %edx,%ebp - rorl $2,%edx xorl 52(%esp),%ebx roll $1,%ebx xorl %esi,%ebp + addl %ebp,%eax + movl %ecx,%ebp + rorl $2,%edx movl %ebx,(%esp) + roll $5,%ebp leal 1518500249(%ebx,%eax,1),%ebx - movl %ecx,%eax - roll $5,%eax + movl 4(%esp),%eax addl %ebp,%ebx - addl %eax,%ebx # 16_19 17 - movl 4(%esp),%eax movl %edx,%ebp xorl 12(%esp),%eax xorl %edi,%ebp xorl 36(%esp),%eax andl %ecx,%ebp - rorl $2,%ecx xorl 56(%esp),%eax roll $1,%eax xorl %edi,%ebp + addl %ebp,%esi + movl %ebx,%ebp + rorl $2,%ecx movl %eax,4(%esp) + roll $5,%ebp leal 1518500249(%eax,%esi,1),%eax - movl %ebx,%esi - roll $5,%esi + movl 8(%esp),%esi addl %ebp,%eax - addl %esi,%eax # 16_19 18 - movl 8(%esp),%esi movl %ecx,%ebp xorl 16(%esp),%esi xorl %edx,%ebp xorl 40(%esp),%esi andl %ebx,%ebp - rorl $2,%ebx xorl 60(%esp),%esi roll $1,%esi xorl %edx,%ebp + addl %ebp,%edi + movl %eax,%ebp + rorl $2,%ebx movl %esi,8(%esp) + roll $5,%ebp leal 1518500249(%esi,%edi,1),%esi - movl %eax,%edi - roll $5,%edi + movl 12(%esp),%edi addl %ebp,%esi - addl %edi,%esi # 16_19 19 - movl 12(%esp),%edi movl %ebx,%ebp xorl 20(%esp),%edi xorl %ecx,%ebp xorl 44(%esp),%edi andl %eax,%ebp - 
rorl $2,%eax xorl (%esp),%edi roll $1,%edi xorl %ecx,%ebp + addl %ebp,%edx + movl %esi,%ebp + rorl $2,%eax movl %edi,12(%esp) + roll $5,%ebp leal 1518500249(%edi,%edx,1),%edi - movl %esi,%edx - roll $5,%edx + movl 16(%esp),%edx addl %ebp,%edi - addl %edx,%edi # 20_39 20 movl %esi,%ebp - movl 16(%esp),%edx - rorl $2,%esi xorl 24(%esp),%edx xorl %eax,%ebp xorl 48(%esp),%edx xorl %ebx,%ebp xorl 4(%esp),%edx roll $1,%edx - addl %ecx,%ebp + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp movl %edx,16(%esp) - movl %edi,%ecx - roll $5,%ecx - leal 1859775393(%edx,%ebp,1),%edx - addl %ecx,%edx + leal 1859775393(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx # 20_39 21 movl %edi,%ebp - movl 20(%esp),%ecx - rorl $2,%edi xorl 28(%esp),%ecx xorl %esi,%ebp xorl 52(%esp),%ecx xorl %eax,%ebp xorl 8(%esp),%ecx roll $1,%ecx - addl %ebx,%ebp + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp movl %ecx,20(%esp) - movl %edx,%ebx - roll $5,%ebx - leal 1859775393(%ecx,%ebp,1),%ecx - addl %ebx,%ecx + leal 1859775393(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx # 20_39 22 movl %edx,%ebp - movl 24(%esp),%ebx - rorl $2,%edx xorl 32(%esp),%ebx xorl %edi,%ebp xorl 56(%esp),%ebx xorl %esi,%ebp xorl 12(%esp),%ebx roll $1,%ebx - addl %eax,%ebp + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp movl %ebx,24(%esp) - movl %ecx,%eax - roll $5,%eax - leal 1859775393(%ebx,%ebp,1),%ebx - addl %eax,%ebx + leal 1859775393(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx # 20_39 23 movl %ecx,%ebp - movl 28(%esp),%eax - rorl $2,%ecx xorl 36(%esp),%eax xorl %edx,%ebp xorl 60(%esp),%eax xorl %edi,%ebp xorl 16(%esp),%eax roll $1,%eax - addl %esi,%ebp + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp movl %eax,28(%esp) - movl %ebx,%esi - roll $5,%esi - leal 1859775393(%eax,%ebp,1),%eax - addl %esi,%eax + leal 1859775393(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax # 20_39 24 movl %ebx,%ebp - movl 32(%esp),%esi - rorl $2,%ebx xorl 40(%esp),%esi xorl %ecx,%ebp xorl (%esp),%esi xorl %edx,%ebp xorl 20(%esp),%esi roll $1,%esi - addl %edi,%ebp + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp movl %esi,32(%esp) - movl %eax,%edi - roll $5,%edi - leal 1859775393(%esi,%ebp,1),%esi - addl %edi,%esi + leal 1859775393(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi # 20_39 25 movl %eax,%ebp - movl 36(%esp),%edi - rorl $2,%eax xorl 44(%esp),%edi xorl %ebx,%ebp xorl 4(%esp),%edi xorl %ecx,%ebp xorl 24(%esp),%edi roll $1,%edi - addl %edx,%ebp + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp movl %edi,36(%esp) - movl %esi,%edx - roll $5,%edx - leal 1859775393(%edi,%ebp,1),%edi - addl %edx,%edi + leal 1859775393(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi # 20_39 26 movl %esi,%ebp - movl 40(%esp),%edx - rorl $2,%esi xorl 48(%esp),%edx xorl %eax,%ebp xorl 8(%esp),%edx xorl %ebx,%ebp xorl 28(%esp),%edx roll $1,%edx - addl %ecx,%ebp + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp movl %edx,40(%esp) - movl %edi,%ecx - roll $5,%ecx - leal 1859775393(%edx,%ebp,1),%edx - addl %ecx,%edx + leal 1859775393(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx # 20_39 27 movl %edi,%ebp - movl 44(%esp),%ecx - rorl $2,%edi xorl 52(%esp),%ecx xorl %esi,%ebp xorl 12(%esp),%ecx xorl %eax,%ebp xorl 32(%esp),%ecx roll $1,%ecx - addl %ebx,%ebp + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp movl %ecx,44(%esp) - movl %edx,%ebx - roll $5,%ebx - leal 1859775393(%ecx,%ebp,1),%ecx - addl %ebx,%ecx + leal 
1859775393(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx # 20_39 28 movl %edx,%ebp - movl 48(%esp),%ebx - rorl $2,%edx xorl 56(%esp),%ebx xorl %edi,%ebp xorl 16(%esp),%ebx xorl %esi,%ebp xorl 36(%esp),%ebx roll $1,%ebx - addl %eax,%ebp + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp movl %ebx,48(%esp) - movl %ecx,%eax - roll $5,%eax - leal 1859775393(%ebx,%ebp,1),%ebx - addl %eax,%ebx + leal 1859775393(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx # 20_39 29 movl %ecx,%ebp - movl 52(%esp),%eax - rorl $2,%ecx xorl 60(%esp),%eax xorl %edx,%ebp xorl 20(%esp),%eax xorl %edi,%ebp xorl 40(%esp),%eax roll $1,%eax - addl %esi,%ebp + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp movl %eax,52(%esp) - movl %ebx,%esi - roll $5,%esi - leal 1859775393(%eax,%ebp,1),%eax - addl %esi,%eax + leal 1859775393(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax # 20_39 30 movl %ebx,%ebp - movl 56(%esp),%esi - rorl $2,%ebx xorl (%esp),%esi xorl %ecx,%ebp xorl 24(%esp),%esi xorl %edx,%ebp xorl 44(%esp),%esi roll $1,%esi - addl %edi,%ebp + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp movl %esi,56(%esp) - movl %eax,%edi - roll $5,%edi - leal 1859775393(%esi,%ebp,1),%esi - addl %edi,%esi + leal 1859775393(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi # 20_39 31 movl %eax,%ebp - movl 60(%esp),%edi - rorl $2,%eax xorl 4(%esp),%edi xorl %ebx,%ebp xorl 28(%esp),%edi xorl %ecx,%ebp xorl 48(%esp),%edi roll $1,%edi - addl %edx,%ebp + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp movl %edi,60(%esp) - movl %esi,%edx - roll $5,%edx - leal 1859775393(%edi,%ebp,1),%edi - addl %edx,%edi + leal 1859775393(%edi,%edx,1),%edi + movl (%esp),%edx + addl %ebp,%edi # 20_39 32 movl %esi,%ebp - movl (%esp),%edx - rorl $2,%esi xorl 8(%esp),%edx xorl %eax,%ebp xorl 32(%esp),%edx xorl %ebx,%ebp xorl 52(%esp),%edx roll $1,%edx - addl %ecx,%ebp + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp movl %edx,(%esp) - movl %edi,%ecx - roll $5,%ecx - leal 1859775393(%edx,%ebp,1),%edx - addl %ecx,%edx + leal 1859775393(%edx,%ecx,1),%edx + movl 4(%esp),%ecx + addl %ebp,%edx # 20_39 33 movl %edi,%ebp - movl 4(%esp),%ecx - rorl $2,%edi xorl 12(%esp),%ecx xorl %esi,%ebp xorl 36(%esp),%ecx xorl %eax,%ebp xorl 56(%esp),%ecx roll $1,%ecx - addl %ebx,%ebp + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp movl %ecx,4(%esp) - movl %edx,%ebx - roll $5,%ebx - leal 1859775393(%ecx,%ebp,1),%ecx - addl %ebx,%ecx + leal 1859775393(%ecx,%ebx,1),%ecx + movl 8(%esp),%ebx + addl %ebp,%ecx # 20_39 34 movl %edx,%ebp - movl 8(%esp),%ebx - rorl $2,%edx xorl 16(%esp),%ebx xorl %edi,%ebp xorl 40(%esp),%ebx xorl %esi,%ebp xorl 60(%esp),%ebx roll $1,%ebx - addl %eax,%ebp + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp movl %ebx,8(%esp) - movl %ecx,%eax - roll $5,%eax - leal 1859775393(%ebx,%ebp,1),%ebx - addl %eax,%ebx + leal 1859775393(%ebx,%eax,1),%ebx + movl 12(%esp),%eax + addl %ebp,%ebx # 20_39 35 movl %ecx,%ebp - movl 12(%esp),%eax - rorl $2,%ecx xorl 20(%esp),%eax xorl %edx,%ebp xorl 44(%esp),%eax xorl %edi,%ebp xorl (%esp),%eax roll $1,%eax - addl %esi,%ebp + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp movl %eax,12(%esp) - movl %ebx,%esi - roll $5,%esi - leal 1859775393(%eax,%ebp,1),%eax - addl %esi,%eax + leal 1859775393(%eax,%esi,1),%eax + movl 16(%esp),%esi + addl %ebp,%eax # 20_39 36 movl %ebx,%ebp - movl 16(%esp),%esi - rorl $2,%ebx xorl 24(%esp),%esi xorl %ecx,%ebp xorl 48(%esp),%esi xorl %edx,%ebp xorl 
4(%esp),%esi roll $1,%esi - addl %edi,%ebp + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp movl %esi,16(%esp) - movl %eax,%edi - roll $5,%edi - leal 1859775393(%esi,%ebp,1),%esi - addl %edi,%esi + leal 1859775393(%esi,%edi,1),%esi + movl 20(%esp),%edi + addl %ebp,%esi # 20_39 37 movl %eax,%ebp - movl 20(%esp),%edi - rorl $2,%eax xorl 28(%esp),%edi xorl %ebx,%ebp xorl 52(%esp),%edi xorl %ecx,%ebp xorl 8(%esp),%edi roll $1,%edi - addl %edx,%ebp + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp movl %edi,20(%esp) - movl %esi,%edx - roll $5,%edx - leal 1859775393(%edi,%ebp,1),%edi - addl %edx,%edi + leal 1859775393(%edi,%edx,1),%edi + movl 24(%esp),%edx + addl %ebp,%edi # 20_39 38 movl %esi,%ebp - movl 24(%esp),%edx - rorl $2,%esi xorl 32(%esp),%edx xorl %eax,%ebp xorl 56(%esp),%edx xorl %ebx,%ebp xorl 12(%esp),%edx roll $1,%edx - addl %ecx,%ebp + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp movl %edx,24(%esp) - movl %edi,%ecx - roll $5,%ecx - leal 1859775393(%edx,%ebp,1),%edx - addl %ecx,%edx + leal 1859775393(%edx,%ecx,1),%edx + movl 28(%esp),%ecx + addl %ebp,%edx # 20_39 39 movl %edi,%ebp - movl 28(%esp),%ecx - rorl $2,%edi xorl 36(%esp),%ecx xorl %esi,%ebp xorl 60(%esp),%ecx xorl %eax,%ebp xorl 16(%esp),%ecx roll $1,%ecx - addl %ebx,%ebp + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp movl %ecx,28(%esp) - movl %edx,%ebx - roll $5,%ebx - leal 1859775393(%ecx,%ebp,1),%ecx - addl %ebx,%ecx + leal 1859775393(%ecx,%ebx,1),%ecx + movl 32(%esp),%ebx + addl %ebp,%ecx # 40_59 40 - movl 32(%esp),%ebx - movl 40(%esp),%ebp - xorl %ebp,%ebx - movl (%esp),%ebp - xorl %ebp,%ebx - movl 20(%esp),%ebp - xorl %ebp,%ebx - movl %edx,%ebp + movl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl (%esp),%ebx + andl %edx,%ebp + xorl 20(%esp),%ebx roll $1,%ebx - orl %edi,%ebp - movl %ebx,32(%esp) - andl %esi,%ebp - leal 2400959708(%ebx,%eax,1),%ebx - movl %edx,%eax + addl %eax,%ebp rorl $2,%edx - andl %edi,%eax - orl %eax,%ebp movl %ecx,%eax roll $5,%eax - addl %ebp,%ebx + movl %ebx,32(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp addl %eax,%ebx + andl %esi,%ebp + movl 36(%esp),%eax + addl %ebp,%ebx # 40_59 41 - movl 36(%esp),%eax - movl 44(%esp),%ebp - xorl %ebp,%eax - movl 4(%esp),%ebp - xorl %ebp,%eax - movl 24(%esp),%ebp - xorl %ebp,%eax - movl %ecx,%ebp + movl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl 4(%esp),%eax + andl %ecx,%ebp + xorl 24(%esp),%eax roll $1,%eax - orl %edx,%ebp - movl %eax,36(%esp) - andl %edi,%ebp - leal 2400959708(%eax,%esi,1),%eax - movl %ecx,%esi + addl %esi,%ebp rorl $2,%ecx - andl %edx,%esi - orl %esi,%ebp movl %ebx,%esi roll $5,%esi - addl %ebp,%eax + movl %eax,36(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp addl %esi,%eax + andl %edi,%ebp + movl 40(%esp),%esi + addl %ebp,%eax # 40_59 42 - movl 40(%esp),%esi - movl 48(%esp),%ebp - xorl %ebp,%esi - movl 8(%esp),%ebp - xorl %ebp,%esi - movl 28(%esp),%ebp - xorl %ebp,%esi - movl %ebx,%ebp + movl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 8(%esp),%esi + andl %ebx,%ebp + xorl 28(%esp),%esi roll $1,%esi - orl %ecx,%ebp - movl %esi,40(%esp) - andl %edx,%ebp - leal 2400959708(%esi,%edi,1),%esi - movl %ebx,%edi + addl %edi,%ebp rorl $2,%ebx - andl %ecx,%edi - orl %edi,%ebp movl %eax,%edi roll $5,%edi - addl %ebp,%esi + movl %esi,40(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp addl %edi,%esi + andl %edx,%ebp + movl 44(%esp),%edi + addl %ebp,%esi # 40_59 43 - movl 44(%esp),%edi - movl 52(%esp),%ebp - xorl 
%ebp,%edi - movl 12(%esp),%ebp - xorl %ebp,%edi - movl 32(%esp),%ebp - xorl %ebp,%edi - movl %eax,%ebp + movl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 12(%esp),%edi + andl %eax,%ebp + xorl 32(%esp),%edi roll $1,%edi - orl %ebx,%ebp - movl %edi,44(%esp) - andl %ecx,%ebp - leal 2400959708(%edi,%edx,1),%edi - movl %eax,%edx + addl %edx,%ebp rorl $2,%eax - andl %ebx,%edx - orl %edx,%ebp movl %esi,%edx roll $5,%edx - addl %ebp,%edi + movl %edi,44(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp addl %edx,%edi + andl %ecx,%ebp + movl 48(%esp),%edx + addl %ebp,%edi # 40_59 44 - movl 48(%esp),%edx - movl 56(%esp),%ebp - xorl %ebp,%edx - movl 16(%esp),%ebp - xorl %ebp,%edx - movl 36(%esp),%ebp - xorl %ebp,%edx - movl %esi,%ebp + movl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 16(%esp),%edx + andl %esi,%ebp + xorl 36(%esp),%edx roll $1,%edx - orl %eax,%ebp - movl %edx,48(%esp) - andl %ebx,%ebp - leal 2400959708(%edx,%ecx,1),%edx - movl %esi,%ecx + addl %ecx,%ebp rorl $2,%esi - andl %eax,%ecx - orl %ecx,%ebp movl %edi,%ecx roll $5,%ecx - addl %ebp,%edx + movl %edx,48(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp addl %ecx,%edx + andl %ebx,%ebp + movl 52(%esp),%ecx + addl %ebp,%edx # 40_59 45 - movl 52(%esp),%ecx - movl 60(%esp),%ebp - xorl %ebp,%ecx - movl 20(%esp),%ebp - xorl %ebp,%ecx - movl 40(%esp),%ebp - xorl %ebp,%ecx - movl %edi,%ebp + movl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 20(%esp),%ecx + andl %edi,%ebp + xorl 40(%esp),%ecx roll $1,%ecx - orl %esi,%ebp - movl %ecx,52(%esp) - andl %eax,%ebp - leal 2400959708(%ecx,%ebx,1),%ecx - movl %edi,%ebx + addl %ebx,%ebp rorl $2,%edi - andl %esi,%ebx - orl %ebx,%ebp movl %edx,%ebx roll $5,%ebx - addl %ebp,%ecx + movl %ecx,52(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp addl %ebx,%ecx + andl %eax,%ebp + movl 56(%esp),%ebx + addl %ebp,%ecx # 40_59 46 - movl 56(%esp),%ebx - movl (%esp),%ebp - xorl %ebp,%ebx - movl 24(%esp),%ebp - xorl %ebp,%ebx - movl 44(%esp),%ebp - xorl %ebp,%ebx - movl %edx,%ebp + movl %edi,%ebp + xorl (%esp),%ebx + xorl %esi,%ebp + xorl 24(%esp),%ebx + andl %edx,%ebp + xorl 44(%esp),%ebx roll $1,%ebx - orl %edi,%ebp - movl %ebx,56(%esp) - andl %esi,%ebp - leal 2400959708(%ebx,%eax,1),%ebx - movl %edx,%eax + addl %eax,%ebp rorl $2,%edx - andl %edi,%eax - orl %eax,%ebp movl %ecx,%eax roll $5,%eax - addl %ebp,%ebx + movl %ebx,56(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp addl %eax,%ebx + andl %esi,%ebp + movl 60(%esp),%eax + addl %ebp,%ebx # 40_59 47 - movl 60(%esp),%eax - movl 4(%esp),%ebp - xorl %ebp,%eax - movl 28(%esp),%ebp - xorl %ebp,%eax - movl 48(%esp),%ebp - xorl %ebp,%eax - movl %ecx,%ebp + movl %edx,%ebp + xorl 4(%esp),%eax + xorl %edi,%ebp + xorl 28(%esp),%eax + andl %ecx,%ebp + xorl 48(%esp),%eax roll $1,%eax - orl %edx,%ebp - movl %eax,60(%esp) - andl %edi,%ebp - leal 2400959708(%eax,%esi,1),%eax - movl %ecx,%esi + addl %esi,%ebp rorl $2,%ecx - andl %edx,%esi - orl %esi,%ebp movl %ebx,%esi roll $5,%esi - addl %ebp,%eax + movl %eax,60(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp addl %esi,%eax + andl %edi,%ebp + movl (%esp),%esi + addl %ebp,%eax # 40_59 48 - movl (%esp),%esi - movl 8(%esp),%ebp - xorl %ebp,%esi - movl 32(%esp),%ebp - xorl %ebp,%esi - movl 52(%esp),%ebp - xorl %ebp,%esi - movl %ebx,%ebp + movl %ecx,%ebp + xorl 8(%esp),%esi + xorl %edx,%ebp + xorl 32(%esp),%esi + andl %ebx,%ebp + xorl 52(%esp),%esi roll $1,%esi - orl %ecx,%ebp - movl %esi,(%esp) - andl %edx,%ebp - leal 2400959708(%esi,%edi,1),%esi - 
movl %ebx,%edi + addl %edi,%ebp rorl $2,%ebx - andl %ecx,%edi - orl %edi,%ebp movl %eax,%edi roll $5,%edi - addl %ebp,%esi + movl %esi,(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp addl %edi,%esi + andl %edx,%ebp + movl 4(%esp),%edi + addl %ebp,%esi # 40_59 49 - movl 4(%esp),%edi - movl 12(%esp),%ebp - xorl %ebp,%edi - movl 36(%esp),%ebp - xorl %ebp,%edi - movl 56(%esp),%ebp - xorl %ebp,%edi - movl %eax,%ebp + movl %ebx,%ebp + xorl 12(%esp),%edi + xorl %ecx,%ebp + xorl 36(%esp),%edi + andl %eax,%ebp + xorl 56(%esp),%edi roll $1,%edi - orl %ebx,%ebp - movl %edi,4(%esp) - andl %ecx,%ebp - leal 2400959708(%edi,%edx,1),%edi - movl %eax,%edx + addl %edx,%ebp rorl $2,%eax - andl %ebx,%edx - orl %edx,%ebp movl %esi,%edx roll $5,%edx - addl %ebp,%edi + movl %edi,4(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp addl %edx,%edi + andl %ecx,%ebp + movl 8(%esp),%edx + addl %ebp,%edi # 40_59 50 - movl 8(%esp),%edx - movl 16(%esp),%ebp - xorl %ebp,%edx - movl 40(%esp),%ebp - xorl %ebp,%edx - movl 60(%esp),%ebp - xorl %ebp,%edx - movl %esi,%ebp + movl %eax,%ebp + xorl 16(%esp),%edx + xorl %ebx,%ebp + xorl 40(%esp),%edx + andl %esi,%ebp + xorl 60(%esp),%edx roll $1,%edx - orl %eax,%ebp - movl %edx,8(%esp) - andl %ebx,%ebp - leal 2400959708(%edx,%ecx,1),%edx - movl %esi,%ecx + addl %ecx,%ebp rorl $2,%esi - andl %eax,%ecx - orl %ecx,%ebp movl %edi,%ecx roll $5,%ecx - addl %ebp,%edx + movl %edx,8(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp addl %ecx,%edx + andl %ebx,%ebp + movl 12(%esp),%ecx + addl %ebp,%edx # 40_59 51 - movl 12(%esp),%ecx - movl 20(%esp),%ebp - xorl %ebp,%ecx - movl 44(%esp),%ebp - xorl %ebp,%ecx - movl (%esp),%ebp - xorl %ebp,%ecx - movl %edi,%ebp + movl %esi,%ebp + xorl 20(%esp),%ecx + xorl %eax,%ebp + xorl 44(%esp),%ecx + andl %edi,%ebp + xorl (%esp),%ecx roll $1,%ecx - orl %esi,%ebp - movl %ecx,12(%esp) - andl %eax,%ebp - leal 2400959708(%ecx,%ebx,1),%ecx - movl %edi,%ebx + addl %ebx,%ebp rorl $2,%edi - andl %esi,%ebx - orl %ebx,%ebp movl %edx,%ebx roll $5,%ebx - addl %ebp,%ecx + movl %ecx,12(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp addl %ebx,%ecx + andl %eax,%ebp + movl 16(%esp),%ebx + addl %ebp,%ecx # 40_59 52 - movl 16(%esp),%ebx - movl 24(%esp),%ebp - xorl %ebp,%ebx - movl 48(%esp),%ebp - xorl %ebp,%ebx - movl 4(%esp),%ebp - xorl %ebp,%ebx - movl %edx,%ebp + movl %edi,%ebp + xorl 24(%esp),%ebx + xorl %esi,%ebp + xorl 48(%esp),%ebx + andl %edx,%ebp + xorl 4(%esp),%ebx roll $1,%ebx - orl %edi,%ebp - movl %ebx,16(%esp) - andl %esi,%ebp - leal 2400959708(%ebx,%eax,1),%ebx - movl %edx,%eax + addl %eax,%ebp rorl $2,%edx - andl %edi,%eax - orl %eax,%ebp movl %ecx,%eax roll $5,%eax - addl %ebp,%ebx + movl %ebx,16(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp addl %eax,%ebx + andl %esi,%ebp + movl 20(%esp),%eax + addl %ebp,%ebx # 40_59 53 - movl 20(%esp),%eax - movl 28(%esp),%ebp - xorl %ebp,%eax - movl 52(%esp),%ebp - xorl %ebp,%eax - movl 8(%esp),%ebp - xorl %ebp,%eax - movl %ecx,%ebp + movl %edx,%ebp + xorl 28(%esp),%eax + xorl %edi,%ebp + xorl 52(%esp),%eax + andl %ecx,%ebp + xorl 8(%esp),%eax roll $1,%eax - orl %edx,%ebp - movl %eax,20(%esp) - andl %edi,%ebp - leal 2400959708(%eax,%esi,1),%eax - movl %ecx,%esi + addl %esi,%ebp rorl $2,%ecx - andl %edx,%esi - orl %esi,%ebp movl %ebx,%esi roll $5,%esi - addl %ebp,%eax + movl %eax,20(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp addl %esi,%eax + andl %edi,%ebp + movl 24(%esp),%esi + addl %ebp,%eax # 40_59 54 - movl 24(%esp),%esi - movl 32(%esp),%ebp - xorl 
%ebp,%esi - movl 56(%esp),%ebp - xorl %ebp,%esi - movl 12(%esp),%ebp - xorl %ebp,%esi - movl %ebx,%ebp + movl %ecx,%ebp + xorl 32(%esp),%esi + xorl %edx,%ebp + xorl 56(%esp),%esi + andl %ebx,%ebp + xorl 12(%esp),%esi roll $1,%esi - orl %ecx,%ebp - movl %esi,24(%esp) - andl %edx,%ebp - leal 2400959708(%esi,%edi,1),%esi - movl %ebx,%edi + addl %edi,%ebp rorl $2,%ebx - andl %ecx,%edi - orl %edi,%ebp movl %eax,%edi roll $5,%edi - addl %ebp,%esi + movl %esi,24(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp addl %edi,%esi + andl %edx,%ebp + movl 28(%esp),%edi + addl %ebp,%esi # 40_59 55 - movl 28(%esp),%edi - movl 36(%esp),%ebp - xorl %ebp,%edi - movl 60(%esp),%ebp - xorl %ebp,%edi - movl 16(%esp),%ebp - xorl %ebp,%edi - movl %eax,%ebp + movl %ebx,%ebp + xorl 36(%esp),%edi + xorl %ecx,%ebp + xorl 60(%esp),%edi + andl %eax,%ebp + xorl 16(%esp),%edi roll $1,%edi - orl %ebx,%ebp - movl %edi,28(%esp) - andl %ecx,%ebp - leal 2400959708(%edi,%edx,1),%edi - movl %eax,%edx + addl %edx,%ebp rorl $2,%eax - andl %ebx,%edx - orl %edx,%ebp movl %esi,%edx roll $5,%edx - addl %ebp,%edi + movl %edi,28(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp addl %edx,%edi + andl %ecx,%ebp + movl 32(%esp),%edx + addl %ebp,%edi # 40_59 56 - movl 32(%esp),%edx - movl 40(%esp),%ebp - xorl %ebp,%edx - movl (%esp),%ebp - xorl %ebp,%edx - movl 20(%esp),%ebp - xorl %ebp,%edx - movl %esi,%ebp + movl %eax,%ebp + xorl 40(%esp),%edx + xorl %ebx,%ebp + xorl (%esp),%edx + andl %esi,%ebp + xorl 20(%esp),%edx roll $1,%edx - orl %eax,%ebp - movl %edx,32(%esp) - andl %ebx,%ebp - leal 2400959708(%edx,%ecx,1),%edx - movl %esi,%ecx + addl %ecx,%ebp rorl $2,%esi - andl %eax,%ecx - orl %ecx,%ebp movl %edi,%ecx roll $5,%ecx - addl %ebp,%edx + movl %edx,32(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp addl %ecx,%edx + andl %ebx,%ebp + movl 36(%esp),%ecx + addl %ebp,%edx # 40_59 57 - movl 36(%esp),%ecx - movl 44(%esp),%ebp - xorl %ebp,%ecx - movl 4(%esp),%ebp - xorl %ebp,%ecx - movl 24(%esp),%ebp - xorl %ebp,%ecx - movl %edi,%ebp + movl %esi,%ebp + xorl 44(%esp),%ecx + xorl %eax,%ebp + xorl 4(%esp),%ecx + andl %edi,%ebp + xorl 24(%esp),%ecx roll $1,%ecx - orl %esi,%ebp - movl %ecx,36(%esp) - andl %eax,%ebp - leal 2400959708(%ecx,%ebx,1),%ecx - movl %edi,%ebx + addl %ebx,%ebp rorl $2,%edi - andl %esi,%ebx - orl %ebx,%ebp movl %edx,%ebx roll $5,%ebx - addl %ebp,%ecx + movl %ecx,36(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp addl %ebx,%ecx + andl %eax,%ebp + movl 40(%esp),%ebx + addl %ebp,%ecx # 40_59 58 - movl 40(%esp),%ebx - movl 48(%esp),%ebp - xorl %ebp,%ebx - movl 8(%esp),%ebp - xorl %ebp,%ebx - movl 28(%esp),%ebp - xorl %ebp,%ebx - movl %edx,%ebp + movl %edi,%ebp + xorl 48(%esp),%ebx + xorl %esi,%ebp + xorl 8(%esp),%ebx + andl %edx,%ebp + xorl 28(%esp),%ebx roll $1,%ebx - orl %edi,%ebp - movl %ebx,40(%esp) - andl %esi,%ebp - leal 2400959708(%ebx,%eax,1),%ebx - movl %edx,%eax + addl %eax,%ebp rorl $2,%edx - andl %edi,%eax - orl %eax,%ebp movl %ecx,%eax roll $5,%eax - addl %ebp,%ebx + movl %ebx,40(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp addl %eax,%ebx + andl %esi,%ebp + movl 44(%esp),%eax + addl %ebp,%ebx # 40_59 59 - movl 44(%esp),%eax - movl 52(%esp),%ebp - xorl %ebp,%eax - movl 12(%esp),%ebp - xorl %ebp,%eax - movl 32(%esp),%ebp - xorl %ebp,%eax - movl %ecx,%ebp + movl %edx,%ebp + xorl 52(%esp),%eax + xorl %edi,%ebp + xorl 12(%esp),%eax + andl %ecx,%ebp + xorl 32(%esp),%eax roll $1,%eax - orl %edx,%ebp - movl %eax,44(%esp) - andl %edi,%ebp - leal 
2400959708(%eax,%esi,1),%eax - movl %ecx,%esi + addl %esi,%ebp rorl $2,%ecx - andl %edx,%esi - orl %esi,%ebp movl %ebx,%esi roll $5,%esi - addl %ebp,%eax + movl %eax,44(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp addl %esi,%eax + andl %edi,%ebp + movl 48(%esp),%esi + addl %ebp,%eax # 20_39 60 movl %ebx,%ebp - movl 48(%esp),%esi - rorl $2,%ebx xorl 56(%esp),%esi xorl %ecx,%ebp xorl 16(%esp),%esi xorl %edx,%ebp xorl 36(%esp),%esi roll $1,%esi - addl %edi,%ebp + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp movl %esi,48(%esp) - movl %eax,%edi - roll $5,%edi - leal 3395469782(%esi,%ebp,1),%esi - addl %edi,%esi + leal 3395469782(%esi,%edi,1),%esi + movl 52(%esp),%edi + addl %ebp,%esi # 20_39 61 movl %eax,%ebp - movl 52(%esp),%edi - rorl $2,%eax xorl 60(%esp),%edi xorl %ebx,%ebp xorl 20(%esp),%edi xorl %ecx,%ebp xorl 40(%esp),%edi roll $1,%edi - addl %edx,%ebp + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp movl %edi,52(%esp) - movl %esi,%edx - roll $5,%edx - leal 3395469782(%edi,%ebp,1),%edi - addl %edx,%edi + leal 3395469782(%edi,%edx,1),%edi + movl 56(%esp),%edx + addl %ebp,%edi # 20_39 62 movl %esi,%ebp - movl 56(%esp),%edx - rorl $2,%esi xorl (%esp),%edx xorl %eax,%ebp xorl 24(%esp),%edx xorl %ebx,%ebp xorl 44(%esp),%edx roll $1,%edx - addl %ecx,%ebp + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp movl %edx,56(%esp) - movl %edi,%ecx - roll $5,%ecx - leal 3395469782(%edx,%ebp,1),%edx - addl %ecx,%edx + leal 3395469782(%edx,%ecx,1),%edx + movl 60(%esp),%ecx + addl %ebp,%edx # 20_39 63 movl %edi,%ebp - movl 60(%esp),%ecx - rorl $2,%edi xorl 4(%esp),%ecx xorl %esi,%ebp xorl 28(%esp),%ecx xorl %eax,%ebp xorl 48(%esp),%ecx roll $1,%ecx - addl %ebx,%ebp + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp movl %ecx,60(%esp) - movl %edx,%ebx - roll $5,%ebx - leal 3395469782(%ecx,%ebp,1),%ecx - addl %ebx,%ecx + leal 3395469782(%ecx,%ebx,1),%ecx + movl (%esp),%ebx + addl %ebp,%ecx # 20_39 64 movl %edx,%ebp - movl (%esp),%ebx - rorl $2,%edx xorl 8(%esp),%ebx xorl %edi,%ebp xorl 32(%esp),%ebx xorl %esi,%ebp xorl 52(%esp),%ebx roll $1,%ebx - addl %eax,%ebp + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp movl %ebx,(%esp) - movl %ecx,%eax - roll $5,%eax - leal 3395469782(%ebx,%ebp,1),%ebx - addl %eax,%ebx + leal 3395469782(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx # 20_39 65 movl %ecx,%ebp - movl 4(%esp),%eax - rorl $2,%ecx xorl 12(%esp),%eax xorl %edx,%ebp xorl 36(%esp),%eax xorl %edi,%ebp xorl 56(%esp),%eax roll $1,%eax - addl %esi,%ebp + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp movl %eax,4(%esp) - movl %ebx,%esi - roll $5,%esi - leal 3395469782(%eax,%ebp,1),%eax - addl %esi,%eax + leal 3395469782(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax # 20_39 66 movl %ebx,%ebp - movl 8(%esp),%esi - rorl $2,%ebx xorl 16(%esp),%esi xorl %ecx,%ebp xorl 40(%esp),%esi xorl %edx,%ebp xorl 60(%esp),%esi roll $1,%esi - addl %edi,%ebp + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp movl %esi,8(%esp) - movl %eax,%edi - roll $5,%edi - leal 3395469782(%esi,%ebp,1),%esi - addl %edi,%esi + leal 3395469782(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi # 20_39 67 movl %eax,%ebp - movl 12(%esp),%edi - rorl $2,%eax xorl 20(%esp),%edi xorl %ebx,%ebp xorl 44(%esp),%edi xorl %ecx,%ebp xorl (%esp),%edi roll $1,%edi - addl %edx,%ebp + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp movl %edi,12(%esp) - movl %esi,%edx - roll $5,%edx - leal 
3395469782(%edi,%ebp,1),%edi - addl %edx,%edi + leal 3395469782(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi # 20_39 68 movl %esi,%ebp - movl 16(%esp),%edx - rorl $2,%esi xorl 24(%esp),%edx xorl %eax,%ebp xorl 48(%esp),%edx xorl %ebx,%ebp xorl 4(%esp),%edx roll $1,%edx - addl %ecx,%ebp + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp movl %edx,16(%esp) - movl %edi,%ecx - roll $5,%ecx - leal 3395469782(%edx,%ebp,1),%edx - addl %ecx,%edx + leal 3395469782(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx # 20_39 69 movl %edi,%ebp - movl 20(%esp),%ecx - rorl $2,%edi xorl 28(%esp),%ecx xorl %esi,%ebp xorl 52(%esp),%ecx xorl %eax,%ebp xorl 8(%esp),%ecx roll $1,%ecx - addl %ebx,%ebp + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp movl %ecx,20(%esp) - movl %edx,%ebx - roll $5,%ebx - leal 3395469782(%ecx,%ebp,1),%ecx - addl %ebx,%ecx + leal 3395469782(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx # 20_39 70 movl %edx,%ebp - movl 24(%esp),%ebx - rorl $2,%edx xorl 32(%esp),%ebx xorl %edi,%ebp xorl 56(%esp),%ebx xorl %esi,%ebp xorl 12(%esp),%ebx roll $1,%ebx - addl %eax,%ebp + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp movl %ebx,24(%esp) - movl %ecx,%eax - roll $5,%eax - leal 3395469782(%ebx,%ebp,1),%ebx - addl %eax,%ebx + leal 3395469782(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx # 20_39 71 movl %ecx,%ebp - movl 28(%esp),%eax - rorl $2,%ecx xorl 36(%esp),%eax xorl %edx,%ebp xorl 60(%esp),%eax xorl %edi,%ebp xorl 16(%esp),%eax roll $1,%eax - addl %esi,%ebp + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp movl %eax,28(%esp) - movl %ebx,%esi - roll $5,%esi - leal 3395469782(%eax,%ebp,1),%eax - addl %esi,%eax + leal 3395469782(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax # 20_39 72 movl %ebx,%ebp - movl 32(%esp),%esi - rorl $2,%ebx xorl 40(%esp),%esi xorl %ecx,%ebp xorl (%esp),%esi xorl %edx,%ebp xorl 20(%esp),%esi roll $1,%esi - addl %edi,%ebp + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp movl %esi,32(%esp) - movl %eax,%edi - roll $5,%edi - leal 3395469782(%esi,%ebp,1),%esi - addl %edi,%esi + leal 3395469782(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi # 20_39 73 movl %eax,%ebp - movl 36(%esp),%edi - rorl $2,%eax xorl 44(%esp),%edi xorl %ebx,%ebp xorl 4(%esp),%edi xorl %ecx,%ebp xorl 24(%esp),%edi roll $1,%edi - addl %edx,%ebp + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp movl %edi,36(%esp) - movl %esi,%edx - roll $5,%edx - leal 3395469782(%edi,%ebp,1),%edi - addl %edx,%edi + leal 3395469782(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi # 20_39 74 movl %esi,%ebp - movl 40(%esp),%edx - rorl $2,%esi xorl 48(%esp),%edx xorl %eax,%ebp xorl 8(%esp),%edx xorl %ebx,%ebp xorl 28(%esp),%edx roll $1,%edx - addl %ecx,%ebp + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp movl %edx,40(%esp) - movl %edi,%ecx - roll $5,%ecx - leal 3395469782(%edx,%ebp,1),%edx - addl %ecx,%edx + leal 3395469782(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx # 20_39 75 movl %edi,%ebp - movl 44(%esp),%ecx - rorl $2,%edi xorl 52(%esp),%ecx xorl %esi,%ebp xorl 12(%esp),%ecx xorl %eax,%ebp xorl 32(%esp),%ecx roll $1,%ecx - addl %ebx,%ebp + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp movl %ecx,44(%esp) - movl %edx,%ebx - roll $5,%ebx - leal 3395469782(%ecx,%ebp,1),%ecx - addl %ebx,%ecx + leal 3395469782(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx # 20_39 76 movl %edx,%ebp - movl 48(%esp),%ebx - rorl $2,%edx xorl 
56(%esp),%ebx xorl %edi,%ebp xorl 16(%esp),%ebx xorl %esi,%ebp xorl 36(%esp),%ebx roll $1,%ebx - addl %eax,%ebp + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp movl %ebx,48(%esp) - movl %ecx,%eax - roll $5,%eax - leal 3395469782(%ebx,%ebp,1),%ebx - addl %eax,%ebx + leal 3395469782(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx # 20_39 77 movl %ecx,%ebp - movl 52(%esp),%eax - rorl $2,%ecx xorl 60(%esp),%eax xorl %edx,%ebp xorl 20(%esp),%eax xorl %edi,%ebp xorl 40(%esp),%eax roll $1,%eax - addl %esi,%ebp - movl %eax,52(%esp) - movl %ebx,%esi - roll $5,%esi - leal 3395469782(%eax,%ebp,1),%eax - addl %esi,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + leal 3395469782(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax # 20_39 78 movl %ebx,%ebp - movl 56(%esp),%esi - rorl $2,%ebx xorl (%esp),%esi xorl %ecx,%ebp xorl 24(%esp),%esi xorl %edx,%ebp xorl 44(%esp),%esi roll $1,%esi - addl %edi,%ebp - movl %esi,56(%esp) - movl %eax,%edi - roll $5,%edi - leal 3395469782(%esi,%ebp,1),%esi - addl %edi,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + leal 3395469782(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi # 20_39 79 movl %eax,%ebp - movl 60(%esp),%edi - rorl $2,%eax xorl 4(%esp),%edi xorl %ebx,%ebp xorl 28(%esp),%edi xorl %ecx,%ebp xorl 48(%esp),%edi roll $1,%edi - addl %edx,%ebp - movl %edi,60(%esp) - movl %esi,%edx - roll $5,%edx - leal 3395469782(%edi,%ebp,1),%edi - addl %edx,%edi - movl 84(%esp),%ebp - movl 88(%esp),%edx + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + leal 3395469782(%edi,%edx,1),%edi + addl %ebp,%edi + movl 96(%esp),%ebp + movl 100(%esp),%edx addl (%ebp),%edi addl 4(%ebp),%esi addl 8(%ebp),%eax @@ -1501,14 +1439,14 @@ L000loop: movl %edi,(%ebp) addl $64,%edx movl %esi,4(%ebp) - cmpl 92(%esp),%edx + cmpl 104(%esp),%edx movl %eax,8(%ebp) movl %ecx,%edi movl %ebx,12(%ebp) movl %edx,%esi movl %ecx,16(%ebp) jb L000loop - addl $64,%esp + addl $76,%esp popl %edi popl %esi popl %ebx diff --git a/deps/openssl/asm/x86-macosx-gas/sha/sha256-586.s b/deps/openssl/asm/x86-macosx-gas/sha/sha256-586.s index 1190be7503..67c7a96bc0 100644 --- a/deps/openssl/asm/x86-macosx-gas/sha/sha256-586.s +++ b/deps/openssl/asm/x86-macosx-gas/sha/sha256-586.s @@ -95,31 +95,30 @@ L002loop: L00300_15: movl 92(%esp),%ebx movl %edx,%ecx - rorl $6,%ecx - movl %edx,%edi - rorl $11,%edi + rorl $14,%ecx movl 20(%esp),%esi - xorl %edi,%ecx - rorl $14,%edi - xorl %edi,%ecx + xorl %edx,%ecx + rorl $5,%ecx + xorl %edx,%ecx + rorl $6,%ecx movl 24(%esp),%edi addl %ecx,%ebx - movl %edx,16(%esp) xorl %edi,%esi + movl %edx,16(%esp) movl %eax,%ecx andl %edx,%esi movl 12(%esp),%edx xorl %edi,%esi movl %eax,%edi addl %esi,%ebx - rorl $2,%ecx + rorl $9,%ecx addl 28(%esp),%ebx - rorl $13,%edi + xorl %eax,%ecx + rorl $11,%ecx movl 4(%esp),%esi - xorl %edi,%ecx - rorl $9,%edi + xorl %eax,%ecx + rorl $2,%ecx addl %ebx,%edx - xorl %edi,%ecx movl 8(%esp),%edi addl %ecx,%ebx movl %eax,(%esp) @@ -141,48 +140,46 @@ L00300_15: L00416_63: movl %ebx,%esi movl 100(%esp),%ecx - shrl $3,%ebx - rorl $7,%esi - xorl %esi,%ebx rorl $11,%esi movl %ecx,%edi + xorl %ebx,%esi + rorl $7,%esi + shrl $3,%ebx + rorl $2,%edi xorl %esi,%ebx - shrl $10,%ecx - movl 156(%esp),%esi + xorl %ecx,%edi rorl $17,%edi - xorl %edi,%ecx - rorl $2,%edi - addl %esi,%ebx + shrl $10,%ecx + addl 156(%esp),%ebx xorl %ecx,%edi - addl %edi,%ebx - movl %edx,%ecx addl 120(%esp),%ebx - rorl $6,%ecx - movl %edx,%edi - rorl $11,%edi + movl %edx,%ecx + addl %edi,%ebx + rorl 
$14,%ecx movl 20(%esp),%esi - xorl %edi,%ecx - rorl $14,%edi + xorl %edx,%ecx + rorl $5,%ecx movl %ebx,92(%esp) - xorl %edi,%ecx + xorl %edx,%ecx + rorl $6,%ecx movl 24(%esp),%edi addl %ecx,%ebx - movl %edx,16(%esp) xorl %edi,%esi + movl %edx,16(%esp) movl %eax,%ecx andl %edx,%esi movl 12(%esp),%edx xorl %edi,%esi movl %eax,%edi addl %esi,%ebx - rorl $2,%ecx + rorl $9,%ecx addl 28(%esp),%ebx - rorl $13,%edi + xorl %eax,%ecx + rorl $11,%ecx movl 4(%esp),%esi - xorl %edi,%ecx - rorl $9,%edi + xorl %eax,%ecx + rorl $2,%ecx addl %ebx,%edx - xorl %edi,%ecx movl 8(%esp),%edi addl %ecx,%ebx movl %eax,(%esp) diff --git a/deps/openssl/asm/x86-macosx-gas/x86cpuid.s b/deps/openssl/asm/x86-macosx-gas/x86cpuid.s index b5e80f83a3..db36e6f503 100644 --- a/deps/openssl/asm/x86-macosx-gas/x86cpuid.s +++ b/deps/openssl/asm/x86-macosx-gas/x86cpuid.s @@ -18,9 +18,9 @@ L_OPENSSL_ia32_cpuid_begin: pushfl popl %eax xorl %eax,%ecx - btl $21,%ecx - jnc L000done xorl %eax,%eax + btl $21,%ecx + jnc L000nocpuid .byte 0x0f,0xa2 movl %eax,%edi xorl %eax,%eax @@ -46,7 +46,14 @@ L_OPENSSL_ia32_cpuid_begin: jnz L001intel movl $2147483648,%eax .byte 0x0f,0xa2 - cmpl $2147483656,%eax + cmpl $2147483649,%eax + jb L001intel + movl %eax,%esi + movl $2147483649,%eax + .byte 0x0f,0xa2 + orl %ecx,%ebp + andl $2049,%ebp + cmpl $2147483656,%esi jb L001intel movl $2147483656,%eax .byte 0x0f,0xa2 @@ -55,46 +62,68 @@ L_OPENSSL_ia32_cpuid_begin: movl $1,%eax .byte 0x0f,0xa2 btl $28,%edx - jnc L000done + jnc L002generic shrl $16,%ebx andl $255,%ebx cmpl %esi,%ebx - ja L000done + ja L002generic andl $4026531839,%edx - jmp L000done + jmp L002generic L001intel: cmpl $4,%edi movl $-1,%edi - jb L002nocacheinfo + jb L003nocacheinfo movl $4,%eax movl $0,%ecx .byte 0x0f,0xa2 movl %eax,%edi shrl $14,%edi andl $4095,%edi -L002nocacheinfo: +L003nocacheinfo: movl $1,%eax .byte 0x0f,0xa2 + andl $3220176895,%edx cmpl $0,%ebp - jne L003notP4 + jne L004notintel + orl $1073741824,%edx andb $15,%ah cmpb $15,%ah - jne L003notP4 + jne L004notintel orl $1048576,%edx -L003notP4: +L004notintel: btl $28,%edx - jnc L000done + jnc L002generic andl $4026531839,%edx cmpl $0,%edi - je L000done + je L002generic orl $268435456,%edx shrl $16,%ebx cmpb $1,%bl - ja L000done + ja L002generic andl $4026531839,%edx -L000done: - movl %edx,%eax - movl %ecx,%edx +L002generic: + andl $2048,%ebp + andl $4294965247,%ecx + movl %edx,%esi + orl %ecx,%ebp + btl $27,%ecx + jnc L005clear_avx + xorl %ecx,%ecx +.byte 15,1,208 + andl $6,%eax + cmpl $6,%eax + je L006done + cmpl $2,%eax + je L005clear_avx +L007clear_xmm: + andl $4261412861,%ebp + andl $4278190079,%esi +L005clear_avx: + andl $4026525695,%ebp +L006done: + movl %esi,%eax + movl %ebp,%edx +L000nocpuid: popl %edi popl %esi popl %ebx @@ -106,26 +135,32 @@ _OPENSSL_rdtsc: L_OPENSSL_rdtsc_begin: xorl %eax,%eax xorl %edx,%edx - leal _OPENSSL_ia32cap_P,%ecx + call L008PIC_me_up +L008PIC_me_up: + popl %ecx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L008PIC_me_up(%ecx),%ecx btl $4,(%ecx) - jnc L004notsc + jnc L009notsc .byte 0x0f,0x31 -L004notsc: +L009notsc: ret .globl _OPENSSL_instrument_halt .align 4 _OPENSSL_instrument_halt: L_OPENSSL_instrument_halt_begin: - leal _OPENSSL_ia32cap_P,%ecx + call L010PIC_me_up +L010PIC_me_up: + popl %ecx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010PIC_me_up(%ecx),%ecx btl $4,(%ecx) - jnc L005nohalt + jnc L011nohalt .long 2421723150 andl $3,%eax - jnz L005nohalt + jnz L011nohalt pushfl popl %eax btl $9,%eax - jnc L005nohalt + jnc L011nohalt .byte 0x0f,0x31 pushl %edx pushl %eax @@ -135,7 +170,7 @@ 
L_OPENSSL_instrument_halt_begin: sbbl 4(%esp),%edx addl $8,%esp ret -L005nohalt: +L011nohalt: xorl %eax,%eax xorl %edx,%edx ret @@ -146,21 +181,21 @@ L_OPENSSL_far_spin_begin: pushfl popl %eax btl $9,%eax - jnc L006nospin + jnc L012nospin movl 4(%esp),%eax movl 8(%esp),%ecx .long 2430111262 xorl %eax,%eax movl (%ecx),%edx - jmp L007spin + jmp L013spin .align 4,0x90 -L007spin: +L013spin: incl %eax cmpl (%ecx),%edx - je L007spin + je L013spin .long 529567888 ret -L006nospin: +L012nospin: xorl %eax,%eax xorl %edx,%edx ret @@ -170,12 +205,15 @@ _OPENSSL_wipe_cpu: L_OPENSSL_wipe_cpu_begin: xorl %eax,%eax xorl %edx,%edx - leal _OPENSSL_ia32cap_P,%ecx + call L014PIC_me_up +L014PIC_me_up: + popl %ecx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L014PIC_me_up(%ecx),%ecx movl (%ecx),%ecx btl $1,(%ecx) - jnc L008no_x87 + jnc L015no_x87 .long 4007259865,4007259865,4007259865,4007259865,2430851995 -L008no_x87: +L015no_x87: leal 4(%esp),%eax ret .globl _OPENSSL_atomic_add @@ -187,11 +225,11 @@ L_OPENSSL_atomic_add_begin: pushl %ebx nop movl (%edx),%eax -L009spin: +L016spin: leal (%eax,%ecx,1),%ebx nop .long 447811568 - jne L009spin + jne L016spin movl %ebx,%eax popl %ebx ret @@ -228,34 +266,51 @@ L_OPENSSL_cleanse_begin: movl 8(%esp),%ecx xorl %eax,%eax cmpl $7,%ecx - jae L010lot + jae L017lot cmpl $0,%ecx - je L011ret -L012little: + je L018ret +L019little: movb %al,(%edx) subl $1,%ecx leal 1(%edx),%edx - jnz L012little -L011ret: + jnz L019little +L018ret: ret .align 4,0x90 -L010lot: +L017lot: testl $3,%edx - jz L013aligned + jz L020aligned movb %al,(%edx) leal -1(%ecx),%ecx leal 1(%edx),%edx - jmp L010lot -L013aligned: + jmp L017lot +L020aligned: movl %eax,(%edx) leal -4(%ecx),%ecx testl $-4,%ecx leal 4(%edx),%edx - jnz L013aligned + jnz L020aligned cmpl $0,%ecx - jne L012little + jne L019little + ret +.globl _OPENSSL_ia32_rdrand +.align 4 +_OPENSSL_ia32_rdrand: +L_OPENSSL_ia32_rdrand_begin: + movl $8,%ecx +L021loop: +.byte 15,199,240 + jc L022break + loop L021loop +L022break: + cmpl $0,%eax + cmovel %ecx,%eax ret -.comm _OPENSSL_ia32cap_P,4 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,8,2 .mod_init_func .align 2 .long _OPENSSL_cpuid_setup diff --git a/deps/openssl/asm/x86-win32-masm/aes/aes-586.asm b/deps/openssl/asm/x86-win32-masm/aes/aes-586.asm index 22dd21fbcd..e4ac96e646 100644 --- a/deps/openssl/asm/x86-win32-masm/aes/aes-586.asm +++ b/deps/openssl/asm/x86-win32-masm/aes/aes-586.asm @@ -2975,14 +2975,14 @@ $L045exit: ret __x86_AES_set_encrypt_key ENDP ALIGN 16 -_AES_set_encrypt_key PROC PUBLIC -$L_AES_set_encrypt_key_begin:: +_private_AES_set_encrypt_key PROC PUBLIC +$L_private_AES_set_encrypt_key_begin:: call __x86_AES_set_encrypt_key ret -_AES_set_encrypt_key ENDP +_private_AES_set_encrypt_key ENDP ALIGN 16 -_AES_set_decrypt_key PROC PUBLIC -$L_AES_set_decrypt_key_begin:: +_private_AES_set_decrypt_key PROC PUBLIC +$L_private_AES_set_decrypt_key_begin:: call __x86_AES_set_encrypt_key cmp eax,0 je $L054proceed @@ -3211,12 +3211,12 @@ $L056permute: pop ebx pop ebp ret -_AES_set_decrypt_key ENDP +_private_AES_set_decrypt_key ENDP DB 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 DB 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 DB 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .text$ ENDS .bss SEGMENT 'BSS' -COMM _OPENSSL_ia32cap_P:DWORD +COMM _OPENSSL_ia32cap_P:QWORD .bss ENDS END diff --git a/deps/openssl/asm/x86-win32-masm/aes/aesni-x86.asm 
b/deps/openssl/asm/x86-win32-masm/aes/aesni-x86.asm new file mode 100644 index 0000000000..a1602cc692 --- /dev/null +++ b/deps/openssl/asm/x86-win32-masm/aes/aesni-x86.asm @@ -0,0 +1,2133 @@ +TITLE ../openssl/crypto/aes/asm/aesni-x86.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. +ENDIF +.686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) +XMMWORD ENDS +ENDIF + +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +ALIGN 16 +_aesni_encrypt PROC PUBLIC +$L_aesni_encrypt_begin:: + mov eax,DWORD PTR 4[esp] + mov edx,DWORD PTR 12[esp] + movups xmm2,XMMWORD PTR [eax] + mov ecx,DWORD PTR 240[edx] + mov eax,DWORD PTR 8[esp] + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L000enc1_loop_1: +DB 102,15,56,220,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L000enc1_loop_1 +DB 102,15,56,221,209 + movups XMMWORD PTR [eax],xmm2 + ret +_aesni_encrypt ENDP +ALIGN 16 +_aesni_decrypt PROC PUBLIC +$L_aesni_decrypt_begin:: + mov eax,DWORD PTR 4[esp] + mov edx,DWORD PTR 12[esp] + movups xmm2,XMMWORD PTR [eax] + mov ecx,DWORD PTR 240[edx] + mov eax,DWORD PTR 8[esp] + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L001dec1_loop_2: +DB 102,15,56,222,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L001dec1_loop_2 +DB 102,15,56,223,209 + movups XMMWORD PTR [eax],xmm2 + ret +_aesni_decrypt ENDP +ALIGN 16 +__aesni_encrypt3 PROC PRIVATE + movups xmm0,XMMWORD PTR [edx] + shr ecx,1 + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 + movups xmm0,XMMWORD PTR [edx] +$L002enc3_loop: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + dec ecx +DB 102,15,56,220,225 + movups xmm1,XMMWORD PTR 16[edx] +DB 102,15,56,220,208 +DB 102,15,56,220,216 + lea edx,DWORD PTR 32[edx] +DB 102,15,56,220,224 + movups xmm0,XMMWORD PTR [edx] + jnz $L002enc3_loop +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 + ret +__aesni_encrypt3 ENDP +ALIGN 16 +__aesni_decrypt3 PROC PRIVATE + movups xmm0,XMMWORD PTR [edx] + shr ecx,1 + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 + movups xmm0,XMMWORD PTR [edx] +$L003dec3_loop: +DB 102,15,56,222,209 +DB 102,15,56,222,217 + dec ecx +DB 102,15,56,222,225 + movups xmm1,XMMWORD PTR 16[edx] +DB 102,15,56,222,208 +DB 102,15,56,222,216 + lea edx,DWORD PTR 32[edx] +DB 102,15,56,222,224 + movups xmm0,XMMWORD PTR [edx] + jnz $L003dec3_loop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 + ret +__aesni_decrypt3 ENDP +ALIGN 16 +__aesni_encrypt4 PROC PRIVATE + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + shr ecx,1 + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 + pxor xmm5,xmm0 + movups xmm0,XMMWORD PTR [edx] +$L004enc4_loop: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + dec ecx +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR 16[edx] +DB 102,15,56,220,208 +DB 102,15,56,220,216 + lea edx,DWORD PTR 32[edx] +DB 102,15,56,220,224 +DB 102,15,56,220,232 + movups xmm0,XMMWORD PTR [edx] + jnz $L004enc4_loop +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 
102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 +DB 102,15,56,221,232 + ret +__aesni_encrypt4 ENDP +ALIGN 16 +__aesni_decrypt4 PROC PRIVATE + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + shr ecx,1 + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 + pxor xmm5,xmm0 + movups xmm0,XMMWORD PTR [edx] +$L005dec4_loop: +DB 102,15,56,222,209 +DB 102,15,56,222,217 + dec ecx +DB 102,15,56,222,225 +DB 102,15,56,222,233 + movups xmm1,XMMWORD PTR 16[edx] +DB 102,15,56,222,208 +DB 102,15,56,222,216 + lea edx,DWORD PTR 32[edx] +DB 102,15,56,222,224 +DB 102,15,56,222,232 + movups xmm0,XMMWORD PTR [edx] + jnz $L005dec4_loop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 +DB 102,15,56,223,232 + ret +__aesni_decrypt4 ENDP +ALIGN 16 +__aesni_encrypt6 PROC PRIVATE + movups xmm0,XMMWORD PTR [edx] + shr ecx,1 + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 +DB 102,15,56,220,209 + pxor xmm4,xmm0 +DB 102,15,56,220,217 + pxor xmm5,xmm0 + dec ecx +DB 102,15,56,220,225 + pxor xmm6,xmm0 +DB 102,15,56,220,233 + pxor xmm7,xmm0 +DB 102,15,56,220,241 + movups xmm0,XMMWORD PTR [edx] +DB 102,15,56,220,249 + jmp $L_aesni_encrypt6_enter +ALIGN 16 +$L006enc6_loop: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + dec ecx +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +ALIGN 16 +$L_aesni_encrypt6_enter:: + movups xmm1,XMMWORD PTR 16[edx] +DB 102,15,56,220,208 +DB 102,15,56,220,216 + lea edx,DWORD PTR 32[edx] +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR [edx] + jnz $L006enc6_loop +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 +DB 102,15,56,221,232 +DB 102,15,56,221,240 +DB 102,15,56,221,248 + ret +__aesni_encrypt6 ENDP +ALIGN 16 +__aesni_decrypt6 PROC PRIVATE + movups xmm0,XMMWORD PTR [edx] + shr ecx,1 + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 +DB 102,15,56,222,209 + pxor xmm4,xmm0 +DB 102,15,56,222,217 + pxor xmm5,xmm0 + dec ecx +DB 102,15,56,222,225 + pxor xmm6,xmm0 +DB 102,15,56,222,233 + pxor xmm7,xmm0 +DB 102,15,56,222,241 + movups xmm0,XMMWORD PTR [edx] +DB 102,15,56,222,249 + jmp $L_aesni_decrypt6_enter +ALIGN 16 +$L007dec6_loop: +DB 102,15,56,222,209 +DB 102,15,56,222,217 + dec ecx +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +ALIGN 16 +$L_aesni_decrypt6_enter:: + movups xmm1,XMMWORD PTR 16[edx] +DB 102,15,56,222,208 +DB 102,15,56,222,216 + lea edx,DWORD PTR 32[edx] +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 + movups xmm0,XMMWORD PTR [edx] + jnz $L007dec6_loop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 +DB 102,15,56,223,232 +DB 102,15,56,223,240 +DB 102,15,56,223,248 + ret +__aesni_decrypt6 ENDP +ALIGN 16 +_aesni_ecb_encrypt PROC PUBLIC +$L_aesni_ecb_encrypt_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 24[esp] + mov eax,DWORD PTR 28[esp] + mov edx,DWORD PTR 
32[esp] + mov ebx,DWORD PTR 36[esp] + and eax,-16 + jz $L008ecb_ret + mov ecx,DWORD PTR 240[edx] + test ebx,ebx + jz $L009ecb_decrypt + mov ebp,edx + mov ebx,ecx + cmp eax,96 + jb $L010ecb_enc_tail + movdqu xmm2,XMMWORD PTR [esi] + movdqu xmm3,XMMWORD PTR 16[esi] + movdqu xmm4,XMMWORD PTR 32[esi] + movdqu xmm5,XMMWORD PTR 48[esi] + movdqu xmm6,XMMWORD PTR 64[esi] + movdqu xmm7,XMMWORD PTR 80[esi] + lea esi,DWORD PTR 96[esi] + sub eax,96 + jmp $L011ecb_enc_loop6_enter +ALIGN 16 +$L012ecb_enc_loop6: + movups XMMWORD PTR [edi],xmm2 + movdqu xmm2,XMMWORD PTR [esi] + movups XMMWORD PTR 16[edi],xmm3 + movdqu xmm3,XMMWORD PTR 16[esi] + movups XMMWORD PTR 32[edi],xmm4 + movdqu xmm4,XMMWORD PTR 32[esi] + movups XMMWORD PTR 48[edi],xmm5 + movdqu xmm5,XMMWORD PTR 48[esi] + movups XMMWORD PTR 64[edi],xmm6 + movdqu xmm6,XMMWORD PTR 64[esi] + movups XMMWORD PTR 80[edi],xmm7 + lea edi,DWORD PTR 96[edi] + movdqu xmm7,XMMWORD PTR 80[esi] + lea esi,DWORD PTR 96[esi] +$L011ecb_enc_loop6_enter: + call __aesni_encrypt6 + mov edx,ebp + mov ecx,ebx + sub eax,96 + jnc $L012ecb_enc_loop6 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + movups XMMWORD PTR 64[edi],xmm6 + movups XMMWORD PTR 80[edi],xmm7 + lea edi,DWORD PTR 96[edi] + add eax,96 + jz $L008ecb_ret +$L010ecb_enc_tail: + movups xmm2,XMMWORD PTR [esi] + cmp eax,32 + jb $L013ecb_enc_one + movups xmm3,XMMWORD PTR 16[esi] + je $L014ecb_enc_two + movups xmm4,XMMWORD PTR 32[esi] + cmp eax,64 + jb $L015ecb_enc_three + movups xmm5,XMMWORD PTR 48[esi] + je $L016ecb_enc_four + movups xmm6,XMMWORD PTR 64[esi] + xorps xmm7,xmm7 + call __aesni_encrypt6 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + movups XMMWORD PTR 64[edi],xmm6 + jmp $L008ecb_ret +ALIGN 16 +$L013ecb_enc_one: + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L017enc1_loop_3: +DB 102,15,56,220,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L017enc1_loop_3 +DB 102,15,56,221,209 + movups XMMWORD PTR [edi],xmm2 + jmp $L008ecb_ret +ALIGN 16 +$L014ecb_enc_two: + xorps xmm4,xmm4 + call __aesni_encrypt3 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + jmp $L008ecb_ret +ALIGN 16 +$L015ecb_enc_three: + call __aesni_encrypt3 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + jmp $L008ecb_ret +ALIGN 16 +$L016ecb_enc_four: + call __aesni_encrypt4 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + jmp $L008ecb_ret +ALIGN 16 +$L009ecb_decrypt: + mov ebp,edx + mov ebx,ecx + cmp eax,96 + jb $L018ecb_dec_tail + movdqu xmm2,XMMWORD PTR [esi] + movdqu xmm3,XMMWORD PTR 16[esi] + movdqu xmm4,XMMWORD PTR 32[esi] + movdqu xmm5,XMMWORD PTR 48[esi] + movdqu xmm6,XMMWORD PTR 64[esi] + movdqu xmm7,XMMWORD PTR 80[esi] + lea esi,DWORD PTR 96[esi] + sub eax,96 + jmp $L019ecb_dec_loop6_enter +ALIGN 16 +$L020ecb_dec_loop6: + movups XMMWORD PTR [edi],xmm2 + movdqu xmm2,XMMWORD PTR [esi] + movups XMMWORD PTR 16[edi],xmm3 + movdqu xmm3,XMMWORD PTR 16[esi] + movups XMMWORD PTR 32[edi],xmm4 + movdqu xmm4,XMMWORD PTR 32[esi] + movups XMMWORD PTR 48[edi],xmm5 + movdqu xmm5,XMMWORD PTR 48[esi] + movups XMMWORD PTR 64[edi],xmm6 + movdqu xmm6,XMMWORD PTR 64[esi] + movups XMMWORD PTR 80[edi],xmm7 + lea edi,DWORD PTR 
96[edi] + movdqu xmm7,XMMWORD PTR 80[esi] + lea esi,DWORD PTR 96[esi] +$L019ecb_dec_loop6_enter: + call __aesni_decrypt6 + mov edx,ebp + mov ecx,ebx + sub eax,96 + jnc $L020ecb_dec_loop6 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + movups XMMWORD PTR 64[edi],xmm6 + movups XMMWORD PTR 80[edi],xmm7 + lea edi,DWORD PTR 96[edi] + add eax,96 + jz $L008ecb_ret +$L018ecb_dec_tail: + movups xmm2,XMMWORD PTR [esi] + cmp eax,32 + jb $L021ecb_dec_one + movups xmm3,XMMWORD PTR 16[esi] + je $L022ecb_dec_two + movups xmm4,XMMWORD PTR 32[esi] + cmp eax,64 + jb $L023ecb_dec_three + movups xmm5,XMMWORD PTR 48[esi] + je $L024ecb_dec_four + movups xmm6,XMMWORD PTR 64[esi] + xorps xmm7,xmm7 + call __aesni_decrypt6 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + movups XMMWORD PTR 64[edi],xmm6 + jmp $L008ecb_ret +ALIGN 16 +$L021ecb_dec_one: + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L025dec1_loop_4: +DB 102,15,56,222,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L025dec1_loop_4 +DB 102,15,56,223,209 + movups XMMWORD PTR [edi],xmm2 + jmp $L008ecb_ret +ALIGN 16 +$L022ecb_dec_two: + xorps xmm4,xmm4 + call __aesni_decrypt3 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + jmp $L008ecb_ret +ALIGN 16 +$L023ecb_dec_three: + call __aesni_decrypt3 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + jmp $L008ecb_ret +ALIGN 16 +$L024ecb_dec_four: + call __aesni_decrypt4 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 +$L008ecb_ret: + pop edi + pop esi + pop ebx + pop ebp + ret +_aesni_ecb_encrypt ENDP +ALIGN 16 +_aesni_ccm64_encrypt_blocks PROC PUBLIC +$L_aesni_ccm64_encrypt_blocks_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 24[esp] + mov eax,DWORD PTR 28[esp] + mov edx,DWORD PTR 32[esp] + mov ebx,DWORD PTR 36[esp] + mov ecx,DWORD PTR 40[esp] + mov ebp,esp + sub esp,60 + and esp,-16 + mov DWORD PTR 48[esp],ebp + movdqu xmm7,XMMWORD PTR [ebx] + movdqu xmm3,XMMWORD PTR [ecx] + mov ecx,DWORD PTR 240[edx] + mov DWORD PTR [esp],202182159 + mov DWORD PTR 4[esp],134810123 + mov DWORD PTR 8[esp],67438087 + mov DWORD PTR 12[esp],66051 + mov ebx,1 + xor ebp,ebp + mov DWORD PTR 16[esp],ebx + mov DWORD PTR 20[esp],ebp + mov DWORD PTR 24[esp],ebp + mov DWORD PTR 28[esp],ebp + shr ecx,1 + lea ebp,DWORD PTR [edx] + movdqa xmm5,XMMWORD PTR [esp] + movdqa xmm2,xmm7 + mov ebx,ecx +DB 102,15,56,0,253 +$L026ccm64_enc_outer: + movups xmm0,XMMWORD PTR [ebp] + mov ecx,ebx + movups xmm6,XMMWORD PTR [esi] + xorps xmm2,xmm0 + movups xmm1,XMMWORD PTR 16[ebp] + xorps xmm0,xmm6 + lea edx,DWORD PTR 32[ebp] + xorps xmm3,xmm0 + movups xmm0,XMMWORD PTR [edx] +$L027ccm64_enc2_loop: +DB 102,15,56,220,209 + dec ecx +DB 102,15,56,220,217 + movups xmm1,XMMWORD PTR 16[edx] +DB 102,15,56,220,208 + lea edx,DWORD PTR 32[edx] +DB 102,15,56,220,216 + movups xmm0,XMMWORD PTR [edx] + jnz $L027ccm64_enc2_loop +DB 102,15,56,220,209 +DB 102,15,56,220,217 + paddq xmm7,XMMWORD PTR 16[esp] +DB 102,15,56,221,208 +DB 102,15,56,221,216 + dec eax + lea esi,DWORD PTR 16[esi] + xorps xmm6,xmm2 + movdqa xmm2,xmm7 + movups XMMWORD PTR [edi],xmm6 + lea edi,DWORD PTR 16[edi] +DB 102,15,56,0,213 + 
jnz $L026ccm64_enc_outer + mov esp,DWORD PTR 48[esp] + mov edi,DWORD PTR 40[esp] + movups XMMWORD PTR [edi],xmm3 + pop edi + pop esi + pop ebx + pop ebp + ret +_aesni_ccm64_encrypt_blocks ENDP +ALIGN 16 +_aesni_ccm64_decrypt_blocks PROC PUBLIC +$L_aesni_ccm64_decrypt_blocks_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 24[esp] + mov eax,DWORD PTR 28[esp] + mov edx,DWORD PTR 32[esp] + mov ebx,DWORD PTR 36[esp] + mov ecx,DWORD PTR 40[esp] + mov ebp,esp + sub esp,60 + and esp,-16 + mov DWORD PTR 48[esp],ebp + movdqu xmm7,XMMWORD PTR [ebx] + movdqu xmm3,XMMWORD PTR [ecx] + mov ecx,DWORD PTR 240[edx] + mov DWORD PTR [esp],202182159 + mov DWORD PTR 4[esp],134810123 + mov DWORD PTR 8[esp],67438087 + mov DWORD PTR 12[esp],66051 + mov ebx,1 + xor ebp,ebp + mov DWORD PTR 16[esp],ebx + mov DWORD PTR 20[esp],ebp + mov DWORD PTR 24[esp],ebp + mov DWORD PTR 28[esp],ebp + movdqa xmm5,XMMWORD PTR [esp] + movdqa xmm2,xmm7 + mov ebp,edx + mov ebx,ecx +DB 102,15,56,0,253 + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L028enc1_loop_5: +DB 102,15,56,220,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L028enc1_loop_5 +DB 102,15,56,221,209 + movups xmm6,XMMWORD PTR [esi] + paddq xmm7,XMMWORD PTR 16[esp] + lea esi,QWORD PTR 16[esi] + jmp $L029ccm64_dec_outer +ALIGN 16 +$L029ccm64_dec_outer: + xorps xmm6,xmm2 + movdqa xmm2,xmm7 + mov ecx,ebx + movups XMMWORD PTR [edi],xmm6 + lea edi,DWORD PTR 16[edi] +DB 102,15,56,0,213 + sub eax,1 + jz $L030ccm64_dec_break + movups xmm0,XMMWORD PTR [ebp] + shr ecx,1 + movups xmm1,XMMWORD PTR 16[ebp] + xorps xmm6,xmm0 + lea edx,DWORD PTR 32[ebp] + xorps xmm2,xmm0 + xorps xmm3,xmm6 + movups xmm0,XMMWORD PTR [edx] +$L031ccm64_dec2_loop: +DB 102,15,56,220,209 + dec ecx +DB 102,15,56,220,217 + movups xmm1,XMMWORD PTR 16[edx] +DB 102,15,56,220,208 + lea edx,DWORD PTR 32[edx] +DB 102,15,56,220,216 + movups xmm0,XMMWORD PTR [edx] + jnz $L031ccm64_dec2_loop + movups xmm6,XMMWORD PTR [esi] + paddq xmm7,XMMWORD PTR 16[esp] +DB 102,15,56,220,209 +DB 102,15,56,220,217 + lea esi,QWORD PTR 16[esi] +DB 102,15,56,221,208 +DB 102,15,56,221,216 + jmp $L029ccm64_dec_outer +ALIGN 16 +$L030ccm64_dec_break: + mov edx,ebp + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + xorps xmm6,xmm0 + lea edx,DWORD PTR 32[edx] + xorps xmm3,xmm6 +$L032enc1_loop_6: +DB 102,15,56,220,217 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L032enc1_loop_6 +DB 102,15,56,221,217 + mov esp,DWORD PTR 48[esp] + mov edi,DWORD PTR 40[esp] + movups XMMWORD PTR [edi],xmm3 + pop edi + pop esi + pop ebx + pop ebp + ret +_aesni_ccm64_decrypt_blocks ENDP +ALIGN 16 +_aesni_ctr32_encrypt_blocks PROC PUBLIC +$L_aesni_ctr32_encrypt_blocks_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 24[esp] + mov eax,DWORD PTR 28[esp] + mov edx,DWORD PTR 32[esp] + mov ebx,DWORD PTR 36[esp] + mov ebp,esp + sub esp,88 + and esp,-16 + mov DWORD PTR 80[esp],ebp + cmp eax,1 + je $L033ctr32_one_shortcut + movdqu xmm7,XMMWORD PTR [ebx] + mov DWORD PTR [esp],202182159 + mov DWORD PTR 4[esp],134810123 + mov DWORD PTR 8[esp],67438087 + mov DWORD PTR 12[esp],66051 + mov ecx,6 + xor ebp,ebp + mov DWORD PTR 16[esp],ecx + mov DWORD PTR 20[esp],ecx + mov DWORD PTR 24[esp],ecx + mov DWORD PTR 28[esp],ebp +DB 102,15,58,22,251,3 +DB 102,15,58,34,253,3 + mov ecx,DWORD PTR 240[edx] + bswap ebx + pxor xmm1,xmm1 + pxor 
xmm0,xmm0 + movdqa xmm2,XMMWORD PTR [esp] +DB 102,15,58,34,203,0 + lea ebp,DWORD PTR 3[ebx] +DB 102,15,58,34,197,0 + inc ebx +DB 102,15,58,34,203,1 + inc ebp +DB 102,15,58,34,197,1 + inc ebx +DB 102,15,58,34,203,2 + inc ebp +DB 102,15,58,34,197,2 + movdqa XMMWORD PTR 48[esp],xmm1 +DB 102,15,56,0,202 + movdqa XMMWORD PTR 64[esp],xmm0 +DB 102,15,56,0,194 + pshufd xmm2,xmm1,192 + pshufd xmm3,xmm1,128 + cmp eax,6 + jb $L034ctr32_tail + movdqa XMMWORD PTR 32[esp],xmm7 + shr ecx,1 + mov ebp,edx + mov ebx,ecx + sub eax,6 + jmp $L035ctr32_loop6 +ALIGN 16 +$L035ctr32_loop6: + pshufd xmm4,xmm1,64 + movdqa xmm1,XMMWORD PTR 32[esp] + pshufd xmm5,xmm0,192 + por xmm2,xmm1 + pshufd xmm6,xmm0,128 + por xmm3,xmm1 + pshufd xmm7,xmm0,64 + por xmm4,xmm1 + por xmm5,xmm1 + por xmm6,xmm1 + por xmm7,xmm1 + movups xmm0,XMMWORD PTR [ebp] + movups xmm1,XMMWORD PTR 16[ebp] + lea edx,DWORD PTR 32[ebp] + dec ecx + pxor xmm2,xmm0 + pxor xmm3,xmm0 +DB 102,15,56,220,209 + pxor xmm4,xmm0 +DB 102,15,56,220,217 + pxor xmm5,xmm0 +DB 102,15,56,220,225 + pxor xmm6,xmm0 +DB 102,15,56,220,233 + pxor xmm7,xmm0 +DB 102,15,56,220,241 + movups xmm0,XMMWORD PTR [edx] +DB 102,15,56,220,249 + call $L_aesni_encrypt6_enter + movups xmm1,XMMWORD PTR [esi] + movups xmm0,XMMWORD PTR 16[esi] + xorps xmm2,xmm1 + movups xmm1,XMMWORD PTR 32[esi] + xorps xmm3,xmm0 + movups XMMWORD PTR [edi],xmm2 + movdqa xmm0,XMMWORD PTR 16[esp] + xorps xmm4,xmm1 + movdqa xmm1,XMMWORD PTR 48[esp] + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + paddd xmm1,xmm0 + paddd xmm0,XMMWORD PTR 64[esp] + movdqa xmm2,XMMWORD PTR [esp] + movups xmm3,XMMWORD PTR 48[esi] + movups xmm4,XMMWORD PTR 64[esi] + xorps xmm5,xmm3 + movups xmm3,XMMWORD PTR 80[esi] + lea esi,DWORD PTR 96[esi] + movdqa XMMWORD PTR 48[esp],xmm1 +DB 102,15,56,0,202 + xorps xmm6,xmm4 + movups XMMWORD PTR 48[edi],xmm5 + xorps xmm7,xmm3 + movdqa XMMWORD PTR 64[esp],xmm0 +DB 102,15,56,0,194 + movups XMMWORD PTR 64[edi],xmm6 + pshufd xmm2,xmm1,192 + movups XMMWORD PTR 80[edi],xmm7 + lea edi,DWORD PTR 96[edi] + mov ecx,ebx + pshufd xmm3,xmm1,128 + sub eax,6 + jnc $L035ctr32_loop6 + add eax,6 + jz $L036ctr32_ret + mov edx,ebp + lea ecx,DWORD PTR 1[ecx*2] + movdqa xmm7,XMMWORD PTR 32[esp] +$L034ctr32_tail: + por xmm2,xmm7 + cmp eax,2 + jb $L037ctr32_one + pshufd xmm4,xmm1,64 + por xmm3,xmm7 + je $L038ctr32_two + pshufd xmm5,xmm0,192 + por xmm4,xmm7 + cmp eax,4 + jb $L039ctr32_three + pshufd xmm6,xmm0,128 + por xmm5,xmm7 + je $L040ctr32_four + por xmm6,xmm7 + call __aesni_encrypt6 + movups xmm1,XMMWORD PTR [esi] + movups xmm0,XMMWORD PTR 16[esi] + xorps xmm2,xmm1 + movups xmm1,XMMWORD PTR 32[esi] + xorps xmm3,xmm0 + movups xmm0,XMMWORD PTR 48[esi] + xorps xmm4,xmm1 + movups xmm1,XMMWORD PTR 64[esi] + xorps xmm5,xmm0 + movups XMMWORD PTR [edi],xmm2 + xorps xmm6,xmm1 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + movups XMMWORD PTR 64[edi],xmm6 + jmp $L036ctr32_ret +ALIGN 16 +$L033ctr32_one_shortcut: + movups xmm2,XMMWORD PTR [ebx] + mov ecx,DWORD PTR 240[edx] +$L037ctr32_one: + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L041enc1_loop_7: +DB 102,15,56,220,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L041enc1_loop_7 +DB 102,15,56,221,209 + movups xmm6,XMMWORD PTR [esi] + xorps xmm6,xmm2 + movups XMMWORD PTR [edi],xmm6 + jmp $L036ctr32_ret +ALIGN 16 +$L038ctr32_two: + call __aesni_encrypt3 + movups xmm5,XMMWORD PTR [esi] + movups 
xmm6,XMMWORD PTR 16[esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + jmp $L036ctr32_ret +ALIGN 16 +$L039ctr32_three: + call __aesni_encrypt3 + movups xmm5,XMMWORD PTR [esi] + movups xmm6,XMMWORD PTR 16[esi] + xorps xmm2,xmm5 + movups xmm7,XMMWORD PTR 32[esi] + xorps xmm3,xmm6 + movups XMMWORD PTR [edi],xmm2 + xorps xmm4,xmm7 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + jmp $L036ctr32_ret +ALIGN 16 +$L040ctr32_four: + call __aesni_encrypt4 + movups xmm6,XMMWORD PTR [esi] + movups xmm7,XMMWORD PTR 16[esi] + movups xmm1,XMMWORD PTR 32[esi] + xorps xmm2,xmm6 + movups xmm0,XMMWORD PTR 48[esi] + xorps xmm3,xmm7 + movups XMMWORD PTR [edi],xmm2 + xorps xmm4,xmm1 + movups XMMWORD PTR 16[edi],xmm3 + xorps xmm5,xmm0 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 +$L036ctr32_ret: + mov esp,DWORD PTR 80[esp] + pop edi + pop esi + pop ebx + pop ebp + ret +_aesni_ctr32_encrypt_blocks ENDP +ALIGN 16 +_aesni_xts_encrypt PROC PUBLIC +$L_aesni_xts_encrypt_begin:: + push ebp + push ebx + push esi + push edi + mov edx,DWORD PTR 36[esp] + mov esi,DWORD PTR 40[esp] + mov ecx,DWORD PTR 240[edx] + movups xmm2,XMMWORD PTR [esi] + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L042enc1_loop_8: +DB 102,15,56,220,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L042enc1_loop_8 +DB 102,15,56,221,209 + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 24[esp] + mov eax,DWORD PTR 28[esp] + mov edx,DWORD PTR 32[esp] + mov ebp,esp + sub esp,120 + mov ecx,DWORD PTR 240[edx] + and esp,-16 + mov DWORD PTR 96[esp],135 + mov DWORD PTR 100[esp],0 + mov DWORD PTR 104[esp],1 + mov DWORD PTR 108[esp],0 + mov DWORD PTR 112[esp],eax + mov DWORD PTR 116[esp],ebp + movdqa xmm1,xmm2 + pxor xmm0,xmm0 + movdqa xmm3,XMMWORD PTR 96[esp] + pcmpgtd xmm0,xmm1 + and eax,-16 + mov ebp,edx + mov ebx,ecx + sub eax,96 + jc $L043xts_enc_short + shr ecx,1 + mov ebx,ecx + jmp $L044xts_enc_loop6 +ALIGN 16 +$L044xts_enc_loop6: + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa XMMWORD PTR [esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa XMMWORD PTR 16[esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa XMMWORD PTR 32[esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa XMMWORD PTR 48[esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm7,xmm0,19 + movdqa XMMWORD PTR 64[esp],xmm1 + paddq xmm1,xmm1 + movups xmm0,XMMWORD PTR [ebp] + pand xmm7,xmm3 + movups xmm2,XMMWORD PTR [esi] + pxor xmm7,xmm1 + movdqu xmm3,XMMWORD PTR 16[esi] + xorps xmm2,xmm0 + movdqu xmm4,XMMWORD PTR 32[esi] + pxor xmm3,xmm0 + movdqu xmm5,XMMWORD PTR 48[esi] + pxor xmm4,xmm0 + movdqu xmm6,XMMWORD PTR 64[esi] + pxor xmm5,xmm0 + movdqu xmm1,XMMWORD PTR 80[esi] + pxor xmm6,xmm0 + lea esi,DWORD PTR 96[esi] + pxor xmm2,XMMWORD PTR [esp] + movdqa XMMWORD PTR 80[esp],xmm7 + pxor xmm7,xmm1 + movups xmm1,XMMWORD PTR 16[ebp] + lea edx,DWORD PTR 32[ebp] + pxor xmm3,XMMWORD PTR 16[esp] +DB 102,15,56,220,209 + pxor xmm4,XMMWORD PTR 32[esp] +DB 102,15,56,220,217 + pxor xmm5,XMMWORD PTR 48[esp] + dec ecx +DB 102,15,56,220,225 + pxor xmm6,XMMWORD PTR 64[esp] +DB 102,15,56,220,233 + pxor xmm7,xmm0 +DB 102,15,56,220,241 + movups 
xmm0,XMMWORD PTR [edx] +DB 102,15,56,220,249 + call $L_aesni_encrypt6_enter + movdqa xmm1,XMMWORD PTR 80[esp] + pxor xmm0,xmm0 + xorps xmm2,XMMWORD PTR [esp] + pcmpgtd xmm0,xmm1 + xorps xmm3,XMMWORD PTR 16[esp] + movups XMMWORD PTR [edi],xmm2 + xorps xmm4,XMMWORD PTR 32[esp] + movups XMMWORD PTR 16[edi],xmm3 + xorps xmm5,XMMWORD PTR 48[esp] + movups XMMWORD PTR 32[edi],xmm4 + xorps xmm6,XMMWORD PTR 64[esp] + movups XMMWORD PTR 48[edi],xmm5 + xorps xmm7,xmm1 + movups XMMWORD PTR 64[edi],xmm6 + pshufd xmm2,xmm0,19 + movups XMMWORD PTR 80[edi],xmm7 + lea edi,DWORD PTR 96[edi] + movdqa xmm3,XMMWORD PTR 96[esp] + pxor xmm0,xmm0 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + mov ecx,ebx + pxor xmm1,xmm2 + sub eax,96 + jnc $L044xts_enc_loop6 + lea ecx,DWORD PTR 1[ecx*2] + mov edx,ebp + mov ebx,ecx +$L043xts_enc_short: + add eax,96 + jz $L045xts_enc_done6x + movdqa xmm5,xmm1 + cmp eax,32 + jb $L046xts_enc_one + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + je $L047xts_enc_two + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm6,xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + cmp eax,64 + jb $L048xts_enc_three + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm7,xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + movdqa XMMWORD PTR [esp],xmm5 + movdqa XMMWORD PTR 16[esp],xmm6 + je $L049xts_enc_four + movdqa XMMWORD PTR 32[esp],xmm7 + pshufd xmm7,xmm0,19 + movdqa XMMWORD PTR 48[esp],xmm1 + paddq xmm1,xmm1 + pand xmm7,xmm3 + pxor xmm7,xmm1 + movdqu xmm2,XMMWORD PTR [esi] + movdqu xmm3,XMMWORD PTR 16[esi] + movdqu xmm4,XMMWORD PTR 32[esi] + pxor xmm2,XMMWORD PTR [esp] + movdqu xmm5,XMMWORD PTR 48[esi] + pxor xmm3,XMMWORD PTR 16[esp] + movdqu xmm6,XMMWORD PTR 64[esi] + pxor xmm4,XMMWORD PTR 32[esp] + lea esi,DWORD PTR 80[esi] + pxor xmm5,XMMWORD PTR 48[esp] + movdqa XMMWORD PTR 64[esp],xmm7 + pxor xmm6,xmm7 + call __aesni_encrypt6 + movaps xmm1,XMMWORD PTR 64[esp] + xorps xmm2,XMMWORD PTR [esp] + xorps xmm3,XMMWORD PTR 16[esp] + xorps xmm4,XMMWORD PTR 32[esp] + movups XMMWORD PTR [edi],xmm2 + xorps xmm5,XMMWORD PTR 48[esp] + movups XMMWORD PTR 16[edi],xmm3 + xorps xmm6,xmm1 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + movups XMMWORD PTR 64[edi],xmm6 + lea edi,DWORD PTR 80[edi] + jmp $L050xts_enc_done +ALIGN 16 +$L046xts_enc_one: + movups xmm2,XMMWORD PTR [esi] + lea esi,DWORD PTR 16[esi] + xorps xmm2,xmm5 + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L051enc1_loop_9: +DB 102,15,56,220,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L051enc1_loop_9 +DB 102,15,56,221,209 + xorps xmm2,xmm5 + movups XMMWORD PTR [edi],xmm2 + lea edi,DWORD PTR 16[edi] + movdqa xmm1,xmm5 + jmp $L050xts_enc_done +ALIGN 16 +$L047xts_enc_two: + movaps xmm6,xmm1 + movups xmm2,XMMWORD PTR [esi] + movups xmm3,XMMWORD PTR 16[esi] + lea esi,DWORD PTR 32[esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + xorps xmm4,xmm4 + call __aesni_encrypt3 + xorps xmm2,xmm5 + xorps xmm3,xmm6 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + lea edi,DWORD PTR 32[edi] + movdqa xmm1,xmm6 + jmp $L050xts_enc_done +ALIGN 16 +$L048xts_enc_three: + movaps xmm7,xmm1 + movups xmm2,XMMWORD PTR [esi] + movups xmm3,XMMWORD PTR 16[esi] + movups xmm4,XMMWORD PTR 32[esi] + lea esi,DWORD PTR 48[esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + xorps xmm4,xmm7 + call __aesni_encrypt3 + xorps xmm2,xmm5 + 
xorps xmm3,xmm6 + xorps xmm4,xmm7 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + lea edi,DWORD PTR 48[edi] + movdqa xmm1,xmm7 + jmp $L050xts_enc_done +ALIGN 16 +$L049xts_enc_four: + movaps xmm6,xmm1 + movups xmm2,XMMWORD PTR [esi] + movups xmm3,XMMWORD PTR 16[esi] + movups xmm4,XMMWORD PTR 32[esi] + xorps xmm2,XMMWORD PTR [esp] + movups xmm5,XMMWORD PTR 48[esi] + lea esi,DWORD PTR 64[esi] + xorps xmm3,XMMWORD PTR 16[esp] + xorps xmm4,xmm7 + xorps xmm5,xmm6 + call __aesni_encrypt4 + xorps xmm2,XMMWORD PTR [esp] + xorps xmm3,XMMWORD PTR 16[esp] + xorps xmm4,xmm7 + movups XMMWORD PTR [edi],xmm2 + xorps xmm5,xmm6 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + lea edi,DWORD PTR 64[edi] + movdqa xmm1,xmm6 + jmp $L050xts_enc_done +ALIGN 16 +$L045xts_enc_done6x: + mov eax,DWORD PTR 112[esp] + and eax,15 + jz $L052xts_enc_ret + movdqa xmm5,xmm1 + mov DWORD PTR 112[esp],eax + jmp $L053xts_enc_steal +ALIGN 16 +$L050xts_enc_done: + mov eax,DWORD PTR 112[esp] + pxor xmm0,xmm0 + and eax,15 + jz $L052xts_enc_ret + pcmpgtd xmm0,xmm1 + mov DWORD PTR 112[esp],eax + pshufd xmm5,xmm0,19 + paddq xmm1,xmm1 + pand xmm5,XMMWORD PTR 96[esp] + pxor xmm5,xmm1 +$L053xts_enc_steal: + movzx ecx,BYTE PTR [esi] + movzx edx,BYTE PTR [edi-16] + lea esi,DWORD PTR 1[esi] + mov BYTE PTR [edi-16],cl + mov BYTE PTR [edi],dl + lea edi,DWORD PTR 1[edi] + sub eax,1 + jnz $L053xts_enc_steal + sub edi,DWORD PTR 112[esp] + mov edx,ebp + mov ecx,ebx + movups xmm2,XMMWORD PTR [edi-16] + xorps xmm2,xmm5 + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L054enc1_loop_10: +DB 102,15,56,220,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L054enc1_loop_10 +DB 102,15,56,221,209 + xorps xmm2,xmm5 + movups XMMWORD PTR [edi-16],xmm2 +$L052xts_enc_ret: + mov esp,DWORD PTR 116[esp] + pop edi + pop esi + pop ebx + pop ebp + ret +_aesni_xts_encrypt ENDP +ALIGN 16 +_aesni_xts_decrypt PROC PUBLIC +$L_aesni_xts_decrypt_begin:: + push ebp + push ebx + push esi + push edi + mov edx,DWORD PTR 36[esp] + mov esi,DWORD PTR 40[esp] + mov ecx,DWORD PTR 240[edx] + movups xmm2,XMMWORD PTR [esi] + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L055enc1_loop_11: +DB 102,15,56,220,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L055enc1_loop_11 +DB 102,15,56,221,209 + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 24[esp] + mov eax,DWORD PTR 28[esp] + mov edx,DWORD PTR 32[esp] + mov ebp,esp + sub esp,120 + and esp,-16 + xor ebx,ebx + test eax,15 + setnz bl + shl ebx,4 + sub eax,ebx + mov DWORD PTR 96[esp],135 + mov DWORD PTR 100[esp],0 + mov DWORD PTR 104[esp],1 + mov DWORD PTR 108[esp],0 + mov DWORD PTR 112[esp],eax + mov DWORD PTR 116[esp],ebp + mov ecx,DWORD PTR 240[edx] + mov ebp,edx + mov ebx,ecx + movdqa xmm1,xmm2 + pxor xmm0,xmm0 + movdqa xmm3,XMMWORD PTR 96[esp] + pcmpgtd xmm0,xmm1 + and eax,-16 + sub eax,96 + jc $L056xts_dec_short + shr ecx,1 + mov ebx,ecx + jmp $L057xts_dec_loop6 +ALIGN 16 +$L057xts_dec_loop6: + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa XMMWORD PTR [esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa XMMWORD PTR 16[esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa XMMWORD PTR 
32[esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa XMMWORD PTR 48[esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm7,xmm0,19 + movdqa XMMWORD PTR 64[esp],xmm1 + paddq xmm1,xmm1 + movups xmm0,XMMWORD PTR [ebp] + pand xmm7,xmm3 + movups xmm2,XMMWORD PTR [esi] + pxor xmm7,xmm1 + movdqu xmm3,XMMWORD PTR 16[esi] + xorps xmm2,xmm0 + movdqu xmm4,XMMWORD PTR 32[esi] + pxor xmm3,xmm0 + movdqu xmm5,XMMWORD PTR 48[esi] + pxor xmm4,xmm0 + movdqu xmm6,XMMWORD PTR 64[esi] + pxor xmm5,xmm0 + movdqu xmm1,XMMWORD PTR 80[esi] + pxor xmm6,xmm0 + lea esi,DWORD PTR 96[esi] + pxor xmm2,XMMWORD PTR [esp] + movdqa XMMWORD PTR 80[esp],xmm7 + pxor xmm7,xmm1 + movups xmm1,XMMWORD PTR 16[ebp] + lea edx,DWORD PTR 32[ebp] + pxor xmm3,XMMWORD PTR 16[esp] +DB 102,15,56,222,209 + pxor xmm4,XMMWORD PTR 32[esp] +DB 102,15,56,222,217 + pxor xmm5,XMMWORD PTR 48[esp] + dec ecx +DB 102,15,56,222,225 + pxor xmm6,XMMWORD PTR 64[esp] +DB 102,15,56,222,233 + pxor xmm7,xmm0 +DB 102,15,56,222,241 + movups xmm0,XMMWORD PTR [edx] +DB 102,15,56,222,249 + call $L_aesni_decrypt6_enter + movdqa xmm1,XMMWORD PTR 80[esp] + pxor xmm0,xmm0 + xorps xmm2,XMMWORD PTR [esp] + pcmpgtd xmm0,xmm1 + xorps xmm3,XMMWORD PTR 16[esp] + movups XMMWORD PTR [edi],xmm2 + xorps xmm4,XMMWORD PTR 32[esp] + movups XMMWORD PTR 16[edi],xmm3 + xorps xmm5,XMMWORD PTR 48[esp] + movups XMMWORD PTR 32[edi],xmm4 + xorps xmm6,XMMWORD PTR 64[esp] + movups XMMWORD PTR 48[edi],xmm5 + xorps xmm7,xmm1 + movups XMMWORD PTR 64[edi],xmm6 + pshufd xmm2,xmm0,19 + movups XMMWORD PTR 80[edi],xmm7 + lea edi,DWORD PTR 96[edi] + movdqa xmm3,XMMWORD PTR 96[esp] + pxor xmm0,xmm0 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + mov ecx,ebx + pxor xmm1,xmm2 + sub eax,96 + jnc $L057xts_dec_loop6 + lea ecx,DWORD PTR 1[ecx*2] + mov edx,ebp + mov ebx,ecx +$L056xts_dec_short: + add eax,96 + jz $L058xts_dec_done6x + movdqa xmm5,xmm1 + cmp eax,32 + jb $L059xts_dec_one + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + je $L060xts_dec_two + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm6,xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + cmp eax,64 + jb $L061xts_dec_three + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm7,xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + movdqa XMMWORD PTR [esp],xmm5 + movdqa XMMWORD PTR 16[esp],xmm6 + je $L062xts_dec_four + movdqa XMMWORD PTR 32[esp],xmm7 + pshufd xmm7,xmm0,19 + movdqa XMMWORD PTR 48[esp],xmm1 + paddq xmm1,xmm1 + pand xmm7,xmm3 + pxor xmm7,xmm1 + movdqu xmm2,XMMWORD PTR [esi] + movdqu xmm3,XMMWORD PTR 16[esi] + movdqu xmm4,XMMWORD PTR 32[esi] + pxor xmm2,XMMWORD PTR [esp] + movdqu xmm5,XMMWORD PTR 48[esi] + pxor xmm3,XMMWORD PTR 16[esp] + movdqu xmm6,XMMWORD PTR 64[esi] + pxor xmm4,XMMWORD PTR 32[esp] + lea esi,DWORD PTR 80[esi] + pxor xmm5,XMMWORD PTR 48[esp] + movdqa XMMWORD PTR 64[esp],xmm7 + pxor xmm6,xmm7 + call __aesni_decrypt6 + movaps xmm1,XMMWORD PTR 64[esp] + xorps xmm2,XMMWORD PTR [esp] + xorps xmm3,XMMWORD PTR 16[esp] + xorps xmm4,XMMWORD PTR 32[esp] + movups XMMWORD PTR [edi],xmm2 + xorps xmm5,XMMWORD PTR 48[esp] + movups XMMWORD PTR 16[edi],xmm3 + xorps xmm6,xmm1 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + movups XMMWORD PTR 64[edi],xmm6 + lea edi,DWORD PTR 80[edi] + jmp $L063xts_dec_done +ALIGN 16 +$L059xts_dec_one: + movups xmm2,XMMWORD PTR [esi] + lea 
esi,DWORD PTR 16[esi] + xorps xmm2,xmm5 + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L064dec1_loop_12: +DB 102,15,56,222,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L064dec1_loop_12 +DB 102,15,56,223,209 + xorps xmm2,xmm5 + movups XMMWORD PTR [edi],xmm2 + lea edi,DWORD PTR 16[edi] + movdqa xmm1,xmm5 + jmp $L063xts_dec_done +ALIGN 16 +$L060xts_dec_two: + movaps xmm6,xmm1 + movups xmm2,XMMWORD PTR [esi] + movups xmm3,XMMWORD PTR 16[esi] + lea esi,DWORD PTR 32[esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + call __aesni_decrypt3 + xorps xmm2,xmm5 + xorps xmm3,xmm6 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + lea edi,DWORD PTR 32[edi] + movdqa xmm1,xmm6 + jmp $L063xts_dec_done +ALIGN 16 +$L061xts_dec_three: + movaps xmm7,xmm1 + movups xmm2,XMMWORD PTR [esi] + movups xmm3,XMMWORD PTR 16[esi] + movups xmm4,XMMWORD PTR 32[esi] + lea esi,DWORD PTR 48[esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + xorps xmm4,xmm7 + call __aesni_decrypt3 + xorps xmm2,xmm5 + xorps xmm3,xmm6 + xorps xmm4,xmm7 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + lea edi,DWORD PTR 48[edi] + movdqa xmm1,xmm7 + jmp $L063xts_dec_done +ALIGN 16 +$L062xts_dec_four: + movaps xmm6,xmm1 + movups xmm2,XMMWORD PTR [esi] + movups xmm3,XMMWORD PTR 16[esi] + movups xmm4,XMMWORD PTR 32[esi] + xorps xmm2,XMMWORD PTR [esp] + movups xmm5,XMMWORD PTR 48[esi] + lea esi,DWORD PTR 64[esi] + xorps xmm3,XMMWORD PTR 16[esp] + xorps xmm4,xmm7 + xorps xmm5,xmm6 + call __aesni_decrypt4 + xorps xmm2,XMMWORD PTR [esp] + xorps xmm3,XMMWORD PTR 16[esp] + xorps xmm4,xmm7 + movups XMMWORD PTR [edi],xmm2 + xorps xmm5,xmm6 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + lea edi,DWORD PTR 64[edi] + movdqa xmm1,xmm6 + jmp $L063xts_dec_done +ALIGN 16 +$L058xts_dec_done6x: + mov eax,DWORD PTR 112[esp] + and eax,15 + jz $L065xts_dec_ret + mov DWORD PTR 112[esp],eax + jmp $L066xts_dec_only_one_more +ALIGN 16 +$L063xts_dec_done: + mov eax,DWORD PTR 112[esp] + pxor xmm0,xmm0 + and eax,15 + jz $L065xts_dec_ret + pcmpgtd xmm0,xmm1 + mov DWORD PTR 112[esp],eax + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm3,XMMWORD PTR 96[esp] + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 +$L066xts_dec_only_one_more: + pshufd xmm5,xmm0,19 + movdqa xmm6,xmm1 + paddq xmm1,xmm1 + pand xmm5,xmm3 + pxor xmm5,xmm1 + mov edx,ebp + mov ecx,ebx + movups xmm2,XMMWORD PTR [esi] + xorps xmm2,xmm5 + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L067dec1_loop_13: +DB 102,15,56,222,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L067dec1_loop_13 +DB 102,15,56,223,209 + xorps xmm2,xmm5 + movups XMMWORD PTR [edi],xmm2 +$L068xts_dec_steal: + movzx ecx,BYTE PTR 16[esi] + movzx edx,BYTE PTR [edi] + lea esi,DWORD PTR 1[esi] + mov BYTE PTR [edi],cl + mov BYTE PTR 16[edi],dl + lea edi,DWORD PTR 1[edi] + sub eax,1 + jnz $L068xts_dec_steal + sub edi,DWORD PTR 112[esp] + mov edx,ebp + mov ecx,ebx + movups xmm2,XMMWORD PTR [edi] + xorps xmm2,xmm6 + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L069dec1_loop_14: +DB 102,15,56,222,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L069dec1_loop_14 +DB 102,15,56,223,209 + xorps xmm2,xmm6 + movups XMMWORD PTR [edi],xmm2 
+$L065xts_dec_ret: + mov esp,DWORD PTR 116[esp] + pop edi + pop esi + pop ebx + pop ebp + ret +_aesni_xts_decrypt ENDP +ALIGN 16 +_aesni_cbc_encrypt PROC PUBLIC +$L_aesni_cbc_encrypt_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + mov ebx,esp + mov edi,DWORD PTR 24[esp] + sub ebx,24 + mov eax,DWORD PTR 28[esp] + and ebx,-16 + mov edx,DWORD PTR 32[esp] + mov ebp,DWORD PTR 36[esp] + test eax,eax + jz $L070cbc_abort + cmp DWORD PTR 40[esp],0 + xchg ebx,esp + movups xmm7,XMMWORD PTR [ebp] + mov ecx,DWORD PTR 240[edx] + mov ebp,edx + mov DWORD PTR 16[esp],ebx + mov ebx,ecx + je $L071cbc_decrypt + movaps xmm2,xmm7 + cmp eax,16 + jb $L072cbc_enc_tail + sub eax,16 + jmp $L073cbc_enc_loop +ALIGN 16 +$L073cbc_enc_loop: + movups xmm7,XMMWORD PTR [esi] + lea esi,DWORD PTR 16[esi] + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + xorps xmm7,xmm0 + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm7 +$L074enc1_loop_15: +DB 102,15,56,220,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L074enc1_loop_15 +DB 102,15,56,221,209 + mov ecx,ebx + mov edx,ebp + movups XMMWORD PTR [edi],xmm2 + lea edi,DWORD PTR 16[edi] + sub eax,16 + jnc $L073cbc_enc_loop + add eax,16 + jnz $L072cbc_enc_tail + movaps xmm7,xmm2 + jmp $L075cbc_ret +$L072cbc_enc_tail: + mov ecx,eax +DD 2767451785 + mov ecx,16 + sub ecx,eax + xor eax,eax +DD 2868115081 + lea edi,DWORD PTR [edi-16] + mov ecx,ebx + mov esi,edi + mov edx,ebp + jmp $L073cbc_enc_loop +ALIGN 16 +$L071cbc_decrypt: + cmp eax,80 + jbe $L076cbc_dec_tail + movaps XMMWORD PTR [esp],xmm7 + sub eax,80 + jmp $L077cbc_dec_loop6_enter +ALIGN 16 +$L078cbc_dec_loop6: + movaps XMMWORD PTR [esp],xmm0 + movups XMMWORD PTR [edi],xmm7 + lea edi,DWORD PTR 16[edi] +$L077cbc_dec_loop6_enter: + movdqu xmm2,XMMWORD PTR [esi] + movdqu xmm3,XMMWORD PTR 16[esi] + movdqu xmm4,XMMWORD PTR 32[esi] + movdqu xmm5,XMMWORD PTR 48[esi] + movdqu xmm6,XMMWORD PTR 64[esi] + movdqu xmm7,XMMWORD PTR 80[esi] + call __aesni_decrypt6 + movups xmm1,XMMWORD PTR [esi] + movups xmm0,XMMWORD PTR 16[esi] + xorps xmm2,XMMWORD PTR [esp] + xorps xmm3,xmm1 + movups xmm1,XMMWORD PTR 32[esi] + xorps xmm4,xmm0 + movups xmm0,XMMWORD PTR 48[esi] + xorps xmm5,xmm1 + movups xmm1,XMMWORD PTR 64[esi] + xorps xmm6,xmm0 + movups xmm0,XMMWORD PTR 80[esi] + xorps xmm7,xmm1 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + lea esi,DWORD PTR 96[esi] + movups XMMWORD PTR 32[edi],xmm4 + mov ecx,ebx + movups XMMWORD PTR 48[edi],xmm5 + mov edx,ebp + movups XMMWORD PTR 64[edi],xmm6 + lea edi,DWORD PTR 80[edi] + sub eax,96 + ja $L078cbc_dec_loop6 + movaps xmm2,xmm7 + movaps xmm7,xmm0 + add eax,80 + jle $L079cbc_dec_tail_collected + movups XMMWORD PTR [edi],xmm2 + lea edi,DWORD PTR 16[edi] +$L076cbc_dec_tail: + movups xmm2,XMMWORD PTR [esi] + movaps xmm6,xmm2 + cmp eax,16 + jbe $L080cbc_dec_one + movups xmm3,XMMWORD PTR 16[esi] + movaps xmm5,xmm3 + cmp eax,32 + jbe $L081cbc_dec_two + movups xmm4,XMMWORD PTR 32[esi] + cmp eax,48 + jbe $L082cbc_dec_three + movups xmm5,XMMWORD PTR 48[esi] + cmp eax,64 + jbe $L083cbc_dec_four + movups xmm6,XMMWORD PTR 64[esi] + movaps XMMWORD PTR [esp],xmm7 + movups xmm2,XMMWORD PTR [esi] + xorps xmm7,xmm7 + call __aesni_decrypt6 + movups xmm1,XMMWORD PTR [esi] + movups xmm0,XMMWORD PTR 16[esi] + xorps xmm2,XMMWORD PTR [esp] + xorps xmm3,xmm1 + movups xmm1,XMMWORD PTR 32[esi] + xorps xmm4,xmm0 + movups xmm0,XMMWORD PTR 48[esi] + xorps xmm5,xmm1 + movups xmm7,XMMWORD PTR 64[esi] + xorps xmm6,xmm0 + movups XMMWORD PTR 
[edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + lea edi,DWORD PTR 64[edi] + movaps xmm2,xmm6 + sub eax,80 + jmp $L079cbc_dec_tail_collected +ALIGN 16 +$L080cbc_dec_one: + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L084dec1_loop_16: +DB 102,15,56,222,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L084dec1_loop_16 +DB 102,15,56,223,209 + xorps xmm2,xmm7 + movaps xmm7,xmm6 + sub eax,16 + jmp $L079cbc_dec_tail_collected +ALIGN 16 +$L081cbc_dec_two: + xorps xmm4,xmm4 + call __aesni_decrypt3 + xorps xmm2,xmm7 + xorps xmm3,xmm6 + movups XMMWORD PTR [edi],xmm2 + movaps xmm2,xmm3 + lea edi,DWORD PTR 16[edi] + movaps xmm7,xmm5 + sub eax,32 + jmp $L079cbc_dec_tail_collected +ALIGN 16 +$L082cbc_dec_three: + call __aesni_decrypt3 + xorps xmm2,xmm7 + xorps xmm3,xmm6 + xorps xmm4,xmm5 + movups XMMWORD PTR [edi],xmm2 + movaps xmm2,xmm4 + movups XMMWORD PTR 16[edi],xmm3 + lea edi,DWORD PTR 32[edi] + movups xmm7,XMMWORD PTR 32[esi] + sub eax,48 + jmp $L079cbc_dec_tail_collected +ALIGN 16 +$L083cbc_dec_four: + call __aesni_decrypt4 + movups xmm1,XMMWORD PTR 16[esi] + movups xmm0,XMMWORD PTR 32[esi] + xorps xmm2,xmm7 + movups xmm7,XMMWORD PTR 48[esi] + xorps xmm3,xmm6 + movups XMMWORD PTR [edi],xmm2 + xorps xmm4,xmm1 + movups XMMWORD PTR 16[edi],xmm3 + xorps xmm5,xmm0 + movups XMMWORD PTR 32[edi],xmm4 + lea edi,DWORD PTR 48[edi] + movaps xmm2,xmm5 + sub eax,64 +$L079cbc_dec_tail_collected: + and eax,15 + jnz $L085cbc_dec_tail_partial + movups XMMWORD PTR [edi],xmm2 + jmp $L075cbc_ret +ALIGN 16 +$L085cbc_dec_tail_partial: + movaps XMMWORD PTR [esp],xmm2 + mov ecx,16 + mov esi,esp + sub ecx,eax +DD 2767451785 +$L075cbc_ret: + mov esp,DWORD PTR 16[esp] + mov ebp,DWORD PTR 36[esp] + movups XMMWORD PTR [ebp],xmm7 +$L070cbc_abort: + pop edi + pop esi + pop ebx + pop ebp + ret +_aesni_cbc_encrypt ENDP +ALIGN 16 +__aesni_set_encrypt_key PROC PRIVATE + test eax,eax + jz $L086bad_pointer + test edx,edx + jz $L086bad_pointer + movups xmm0,XMMWORD PTR [eax] + xorps xmm4,xmm4 + lea edx,DWORD PTR 16[edx] + cmp ecx,256 + je $L08714rounds + cmp ecx,192 + je $L08812rounds + cmp ecx,128 + jne $L089bad_keybits +ALIGN 16 +$L09010rounds: + mov ecx,9 + movups XMMWORD PTR [edx-16],xmm0 +DB 102,15,58,223,200,1 + call $L091key_128_cold +DB 102,15,58,223,200,2 + call $L092key_128 +DB 102,15,58,223,200,4 + call $L092key_128 +DB 102,15,58,223,200,8 + call $L092key_128 +DB 102,15,58,223,200,16 + call $L092key_128 +DB 102,15,58,223,200,32 + call $L092key_128 +DB 102,15,58,223,200,64 + call $L092key_128 +DB 102,15,58,223,200,128 + call $L092key_128 +DB 102,15,58,223,200,27 + call $L092key_128 +DB 102,15,58,223,200,54 + call $L092key_128 + movups XMMWORD PTR [edx],xmm0 + mov DWORD PTR 80[edx],ecx + xor eax,eax + ret +ALIGN 16 +$L092key_128: + movups XMMWORD PTR [edx],xmm0 + lea edx,DWORD PTR 16[edx] +$L091key_128_cold: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + ret +ALIGN 16 +$L08812rounds: + movq xmm2,QWORD PTR 16[eax] + mov ecx,11 + movups XMMWORD PTR [edx-16],xmm0 +DB 102,15,58,223,202,1 + call $L093key_192a_cold +DB 102,15,58,223,202,2 + call $L094key_192b +DB 102,15,58,223,202,4 + call $L095key_192a +DB 102,15,58,223,202,8 + call $L094key_192b +DB 102,15,58,223,202,16 + call $L095key_192a +DB 102,15,58,223,202,32 + call $L094key_192b +DB 102,15,58,223,202,64 + call $L095key_192a +DB 
102,15,58,223,202,128 + call $L094key_192b + movups XMMWORD PTR [edx],xmm0 + mov DWORD PTR 48[edx],ecx + xor eax,eax + ret +ALIGN 16 +$L095key_192a: + movups XMMWORD PTR [edx],xmm0 + lea edx,DWORD PTR 16[edx] +ALIGN 16 +$L093key_192a_cold: + movaps xmm5,xmm2 +$L096key_192b_warm: + shufps xmm4,xmm0,16 + movdqa xmm3,xmm2 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + pslldq xmm3,4 + xorps xmm0,xmm4 + pshufd xmm1,xmm1,85 + pxor xmm2,xmm3 + pxor xmm0,xmm1 + pshufd xmm3,xmm0,255 + pxor xmm2,xmm3 + ret +ALIGN 16 +$L094key_192b: + movaps xmm3,xmm0 + shufps xmm5,xmm0,68 + movups XMMWORD PTR [edx],xmm5 + shufps xmm3,xmm2,78 + movups XMMWORD PTR 16[edx],xmm3 + lea edx,DWORD PTR 32[edx] + jmp $L096key_192b_warm +ALIGN 16 +$L08714rounds: + movups xmm2,XMMWORD PTR 16[eax] + mov ecx,13 + lea edx,DWORD PTR 16[edx] + movups XMMWORD PTR [edx-32],xmm0 + movups XMMWORD PTR [edx-16],xmm2 +DB 102,15,58,223,202,1 + call $L097key_256a_cold +DB 102,15,58,223,200,1 + call $L098key_256b +DB 102,15,58,223,202,2 + call $L099key_256a +DB 102,15,58,223,200,2 + call $L098key_256b +DB 102,15,58,223,202,4 + call $L099key_256a +DB 102,15,58,223,200,4 + call $L098key_256b +DB 102,15,58,223,202,8 + call $L099key_256a +DB 102,15,58,223,200,8 + call $L098key_256b +DB 102,15,58,223,202,16 + call $L099key_256a +DB 102,15,58,223,200,16 + call $L098key_256b +DB 102,15,58,223,202,32 + call $L099key_256a +DB 102,15,58,223,200,32 + call $L098key_256b +DB 102,15,58,223,202,64 + call $L099key_256a + movups XMMWORD PTR [edx],xmm0 + mov DWORD PTR 16[edx],ecx + xor eax,eax + ret +ALIGN 16 +$L099key_256a: + movups XMMWORD PTR [edx],xmm2 + lea edx,DWORD PTR 16[edx] +$L097key_256a_cold: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + ret +ALIGN 16 +$L098key_256b: + movups XMMWORD PTR [edx],xmm0 + lea edx,DWORD PTR 16[edx] + shufps xmm4,xmm2,16 + xorps xmm2,xmm4 + shufps xmm4,xmm2,140 + xorps xmm2,xmm4 + shufps xmm1,xmm1,170 + xorps xmm2,xmm1 + ret +ALIGN 4 +$L086bad_pointer: + mov eax,-1 + ret +ALIGN 4 +$L089bad_keybits: + mov eax,-2 + ret +__aesni_set_encrypt_key ENDP +ALIGN 16 +_aesni_set_encrypt_key PROC PUBLIC +$L_aesni_set_encrypt_key_begin:: + mov eax,DWORD PTR 4[esp] + mov ecx,DWORD PTR 8[esp] + mov edx,DWORD PTR 12[esp] + call __aesni_set_encrypt_key + ret +_aesni_set_encrypt_key ENDP +ALIGN 16 +_aesni_set_decrypt_key PROC PUBLIC +$L_aesni_set_decrypt_key_begin:: + mov eax,DWORD PTR 4[esp] + mov ecx,DWORD PTR 8[esp] + mov edx,DWORD PTR 12[esp] + call __aesni_set_encrypt_key + mov edx,DWORD PTR 12[esp] + shl ecx,4 + test eax,eax + jnz $L100dec_key_ret + lea eax,DWORD PTR 16[ecx*1+edx] + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR [eax] + movups XMMWORD PTR [eax],xmm0 + movups XMMWORD PTR [edx],xmm1 + lea edx,DWORD PTR 16[edx] + lea eax,DWORD PTR [eax-16] +$L101dec_key_inverse: + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR [eax] +DB 102,15,56,219,192 +DB 102,15,56,219,201 + lea edx,DWORD PTR 16[edx] + lea eax,DWORD PTR [eax-16] + movups XMMWORD PTR 16[eax],xmm0 + movups XMMWORD PTR [edx-16],xmm1 + cmp eax,edx + ja $L101dec_key_inverse + movups xmm0,XMMWORD PTR [edx] +DB 102,15,56,219,192 + movups XMMWORD PTR [edx],xmm0 + xor eax,eax +$L100dec_key_ret: + ret +_aesni_set_decrypt_key ENDP +DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +DB 115,108,46,111,114,103,62,0 +.text$ ENDS +END diff --git 
a/deps/openssl/asm/x86-win32-masm/bf/bf-686.asm b/deps/openssl/asm/x86-win32-masm/bf/bf-686.asm index a802e7292f..2883179674 100644 --- a/deps/openssl/asm/x86-win32-masm/bf/bf-686.asm +++ b/deps/openssl/asm/x86-win32-masm/bf/bf-686.asm @@ -2,7 +2,7 @@ TITLE bf-686.asm IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. ENDIF -.586 +.686 .MODEL FLAT OPTION DOTNAME IF @Version LT 800 diff --git a/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm b/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm index eaad4a073a..031be4e7ea 100644 --- a/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm +++ b/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm @@ -2,7 +2,7 @@ TITLE ../openssl/crypto/bn/asm/x86-mont.asm IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. ENDIF -.586 +.686 .MODEL FLAT OPTION DOTNAME IF @Version LT 800 diff --git a/deps/openssl/asm/x86-win32-masm/bn/x86.asm b/deps/openssl/asm/x86-win32-masm/bn/x86.asm index d7051fa4e5..2e7a0d4aaf 100644 --- a/deps/openssl/asm/x86-win32-masm/bn/x86.asm +++ b/deps/openssl/asm/x86-win32-masm/bn/x86.asm @@ -2,7 +2,7 @@ TITLE ../openssl/crypto/bn/asm/x86.asm IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. ENDIF -.586 +.686 .MODEL FLAT OPTION DOTNAME IF @Version LT 800 diff --git a/deps/openssl/asm/x86-win32-masm/camellia/cmll-x86.asm b/deps/openssl/asm/x86-win32-masm/camellia/cmll-x86.asm index acdf6a2f8b..e32d28135b 100644 --- a/deps/openssl/asm/x86-win32-masm/camellia/cmll-x86.asm +++ b/deps/openssl/asm/x86-win32-masm/camellia/cmll-x86.asm @@ -2,7 +2,7 @@ TITLE cmll-586.asm IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. ENDIF -.586 +.686 .MODEL FLAT OPTION DOTNAME IF @Version LT 800 @@ -1532,8 +1532,8 @@ $L013done: ret _Camellia_Ekeygen ENDP ALIGN 16 -_Camellia_set_key PROC PUBLIC -$L_Camellia_set_key_begin:: +_private_Camellia_set_key PROC PUBLIC +$L_private_Camellia_set_key_begin:: push ebx mov ecx,DWORD PTR 8[esp] mov ebx,DWORD PTR 12[esp] @@ -1563,7 +1563,7 @@ ALIGN 4 $L014done: pop ebx ret -_Camellia_set_key ENDP +_private_Camellia_set_key ENDP ALIGN 64 $LCamellia_SIGMA:: DD 2694735487,1003262091,3061508184,1286239154,3337565999,3914302142,1426019237,4057165596,283453434,3731369245,2958461122,3018244605,0,0,0,0 diff --git a/deps/openssl/asm/x86-win32-masm/cast/cast-586.asm b/deps/openssl/asm/x86-win32-masm/cast/cast-586.asm index 1f2f0708a5..6f85c34d28 100644 --- a/deps/openssl/asm/x86-win32-masm/cast/cast-586.asm +++ b/deps/openssl/asm/x86-win32-masm/cast/cast-586.asm @@ -2,7 +2,7 @@ TITLE cast-586.asm IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. ENDIF -.586 +.686 .MODEL FLAT OPTION DOTNAME IF @Version LT 800 diff --git a/deps/openssl/asm/x86-win32-masm/des/crypt586.asm b/deps/openssl/asm/x86-win32-masm/des/crypt586.asm index 24e474dfc5..4c82c7a265 100644 --- a/deps/openssl/asm/x86-win32-masm/des/crypt586.asm +++ b/deps/openssl/asm/x86-win32-masm/des/crypt586.asm @@ -2,7 +2,7 @@ TITLE crypt586.asm IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. ENDIF -.586 +.686 .MODEL FLAT OPTION DOTNAME IF @Version LT 800 diff --git a/deps/openssl/asm/x86-win32-masm/des/des-586.asm b/deps/openssl/asm/x86-win32-masm/des/des-586.asm index 3c630daff9..24f19a6603 100644 --- a/deps/openssl/asm/x86-win32-masm/des/des-586.asm +++ b/deps/openssl/asm/x86-win32-masm/des/des-586.asm @@ -2,7 +2,7 @@ TITLE des-586.asm IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. 
ENDIF -.586 +.686 .MODEL FLAT OPTION DOTNAME IF @Version LT 800 diff --git a/deps/openssl/asm/x86-win32-masm/md5/md5-586.asm b/deps/openssl/asm/x86-win32-masm/md5/md5-586.asm index c8edae762d..8e263de0fd 100644 --- a/deps/openssl/asm/x86-win32-masm/md5/md5-586.asm +++ b/deps/openssl/asm/x86-win32-masm/md5/md5-586.asm @@ -2,7 +2,7 @@ TITLE ../openssl/crypto/md5/asm/md5-586.asm IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. ENDIF -.586 +.686 .MODEL FLAT OPTION DOTNAME IF @Version LT 800 diff --git a/deps/openssl/asm/x86-win32-masm/rc4/rc4-586.asm b/deps/openssl/asm/x86-win32-masm/rc4/rc4-586.asm index 3eb66f7350..d179090911 100644 --- a/deps/openssl/asm/x86-win32-masm/rc4/rc4-586.asm +++ b/deps/openssl/asm/x86-win32-masm/rc4/rc4-586.asm @@ -2,7 +2,14 @@ TITLE rc4-586.asm IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. ENDIF -.586 +.686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) +XMMWORD ENDS +ENDIF + .MODEL FLAT OPTION DOTNAME IF @Version LT 800 @@ -10,6 +17,7 @@ IF @Version LT 800 ELSE .text$ SEGMENT ALIGN(64) 'CODE' ENDIF +;EXTERN _OPENSSL_ia32cap_P:NEAR ALIGN 16 _RC4 PROC PUBLIC $L_RC4_begin:: @@ -37,11 +45,146 @@ $L_RC4_begin:: mov ecx,DWORD PTR [eax*4+edi] and edx,-4 jz $L002loop1 - lea edx,DWORD PTR [edx*1+esi-4] - mov DWORD PTR 28[esp],edx + test edx,-8 mov DWORD PTR 32[esp],ebp + jz $L003go4loop4 + lea ebp,DWORD PTR _OPENSSL_ia32cap_P + bt DWORD PTR [ebp],26 + jnc $L003go4loop4 + mov ebp,DWORD PTR 32[esp] + and edx,-8 + lea edx,DWORD PTR [edx*1+esi-8] + mov DWORD PTR [edi-4],edx + add bl,cl + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + movq mm0,QWORD PTR [esi] + mov ecx,DWORD PTR [eax*4+edi] + movd mm2,DWORD PTR [edx*4+edi] + jmp $L004loop_mmx_enter +ALIGN 16 +$L005loop_mmx: + add bl,cl + psllq mm1,56 + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + movq mm0,QWORD PTR [esi] + movq QWORD PTR [esi*1+ebp-8],mm2 + mov ecx,DWORD PTR [eax*4+edi] + movd mm2,DWORD PTR [edx*4+edi] +$L004loop_mmx_enter: + add bl,cl + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm0 + mov ecx,DWORD PTR [eax*4+edi] + movd mm1,DWORD PTR [edx*4+edi] + add bl,cl + psllq mm1,8 + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD PTR [eax*4+edi] + movd mm1,DWORD PTR [edx*4+edi] + add bl,cl + psllq mm1,16 + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD PTR [eax*4+edi] + movd mm1,DWORD PTR [edx*4+edi] + add bl,cl + psllq mm1,24 + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD PTR [eax*4+edi] + movd mm1,DWORD PTR [edx*4+edi] + add bl,cl + psllq mm1,32 + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD PTR [eax*4+edi] + movd mm1,DWORD PTR [edx*4+edi] + add bl,cl + psllq mm1,40 + mov edx,DWORD PTR 
[ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD PTR [eax*4+edi] + movd mm1,DWORD PTR [edx*4+edi] + add bl,cl + psllq mm1,48 + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD PTR [eax*4+edi] + movd mm1,DWORD PTR [edx*4+edi] + mov edx,ebx + xor ebx,ebx + mov bl,dl + cmp esi,DWORD PTR [edi-4] + lea esi,DWORD PTR 8[esi] + jb $L005loop_mmx + psllq mm1,56 + pxor mm2,mm1 + movq QWORD PTR [esi*1+ebp-8],mm2 + emms + cmp esi,DWORD PTR 24[esp] + je $L006done + jmp $L002loop1 ALIGN 16 -$L003loop4: +$L003go4loop4: + lea edx,DWORD PTR [edx*1+esi-4] + mov DWORD PTR 28[esp],edx +$L007loop4: add bl,cl mov edx,DWORD PTR [ebx*4+edi] mov DWORD PTR [ebx*4+edi],ecx @@ -87,9 +230,9 @@ $L003loop4: mov DWORD PTR [esi*1+ecx],ebp lea esi,DWORD PTR 4[esi] mov ecx,DWORD PTR [eax*4+edi] - jb $L003loop4 + jb $L007loop4 cmp esi,DWORD PTR 24[esp] - je $L004done + je $L006done mov ebp,DWORD PTR 32[esp] ALIGN 16 $L002loop1: @@ -107,11 +250,11 @@ $L002loop1: cmp esi,DWORD PTR 24[esp] mov BYTE PTR [esi*1+ebp-1],dl jb $L002loop1 - jmp $L004done + jmp $L006done ALIGN 16 $L001RC4_CHAR: movzx ecx,BYTE PTR [eax*1+edi] -$L005cloop1: +$L008cloop1: add bl,cl movzx edx,BYTE PTR [ebx*1+edi] mov BYTE PTR [ebx*1+edi],cl @@ -124,10 +267,10 @@ $L005cloop1: movzx ecx,BYTE PTR [eax*1+edi] cmp esi,DWORD PTR 24[esp] mov BYTE PTR [esi*1+ebp-1],dl - jb $L005cloop1 -$L004done: + jb $L008cloop1 +$L006done: dec al - mov BYTE PTR [edi-4],bl + mov DWORD PTR [edi-4],ebx mov BYTE PTR [edi-8],al $L000abort: pop edi @@ -136,10 +279,9 @@ $L000abort: pop ebp ret _RC4 ENDP -;EXTERN _OPENSSL_ia32cap_P:NEAR ALIGN 16 -_RC4_set_key PROC PUBLIC -$L_RC4_set_key_begin:: +_private_RC4_set_key PROC PUBLIC +$L_private_RC4_set_key_begin:: push ebp push ebx push esi @@ -154,53 +296,53 @@ $L_RC4_set_key_begin:: xor eax,eax mov DWORD PTR [edi-4],ebp bt DWORD PTR [edx],20 - jc $L006c1stloop + jc $L009c1stloop ALIGN 16 -$L007w1stloop: +$L010w1stloop: mov DWORD PTR [eax*4+edi],eax add al,1 - jnc $L007w1stloop + jnc $L010w1stloop xor ecx,ecx xor edx,edx ALIGN 16 -$L008w2ndloop: +$L011w2ndloop: mov eax,DWORD PTR [ecx*4+edi] add dl,BYTE PTR [ebp*1+esi] add dl,al add ebp,1 mov ebx,DWORD PTR [edx*4+edi] - jnz $L009wnowrap + jnz $L012wnowrap mov ebp,DWORD PTR [edi-4] -$L009wnowrap: +$L012wnowrap: mov DWORD PTR [edx*4+edi],eax mov DWORD PTR [ecx*4+edi],ebx add cl,1 - jnc $L008w2ndloop - jmp $L010exit + jnc $L011w2ndloop + jmp $L013exit ALIGN 16 -$L006c1stloop: +$L009c1stloop: mov BYTE PTR [eax*1+edi],al add al,1 - jnc $L006c1stloop + jnc $L009c1stloop xor ecx,ecx xor edx,edx xor ebx,ebx ALIGN 16 -$L011c2ndloop: +$L014c2ndloop: mov al,BYTE PTR [ecx*1+edi] add dl,BYTE PTR [ebp*1+esi] add dl,al add ebp,1 mov bl,BYTE PTR [edx*1+edi] - jnz $L012cnowrap + jnz $L015cnowrap mov ebp,DWORD PTR [edi-4] -$L012cnowrap: +$L015cnowrap: mov BYTE PTR [edx*1+edi],al mov BYTE PTR [ecx*1+edi],bl add cl,1 - jnc $L011c2ndloop + jnc $L014c2ndloop mov DWORD PTR 256[edi],-1 -$L010exit: +$L013exit: xor eax,eax mov DWORD PTR [edi-8],eax mov DWORD PTR [edi-4],eax @@ -209,24 +351,31 @@ $L010exit: pop ebx pop ebp ret -_RC4_set_key ENDP +_private_RC4_set_key ENDP ALIGN 16 _RC4_options PROC PUBLIC $L_RC4_options_begin:: - call $L013pic_point -$L013pic_point: + call $L016pic_point +$L016pic_point: pop eax - lea eax,DWORD PTR ($L014opts-$L013pic_point)[eax] + lea eax,DWORD 
PTR ($L017opts-$L016pic_point)[eax] lea edx,DWORD PTR _OPENSSL_ia32cap_P - bt DWORD PTR [edx],20 - jnc $L015skip + mov edx,DWORD PTR [edx] + bt edx,20 + jc $L0181xchar + bt edx,26 + jnc $L019ret + add eax,25 + ret +$L0181xchar: add eax,12 -$L015skip: +$L019ret: ret ALIGN 64 -$L014opts: +$L017opts: DB 114,99,52,40,52,120,44,105,110,116,41,0 DB 114,99,52,40,49,120,44,99,104,97,114,41,0 +DB 114,99,52,40,56,120,44,109,109,120,41,0 DB 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89 DB 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 DB 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 @@ -234,6 +383,6 @@ ALIGN 64 _RC4_options ENDP .text$ ENDS .bss SEGMENT 'BSS' -COMM _OPENSSL_ia32cap_P:DWORD +COMM _OPENSSL_ia32cap_P:QWORD .bss ENDS END diff --git a/deps/openssl/asm/x86-win32-masm/rc5/rc5-586.asm b/deps/openssl/asm/x86-win32-masm/rc5/rc5-586.asm index e699d9173f..7ce74110e2 100644 --- a/deps/openssl/asm/x86-win32-masm/rc5/rc5-586.asm +++ b/deps/openssl/asm/x86-win32-masm/rc5/rc5-586.asm @@ -2,7 +2,7 @@ TITLE rc5-586.asm IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. ENDIF -.586 +.686 .MODEL FLAT OPTION DOTNAME IF @Version LT 800 diff --git a/deps/openssl/asm/x86-win32-masm/ripemd/rmd-586.asm b/deps/openssl/asm/x86-win32-masm/ripemd/rmd-586.asm index 8fa61f8f98..7f6458cefd 100644 --- a/deps/openssl/asm/x86-win32-masm/ripemd/rmd-586.asm +++ b/deps/openssl/asm/x86-win32-masm/ripemd/rmd-586.asm @@ -2,7 +2,7 @@ TITLE ../openssl/crypto/ripemd/asm/rmd-586.asm IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. ENDIF -.586 +.686 .MODEL FLAT OPTION DOTNAME IF @Version LT 800 diff --git a/deps/openssl/asm/x86-win32-masm/sha/sha1-586.asm b/deps/openssl/asm/x86-win32-masm/sha/sha1-586.asm index ce9f8d5b45..878b1d3b99 100644 --- a/deps/openssl/asm/x86-win32-masm/sha/sha1-586.asm +++ b/deps/openssl/asm/x86-win32-masm/sha/sha1-586.asm @@ -2,7 +2,7 @@ TITLE sha1-586.asm IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. 
ENDIF -.586 +.686 .MODEL FLAT OPTION DOTNAME IF @Version LT 800 @@ -20,11 +20,12 @@ $L_sha1_block_data_order_begin:: mov ebp,DWORD PTR 20[esp] mov esi,DWORD PTR 24[esp] mov eax,DWORD PTR 28[esp] - sub esp,64 + sub esp,76 shl eax,6 add eax,esi - mov DWORD PTR 92[esp],eax + mov DWORD PTR 104[esp],eax mov edi,DWORD PTR 16[ebp] + jmp $L000loop ALIGN 16 $L000loop: mov eax,DWORD PTR [esi] @@ -75,7 +76,7 @@ $L000loop: mov DWORD PTR 52[esp],ebx mov DWORD PTR 56[esp],ecx mov DWORD PTR 60[esp],edx - mov DWORD PTR 88[esp],esi + mov DWORD PTR 100[esp],esi mov eax,DWORD PTR [ebp] mov ebx,DWORD PTR 4[ebp] mov ecx,DWORD PTR 8[ebp] @@ -86,10 +87,10 @@ $L000loop: rol ebp,5 xor esi,edx add ebp,edi - and esi,ebx mov edi,DWORD PTR [esp] - xor esi,edx + and esi,ebx ror ebx,2 + xor esi,edx lea ebp,DWORD PTR 1518500249[edi*1+ebp] add ebp,esi ; 00_15 1 @@ -98,10 +99,10 @@ $L000loop: rol ebp,5 xor edi,ecx add ebp,edx - and edi,eax mov edx,DWORD PTR 4[esp] - xor edi,ecx + and edi,eax ror eax,2 + xor edi,ecx lea ebp,DWORD PTR 1518500249[edx*1+ebp] add ebp,edi ; 00_15 2 @@ -110,10 +111,10 @@ $L000loop: rol ebp,5 xor edx,ebx add ebp,ecx - and edx,esi mov ecx,DWORD PTR 8[esp] - xor edx,ebx + and edx,esi ror esi,2 + xor edx,ebx lea ebp,DWORD PTR 1518500249[ecx*1+ebp] add ebp,edx ; 00_15 3 @@ -122,10 +123,10 @@ $L000loop: rol ebp,5 xor ecx,eax add ebp,ebx - and ecx,edi mov ebx,DWORD PTR 12[esp] - xor ecx,eax + and ecx,edi ror edi,2 + xor ecx,eax lea ebp,DWORD PTR 1518500249[ebx*1+ebp] add ebp,ecx ; 00_15 4 @@ -134,10 +135,10 @@ $L000loop: rol ebp,5 xor ebx,esi add ebp,eax - and ebx,edx mov eax,DWORD PTR 16[esp] - xor ebx,esi + and ebx,edx ror edx,2 + xor ebx,esi lea ebp,DWORD PTR 1518500249[eax*1+ebp] add ebp,ebx ; 00_15 5 @@ -146,10 +147,10 @@ $L000loop: rol ebp,5 xor eax,edi add ebp,esi - and eax,ecx mov esi,DWORD PTR 20[esp] - xor eax,edi + and eax,ecx ror ecx,2 + xor eax,edi lea ebp,DWORD PTR 1518500249[esi*1+ebp] add ebp,eax ; 00_15 6 @@ -158,10 +159,10 @@ $L000loop: rol ebp,5 xor esi,edx add ebp,edi - and esi,ebx mov edi,DWORD PTR 24[esp] - xor esi,edx + and esi,ebx ror ebx,2 + xor esi,edx lea ebp,DWORD PTR 1518500249[edi*1+ebp] add ebp,esi ; 00_15 7 @@ -170,10 +171,10 @@ $L000loop: rol ebp,5 xor edi,ecx add ebp,edx - and edi,eax mov edx,DWORD PTR 28[esp] - xor edi,ecx + and edi,eax ror eax,2 + xor edi,ecx lea ebp,DWORD PTR 1518500249[edx*1+ebp] add ebp,edi ; 00_15 8 @@ -182,10 +183,10 @@ $L000loop: rol ebp,5 xor edx,ebx add ebp,ecx - and edx,esi mov ecx,DWORD PTR 32[esp] - xor edx,ebx + and edx,esi ror esi,2 + xor edx,ebx lea ebp,DWORD PTR 1518500249[ecx*1+ebp] add ebp,edx ; 00_15 9 @@ -194,10 +195,10 @@ $L000loop: rol ebp,5 xor ecx,eax add ebp,ebx - and ecx,edi mov ebx,DWORD PTR 36[esp] - xor ecx,eax + and ecx,edi ror edi,2 + xor ecx,eax lea ebp,DWORD PTR 1518500249[ebx*1+ebp] add ebp,ecx ; 00_15 10 @@ -206,10 +207,10 @@ $L000loop: rol ebp,5 xor ebx,esi add ebp,eax - and ebx,edx mov eax,DWORD PTR 40[esp] - xor ebx,esi + and ebx,edx ror edx,2 + xor ebx,esi lea ebp,DWORD PTR 1518500249[eax*1+ebp] add ebp,ebx ; 00_15 11 @@ -218,10 +219,10 @@ $L000loop: rol ebp,5 xor eax,edi add ebp,esi - and eax,ecx mov esi,DWORD PTR 44[esp] - xor eax,edi + and eax,ecx ror ecx,2 + xor eax,edi lea ebp,DWORD PTR 1518500249[esi*1+ebp] add ebp,eax ; 00_15 12 @@ -230,10 +231,10 @@ $L000loop: rol ebp,5 xor esi,edx add ebp,edi - and esi,ebx mov edi,DWORD PTR 48[esp] - xor esi,edx + and esi,ebx ror ebx,2 + xor esi,edx lea ebp,DWORD PTR 1518500249[edi*1+ebp] add ebp,esi ; 00_15 13 @@ -242,10 +243,10 @@ $L000loop: rol ebp,5 xor edi,ecx add 
ebp,edx - and edi,eax mov edx,DWORD PTR 52[esp] - xor edi,ecx + and edi,eax ror eax,2 + xor edi,ecx lea ebp,DWORD PTR 1518500249[edx*1+ebp] add ebp,edi ; 00_15 14 @@ -254,10 +255,10 @@ $L000loop: rol ebp,5 xor edx,ebx add ebp,ecx - and edx,esi mov ecx,DWORD PTR 56[esp] - xor edx,ebx + and edx,esi ror esi,2 + xor edx,ebx lea ebp,DWORD PTR 1518500249[ecx*1+ebp] add ebp,edx ; 00_15 15 @@ -266,1162 +267,1099 @@ $L000loop: rol ebp,5 xor ecx,eax add ebp,ebx - and ecx,edi mov ebx,DWORD PTR 60[esp] - xor ecx,eax + and ecx,edi ror edi,2 + xor ecx,eax lea ebp,DWORD PTR 1518500249[ebx*1+ebp] + mov ebx,DWORD PTR [esp] add ecx,ebp ; 16_19 16 - mov ebx,DWORD PTR [esp] mov ebp,edi xor ebx,DWORD PTR 8[esp] xor ebp,esi xor ebx,DWORD PTR 32[esp] and ebp,edx - ror edx,2 xor ebx,DWORD PTR 52[esp] rol ebx,1 xor ebp,esi + add eax,ebp + mov ebp,ecx + ror edx,2 mov DWORD PTR [esp],ebx + rol ebp,5 lea ebx,DWORD PTR 1518500249[eax*1+ebx] - mov eax,ecx - rol eax,5 + mov eax,DWORD PTR 4[esp] add ebx,ebp - add ebx,eax ; 16_19 17 - mov eax,DWORD PTR 4[esp] mov ebp,edx xor eax,DWORD PTR 12[esp] xor ebp,edi xor eax,DWORD PTR 36[esp] and ebp,ecx - ror ecx,2 xor eax,DWORD PTR 56[esp] rol eax,1 xor ebp,edi + add esi,ebp + mov ebp,ebx + ror ecx,2 mov DWORD PTR 4[esp],eax + rol ebp,5 lea eax,DWORD PTR 1518500249[esi*1+eax] - mov esi,ebx - rol esi,5 + mov esi,DWORD PTR 8[esp] add eax,ebp - add eax,esi ; 16_19 18 - mov esi,DWORD PTR 8[esp] mov ebp,ecx xor esi,DWORD PTR 16[esp] xor ebp,edx xor esi,DWORD PTR 40[esp] and ebp,ebx - ror ebx,2 xor esi,DWORD PTR 60[esp] rol esi,1 xor ebp,edx + add edi,ebp + mov ebp,eax + ror ebx,2 mov DWORD PTR 8[esp],esi + rol ebp,5 lea esi,DWORD PTR 1518500249[edi*1+esi] - mov edi,eax - rol edi,5 + mov edi,DWORD PTR 12[esp] add esi,ebp - add esi,edi ; 16_19 19 - mov edi,DWORD PTR 12[esp] mov ebp,ebx xor edi,DWORD PTR 20[esp] xor ebp,ecx xor edi,DWORD PTR 44[esp] and ebp,eax - ror eax,2 xor edi,DWORD PTR [esp] rol edi,1 xor ebp,ecx + add edx,ebp + mov ebp,esi + ror eax,2 mov DWORD PTR 12[esp],edi + rol ebp,5 lea edi,DWORD PTR 1518500249[edx*1+edi] - mov edx,esi - rol edx,5 + mov edx,DWORD PTR 16[esp] add edi,ebp - add edi,edx ; 20_39 20 mov ebp,esi - mov edx,DWORD PTR 16[esp] - ror esi,2 xor edx,DWORD PTR 24[esp] xor ebp,eax xor edx,DWORD PTR 48[esp] xor ebp,ebx xor edx,DWORD PTR 4[esp] rol edx,1 - add ebp,ecx + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 mov DWORD PTR 16[esp],edx - mov ecx,edi - rol ecx,5 - lea edx,DWORD PTR 1859775393[ebp*1+edx] - add edx,ecx + lea edx,DWORD PTR 1859775393[ecx*1+edx] + mov ecx,DWORD PTR 20[esp] + add edx,ebp ; 20_39 21 mov ebp,edi - mov ecx,DWORD PTR 20[esp] - ror edi,2 xor ecx,DWORD PTR 28[esp] xor ebp,esi xor ecx,DWORD PTR 52[esp] xor ebp,eax xor ecx,DWORD PTR 8[esp] rol ecx,1 - add ebp,ebx + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 mov DWORD PTR 20[esp],ecx - mov ebx,edx - rol ebx,5 - lea ecx,DWORD PTR 1859775393[ebp*1+ecx] - add ecx,ebx + lea ecx,DWORD PTR 1859775393[ebx*1+ecx] + mov ebx,DWORD PTR 24[esp] + add ecx,ebp ; 20_39 22 mov ebp,edx - mov ebx,DWORD PTR 24[esp] - ror edx,2 xor ebx,DWORD PTR 32[esp] xor ebp,edi xor ebx,DWORD PTR 56[esp] xor ebp,esi xor ebx,DWORD PTR 12[esp] rol ebx,1 - add ebp,eax + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 mov DWORD PTR 24[esp],ebx - mov eax,ecx - rol eax,5 - lea ebx,DWORD PTR 1859775393[ebp*1+ebx] - add ebx,eax + lea ebx,DWORD PTR 1859775393[eax*1+ebx] + mov eax,DWORD PTR 28[esp] + add ebx,ebp ; 20_39 23 mov ebp,ecx - mov eax,DWORD PTR 28[esp] - ror ecx,2 xor eax,DWORD PTR 36[esp] xor ebp,edx xor 
eax,DWORD PTR 60[esp] xor ebp,edi xor eax,DWORD PTR 16[esp] rol eax,1 - add ebp,esi + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 mov DWORD PTR 28[esp],eax - mov esi,ebx - rol esi,5 - lea eax,DWORD PTR 1859775393[ebp*1+eax] - add eax,esi + lea eax,DWORD PTR 1859775393[esi*1+eax] + mov esi,DWORD PTR 32[esp] + add eax,ebp ; 20_39 24 mov ebp,ebx - mov esi,DWORD PTR 32[esp] - ror ebx,2 xor esi,DWORD PTR 40[esp] xor ebp,ecx xor esi,DWORD PTR [esp] xor ebp,edx xor esi,DWORD PTR 20[esp] rol esi,1 - add ebp,edi + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 mov DWORD PTR 32[esp],esi - mov edi,eax - rol edi,5 - lea esi,DWORD PTR 1859775393[ebp*1+esi] - add esi,edi + lea esi,DWORD PTR 1859775393[edi*1+esi] + mov edi,DWORD PTR 36[esp] + add esi,ebp ; 20_39 25 mov ebp,eax - mov edi,DWORD PTR 36[esp] - ror eax,2 xor edi,DWORD PTR 44[esp] xor ebp,ebx xor edi,DWORD PTR 4[esp] xor ebp,ecx xor edi,DWORD PTR 24[esp] rol edi,1 - add ebp,edx + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 mov DWORD PTR 36[esp],edi - mov edx,esi - rol edx,5 - lea edi,DWORD PTR 1859775393[ebp*1+edi] - add edi,edx + lea edi,DWORD PTR 1859775393[edx*1+edi] + mov edx,DWORD PTR 40[esp] + add edi,ebp ; 20_39 26 mov ebp,esi - mov edx,DWORD PTR 40[esp] - ror esi,2 xor edx,DWORD PTR 48[esp] xor ebp,eax xor edx,DWORD PTR 8[esp] xor ebp,ebx xor edx,DWORD PTR 28[esp] rol edx,1 - add ebp,ecx + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 mov DWORD PTR 40[esp],edx - mov ecx,edi - rol ecx,5 - lea edx,DWORD PTR 1859775393[ebp*1+edx] - add edx,ecx + lea edx,DWORD PTR 1859775393[ecx*1+edx] + mov ecx,DWORD PTR 44[esp] + add edx,ebp ; 20_39 27 mov ebp,edi - mov ecx,DWORD PTR 44[esp] - ror edi,2 xor ecx,DWORD PTR 52[esp] xor ebp,esi xor ecx,DWORD PTR 12[esp] xor ebp,eax xor ecx,DWORD PTR 32[esp] rol ecx,1 - add ebp,ebx + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 mov DWORD PTR 44[esp],ecx - mov ebx,edx - rol ebx,5 - lea ecx,DWORD PTR 1859775393[ebp*1+ecx] - add ecx,ebx + lea ecx,DWORD PTR 1859775393[ebx*1+ecx] + mov ebx,DWORD PTR 48[esp] + add ecx,ebp ; 20_39 28 mov ebp,edx - mov ebx,DWORD PTR 48[esp] - ror edx,2 xor ebx,DWORD PTR 56[esp] xor ebp,edi xor ebx,DWORD PTR 16[esp] xor ebp,esi xor ebx,DWORD PTR 36[esp] rol ebx,1 - add ebp,eax + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 mov DWORD PTR 48[esp],ebx - mov eax,ecx - rol eax,5 - lea ebx,DWORD PTR 1859775393[ebp*1+ebx] - add ebx,eax + lea ebx,DWORD PTR 1859775393[eax*1+ebx] + mov eax,DWORD PTR 52[esp] + add ebx,ebp ; 20_39 29 mov ebp,ecx - mov eax,DWORD PTR 52[esp] - ror ecx,2 xor eax,DWORD PTR 60[esp] xor ebp,edx xor eax,DWORD PTR 20[esp] xor ebp,edi xor eax,DWORD PTR 40[esp] rol eax,1 - add ebp,esi + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 mov DWORD PTR 52[esp],eax - mov esi,ebx - rol esi,5 - lea eax,DWORD PTR 1859775393[ebp*1+eax] - add eax,esi + lea eax,DWORD PTR 1859775393[esi*1+eax] + mov esi,DWORD PTR 56[esp] + add eax,ebp ; 20_39 30 mov ebp,ebx - mov esi,DWORD PTR 56[esp] - ror ebx,2 xor esi,DWORD PTR [esp] xor ebp,ecx xor esi,DWORD PTR 24[esp] xor ebp,edx xor esi,DWORD PTR 44[esp] rol esi,1 - add ebp,edi + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 mov DWORD PTR 56[esp],esi - mov edi,eax - rol edi,5 - lea esi,DWORD PTR 1859775393[ebp*1+esi] - add esi,edi + lea esi,DWORD PTR 1859775393[edi*1+esi] + mov edi,DWORD PTR 60[esp] + add esi,ebp ; 20_39 31 mov ebp,eax - mov edi,DWORD PTR 60[esp] - ror eax,2 xor edi,DWORD PTR 4[esp] xor ebp,ebx xor edi,DWORD PTR 28[esp] xor ebp,ecx xor edi,DWORD PTR 48[esp] rol edi,1 - add ebp,edx + add edx,ebp 
+ ror eax,2 + mov ebp,esi + rol ebp,5 mov DWORD PTR 60[esp],edi - mov edx,esi - rol edx,5 - lea edi,DWORD PTR 1859775393[ebp*1+edi] - add edi,edx + lea edi,DWORD PTR 1859775393[edx*1+edi] + mov edx,DWORD PTR [esp] + add edi,ebp ; 20_39 32 mov ebp,esi - mov edx,DWORD PTR [esp] - ror esi,2 xor edx,DWORD PTR 8[esp] xor ebp,eax xor edx,DWORD PTR 32[esp] xor ebp,ebx xor edx,DWORD PTR 52[esp] rol edx,1 - add ebp,ecx + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 mov DWORD PTR [esp],edx - mov ecx,edi - rol ecx,5 - lea edx,DWORD PTR 1859775393[ebp*1+edx] - add edx,ecx + lea edx,DWORD PTR 1859775393[ecx*1+edx] + mov ecx,DWORD PTR 4[esp] + add edx,ebp ; 20_39 33 mov ebp,edi - mov ecx,DWORD PTR 4[esp] - ror edi,2 xor ecx,DWORD PTR 12[esp] xor ebp,esi xor ecx,DWORD PTR 36[esp] xor ebp,eax xor ecx,DWORD PTR 56[esp] rol ecx,1 - add ebp,ebx + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 mov DWORD PTR 4[esp],ecx - mov ebx,edx - rol ebx,5 - lea ecx,DWORD PTR 1859775393[ebp*1+ecx] - add ecx,ebx + lea ecx,DWORD PTR 1859775393[ebx*1+ecx] + mov ebx,DWORD PTR 8[esp] + add ecx,ebp ; 20_39 34 mov ebp,edx - mov ebx,DWORD PTR 8[esp] - ror edx,2 xor ebx,DWORD PTR 16[esp] xor ebp,edi xor ebx,DWORD PTR 40[esp] xor ebp,esi xor ebx,DWORD PTR 60[esp] rol ebx,1 - add ebp,eax + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 mov DWORD PTR 8[esp],ebx - mov eax,ecx - rol eax,5 - lea ebx,DWORD PTR 1859775393[ebp*1+ebx] - add ebx,eax + lea ebx,DWORD PTR 1859775393[eax*1+ebx] + mov eax,DWORD PTR 12[esp] + add ebx,ebp ; 20_39 35 mov ebp,ecx - mov eax,DWORD PTR 12[esp] - ror ecx,2 xor eax,DWORD PTR 20[esp] xor ebp,edx xor eax,DWORD PTR 44[esp] xor ebp,edi xor eax,DWORD PTR [esp] rol eax,1 - add ebp,esi + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 mov DWORD PTR 12[esp],eax - mov esi,ebx - rol esi,5 - lea eax,DWORD PTR 1859775393[ebp*1+eax] - add eax,esi + lea eax,DWORD PTR 1859775393[esi*1+eax] + mov esi,DWORD PTR 16[esp] + add eax,ebp ; 20_39 36 mov ebp,ebx - mov esi,DWORD PTR 16[esp] - ror ebx,2 xor esi,DWORD PTR 24[esp] xor ebp,ecx xor esi,DWORD PTR 48[esp] xor ebp,edx xor esi,DWORD PTR 4[esp] rol esi,1 - add ebp,edi + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 mov DWORD PTR 16[esp],esi - mov edi,eax - rol edi,5 - lea esi,DWORD PTR 1859775393[ebp*1+esi] - add esi,edi + lea esi,DWORD PTR 1859775393[edi*1+esi] + mov edi,DWORD PTR 20[esp] + add esi,ebp ; 20_39 37 mov ebp,eax - mov edi,DWORD PTR 20[esp] - ror eax,2 xor edi,DWORD PTR 28[esp] xor ebp,ebx xor edi,DWORD PTR 52[esp] xor ebp,ecx xor edi,DWORD PTR 8[esp] rol edi,1 - add ebp,edx + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 mov DWORD PTR 20[esp],edi - mov edx,esi - rol edx,5 - lea edi,DWORD PTR 1859775393[ebp*1+edi] - add edi,edx + lea edi,DWORD PTR 1859775393[edx*1+edi] + mov edx,DWORD PTR 24[esp] + add edi,ebp ; 20_39 38 mov ebp,esi - mov edx,DWORD PTR 24[esp] - ror esi,2 xor edx,DWORD PTR 32[esp] xor ebp,eax xor edx,DWORD PTR 56[esp] xor ebp,ebx xor edx,DWORD PTR 12[esp] rol edx,1 - add ebp,ecx + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 mov DWORD PTR 24[esp],edx - mov ecx,edi - rol ecx,5 - lea edx,DWORD PTR 1859775393[ebp*1+edx] - add edx,ecx + lea edx,DWORD PTR 1859775393[ecx*1+edx] + mov ecx,DWORD PTR 28[esp] + add edx,ebp ; 20_39 39 mov ebp,edi - mov ecx,DWORD PTR 28[esp] - ror edi,2 xor ecx,DWORD PTR 36[esp] xor ebp,esi xor ecx,DWORD PTR 60[esp] xor ebp,eax xor ecx,DWORD PTR 16[esp] rol ecx,1 - add ebp,ebx + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 mov DWORD PTR 28[esp],ecx - mov ebx,edx - rol ebx,5 - lea ecx,DWORD 
PTR 1859775393[ebp*1+ecx] - add ecx,ebx - ; 40_59 40 + lea ecx,DWORD PTR 1859775393[ebx*1+ecx] mov ebx,DWORD PTR 32[esp] - mov ebp,DWORD PTR 40[esp] - xor ebx,ebp - mov ebp,DWORD PTR [esp] - xor ebx,ebp - mov ebp,DWORD PTR 20[esp] - xor ebx,ebp - mov ebp,edx + add ecx,ebp + ; 40_59 40 + mov ebp,edi + xor ebx,DWORD PTR 40[esp] + xor ebp,esi + xor ebx,DWORD PTR [esp] + and ebp,edx + xor ebx,DWORD PTR 20[esp] rol ebx,1 - or ebp,edi - mov DWORD PTR 32[esp],ebx - and ebp,esi - lea ebx,DWORD PTR 2400959708[eax*1+ebx] - mov eax,edx + add ebp,eax ror edx,2 - and eax,edi - or ebp,eax mov eax,ecx rol eax,5 - add ebx,ebp + mov DWORD PTR 32[esp],ebx + lea ebx,DWORD PTR 2400959708[ebp*1+ebx] + mov ebp,edi add ebx,eax - ; 40_59 41 + and ebp,esi mov eax,DWORD PTR 36[esp] - mov ebp,DWORD PTR 44[esp] - xor eax,ebp - mov ebp,DWORD PTR 4[esp] - xor eax,ebp - mov ebp,DWORD PTR 24[esp] - xor eax,ebp - mov ebp,ecx + add ebx,ebp + ; 40_59 41 + mov ebp,edx + xor eax,DWORD PTR 44[esp] + xor ebp,edi + xor eax,DWORD PTR 4[esp] + and ebp,ecx + xor eax,DWORD PTR 24[esp] rol eax,1 - or ebp,edx - mov DWORD PTR 36[esp],eax - and ebp,edi - lea eax,DWORD PTR 2400959708[esi*1+eax] - mov esi,ecx + add ebp,esi ror ecx,2 - and esi,edx - or ebp,esi mov esi,ebx rol esi,5 - add eax,ebp + mov DWORD PTR 36[esp],eax + lea eax,DWORD PTR 2400959708[ebp*1+eax] + mov ebp,edx add eax,esi - ; 40_59 42 + and ebp,edi mov esi,DWORD PTR 40[esp] - mov ebp,DWORD PTR 48[esp] - xor esi,ebp - mov ebp,DWORD PTR 8[esp] - xor esi,ebp - mov ebp,DWORD PTR 28[esp] - xor esi,ebp - mov ebp,ebx + add eax,ebp + ; 40_59 42 + mov ebp,ecx + xor esi,DWORD PTR 48[esp] + xor ebp,edx + xor esi,DWORD PTR 8[esp] + and ebp,ebx + xor esi,DWORD PTR 28[esp] rol esi,1 - or ebp,ecx - mov DWORD PTR 40[esp],esi - and ebp,edx - lea esi,DWORD PTR 2400959708[edi*1+esi] - mov edi,ebx + add ebp,edi ror ebx,2 - and edi,ecx - or ebp,edi mov edi,eax rol edi,5 - add esi,ebp + mov DWORD PTR 40[esp],esi + lea esi,DWORD PTR 2400959708[ebp*1+esi] + mov ebp,ecx add esi,edi - ; 40_59 43 + and ebp,edx mov edi,DWORD PTR 44[esp] - mov ebp,DWORD PTR 52[esp] - xor edi,ebp - mov ebp,DWORD PTR 12[esp] - xor edi,ebp - mov ebp,DWORD PTR 32[esp] - xor edi,ebp - mov ebp,eax + add esi,ebp + ; 40_59 43 + mov ebp,ebx + xor edi,DWORD PTR 52[esp] + xor ebp,ecx + xor edi,DWORD PTR 12[esp] + and ebp,eax + xor edi,DWORD PTR 32[esp] rol edi,1 - or ebp,ebx - mov DWORD PTR 44[esp],edi - and ebp,ecx - lea edi,DWORD PTR 2400959708[edx*1+edi] - mov edx,eax + add ebp,edx ror eax,2 - and edx,ebx - or ebp,edx mov edx,esi rol edx,5 - add edi,ebp + mov DWORD PTR 44[esp],edi + lea edi,DWORD PTR 2400959708[ebp*1+edi] + mov ebp,ebx add edi,edx - ; 40_59 44 + and ebp,ecx mov edx,DWORD PTR 48[esp] - mov ebp,DWORD PTR 56[esp] - xor edx,ebp - mov ebp,DWORD PTR 16[esp] - xor edx,ebp - mov ebp,DWORD PTR 36[esp] - xor edx,ebp - mov ebp,esi + add edi,ebp + ; 40_59 44 + mov ebp,eax + xor edx,DWORD PTR 56[esp] + xor ebp,ebx + xor edx,DWORD PTR 16[esp] + and ebp,esi + xor edx,DWORD PTR 36[esp] rol edx,1 - or ebp,eax - mov DWORD PTR 48[esp],edx - and ebp,ebx - lea edx,DWORD PTR 2400959708[ecx*1+edx] - mov ecx,esi + add ebp,ecx ror esi,2 - and ecx,eax - or ebp,ecx mov ecx,edi rol ecx,5 - add edx,ebp + mov DWORD PTR 48[esp],edx + lea edx,DWORD PTR 2400959708[ebp*1+edx] + mov ebp,eax add edx,ecx - ; 40_59 45 + and ebp,ebx mov ecx,DWORD PTR 52[esp] - mov ebp,DWORD PTR 60[esp] - xor ecx,ebp - mov ebp,DWORD PTR 20[esp] - xor ecx,ebp - mov ebp,DWORD PTR 40[esp] - xor ecx,ebp - mov ebp,edi + add edx,ebp + ; 40_59 45 + mov ebp,esi + xor 
ecx,DWORD PTR 60[esp] + xor ebp,eax + xor ecx,DWORD PTR 20[esp] + and ebp,edi + xor ecx,DWORD PTR 40[esp] rol ecx,1 - or ebp,esi - mov DWORD PTR 52[esp],ecx - and ebp,eax - lea ecx,DWORD PTR 2400959708[ebx*1+ecx] - mov ebx,edi + add ebp,ebx ror edi,2 - and ebx,esi - or ebp,ebx mov ebx,edx rol ebx,5 - add ecx,ebp + mov DWORD PTR 52[esp],ecx + lea ecx,DWORD PTR 2400959708[ebp*1+ecx] + mov ebp,esi add ecx,ebx - ; 40_59 46 + and ebp,eax mov ebx,DWORD PTR 56[esp] - mov ebp,DWORD PTR [esp] - xor ebx,ebp - mov ebp,DWORD PTR 24[esp] - xor ebx,ebp - mov ebp,DWORD PTR 44[esp] - xor ebx,ebp - mov ebp,edx + add ecx,ebp + ; 40_59 46 + mov ebp,edi + xor ebx,DWORD PTR [esp] + xor ebp,esi + xor ebx,DWORD PTR 24[esp] + and ebp,edx + xor ebx,DWORD PTR 44[esp] rol ebx,1 - or ebp,edi - mov DWORD PTR 56[esp],ebx - and ebp,esi - lea ebx,DWORD PTR 2400959708[eax*1+ebx] - mov eax,edx + add ebp,eax ror edx,2 - and eax,edi - or ebp,eax mov eax,ecx rol eax,5 - add ebx,ebp + mov DWORD PTR 56[esp],ebx + lea ebx,DWORD PTR 2400959708[ebp*1+ebx] + mov ebp,edi add ebx,eax - ; 40_59 47 + and ebp,esi mov eax,DWORD PTR 60[esp] - mov ebp,DWORD PTR 4[esp] - xor eax,ebp - mov ebp,DWORD PTR 28[esp] - xor eax,ebp - mov ebp,DWORD PTR 48[esp] - xor eax,ebp - mov ebp,ecx + add ebx,ebp + ; 40_59 47 + mov ebp,edx + xor eax,DWORD PTR 4[esp] + xor ebp,edi + xor eax,DWORD PTR 28[esp] + and ebp,ecx + xor eax,DWORD PTR 48[esp] rol eax,1 - or ebp,edx - mov DWORD PTR 60[esp],eax - and ebp,edi - lea eax,DWORD PTR 2400959708[esi*1+eax] - mov esi,ecx + add ebp,esi ror ecx,2 - and esi,edx - or ebp,esi mov esi,ebx rol esi,5 - add eax,ebp + mov DWORD PTR 60[esp],eax + lea eax,DWORD PTR 2400959708[ebp*1+eax] + mov ebp,edx add eax,esi - ; 40_59 48 + and ebp,edi mov esi,DWORD PTR [esp] - mov ebp,DWORD PTR 8[esp] - xor esi,ebp - mov ebp,DWORD PTR 32[esp] - xor esi,ebp - mov ebp,DWORD PTR 52[esp] - xor esi,ebp - mov ebp,ebx + add eax,ebp + ; 40_59 48 + mov ebp,ecx + xor esi,DWORD PTR 8[esp] + xor ebp,edx + xor esi,DWORD PTR 32[esp] + and ebp,ebx + xor esi,DWORD PTR 52[esp] rol esi,1 - or ebp,ecx - mov DWORD PTR [esp],esi - and ebp,edx - lea esi,DWORD PTR 2400959708[edi*1+esi] - mov edi,ebx + add ebp,edi ror ebx,2 - and edi,ecx - or ebp,edi mov edi,eax rol edi,5 - add esi,ebp + mov DWORD PTR [esp],esi + lea esi,DWORD PTR 2400959708[ebp*1+esi] + mov ebp,ecx add esi,edi - ; 40_59 49 + and ebp,edx mov edi,DWORD PTR 4[esp] - mov ebp,DWORD PTR 12[esp] - xor edi,ebp - mov ebp,DWORD PTR 36[esp] - xor edi,ebp - mov ebp,DWORD PTR 56[esp] - xor edi,ebp - mov ebp,eax + add esi,ebp + ; 40_59 49 + mov ebp,ebx + xor edi,DWORD PTR 12[esp] + xor ebp,ecx + xor edi,DWORD PTR 36[esp] + and ebp,eax + xor edi,DWORD PTR 56[esp] rol edi,1 - or ebp,ebx - mov DWORD PTR 4[esp],edi - and ebp,ecx - lea edi,DWORD PTR 2400959708[edx*1+edi] - mov edx,eax + add ebp,edx ror eax,2 - and edx,ebx - or ebp,edx mov edx,esi rol edx,5 - add edi,ebp + mov DWORD PTR 4[esp],edi + lea edi,DWORD PTR 2400959708[ebp*1+edi] + mov ebp,ebx add edi,edx - ; 40_59 50 + and ebp,ecx mov edx,DWORD PTR 8[esp] - mov ebp,DWORD PTR 16[esp] - xor edx,ebp - mov ebp,DWORD PTR 40[esp] - xor edx,ebp - mov ebp,DWORD PTR 60[esp] - xor edx,ebp - mov ebp,esi + add edi,ebp + ; 40_59 50 + mov ebp,eax + xor edx,DWORD PTR 16[esp] + xor ebp,ebx + xor edx,DWORD PTR 40[esp] + and ebp,esi + xor edx,DWORD PTR 60[esp] rol edx,1 - or ebp,eax - mov DWORD PTR 8[esp],edx - and ebp,ebx - lea edx,DWORD PTR 2400959708[ecx*1+edx] - mov ecx,esi + add ebp,ecx ror esi,2 - and ecx,eax - or ebp,ecx mov ecx,edi rol ecx,5 - add edx,ebp + mov 
DWORD PTR 8[esp],edx + lea edx,DWORD PTR 2400959708[ebp*1+edx] + mov ebp,eax add edx,ecx - ; 40_59 51 + and ebp,ebx mov ecx,DWORD PTR 12[esp] - mov ebp,DWORD PTR 20[esp] - xor ecx,ebp - mov ebp,DWORD PTR 44[esp] - xor ecx,ebp - mov ebp,DWORD PTR [esp] - xor ecx,ebp - mov ebp,edi + add edx,ebp + ; 40_59 51 + mov ebp,esi + xor ecx,DWORD PTR 20[esp] + xor ebp,eax + xor ecx,DWORD PTR 44[esp] + and ebp,edi + xor ecx,DWORD PTR [esp] rol ecx,1 - or ebp,esi - mov DWORD PTR 12[esp],ecx - and ebp,eax - lea ecx,DWORD PTR 2400959708[ebx*1+ecx] - mov ebx,edi + add ebp,ebx ror edi,2 - and ebx,esi - or ebp,ebx mov ebx,edx rol ebx,5 - add ecx,ebp + mov DWORD PTR 12[esp],ecx + lea ecx,DWORD PTR 2400959708[ebp*1+ecx] + mov ebp,esi add ecx,ebx - ; 40_59 52 + and ebp,eax mov ebx,DWORD PTR 16[esp] - mov ebp,DWORD PTR 24[esp] - xor ebx,ebp - mov ebp,DWORD PTR 48[esp] - xor ebx,ebp - mov ebp,DWORD PTR 4[esp] - xor ebx,ebp - mov ebp,edx + add ecx,ebp + ; 40_59 52 + mov ebp,edi + xor ebx,DWORD PTR 24[esp] + xor ebp,esi + xor ebx,DWORD PTR 48[esp] + and ebp,edx + xor ebx,DWORD PTR 4[esp] rol ebx,1 - or ebp,edi - mov DWORD PTR 16[esp],ebx - and ebp,esi - lea ebx,DWORD PTR 2400959708[eax*1+ebx] - mov eax,edx + add ebp,eax ror edx,2 - and eax,edi - or ebp,eax mov eax,ecx rol eax,5 - add ebx,ebp + mov DWORD PTR 16[esp],ebx + lea ebx,DWORD PTR 2400959708[ebp*1+ebx] + mov ebp,edi add ebx,eax - ; 40_59 53 + and ebp,esi mov eax,DWORD PTR 20[esp] - mov ebp,DWORD PTR 28[esp] - xor eax,ebp - mov ebp,DWORD PTR 52[esp] - xor eax,ebp - mov ebp,DWORD PTR 8[esp] - xor eax,ebp - mov ebp,ecx + add ebx,ebp + ; 40_59 53 + mov ebp,edx + xor eax,DWORD PTR 28[esp] + xor ebp,edi + xor eax,DWORD PTR 52[esp] + and ebp,ecx + xor eax,DWORD PTR 8[esp] rol eax,1 - or ebp,edx - mov DWORD PTR 20[esp],eax - and ebp,edi - lea eax,DWORD PTR 2400959708[esi*1+eax] - mov esi,ecx + add ebp,esi ror ecx,2 - and esi,edx - or ebp,esi mov esi,ebx rol esi,5 - add eax,ebp + mov DWORD PTR 20[esp],eax + lea eax,DWORD PTR 2400959708[ebp*1+eax] + mov ebp,edx add eax,esi - ; 40_59 54 + and ebp,edi mov esi,DWORD PTR 24[esp] - mov ebp,DWORD PTR 32[esp] - xor esi,ebp - mov ebp,DWORD PTR 56[esp] - xor esi,ebp - mov ebp,DWORD PTR 12[esp] - xor esi,ebp - mov ebp,ebx + add eax,ebp + ; 40_59 54 + mov ebp,ecx + xor esi,DWORD PTR 32[esp] + xor ebp,edx + xor esi,DWORD PTR 56[esp] + and ebp,ebx + xor esi,DWORD PTR 12[esp] rol esi,1 - or ebp,ecx - mov DWORD PTR 24[esp],esi - and ebp,edx - lea esi,DWORD PTR 2400959708[edi*1+esi] - mov edi,ebx + add ebp,edi ror ebx,2 - and edi,ecx - or ebp,edi mov edi,eax rol edi,5 - add esi,ebp + mov DWORD PTR 24[esp],esi + lea esi,DWORD PTR 2400959708[ebp*1+esi] + mov ebp,ecx add esi,edi - ; 40_59 55 + and ebp,edx mov edi,DWORD PTR 28[esp] - mov ebp,DWORD PTR 36[esp] - xor edi,ebp - mov ebp,DWORD PTR 60[esp] - xor edi,ebp - mov ebp,DWORD PTR 16[esp] - xor edi,ebp - mov ebp,eax + add esi,ebp + ; 40_59 55 + mov ebp,ebx + xor edi,DWORD PTR 36[esp] + xor ebp,ecx + xor edi,DWORD PTR 60[esp] + and ebp,eax + xor edi,DWORD PTR 16[esp] rol edi,1 - or ebp,ebx - mov DWORD PTR 28[esp],edi - and ebp,ecx - lea edi,DWORD PTR 2400959708[edx*1+edi] - mov edx,eax + add ebp,edx ror eax,2 - and edx,ebx - or ebp,edx mov edx,esi rol edx,5 - add edi,ebp + mov DWORD PTR 28[esp],edi + lea edi,DWORD PTR 2400959708[ebp*1+edi] + mov ebp,ebx add edi,edx - ; 40_59 56 + and ebp,ecx mov edx,DWORD PTR 32[esp] - mov ebp,DWORD PTR 40[esp] - xor edx,ebp - mov ebp,DWORD PTR [esp] - xor edx,ebp - mov ebp,DWORD PTR 20[esp] - xor edx,ebp - mov ebp,esi + add edi,ebp + ; 40_59 56 + mov 
ebp,eax + xor edx,DWORD PTR 40[esp] + xor ebp,ebx + xor edx,DWORD PTR [esp] + and ebp,esi + xor edx,DWORD PTR 20[esp] rol edx,1 - or ebp,eax - mov DWORD PTR 32[esp],edx - and ebp,ebx - lea edx,DWORD PTR 2400959708[ecx*1+edx] - mov ecx,esi + add ebp,ecx ror esi,2 - and ecx,eax - or ebp,ecx mov ecx,edi rol ecx,5 - add edx,ebp + mov DWORD PTR 32[esp],edx + lea edx,DWORD PTR 2400959708[ebp*1+edx] + mov ebp,eax add edx,ecx - ; 40_59 57 + and ebp,ebx mov ecx,DWORD PTR 36[esp] - mov ebp,DWORD PTR 44[esp] - xor ecx,ebp - mov ebp,DWORD PTR 4[esp] - xor ecx,ebp - mov ebp,DWORD PTR 24[esp] - xor ecx,ebp - mov ebp,edi + add edx,ebp + ; 40_59 57 + mov ebp,esi + xor ecx,DWORD PTR 44[esp] + xor ebp,eax + xor ecx,DWORD PTR 4[esp] + and ebp,edi + xor ecx,DWORD PTR 24[esp] rol ecx,1 - or ebp,esi - mov DWORD PTR 36[esp],ecx - and ebp,eax - lea ecx,DWORD PTR 2400959708[ebx*1+ecx] - mov ebx,edi + add ebp,ebx ror edi,2 - and ebx,esi - or ebp,ebx mov ebx,edx rol ebx,5 - add ecx,ebp + mov DWORD PTR 36[esp],ecx + lea ecx,DWORD PTR 2400959708[ebp*1+ecx] + mov ebp,esi add ecx,ebx - ; 40_59 58 + and ebp,eax mov ebx,DWORD PTR 40[esp] - mov ebp,DWORD PTR 48[esp] - xor ebx,ebp - mov ebp,DWORD PTR 8[esp] - xor ebx,ebp - mov ebp,DWORD PTR 28[esp] - xor ebx,ebp - mov ebp,edx + add ecx,ebp + ; 40_59 58 + mov ebp,edi + xor ebx,DWORD PTR 48[esp] + xor ebp,esi + xor ebx,DWORD PTR 8[esp] + and ebp,edx + xor ebx,DWORD PTR 28[esp] rol ebx,1 - or ebp,edi - mov DWORD PTR 40[esp],ebx - and ebp,esi - lea ebx,DWORD PTR 2400959708[eax*1+ebx] - mov eax,edx + add ebp,eax ror edx,2 - and eax,edi - or ebp,eax mov eax,ecx rol eax,5 - add ebx,ebp + mov DWORD PTR 40[esp],ebx + lea ebx,DWORD PTR 2400959708[ebp*1+ebx] + mov ebp,edi add ebx,eax - ; 40_59 59 + and ebp,esi mov eax,DWORD PTR 44[esp] - mov ebp,DWORD PTR 52[esp] - xor eax,ebp - mov ebp,DWORD PTR 12[esp] - xor eax,ebp - mov ebp,DWORD PTR 32[esp] - xor eax,ebp - mov ebp,ecx + add ebx,ebp + ; 40_59 59 + mov ebp,edx + xor eax,DWORD PTR 52[esp] + xor ebp,edi + xor eax,DWORD PTR 12[esp] + and ebp,ecx + xor eax,DWORD PTR 32[esp] rol eax,1 - or ebp,edx - mov DWORD PTR 44[esp],eax - and ebp,edi - lea eax,DWORD PTR 2400959708[esi*1+eax] - mov esi,ecx + add ebp,esi ror ecx,2 - and esi,edx - or ebp,esi mov esi,ebx rol esi,5 - add eax,ebp + mov DWORD PTR 44[esp],eax + lea eax,DWORD PTR 2400959708[ebp*1+eax] + mov ebp,edx add eax,esi + and ebp,edi + mov esi,DWORD PTR 48[esp] + add eax,ebp ; 20_39 60 mov ebp,ebx - mov esi,DWORD PTR 48[esp] - ror ebx,2 xor esi,DWORD PTR 56[esp] xor ebp,ecx xor esi,DWORD PTR 16[esp] xor ebp,edx xor esi,DWORD PTR 36[esp] rol esi,1 - add ebp,edi + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 mov DWORD PTR 48[esp],esi - mov edi,eax - rol edi,5 - lea esi,DWORD PTR 3395469782[ebp*1+esi] - add esi,edi + lea esi,DWORD PTR 3395469782[edi*1+esi] + mov edi,DWORD PTR 52[esp] + add esi,ebp ; 20_39 61 mov ebp,eax - mov edi,DWORD PTR 52[esp] - ror eax,2 xor edi,DWORD PTR 60[esp] xor ebp,ebx xor edi,DWORD PTR 20[esp] xor ebp,ecx xor edi,DWORD PTR 40[esp] rol edi,1 - add ebp,edx + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 mov DWORD PTR 52[esp],edi - mov edx,esi - rol edx,5 - lea edi,DWORD PTR 3395469782[ebp*1+edi] - add edi,edx + lea edi,DWORD PTR 3395469782[edx*1+edi] + mov edx,DWORD PTR 56[esp] + add edi,ebp ; 20_39 62 mov ebp,esi - mov edx,DWORD PTR 56[esp] - ror esi,2 xor edx,DWORD PTR [esp] xor ebp,eax xor edx,DWORD PTR 24[esp] xor ebp,ebx xor edx,DWORD PTR 44[esp] rol edx,1 - add ebp,ecx + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 mov DWORD PTR 56[esp],edx 
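The "; 40_59" rounds above (constant 2400959708 = 0x8F1BBCDC) are where the regenerated code changes shape the most: the old output built Maj(b,c,d) as ((b or c) and d) or (b and c) in a single temporary, while the new output adds the two bitwise-disjoint pieces ((c xor d) and b) and (c and d) into the accumulator in two separate add instructions, which frees ebp earlier in the round. The two forms are equivalent; a quick standalone check (plain C, not part of the patch):

#include <stdint.h>
#include <assert.h>

/* The two terms of maj_new() can never set the same bit, so adding them
 * (as the new rounds do) gives the same result as the boolean majority. */
static uint32_t maj_old(uint32_t b, uint32_t c, uint32_t d)
{
    return ((b | c) & d) | (b & c);         /* the old or/and temporary        */
}

static uint32_t maj_new(uint32_t b, uint32_t c, uint32_t d)
{
    return ((c ^ d) & b) + (c & d);         /* two disjoint terms, '+' == '|'  */
}

int main(void)
{
    for (uint32_t b = 0; b < 8; b++)        /* 3-bit values cover every        */
        for (uint32_t c = 0; c < 8; c++)    /* per-bit combination             */
            for (uint32_t d = 0; d < 8; d++)
                assert(maj_old(b, c, d) == maj_new(b, c, d));
    return 0;
}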
- mov ecx,edi - rol ecx,5 - lea edx,DWORD PTR 3395469782[ebp*1+edx] - add edx,ecx + lea edx,DWORD PTR 3395469782[ecx*1+edx] + mov ecx,DWORD PTR 60[esp] + add edx,ebp ; 20_39 63 mov ebp,edi - mov ecx,DWORD PTR 60[esp] - ror edi,2 xor ecx,DWORD PTR 4[esp] xor ebp,esi xor ecx,DWORD PTR 28[esp] xor ebp,eax xor ecx,DWORD PTR 48[esp] rol ecx,1 - add ebp,ebx + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 mov DWORD PTR 60[esp],ecx - mov ebx,edx - rol ebx,5 - lea ecx,DWORD PTR 3395469782[ebp*1+ecx] - add ecx,ebx + lea ecx,DWORD PTR 3395469782[ebx*1+ecx] + mov ebx,DWORD PTR [esp] + add ecx,ebp ; 20_39 64 mov ebp,edx - mov ebx,DWORD PTR [esp] - ror edx,2 xor ebx,DWORD PTR 8[esp] xor ebp,edi xor ebx,DWORD PTR 32[esp] xor ebp,esi xor ebx,DWORD PTR 52[esp] rol ebx,1 - add ebp,eax + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 mov DWORD PTR [esp],ebx - mov eax,ecx - rol eax,5 - lea ebx,DWORD PTR 3395469782[ebp*1+ebx] - add ebx,eax + lea ebx,DWORD PTR 3395469782[eax*1+ebx] + mov eax,DWORD PTR 4[esp] + add ebx,ebp ; 20_39 65 mov ebp,ecx - mov eax,DWORD PTR 4[esp] - ror ecx,2 xor eax,DWORD PTR 12[esp] xor ebp,edx xor eax,DWORD PTR 36[esp] xor ebp,edi xor eax,DWORD PTR 56[esp] rol eax,1 - add ebp,esi + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 mov DWORD PTR 4[esp],eax - mov esi,ebx - rol esi,5 - lea eax,DWORD PTR 3395469782[ebp*1+eax] - add eax,esi + lea eax,DWORD PTR 3395469782[esi*1+eax] + mov esi,DWORD PTR 8[esp] + add eax,ebp ; 20_39 66 mov ebp,ebx - mov esi,DWORD PTR 8[esp] - ror ebx,2 xor esi,DWORD PTR 16[esp] xor ebp,ecx xor esi,DWORD PTR 40[esp] xor ebp,edx xor esi,DWORD PTR 60[esp] rol esi,1 - add ebp,edi + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 mov DWORD PTR 8[esp],esi - mov edi,eax - rol edi,5 - lea esi,DWORD PTR 3395469782[ebp*1+esi] - add esi,edi + lea esi,DWORD PTR 3395469782[edi*1+esi] + mov edi,DWORD PTR 12[esp] + add esi,ebp ; 20_39 67 mov ebp,eax - mov edi,DWORD PTR 12[esp] - ror eax,2 xor edi,DWORD PTR 20[esp] xor ebp,ebx xor edi,DWORD PTR 44[esp] xor ebp,ecx xor edi,DWORD PTR [esp] rol edi,1 - add ebp,edx + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 mov DWORD PTR 12[esp],edi - mov edx,esi - rol edx,5 - lea edi,DWORD PTR 3395469782[ebp*1+edi] - add edi,edx + lea edi,DWORD PTR 3395469782[edx*1+edi] + mov edx,DWORD PTR 16[esp] + add edi,ebp ; 20_39 68 mov ebp,esi - mov edx,DWORD PTR 16[esp] - ror esi,2 xor edx,DWORD PTR 24[esp] xor ebp,eax xor edx,DWORD PTR 48[esp] xor ebp,ebx xor edx,DWORD PTR 4[esp] rol edx,1 - add ebp,ecx + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 mov DWORD PTR 16[esp],edx - mov ecx,edi - rol ecx,5 - lea edx,DWORD PTR 3395469782[ebp*1+edx] - add edx,ecx + lea edx,DWORD PTR 3395469782[ecx*1+edx] + mov ecx,DWORD PTR 20[esp] + add edx,ebp ; 20_39 69 mov ebp,edi - mov ecx,DWORD PTR 20[esp] - ror edi,2 xor ecx,DWORD PTR 28[esp] xor ebp,esi xor ecx,DWORD PTR 52[esp] xor ebp,eax xor ecx,DWORD PTR 8[esp] rol ecx,1 - add ebp,ebx + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 mov DWORD PTR 20[esp],ecx - mov ebx,edx - rol ebx,5 - lea ecx,DWORD PTR 3395469782[ebp*1+ecx] - add ecx,ebx + lea ecx,DWORD PTR 3395469782[ebx*1+ecx] + mov ebx,DWORD PTR 24[esp] + add ecx,ebp ; 20_39 70 mov ebp,edx - mov ebx,DWORD PTR 24[esp] - ror edx,2 xor ebx,DWORD PTR 32[esp] xor ebp,edi xor ebx,DWORD PTR 56[esp] xor ebp,esi xor ebx,DWORD PTR 12[esp] rol ebx,1 - add ebp,eax + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 mov DWORD PTR 24[esp],ebx - mov eax,ecx - rol eax,5 - lea ebx,DWORD PTR 3395469782[ebp*1+ebx] - add ebx,eax + lea ebx,DWORD PTR 
3395469782[eax*1+ebx] + mov eax,DWORD PTR 28[esp] + add ebx,ebp ; 20_39 71 mov ebp,ecx - mov eax,DWORD PTR 28[esp] - ror ecx,2 xor eax,DWORD PTR 36[esp] xor ebp,edx xor eax,DWORD PTR 60[esp] xor ebp,edi xor eax,DWORD PTR 16[esp] rol eax,1 - add ebp,esi + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 mov DWORD PTR 28[esp],eax - mov esi,ebx - rol esi,5 - lea eax,DWORD PTR 3395469782[ebp*1+eax] - add eax,esi + lea eax,DWORD PTR 3395469782[esi*1+eax] + mov esi,DWORD PTR 32[esp] + add eax,ebp ; 20_39 72 mov ebp,ebx - mov esi,DWORD PTR 32[esp] - ror ebx,2 xor esi,DWORD PTR 40[esp] xor ebp,ecx xor esi,DWORD PTR [esp] xor ebp,edx xor esi,DWORD PTR 20[esp] rol esi,1 - add ebp,edi + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 mov DWORD PTR 32[esp],esi - mov edi,eax - rol edi,5 - lea esi,DWORD PTR 3395469782[ebp*1+esi] - add esi,edi + lea esi,DWORD PTR 3395469782[edi*1+esi] + mov edi,DWORD PTR 36[esp] + add esi,ebp ; 20_39 73 mov ebp,eax - mov edi,DWORD PTR 36[esp] - ror eax,2 xor edi,DWORD PTR 44[esp] xor ebp,ebx xor edi,DWORD PTR 4[esp] xor ebp,ecx xor edi,DWORD PTR 24[esp] rol edi,1 - add ebp,edx + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 mov DWORD PTR 36[esp],edi - mov edx,esi - rol edx,5 - lea edi,DWORD PTR 3395469782[ebp*1+edi] - add edi,edx + lea edi,DWORD PTR 3395469782[edx*1+edi] + mov edx,DWORD PTR 40[esp] + add edi,ebp ; 20_39 74 mov ebp,esi - mov edx,DWORD PTR 40[esp] - ror esi,2 xor edx,DWORD PTR 48[esp] xor ebp,eax xor edx,DWORD PTR 8[esp] xor ebp,ebx xor edx,DWORD PTR 28[esp] rol edx,1 - add ebp,ecx + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 mov DWORD PTR 40[esp],edx - mov ecx,edi - rol ecx,5 - lea edx,DWORD PTR 3395469782[ebp*1+edx] - add edx,ecx + lea edx,DWORD PTR 3395469782[ecx*1+edx] + mov ecx,DWORD PTR 44[esp] + add edx,ebp ; 20_39 75 mov ebp,edi - mov ecx,DWORD PTR 44[esp] - ror edi,2 xor ecx,DWORD PTR 52[esp] xor ebp,esi xor ecx,DWORD PTR 12[esp] xor ebp,eax xor ecx,DWORD PTR 32[esp] rol ecx,1 - add ebp,ebx + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 mov DWORD PTR 44[esp],ecx - mov ebx,edx - rol ebx,5 - lea ecx,DWORD PTR 3395469782[ebp*1+ecx] - add ecx,ebx + lea ecx,DWORD PTR 3395469782[ebx*1+ecx] + mov ebx,DWORD PTR 48[esp] + add ecx,ebp ; 20_39 76 mov ebp,edx - mov ebx,DWORD PTR 48[esp] - ror edx,2 xor ebx,DWORD PTR 56[esp] xor ebp,edi xor ebx,DWORD PTR 16[esp] xor ebp,esi xor ebx,DWORD PTR 36[esp] rol ebx,1 - add ebp,eax + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 mov DWORD PTR 48[esp],ebx - mov eax,ecx - rol eax,5 - lea ebx,DWORD PTR 3395469782[ebp*1+ebx] - add ebx,eax + lea ebx,DWORD PTR 3395469782[eax*1+ebx] + mov eax,DWORD PTR 52[esp] + add ebx,ebp ; 20_39 77 mov ebp,ecx - mov eax,DWORD PTR 52[esp] - ror ecx,2 xor eax,DWORD PTR 60[esp] xor ebp,edx xor eax,DWORD PTR 20[esp] xor ebp,edi xor eax,DWORD PTR 40[esp] rol eax,1 - add ebp,esi - mov DWORD PTR 52[esp],eax - mov esi,ebx - rol esi,5 - lea eax,DWORD PTR 3395469782[ebp*1+eax] - add eax,esi - ; 20_39 78 + add esi,ebp + ror ecx,2 mov ebp,ebx + rol ebp,5 + lea eax,DWORD PTR 3395469782[esi*1+eax] mov esi,DWORD PTR 56[esp] - ror ebx,2 + add eax,ebp + ; 20_39 78 + mov ebp,ebx xor esi,DWORD PTR [esp] xor ebp,ecx xor esi,DWORD PTR 24[esp] xor ebp,edx xor esi,DWORD PTR 44[esp] rol esi,1 - add ebp,edi - mov DWORD PTR 56[esp],esi - mov edi,eax - rol edi,5 - lea esi,DWORD PTR 3395469782[ebp*1+esi] - add esi,edi - ; 20_39 79 + add edi,ebp + ror ebx,2 mov ebp,eax + rol ebp,5 + lea esi,DWORD PTR 3395469782[edi*1+esi] mov edi,DWORD PTR 60[esp] - ror eax,2 + add esi,ebp + ; 20_39 79 + mov 
ebp,eax xor edi,DWORD PTR 4[esp] xor ebp,ebx xor edi,DWORD PTR 28[esp] xor ebp,ecx xor edi,DWORD PTR 48[esp] rol edi,1 - add ebp,edx - mov DWORD PTR 60[esp],edi - mov edx,esi - rol edx,5 - lea edi,DWORD PTR 3395469782[ebp*1+edi] - add edi,edx - mov ebp,DWORD PTR 84[esp] - mov edx,DWORD PTR 88[esp] + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + lea edi,DWORD PTR 3395469782[edx*1+edi] + add edi,ebp + mov ebp,DWORD PTR 96[esp] + mov edx,DWORD PTR 100[esp] add edi,DWORD PTR [ebp] add esi,DWORD PTR 4[ebp] add eax,DWORD PTR 8[ebp] @@ -1430,14 +1368,14 @@ $L000loop: mov DWORD PTR [ebp],edi add edx,64 mov DWORD PTR 4[ebp],esi - cmp edx,DWORD PTR 92[esp] + cmp edx,DWORD PTR 104[esp] mov DWORD PTR 8[ebp],eax mov edi,ecx mov DWORD PTR 12[ebp],ebx mov esi,edx mov DWORD PTR 16[ebp],ecx jb $L000loop - add esp,64 + add esp,76 pop edi pop esi pop ebx diff --git a/deps/openssl/asm/x86-win32-masm/sha/sha256-586.asm b/deps/openssl/asm/x86-win32-masm/sha/sha256-586.asm index 75b1dc8ac5..577c38ffab 100644 --- a/deps/openssl/asm/x86-win32-masm/sha/sha256-586.asm +++ b/deps/openssl/asm/x86-win32-masm/sha/sha256-586.asm @@ -2,7 +2,7 @@ TITLE sha512-586.asm IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. ENDIF -.586 +.686 .MODEL FLAT OPTION DOTNAME IF @Version LT 800 @@ -104,31 +104,30 @@ ALIGN 16 $L00300_15: mov ebx,DWORD PTR 92[esp] mov ecx,edx - ror ecx,6 - mov edi,edx - ror edi,11 + ror ecx,14 mov esi,DWORD PTR 20[esp] - xor ecx,edi - ror edi,14 - xor ecx,edi + xor ecx,edx + ror ecx,5 + xor ecx,edx + ror ecx,6 mov edi,DWORD PTR 24[esp] add ebx,ecx - mov DWORD PTR 16[esp],edx xor esi,edi + mov DWORD PTR 16[esp],edx mov ecx,eax and esi,edx mov edx,DWORD PTR 12[esp] xor esi,edi mov edi,eax add ebx,esi - ror ecx,2 + ror ecx,9 add ebx,DWORD PTR 28[esp] - ror edi,13 + xor ecx,eax + ror ecx,11 mov esi,DWORD PTR 4[esp] - xor ecx,edi - ror edi,9 + xor ecx,eax + ror ecx,2 add edx,ebx - xor ecx,edi mov edi,DWORD PTR 8[esp] add ebx,ecx mov DWORD PTR [esp],eax @@ -150,48 +149,46 @@ ALIGN 16 $L00416_63: mov esi,ebx mov ecx,DWORD PTR 100[esp] - shr ebx,3 - ror esi,7 - xor ebx,esi ror esi,11 mov edi,ecx + xor esi,ebx + ror esi,7 + shr ebx,3 + ror edi,2 xor ebx,esi - shr ecx,10 - mov esi,DWORD PTR 156[esp] + xor edi,ecx ror edi,17 - xor ecx,edi - ror edi,2 - add ebx,esi + shr ecx,10 + add ebx,DWORD PTR 156[esp] xor edi,ecx - add ebx,edi - mov ecx,edx add ebx,DWORD PTR 120[esp] - ror ecx,6 - mov edi,edx - ror edi,11 + mov ecx,edx + add ebx,edi + ror ecx,14 mov esi,DWORD PTR 20[esp] - xor ecx,edi - ror edi,14 + xor ecx,edx + ror ecx,5 mov DWORD PTR 92[esp],ebx - xor ecx,edi + xor ecx,edx + ror ecx,6 mov edi,DWORD PTR 24[esp] add ebx,ecx - mov DWORD PTR 16[esp],edx xor esi,edi + mov DWORD PTR 16[esp],edx mov ecx,eax and esi,edx mov edx,DWORD PTR 12[esp] xor esi,edi mov edi,eax add ebx,esi - ror ecx,2 + ror ecx,9 add ebx,DWORD PTR 28[esp] - ror edi,13 + xor ecx,eax + ror ecx,11 mov esi,DWORD PTR 4[esp] - xor ecx,edi - ror edi,9 + xor ecx,eax + ror ecx,2 add edx,ebx - xor ecx,edi mov edi,DWORD PTR 8[esp] add ebx,ecx mov DWORD PTR [esp],eax diff --git a/deps/openssl/asm/x86-win32-masm/sha/sha512-586.asm b/deps/openssl/asm/x86-win32-masm/sha/sha512-586.asm index 9f3249762b..98c1c070d7 100644 --- a/deps/openssl/asm/x86-win32-masm/sha/sha512-586.asm +++ b/deps/openssl/asm/x86-win32-masm/sha/sha512-586.asm @@ -2,7 +2,7 @@ TITLE sha512-586.asm IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. 
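The sha256-586.asm hunks above make a similar scheduling change: instead of computing Sigma1(e) = ROTR6(e) ^ ROTR11(e) ^ ROTR25(e) from three separate copies of e, the new $L00300_15 / $L00416_63 bodies chain the rotations on one register (ror 14, xor e, ror 5, xor e, ror 6), and the sigma0/sigma1 message-schedule terms are folded the same way. A small standalone check of that identity (C sketch; rotr32() and the test loop are mine, not part of the patch):

#include <stdint.h>
#include <assert.h>

static uint32_t rotr32(uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }

static uint32_t Sigma1_direct(uint32_t e)
{
    return rotr32(e, 6) ^ rotr32(e, 11) ^ rotr32(e, 25);
}

static uint32_t Sigma1_chained(uint32_t e)   /* the ror 14 / xor / ror 5 / xor / ror 6 form */
{
    uint32_t t = rotr32(e, 14);
    t ^= e;
    t = rotr32(t, 5);
    t ^= e;
    return rotr32(t, 6);
}

int main(void)
{
    uint32_t x = 0x243F6A88u;                /* arbitrary seed for a quick sweep */
    for (int i = 0; i < 1000; i++, x = x * 1664525u + 1013904223u)
        assert(Sigma1_direct(x) == Sigma1_chained(x));
    return 0;
}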
ENDIF -.586 +.686 .MODEL FLAT OPTION DOTNAME IF @Version LT 800 diff --git a/deps/openssl/asm/x86-win32-masm/x86cpuid.asm b/deps/openssl/asm/x86-win32-masm/x86cpuid.asm index 7e663d6645..b9b1c25843 100644 --- a/deps/openssl/asm/x86-win32-masm/x86cpuid.asm +++ b/deps/openssl/asm/x86-win32-masm/x86cpuid.asm @@ -2,7 +2,7 @@ TITLE x86cpuid.asm IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. ENDIF -.586 +.686 .MODEL FLAT OPTION DOTNAME IF @Version LT 800 @@ -27,9 +27,9 @@ $L_OPENSSL_ia32_cpuid_begin:: pushfd pop eax xor ecx,eax - bt ecx,21 - jnc $L000done xor eax,eax + bt ecx,21 + jnc $L000nocpuid cpuid mov edi,eax xor eax,eax @@ -55,7 +55,14 @@ $L_OPENSSL_ia32_cpuid_begin:: jnz $L001intel mov eax,2147483648 cpuid - cmp eax,2147483656 + cmp eax,2147483649 + jb $L001intel + mov esi,eax + mov eax,2147483649 + cpuid + or ebp,ecx + and ebp,2049 + cmp esi,2147483656 jb $L001intel mov eax,2147483656 cpuid @@ -64,46 +71,68 @@ $L_OPENSSL_ia32_cpuid_begin:: mov eax,1 cpuid bt edx,28 - jnc $L000done + jnc $L002generic shr ebx,16 and ebx,255 cmp ebx,esi - ja $L000done + ja $L002generic and edx,4026531839 - jmp $L000done + jmp $L002generic $L001intel: cmp edi,4 mov edi,-1 - jb $L002nocacheinfo + jb $L003nocacheinfo mov eax,4 mov ecx,0 cpuid mov edi,eax shr edi,14 and edi,4095 -$L002nocacheinfo: +$L003nocacheinfo: mov eax,1 cpuid + and edx,3220176895 cmp ebp,0 - jne $L003notP4 + jne $L004notintel + or edx,1073741824 and ah,15 cmp ah,15 - jne $L003notP4 + jne $L004notintel or edx,1048576 -$L003notP4: +$L004notintel: bt edx,28 - jnc $L000done + jnc $L002generic and edx,4026531839 cmp edi,0 - je $L000done + je $L002generic or edx,268435456 shr ebx,16 cmp bl,1 - ja $L000done + ja $L002generic and edx,4026531839 -$L000done: - mov eax,edx - mov edx,ecx +$L002generic: + and ebp,2048 + and ecx,4294965247 + mov esi,edx + or ebp,ecx + bt ecx,27 + jnc $L005clear_avx + xor ecx,ecx +DB 15,1,208 + and eax,6 + cmp eax,6 + je $L006done + cmp eax,2 + je $L005clear_avx +$L007clear_xmm: + and ebp,4261412861 + and esi,4278190079 +$L005clear_avx: + and ebp,4026525695 +$L006done: + mov eax,esi + mov edx,ebp +$L000nocpuid: pop edi pop esi pop ebx @@ -118,9 +147,9 @@ $L_OPENSSL_rdtsc_begin:: xor edx,edx lea ecx,DWORD PTR _OPENSSL_ia32cap_P bt DWORD PTR [ecx],4 - jnc $L004notsc + jnc $L008notsc rdtsc -$L004notsc: +$L008notsc: ret _OPENSSL_rdtsc ENDP ALIGN 16 @@ -128,14 +157,14 @@ _OPENSSL_instrument_halt PROC PUBLIC $L_OPENSSL_instrument_halt_begin:: lea ecx,DWORD PTR _OPENSSL_ia32cap_P bt DWORD PTR [ecx],4 - jnc $L005nohalt + jnc $L009nohalt DD 2421723150 and eax,3 - jnz $L005nohalt + jnz $L009nohalt pushfd pop eax bt eax,9 - jnc $L005nohalt + jnc $L009nohalt rdtsc push edx push eax @@ -145,7 +174,7 @@ DD 2421723150 sbb edx,DWORD PTR 4[esp] add esp,8 ret -$L005nohalt: +$L009nohalt: xor eax,eax xor edx,edx ret @@ -156,21 +185,21 @@ $L_OPENSSL_far_spin_begin:: pushfd pop eax bt eax,9 - jnc $L006nospin + jnc $L010nospin mov eax,DWORD PTR 4[esp] mov ecx,DWORD PTR 8[esp] DD 2430111262 xor eax,eax mov edx,DWORD PTR [ecx] - jmp $L007spin + jmp $L011spin ALIGN 16 -$L007spin: +$L011spin: inc eax cmp edx,DWORD PTR [ecx] - je $L007spin + je $L011spin DD 529567888 ret -$L006nospin: +$L010nospin: xor eax,eax xor edx,edx ret @@ -183,9 +212,9 @@ $L_OPENSSL_wipe_cpu_begin:: lea ecx,DWORD PTR _OPENSSL_ia32cap_P mov ecx,DWORD PTR [ecx] bt DWORD PTR [ecx],1 - jnc $L008no_x87 + jnc $L012no_x87 DD 4007259865,4007259865,4007259865,4007259865,2430851995 -$L008no_x87: +$L012no_x87: lea eax,DWORD PTR 4[esp] ret 
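The rewritten OPENSSL_ia32_cpuid above now probes the AMD extended leaves (2147483648/2147483649/2147483656 are 0x80000000/0x80000001/0x80000008) and, in the $L002generic tail, masks out AVX-class feature bits unless the OS actually saves the extended register state: "bt ecx,27" tests OSXSAVE, the "DB 15,1,208" bytes are XGETBV, and the "and eax,6 / cmp eax,6" sequence checks that XCR0 has both the XMM and YMM save bits set. Roughly, in C (a simplified model, not OpenSSL code; the xgetbv0 callback is a hypothetical stand-in for XGETBV, and the EDX feature word plus the AMD XOP merge handled by the real code are left out):

#include <stdint.h>

#define AVX_BITS ((1u << 28) | (1u << 12) | (1u << 11))   /* AVX, FMA, XOP     */
#define XMM_BITS ((1u << 25) | (1u << 1))                 /* AES-NI, PCLMULQDQ */

uint32_t gate_on_os_support(uint32_t ecx, uint64_t (*xgetbv0)(void))
{
    if (ecx & (1u << 27)) {                 /* OSXSAVE: XCR0 is readable        */
        uint64_t xcr0 = xgetbv0() & 6;      /* bit 1 = XMM state, bit 2 = YMM   */
        if (xcr0 == 6)                      /* both saved: keep every bit       */
            return ecx;
        if (xcr0 != 2)                      /* not even XMM: $L007clear_xmm     */
            ecx &= ~XMM_BITS;
    }
    return ecx & ~AVX_BITS;                 /* $L005clear_avx                   */
}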
_OPENSSL_wipe_cpu ENDP @@ -197,11 +226,11 @@ $L_OPENSSL_atomic_add_begin:: push ebx nop mov eax,DWORD PTR [edx] -$L009spin: +$L013spin: lea ebx,DWORD PTR [ecx*1+eax] nop DD 447811568 - jne $L009spin + jne $L013spin mov eax,ebx pop ebx ret @@ -238,37 +267,50 @@ $L_OPENSSL_cleanse_begin:: mov ecx,DWORD PTR 8[esp] xor eax,eax cmp ecx,7 - jae $L010lot + jae $L014lot cmp ecx,0 - je $L011ret -$L012little: + je $L015ret +$L016little: mov BYTE PTR [edx],al sub ecx,1 lea edx,DWORD PTR 1[edx] - jnz $L012little -$L011ret: + jnz $L016little +$L015ret: ret ALIGN 16 -$L010lot: +$L014lot: test edx,3 - jz $L013aligned + jz $L017aligned mov BYTE PTR [edx],al lea ecx,DWORD PTR [ecx-1] lea edx,DWORD PTR 1[edx] - jmp $L010lot -$L013aligned: + jmp $L014lot +$L017aligned: mov DWORD PTR [edx],eax lea ecx,DWORD PTR [ecx-4] test ecx,-4 lea edx,DWORD PTR 4[edx] - jnz $L013aligned + jnz $L017aligned cmp ecx,0 - jne $L012little + jne $L016little ret _OPENSSL_cleanse ENDP +ALIGN 16 +_OPENSSL_ia32_rdrand PROC PUBLIC +$L_OPENSSL_ia32_rdrand_begin:: + mov ecx,8 +$L018loop: +DB 15,199,240 + jc $L019break + loop $L018loop +$L019break: + cmp eax,0 + cmove eax,ecx + ret +_OPENSSL_ia32_rdrand ENDP .text$ ENDS .bss SEGMENT 'BSS' -COMM _OPENSSL_ia32cap_P:DWORD +COMM _OPENSSL_ia32cap_P:QWORD .bss ENDS .CRT$XCU SEGMENT DWORD PUBLIC 'DATA' EXTERN _OPENSSL_cpuid_setup:NEAR
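Two smaller additions close out this file: _OPENSSL_ia32cap_P grows from DWORD to QWORD so a second word can hold the extended feature bits collected above, and a new _OPENSSL_ia32_rdrand routine appears. "DB 15,199,240" encodes rdrand eax; the routine retries up to eight times and uses cmove (a P6 instruction, which is what the .586 to .686 directive bump in these files allows) so that 0 stays reserved as the failure value. A rough C model of it (not the OpenSSL source; _rdrand32_step is the compiler intrinsic and the build is assumed to pass -mrdrnd):

#include <stdint.h>
#include <immintrin.h>                      /* _rdrand32_step */

uint32_t ia32_rdrand_model(void)
{
    unsigned int val = 0;
    for (unsigned int left = 8; left != 0; left--) {
        if (_rdrand32_step(&val))           /* CF=1: a value was delivered      */
            return val != 0 ? val : left;   /* the "cmove eax,ecx" fixup keeps  */
    }                                       /* 0 reserved for failure           */
    return 0;                               /* eight misses: report failure     */
}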