mirror of https://github.com/lukechilds/node.git
18 changed files with 18239 additions and 0 deletions
File diff suppressed because it is too large
@ -0,0 +1,839 @@ |
|||||
|
.text |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_encrypt_core -- encrypt the single 16-byte block held in %xmm0.
#   In:     %xmm0 = input block
#           %rdx  = key schedule: round keys are read 16 bytes at a time
#                   starting at (%rdx); the loop count is the 32-bit value
#                   stored at 240(%rdx)
#           %xmm9-%xmm15 = lookup constants as loaded by _vpaes_preheat
#   Out:    %xmm0 = result
#   Writes: %rax, %r9, %r10, %r11, %xmm1-%xmm5; no stack accesses.
# Every ".byte 102,15,56,0,N" line is a hand-encoded SSSE3 pshufb
# (66 0F 38 00 /r); the decoded AT&T form is given in the comment above it.
.type _vpaes_encrypt_core,@function |
||||
|
.align 16 |
||||
|
_vpaes_encrypt_core: |
||||
|
# %r9 = round-key cursor; %r11 = table offset that cycles through
# 16,32,48,0 (advanced by 16 and masked with 48 once per round).
movq %rdx,%r9 |
||||
|
movq $16,%r11 |
||||
|
movl 240(%rdx),%eax |
||||
|
# Split the input into nibbles (%xmm1 = high nibbles shifted down,
# %xmm0 = low nibbles), look both up in the .Lk_ipt table pair and xor
# in the first round key (%xmm5).
movdqa %xmm9,%xmm1 |
||||
|
movdqa .Lk_ipt(%rip),%xmm2 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
movdqu (%r9),%xmm5 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm9,%xmm0 |
||||
|
# pshufb %xmm0,%xmm2
.byte 102,15,56,0,208 |
||||
|
movdqa .Lk_ipt+16(%rip),%xmm0 |
||||
|
# pshufb %xmm1,%xmm0
.byte 102,15,56,0,193 |
||||
|
pxor %xmm5,%xmm2 |
||||
|
pxor %xmm2,%xmm0 |
||||
|
# Last flag-writing instruction before the jnz in .Lenc_entry (leaq, jmp
# and the SSE instructions leave EFLAGS alone); a non-zero pointer result
# keeps ZF clear, so the first pass always continues into .Lenc_loop.
addq $16,%r9 |
||||
|
leaq .Lk_mc_backward(%rip),%r10 |
||||
|
jmp .Lenc_entry |
||||
|
|
||||
|
.align 16 |
||||
|
# One middle round. Consumes %xmm2/%xmm3 produced by .Lenc_entry and the
# round key in %xmm5; leaves the new state in %xmm0.
.Lenc_loop: |
||||
|
|
||||
|
movdqa %xmm13,%xmm4 |
||||
|
# pshufb %xmm2,%xmm4
.byte 102,15,56,0,226 |
||||
|
pxor %xmm5,%xmm4 |
||||
|
movdqa %xmm12,%xmm0 |
||||
|
# pshufb %xmm3,%xmm0
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
movdqa %xmm15,%xmm5 |
||||
|
# pshufb %xmm2,%xmm5
.byte 102,15,56,0,234 |
||||
|
# -64(%r11,%r10,1) addresses .Lk_mc_forward + %r11 (that table sits
# 64 bytes before .Lk_mc_backward in the constant pool).
movdqa -64(%r11,%r10,1),%xmm1 |
||||
|
movdqa %xmm14,%xmm2 |
||||
|
# pshufb %xmm3,%xmm2
.byte 102,15,56,0,211 |
||||
|
pxor %xmm5,%xmm2 |
||||
|
# (%r11,%r10,1) addresses .Lk_mc_backward + %r11.
movdqa (%r11,%r10,1),%xmm4 |
||||
|
movdqa %xmm0,%xmm3 |
||||
|
# pshufb %xmm1,%xmm0
.byte 102,15,56,0,193 |
||||
|
addq $16,%r9 |
||||
|
pxor %xmm2,%xmm0 |
||||
|
# pshufb %xmm4,%xmm3
.byte 102,15,56,0,220 |
||||
|
addq $16,%r11 |
||||
|
pxor %xmm0,%xmm3 |
||||
|
# pshufb %xmm1,%xmm0
.byte 102,15,56,0,193 |
||||
|
# Wrap the table offset to one of 0,16,32,48.
andq $48,%r11 |
||||
|
pxor %xmm3,%xmm0 |
||||
|
# ZF from this decrement is consumed by the jnz at the end of
# .Lenc_entry; only SSE instructions sit in between.
subq $1,%rax |
||||
|
|
||||
|
# Shared per-round front end: split the state into nibbles, run the
# lookups through %xmm10/%xmm11, leave the results in %xmm2/%xmm3 and
# load the next round key into %xmm5.
.Lenc_entry: |
||||
|
|
||||
|
movdqa %xmm9,%xmm1 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm9,%xmm0 |
||||
|
movdqa %xmm11,%xmm5 |
||||
|
# pshufb %xmm0,%xmm5
.byte 102,15,56,0,232 |
||||
|
pxor %xmm1,%xmm0 |
||||
|
movdqa %xmm10,%xmm3 |
||||
|
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217 |
||||
|
pxor %xmm5,%xmm3 |
||||
|
movdqa %xmm10,%xmm4 |
||||
|
# pshufb %xmm0,%xmm4
.byte 102,15,56,0,224 |
||||
|
pxor %xmm5,%xmm4 |
||||
|
movdqa %xmm10,%xmm2 |
||||
|
# pshufb %xmm3,%xmm2
.byte 102,15,56,0,211 |
||||
|
pxor %xmm0,%xmm2 |
||||
|
movdqa %xmm10,%xmm3 |
||||
|
movdqu (%r9),%xmm5 |
||||
|
# pshufb %xmm4,%xmm3
.byte 102,15,56,0,220 |
||||
|
pxor %xmm1,%xmm3 |
||||
|
jnz .Lenc_loop |
||||
|
|
||||
|
|
||||
|
# Final round: -96(%r10)/-80(%r10) are the two rows of .Lk_sbo and
# 64(%r11,%r10,1) is .Lk_sr + %r11.
movdqa -96(%r10),%xmm4 |
||||
|
movdqa -80(%r10),%xmm0 |
||||
|
# pshufb %xmm2,%xmm4
.byte 102,15,56,0,226 |
||||
|
pxor %xmm5,%xmm4 |
||||
|
# pshufb %xmm3,%xmm0
.byte 102,15,56,0,195 |
||||
|
movdqa 64(%r11,%r10,1),%xmm1 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
# pshufb %xmm1,%xmm0
.byte 102,15,56,0,193 |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_decrypt_core -- decrypt the single 16-byte block held in %xmm0.
#   In:     %xmm0 = input block
#           %rdx  = key schedule: round keys are read 16 bytes at a time
#                   starting at (%rdx); the loop count is the 32-bit value
#                   stored at 240(%rdx)
#           %xmm9-%xmm11 = lookup constants as loaded by _vpaes_preheat
#   Out:    %xmm0 = result
#   Writes: %rax, %r9, %r10, %r11, %xmm1-%xmm5; no stack accesses.
# ".byte 102,15,56,0,N" is a hand-encoded pshufb and
# ".byte 102,15,58,15,N,imm" a hand-encoded palignr; the decoded AT&T form
# is given in the comment above each one.
.type _vpaes_decrypt_core,@function |
||||
|
.align 16 |
||||
|
_vpaes_decrypt_core: |
||||
|
movq %rdx,%r9 |
||||
|
movl 240(%rdx),%eax |
||||
|
# Split the input into nibbles and map them through the .Lk_dipt table
# pair. Interleaved with that, %r11 is built up as
# .Lk_dsbd + (((count << 4) ^ 48) & 48); it is only used for the final
# -352(%r11) load.
movdqa %xmm9,%xmm1 |
||||
|
movdqa .Lk_dipt(%rip),%xmm2 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
movq %rax,%r11 |
||||
|
psrld $4,%xmm1 |
||||
|
movdqu (%r9),%xmm5 |
||||
|
shlq $4,%r11 |
||||
|
pand %xmm9,%xmm0 |
||||
|
# pshufb %xmm0,%xmm2
.byte 102,15,56,0,208 |
||||
|
movdqa .Lk_dipt+16(%rip),%xmm0 |
||||
|
xorq $48,%r11 |
||||
|
leaq .Lk_dsbd(%rip),%r10 |
||||
|
# pshufb %xmm1,%xmm0
.byte 102,15,56,0,193 |
||||
|
andq $48,%r11 |
||||
|
pxor %xmm5,%xmm2 |
||||
|
# %xmm5 becomes the byte-permutation applied between table lookups in the
# loop; it is rotated by palignr at the bottom of every iteration.
movdqa .Lk_mc_forward+48(%rip),%xmm5 |
||||
|
pxor %xmm2,%xmm0 |
||||
|
addq $16,%r9 |
||||
|
# Last flag-writing instruction before the jnz in .Ldec_entry; a non-zero
# address result keeps ZF clear, so the first pass enters .Ldec_loop.
addq %r10,%r11 |
||||
|
jmp .Ldec_entry |
||||
|
|
||||
|
.align 16 |
||||
|
# One middle round. Consumes %xmm2/%xmm3 from .Ldec_entry and the round
# key already loaded into %xmm0. The four table pairs at -32, 0, 32 and
# 64(%r10) are .Lk_dsb9, .Lk_dsbd, .Lk_dsbb and .Lk_dsbe.
.Ldec_loop: |
||||
|
|
||||
|
|
||||
|
|
||||
|
movdqa -32(%r10),%xmm4 |
||||
|
# pshufb %xmm2,%xmm4
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa -16(%r10),%xmm0 |
||||
|
# pshufb %xmm3,%xmm0
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
addq $16,%r9 |
||||
|
|
||||
|
# pshufb %xmm5,%xmm0
.byte 102,15,56,0,197 |
||||
|
movdqa 0(%r10),%xmm4 |
||||
|
# pshufb %xmm2,%xmm4
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa 16(%r10),%xmm0 |
||||
|
# pshufb %xmm3,%xmm0
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
# ZF from this decrement is consumed by the jnz at the end of
# .Ldec_entry; only SSE instructions sit in between.
subq $1,%rax |
||||
|
|
||||
|
# pshufb %xmm5,%xmm0
.byte 102,15,56,0,197 |
||||
|
movdqa 32(%r10),%xmm4 |
||||
|
# pshufb %xmm2,%xmm4
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa 48(%r10),%xmm0 |
||||
|
# pshufb %xmm3,%xmm0
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
|
||||
|
# pshufb %xmm5,%xmm0
.byte 102,15,56,0,197 |
||||
|
movdqa 64(%r10),%xmm4 |
||||
|
# pshufb %xmm2,%xmm4
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa 80(%r10),%xmm0 |
||||
|
# pshufb %xmm3,%xmm0
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
|
||||
|
# palignr $12,%xmm5,%xmm5  (rotate the permutation in %xmm5 by 12 bytes)
.byte 102,15,58,15,237,12 |
||||
|
|
||||
|
# Shared per-round front end: split the state into nibbles, run the
# lookups through %xmm10/%xmm11, leave the results in %xmm2/%xmm3 and
# load the next round key into %xmm0.
.Ldec_entry: |
||||
|
|
||||
|
movdqa %xmm9,%xmm1 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm9,%xmm0 |
||||
|
movdqa %xmm11,%xmm2 |
||||
|
# pshufb %xmm0,%xmm2
.byte 102,15,56,0,208 |
||||
|
pxor %xmm1,%xmm0 |
||||
|
movdqa %xmm10,%xmm3 |
||||
|
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
movdqa %xmm10,%xmm4 |
||||
|
# pshufb %xmm0,%xmm4
.byte 102,15,56,0,224 |
||||
|
pxor %xmm2,%xmm4 |
||||
|
movdqa %xmm10,%xmm2 |
||||
|
# pshufb %xmm3,%xmm2
.byte 102,15,56,0,211 |
||||
|
pxor %xmm0,%xmm2 |
||||
|
movdqa %xmm10,%xmm3 |
||||
|
# pshufb %xmm4,%xmm3
.byte 102,15,56,0,220 |
||||
|
pxor %xmm1,%xmm3 |
||||
|
movdqu (%r9),%xmm0 |
||||
|
jnz .Ldec_loop |
||||
|
|
||||
|
|
||||
|
# Final round: 96(%r10)/112(%r10) are the two rows of .Lk_dsbo.
# -352(%r11) lands inside .Lk_sr, which starts 352 bytes before .Lk_dsbd
# in the constant pool.
movdqa 96(%r10),%xmm4 |
||||
|
# pshufb %xmm2,%xmm4
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa 112(%r10),%xmm0 |
||||
|
movdqa -352(%r11),%xmm2 |
||||
|
# pshufb %xmm3,%xmm0
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
# pshufb %xmm2,%xmm0
.byte 102,15,56,0,194 |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
.size _vpaes_decrypt_core,.-_vpaes_decrypt_core |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_schedule_core -- expand a user key into a round-key schedule.
#   In:     %rdi = user key bytes
#           %esi = key size in bits (192 selects .Lschedule_192, anything
#                  above 192 selects .Lschedule_256, everything else falls
#                  into .Lschedule_128)
#           %rdx = address where the first round key is stored; the mangle
#                  helper moves it by 16 bytes per key (up when %rcx is
#                  zero, down otherwise)
#           %rcx = zero for an encryption schedule, non-zero for decryption
#           %r8  = byte offset into .Lk_sr (the callers in this file pass
#                  48, 32 or 0)
#   Writes: %rsi (reused as loop counter), %rdx, %r8, %r10, %r11,
#           %xmm0-%xmm8 and the registers loaded by _vpaes_preheat.
#           %xmm0-%xmm7 are zeroed before returning.
# ".byte 102,15,56,0,N" is a hand-encoded pshufb and
# ".byte 102,15,58,15,N,imm" a hand-encoded palignr.
.type _vpaes_schedule_core,@function |
||||
|
.align 16 |
||||
|
_vpaes_schedule_core: |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
call _vpaes_preheat |
||||
|
|
||||
|
movdqa .Lk_rcon(%rip),%xmm8 |
||||
|
movdqu (%rdi),%xmm0 |
||||
|
|
||||
|
|
||||
|
# Keep an untransformed copy of the first 16 key bytes in %xmm3, then
# transform %xmm0 through the .Lk_ipt table pair; %xmm7 holds the result.
movdqa %xmm0,%xmm3 |
||||
|
leaq .Lk_ipt(%rip),%r11 |
||||
|
call _vpaes_schedule_transform |
||||
|
movdqa %xmm0,%xmm7 |
||||
|
|
||||
|
leaq .Lk_sr(%rip),%r10 |
||||
|
testq %rcx,%rcx |
||||
|
jnz .Lschedule_am_decrypting |
||||
|
|
||||
|
|
||||
|
# Encrypting: the transformed key is stored as the first round key.
movdqu %xmm0,(%rdx) |
||||
|
jmp .Lschedule_go |
||||
|
|
||||
|
# Decrypting: the untransformed key copy is permuted with the .Lk_sr row
# selected by %r8 and stored instead; %r8 is then flipped with xor 48.
.Lschedule_am_decrypting: |
||||
|
|
||||
|
movdqa (%r8,%r10,1),%xmm1 |
||||
|
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217 |
||||
|
movdqu %xmm3,(%rdx) |
||||
|
xorq $48,%r8 |
||||
|
|
||||
|
.Lschedule_go: |
||||
|
cmpl $192,%esi |
||||
|
ja .Lschedule_256 |
||||
|
je .Lschedule_192 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# 128-bit path: ten schedule rounds, each but the last followed by a
# mangle-and-store; the last one is finished by .Lschedule_mangle_last.
.Lschedule_128: |
||||
|
# movl zero-extends into %rsi, so the decq %rsi below counts correctly.
movl $10,%esi |
||||
|
|
||||
|
.Loop_schedule_128: |
||||
|
call _vpaes_schedule_round |
||||
|
decq %rsi |
||||
|
jz .Lschedule_mangle_last |
||||
|
call _vpaes_schedule_mangle |
||||
|
|
||||
|
jmp .Loop_schedule_128 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
.align 16 |
||||
|
# 192-bit path. Key bytes 8..23 are transformed (%r11 still points at
# .Lk_ipt) and kept in %xmm6 with the low 64 bits cleared. Each of the
# four loop iterations stores three round keys, except the last, which
# leaves its third key to .Lschedule_mangle_last.
.Lschedule_192: |
||||
|
movdqu 8(%rdi),%xmm0 |
||||
|
call _vpaes_schedule_transform |
||||
|
|
||||
|
movdqa %xmm0,%xmm6 |
||||
|
pxor %xmm4,%xmm4 |
||||
|
# Copies the (zero) high half of %xmm4 into the low half of %xmm6.
movhlps %xmm4,%xmm6 |
||||
|
movl $4,%esi |
||||
|
|
||||
|
.Loop_schedule_192: |
||||
|
call _vpaes_schedule_round |
||||
|
# palignr $8,%xmm6,%xmm0
.byte 102,15,58,15,198,8 |
||||
|
call _vpaes_schedule_mangle |
||||
|
|
||||
|
call _vpaes_schedule_192_smear |
||||
|
call _vpaes_schedule_mangle |
||||
|
|
||||
|
call _vpaes_schedule_round |
||||
|
decq %rsi |
||||
|
jz .Lschedule_mangle_last |
||||
|
call _vpaes_schedule_mangle |
||||
|
|
||||
|
call _vpaes_schedule_192_smear |
||||
|
jmp .Loop_schedule_192 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
.align 16 |
||||
|
# 256-bit path. Key bytes 16..31 are transformed (%r11 still points at
# .Lk_ipt). Each of the seven iterations stores two round keys, except
# the last, which exits through .Lschedule_mangle_last after its round.
.Lschedule_256: |
||||
|
movdqu 16(%rdi),%xmm0 |
||||
|
call _vpaes_schedule_transform |
||||
|
|
||||
|
movl $7,%esi |
||||
|
|
||||
|
.Loop_schedule_256: |
||||
|
call _vpaes_schedule_mangle |
||||
|
|
||||
|
movdqa %xmm0,%xmm6 |
||||
|
|
||||
|
|
||||
|
call _vpaes_schedule_round |
||||
|
decq %rsi |
||||
|
jz .Lschedule_mangle_last |
||||
|
call _vpaes_schedule_mangle |
||||
|
|
||||
|
|
||||
|
|
||||
|
# Second half of the iteration: broadcast the top dword of %xmm0, then run
# the low round on %xmm6 while %xmm7 is parked in %xmm5 and restored
# afterwards.
pshufd $255,%xmm0,%xmm0 |
||||
|
movdqa %xmm7,%xmm5 |
||||
|
movdqa %xmm6,%xmm7 |
||||
|
call _vpaes_schedule_low_round |
||||
|
movdqa %xmm5,%xmm7 |
||||
|
|
||||
|
jmp .Loop_schedule_256 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
.align 16 |
||||
|
# Store the final round key. Encrypting: permute %xmm0 with the .Lk_sr row
# selected by %r8, use the .Lk_opt tables and advance %rdx by a net 16.
# Decrypting: use the .Lk_deskew tables and move %rdx back by 16.
.Lschedule_mangle_last: |
||||
|
|
||||
|
leaq .Lk_deskew(%rip),%r11 |
||||
|
testq %rcx,%rcx |
||||
|
jnz .Lschedule_mangle_last_dec |
||||
|
|
||||
|
|
||||
|
movdqa (%r8,%r10,1),%xmm1 |
||||
|
# pshufb %xmm1,%xmm0
.byte 102,15,56,0,193 |
||||
|
leaq .Lk_opt(%rip),%r11 |
||||
|
# +32 here combined with the -16 below gives the net +16.
addq $32,%rdx |
||||
|
|
||||
|
.Lschedule_mangle_last_dec: |
||||
|
addq $-16,%rdx |
||||
|
pxor .Lk_s63(%rip),%xmm0 |
||||
|
call _vpaes_schedule_transform |
||||
|
|
||||
|
movdqu %xmm0,(%rdx) |
||||
|
|
||||
|
|
||||
# Zero %xmm0-%xmm7 so no key-derived values are left in them on return.
pxor %xmm0,%xmm0 |
||||
|
pxor %xmm1,%xmm1 |
||||
|
pxor %xmm2,%xmm2 |
||||
|
pxor %xmm3,%xmm3 |
||||
|
pxor %xmm4,%xmm4 |
||||
|
pxor %xmm5,%xmm5 |
||||
|
pxor %xmm6,%xmm6 |
||||
|
pxor %xmm7,%xmm7 |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
.size _vpaes_schedule_core,.-_vpaes_schedule_core |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_schedule_192_smear -- helper for the 192-bit key schedule.
#   In:     %xmm6, %xmm7
#   Out:    %xmm0 = %xmm6 xor (dwords 0,0,0,2 of %xmm6)
#                         xor (dwords 2,3,3,3 of %xmm7)
#           %xmm6 = the same value with its low 64 bits cleared
#   Writes: %xmm0, %xmm1 (zeroed), %xmm6
.type _vpaes_schedule_192_smear,@function |
||||
|
.align 16 |
||||
|
_vpaes_schedule_192_smear: |
||||
|
# Selector $128 (0x80) picks source dwords 0,0,0,2 for result dwords 0..3.
pshufd $128,%xmm6,%xmm0 |
||||
|
pxor %xmm0,%xmm6 |
||||
|
# Selector $254 (0xFE) picks source dwords 2,3,3,3 for result dwords 0..3.
pshufd $254,%xmm7,%xmm0 |
||||
|
pxor %xmm0,%xmm6 |
||||
|
movdqa %xmm6,%xmm0 |
||||
|
pxor %xmm1,%xmm1 |
||||
|
# Copies the (zero) high half of %xmm1 into the low half of %xmm6.
movhlps %xmm1,%xmm6 |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_schedule_round -- one round of the key schedule.
#   In:     %xmm0 = word source for this round, %xmm7 = previous round key,
#           %xmm8 = round-constant vector, %xmm9-%xmm13 = lookup constants
#           as loaded by _vpaes_preheat
#   Out:    %xmm0 = %xmm7 = new round key; %xmm8 rotated by one byte
#   Writes: %xmm0-%xmm4, %xmm7, %xmm8
# _vpaes_schedule_low_round is a second entry point that skips the
# round-constant and rotation steps; the 256-bit schedule calls it directly.
# ".byte 102,[REX,]15,58,15,N,imm" is a hand-encoded palignr and
# ".byte 102,15,56,0,N" a hand-encoded pshufb.
.type _vpaes_schedule_round,@function |
||||
|
.align 16 |
||||
|
_vpaes_schedule_round: |
||||
|
|
||||
|
# Move the top byte of %xmm8 into byte 0 of the zeroed %xmm1, rotate
# %xmm8 so a different byte is on top next time, and xor the extracted
# byte into %xmm7.
pxor %xmm1,%xmm1 |
||||
|
# palignr $15,%xmm8,%xmm1
.byte 102,65,15,58,15,200,15 |
||||
|
# palignr $15,%xmm8,%xmm8
.byte 102,69,15,58,15,192,15 |
||||
|
pxor %xmm1,%xmm7 |
||||
|
|
||||
|
|
||||
|
# Broadcast the top dword of %xmm0 to all four lanes, then rotate the
# register right by one byte.
pshufd $255,%xmm0,%xmm0 |
||||
|
# palignr $1,%xmm0,%xmm0
.byte 102,15,58,15,192,1 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
_vpaes_schedule_low_round: |
||||
|
|
||||
|
# Smear %xmm7: xor it with itself shifted left by 4 bytes, then xor that
# with itself shifted left by 8 bytes, then xor in .Lk_s63.
movdqa %xmm7,%xmm1 |
||||
|
pslldq $4,%xmm7 |
||||
|
pxor %xmm1,%xmm7 |
||||
|
movdqa %xmm7,%xmm1 |
||||
|
pslldq $8,%xmm7 |
||||
|
pxor %xmm1,%xmm7 |
||||
|
pxor .Lk_s63(%rip),%xmm7 |
||||
|
|
||||
|
|
||||
|
# Nibble-split %xmm0 and run it through the %xmm10/%xmm11 lookups; this
# is the same instruction sequence as .Lenc_entry uses, with %xmm2 in
# place of %xmm5 as scratch.
movdqa %xmm9,%xmm1 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm9,%xmm0 |
||||
|
movdqa %xmm11,%xmm2 |
||||
|
# pshufb %xmm0,%xmm2
.byte 102,15,56,0,208 |
||||
|
pxor %xmm1,%xmm0 |
||||
|
movdqa %xmm10,%xmm3 |
||||
|
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
movdqa %xmm10,%xmm4 |
||||
|
# pshufb %xmm0,%xmm4
.byte 102,15,56,0,224 |
||||
|
pxor %xmm2,%xmm4 |
||||
|
movdqa %xmm10,%xmm2 |
||||
|
# pshufb %xmm3,%xmm2
.byte 102,15,56,0,211 |
||||
|
pxor %xmm0,%xmm2 |
||||
|
movdqa %xmm10,%xmm3 |
||||
|
# pshufb %xmm4,%xmm3
.byte 102,15,56,0,220 |
||||
|
pxor %xmm1,%xmm3 |
||||
|
# Output lookups through %xmm13/%xmm12 (the two rows of .Lk_sb1 as
# loaded by _vpaes_preheat).
movdqa %xmm13,%xmm4 |
||||
|
# pshufb %xmm2,%xmm4
.byte 102,15,56,0,226 |
||||
|
movdqa %xmm12,%xmm0 |
||||
|
# pshufb %xmm3,%xmm0
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
|
||||
|
|
||||
|
# Combine with the smeared previous key; the result is both the return
# value (%xmm0) and the next round's previous key (%xmm7).
pxor %xmm7,%xmm0 |
||||
|
movdqa %xmm0,%xmm7 |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
.size _vpaes_schedule_round,.-_vpaes_schedule_round |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_schedule_transform -- byte-wise table transform of %xmm0.
#   In:     %xmm0 = value to transform
#           %r11  = address of a 32-byte table pair; it is read with
#                   movdqa, so it must be 16-byte aligned
#           %xmm9 = nibble mask as loaded by _vpaes_preheat
#   Out:    %xmm0 = row0[low nibble] xor row1[high nibble] for every byte,
#           where row0 is at (%r11) and row1 at 16(%r11)
#   Writes: %xmm0, %xmm1, %xmm2; %r11 is left unchanged.
.type _vpaes_schedule_transform,@function |
||||
|
.align 16 |
||||
|
_vpaes_schedule_transform: |
||||
|
# %xmm1 = high nibbles shifted down, %xmm0 = low nibbles.
movdqa %xmm9,%xmm1 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm9,%xmm0 |
||||
|
movdqa (%r11),%xmm2 |
||||
|
# pshufb %xmm0,%xmm2
.byte 102,15,56,0,208 |
||||
|
movdqa 16(%r11),%xmm0 |
||||
|
# pshufb %xmm1,%xmm0
.byte 102,15,56,0,193 |
||||
|
pxor %xmm2,%xmm0 |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_schedule_mangle -- convert the round key in %xmm0 to its stored
# form and write it to the schedule.
#   In:     %xmm0 = round key (left unmodified)
#           %rcx  = zero for the encryption form, non-zero for decryption
#           %rdx  = schedule cursor, %r8 = offset into the table at %r10
#           %r10  = base of the permutation table (.Lk_sr in the caller)
#           %xmm9 = nibble mask (used on the decryption path only)
#   Out:    %rdx advanced by +16 (encryption) or -16 (decryption) and the
#           mangled key stored at the new (%rdx); %r8 = (%r8 - 16) & 48
#   Writes: %xmm1-%xmm5, %rdx, %r8, and %r11 on the decryption path.
# ".byte 102,15,56,0,N" is a hand-encoded pshufb.
.type _vpaes_schedule_mangle,@function |
||||
|
.align 16 |
||||
|
_vpaes_schedule_mangle: |
||||
|
movdqa %xmm0,%xmm4 |
||||
|
movdqa .Lk_mc_forward(%rip),%xmm5 |
||||
|
testq %rcx,%rcx |
||||
|
jnz .Lschedule_mangle_dec |
||||
|
|
||||
|
|
||||
|
# Encryption form: xor with .Lk_s63, then xor together the results of
# applying the .Lk_mc_forward permutation once, twice and three times.
addq $16,%rdx |
||||
|
pxor .Lk_s63(%rip),%xmm4 |
||||
|
# pshufb %xmm5,%xmm4
.byte 102,15,56,0,229 |
||||
|
movdqa %xmm4,%xmm3 |
||||
|
# pshufb %xmm5,%xmm4
.byte 102,15,56,0,229 |
||||
|
pxor %xmm4,%xmm3 |
||||
|
# pshufb %xmm5,%xmm4
.byte 102,15,56,0,229 |
||||
|
pxor %xmm4,%xmm3 |
||||
|
|
||||
|
jmp .Lschedule_mangle_both |
||||
|
.align 16 |
||||
|
# Decryption form: nibble-split the key (%xmm1 = high, %xmm4 = low) and
# accumulate four table-pair lookups, permuting the running value with
# %xmm5 between them. The offsets 0..112 from .Lk_dksd walk the four
# consecutive table pairs .Lk_dksd, .Lk_dksb, .Lk_dkse and .Lk_dks9.
.Lschedule_mangle_dec: |
||||
|
|
||||
|
leaq .Lk_dksd(%rip),%r11 |
||||
|
movdqa %xmm9,%xmm1 |
||||
|
pandn %xmm4,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm9,%xmm4 |
||||
|
|
||||
|
movdqa 0(%r11),%xmm2 |
||||
|
# pshufb %xmm4,%xmm2
.byte 102,15,56,0,212 |
||||
|
movdqa 16(%r11),%xmm3 |
||||
|
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
# pshufb %xmm5,%xmm3
.byte 102,15,56,0,221 |
||||
|
|
||||
|
movdqa 32(%r11),%xmm2 |
||||
|
# pshufb %xmm4,%xmm2
.byte 102,15,56,0,212 |
||||
|
pxor %xmm3,%xmm2 |
||||
|
movdqa 48(%r11),%xmm3 |
||||
|
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
# pshufb %xmm5,%xmm3
.byte 102,15,56,0,221 |
||||
|
|
||||
|
movdqa 64(%r11),%xmm2 |
||||
|
# pshufb %xmm4,%xmm2
.byte 102,15,56,0,212 |
||||
|
pxor %xmm3,%xmm2 |
||||
|
movdqa 80(%r11),%xmm3 |
||||
|
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
# pshufb %xmm5,%xmm3
.byte 102,15,56,0,221 |
||||
|
|
||||
|
movdqa 96(%r11),%xmm2 |
||||
|
# pshufb %xmm4,%xmm2
.byte 102,15,56,0,212 |
||||
|
pxor %xmm3,%xmm2 |
||||
|
movdqa 112(%r11),%xmm3 |
||||
|
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
|
||||
|
addq $-16,%rdx |
||||
|
|
||||
|
# Common tail: permute with the table row selected by %r8, step %r8 to
# the previous row (wrapping within 0..48) and store the key.
.Lschedule_mangle_both: |
||||
|
movdqa (%r8,%r10,1),%xmm1 |
||||
|
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217 |
||||
|
addq $-16,%r8 |
||||
|
andq $48,%r8 |
||||
|
movdqu %xmm3,(%rdx) |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# vpaes_set_encrypt_key -- exported entry point that builds an encryption
# key schedule.
#   In:     %rdi = user key bytes, %esi = key size in bits,
#           %rdx = key schedule to fill
#   Out:    %eax = 0; the 32-bit value (bits >> 5) + 5 is stored at
#           240(%rdx), which is where the encrypt/decrypt cores read their
#           loop count
#   Writes: everything _vpaes_schedule_core writes, plus %eax, %ecx, %r8d.
.globl vpaes_set_encrypt_key |
||||
|
.type vpaes_set_encrypt_key,@function |
||||
|
.align 16 |
||||
|
vpaes_set_encrypt_key: |
||||
|
movl %esi,%eax |
||||
|
shrl $5,%eax |
||||
|
addl $5,%eax |
||||
|
movl %eax,240(%rdx) |
||||
|
|
||||
|
# %ecx = 0 selects the encryption schedule; %r8d = 48 is the starting
# offset into .Lk_sr.
movl $0,%ecx |
||||
|
movl $48,%r8d |
||||
|
call _vpaes_schedule_core |
||||
|
xorl %eax,%eax |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key |
||||
|
|
||||
|
# vpaes_set_decrypt_key -- exported entry point that builds a decryption
# key schedule.
#   In:     %rdi = user key bytes, %esi = key size in bits,
#           %rdx = key schedule to fill
#   Out:    %eax = 0; the 32-bit value (bits >> 5) + 5 is stored at
#           240(%rdx)
#   Writes: everything _vpaes_schedule_core writes, plus %eax, %ecx, %r8d.
# The schedule is written back to front: %rdx is moved to
# 16 + 16 * count bytes past the start before the core is called, and the
# core's decryption path steps it downwards.
.globl vpaes_set_decrypt_key |
||||
|
.type vpaes_set_decrypt_key,@function |
||||
|
.align 16 |
||||
|
vpaes_set_decrypt_key: |
||||
|
movl %esi,%eax |
||||
|
shrl $5,%eax |
||||
|
addl $5,%eax |
||||
|
movl %eax,240(%rdx) |
||||
|
shll $4,%eax |
||||
|
leaq 16(%rdx,%rax,1),%rdx |
||||
|
|
||||
|
# %ecx = 1 selects the decryption schedule.
movl $1,%ecx |
||||
|
# %r8d = ((bits >> 1) & 32) ^ 32: 0 when bit 6 of the key size is set
# (e.g. 192), 32 otherwise (e.g. 128 or 256).
movl %esi,%r8d |
||||
|
shrl $1,%r8d |
||||
|
andl $32,%r8d |
||||
|
xorl $32,%r8d |
||||
|
call _vpaes_schedule_core |
||||
|
xorl %eax,%eax |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key |
||||
|
|
||||
|
# vpaes_encrypt -- exported single-block encryption.
#   In:     %rdi = 16 input bytes (unaligned load), %rsi = 16 output bytes
#           (unaligned store), %rdx = key schedule, passed through
#           untouched to _vpaes_encrypt_core
#   Writes: everything _vpaes_preheat and _vpaes_encrypt_core write.
.globl vpaes_encrypt |
||||
|
.type vpaes_encrypt,@function |
||||
|
.align 16 |
||||
|
vpaes_encrypt: |
||||
|
movdqu (%rdi),%xmm0 |
||||
|
call _vpaes_preheat |
||||
|
call _vpaes_encrypt_core |
||||
|
movdqu %xmm0,(%rsi) |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
.size vpaes_encrypt,.-vpaes_encrypt |
||||
|
|
||||
|
# vpaes_decrypt -- exported single-block decryption.
#   In:     %rdi = 16 input bytes (unaligned load), %rsi = 16 output bytes
#           (unaligned store), %rdx = key schedule, passed through
#           untouched to _vpaes_decrypt_core
#   Writes: everything _vpaes_preheat and _vpaes_decrypt_core write.
.globl vpaes_decrypt |
||||
|
.type vpaes_decrypt,@function |
||||
|
.align 16 |
||||
|
vpaes_decrypt: |
||||
|
movdqu (%rdi),%xmm0 |
||||
|
call _vpaes_preheat |
||||
|
call _vpaes_decrypt_core |
||||
|
movdqu %xmm0,(%rsi) |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
.size vpaes_decrypt,.-vpaes_decrypt |
||||
|
# vpaes_cbc_encrypt -- exported CBC-mode encryption/decryption.
#   In:     %rdi = input, %rsi = output, %rdx = length in bytes,
#           %rcx = key schedule, %r8 = 16-byte chaining value (read at
#           entry, updated on exit), %r9d = non-zero to encrypt, zero to
#           decrypt
#   Only whole 16-byte blocks are processed. If the length is below 16 the
#   function returns immediately without touching the chaining value.
#   Writes: %rcx, %rdx, %rsi, %rdi, %xmm6, %xmm7 and everything the
#           preheat and core routines write.
.globl vpaes_cbc_encrypt |
||||
|
.type vpaes_cbc_encrypt,@function |
||||
|
.align 16 |
||||
|
vpaes_cbc_encrypt: |
||||
|
# After the swap %rdx = key schedule (where the cores expect it) and
# %rcx = remaining length.
xchgq %rcx,%rdx |
||||
|
# %rcx is kept 16 below the true remaining length so that the borrow
# (CF) from each subq signals "no full block left".
subq $16,%rcx |
||||
|
jc .Lcbc_abort |
||||
|
movdqu (%r8),%xmm6 |
||||
|
# %rsi becomes (output - input), so (%rsi,%rdi,1) addresses the output
# byte that corresponds to the input cursor in %rdi.
subq %rdi,%rsi |
||||
|
call _vpaes_preheat |
||||
|
cmpl $0,%r9d |
||||
|
je .Lcbc_dec_loop |
||||
|
jmp .Lcbc_enc_loop |
||||
|
.align 16 |
||||
|
# Encrypt: xor each input block with the previous ciphertext block (or
# the initial chaining value), encrypt it, and keep the result in %xmm6.
.Lcbc_enc_loop: |
||||
|
movdqu (%rdi),%xmm0 |
||||
|
pxor %xmm6,%xmm0 |
||||
|
call _vpaes_encrypt_core |
||||
|
movdqa %xmm0,%xmm6 |
||||
|
movdqu %xmm0,(%rsi,%rdi,1) |
||||
|
leaq 16(%rdi),%rdi |
||||
|
subq $16,%rcx |
||||
|
jnc .Lcbc_enc_loop |
||||
|
jmp .Lcbc_done |
||||
|
.align 16 |
||||
|
# Decrypt: the input block is saved in %xmm7 before decryption so it can
# become the next chaining value; this also keeps in-place operation
# (input == output) correct.
.Lcbc_dec_loop: |
||||
|
movdqu (%rdi),%xmm0 |
||||
|
movdqa %xmm0,%xmm7 |
||||
|
call _vpaes_decrypt_core |
||||
|
pxor %xmm6,%xmm0 |
||||
|
movdqa %xmm7,%xmm6 |
||||
|
movdqu %xmm0,(%rsi,%rdi,1) |
||||
|
leaq 16(%rdi),%rdi |
||||
|
subq $16,%rcx |
||||
|
jnc .Lcbc_dec_loop |
||||
|
.Lcbc_done: |
||||
|
# Write the final chaining value back for the caller.
movdqu %xmm6,(%r8) |
||||
|
.Lcbc_abort: |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_preheat -- load the constants the other routines keep resident in
# %xmm9-%xmm15. All offsets are relative to .Lk_s0F and rely on the order
# of the tables in _vpaes_consts:
#   %xmm10 = .Lk_inv      %xmm11 = .Lk_inv+16    %xmm9  = .Lk_s0F
#   %xmm13 = .Lk_sb1      %xmm12 = .Lk_sb1+16
#   %xmm15 = .Lk_sb2      %xmm14 = .Lk_sb2+16
#   Writes: %r10 (left pointing at .Lk_s0F), %xmm9-%xmm15.
.type _vpaes_preheat,@function |
||||
|
.align 16 |
||||
|
_vpaes_preheat: |
||||
|
leaq .Lk_s0F(%rip),%r10 |
||||
|
movdqa -32(%r10),%xmm10 |
||||
|
movdqa -16(%r10),%xmm11 |
||||
|
movdqa 0(%r10),%xmm9 |
||||
|
movdqa 48(%r10),%xmm13 |
||||
|
movdqa 64(%r10),%xmm12 |
||||
|
movdqa 80(%r10),%xmm15 |
||||
|
movdqa 96(%r10),%xmm14 |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
.size _vpaes_preheat,.-_vpaes_preheat |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_consts -- lookup tables shared by every routine in this file.
# Each table is one or more 16-byte rows, and the block is 64-byte aligned
# so every row can be loaded with movdqa.
# The ORDER AND SIZE of the tables is load-bearing; several routines reach
# a table through a fixed offset from a neighbouring label:
#   * _vpaes_preheat reads .Lk_inv, .Lk_sb1 and .Lk_sb2 relative to
#     .Lk_s0F (-32, +48, +80);
#   * _vpaes_encrypt_core reads .Lk_sbo, .Lk_mc_forward and .Lk_sr relative
#     to .Lk_mc_backward (-96, -64, +64);
#   * _vpaes_decrypt_core reads .Lk_dsb9/.Lk_dsbb/.Lk_dsbe/.Lk_dsbo relative
#     to .Lk_dsbd (-32, +32, +64, +96) and .Lk_sr at -352;
#   * _vpaes_schedule_mangle walks .Lk_dksd/.Lk_dksb/.Lk_dkse/.Lk_dks9 as
#     one 128-byte run.
# Do not insert, remove or reorder rows without updating those offsets.
.type _vpaes_consts,@object |
||||
|
.align 64 |
||||
|
_vpaes_consts: |
||||
|
.Lk_inv: |
||||
|
.quad 0x0E05060F0D080180, 0x040703090A0B0C02 |
||||
|
.quad 0x01040A060F0B0780, 0x030D0E0C02050809 |
||||
|
|
||||
|
# 0x0F in every byte: the nibble mask kept in %xmm9.
.Lk_s0F: |
||||
|
.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F |
||||
|
|
||||
|
# Table pair applied to the input block by _vpaes_encrypt_core and to the
# user key by _vpaes_schedule_core.
.Lk_ipt: |
||||
|
.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 |
||||
|
.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 |
||||
|
|
||||
|
.Lk_sb1: |
||||
|
.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 |
||||
|
.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF |
||||
|
.Lk_sb2: |
||||
|
.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD |
||||
|
.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A |
||||
|
.Lk_sbo: |
||||
|
.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 |
||||
|
.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA |
||||
|
|
||||
|
# Four byte-permutation rows, indexed by an offset of 0, 16, 32 or 48.
.Lk_mc_forward: |
||||
|
.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 |
||||
|
.quad 0x080B0A0904070605, 0x000302010C0F0E0D |
||||
|
.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 |
||||
|
.quad 0x000302010C0F0E0D, 0x080B0A0904070605 |
||||
|
|
||||
|
# Four byte-permutation rows, indexed by an offset of 0, 16, 32 or 48.
.Lk_mc_backward: |
||||
|
.quad 0x0605040702010003, 0x0E0D0C0F0A09080B |
||||
|
.quad 0x020100030E0D0C0F, 0x0A09080B06050407 |
||||
|
.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 |
||||
|
.quad 0x0A09080B06050407, 0x020100030E0D0C0F |
||||
|
|
||||
|
# Four byte-permutation rows, indexed by an offset of 0, 16, 32 or 48;
# the first row is the identity permutation.
.Lk_sr: |
||||
|
.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 |
||||
|
.quad 0x030E09040F0A0500, 0x0B06010C07020D08 |
||||
|
.quad 0x0F060D040B020900, 0x070E050C030A0108 |
||||
|
.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 |
||||
|
|
||||
|
# Round-constant vector loaded into %xmm8 by _vpaes_schedule_core and
# consumed one byte per call by _vpaes_schedule_round.
.Lk_rcon: |
||||
|
.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 |
||||
|
|
||||
|
# 0x5B in every byte; xored into keys by the schedule routines.
.Lk_s63: |
||||
|
.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B |
||||
|
|
||||
|
# Table pair used for the last round key of an encryption schedule.
.Lk_opt: |
||||
|
.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 |
||||
|
.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 |
||||
|
|
||||
|
# Table pair used for the last round key of a decryption schedule.
.Lk_deskew: |
||||
|
.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A |
||||
|
.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# Four consecutive table pairs used by the decryption branch of
# _vpaes_schedule_mangle.
.Lk_dksd: |
||||
|
.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 |
||||
|
.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E |
||||
|
.Lk_dksb: |
||||
|
.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 |
||||
|
.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 |
||||
|
.Lk_dkse: |
||||
|
.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 |
||||
|
.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 |
||||
|
.Lk_dks9: |
||||
|
.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC |
||||
|
.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# Table pair applied to the input block by _vpaes_decrypt_core.
.Lk_dipt: |
||||
|
.quad 0x0F505B040B545F00, 0x154A411E114E451A |
||||
|
.quad 0x86E383E660056500, 0x12771772F491F194 |
||||
|
|
||||
|
# Five consecutive table pairs used by _vpaes_decrypt_core.
.Lk_dsb9: |
||||
|
.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 |
||||
|
.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 |
||||
|
.Lk_dsbd: |
||||
|
.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 |
||||
|
.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 |
||||
|
.Lk_dsbb: |
||||
|
.quad 0xD022649296B44200, 0x602646F6B0F2D404 |
||||
|
.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B |
||||
|
.Lk_dsbe: |
||||
|
.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 |
||||
|
.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 |
||||
|
.Lk_dsbo: |
||||
|
.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D |
||||
|
.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C |
||||
|
# NUL-terminated ASCII identification string; the bytes spell
# "Vector Permutaion AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
# (the spelling "Permutaion" is what the bytes contain).
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 |
||||
|
.align 64 |
||||
|
.size _vpaes_consts,.-_vpaes_consts |
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -0,0 +1,838 @@ |
|||||
|
.text |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_encrypt_core (Mach-O flavour) -- encrypt the single 16-byte block
# held in %xmm0.
#   In:     %xmm0 = input block
#           %rdx  = key schedule: round keys are read 16 bytes at a time
#                   starting at (%rdx); the loop count is the 32-bit value
#                   stored at 240(%rdx)
#           %xmm9-%xmm15 = lookup constants; presumably loaded by
#                   _vpaes_preheat, which is not visible in this part of
#                   the file -- TODO confirm
#   Out:    %xmm0 = result
#   Writes: %rax, %r9, %r10, %r11, %xmm1-%xmm5; no stack accesses.
# Every ".byte 102,15,56,0,N" line is a hand-encoded SSSE3 pshufb
# (66 0F 38 00 /r); the decoded AT&T form is given in the comment above it.
.p2align 4 |
||||
|
_vpaes_encrypt_core: |
||||
|
# %r9 = round-key cursor; %r11 = table offset that cycles through
# 16,32,48,0 (advanced by 16 and masked with 48 once per round).
movq %rdx,%r9 |
||||
|
movq $16,%r11 |
||||
|
movl 240(%rdx),%eax |
||||
|
# Nibble-split the input (assuming %xmm9 is a 0x0F byte mask), look both
# halves up in the L$k_ipt table pair and xor in the first round key.
movdqa %xmm9,%xmm1 |
||||
|
movdqa L$k_ipt(%rip),%xmm2 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
movdqu (%r9),%xmm5 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm9,%xmm0 |
||||
|
# pshufb %xmm0,%xmm2
.byte 102,15,56,0,208 |
||||
|
movdqa L$k_ipt+16(%rip),%xmm0 |
||||
|
# pshufb %xmm1,%xmm0
.byte 102,15,56,0,193 |
||||
|
pxor %xmm5,%xmm2 |
||||
|
pxor %xmm2,%xmm0 |
||||
|
# Last flag-writing instruction before the jnz in L$enc_entry; a non-zero
# pointer result keeps ZF clear, so the first pass enters L$enc_loop.
addq $16,%r9 |
||||
|
leaq L$k_mc_backward(%rip),%r10 |
||||
|
jmp L$enc_entry |
||||
|
|
||||
|
.p2align 4 |
||||
|
# One middle round. Consumes %xmm2/%xmm3 produced by L$enc_entry and the
# round key in %xmm5; leaves the new state in %xmm0. The %r10-relative
# loads depend on the constant-table layout, which is not visible here.
L$enc_loop: |
||||
|
|
||||
|
movdqa %xmm13,%xmm4 |
||||
|
# pshufb %xmm2,%xmm4
.byte 102,15,56,0,226 |
||||
|
pxor %xmm5,%xmm4 |
||||
|
movdqa %xmm12,%xmm0 |
||||
|
# pshufb %xmm3,%xmm0
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
movdqa %xmm15,%xmm5 |
||||
|
# pshufb %xmm2,%xmm5
.byte 102,15,56,0,234 |
||||
|
movdqa -64(%r11,%r10,1),%xmm1 |
||||
|
movdqa %xmm14,%xmm2 |
||||
|
# pshufb %xmm3,%xmm2
.byte 102,15,56,0,211 |
||||
|
pxor %xmm5,%xmm2 |
||||
|
movdqa (%r11,%r10,1),%xmm4 |
||||
|
movdqa %xmm0,%xmm3 |
||||
|
# pshufb %xmm1,%xmm0
.byte 102,15,56,0,193 |
||||
|
addq $16,%r9 |
||||
|
pxor %xmm2,%xmm0 |
||||
|
# pshufb %xmm4,%xmm3
.byte 102,15,56,0,220 |
||||
|
addq $16,%r11 |
||||
|
pxor %xmm0,%xmm3 |
||||
|
# pshufb %xmm1,%xmm0
.byte 102,15,56,0,193 |
||||
|
# Wrap the table offset to one of 0,16,32,48.
andq $48,%r11 |
||||
|
pxor %xmm3,%xmm0 |
||||
|
# ZF from this decrement is consumed by the jnz at the end of
# L$enc_entry; only SSE instructions sit in between.
subq $1,%rax |
||||
|
|
||||
|
# Shared per-round front end: nibble-split the state, run the lookups
# through %xmm10/%xmm11, leave the results in %xmm2/%xmm3 and load the
# next round key into %xmm5.
L$enc_entry: |
||||
|
|
||||
|
movdqa %xmm9,%xmm1 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm9,%xmm0 |
||||
|
movdqa %xmm11,%xmm5 |
||||
|
# pshufb %xmm0,%xmm5
.byte 102,15,56,0,232 |
||||
|
pxor %xmm1,%xmm0 |
||||
|
movdqa %xmm10,%xmm3 |
||||
|
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217 |
||||
|
pxor %xmm5,%xmm3 |
||||
|
movdqa %xmm10,%xmm4 |
||||
|
# pshufb %xmm0,%xmm4
.byte 102,15,56,0,224 |
||||
|
pxor %xmm5,%xmm4 |
||||
|
movdqa %xmm10,%xmm2 |
||||
|
# pshufb %xmm3,%xmm2
.byte 102,15,56,0,211 |
||||
|
pxor %xmm0,%xmm2 |
||||
|
movdqa %xmm10,%xmm3 |
||||
|
movdqu (%r9),%xmm5 |
||||
|
# pshufb %xmm4,%xmm3
.byte 102,15,56,0,220 |
||||
|
pxor %xmm1,%xmm3 |
||||
|
jnz L$enc_loop |
||||
|
|
||||
|
|
||||
|
# Final round; the table loads are again relative to L$k_mc_backward.
movdqa -96(%r10),%xmm4 |
||||
|
movdqa -80(%r10),%xmm0 |
||||
|
# pshufb %xmm2,%xmm4
.byte 102,15,56,0,226 |
||||
|
pxor %xmm5,%xmm4 |
||||
|
# pshufb %xmm3,%xmm0
.byte 102,15,56,0,195 |
||||
|
movdqa 64(%r11,%r10,1),%xmm1 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
# pshufb %xmm1,%xmm0
.byte 102,15,56,0,193 |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_decrypt_core (Mach-O flavour) -- decrypt the single 16-byte block
# held in %xmm0.
#   In:     %xmm0 = input block
#           %rdx  = key schedule: round keys are read 16 bytes at a time
#                   starting at (%rdx); the loop count is the 32-bit value
#                   stored at 240(%rdx)
#           %xmm9-%xmm11 = lookup constants; presumably loaded by
#                   _vpaes_preheat, which is not visible in this part of
#                   the file -- TODO confirm
#   Out:    %xmm0 = result
#   Writes: %rax, %r9, %r10, %r11, %xmm1-%xmm5; no stack accesses.
# ".byte 102,15,56,0,N" is a hand-encoded pshufb and
# ".byte 102,15,58,15,N,imm" a hand-encoded palignr; the decoded AT&T form
# is given in the comment above each one.
.p2align 4 |
||||
|
_vpaes_decrypt_core: |
||||
|
movq %rdx,%r9 |
||||
|
movl 240(%rdx),%eax |
||||
|
# Nibble-split the input and map it through the L$k_dipt table pair.
# Interleaved with that, %r11 is built up as
# L$k_dsbd + (((count << 4) ^ 48) & 48); it is only used for the final
# -352(%r11) load.
movdqa %xmm9,%xmm1 |
||||
|
movdqa L$k_dipt(%rip),%xmm2 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
movq %rax,%r11 |
||||
|
psrld $4,%xmm1 |
||||
|
movdqu (%r9),%xmm5 |
||||
|
shlq $4,%r11 |
||||
|
pand %xmm9,%xmm0 |
||||
|
# pshufb %xmm0,%xmm2
.byte 102,15,56,0,208 |
||||
|
movdqa L$k_dipt+16(%rip),%xmm0 |
||||
|
xorq $48,%r11 |
||||
|
leaq L$k_dsbd(%rip),%r10 |
||||
|
# pshufb %xmm1,%xmm0
.byte 102,15,56,0,193 |
||||
|
andq $48,%r11 |
||||
|
pxor %xmm5,%xmm2 |
||||
|
# %xmm5 becomes the byte-permutation applied between table lookups in the
# loop; it is rotated by palignr at the bottom of every iteration.
movdqa L$k_mc_forward+48(%rip),%xmm5 |
||||
|
pxor %xmm2,%xmm0 |
||||
|
addq $16,%r9 |
||||
|
# Last flag-writing instruction before the jnz in L$dec_entry; a non-zero
# address result keeps ZF clear, so the first pass enters L$dec_loop.
addq %r10,%r11 |
||||
|
jmp L$dec_entry |
||||
|
|
||||
|
.p2align 4 |
||||
|
# One middle round. Consumes %xmm2/%xmm3 from L$dec_entry and the round
# key already loaded into %xmm0; uses four table pairs at -32, 0, 32 and
# 64 bytes from L$k_dsbd.
L$dec_loop: |
||||
|
|
||||
|
|
||||
|
|
||||
|
movdqa -32(%r10),%xmm4 |
||||
|
# pshufb %xmm2,%xmm4
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa -16(%r10),%xmm0 |
||||
|
# pshufb %xmm3,%xmm0
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
addq $16,%r9 |
||||
|
|
||||
|
# pshufb %xmm5,%xmm0
.byte 102,15,56,0,197 |
||||
|
movdqa 0(%r10),%xmm4 |
||||
|
# pshufb %xmm2,%xmm4
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa 16(%r10),%xmm0 |
||||
|
# pshufb %xmm3,%xmm0
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
# ZF from this decrement is consumed by the jnz at the end of
# L$dec_entry; only SSE instructions sit in between.
subq $1,%rax |
||||
|
|
||||
|
# pshufb %xmm5,%xmm0
.byte 102,15,56,0,197 |
||||
|
movdqa 32(%r10),%xmm4 |
||||
|
# pshufb %xmm2,%xmm4
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa 48(%r10),%xmm0 |
||||
|
# pshufb %xmm3,%xmm0
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
|
||||
|
# pshufb %xmm5,%xmm0
.byte 102,15,56,0,197 |
||||
|
movdqa 64(%r10),%xmm4 |
||||
|
# pshufb %xmm2,%xmm4
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa 80(%r10),%xmm0 |
||||
|
# pshufb %xmm3,%xmm0
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
|
||||
|
# palignr $12,%xmm5,%xmm5  (rotate the permutation in %xmm5 by 12 bytes)
.byte 102,15,58,15,237,12 |
||||
|
|
||||
|
# Shared per-round front end: nibble-split the state, run the lookups
# through %xmm10/%xmm11, leave the results in %xmm2/%xmm3 and load the
# next round key into %xmm0.
L$dec_entry: |
||||
|
|
||||
|
movdqa %xmm9,%xmm1 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm9,%xmm0 |
||||
|
movdqa %xmm11,%xmm2 |
||||
|
# pshufb %xmm0,%xmm2
.byte 102,15,56,0,208 |
||||
|
pxor %xmm1,%xmm0 |
||||
|
movdqa %xmm10,%xmm3 |
||||
|
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
movdqa %xmm10,%xmm4 |
||||
|
# pshufb %xmm0,%xmm4
.byte 102,15,56,0,224 |
||||
|
pxor %xmm2,%xmm4 |
||||
|
movdqa %xmm10,%xmm2 |
||||
|
# pshufb %xmm3,%xmm2
.byte 102,15,56,0,211 |
||||
|
pxor %xmm0,%xmm2 |
||||
|
movdqa %xmm10,%xmm3 |
||||
|
# pshufb %xmm4,%xmm3
.byte 102,15,56,0,220 |
||||
|
pxor %xmm1,%xmm3 |
||||
|
movdqu (%r9),%xmm0 |
||||
|
jnz L$dec_loop |
||||
|
|
||||
|
|
||||
|
# Final round. NOTE(review): -352(%r11) reaches a table 352 bytes before
# L$k_dsbd; which one depends on the constant layout not shown here.
movdqa 96(%r10),%xmm4 |
||||
|
# pshufb %xmm2,%xmm4
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa 112(%r10),%xmm0 |
||||
|
movdqa -352(%r11),%xmm2 |
||||
|
# pshufb %xmm3,%xmm0
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
# pshufb %xmm2,%xmm0
.byte 102,15,56,0,194 |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_schedule_core (Mach-O flavour) -- expand a user key into a
# round-key schedule.
#   In:     %rdi = user key bytes
#           %esi = key size in bits (192 selects L$schedule_192, anything
#                  above 192 selects L$schedule_256, everything else falls
#                  into L$schedule_128)
#           %rdx = address where the first round key is stored
#           %rcx = zero for an encryption schedule, non-zero for decryption
#           %r8  = byte offset into L$k_sr
#   Writes: %rsi (reused as loop counter), %rdx, %r8, %r10, %r11,
#           %xmm0-%xmm8 and whatever _vpaes_preheat loads.
#           %xmm0-%xmm7 are zeroed before returning.
# ".byte 102,15,56,0,N" is a hand-encoded pshufb and
# ".byte 102,15,58,15,N,imm" a hand-encoded palignr.
.p2align 4 |
||||
|
_vpaes_schedule_core: |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
call _vpaes_preheat |
||||
|
|
||||
|
movdqa L$k_rcon(%rip),%xmm8 |
||||
|
movdqu (%rdi),%xmm0 |
||||
|
|
||||
|
|
||||
|
# Keep an untransformed copy of the first 16 key bytes in %xmm3, then
# transform %xmm0 through the L$k_ipt table pair; %xmm7 holds the result.
movdqa %xmm0,%xmm3 |
||||
|
leaq L$k_ipt(%rip),%r11 |
||||
|
call _vpaes_schedule_transform |
||||
|
movdqa %xmm0,%xmm7 |
||||
|
|
||||
|
leaq L$k_sr(%rip),%r10 |
||||
|
testq %rcx,%rcx |
||||
|
jnz L$schedule_am_decrypting |
||||
|
|
||||
|
|
||||
|
# Encrypting: the transformed key is stored as the first round key.
movdqu %xmm0,(%rdx) |
||||
|
jmp L$schedule_go |
||||
|
|
||||
|
# Decrypting: the untransformed key copy is permuted with the L$k_sr row
# selected by %r8 and stored instead; %r8 is then flipped with xor 48.
L$schedule_am_decrypting: |
||||
|
|
||||
|
movdqa (%r8,%r10,1),%xmm1 |
||||
|
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217 |
||||
|
movdqu %xmm3,(%rdx) |
||||
|
xorq $48,%r8 |
||||
|
|
||||
|
L$schedule_go: |
||||
|
cmpl $192,%esi |
||||
|
ja L$schedule_256 |
||||
|
je L$schedule_192 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# 128-bit path: ten schedule rounds, each but the last followed by a
# mangle-and-store; the last one is finished by L$schedule_mangle_last.
L$schedule_128: |
||||
|
# movl zero-extends into %rsi, so the decq %rsi below counts correctly.
movl $10,%esi |
||||
|
|
||||
|
L$oop_schedule_128: |
||||
|
call _vpaes_schedule_round |
||||
|
decq %rsi |
||||
|
jz L$schedule_mangle_last |
||||
|
call _vpaes_schedule_mangle |
||||
|
|
||||
|
jmp L$oop_schedule_128 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
.p2align 4 |
||||
|
# 192-bit path. Key bytes 8..23 are transformed (%r11 still points at
# L$k_ipt) and kept in %xmm6 with the low 64 bits cleared. Each of the
# four loop iterations stores three round keys, except the last, which
# leaves its third key to L$schedule_mangle_last.
L$schedule_192: |
||||
|
movdqu 8(%rdi),%xmm0 |
||||
|
call _vpaes_schedule_transform |
||||
|
|
||||
|
movdqa %xmm0,%xmm6 |
||||
|
pxor %xmm4,%xmm4 |
||||
|
# Copies the (zero) high half of %xmm4 into the low half of %xmm6.
movhlps %xmm4,%xmm6 |
||||
|
movl $4,%esi |
||||
|
|
||||
|
L$oop_schedule_192: |
||||
|
call _vpaes_schedule_round |
||||
|
# palignr $8,%xmm6,%xmm0
.byte 102,15,58,15,198,8 |
||||
|
call _vpaes_schedule_mangle |
||||
|
|
||||
|
call _vpaes_schedule_192_smear |
||||
|
call _vpaes_schedule_mangle |
||||
|
|
||||
|
call _vpaes_schedule_round |
||||
|
decq %rsi |
||||
|
jz L$schedule_mangle_last |
||||
|
call _vpaes_schedule_mangle |
||||
|
|
||||
|
call _vpaes_schedule_192_smear |
||||
|
jmp L$oop_schedule_192 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
.p2align 4 |
||||
|
# 256-bit path. Key bytes 16..31 are transformed (%r11 still points at
# L$k_ipt). Each of the seven iterations stores two round keys, except
# the last, which exits through L$schedule_mangle_last after its round.
L$schedule_256: |
||||
|
movdqu 16(%rdi),%xmm0 |
||||
|
call _vpaes_schedule_transform |
||||
|
|
||||
|
movl $7,%esi |
||||
|
|
||||
|
L$oop_schedule_256: |
||||
|
call _vpaes_schedule_mangle |
||||
|
|
||||
|
movdqa %xmm0,%xmm6 |
||||
|
|
||||
|
|
||||
|
call _vpaes_schedule_round |
||||
|
decq %rsi |
||||
|
jz L$schedule_mangle_last |
||||
|
call _vpaes_schedule_mangle |
||||
|
|
||||
|
|
||||
|
|
||||
|
# Second half of the iteration: broadcast the top dword of %xmm0, then run
# the low round on %xmm6 while %xmm7 is parked in %xmm5 and restored
# afterwards.
pshufd $255,%xmm0,%xmm0 |
||||
|
movdqa %xmm7,%xmm5 |
||||
|
movdqa %xmm6,%xmm7 |
||||
|
call _vpaes_schedule_low_round |
||||
|
movdqa %xmm5,%xmm7 |
||||
|
|
||||
|
jmp L$oop_schedule_256 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
.p2align 4 |
||||
|
# Store the final round key. Encrypting: permute %xmm0 with the L$k_sr row
# selected by %r8, use the L$k_opt tables and advance %rdx by a net 16.
# Decrypting: use the L$k_deskew tables and move %rdx back by 16.
L$schedule_mangle_last: |
||||
|
|
||||
|
leaq L$k_deskew(%rip),%r11 |
||||
|
testq %rcx,%rcx |
||||
|
jnz L$schedule_mangle_last_dec |
||||
|
|
||||
|
|
||||
|
movdqa (%r8,%r10,1),%xmm1 |
||||
|
# pshufb %xmm1,%xmm0
.byte 102,15,56,0,193 |
||||
|
leaq L$k_opt(%rip),%r11 |
||||
|
# +32 here combined with the -16 below gives the net +16.
addq $32,%rdx |
||||
|
|
||||
|
L$schedule_mangle_last_dec: |
||||
|
addq $-16,%rdx |
||||
|
pxor L$k_s63(%rip),%xmm0 |
||||
|
call _vpaes_schedule_transform |
||||
|
|
||||
|
movdqu %xmm0,(%rdx) |
||||
|
|
||||
|
|
||||
# Zero %xmm0-%xmm7 so no key-derived values are left in them on return.
pxor %xmm0,%xmm0 |
||||
|
pxor %xmm1,%xmm1 |
||||
|
pxor %xmm2,%xmm2 |
||||
|
pxor %xmm3,%xmm3 |
||||
|
pxor %xmm4,%xmm4 |
||||
|
pxor %xmm5,%xmm5 |
||||
|
pxor %xmm6,%xmm6 |
||||
|
pxor %xmm7,%xmm7 |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_schedule_192_smear (Mach-O flavour) -- helper for the 192-bit key
# schedule.
#   In:     %xmm6, %xmm7
#   Out:    %xmm0 = %xmm6 xor (dwords 0,0,0,2 of %xmm6)
#                         xor (dwords 2,3,3,3 of %xmm7)
#           %xmm6 = the same value with its low 64 bits cleared
#   Writes: %xmm0, %xmm1 (zeroed), %xmm6
.p2align 4 |
||||
|
_vpaes_schedule_192_smear: |
||||
|
# Selector $128 (0x80) picks source dwords 0,0,0,2 for result dwords 0..3.
pshufd $128,%xmm6,%xmm0 |
||||
|
pxor %xmm0,%xmm6 |
||||
|
# Selector $254 (0xFE) picks source dwords 2,3,3,3 for result dwords 0..3.
pshufd $254,%xmm7,%xmm0 |
||||
|
pxor %xmm0,%xmm6 |
||||
|
movdqa %xmm6,%xmm0 |
||||
|
pxor %xmm1,%xmm1 |
||||
|
# Copies the (zero) high half of %xmm1 into the low half of %xmm6.
movhlps %xmm1,%xmm6 |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_schedule_round (Mach-O flavour) -- one round of the key schedule.
#   In:     %xmm0 = word source for this round, %xmm7 = previous round key,
#           %xmm8 = round-constant vector, %xmm9-%xmm13 = lookup constants
#           (presumably loaded by _vpaes_preheat, which is not visible in
#           this part of the file -- TODO confirm)
#   Out:    %xmm0 = %xmm7 = new round key; %xmm8 rotated by one byte
#   Writes: %xmm0-%xmm4, %xmm7, %xmm8
# _vpaes_schedule_low_round is a second entry point that skips the
# round-constant and rotation steps; the 256-bit schedule calls it directly.
# ".byte 102,[REX,]15,58,15,N,imm" is a hand-encoded palignr and
# ".byte 102,15,56,0,N" a hand-encoded pshufb.
.p2align 4 |
||||
|
_vpaes_schedule_round: |
||||
|
|
||||
|
# Move the top byte of %xmm8 into byte 0 of the zeroed %xmm1, rotate
# %xmm8 so a different byte is on top next time, and xor the extracted
# byte into %xmm7.
pxor %xmm1,%xmm1 |
||||
|
# palignr $15,%xmm8,%xmm1
.byte 102,65,15,58,15,200,15 |
||||
|
# palignr $15,%xmm8,%xmm8
.byte 102,69,15,58,15,192,15 |
||||
|
pxor %xmm1,%xmm7 |
||||
|
|
||||
|
|
||||
|
# Broadcast the top dword of %xmm0 to all four lanes, then rotate the
# register right by one byte.
pshufd $255,%xmm0,%xmm0 |
||||
|
# palignr $1,%xmm0,%xmm0
.byte 102,15,58,15,192,1 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
_vpaes_schedule_low_round: |
||||
|
|
||||
|
# Smear %xmm7: xor it with itself shifted left by 4 bytes, then xor that
# with itself shifted left by 8 bytes, then xor in L$k_s63.
movdqa %xmm7,%xmm1 |
||||
|
pslldq $4,%xmm7 |
||||
|
pxor %xmm1,%xmm7 |
||||
|
movdqa %xmm7,%xmm1 |
||||
|
pslldq $8,%xmm7 |
||||
|
pxor %xmm1,%xmm7 |
||||
|
pxor L$k_s63(%rip),%xmm7 |
||||
|
|
||||
|
|
||||
|
# Nibble-split %xmm0 and run it through the %xmm10/%xmm11 lookups; this
# is the same instruction sequence as L$enc_entry uses, with %xmm2 in
# place of %xmm5 as scratch.
movdqa %xmm9,%xmm1 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm9,%xmm0 |
||||
|
movdqa %xmm11,%xmm2 |
||||
|
# pshufb %xmm0,%xmm2
.byte 102,15,56,0,208 |
||||
|
pxor %xmm1,%xmm0 |
||||
|
movdqa %xmm10,%xmm3 |
||||
|
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
movdqa %xmm10,%xmm4 |
||||
|
# pshufb %xmm0,%xmm4
.byte 102,15,56,0,224 |
||||
|
pxor %xmm2,%xmm4 |
||||
|
movdqa %xmm10,%xmm2 |
||||
|
# pshufb %xmm3,%xmm2
.byte 102,15,56,0,211 |
||||
|
pxor %xmm0,%xmm2 |
||||
|
movdqa %xmm10,%xmm3 |
||||
|
# pshufb %xmm4,%xmm3
.byte 102,15,56,0,220 |
||||
|
pxor %xmm1,%xmm3 |
||||
|
# Output lookups through the resident tables in %xmm13/%xmm12.
movdqa %xmm13,%xmm4 |
||||
|
# pshufb %xmm2,%xmm4
.byte 102,15,56,0,226 |
||||
|
movdqa %xmm12,%xmm0 |
||||
|
# pshufb %xmm3,%xmm0
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
|
||||
|
|
||||
|
# Combine with the smeared previous key; the result is both the return
# value (%xmm0) and the next round's previous key (%xmm7).
pxor %xmm7,%xmm0 |
||||
|
movdqa %xmm0,%xmm7 |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_schedule_transform (Mach-O flavour) -- byte-wise table transform
# of %xmm0.
#   In:     %xmm0 = value to transform
#           %r11  = address of a 32-byte table pair; it is read with
#                   movdqa, so it must be 16-byte aligned
#           %xmm9 = nibble mask (assumed to be 0x0F in every byte, set up
#                   by the preheat routine -- TODO confirm)
#   Out:    %xmm0 = row0[low nibble] xor row1[high nibble] for every byte,
#           where row0 is at (%r11) and row1 at 16(%r11)
#   Writes: %xmm0, %xmm1, %xmm2; %r11 is left unchanged.
.p2align 4 |
||||
|
_vpaes_schedule_transform: |
||||
|
# %xmm1 = high nibbles shifted down, %xmm0 = low nibbles.
movdqa %xmm9,%xmm1 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm9,%xmm0 |
||||
|
movdqa (%r11),%xmm2 |
||||
|
# pshufb %xmm0,%xmm2
.byte 102,15,56,0,208 |
||||
|
movdqa 16(%r11),%xmm0 |
||||
|
# pshufb %xmm1,%xmm0
.byte 102,15,56,0,193 |
||||
|
pxor %xmm2,%xmm0 |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_schedule_mangle (x86_64)
# Transforms the value in xmm0 and stores 16 bytes through rdx.
# In:   xmm0 = value (left unmodified; work is done on a copy in xmm4)
#       rcx  = zero selects the first path, non-zero L$schedule_mangle_dec
#       rdx  = output pointer; moved by +16 (rcx == 0) or -16 (rcx != 0)
#              BEFORE the store
#       r8   = offset added to r10 to pick a 16-byte pshufb mask
#       r10  = mask table base (presumably L$k_sr - confirm caller)
#       xmm9 = nibble mask (used on the rcx != 0 path only)
# Out:  rdx updated, r8 = (r8 - 16) & 48, 16 bytes written at (%rdx).
# Clobbers xmm1-xmm5, r11 (rcx != 0 path), flags.
# .byte 102,15,56,0,M encodes pshufb; .byte 0xf3,0xc3 is "rep ret".
.p2align 4 |
||||
|
_vpaes_schedule_mangle: |
||||
|
movdqa %xmm0,%xmm4 |
||||
|
movdqa L$k_mc_forward(%rip),%xmm5 |
||||
|
testq %rcx,%rcx |
||||
|
jnz L$schedule_mangle_dec |
||||
|
|
||||
|
|
||||
|
# rcx == 0: xmm4 ^= L$k_s63, then xmm3 = XOR of xmm4 shuffled by xmm5 once,
# twice and three times (229 = pshufb %xmm5,%xmm4)
addq $16,%rdx |
||||
|
pxor L$k_s63(%rip),%xmm4 |
||||
|
.byte 102,15,56,0,229 |
||||
|
movdqa %xmm4,%xmm3 |
||||
|
.byte 102,15,56,0,229 |
||||
|
pxor %xmm4,%xmm3 |
||||
|
.byte 102,15,56,0,229 |
||||
|
pxor %xmm4,%xmm3 |
||||
|
|
||||
|
jmp L$schedule_mangle_both |
||||
|
.p2align 4 |
||||
|
L$schedule_mangle_dec: |
||||
|
|
||||
|
# rcx != 0: split xmm4 into nibbles (xmm4 = low, xmm1 = high) and run four
# table pairs starting at L$k_dksd (offsets 0..112), shuffling the running
# result by xmm5 (221 = pshufb %xmm5,%xmm3) between pairs
leaq L$k_dksd(%rip),%r11 |
||||
|
movdqa %xmm9,%xmm1 |
||||
|
pandn %xmm4,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm9,%xmm4 |
||||
|
|
||||
|
movdqa 0(%r11),%xmm2 |
||||
|
.byte 102,15,56,0,212 |
||||
|
movdqa 16(%r11),%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
.byte 102,15,56,0,221 |
||||
|
|
||||
|
movdqa 32(%r11),%xmm2 |
||||
|
.byte 102,15,56,0,212 |
||||
|
pxor %xmm3,%xmm2 |
||||
|
movdqa 48(%r11),%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
.byte 102,15,56,0,221 |
||||
|
|
||||
|
movdqa 64(%r11),%xmm2 |
||||
|
.byte 102,15,56,0,212 |
||||
|
pxor %xmm3,%xmm2 |
||||
|
movdqa 80(%r11),%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
.byte 102,15,56,0,221 |
||||
|
|
||||
|
movdqa 96(%r11),%xmm2 |
||||
|
.byte 102,15,56,0,212 |
||||
|
pxor %xmm3,%xmm2 |
||||
|
movdqa 112(%r11),%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
|
||||
|
addq $-16,%rdx |
||||
|
|
||||
|
L$schedule_mangle_both: |
||||
|
# common tail: permute xmm3 with the mask at (r8 + r10), step r8, store
movdqa (%r8,%r10,1),%xmm1 |
||||
|
.byte 102,15,56,0,217 |
||||
|
addq $-16,%r8 |
||||
|
andq $48,%r8 |
||||
|
movdqu %xmm3,(%rdx) |
||||
|
.byte 0xf3,0xc3 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_set_encrypt_key (x86_64, exported)
# In:  esi = key size in bits, rdx = key-schedule structure.
#      rdi is not touched here; presumably the user key pointer consumed by
#      _vpaes_schedule_core - confirm there.
# Stores (bits >> 5) + 5 at 240(%rdx), then calls _vpaes_schedule_core with
# ecx = 0 and r8d = 48.
# Out: eax = 0 (always).
.globl _vpaes_set_encrypt_key |
||||
|
|
||||
|
.p2align 4 |
||||
|
_vpaes_set_encrypt_key: |
||||
|
# 240(%rdx) = (bits >> 5) + 5, i.e. 9 / 11 / 13 for 128 / 192 / 256
movl %esi,%eax |
||||
|
shrl $5,%eax |
||||
|
addl $5,%eax |
||||
|
movl %eax,240(%rdx) |
||||
|
|
||||
|
movl $0,%ecx |
||||
|
movl $48,%r8d |
||||
|
call _vpaes_schedule_core |
||||
|
xorl %eax,%eax |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
|
||||
|
|
||||
|
# _vpaes_set_decrypt_key (x86_64, exported)
# In:  esi = key size in bits, rdx = key-schedule structure.
#      rdi is not touched here; presumably the user key pointer consumed by
#      _vpaes_schedule_core - confirm there.
# Stores n = (bits >> 5) + 5 at 240(%rdx), advances rdx to rdx + 16 + 16*n,
# then calls _vpaes_schedule_core with ecx = 1 and
# r8d = ((bits >> 1) & 32) ^ 32 (32 for 128/256-bit keys, 0 for 192-bit).
# Out: eax = 0 (always).
.globl _vpaes_set_decrypt_key |
||||
|
|
||||
|
.p2align 4 |
||||
|
_vpaes_set_decrypt_key: |
||||
|
movl %esi,%eax |
||||
|
shrl $5,%eax |
||||
|
addl $5,%eax |
||||
|
movl %eax,240(%rdx) |
||||
|
# rdx = rdx + 16 + 16*n; _vpaes_schedule_mangle steps rdx by -16 when rcx != 0
shll $4,%eax |
||||
|
leaq 16(%rdx,%rax,1),%rdx |
||||
|
|
||||
|
movl $1,%ecx |
||||
|
movl %esi,%r8d |
||||
|
shrl $1,%r8d |
||||
|
andl $32,%r8d |
||||
|
xorl $32,%r8d |
||||
|
call _vpaes_schedule_core |
||||
|
xorl %eax,%eax |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
|
||||
|
|
||||
|
# _vpaes_encrypt (x86_64, exported)
# Loads 16 bytes from (%rdi) (unaligned load), runs _vpaes_preheat and
# _vpaes_encrypt_core, and stores the 16-byte xmm0 result to (%rsi).
# rdx is passed through untouched; _vpaes_encrypt_core uses it as the key
# schedule pointer (it reads 240(%rdx)).
.globl _vpaes_encrypt |
||||
|
|
||||
|
.p2align 4 |
||||
|
_vpaes_encrypt: |
||||
|
movdqu (%rdi),%xmm0 |
||||
|
call _vpaes_preheat |
||||
|
call _vpaes_encrypt_core |
||||
|
movdqu %xmm0,(%rsi) |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
|
||||
|
|
||||
|
# _vpaes_decrypt (x86_64, exported)
# Loads 16 bytes from (%rdi) (unaligned load), runs _vpaes_preheat and
# _vpaes_decrypt_core, and stores the 16-byte xmm0 result to (%rsi).
# rdx is passed through untouched; presumably the key schedule pointer
# expected by _vpaes_decrypt_core - confirm there.
.globl _vpaes_decrypt |
||||
|
|
||||
|
.p2align 4 |
||||
|
_vpaes_decrypt: |
||||
|
movdqu (%rdi),%xmm0 |
||||
|
call _vpaes_preheat |
||||
|
call _vpaes_decrypt_core |
||||
|
movdqu %xmm0,(%rsi) |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
|
||||
|
# _vpaes_cbc_encrypt (x86_64, exported)
# In:  rdi = input, rsi = output, rdx = length in bytes, rcx = key schedule,
#      r8 = 16-byte chaining value (read and written back), r9d = direction
#      (0 selects the decrypt loop, anything else the encrypt loop).
# Processes whole 16-byte blocks only: it returns at once, without touching
# (%r8), when length < 16, and a trailing partial block is ignored.
# Register use inside the loops: rcx = remaining length - 16, rdx = key,
# rsi = out - in (so the store address is rsi + rdi), xmm6 = chaining value,
# xmm7 = saved ciphertext block (decrypt).
# NOTE(review): this relies on the core routines preserving rcx, rdi, rsi, r8,
# xmm6 and xmm7 - confirm in _vpaes_encrypt_core / _vpaes_decrypt_core.
.globl _vpaes_cbc_encrypt |
||||
|
|
||||
|
.p2align 4 |
||||
|
_vpaes_cbc_encrypt: |
||||
|
# swap so that rdx = key (as the core routines expect) and rcx = length
xchgq %rcx,%rdx |
||||
|
subq $16,%rcx |
||||
|
jc L$cbc_abort |
||||
|
movdqu (%r8),%xmm6 |
||||
|
subq %rdi,%rsi |
||||
|
call _vpaes_preheat |
||||
|
cmpl $0,%r9d |
||||
|
je L$cbc_dec_loop |
||||
|
jmp L$cbc_enc_loop |
||||
|
.p2align 4 |
||||
|
L$cbc_enc_loop: |
||||
|
# C[i] = E(P[i] ^ C[i-1]); xmm6 carries C[i-1]
movdqu (%rdi),%xmm0 |
||||
|
pxor %xmm6,%xmm0 |
||||
|
call _vpaes_encrypt_core |
||||
|
movdqa %xmm0,%xmm6 |
||||
|
movdqu %xmm0,(%rsi,%rdi,1) |
||||
|
leaq 16(%rdi),%rdi |
||||
|
subq $16,%rcx |
||||
|
jnc L$cbc_enc_loop |
||||
|
jmp L$cbc_done |
||||
|
.p2align 4 |
||||
|
L$cbc_dec_loop: |
||||
|
# P[i] = D(C[i]) ^ C[i-1]; C[i] is kept in xmm7 so in-place operation works
movdqu (%rdi),%xmm0 |
||||
|
movdqa %xmm0,%xmm7 |
||||
|
call _vpaes_decrypt_core |
||||
|
pxor %xmm6,%xmm0 |
||||
|
movdqa %xmm7,%xmm6 |
||||
|
movdqu %xmm0,(%rsi,%rdi,1) |
||||
|
leaq 16(%rdi),%rdi |
||||
|
subq $16,%rcx |
||||
|
jnc L$cbc_dec_loop |
||||
|
L$cbc_done: |
||||
|
# write the final chaining value back for the next call
movdqu %xmm6,(%r8) |
||||
|
L$cbc_abort: |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_preheat (x86_64)
# Loads constant tables into xmm9-xmm15, addressed relative to L$k_s0F
# (offsets follow the layout of _vpaes_consts):
#   xmm10 = L$k_inv        xmm11 = L$k_inv+16     xmm9  = L$k_s0F
#   xmm13 = L$k_sb1        xmm12 = L$k_sb1+16
#   xmm15 = L$k_sb2        xmm14 = L$k_sb2+16
# Clobbers r10 (left pointing at L$k_s0F).
.p2align 4 |
||||
|
_vpaes_preheat: |
||||
|
leaq L$k_s0F(%rip),%r10 |
||||
|
movdqa -32(%r10),%xmm10 |
||||
|
movdqa -16(%r10),%xmm11 |
||||
|
movdqa 0(%r10),%xmm9 |
||||
|
movdqa 48(%r10),%xmm13 |
||||
|
movdqa 64(%r10),%xmm12 |
||||
|
movdqa 80(%r10),%xmm15 |
||||
|
movdqa 96(%r10),%xmm14 |
||||
|
# rep ret
.byte 0xf3,0xc3 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# _vpaes_consts (x86_64): 64-byte aligned block of 16-byte constant rows.
# The rows are loaded with movdqa and several are reached by fixed offsets
# from a neighbouring label (e.g. _vpaes_preheat addresses L$k_inv and
# L$k_sb1/L$k_sb2 relative to L$k_s0F), so the order and size of the tables
# must not change.
.p2align 6 |
||||
|
_vpaes_consts: |
||||
|
L$k_inv: |
||||
|
.quad 0x0E05060F0D080180, 0x040703090A0B0C02 |
||||
|
.quad 0x01040A060F0B0780, 0x030D0E0C02050809 |
||||
|
|
||||
|
# byte mask 0x0F: separates low and high nibbles before pshufb lookups
L$k_s0F: |
||||
|
.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F |
||||
|
|
||||
|
L$k_ipt: |
||||
|
.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 |
||||
|
.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 |
||||
|
|
||||
|
L$k_sb1: |
||||
|
.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 |
||||
|
.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF |
||||
|
L$k_sb2: |
||||
|
.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD |
||||
|
.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A |
||||
|
L$k_sbo: |
||||
|
.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 |
||||
|
.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA |
||||
|
|
||||
|
# four 16-byte pshufb masks; row 0 is loaded by _vpaes_schedule_mangle
L$k_mc_forward: |
||||
|
.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 |
||||
|
.quad 0x080B0A0904070605, 0x000302010C0F0E0D |
||||
|
.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 |
||||
|
.quad 0x000302010C0F0E0D, 0x080B0A0904070605 |
||||
|
|
||||
|
L$k_mc_backward: |
||||
|
.quad 0x0605040702010003, 0x0E0D0C0F0A09080B |
||||
|
.quad 0x020100030E0D0C0F, 0x0A09080B06050407 |
||||
|
.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 |
||||
|
.quad 0x0A09080B06050407, 0x020100030E0D0C0F |
||||
|
|
||||
|
# four 16-byte pshufb masks; the first row is the identity permutation
L$k_sr: |
||||
|
.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 |
||||
|
.quad 0x030E09040F0A0500, 0x0B06010C07020D08 |
||||
|
.quad 0x0F060D040B020900, 0x070E050C030A0108 |
||||
|
.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 |
||||
|
|
||||
|
L$k_rcon: |
||||
|
.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 |
||||
|
|
||||
|
# XORed into the key-schedule state by _vpaes_schedule_low_round and
# _vpaes_schedule_mangle
L$k_s63: |
||||
|
.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B |
||||
|
|
||||
|
L$k_opt: |
||||
|
.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 |
||||
|
.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 |
||||
|
|
||||
|
L$k_deskew: |
||||
|
.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A |
||||
|
.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
# L$k_dksd..L$k_dks9: 128 contiguous bytes read by _vpaes_schedule_mangle at
# offsets 0..112 from L$k_dksd
L$k_dksd: |
||||
|
.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 |
||||
|
.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E |
||||
|
L$k_dksb: |
||||
|
.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 |
||||
|
.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 |
||||
|
L$k_dkse: |
||||
|
.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 |
||||
|
.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 |
||||
|
L$k_dks9: |
||||
|
.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC |
||||
|
.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
L$k_dipt: |
||||
|
.quad 0x0F505B040B545F00, 0x154A411E114E451A |
||||
|
.quad 0x86E383E660056500, 0x12771772F491F194 |
||||
|
|
||||
|
L$k_dsb9: |
||||
|
.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 |
||||
|
.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 |
||||
|
L$k_dsbd: |
||||
|
.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 |
||||
|
.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 |
||||
|
L$k_dsbb: |
||||
|
.quad 0xD022649296B44200, 0x602646F6B0F2D404 |
||||
|
.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B |
||||
|
L$k_dsbe: |
||||
|
.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 |
||||
|
.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 |
||||
|
L$k_dsbo: |
||||
|
.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D |
||||
|
.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C |
||||
|
# NUL-terminated identification string embedded after the tables:
# "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)".
# Spelling corrected from "Permutaion" (a 't', byte 116, was missing) so it
# matches the string in the i386 variant. Nothing is addressed relative to
# the end of this string; it is followed only by alignment padding.
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 |
||||
|
.p2align 6 |
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -0,0 +1,661 @@ |
|||||
|
.file "vpaes-x86.s" |
||||
|
.text |
||||
|
# .L_vpaes_consts (i386): 64-byte aligned table of 16-byte rows, one .long
# line per row. All code in this file addresses it through
# %ebp = .L_vpaes_consts+0x30, so row k (counting from 0) lives at
# (16*k - 48)(%ebp). The group comments below give that offset and where the
# code references it. Rows are loaded with movdqa; order and size must not
# change.
.align 64 |
||||
|
.L_vpaes_consts: |
||||
|
# offsets -48 and -32: row -48 is kept in xmm7 by _vpaes_preheat, row -32 is
# loaded in the enc/dec entry sections
.long 218628480,235210255,168496130,67568393 |
||||
|
.long 252381056,17041926,33884169,51187212 |
||||
|
# offset -16: byte mask 0x0F (nibble mask), kept in xmm6 by _vpaes_preheat
.long 252645135,252645135,252645135,252645135 |
||||
|
# offsets 0 and 16: first table pair used by _vpaes_encrypt_core and, via
# ebx = ebp, by _vpaes_schedule_core
.long 1512730624,3266504856,1377990664,3401244816 |
||||
|
.long 830229760,1275146365,2969422977,3447763452 |
||||
|
# offsets 32..112: table pairs used by the _vpaes_encrypt_core rounds (32/48
# also by _vpaes_schedule_round)
.long 3411033600,2979783055,338359620,2782886510 |
||||
|
.long 4209124096,907596821,221174255,1006095553 |
||||
|
.long 191964160,3799684038,3164090317,1589111125 |
||||
|
.long 182528256,1777043520,2877432650,3265356744 |
||||
|
.long 1874708224,3503451415,3305285752,363511674 |
||||
|
.long 1606117888,3487855781,1093350906,2384367825 |
||||
|
# offsets 128..176: four pshufb masks (128 used by _vpaes_schedule_mangle,
# 176 by _vpaes_decrypt_core, -64(%ebx,%ecx) in _vpaes_encrypt_core)
.long 197121,67569157,134941193,202313229 |
||||
|
.long 67569157,134941193,202313229,197121 |
||||
|
.long 134941193,202313229,197121,67569157 |
||||
|
.long 202313229,197121,67569157,134941193 |
||||
|
# offsets 192..240: four pshufb masks ((%ebx,%ecx) in _vpaes_encrypt_core)
.long 33619971,100992007,168364043,235736079 |
||||
|
.long 235736079,33619971,100992007,168364043 |
||||
|
.long 168364043,235736079,33619971,100992007 |
||||
|
.long 100992007,168364043,235736079,33619971 |
||||
|
# offsets 256..304: four pshufb masks indexed as 256(%ebp,%ecx,1)
.long 50462976,117835012,185207048,252579084 |
||||
|
.long 252314880,51251460,117574920,184942860 |
||||
|
.long 184682752,252054788,50987272,118359308 |
||||
|
.long 118099200,185467140,251790600,50727180 |
||||
|
# offset 320: loaded once by _vpaes_schedule_core and kept on the stack
.long 2946363062,528716217,1300004225,1881839624 |
||||
|
# offset 336: byte 0x5B repeated, XORed in by the key-schedule routines
.long 1532713819,1532713819,1532713819,1532713819 |
||||
|
# offsets 352/368 and 384/400: table pairs selected in .L010schedule_mangle_last
.long 3602276352,4288629033,3737020424,4153884961 |
||||
|
.long 1354558464,32357713,2958822624,3775749553 |
||||
|
.long 1201988352,132424512,1572796698,503232858 |
||||
|
.long 2213177600,1597421020,4103937655,675398315 |
||||
|
# offsets 416..528: eight rows read by _vpaes_schedule_mangle through esi
.long 2749646592,4273543773,1511898873,121693092 |
||||
|
.long 3040248576,1103263732,2871565598,1608280554 |
||||
|
.long 2236667136,2588920351,482954393,64377734 |
||||
|
.long 3069987328,291237287,2117370568,3650299247 |
||||
|
.long 533321216,3573750986,2572112006,1401264716 |
||||
|
.long 1339849704,2721158661,548607111,3445553514 |
||||
|
.long 2128193280,3054596040,2183486460,1257083700 |
||||
|
.long 655635200,1165381986,3923443150,2344132524 |
||||
|
# offsets 544..720: tables used by _vpaes_decrypt_core via ebx = ebp+608
# (-64(%ebx) .. 112(%ebx))
.long 190078720,256924420,290342170,357187870 |
||||
|
.long 1610966272,2263057382,4103205268,309794674 |
||||
|
.long 2592527872,2233205587,1335446729,3402964816 |
||||
|
.long 3973531904,3225098121,3002836325,1918774430 |
||||
|
.long 3870401024,2102906079,2284471353,4117666579 |
||||
|
.long 617007872,1021508343,366931923,691083277 |
||||
|
.long 2528395776,3491914898,2968704004,1613121270 |
||||
|
.long 3445188352,3247741094,844474987,4093578302 |
||||
|
.long 651481088,1190302358,1689581232,574775300 |
||||
|
.long 4289380608,206939853,2555985458,2489840491 |
||||
|
.long 2130264064,327674451,3566485037,3349835193 |
||||
|
.long 2470714624,316102159,3636825756,3393945945 |
||||
|
# NUL-terminated ASCII identification string:
# "Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)"
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 |
||||
|
.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83 |
||||
|
.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117 |
||||
|
.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105 |
||||
|
.byte 118,101,114,115,105,116,121,41,0 |
||||
|
.align 64 |
||||
|
# _vpaes_preheat (i386)
# Position-independent setup. The caller preloads
# ebp = .L_vpaes_consts+0x30 - <label right after its call>; adding the
# return address found at (%esp) turns ebp into the run-time address of
# .L_vpaes_consts+0x30.
# Out: ebp = table base, xmm7 = row at -48(%ebp), xmm6 = row at -16(%ebp)
#      (the 0x0F byte mask). Must be entered by a call from such a site.
.type _vpaes_preheat,@function |
||||
|
.align 16 |
||||
|
_vpaes_preheat: |
||||
|
addl (%esp),%ebp |
||||
|
movdqa -48(%ebp),%xmm7 |
||||
|
movdqa -16(%ebp),%xmm6 |
||||
|
ret |
||||
|
.size _vpaes_preheat,.-_vpaes_preheat |
||||
|
# _vpaes_encrypt_core (i386)
# Transforms the 16-byte block in xmm0 using the key schedule at edx.
# In:   xmm0 = input block, edx = key schedule (round count at 240(%edx),
#       16-byte round keys from (%edx) on), ebp/xmm6/xmm7 as set up by
#       _vpaes_preheat.
# Out:  xmm0 = result.
# Clobbers eax (round counter), ebx, ecx, edx (advanced past the keys),
# xmm1-xmm5, flags. xmm6 and xmm7 are only read.
# .byte 102,15,56,0,M encodes pshufb (SSSE3).
.type _vpaes_encrypt_core,@function |
||||
|
.align 16 |
||||
|
_vpaes_encrypt_core: |
||||
|
movl $16,%ecx |
||||
|
movl 240(%edx),%eax |
||||
|
# input transform: nibble-split xmm0, look up tables at 0/16(%ebp), XOR with
# the first round key
movdqa %xmm6,%xmm1 |
||||
|
movdqa (%ebp),%xmm2 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
movdqu (%edx),%xmm5 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm6,%xmm0 |
||||
|
.byte 102,15,56,0,208 |
||||
|
movdqa 16(%ebp),%xmm0 |
||||
|
.byte 102,15,56,0,193 |
||||
|
pxor %xmm5,%xmm2 |
||||
|
pxor %xmm2,%xmm0 |
||||
|
# this addl also leaves ZF clear for the first pass through the jnz below
# (leal and jmp do not alter flags)
addl $16,%edx |
||||
|
leal 192(%ebp),%ebx |
||||
|
jmp .L000enc_entry |
||||
|
.align 16 |
||||
|
.L001enc_loop: |
||||
|
# middle of a round: table pairs at 32/48 and 64/80(%ebp), masks selected by
# ecx from the tables around ebx; xmm5 holds the current round key
movdqa 32(%ebp),%xmm4 |
||||
|
.byte 102,15,56,0,226 |
||||
|
pxor %xmm5,%xmm4 |
||||
|
movdqa 48(%ebp),%xmm0 |
||||
|
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
movdqa 64(%ebp),%xmm5 |
||||
|
.byte 102,15,56,0,234 |
||||
|
movdqa -64(%ebx,%ecx,1),%xmm1 |
||||
|
movdqa 80(%ebp),%xmm2 |
||||
|
.byte 102,15,56,0,211 |
||||
|
pxor %xmm5,%xmm2 |
||||
|
movdqa (%ebx,%ecx,1),%xmm4 |
||||
|
movdqa %xmm0,%xmm3 |
||||
|
.byte 102,15,56,0,193 |
||||
|
addl $16,%edx |
||||
|
pxor %xmm2,%xmm0 |
||||
|
.byte 102,15,56,0,220 |
||||
|
# ecx cycles through 0,16,32,48
addl $16,%ecx |
||||
|
pxor %xmm0,%xmm3 |
||||
|
.byte 102,15,56,0,193 |
||||
|
andl $48,%ecx |
||||
|
pxor %xmm3,%xmm0 |
||||
|
# round counter; its ZF is consumed by the jnz at the end of the entry
# section (no instruction in between modifies flags)
subl $1,%eax |
||||
|
.L000enc_entry: |
||||
|
# top of round: nibble split with xmm6, lookups through -32(%ebp) and xmm7
movdqa %xmm6,%xmm1 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm6,%xmm0 |
||||
|
movdqa -32(%ebp),%xmm5 |
||||
|
.byte 102,15,56,0,232 |
||||
|
pxor %xmm1,%xmm0 |
||||
|
movdqa %xmm7,%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm5,%xmm3 |
||||
|
movdqa %xmm7,%xmm4 |
||||
|
.byte 102,15,56,0,224 |
||||
|
pxor %xmm5,%xmm4 |
||||
|
movdqa %xmm7,%xmm2 |
||||
|
.byte 102,15,56,0,211 |
||||
|
pxor %xmm0,%xmm2 |
||||
|
movdqa %xmm7,%xmm3 |
||||
|
# next round key
movdqu (%edx),%xmm5 |
||||
|
.byte 102,15,56,0,220 |
||||
|
pxor %xmm1,%xmm3 |
||||
|
jnz .L001enc_loop |
||||
|
# last round: table pair at 96/112(%ebp), XOR final key, final permutation
# with the mask at 64(%ebx,%ecx)
movdqa 96(%ebp),%xmm4 |
||||
|
movdqa 112(%ebp),%xmm0 |
||||
|
.byte 102,15,56,0,226 |
||||
|
pxor %xmm5,%xmm4 |
||||
|
.byte 102,15,56,0,195 |
||||
|
movdqa 64(%ebx,%ecx,1),%xmm1 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
.byte 102,15,56,0,193 |
||||
|
ret |
||||
|
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core |
||||
|
# _vpaes_decrypt_core (i386)
# Transforms the 16-byte block in xmm0 using the key schedule at edx.
# In:   xmm0 = input block, edx = key schedule (round count at 240(%edx),
#       16-byte round keys from (%edx) on), ebp/xmm6/xmm7 as set up by
#       _vpaes_preheat.
# Out:  xmm0 = result.
# Clobbers eax (round counter), ebx (= ebp+608), ecx, edx (advanced past the
# keys), xmm1-xmm5, flags. xmm6 and xmm7 are only read.
# .byte 102,15,56,0,M encodes pshufb; .byte 102,15,58,15,M,i encodes palignr.
.type _vpaes_decrypt_core,@function |
||||
|
.align 16 |
||||
|
_vpaes_decrypt_core: |
||||
|
movl 240(%edx),%eax |
||||
|
leal 608(%ebp),%ebx |
||||
|
# input transform with the table pair at -64/-48(%ebx), interleaved with the
# computation ecx = ((rounds << 4) ^ 48) & 48
movdqa %xmm6,%xmm1 |
||||
|
movdqa -64(%ebx),%xmm2 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
movl %eax,%ecx |
||||
|
psrld $4,%xmm1 |
||||
|
movdqu (%edx),%xmm5 |
||||
|
shll $4,%ecx |
||||
|
pand %xmm6,%xmm0 |
||||
|
.byte 102,15,56,0,208 |
||||
|
movdqa -48(%ebx),%xmm0 |
||||
|
xorl $48,%ecx |
||||
|
.byte 102,15,56,0,193 |
||||
|
andl $48,%ecx |
||||
|
pxor %xmm5,%xmm2 |
||||
|
movdqa 176(%ebp),%xmm5 |
||||
|
pxor %xmm2,%xmm0 |
||||
|
# this addl also leaves ZF clear for the first pass through the jnz below
# (leal and jmp do not alter flags)
addl $16,%edx |
||||
|
# ecx = ebp + 256 + ecx: address of the final output permutation mask
leal -352(%ebx,%ecx,1),%ecx |
||||
|
jmp .L002dec_entry |
||||
|
.align 16 |
||||
|
.L003dec_loop: |
||||
|
# four table pairs at -32..80(%ebx); after each of the first three the
# running value is permuted by xmm5 (197 = pshufb %xmm5,%xmm0)
movdqa -32(%ebx),%xmm4 |
||||
|
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa -16(%ebx),%xmm0 |
||||
|
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
addl $16,%edx |
||||
|
.byte 102,15,56,0,197 |
||||
|
movdqa (%ebx),%xmm4 |
||||
|
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa 16(%ebx),%xmm0 |
||||
|
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
# round counter; its ZF is consumed by the jnz at the end of the entry
# section (the SSE instructions in between do not modify flags)
subl $1,%eax |
||||
|
.byte 102,15,56,0,197 |
||||
|
movdqa 32(%ebx),%xmm4 |
||||
|
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa 48(%ebx),%xmm0 |
||||
|
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
.byte 102,15,56,0,197 |
||||
|
movdqa 64(%ebx),%xmm4 |
||||
|
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa 80(%ebx),%xmm0 |
||||
|
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
# palignr $12,%xmm5,%xmm5 : rotate the permutation mask for the next round
.byte 102,15,58,15,237,12 |
||||
|
.L002dec_entry: |
||||
|
# top of round: nibble split with xmm6, lookups through -32(%ebp) and xmm7
movdqa %xmm6,%xmm1 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm6,%xmm0 |
||||
|
movdqa -32(%ebp),%xmm2 |
||||
|
.byte 102,15,56,0,208 |
||||
|
pxor %xmm1,%xmm0 |
||||
|
movdqa %xmm7,%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
movdqa %xmm7,%xmm4 |
||||
|
.byte 102,15,56,0,224 |
||||
|
pxor %xmm2,%xmm4 |
||||
|
movdqa %xmm7,%xmm2 |
||||
|
.byte 102,15,56,0,211 |
||||
|
pxor %xmm0,%xmm2 |
||||
|
movdqa %xmm7,%xmm3 |
||||
|
.byte 102,15,56,0,220 |
||||
|
pxor %xmm1,%xmm3 |
||||
|
# next round key
movdqu (%edx),%xmm0 |
||||
|
jnz .L003dec_loop |
||||
|
# last round: table pair at 96/112(%ebx), then permute with the mask at (%ecx)
movdqa 96(%ebx),%xmm4 |
||||
|
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa 112(%ebx),%xmm0 |
||||
|
movdqa (%ecx),%xmm2 |
||||
|
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
.byte 102,15,56,0,194 |
||||
|
ret |
||||
|
.size _vpaes_decrypt_core,.-_vpaes_decrypt_core |
||||
|
# _vpaes_schedule_core (i386)
# Key-schedule driver shared by vpaes_set_encrypt_key / vpaes_set_decrypt_key.
# In:   esi = user key bytes, eax = key size in bits, edx = output pointer,
#       edi = 0 for the encrypt schedule / non-zero for decrypt,
#       ecx = initial offset into the mask table at 256(%ebp),
#       ebp = .L_vpaes_consts+0x30 - <caller's return label> (made absolute
#       here by adding the return address),
#       esp = caller's 16-byte aligned scratch frame minus the return address.
# Stack use (addresses as seen inside this function): 4(%esp) holds the
# running constant loaded from 320(%ebp) (read by _vpaes_schedule_round as
# its 8(%esp)), 20(%esp) is a spill slot for xmm7.
# Out:  schedule written through edx; xmm0-xmm7 zeroed before returning.
# Clobbers eax, ebx, ecx, edx, esi (via _vpaes_schedule_mangle), ebp, flags.
.type _vpaes_schedule_core,@function |
||||
|
.align 16 |
||||
|
_vpaes_schedule_core: |
||||
|
addl (%esp),%ebp |
||||
|
movdqu (%esi),%xmm0 |
||||
|
movdqa 320(%ebp),%xmm2 |
||||
|
movdqa %xmm0,%xmm3 |
||||
|
# ebx selects the table pair at 0/16(%ebp) for the input transform
leal (%ebp),%ebx |
||||
|
movdqa %xmm2,4(%esp) |
||||
|
call _vpaes_schedule_transform |
||||
|
movdqa %xmm0,%xmm7 |
||||
|
testl %edi,%edi |
||||
|
jnz .L004schedule_am_decrypting |
||||
|
# encrypt: store the transformed first 16 key bytes
movdqu %xmm0,(%edx) |
||||
|
jmp .L005schedule_go |
||||
|
.L004schedule_am_decrypting: |
||||
|
# decrypt: store the untransformed key bytes permuted by the mask at
# 256(%ebp,%ecx), then flip ecx
movdqa 256(%ebp,%ecx,1),%xmm1 |
||||
|
.byte 102,15,56,0,217 |
||||
|
movdqu %xmm3,(%edx) |
||||
|
xorl $48,%ecx |
||||
|
.L005schedule_go: |
||||
|
# dispatch on key size: > 192 -> 256 path, == 192 -> 192 path, else 128 path
cmpl $192,%eax |
||||
|
ja .L006schedule_256 |
||||
|
je .L007schedule_192 |
||||
|
.L008schedule_128: |
||||
|
# ten rounds; every result but the last goes through _vpaes_schedule_mangle
movl $10,%eax |
||||
|
.L009loop_schedule_128: |
||||
|
call _vpaes_schedule_round |
||||
|
decl %eax |
||||
|
jz .L010schedule_mangle_last |
||||
|
call _vpaes_schedule_mangle |
||||
|
jmp .L009loop_schedule_128 |
||||
|
.align 16 |
||||
|
.L007schedule_192: |
||||
|
# key bytes 8..23 are transformed; xmm6 keeps only the upper half
# (movhlps from the zeroed xmm4 clears its low 8 bytes)
movdqu 8(%esi),%xmm0 |
||||
|
call _vpaes_schedule_transform |
||||
|
movdqa %xmm0,%xmm6 |
||||
|
pxor %xmm4,%xmm4 |
||||
|
movhlps %xmm4,%xmm6 |
||||
|
movl $4,%eax |
||||
|
.L011loop_schedule_192: |
||||
|
call _vpaes_schedule_round |
||||
|
# palignr $8,%xmm6,%xmm0
.byte 102,15,58,15,198,8 |
||||
|
call _vpaes_schedule_mangle |
||||
|
call _vpaes_schedule_192_smear |
||||
|
call _vpaes_schedule_mangle |
||||
|
call _vpaes_schedule_round |
||||
|
decl %eax |
||||
|
jz .L010schedule_mangle_last |
||||
|
call _vpaes_schedule_mangle |
||||
|
call _vpaes_schedule_192_smear |
||||
|
jmp .L011loop_schedule_192 |
||||
|
.align 16 |
||||
|
.L006schedule_256: |
||||
|
# key bytes 16..31 are transformed, then seven iterations
movdqu 16(%esi),%xmm0 |
||||
|
call _vpaes_schedule_transform |
||||
|
movl $7,%eax |
||||
|
.L012loop_schedule_256: |
||||
|
call _vpaes_schedule_mangle |
||||
|
movdqa %xmm0,%xmm6 |
||||
|
call _vpaes_schedule_round |
||||
|
decl %eax |
||||
|
jz .L010schedule_mangle_last |
||||
|
call _vpaes_schedule_mangle |
||||
|
# low round on the other half: xmm7 is spilled to 20(%esp) and temporarily
# replaced by xmm6
pshufd $255,%xmm0,%xmm0 |
||||
|
movdqa %xmm7,20(%esp) |
||||
|
movdqa %xmm6,%xmm7 |
||||
|
call .L_vpaes_schedule_low_round |
||||
|
movdqa 20(%esp),%xmm7 |
||||
|
jmp .L012loop_schedule_256 |
||||
|
.align 16 |
||||
|
.L010schedule_mangle_last: |
||||
|
# final key: decrypt uses the table pair at 384(%ebp) and stores at edx-16;
# encrypt first permutes xmm0, uses the pair at 352(%ebp), stores at edx+16
leal 384(%ebp),%ebx |
||||
|
testl %edi,%edi |
||||
|
jnz .L013schedule_mangle_last_dec |
||||
|
movdqa 256(%ebp,%ecx,1),%xmm1 |
||||
|
.byte 102,15,56,0,193 |
||||
|
leal 352(%ebp),%ebx |
||||
|
addl $32,%edx |
||||
|
.L013schedule_mangle_last_dec: |
||||
|
addl $-16,%edx |
||||
|
pxor 336(%ebp),%xmm0 |
||||
|
call _vpaes_schedule_transform |
||||
|
movdqu %xmm0,(%edx) |
||||
|
# clear every XMM register so no key material is left behind
pxor %xmm0,%xmm0 |
||||
|
pxor %xmm1,%xmm1 |
||||
|
pxor %xmm2,%xmm2 |
||||
|
pxor %xmm3,%xmm3 |
||||
|
pxor %xmm4,%xmm4 |
||||
|
pxor %xmm5,%xmm5 |
||||
|
pxor %xmm6,%xmm6 |
||||
|
pxor %xmm7,%xmm7 |
||||
|
ret |
||||
|
.size _vpaes_schedule_core,.-_vpaes_schedule_core |
||||
|
# _vpaes_schedule_192_smear (i386)
# Helper used only on the 192-bit path of _vpaes_schedule_core.
# In:   xmm6, xmm7.
# Out:  xmm0 = xmm6 ^ pshufd(xmm6, 0x80) ^ pshufd(xmm7, 0xFE);
#       xmm6 = the same value with its low 8 bytes cleared.
# Clobbers xmm1 (zeroed). xmm7 is only read.
.type _vpaes_schedule_192_smear,@function |
||||
|
.align 16 |
||||
|
_vpaes_schedule_192_smear: |
||||
|
# dwords (low to high) of the shuffle result: s0, s0, s0, s2 of xmm6
pshufd $128,%xmm6,%xmm0 |
||||
|
pxor %xmm0,%xmm6 |
||||
|
# dwords (low to high) of the shuffle result: s2, s3, s3, s3 of xmm7
pshufd $254,%xmm7,%xmm0 |
||||
|
pxor %xmm0,%xmm6 |
||||
|
movdqa %xmm6,%xmm0 |
||||
|
pxor %xmm1,%xmm1 |
||||
|
# movhlps from a zero register clears the low 64 bits of xmm6
movhlps %xmm1,%xmm6 |
||||
|
ret |
||||
|
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear |
||||
|
# _vpaes_schedule_round / .L_vpaes_schedule_low_round (i386)
# Key-schedule step with two entry points; the first falls through into the
# second. On return xmm0 and xmm7 hold the same new value.
# High entry only: the running constant kept in the caller's frame at
# 8(%esp) (the slot _vpaes_schedule_core writes as its 4(%esp)) is rotated
# by 15 bytes and its top byte is XORed into xmm7; xmm0 is replaced by its
# highest dword broadcast and rotated by one byte.
# In:   xmm0, xmm7, ebp = table base.
# Clobbers xmm1-xmm5, and xmm2's stack slot is rewritten (high entry).
# .byte 102,15,58,15,M,i encodes palignr; .byte 102,15,56,0,M encodes pshufb.
.type _vpaes_schedule_round,@function |
||||
|
.align 16 |
||||
|
_vpaes_schedule_round: |
||||
|
movdqa 8(%esp),%xmm2 |
||||
|
pxor %xmm1,%xmm1 |
||||
|
# palignr $15,%xmm2,%xmm1 : xmm1 = top byte of xmm2 in its lowest byte
.byte 102,15,58,15,202,15 |
||||
|
# palignr $15,%xmm2,%xmm2 : rotate xmm2 by 15 bytes
.byte 102,15,58,15,210,15 |
||||
|
pxor %xmm1,%xmm7 |
||||
|
pshufd $255,%xmm0,%xmm0 |
||||
|
# palignr $1,%xmm0,%xmm0
.byte 102,15,58,15,192,1 |
||||
|
movdqa %xmm2,8(%esp) |
||||
|
.L_vpaes_schedule_low_round: |
||||
|
# xmm7 ^= xmm7 << 4 bytes; xmm7 ^= xmm7 << 8 bytes; xmm7 ^= row at 336(%ebp)
movdqa %xmm7,%xmm1 |
||||
|
pslldq $4,%xmm7 |
||||
|
pxor %xmm1,%xmm7 |
||||
|
movdqa %xmm7,%xmm1 |
||||
|
pslldq $8,%xmm7 |
||||
|
pxor %xmm1,%xmm7 |
||||
|
pxor 336(%ebp),%xmm7 |
||||
|
# xmm4 = 0x0F mask (-16(%ebp)), xmm5 = table at -48(%ebp); nibble-split xmm0
# and run the lookup chain through -32(%ebp) and xmm5
movdqa -16(%ebp),%xmm4 |
||||
|
movdqa -48(%ebp),%xmm5 |
||||
|
movdqa %xmm4,%xmm1 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm4,%xmm0 |
||||
|
movdqa -32(%ebp),%xmm2 |
||||
|
.byte 102,15,56,0,208 |
||||
|
pxor %xmm1,%xmm0 |
||||
|
movdqa %xmm5,%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
movdqa %xmm5,%xmm4 |
||||
|
.byte 102,15,56,0,224 |
||||
|
pxor %xmm2,%xmm4 |
||||
|
movdqa %xmm5,%xmm2 |
||||
|
.byte 102,15,56,0,211 |
||||
|
pxor %xmm0,%xmm2 |
||||
|
movdqa %xmm5,%xmm3 |
||||
|
.byte 102,15,56,0,220 |
||||
|
pxor %xmm1,%xmm3 |
||||
|
# output lookups through the table pair at 32/48(%ebp)
movdqa 32(%ebp),%xmm4 |
||||
|
.byte 102,15,56,0,226 |
||||
|
movdqa 48(%ebp),%xmm0 |
||||
|
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
# fold in the smeared xmm7 and keep the result in both xmm0 and xmm7
pxor %xmm7,%xmm0 |
||||
|
movdqa %xmm0,%xmm7 |
||||
|
ret |
||||
|
.size _vpaes_schedule_round,.-_vpaes_schedule_round |
||||
|
# _vpaes_schedule_transform (i386)
# Two-table nibble lookup on xmm0 using the 0x0F mask row at -16(%ebp):
#   lo = xmm0 & mask, hi = (xmm0 & ~mask) >> 4
#   xmm0 = pshufb(table at (%ebx), lo) ^ pshufb(table at 16(%ebx), hi)
# In:  xmm0 = value, ebx = address of two 16-byte tables (movdqa, so it must
#      be 16-byte aligned), ebp = table base.
# Out: xmm0. Clobbers xmm1, xmm2.
.type _vpaes_schedule_transform,@function |
||||
|
.align 16 |
||||
|
_vpaes_schedule_transform: |
||||
|
movdqa -16(%ebp),%xmm2 |
||||
|
movdqa %xmm2,%xmm1 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm2,%xmm0 |
||||
|
movdqa (%ebx),%xmm2 |
||||
|
# pshufb %xmm0,%xmm2
.byte 102,15,56,0,208 |
||||
|
movdqa 16(%ebx),%xmm0 |
||||
|
# pshufb %xmm1,%xmm0
.byte 102,15,56,0,193 |
||||
|
pxor %xmm2,%xmm0 |
||||
|
ret |
||||
|
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform |
||||
|
# _vpaes_schedule_mangle (i386)
# Transforms the value in xmm0 and stores 16 bytes through edx.
# In:   xmm0 = value (left unmodified; work is done on a copy in xmm4)
#       edi  = zero selects the first path, non-zero .L014schedule_mangle_dec
#       edx  = output pointer; moved by +16 (edi == 0) or -16 (edi != 0)
#              BEFORE the store
#       ecx  = offset selecting a mask from the table at 256(%ebp)
#       ebp  = table base
# Out:  edx updated, ecx = (ecx - 16) & 48, 16 bytes written at (%edx).
# Clobbers xmm1-xmm5, flags, and esi on the edi != 0 path.
# .byte 102,15,56,0,M encodes pshufb.
.type _vpaes_schedule_mangle,@function |
||||
|
.align 16 |
||||
|
_vpaes_schedule_mangle: |
||||
|
movdqa %xmm0,%xmm4 |
||||
|
movdqa 128(%ebp),%xmm5 |
||||
|
testl %edi,%edi |
||||
|
jnz .L014schedule_mangle_dec |
||||
|
# edi == 0: xmm4 ^= row at 336(%ebp), then xmm3 = XOR of xmm4 shuffled by
# xmm5 once, twice and three times (229 = pshufb %xmm5,%xmm4)
addl $16,%edx |
||||
|
pxor 336(%ebp),%xmm4 |
||||
|
.byte 102,15,56,0,229 |
||||
|
movdqa %xmm4,%xmm3 |
||||
|
.byte 102,15,56,0,229 |
||||
|
pxor %xmm4,%xmm3 |
||||
|
.byte 102,15,56,0,229 |
||||
|
pxor %xmm4,%xmm3 |
||||
|
jmp .L015schedule_mangle_both |
||||
|
.align 16 |
||||
|
.L014schedule_mangle_dec: |
||||
|
# edi != 0: nibble-split xmm4 (xmm4 = low, xmm1 = high) and run four table
# pairs at 416(%ebp) onwards, shuffling the running result by xmm5
# (221 = pshufb %xmm5,%xmm3) between pairs. esi is overwritten here.
movdqa -16(%ebp),%xmm2 |
||||
|
leal 416(%ebp),%esi |
||||
|
movdqa %xmm2,%xmm1 |
||||
|
pandn %xmm4,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm2,%xmm4 |
||||
|
movdqa (%esi),%xmm2 |
||||
|
.byte 102,15,56,0,212 |
||||
|
movdqa 16(%esi),%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
.byte 102,15,56,0,221 |
||||
|
movdqa 32(%esi),%xmm2 |
||||
|
.byte 102,15,56,0,212 |
||||
|
pxor %xmm3,%xmm2 |
||||
|
movdqa 48(%esi),%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
.byte 102,15,56,0,221 |
||||
|
movdqa 64(%esi),%xmm2 |
||||
|
.byte 102,15,56,0,212 |
||||
|
pxor %xmm3,%xmm2 |
||||
|
movdqa 80(%esi),%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
.byte 102,15,56,0,221 |
||||
|
movdqa 96(%esi),%xmm2 |
||||
|
.byte 102,15,56,0,212 |
||||
|
pxor %xmm3,%xmm2 |
||||
|
movdqa 112(%esi),%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
addl $-16,%edx |
||||
|
.L015schedule_mangle_both: |
||||
|
# common tail: permute xmm3 with the mask at 256(%ebp,%ecx), step ecx, store
movdqa 256(%ebp,%ecx,1),%xmm1 |
||||
|
.byte 102,15,56,0,217 |
||||
|
addl $-16,%ecx |
||||
|
andl $48,%ecx |
||||
|
movdqu %xmm3,(%edx) |
||||
|
ret |
||||
|
.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle |
||||
|
# vpaes_set_encrypt_key (i386, exported; arguments on the stack)
#   arg1 (20(%esp) after the four pushes) -> esi : user key bytes
#   arg2 (24(%esp))                       -> eax : key size in bits
#   arg3 (28(%esp))                       -> edx : key-schedule structure
# Stores (bits >> 5) + 5 at 240(%edx), then runs _vpaes_schedule_core with
# ecx = 48 and edi = 0 on a private 16-byte aligned stack frame.
# Returns 0 in eax. ebp, ebx, esi, edi are saved and restored.
.globl vpaes_set_encrypt_key |
||||
|
.type vpaes_set_encrypt_key,@function |
||||
|
.align 16 |
||||
|
vpaes_set_encrypt_key: |
||||
|
.L_vpaes_set_encrypt_key_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
# argument loads are interleaved with building the aligned frame:
# new esp = (esp - 56) & -16, old esp saved at 48(new esp)
movl 20(%esp),%esi |
||||
|
leal -56(%esp),%ebx |
||||
|
movl 24(%esp),%eax |
||||
|
andl $-16,%ebx |
||||
|
movl 28(%esp),%edx |
||||
|
xchgl %esp,%ebx |
||||
|
movl %ebx,48(%esp) |
||||
|
movl %eax,%ebx |
||||
|
shrl $5,%ebx |
||||
|
addl $5,%ebx |
||||
|
movl %ebx,240(%edx) |
||||
|
movl $48,%ecx |
||||
|
movl $0,%edi |
||||
|
# PIC: ebp = table offset relative to the return label; the callee adds its
# return address to obtain the absolute table base
leal .L_vpaes_consts+0x30-.L016pic_point,%ebp |
||||
|
call _vpaes_schedule_core |
||||
|
.L016pic_point: |
||||
|
movl 48(%esp),%esp |
||||
|
xorl %eax,%eax |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
||||
|
.size vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin |
||||
|
# vpaes_set_decrypt_key (i386, exported; arguments on the stack)
#   arg1 (20(%esp) after the four pushes) -> esi : user key bytes
#   arg2 (24(%esp))                       -> eax : key size in bits
#   arg3 (28(%esp))                       -> edx : key-schedule structure
# Stores n = (bits >> 5) + 5 at 240(%edx), advances edx to edx + 16 + 16*n,
# then runs _vpaes_schedule_core with edi = 1 and
# ecx = ((bits >> 1) & 32) ^ 32 (32 for 128/256-bit keys, 0 for 192-bit)
# on a private 16-byte aligned stack frame.
# Returns 0 in eax. ebp, ebx, esi, edi are saved and restored.
.globl vpaes_set_decrypt_key |
||||
|
.type vpaes_set_decrypt_key,@function |
||||
|
.align 16 |
||||
|
vpaes_set_decrypt_key: |
||||
|
.L_vpaes_set_decrypt_key_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
# argument loads are interleaved with building the aligned frame:
# new esp = (esp - 56) & -16, old esp saved at 48(new esp)
movl 20(%esp),%esi |
||||
|
leal -56(%esp),%ebx |
||||
|
movl 24(%esp),%eax |
||||
|
andl $-16,%ebx |
||||
|
movl 28(%esp),%edx |
||||
|
xchgl %esp,%ebx |
||||
|
movl %ebx,48(%esp) |
||||
|
movl %eax,%ebx |
||||
|
shrl $5,%ebx |
||||
|
addl $5,%ebx |
||||
|
movl %ebx,240(%edx) |
||||
|
# edx = edx + 16 + 16*n; _vpaes_schedule_mangle steps edx by -16 when edi != 0
shll $4,%ebx |
||||
|
leal 16(%edx,%ebx,1),%edx |
||||
|
movl $1,%edi |
||||
|
movl %eax,%ecx |
||||
|
shrl $1,%ecx |
||||
|
andl $32,%ecx |
||||
|
xorl $32,%ecx |
||||
|
# PIC: ebp = table offset relative to the return label; the callee adds its
# return address to obtain the absolute table base
leal .L_vpaes_consts+0x30-.L017pic_point,%ebp |
||||
|
call _vpaes_schedule_core |
||||
|
.L017pic_point: |
||||
|
movl 48(%esp),%esp |
||||
|
xorl %eax,%eax |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
||||
|
.size vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin |
||||
|
# vpaes_encrypt (i386, exported; arguments on the stack)
#   arg1 (20(%esp) after the four pushes) -> esi : 16-byte input block
#   arg2 (24(%esp))                       -> edi : 16-byte output block
#   arg3 (28(%esp))                       -> edx : key schedule
# Sets up the table base and xmm6/xmm7 via _vpaes_preheat, switches to a
# private 16-byte aligned stack frame, runs _vpaes_encrypt_core on the block
# and stores the result. ebp, ebx, esi, edi are saved and restored; eax is
# not set to a defined return value.
.globl vpaes_encrypt |
||||
|
.type vpaes_encrypt,@function |
||||
|
.align 16 |
||||
|
vpaes_encrypt: |
||||
|
.L_vpaes_encrypt_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
# PIC: _vpaes_preheat adds its return address (.L018pic_point) to ebp
leal .L_vpaes_consts+0x30-.L018pic_point,%ebp |
||||
|
call _vpaes_preheat |
||||
|
.L018pic_point: |
||||
|
# new esp = (esp - 56) & -16, old esp saved at 48(new esp)
movl 20(%esp),%esi |
||||
|
leal -56(%esp),%ebx |
||||
|
movl 24(%esp),%edi |
||||
|
andl $-16,%ebx |
||||
|
movl 28(%esp),%edx |
||||
|
xchgl %esp,%ebx |
||||
|
movl %ebx,48(%esp) |
||||
|
movdqu (%esi),%xmm0 |
||||
|
call _vpaes_encrypt_core |
||||
|
movdqu %xmm0,(%edi) |
||||
|
movl 48(%esp),%esp |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
||||
|
.size vpaes_encrypt,.-.L_vpaes_encrypt_begin |
||||
|
# vpaes_decrypt (i386, exported; arguments on the stack)
#   arg1 (20(%esp) after the four pushes) -> esi : 16-byte input block
#   arg2 (24(%esp))                       -> edi : 16-byte output block
#   arg3 (28(%esp))                       -> edx : key schedule
# Sets up the table base and xmm6/xmm7 via _vpaes_preheat, switches to a
# private 16-byte aligned stack frame, runs _vpaes_decrypt_core on the block
# and stores the result. ebp, ebx, esi, edi are saved and restored; eax is
# not set to a defined return value.
.globl vpaes_decrypt |
||||
|
.type vpaes_decrypt,@function |
||||
|
.align 16 |
||||
|
vpaes_decrypt: |
||||
|
.L_vpaes_decrypt_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
# PIC: _vpaes_preheat adds its return address (.L019pic_point) to ebp
leal .L_vpaes_consts+0x30-.L019pic_point,%ebp |
||||
|
call _vpaes_preheat |
||||
|
.L019pic_point: |
||||
|
# new esp = (esp - 56) & -16, old esp saved at 48(new esp)
movl 20(%esp),%esi |
||||
|
leal -56(%esp),%ebx |
||||
|
movl 24(%esp),%edi |
||||
|
andl $-16,%ebx |
||||
|
movl 28(%esp),%edx |
||||
|
xchgl %esp,%ebx |
||||
|
movl %ebx,48(%esp) |
||||
|
movdqu (%esi),%xmm0 |
||||
|
call _vpaes_decrypt_core |
||||
|
movdqu %xmm0,(%edi) |
||||
|
movl 48(%esp),%esp |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
||||
|
.size vpaes_decrypt,.-.L_vpaes_decrypt_begin |
||||
|
# vpaes_cbc_encrypt (i386, exported; arguments on the stack, offsets as seen
# after the four pushes)
#   arg1 20(%esp) -> esi : input           arg4 32(%esp) -> edx : key schedule
#   arg2 24(%esp) -> edi : output          arg5 36(%esp) -> ebp : chaining value
#   arg3 28(%esp) -> eax : length (bytes)  arg6 40(%esp) -> ecx : direction
# Direction 0 selects the decrypt loop, anything else the encrypt loop.
# Processes whole 16-byte blocks only: it returns at once, leaving the
# chaining value untouched, when length < 16, and a trailing partial block
# is ignored. The final chaining value is written back through arg5.
# Private 16-byte aligned frame (new esp = (esp - 56) & -16):
#   (%esp) = out - in, 4(%esp) = key, 8(%esp) = chaining-value pointer,
#   16(%esp) / 32(%esp) = saved previous / current ciphertext (decrypt),
#   48(%esp) = caller's esp.
# Loop state: esi = input cursor, edi = remaining length - 16, xmm1 = chain.
# ebx and edx are reloaded after every core call because the cores clobber
# them. ebp, ebx, esi, edi are saved and restored.
.globl vpaes_cbc_encrypt |
||||
|
.type vpaes_cbc_encrypt,@function |
||||
|
.align 16 |
||||
|
vpaes_cbc_encrypt: |
||||
|
.L_vpaes_cbc_encrypt_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
movl 20(%esp),%esi |
||||
|
movl 24(%esp),%edi |
||||
|
movl 28(%esp),%eax |
||||
|
movl 32(%esp),%edx |
||||
|
# fewer than 16 bytes: nothing to do (unsigned borrow)
subl $16,%eax |
||||
|
jc .L020cbc_abort |
||||
|
leal -56(%esp),%ebx |
||||
|
movl 36(%esp),%ebp |
||||
|
andl $-16,%ebx |
||||
|
movl 40(%esp),%ecx |
||||
|
xchgl %esp,%ebx |
||||
|
movdqu (%ebp),%xmm1 |
||||
|
subl %esi,%edi |
||||
|
movl %ebx,48(%esp) |
||||
|
movl %edi,(%esp) |
||||
|
movl %edx,4(%esp) |
||||
|
movl %ebp,8(%esp) |
||||
|
movl %eax,%edi |
||||
|
# PIC: _vpaes_preheat adds its return address (.L021pic_point) to ebp
leal .L_vpaes_consts+0x30-.L021pic_point,%ebp |
||||
|
call _vpaes_preheat |
||||
|
.L021pic_point: |
||||
|
cmpl $0,%ecx |
||||
|
je .L022cbc_dec_loop |
||||
|
jmp .L023cbc_enc_loop |
||||
|
.align 16 |
||||
|
.L023cbc_enc_loop: |
||||
|
# C[i] = E(P[i] ^ C[i-1]); xmm1 carries C[i-1]
movdqu (%esi),%xmm0 |
||||
|
pxor %xmm1,%xmm0 |
||||
|
call _vpaes_encrypt_core |
||||
|
movl (%esp),%ebx |
||||
|
movl 4(%esp),%edx |
||||
|
movdqa %xmm0,%xmm1 |
||||
|
movdqu %xmm0,(%ebx,%esi,1) |
||||
|
leal 16(%esi),%esi |
||||
|
subl $16,%edi |
||||
|
jnc .L023cbc_enc_loop |
||||
|
jmp .L024cbc_done |
||||
|
.align 16 |
||||
|
.L022cbc_dec_loop: |
||||
|
# P[i] = D(C[i]) ^ C[i-1]; both ciphertext blocks are spilled to the frame
# because the core clobbers xmm1
movdqu (%esi),%xmm0 |
||||
|
movdqa %xmm1,16(%esp) |
||||
|
movdqa %xmm0,32(%esp) |
||||
|
call _vpaes_decrypt_core |
||||
|
movl (%esp),%ebx |
||||
|
movl 4(%esp),%edx |
||||
|
pxor 16(%esp),%xmm0 |
||||
|
movdqa 32(%esp),%xmm1 |
||||
|
movdqu %xmm0,(%ebx,%esi,1) |
||||
|
leal 16(%esi),%esi |
||||
|
subl $16,%edi |
||||
|
jnc .L022cbc_dec_loop |
||||
|
.L024cbc_done: |
||||
|
# restore the caller's stack, then write the final chaining value back
movl 8(%esp),%ebx |
||||
|
movl 48(%esp),%esp |
||||
|
movdqu %xmm1,(%ebx) |
||||
|
.L020cbc_abort: |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
||||
|
.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin |
@ -0,0 +1,728 @@ |
|||||
|
.file "ghash-x86.s" |
||||
|
.text |
||||
|
# gcm_gmult_4bit_x86: integer-only, nibble-at-a-time table multiply of one
# 16-byte value, updated in place.
# Stack arguments (84-byte frame + four pushes + return address = 104):
#   104(%esp)  pointer to the 16-byte value that is read and overwritten
#   108(%esp)  base of a table of 16-byte entries, indexed by nibble * 16
#              (presumably the precomputed multiples of the hash key)
# Frame layout: 0..15 = copy of the input value, 16..79 = sixteen 32-bit
# reduction constants indexed by the nibble shifted out of the accumulator.
# The 128-bit accumulator lives in %ebp:%edx:%ecx:%ebx (%ebx lowest).
.globl gcm_gmult_4bit_x86 |
||||
|
.type gcm_gmult_4bit_x86,@function |
||||
|
.align 16 |
||||
|
gcm_gmult_4bit_x86: |
||||
|
.L_gcm_gmult_4bit_x86_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
subl $84,%esp |
||||
|
movl 104(%esp),%edi |
||||
|
movl 108(%esp),%esi |
||||
|
movl (%edi),%ebp |
||||
|
movl 4(%edi),%edx |
||||
|
movl 8(%edi),%ecx |
||||
|
movl 12(%edi),%ebx |
||||
|
# Build the 16-entry reduction table at 16(%esp)..76(%esp).
movl $0,16(%esp) |
||||
|
movl $471859200,20(%esp) |
||||
|
movl $943718400,24(%esp) |
||||
|
movl $610271232,28(%esp) |
||||
|
movl $1887436800,32(%esp) |
||||
|
movl $1822425088,36(%esp) |
||||
|
movl $1220542464,40(%esp) |
||||
|
movl $1423966208,44(%esp) |
||||
|
movl $3774873600,48(%esp) |
||||
|
movl $4246732800,52(%esp) |
||||
|
movl $3644850176,56(%esp) |
||||
|
movl $3311403008,60(%esp) |
||||
|
movl $2441084928,64(%esp) |
||||
|
movl $2376073216,68(%esp) |
||||
|
movl $2847932416,72(%esp) |
||||
|
movl $3051356160,76(%esp) |
||||
|
# Keep a byte-addressable copy of the input at 0..15(%esp).
movl %ebp,(%esp) |
||||
|
movl %edx,4(%esp) |
||||
|
movl %ecx,8(%esp) |
||||
|
movl %ebx,12(%esp) |
||||
|
# %ebx = (low nibble of input byte 15) * 16: offset of the first table entry.
shrl $20,%ebx |
||||
|
andl $240,%ebx |
||||
|
# Seed the accumulator from that table entry.
movl 4(%esi,%ebx,1),%ebp |
||||
|
movl (%esi,%ebx,1),%edx |
||||
|
movl 12(%esi,%ebx,1),%ecx |
||||
|
movl 8(%esi,%ebx,1),%ebx |
||||
|
# %eax is used as a zero-extended byte index; %edi counts input bytes 15..0.
xorl %eax,%eax |
||||
|
movl $15,%edi |
||||
|
jmp .L000x86_loop |
||||
|
.align 16 |
||||
|
.L000x86_loop: |
||||
|
# First half: shift the accumulator right by 4, fold the nibble that fell
# off through the reduction table, then add the entry for the HIGH nibble of
# input byte %edi.
movb %bl,%al |
||||
|
shrdl $4,%ecx,%ebx |
||||
|
andb $15,%al |
||||
|
shrdl $4,%edx,%ecx |
||||
|
shrdl $4,%ebp,%edx |
||||
|
shrl $4,%ebp |
||||
|
xorl 16(%esp,%eax,4),%ebp |
||||
|
movb (%esp,%edi,1),%al |
||||
|
andb $240,%al |
||||
|
xorl 8(%esi,%eax,1),%ebx |
||||
|
xorl 12(%esi,%eax,1),%ecx |
||||
|
xorl (%esi,%eax,1),%edx |
||||
|
xorl 4(%esi,%eax,1),%ebp |
||||
|
# Step to the next lower byte; finished once the index goes negative.
decl %edi |
||||
|
js .L001x86_break |
||||
|
# Second half: same shift and reduce, then add the entry for the LOW nibble
# of the new input byte (shlb $4 turns it into nibble * 16).
movb %bl,%al |
||||
|
shrdl $4,%ecx,%ebx |
||||
|
andb $15,%al |
||||
|
shrdl $4,%edx,%ecx |
||||
|
shrdl $4,%ebp,%edx |
||||
|
shrl $4,%ebp |
||||
|
xorl 16(%esp,%eax,4),%ebp |
||||
|
movb (%esp,%edi,1),%al |
||||
|
shlb $4,%al |
||||
|
xorl 8(%esi,%eax,1),%ebx |
||||
|
xorl 12(%esi,%eax,1),%ecx |
||||
|
xorl (%esi,%eax,1),%edx |
||||
|
xorl 4(%esi,%eax,1),%ebp |
||||
|
jmp .L000x86_loop |
||||
|
.align 16 |
||||
|
.L001x86_break: |
||||
|
# Byte-swap each accumulator dword and store the result over the input.
bswap %ebx |
||||
|
bswap %ecx |
||||
|
bswap %edx |
||||
|
bswap %ebp |
||||
|
movl 104(%esp),%edi |
||||
|
movl %ebx,12(%edi) |
||||
|
movl %ecx,8(%edi) |
||||
|
movl %edx,4(%edi) |
||||
|
movl %ebp,(%edi) |
||||
|
addl $84,%esp |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
||||
|
.size gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin |
||||
|
# gcm_ghash_4bit_x86: integer-only hash update over a buffer. For every
# 16-byte input block it XORs the block into the running value and performs
# the same nibble-table multiply as gcm_gmult_4bit_x86.
# Stack arguments (84-byte frame + four pushes + return address = 104):
#   104(%esp)  pointer to the 16-byte running value (read, then overwritten)
#   108(%esp)  base of the table of 16-byte entries (nibble * 16 indexing)
#   112(%esp)  input pointer; the slot is reused as the running cursor
#   116(%esp)  byte length; the slot is overwritten with the end pointer
# NOTE(review): the end test sits at the bottom of the loop, so one block is
# always consumed even when the length is zero - callers are presumably
# expected to pass a non-zero multiple of 16.
.globl gcm_ghash_4bit_x86 |
||||
|
.type gcm_ghash_4bit_x86,@function |
||||
|
.align 16 |
||||
|
gcm_ghash_4bit_x86: |
||||
|
.L_gcm_ghash_4bit_x86_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
subl $84,%esp |
||||
|
movl 104(%esp),%ebx |
||||
|
movl 108(%esp),%esi |
||||
|
movl 112(%esp),%edi |
||||
|
movl 116(%esp),%ecx |
||||
|
# Turn the length into an end pointer and park it in the length slot.
addl %edi,%ecx |
||||
|
movl %ecx,116(%esp) |
||||
|
# Load the running value into %ebp:%edx:%ecx:%ebx.
movl (%ebx),%ebp |
||||
|
movl 4(%ebx),%edx |
||||
|
movl 8(%ebx),%ecx |
||||
|
movl 12(%ebx),%ebx |
||||
|
# Build the 16-entry reduction table at 16(%esp)..76(%esp).
movl $0,16(%esp) |
||||
|
movl $471859200,20(%esp) |
||||
|
movl $943718400,24(%esp) |
||||
|
movl $610271232,28(%esp) |
||||
|
movl $1887436800,32(%esp) |
||||
|
movl $1822425088,36(%esp) |
||||
|
movl $1220542464,40(%esp) |
||||
|
movl $1423966208,44(%esp) |
||||
|
movl $3774873600,48(%esp) |
||||
|
movl $4246732800,52(%esp) |
||||
|
movl $3644850176,56(%esp) |
||||
|
movl $3311403008,60(%esp) |
||||
|
movl $2441084928,64(%esp) |
||||
|
movl $2376073216,68(%esp) |
||||
|
movl $2847932416,72(%esp) |
||||
|
movl $3051356160,76(%esp) |
||||
|
.align 16 |
||||
|
.L002x86_outer_loop: |
||||
|
# XOR the next 16 input bytes into the running value ...
xorl 12(%edi),%ebx |
||||
|
xorl 8(%edi),%ecx |
||||
|
xorl 4(%edi),%edx |
||||
|
xorl (%edi),%ebp |
||||
|
# ... and keep a byte-addressable copy at 0..15(%esp) for the nibble walk.
movl %ebx,12(%esp) |
||||
|
movl %ecx,8(%esp) |
||||
|
movl %edx,4(%esp) |
||||
|
movl %ebp,(%esp) |
||||
|
# %ebx = (low nibble of byte 15) * 16: offset of the first table entry.
shrl $20,%ebx |
||||
|
andl $240,%ebx |
||||
|
movl 4(%esi,%ebx,1),%ebp |
||||
|
movl (%esi,%ebx,1),%edx |
||||
|
movl 12(%esi,%ebx,1),%ecx |
||||
|
movl 8(%esi,%ebx,1),%ebx |
||||
|
xorl %eax,%eax |
||||
|
# %edi is reused as the byte index 15..0; the input cursor lives in 112(%esp).
movl $15,%edi |
||||
|
jmp .L003x86_loop |
||||
|
.align 16 |
||||
|
.L003x86_loop: |
||||
|
# First half: shift right by 4, reduce, add entry for the HIGH nibble of
# byte %edi.
movb %bl,%al |
||||
|
shrdl $4,%ecx,%ebx |
||||
|
andb $15,%al |
||||
|
shrdl $4,%edx,%ecx |
||||
|
shrdl $4,%ebp,%edx |
||||
|
shrl $4,%ebp |
||||
|
xorl 16(%esp,%eax,4),%ebp |
||||
|
movb (%esp,%edi,1),%al |
||||
|
andb $240,%al |
||||
|
xorl 8(%esi,%eax,1),%ebx |
||||
|
xorl 12(%esi,%eax,1),%ecx |
||||
|
xorl (%esi,%eax,1),%edx |
||||
|
xorl 4(%esi,%eax,1),%ebp |
||||
|
decl %edi |
||||
|
js .L004x86_break |
||||
|
# Second half: shift right by 4, reduce, add entry for the LOW nibble of the
# next lower byte.
movb %bl,%al |
||||
|
shrdl $4,%ecx,%ebx |
||||
|
andb $15,%al |
||||
|
shrdl $4,%edx,%ecx |
||||
|
shrdl $4,%ebp,%edx |
||||
|
shrl $4,%ebp |
||||
|
xorl 16(%esp,%eax,4),%ebp |
||||
|
movb (%esp,%edi,1),%al |
||||
|
shlb $4,%al |
||||
|
xorl 8(%esi,%eax,1),%ebx |
||||
|
xorl 12(%esi,%eax,1),%ecx |
||||
|
xorl (%esi,%eax,1),%edx |
||||
|
xorl 4(%esi,%eax,1),%ebp |
||||
|
jmp .L003x86_loop |
||||
|
.align 16 |
||||
|
.L004x86_break: |
||||
|
bswap %ebx |
||||
|
bswap %ecx |
||||
|
bswap %edx |
||||
|
bswap %ebp |
||||
|
# Advance the saved input cursor by one block and loop while it is below the
# end pointer (unsigned compare; movl does not disturb the flags from cmpl).
movl 112(%esp),%edi |
||||
|
leal 16(%edi),%edi |
||||
|
cmpl 116(%esp),%edi |
||||
|
movl %edi,112(%esp) |
||||
|
jb .L002x86_outer_loop |
||||
|
# Store the final running value back through the first argument.
movl 104(%esp),%edi |
||||
|
movl %ebx,12(%edi) |
||||
|
movl %ecx,8(%edi) |
||||
|
movl %edx,4(%edi) |
||||
|
movl %ebp,(%edi) |
||||
|
addl $84,%esp |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
||||
|
.size gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin |
||||
|
# _mmx_gmult_4bit_inner: fully unrolled MMX nibble-table multiply shared by
# gcm_gmult_4bit_mmx and gcm_ghash_4bit_mmx. Register-based private
# convention (no stack arguments):
#   in   %ebx  value of operand byte 15 (only the low 8 bits are used)
#        %edi  pointer to the 16-byte operand; bytes 14..0 are read from it
#        %esi  base of the table of 16-byte entries, indexed by nibble * 16
#        %eax  base of a table of 8-byte reduction entries, indexed by nibble
#              (both callers in this file point it at .Lrem_4bit)
#   out  %ebx, %ecx, %edx, %ebp = byte-swapped result dwords; the callers
#        store them at offsets 12, 8, 4 and 0 of the result respectively
#   clobbers %ecx, %edx, %edi, %mm0-%mm2 and EFLAGS; emms is left to the caller
# The 128-bit accumulator is %mm1 (entry qword +0) : %mm0 (entry qword +8).
# Each step shifts it right by 4 bits (the low nibble of %mm1 is carried into
# the top of %mm0 via psllq $60 on a copy), the nibble shifted out of %mm0 is
# captured with movd/andl $15 and used to index the reduction table, and the
# next table entry is XORed in. The scalar and MMX work is interleaved.
.type _mmx_gmult_4bit_inner,@function |
||||
|
.align 16 |
||||
|
_mmx_gmult_4bit_inner: |
||||
|
# Split byte 15: %ecx = low nibble * 16, %edx = high nibble * 16.
xorl %ecx,%ecx |
||||
|
movl %ebx,%edx |
||||
|
movb %dl,%cl |
||||
|
shlb $4,%cl |
||||
|
andl $240,%edx |
||||
|
movq 8(%esi,%ecx,1),%mm0 |
||||
|
movq (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# Operand byte 14 (first unrolled step: no pending reduction term yet).
movb 14(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# Operand byte 13.
movb 13(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# Operand byte 12.
movb 12(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# Operand byte 11.
movb 11(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# Operand byte 10.
movb 10(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# Operand byte 9.
movb 9(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# Operand byte 8.
movb 8(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# Operand byte 7.
movb 7(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# Operand byte 6.
movb 6(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# Operand byte 5.
movb 5(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# Operand byte 4.
movb 4(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# Operand byte 3.
movb 3(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# Operand byte 2.
movb 2(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# Operand byte 1.
movb 1(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# Operand byte 0 (last byte fetched from the operand).
movb (%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# Tail: apply the outstanding reduction terms and move the result out of the
# MMX registers into %ebx/%ecx (from %mm0) and %edx/%ebp (from %mm1).
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
# Final reduction term is taken from the high dword of the table entry and
# applied in scalar form (shifted left by 4) to the top result dword.
movl 4(%eax,%ebp,8),%edi |
||||
|
psrlq $32,%mm0 |
||||
|
movd %mm1,%edx |
||||
|
psrlq $32,%mm1 |
||||
|
movd %mm0,%ecx |
||||
|
movd %mm1,%ebp |
||||
|
shll $4,%edi |
||||
|
bswap %ebx |
||||
|
bswap %edx |
||||
|
bswap %ecx |
||||
|
xorl %edi,%ebp |
||||
|
bswap %ebp |
||||
|
ret |
||||
|
.size _mmx_gmult_4bit_inner,.-_mmx_gmult_4bit_inner |
||||
|
# gcm_gmult_4bit_mmx: MMX variant of the single-block multiply; a thin
# wrapper that sets up registers for _mmx_gmult_4bit_inner.
# Stack arguments (after the four pushes):
#   20(%esp)  pointer to the 16-byte value that is read and overwritten
#   24(%esp)  base of the 16-byte-entry table passed on in %esi
.globl gcm_gmult_4bit_mmx |
||||
|
.type gcm_gmult_4bit_mmx,@function |
||||
|
.align 16 |
||||
|
gcm_gmult_4bit_mmx: |
||||
|
.L_gcm_gmult_4bit_mmx_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
movl 20(%esp),%edi |
||||
|
movl 24(%esp),%esi |
||||
|
# Position-independent address of .Lrem_4bit: call/pop yields the address of
# the label, and the link-time constant offset is added to it.
call .L005pic_point |
||||
|
.L005pic_point: |
||||
|
popl %eax |
||||
|
leal .Lrem_4bit-.L005pic_point(%eax),%eax |
||||
|
# The inner routine takes operand byte 15 in %ebx and reads bytes 14..0
# through %edi.
movzbl 15(%edi),%ebx |
||||
|
call _mmx_gmult_4bit_inner |
||||
|
# %edi was overwritten by the inner routine; reload the result pointer.
movl 20(%esp),%edi |
||||
|
# Leave MMX state before returning to the caller.
emms |
||||
|
movl %ebx,12(%edi) |
||||
|
movl %edx,4(%edi) |
||||
|
movl %ecx,8(%edi) |
||||
|
movl %ebp,(%edi) |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
||||
|
.size gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin |
||||
|
# gcm_ghash_4bit_mmx: MMX variant of the buffer hash update. For every
# 16-byte input block it XORs the block into the running value and calls
# _mmx_gmult_4bit_inner on a stack copy.
# Stack arguments (offsets after the four pushes; +20 once the local frame
# has been allocated):
#   20(%esp)  pointer to the 16-byte running value (read, then overwritten)
#   24(%esp)  base of the 16-byte-entry table passed on in %esi
#   28(%esp)  input pointer; the slot is reused to save the input cursor
#   32(%esp)  byte length; the slot is overwritten with the end pointer
# NOTE(review): the end test is at the bottom of the loop, so one block is
# always consumed even for a zero length - confirm callers never pass zero.
.globl gcm_ghash_4bit_mmx |
||||
|
.type gcm_ghash_4bit_mmx,@function |
||||
|
.align 16 |
||||
|
gcm_ghash_4bit_mmx: |
||||
|
.L_gcm_ghash_4bit_mmx_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
movl 20(%esp),%ebp |
||||
|
movl 24(%esp),%esi |
||||
|
movl 28(%esp),%edi |
||||
|
movl 32(%esp),%ecx |
||||
|
# Position-independent address of .Lrem_4bit into %eax.
call .L006pic_point |
||||
|
.L006pic_point: |
||||
|
popl %eax |
||||
|
leal .Lrem_4bit-.L006pic_point(%eax),%eax |
||||
|
# Convert the length into an end pointer and store it in the length slot.
addl %edi,%ecx |
||||
|
movl %ecx,32(%esp) |
||||
|
# 20-byte local frame: 0..15 hold the block handed to the inner routine.
subl $20,%esp |
||||
|
# Load the running value; %ebp is loaded last because it is the base pointer.
movl 12(%ebp),%ebx |
||||
|
movl 4(%ebp),%edx |
||||
|
movl 8(%ebp),%ecx |
||||
|
movl (%ebp),%ebp |
||||
|
jmp .L007mmx_outer_loop |
||||
|
.align 16 |
||||
|
.L007mmx_outer_loop: |
||||
|
# XOR the next input block into the running value.
xorl 12(%edi),%ebx |
||||
|
xorl 4(%edi),%edx |
||||
|
xorl 8(%edi),%ecx |
||||
|
xorl (%edi),%ebp |
||||
|
# Save the input cursor in the (shifted) third-argument slot: 28 + 20 = 48.
movl %edi,48(%esp) |
||||
|
movl %ebx,12(%esp) |
||||
|
movl %edx,4(%esp) |
||||
|
movl %ecx,8(%esp) |
||||
|
movl %ebp,(%esp) |
||||
|
# Inner-routine inputs: %edi = operand copy on the stack, %ebx = its byte 15.
movl %esp,%edi |
||||
|
shrl $24,%ebx |
||||
|
call _mmx_gmult_4bit_inner |
||||
|
# Advance the cursor and loop while it is below the end pointer (32 + 20 = 52).
movl 48(%esp),%edi |
||||
|
leal 16(%edi),%edi |
||||
|
cmpl 52(%esp),%edi |
||||
|
jb .L007mmx_outer_loop |
||||
|
# Result pointer is the first argument (20 + 20 = 40).
movl 40(%esp),%edi |
||||
|
emms |
||||
|
movl %ebx,12(%edi) |
||||
|
movl %edx,4(%edi) |
||||
|
movl %ecx,8(%edi) |
||||
|
movl %ebp,(%edi) |
||||
|
addl $20,%esp |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
||||
|
.size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin |
||||
|
# .Lrem_4bit: sixteen 8-byte reduction entries, each written as a pair of
# .long values (low dword 0, high dword = constant). The MMX routines above
# index it as (%eax,%nibble,8). Each high dword equals the matching on-stack
# constant used by the integer routines shifted right by 4
# (for example 29491200 * 16 = 471859200).
.align 64 |
||||
|
.Lrem_4bit: |
||||
|
.long 0,0,0,29491200,0,58982400,0,38141952 |
||||
|
.long 0,117964800,0,113901568,0,76283904,0,88997888 |
||||
|
.long 0,235929600,0,265420800,0,227803136,0,206962688 |
||||
|
.long 0,152567808,0,148504576,0,177995776,0,190709760 |
||||
|
# .L008rem_8bit: 256 16-bit constants (32 rows of 8 .value items).
# NOTE(review): no code visible in this part of the file references this
# table; judging by the name it is the byte-indexed counterpart of
# .Lrem_4bit - confirm against the routine that uses it.
.align 64 |
||||
|
.L008rem_8bit: |
||||
|
.value 0,450,900,582,1800,1738,1164,1358 |
||||
|
.value 3600,4050,3476,3158,2328,2266,2716,2910 |
||||
|
.value 7200,7650,8100,7782,6952,6890,6316,6510 |
||||
|
.value 4656,5106,4532,4214,5432,5370,5820,6014 |
||||
|
.value 14400,14722,15300,14854,16200,16010,15564,15630 |
||||
|
.value 13904,14226,13780,13334,12632,12442,13020,13086 |
||||
|
.value 9312,9634,10212,9766,9064,8874,8428,8494 |
||||
|
.value 10864,11186,10740,10294,11640,11450,12028,12094 |
||||
|
.value 28800,28994,29444,29382,30600,30282,29708,30158 |
||||
|
.value 32400,32594,32020,31958,31128,30810,31260,31710 |
||||
|
.value 27808,28002,28452,28390,27560,27242,26668,27118 |
||||
|
.value 25264,25458,24884,24822,26040,25722,26172,26622 |
||||
|
.value 18624,18690,19268,19078,20424,19978,19532,19854 |
||||
|
.value 18128,18194,17748,17558,16856,16410,16988,17310 |
||||
|
.value 21728,21794,22372,22182,21480,21034,20588,20910 |
||||
|
.value 23280,23346,22900,22710,24056,23610,24188,24510 |
||||
|
.value 57600,57538,57988,58182,58888,59338,58764,58446 |
||||
|
.value 61200,61138,60564,60758,59416,59866,60316,59998 |
||||
|
.value 64800,64738,65188,65382,64040,64490,63916,63598 |
||||
|
.value 62256,62194,61620,61814,62520,62970,63420,63102 |
||||
|
.value 55616,55426,56004,56070,56904,57226,56780,56334 |
||||
|
.value 55120,54930,54484,54550,53336,53658,54236,53790 |
||||
|
.value 50528,50338,50916,50982,49768,50090,49644,49198 |
||||
|
.value 52080,51890,51444,51510,52344,52666,53244,52798 |
||||
|
.value 37248,36930,37380,37830,38536,38730,38156,38094 |
||||
|
.value 40848,40530,39956,40406,39064,39258,39708,39646 |
||||
|
.value 36256,35938,36388,36838,35496,35690,35116,35054 |
||||
|
.value 33712,33394,32820,33270,33976,34170,34620,34558 |
||||
|
.value 43456,43010,43588,43910,44744,44810,44364,44174 |
||||
|
.value 42960,42514,42068,42390,41176,41242,41820,41630 |
||||
|
.value 46560,46114,46692,47014,45800,45866,45420,45230 |
||||
|
.value 48112,47666,47220,47542,48376,48442,49020,48830 |
||||
|
# NUL-terminated ASCII tag:
# "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 |
||||
|
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 |
||||
|
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 |
||||
|
.byte 0 |
@ -0,0 +1,635 @@ |
|||||
|
.file "vpaes-x86.s" |
||||
|
.text |
||||
|
# L_vpaes_consts: constant pool for the vpaes routines that follow, laid out
# as 16-byte rows (four .long values each) and aligned to 64 bytes.
# The routines address it through %ebp with fixed displacements such as
# -48, -32, -16, 0..112, 128, 176, 192, 256, 320, 336 and 608.
# NOTE(review): this assumes %ebp = L_vpaes_consts + 0x30, as the ELF build's
# vpaes_cbc_encrypt sets it up - confirm for the callers in this file.
# Recognisable rows: the third row is 0x0F0F0F0F repeated (the nibble mask
# that __vpaes_preheat loads into %xmm6 under that assumption), and the row of
# 1532713819 is 0x5B5B5B5B repeated.
.align 6,0x90 |
||||
|
L_vpaes_consts: |
||||
|
.long 218628480,235210255,168496130,67568393 |
||||
|
.long 252381056,17041926,33884169,51187212 |
||||
|
.long 252645135,252645135,252645135,252645135 |
||||
|
.long 1512730624,3266504856,1377990664,3401244816 |
||||
|
.long 830229760,1275146365,2969422977,3447763452 |
||||
|
.long 3411033600,2979783055,338359620,2782886510 |
||||
|
.long 4209124096,907596821,221174255,1006095553 |
||||
|
.long 191964160,3799684038,3164090317,1589111125 |
||||
|
.long 182528256,1777043520,2877432650,3265356744 |
||||
|
.long 1874708224,3503451415,3305285752,363511674 |
||||
|
.long 1606117888,3487855781,1093350906,2384367825 |
||||
|
.long 197121,67569157,134941193,202313229 |
||||
|
.long 67569157,134941193,202313229,197121 |
||||
|
.long 134941193,202313229,197121,67569157 |
||||
|
.long 202313229,197121,67569157,134941193 |
||||
|
.long 33619971,100992007,168364043,235736079 |
||||
|
.long 235736079,33619971,100992007,168364043 |
||||
|
.long 168364043,235736079,33619971,100992007 |
||||
|
.long 100992007,168364043,235736079,33619971 |
||||
|
.long 50462976,117835012,185207048,252579084 |
||||
|
.long 252314880,51251460,117574920,184942860 |
||||
|
.long 184682752,252054788,50987272,118359308 |
||||
|
.long 118099200,185467140,251790600,50727180 |
||||
|
.long 2946363062,528716217,1300004225,1881839624 |
||||
|
.long 1532713819,1532713819,1532713819,1532713819 |
||||
|
.long 3602276352,4288629033,3737020424,4153884961 |
||||
|
.long 1354558464,32357713,2958822624,3775749553 |
||||
|
.long 1201988352,132424512,1572796698,503232858 |
||||
|
.long 2213177600,1597421020,4103937655,675398315 |
||||
|
.long 2749646592,4273543773,1511898873,121693092 |
||||
|
.long 3040248576,1103263732,2871565598,1608280554 |
||||
|
.long 2236667136,2588920351,482954393,64377734 |
||||
|
.long 3069987328,291237287,2117370568,3650299247 |
||||
|
.long 533321216,3573750986,2572112006,1401264716 |
||||
|
.long 1339849704,2721158661,548607111,3445553514 |
||||
|
.long 2128193280,3054596040,2183486460,1257083700 |
||||
|
.long 655635200,1165381986,3923443150,2344132524 |
||||
|
.long 190078720,256924420,290342170,357187870 |
||||
|
.long 1610966272,2263057382,4103205268,309794674 |
||||
|
.long 2592527872,2233205587,1335446729,3402964816 |
||||
|
.long 3973531904,3225098121,3002836325,1918774430 |
||||
|
.long 3870401024,2102906079,2284471353,4117666579 |
||||
|
.long 617007872,1021508343,366931923,691083277 |
||||
|
.long 2528395776,3491914898,2968704004,1613121270 |
||||
|
.long 3445188352,3247741094,844474987,4093578302 |
||||
|
.long 651481088,1190302358,1689581232,574775300 |
||||
|
.long 4289380608,206939853,2555985458,2489840491 |
||||
|
.long 2130264064,327674451,3566485037,3349835193 |
||||
|
.long 2470714624,316102159,3636825756,3393945945 |
||||
|
# NUL-terminated ASCII tag:
# "Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)"
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 |
||||
|
.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83 |
||||
|
.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117 |
||||
|
.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105 |
||||
|
.byte 118,101,114,115,105,116,121,41,0 |
||||
|
# Pad with 0x90 bytes up to the next 64-byte boundary before the code.
.align 6,0x90 |
||||
|
# __vpaes_preheat: turn the position-independent offset in %ebp into an
# absolute pointer and preload two constants.
#   in   %ebp  offset of the constant base relative to this call's return address
#   out  %ebp  absolute constant base; %xmm7 = row at -48(%ebp),
#        %xmm6 = row at -16(%ebp) (used as the nibble mask by the core routines)
.align 4 |
||||
|
__vpaes_preheat: |
||||
|
# (%esp) is the return address pushed by the caller's call instruction.
addl (%esp),%ebp |
||||
|
movdqa -48(%ebp),%xmm7 |
||||
|
movdqa -16(%ebp),%xmm6 |
||||
|
ret |
||||
|
# __vpaes_encrypt_core: encrypt one 16-byte block held in %xmm0.
#   in   %xmm0 = input block, %edx = key schedule pointer (the round count is
#        read from offset 240, round keys from offset 0 in 16-byte steps),
#        %ebp = constant base, %xmm6/%xmm7 = constants set by __vpaes_preheat
#   out  %xmm0 = result block
#   clobbers %eax, %ebx, %ecx, %edx, %xmm1-%xmm5, EFLAGS
# Every ".byte 102,15,56,0,<modrm>" is a hand-encoded SSSE3 pshufb
# (66 0F 38 00 /r) between two xmm registers.
.align 4 |
||||
|
__vpaes_encrypt_core: |
||||
|
# %ecx cycles through 16,32,48,0 and selects a row of the tables at %ebx.
movl $16,%ecx |
||||
|
movl 240(%edx),%eax |
||||
|
# Split the input into high nibbles (%xmm1) and low nibbles (%xmm0).
movdqa %xmm6,%xmm1 |
||||
|
movdqa (%ebp),%xmm2 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
movdqu (%edx),%xmm5 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm6,%xmm0 |
||||
|
.byte 102,15,56,0,208 |
||||
|
movdqa 16(%ebp),%xmm0 |
||||
|
.byte 102,15,56,0,193 |
||||
|
# Combine the two table lookups with the first round key.
pxor %xmm5,%xmm2 |
||||
|
pxor %xmm2,%xmm0 |
||||
|
# This addl is the last flag-setting instruction before the first pass
# through L000enc_entry, so the jnz there sees a non-zero result.
addl $16,%edx |
||||
|
leal 192(%ebp),%ebx |
||||
|
jmp L000enc_entry |
||||
|
.align 4,0x90 |
||||
|
# Middle-round output stage: table lookups at 32..80(%ebp) on the values
# produced by the entry section, mixed with permutations taken from the
# rows at -64(%ebx,%ecx) and (%ebx,%ecx).
L001enc_loop: |
||||
|
movdqa 32(%ebp),%xmm4 |
||||
|
.byte 102,15,56,0,226 |
||||
|
pxor %xmm5,%xmm4 |
||||
|
movdqa 48(%ebp),%xmm0 |
||||
|
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
movdqa 64(%ebp),%xmm5 |
||||
|
.byte 102,15,56,0,234 |
||||
|
movdqa -64(%ebx,%ecx,1),%xmm1 |
||||
|
movdqa 80(%ebp),%xmm2 |
||||
|
.byte 102,15,56,0,211 |
||||
|
pxor %xmm5,%xmm2 |
||||
|
movdqa (%ebx,%ecx,1),%xmm4 |
||||
|
movdqa %xmm0,%xmm3 |
||||
|
.byte 102,15,56,0,193 |
||||
|
# Advance to the next round key.
addl $16,%edx |
||||
|
pxor %xmm2,%xmm0 |
||||
|
.byte 102,15,56,0,220 |
||||
|
addl $16,%ecx |
||||
|
pxor %xmm0,%xmm3 |
||||
|
.byte 102,15,56,0,193 |
||||
|
andl $48,%ecx |
||||
|
pxor %xmm3,%xmm0 |
||||
|
# Round counter; ZF from this subl is consumed by the jnz at the end of the
# entry section (nothing in between modifies EFLAGS).
subl $1,%eax |
||||
|
# Per-round entry stage: nibble split followed by chained pshufb lookups in
# %xmm7 and -32(%ebp); leaves its results in %xmm2/%xmm3 and loads the next
# round key into %xmm5.
L000enc_entry: |
||||
|
movdqa %xmm6,%xmm1 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm6,%xmm0 |
||||
|
movdqa -32(%ebp),%xmm5 |
||||
|
.byte 102,15,56,0,232 |
||||
|
pxor %xmm1,%xmm0 |
||||
|
movdqa %xmm7,%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm5,%xmm3 |
||||
|
movdqa %xmm7,%xmm4 |
||||
|
.byte 102,15,56,0,224 |
||||
|
pxor %xmm5,%xmm4 |
||||
|
movdqa %xmm7,%xmm2 |
||||
|
.byte 102,15,56,0,211 |
||||
|
pxor %xmm0,%xmm2 |
||||
|
movdqa %xmm7,%xmm3 |
||||
|
movdqu (%edx),%xmm5 |
||||
|
.byte 102,15,56,0,220 |
||||
|
pxor %xmm1,%xmm3 |
||||
|
jnz L001enc_loop |
||||
|
# Last round: output tables at 96/112(%ebp), final round key in %xmm5, and a
# closing byte permutation taken from 64(%ebx,%ecx).
movdqa 96(%ebp),%xmm4 |
||||
|
movdqa 112(%ebp),%xmm0 |
||||
|
.byte 102,15,56,0,226 |
||||
|
pxor %xmm5,%xmm4 |
||||
|
.byte 102,15,56,0,195 |
||||
|
movdqa 64(%ebx,%ecx,1),%xmm1 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
.byte 102,15,56,0,193 |
||||
|
ret |
||||
|
# __vpaes_decrypt_core: decrypt one 16-byte block held in %xmm0.
#   in   %xmm0 = input block, %edx = key schedule pointer (round count at
#        offset 240), %ebp = constant base, %xmm6/%xmm7 = constants set by
#        __vpaes_preheat
#   out  %xmm0 = result block
#   clobbers %eax, %ebx, %ecx, %edx, %xmm1-%xmm5, EFLAGS
# ".byte 102,15,56,0,<modrm>" is a hand-encoded pshufb (66 0F 38 00 /r);
# ".byte 102,15,58,15,237,12" is palignr $12,%xmm5,%xmm5 (66 0F 3A 0F /r ib).
.align 4 |
||||
|
__vpaes_decrypt_core: |
||||
|
movl 240(%edx),%eax |
||||
|
# %ebx = base of the decryption tables inside the constant pool.
leal 608(%ebp),%ebx |
||||
|
movdqa %xmm6,%xmm1 |
||||
|
movdqa -64(%ebx),%xmm2 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
# %ecx = ((rounds * 16) XOR 48) AND 48: selects one of four 16-byte rows used
# for the final output permutation.
movl %eax,%ecx |
||||
|
psrld $4,%xmm1 |
||||
|
movdqu (%edx),%xmm5 |
||||
|
shll $4,%ecx |
||||
|
pand %xmm6,%xmm0 |
||||
|
.byte 102,15,56,0,208 |
||||
|
movdqa -48(%ebx),%xmm0 |
||||
|
xorl $48,%ecx |
||||
|
.byte 102,15,56,0,193 |
||||
|
andl $48,%ecx |
||||
|
pxor %xmm5,%xmm2 |
||||
|
# %xmm5 now holds the byte-rotation pattern applied between lookups below.
movdqa 176(%ebp),%xmm5 |
||||
|
pxor %xmm2,%xmm0 |
||||
|
# Last flag-setting instruction before the first pass through L002dec_entry
# (leal and jmp leave EFLAGS alone), so the jnz there is taken.
addl $16,%edx |
||||
|
leal -352(%ebx,%ecx,1),%ecx |
||||
|
jmp L002dec_entry |
||||
|
.align 4,0x90 |
||||
|
# Middle round: four pairs of table lookups (rows -32..80 of %ebx), each
# combined by XOR and followed by a pshufb with the pattern in %xmm5.
L003dec_loop: |
||||
|
movdqa -32(%ebx),%xmm4 |
||||
|
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa -16(%ebx),%xmm0 |
||||
|
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
addl $16,%edx |
||||
|
.byte 102,15,56,0,197 |
||||
|
movdqa (%ebx),%xmm4 |
||||
|
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa 16(%ebx),%xmm0 |
||||
|
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
# Round counter; ZF is consumed by the jnz at the end of the entry section
# (only SSE moves, XORs and shuffles execute in between).
subl $1,%eax |
||||
|
.byte 102,15,56,0,197 |
||||
|
movdqa 32(%ebx),%xmm4 |
||||
|
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa 48(%ebx),%xmm0 |
||||
|
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
.byte 102,15,56,0,197 |
||||
|
movdqa 64(%ebx),%xmm4 |
||||
|
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa 80(%ebx),%xmm0 |
||||
|
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
# Rotate the pattern register by 12 bytes for the next round.
.byte 102,15,58,15,237,12 |
||||
|
# Per-round entry stage: nibble split followed by chained pshufb lookups in
# %xmm7 and -32(%ebp); leaves its results in %xmm2/%xmm3 and loads the next
# round key into %xmm0.
L002dec_entry: |
||||
|
movdqa %xmm6,%xmm1 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm6,%xmm0 |
||||
|
movdqa -32(%ebp),%xmm2 |
||||
|
.byte 102,15,56,0,208 |
||||
|
pxor %xmm1,%xmm0 |
||||
|
movdqa %xmm7,%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
movdqa %xmm7,%xmm4 |
||||
|
.byte 102,15,56,0,224 |
||||
|
pxor %xmm2,%xmm4 |
||||
|
movdqa %xmm7,%xmm2 |
||||
|
.byte 102,15,56,0,211 |
||||
|
pxor %xmm0,%xmm2 |
||||
|
movdqa %xmm7,%xmm3 |
||||
|
.byte 102,15,56,0,220 |
||||
|
pxor %xmm1,%xmm3 |
||||
|
movdqu (%edx),%xmm0 |
||||
|
jnz L003dec_loop |
||||
|
# Last round: output tables at 96/112(%ebx), final round key, then the output
# permutation row selected through %ecx.
movdqa 96(%ebx),%xmm4 |
||||
|
.byte 102,15,56,0,226 |
||||
|
pxor %xmm0,%xmm4 |
||||
|
movdqa 112(%ebx),%xmm0 |
||||
|
movdqa (%ecx),%xmm2 |
||||
|
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
.byte 102,15,56,0,194 |
||||
|
ret |
||||
|
# __vpaes_schedule_core: expand a user key into a vpaes key schedule.
# Register-based private convention:
#   in   %ebp  constant-base offset relative to this call's return address
#        %esi  pointer to the raw key bytes
#        %edx  pointer to the schedule being written (moved by the mangle steps)
#        %eax  key size in bits: above 192 takes the 256-bit path, exactly 192
#              the 192-bit path, anything else the 128-bit path
#        %edi  zero for an encryption schedule, non-zero for decryption
#        %ecx  byte offset (used as 256(%ebp,%ecx)) selecting a permutation row
#   Uses caller-owned scratch at 4(%esp) and 20(%esp) (relative to entry, i.e.
#   just above the return address); both are accessed with movdqa, so the
#   caller must have %esp+4 16-byte aligned.
#   All of %xmm0-%xmm7 are zeroed before returning.
# ".byte 102,15,56,0,..." is pshufb; ".byte 102,15,58,15,198,8" is
# palignr $8,%xmm6,%xmm0.
.align 4 |
||||
|
__vpaes_schedule_core: |
||||
|
# Make %ebp absolute by adding the return address.
addl (%esp),%ebp |
||||
|
movdqu (%esi),%xmm0 |
||||
|
# Round-constant state, kept in the caller's scratch slot; it is read and
# updated by __vpaes_schedule_round as 8(%esp) (one return address deeper).
movdqa 320(%ebp),%xmm2 |
||||
|
movdqa %xmm0,%xmm3 |
||||
|
leal (%ebp),%ebx |
||||
|
movdqa %xmm2,4(%esp) |
||||
|
# Transform the first 16 key bytes with the tables at (%ebx)/16(%ebx).
call __vpaes_schedule_transform |
||||
|
movdqa %xmm0,%xmm7 |
||||
|
testl %edi,%edi |
||||
|
jnz L004schedule_am_decrypting |
||||
|
# Encrypting: the transformed key is the first schedule entry.
movdqu %xmm0,(%edx) |
||||
|
jmp L005schedule_go |
||||
|
L004schedule_am_decrypting: |
||||
|
# Decrypting: store the untransformed key, permuted by the selected row.
movdqa 256(%ebp,%ecx,1),%xmm1 |
||||
|
.byte 102,15,56,0,217 |
||||
|
movdqu %xmm3,(%edx) |
||||
|
xorl $48,%ecx |
||||
|
L005schedule_go: |
||||
|
cmpl $192,%eax |
||||
|
ja L006schedule_256 |
||||
|
je L007schedule_192 |
||||
|
# 128-bit key: 10 rounds, writing one mangled key per round; the last one is
# written by L010schedule_mangle_last.
L008schedule_128: |
||||
|
movl $10,%eax |
||||
|
L009loop_schedule_128: |
||||
|
call __vpaes_schedule_round |
||||
|
decl %eax |
||||
|
jz L010schedule_mangle_last |
||||
|
call __vpaes_schedule_mangle |
||||
|
jmp L009loop_schedule_128 |
||||
|
.align 4,0x90 |
||||
|
# 192-bit key: bytes 8..23 are transformed into %xmm6 with its low half
# cleared; each of the 4 iterations runs two rounds and up to three mangles.
L007schedule_192: |
||||
|
movdqu 8(%esi),%xmm0 |
||||
|
call __vpaes_schedule_transform |
||||
|
movdqa %xmm0,%xmm6 |
||||
|
pxor %xmm4,%xmm4 |
||||
|
movhlps %xmm4,%xmm6 |
||||
|
movl $4,%eax |
||||
|
L011loop_schedule_192: |
||||
|
call __vpaes_schedule_round |
||||
|
.byte 102,15,58,15,198,8 |
||||
|
call __vpaes_schedule_mangle |
||||
|
call __vpaes_schedule_192_smear |
||||
|
call __vpaes_schedule_mangle |
||||
|
call __vpaes_schedule_round |
||||
|
decl %eax |
||||
|
jz L010schedule_mangle_last |
||||
|
call __vpaes_schedule_mangle |
||||
|
call __vpaes_schedule_192_smear |
||||
|
jmp L011loop_schedule_192 |
||||
|
.align 4,0x90 |
||||
|
# 256-bit key: bytes 16..31 are transformed first; each of the 7 iterations
# runs one full round and, except on the last, one "low" round on the other
# key half.
L006schedule_256: |
||||
|
movdqu 16(%esi),%xmm0 |
||||
|
call __vpaes_schedule_transform |
||||
|
movl $7,%eax |
||||
|
L012loop_schedule_256: |
||||
|
call __vpaes_schedule_mangle |
||||
|
movdqa %xmm0,%xmm6 |
||||
|
call __vpaes_schedule_round |
||||
|
decl %eax |
||||
|
jz L010schedule_mangle_last |
||||
|
call __vpaes_schedule_mangle |
||||
|
# Low round: broadcast the top dword of %xmm0, temporarily swap %xmm7 for the
# other key half (saved in the second scratch slot), and enter the round
# routine past its round-constant/rotation prologue.
pshufd $255,%xmm0,%xmm0 |
||||
|
movdqa %xmm7,20(%esp) |
||||
|
movdqa %xmm6,%xmm7 |
||||
|
call L_vpaes_schedule_low_round |
||||
|
movdqa 20(%esp),%xmm7 |
||||
|
jmp L012loop_schedule_256 |
||||
|
.align 4,0x90 |
||||
|
# Write the final schedule entry. The decrypt case uses the tables at
# 384(%ebp); the encrypt case first applies the selected permutation row,
# uses the tables at 352(%ebp) and nets %edx += 16 (+32 then -16).
L010schedule_mangle_last: |
||||
|
leal 384(%ebp),%ebx |
||||
|
testl %edi,%edi |
||||
|
jnz L013schedule_mangle_last_dec |
||||
|
movdqa 256(%ebp,%ecx,1),%xmm1 |
||||
|
.byte 102,15,56,0,193 |
||||
|
leal 352(%ebp),%ebx |
||||
|
addl $32,%edx |
||||
|
L013schedule_mangle_last_dec: |
||||
|
addl $-16,%edx |
||||
|
pxor 336(%ebp),%xmm0 |
||||
|
call __vpaes_schedule_transform |
||||
|
movdqu %xmm0,(%edx) |
||||
|
# Clear every xmm register used above (presumably so no key material is left
# behind in registers).
pxor %xmm0,%xmm0 |
||||
|
pxor %xmm1,%xmm1 |
||||
|
pxor %xmm2,%xmm2 |
||||
|
pxor %xmm3,%xmm3 |
||||
|
pxor %xmm4,%xmm4 |
||||
|
pxor %xmm5,%xmm5 |
||||
|
pxor %xmm6,%xmm6 |
||||
|
pxor %xmm7,%xmm7 |
||||
|
ret |
||||
|
# __vpaes_schedule_192_smear: helper for the 192-bit key schedule.
#   in   %xmm6, %xmm7 = schedule state
#   out  %xmm0 = %xmm6 XOR pshufd($128, %xmm6) XOR pshufd($254, %xmm7);
#        %xmm6 = the same value with its low 64 bits cleared
#   clobbers %xmm1 (left as zero)
.align 4 |
||||
|
__vpaes_schedule_192_smear: |
||||
|
pshufd $128,%xmm6,%xmm0 |
||||
|
pxor %xmm0,%xmm6 |
||||
|
pshufd $254,%xmm7,%xmm0 |
||||
|
pxor %xmm0,%xmm6 |
||||
|
movdqa %xmm6,%xmm0 |
||||
|
# Zero the low half of %xmm6: movhlps copies the (zero) high half of %xmm1
# into the low half of %xmm6.
pxor %xmm1,%xmm1 |
||||
|
movhlps %xmm1,%xmm6 |
||||
|
ret |
||||
|
# __vpaes_schedule_round: one key-schedule round, with a second entry point
# L_vpaes_schedule_low_round that skips the round-constant and rotation steps.
#   in   %xmm0 = round input, %xmm7 = previous round key, %ebp = constant base,
#        8(%esp) = 16-byte round-constant state (the caller's slot just above
#        its own return address; full entry only)
#   out  %xmm0 = %xmm7 = new round key; the round-constant state is rotated
#   clobbers %xmm1-%xmm5
# ".byte 102,15,58,15,<modrm>,<imm>" is a hand-encoded palignr
# (66 0F 3A 0F /r ib); ".byte 102,15,56,0,<modrm>" is pshufb.
.align 4 |
||||
|
__vpaes_schedule_round: |
||||
|
# Pull one byte off the round-constant state into %xmm1 (palignr $15 of the
# state against zero), rotate the state by one byte, and XOR the extracted
# byte into the running key.
movdqa 8(%esp),%xmm2 |
||||
|
pxor %xmm1,%xmm1 |
||||
|
.byte 102,15,58,15,202,15 |
||||
|
.byte 102,15,58,15,210,15 |
||||
|
pxor %xmm1,%xmm7 |
||||
|
# Broadcast the top dword of the input and rotate it by one byte.
pshufd $255,%xmm0,%xmm0 |
||||
|
.byte 102,15,58,15,192,1 |
||||
|
movdqa %xmm2,8(%esp) |
||||
|
L_vpaes_schedule_low_round: |
||||
|
# Smear %xmm7: x ^= x << 32 (bytes), then x ^= x << 64, then add the
# constant row at 336(%ebp).
movdqa %xmm7,%xmm1 |
||||
|
pslldq $4,%xmm7 |
||||
|
pxor %xmm1,%xmm7 |
||||
|
movdqa %xmm7,%xmm1 |
||||
|
pslldq $8,%xmm7 |
||||
|
pxor %xmm1,%xmm7 |
||||
|
pxor 336(%ebp),%xmm7 |
||||
|
# Substitution on %xmm0: same nibble-split / chained-pshufb sequence as the
# entry stage of the cipher cores, with the mask and table reloaded from
# -16(%ebp) and -48(%ebp) instead of %xmm6/%xmm7.
movdqa -16(%ebp),%xmm4 |
||||
|
movdqa -48(%ebp),%xmm5 |
||||
|
movdqa %xmm4,%xmm1 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm4,%xmm0 |
||||
|
movdqa -32(%ebp),%xmm2 |
||||
|
.byte 102,15,56,0,208 |
||||
|
pxor %xmm1,%xmm0 |
||||
|
movdqa %xmm5,%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
movdqa %xmm5,%xmm4 |
||||
|
.byte 102,15,56,0,224 |
||||
|
pxor %xmm2,%xmm4 |
||||
|
movdqa %xmm5,%xmm2 |
||||
|
.byte 102,15,56,0,211 |
||||
|
pxor %xmm0,%xmm2 |
||||
|
movdqa %xmm5,%xmm3 |
||||
|
.byte 102,15,56,0,220 |
||||
|
pxor %xmm1,%xmm3 |
||||
|
# Output lookups through the tables at 32/48(%ebp), combined by XOR.
movdqa 32(%ebp),%xmm4 |
||||
|
.byte 102,15,56,0,226 |
||||
|
movdqa 48(%ebp),%xmm0 |
||||
|
.byte 102,15,56,0,195 |
||||
|
pxor %xmm4,%xmm0 |
||||
|
# Fold in the smeared previous key; the result is the new key in both
# %xmm0 and %xmm7.
pxor %xmm7,%xmm0 |
||||
|
movdqa %xmm0,%xmm7 |
||||
|
ret |
||||
|
.align 4 |
||||
|
# __vpaes_schedule_transform -- internal helper: two-table byte transform.
# In:    %xmm0 = value; %ebx = address of two consecutive 16-byte tables
#        (read with movdqa, so it must be 16-byte aligned);
#        %ebp = constant base (the split mask is read from -16(%ebp)).
# Out:   %xmm0 = pshufb(table at (%ebx), x & mask)
#                ^ pshufb(table at 16(%ebx), (x & ~mask) >> 4)
# Clobb: %xmm1, %xmm2
__vpaes_schedule_transform: |
||||
|
movdqa -16(%ebp),%xmm2 |
||||
|
movdqa %xmm2,%xmm1 |
||||
|
pandn %xmm0,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm2,%xmm0 |
||||
|
movdqa (%ebx),%xmm2 |
||||
|
# hand-encoded pshufb %xmm0,%xmm2
.byte 102,15,56,0,208 |
||||
|
movdqa 16(%ebx),%xmm0 |
||||
|
# hand-encoded pshufb %xmm1,%xmm0
.byte 102,15,56,0,193 |
||||
|
pxor %xmm2,%xmm0 |
||||
|
ret |
||||
|
.align 4 |
||||
|
# __vpaes_schedule_mangle -- internal helper: transforms the value in
# %xmm0 and stores the result through %edx.
# In:    %xmm0 = value (left unmodified; the work is done on a copy)
#        %edi  = direction flag: 0 takes the first path, non-zero the
#                L014schedule_mangle_dec path (the public entry points
#                set 0 for set_encrypt_key and 1 for set_decrypt_key)
#        %edx  = output pointer: advanced by +16 (flag 0) or -16
#                (flag != 0) before the store
#        %ecx  = byte offset (kept within 0..48 by the `andl $48`) into
#                the table at 256(%ebp); stepped by -16 modulo 64 per call
#        %ebp  = constant base (offsets -16, 128, 256, 336, 416.. are read)
# Out:   16 bytes stored with movdqu at the updated %edx
# Clobb: %xmm1-%xmm5, %ecx, %edx; %esi as well on the non-zero-flag path
#
# Hand-encoded instructions: .byte 102,15,56,0,M is
# pshufb %xmm(M&7),%xmm((M>>3)&7):
#   229 = pshufb %xmm5,%xmm4   212 = pshufb %xmm4,%xmm2
#   217 = pshufb %xmm1,%xmm3   221 = pshufb %xmm5,%xmm3
__vpaes_schedule_mangle: |
||||
|
movdqa %xmm0,%xmm4 |
||||
|
movdqa 128(%ebp),%xmm5 |
||||
|
testl %edi,%edi |
||||
|
jnz L014schedule_mangle_dec |
||||
|
addl $16,%edx |
||||
|
pxor 336(%ebp),%xmm4 |
||||
|
# %xmm3 = XOR of the copy shuffled by %xmm5 once, twice and three times
.byte 102,15,56,0,229 |
||||
|
movdqa %xmm4,%xmm3 |
||||
|
.byte 102,15,56,0,229 |
||||
|
pxor %xmm4,%xmm3 |
||||
|
.byte 102,15,56,0,229 |
||||
|
pxor %xmm4,%xmm3 |
||||
|
jmp L015schedule_mangle_both |
||||
|
.align 4,0x90 |
||||
|
L014schedule_mangle_dec: |
||||
|
# split the copy with the mask at -16(%ebp): %xmm4 = x & mask,
# %xmm1 = (x & ~mask) >> 4; %esi walks four table pairs at 416(%ebp)
movdqa -16(%ebp),%xmm2 |
||||
|
leal 416(%ebp),%esi |
||||
|
movdqa %xmm2,%xmm1 |
||||
|
pandn %xmm4,%xmm1 |
||||
|
psrld $4,%xmm1 |
||||
|
pand %xmm2,%xmm4 |
||||
|
# table pair 0: (%esi) / 16(%esi)
movdqa (%esi),%xmm2 |
||||
|
.byte 102,15,56,0,212 |
||||
|
movdqa 16(%esi),%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
.byte 102,15,56,0,221 |
||||
|
# table pair 1: 32(%esi) / 48(%esi)
movdqa 32(%esi),%xmm2 |
||||
|
.byte 102,15,56,0,212 |
||||
|
pxor %xmm3,%xmm2 |
||||
|
movdqa 48(%esi),%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
.byte 102,15,56,0,221 |
||||
|
# table pair 2: 64(%esi) / 80(%esi)
movdqa 64(%esi),%xmm2 |
||||
|
.byte 102,15,56,0,212 |
||||
|
pxor %xmm3,%xmm2 |
||||
|
movdqa 80(%esi),%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
.byte 102,15,56,0,221 |
||||
|
# table pair 3: 96(%esi) / 112(%esi) (no trailing shuffle by %xmm5)
movdqa 96(%esi),%xmm2 |
||||
|
.byte 102,15,56,0,212 |
||||
|
pxor %xmm3,%xmm2 |
||||
|
movdqa 112(%esi),%xmm3 |
||||
|
.byte 102,15,56,0,217 |
||||
|
pxor %xmm2,%xmm3 |
||||
|
addl $-16,%edx |
||||
|
L015schedule_mangle_both: |
||||
|
# common tail: permute %xmm3 by the table entry selected by %ecx,
# step %ecx, store the result
movdqa 256(%ebp,%ecx,1),%xmm1 |
||||
|
.byte 102,15,56,0,217 |
||||
|
addl $-16,%ecx |
||||
|
andl $48,%ecx |
||||
|
movdqu %xmm3,(%edx) |
||||
|
ret |
||||
|
# _vpaes_set_encrypt_key -- exported entry point, three stack arguments.
# Presumably the C prototype is
#   int vpaes_set_encrypt_key(const unsigned char *userKey, int bits,
#                             AES_KEY *key);
# (the prototype is not in this file -- confirm against the C header).
# In:    arg1 -> %esi, arg2 -> %eax, arg3 -> %edx
# Out:   %eax = 0; (arg2 >> 5) + 5 is stored at offset 240 of arg3; the
#        rest is written by __vpaes_schedule_core
# Saves/restores %ebp, %ebx, %esi, %edi.
.globl _vpaes_set_encrypt_key |
||||
|
.align 4 |
||||
|
_vpaes_set_encrypt_key: |
||||
|
L_vpaes_set_encrypt_key_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
# after four pushes the arguments sit at 20/24/28(%esp)
movl 20(%esp),%esi |
||||
|
# build a 16-byte-aligned scratch frame at least 56 bytes below %esp
leal -56(%esp),%ebx |
||||
|
movl 24(%esp),%eax |
||||
|
andl $-16,%ebx |
||||
|
movl 28(%esp),%edx |
||||
|
# switch to the aligned frame; the old %esp is kept at 48(%esp)
xchgl %esp,%ebx |
||||
|
movl %ebx,48(%esp) |
||||
|
# 240(arg3) = arg2/32 + 5
movl %eax,%ebx |
||||
|
shrl $5,%ebx |
||||
|
addl $5,%ebx |
||||
|
movl %ebx,240(%edx) |
||||
|
# %ecx = initial table offset, %edi = 0 selects the flag-zero path in
# __vpaes_schedule_mangle
movl $48,%ecx |
||||
|
movl $0,%edi |
||||
|
# %ebp = link-time distance from L016pic_point to the constants; the
# call pushes L016pic_point as the return address. Presumably the
# callee adds the two to form the run-time address -- its entry is
# outside this chunk.
leal L_vpaes_consts+0x30-L016pic_point,%ebp |
||||
|
call __vpaes_schedule_core |
||||
|
L016pic_point: |
||||
|
# back to the caller's stack
movl 48(%esp),%esp |
||||
|
xorl %eax,%eax |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
||||
|
# _vpaes_set_decrypt_key -- exported entry point, three stack arguments.
# Presumably the C prototype is
#   int vpaes_set_decrypt_key(const unsigned char *userKey, int bits,
#                             AES_KEY *key);
# (the prototype is not in this file -- confirm against the C header).
# In:    arg1 -> %esi, arg2 -> %eax, arg3 -> %edx
# Out:   %eax = 0; (arg2 >> 5) + 5 is stored at offset 240 of arg3
# Differs from _vpaes_set_encrypt_key in three ways: %edx starts at the
# far end of the output area, %edi = 1, and %ecx is derived from arg2.
# Saves/restores %ebp, %ebx, %esi, %edi.
.globl _vpaes_set_decrypt_key |
||||
|
.align 4 |
||||
|
_vpaes_set_decrypt_key: |
||||
|
L_vpaes_set_decrypt_key_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
# after four pushes the arguments sit at 20/24/28(%esp)
movl 20(%esp),%esi |
||||
|
# build a 16-byte-aligned scratch frame at least 56 bytes below %esp
leal -56(%esp),%ebx |
||||
|
movl 24(%esp),%eax |
||||
|
andl $-16,%ebx |
||||
|
movl 28(%esp),%edx |
||||
|
# switch to the aligned frame; the old %esp is kept at 48(%esp)
xchgl %esp,%ebx |
||||
|
movl %ebx,48(%esp) |
||||
|
# n = arg2/32 + 5, stored at 240(arg3)
movl %eax,%ebx |
||||
|
shrl $5,%ebx |
||||
|
addl $5,%ebx |
||||
|
movl %ebx,240(%edx) |
||||
|
# %edx = arg3 + 16 + 16*n: the output pointer starts past the end and
# is stepped backwards by __vpaes_schedule_mangle when %edi != 0
shll $4,%ebx |
||||
|
leal 16(%edx,%ebx,1),%edx |
||||
|
movl $1,%edi |
||||
|
# %ecx = ((arg2 >> 1) & 32) ^ 32 : initial table offset (0 or 32)
movl %eax,%ecx |
||||
|
shrl $1,%ecx |
||||
|
andl $32,%ecx |
||||
|
xorl $32,%ecx |
||||
|
# %ebp = link-time distance from L017pic_point to the constants; the
# call pushes L017pic_point as the return address. Presumably the
# callee adds the two -- its entry is outside this chunk.
leal L_vpaes_consts+0x30-L017pic_point,%ebp |
||||
|
call __vpaes_schedule_core |
||||
|
L017pic_point: |
||||
|
# back to the caller's stack
movl 48(%esp),%esp |
||||
|
xorl %eax,%eax |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
||||
|
# _vpaes_encrypt -- exported entry point: processes one 16-byte block.
# Presumably the C prototype is
#   void vpaes_encrypt(const unsigned char *in, unsigned char *out,
#                      const AES_KEY *key);
# (the prototype is not in this file -- confirm against the C header).
# In:    arg1 -> %esi (16 bytes read, unaligned load)
#        arg2 -> %edi (16 bytes written, unaligned store)
#        arg3 -> %edx (passed on to __vpaes_encrypt_core)
# Saves/restores %ebp, %ebx, %esi, %edi. No return value is set here.
.globl _vpaes_encrypt |
||||
|
.align 4 |
||||
|
_vpaes_encrypt: |
||||
|
L_vpaes_encrypt_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
# %ebp = link-time distance from L018pic_point to the constants.
# __vpaes_preheat is defined outside this chunk; presumably it adds
# its return address to %ebp and loads the constant registers.
leal L_vpaes_consts+0x30-L018pic_point,%ebp |
||||
|
call __vpaes_preheat |
||||
|
L018pic_point: |
||||
|
# after four pushes the arguments sit at 20/24/28(%esp)
movl 20(%esp),%esi |
||||
|
# build a 16-byte-aligned scratch frame at least 56 bytes below %esp
leal -56(%esp),%ebx |
||||
|
movl 24(%esp),%edi |
||||
|
andl $-16,%ebx |
||||
|
movl 28(%esp),%edx |
||||
|
# switch to the aligned frame; the old %esp is kept at 48(%esp)
xchgl %esp,%ebx |
||||
|
movl %ebx,48(%esp) |
||||
|
# the block travels through the core routine in %xmm0
movdqu (%esi),%xmm0 |
||||
|
call __vpaes_encrypt_core |
||||
|
movdqu %xmm0,(%edi) |
||||
|
movl 48(%esp),%esp |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
||||
|
# _vpaes_decrypt -- exported entry point: processes one 16-byte block.
# Presumably the C prototype is
#   void vpaes_decrypt(const unsigned char *in, unsigned char *out,
#                      const AES_KEY *key);
# (the prototype is not in this file -- confirm against the C header).
# In:    arg1 -> %esi (16 bytes read, unaligned load)
#        arg2 -> %edi (16 bytes written, unaligned store)
#        arg3 -> %edx (passed on to __vpaes_decrypt_core)
# Saves/restores %ebp, %ebx, %esi, %edi. No return value is set here.
.globl _vpaes_decrypt |
||||
|
.align 4 |
||||
|
_vpaes_decrypt: |
||||
|
L_vpaes_decrypt_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
# %ebp = link-time distance from L019pic_point to the constants.
# __vpaes_preheat is defined outside this chunk; presumably it adds
# its return address to %ebp and loads the constant registers.
leal L_vpaes_consts+0x30-L019pic_point,%ebp |
||||
|
call __vpaes_preheat |
||||
|
L019pic_point: |
||||
|
# after four pushes the arguments sit at 20/24/28(%esp)
movl 20(%esp),%esi |
||||
|
# build a 16-byte-aligned scratch frame at least 56 bytes below %esp
leal -56(%esp),%ebx |
||||
|
movl 24(%esp),%edi |
||||
|
andl $-16,%ebx |
||||
|
movl 28(%esp),%edx |
||||
|
# switch to the aligned frame; the old %esp is kept at 48(%esp)
xchgl %esp,%ebx |
||||
|
movl %ebx,48(%esp) |
||||
|
# the block travels through the core routine in %xmm0
movdqu (%esi),%xmm0 |
||||
|
call __vpaes_decrypt_core |
||||
|
movdqu %xmm0,(%edi) |
||||
|
movl 48(%esp),%esp |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
||||
|
# _vpaes_cbc_encrypt -- exported entry point: chained block processing,
# six stack arguments. Presumably the C prototype is
#   void vpaes_cbc_encrypt(const unsigned char *in, unsigned char *out,
#                          size_t length, const AES_KEY *key,
#                          unsigned char *ivec, int enc);
# (the prototype is not in this file -- confirm against the C header).
# In:    arg1 -> %esi (input), arg2 -> %edi (output), arg3 -> %eax
#        (byte count), arg4 -> %edx, arg5 -> %ebp (16-byte chaining
#        value, read at entry and rewritten at exit), arg6 -> %ecx
#        (0 selects the decrypt loop, anything else the encrypt loop)
# A byte count below 16 returns immediately without touching memory.
# Only whole 16-byte blocks are processed; a trailing partial block is
# ignored.
# Scratch frame, relative to the aligned %esp:
#    0: output - input      4: arg4      8: arg5 pointer
#   16: previous chaining block (decrypt loop)
#   32: current input block (decrypt loop)
#   48: caller's %esp
# Saves/restores %ebp, %ebx, %esi, %edi.
.globl _vpaes_cbc_encrypt |
||||
|
.align 4 |
||||
|
_vpaes_cbc_encrypt: |
||||
|
L_vpaes_cbc_encrypt_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
movl 20(%esp),%esi |
||||
|
movl 24(%esp),%edi |
||||
|
movl 28(%esp),%eax |
||||
|
movl 32(%esp),%edx |
||||
|
# %eax = count - 16; a borrow means fewer than 16 bytes -> nothing to do
subl $16,%eax |
||||
|
jc L020cbc_abort |
||||
|
# build a 16-byte-aligned scratch frame at least 56 bytes below %esp
leal -56(%esp),%ebx |
||||
|
movl 36(%esp),%ebp |
||||
|
andl $-16,%ebx |
||||
|
movl 40(%esp),%ecx |
||||
|
xchgl %esp,%ebx |
||||
|
# %xmm1 carries the chaining value through both loops
movdqu (%ebp),%xmm1 |
||||
|
# keep only (output - input) so one moving pointer (%esi) addresses both
subl %esi,%edi |
||||
|
movl %ebx,48(%esp) |
||||
|
movl %edi,(%esp) |
||||
|
movl %edx,4(%esp) |
||||
|
movl %ebp,8(%esp) |
||||
|
# %edi = remaining byte count minus 16 (loop counter)
movl %eax,%edi |
||||
|
# %ebp = link-time distance from L021pic_point to the constants.
# __vpaes_preheat is defined outside this chunk; presumably it adds
# its return address to %ebp and loads the constant registers.
leal L_vpaes_consts+0x30-L021pic_point,%ebp |
||||
|
call __vpaes_preheat |
||||
|
L021pic_point: |
||||
|
cmpl $0,%ecx |
||||
|
je L022cbc_dec_loop |
||||
|
jmp L023cbc_enc_loop |
||||
|
.align 4,0x90 |
||||
|
L023cbc_enc_loop: |
||||
|
# block = input ^ chaining value, run the core, output becomes the
# next chaining value
movdqu (%esi),%xmm0 |
||||
|
pxor %xmm1,%xmm0 |
||||
|
call __vpaes_encrypt_core |
||||
|
# %ebx and %edx are reloaded from the frame after every core call
# (presumably because the core routine clobbers them -- confirm)
movl (%esp),%ebx |
||||
|
movl 4(%esp),%edx |
||||
|
movdqa %xmm0,%xmm1 |
||||
|
movdqu %xmm0,(%ebx,%esi,1) |
||||
|
leal 16(%esi),%esi |
||||
|
# loop while at least 16 more bytes remain (no borrow)
subl $16,%edi |
||||
|
jnc L023cbc_enc_loop |
||||
|
jmp L024cbc_done |
||||
|
.align 4,0x90 |
||||
|
L022cbc_dec_loop: |
||||
|
# save the previous chaining value and the current input block, run
# the core, XOR with the previous value; the saved input block becomes
# the next chaining value
movdqu (%esi),%xmm0 |
||||
|
movdqa %xmm1,16(%esp) |
||||
|
movdqa %xmm0,32(%esp) |
||||
|
call __vpaes_decrypt_core |
||||
|
movl (%esp),%ebx |
||||
|
movl 4(%esp),%edx |
||||
|
pxor 16(%esp),%xmm0 |
||||
|
movdqa 32(%esp),%xmm1 |
||||
|
movdqu %xmm0,(%ebx,%esi,1) |
||||
|
leal 16(%esi),%esi |
||||
|
subl $16,%edi |
||||
|
jnc L022cbc_dec_loop |
||||
|
L024cbc_done: |
||||
|
# write the final chaining value back through the arg5 pointer
movl 8(%esp),%ebx |
||||
|
movl 48(%esp),%esp |
||||
|
movdqu %xmm1,(%ebx) |
||||
|
L020cbc_abort: |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
@ -0,0 +1,718 @@ |
|||||
|
.file "ghash-x86.s" |
||||
|
.text |
||||
|
# _gcm_gmult_4bit_x86 -- exported entry point, integer-only code path.
# Going by the symbol name this is the GHASH "4-bit table" multiply;
# presumably void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16])
# (the prototype is not in this file -- confirm against the C caller).
# In:    arg1 -> %edi : 16-byte block, updated in place
#        arg2 -> %esi : table of 16-byte entries, indexed by nibble*16
# Stack frame (84 bytes): 0..15 = copy of the input block,
#        16..79 = 16-entry dword table indexed by the 4 bits shifted
#        out of the accumulator in each step.
# The 128-bit accumulator lives in %ebp:%edx:%ecx:%ebx (most to least
# significant for the 4-bit right shift).
# Nibbles are consumed low-then-high, from byte 15 down to byte 0.
# Saves/restores %ebp, %ebx, %esi, %edi.
.globl _gcm_gmult_4bit_x86 |
||||
|
.align 4 |
||||
|
_gcm_gmult_4bit_x86: |
||||
|
L_gcm_gmult_4bit_x86_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
subl $84,%esp |
||||
|
# 84 bytes of locals + 4 saved registers + return address = 104
movl 104(%esp),%edi |
||||
|
movl 108(%esp),%esi |
||||
|
movl (%edi),%ebp |
||||
|
movl 4(%edi),%edx |
||||
|
movl 8(%edi),%ecx |
||||
|
movl 12(%edi),%ebx |
||||
|
# build the 16-entry table at 16(%esp)..76(%esp)
movl $0,16(%esp) |
||||
|
movl $471859200,20(%esp) |
||||
|
movl $943718400,24(%esp) |
||||
|
movl $610271232,28(%esp) |
||||
|
movl $1887436800,32(%esp) |
||||
|
movl $1822425088,36(%esp) |
||||
|
movl $1220542464,40(%esp) |
||||
|
movl $1423966208,44(%esp) |
||||
|
movl $3774873600,48(%esp) |
||||
|
movl $4246732800,52(%esp) |
||||
|
movl $3644850176,56(%esp) |
||||
|
movl $3311403008,60(%esp) |
||||
|
movl $2441084928,64(%esp) |
||||
|
movl $2376073216,68(%esp) |
||||
|
movl $2847932416,72(%esp) |
||||
|
movl $3051356160,76(%esp) |
||||
|
# keep a byte-addressable copy of the input block at 0(%esp)
movl %ebp,(%esp) |
||||
|
movl %edx,4(%esp) |
||||
|
movl %ecx,8(%esp) |
||||
|
movl %ebx,12(%esp) |
||||
|
# %ebx = (low nibble of byte 15) * 16 : offset of the first table entry
shrl $20,%ebx |
||||
|
andl $240,%ebx |
||||
|
# seed the accumulator with that table entry
movl 4(%esi,%ebx,1),%ebp |
||||
|
movl (%esi,%ebx,1),%edx |
||||
|
movl 12(%esi,%ebx,1),%ecx |
||||
|
movl 8(%esi,%ebx,1),%ebx |
||||
|
xorl %eax,%eax |
||||
|
# %edi = index of the byte being consumed (15 down to 0)
movl $15,%edi |
||||
|
jmp L000x86_loop |
||||
|
.align 4,0x90 |
||||
|
L000x86_loop: |
||||
|
# first half: shift the accumulator right by 4, fold the shifted-out
# nibble through the stack table, then XOR in the table entry for the
# HIGH nibble of byte %edi
movb %bl,%al |
||||
|
shrdl $4,%ecx,%ebx |
||||
|
andb $15,%al |
||||
|
shrdl $4,%edx,%ecx |
||||
|
shrdl $4,%ebp,%edx |
||||
|
shrl $4,%ebp |
||||
|
xorl 16(%esp,%eax,4),%ebp |
||||
|
movb (%esp,%edi,1),%al |
||||
|
andb $240,%al |
||||
|
xorl 8(%esi,%eax,1),%ebx |
||||
|
xorl 12(%esi,%eax,1),%ecx |
||||
|
xorl (%esi,%eax,1),%edx |
||||
|
xorl 4(%esi,%eax,1),%ebp |
||||
|
# move to the next lower byte; done once byte 0 has been consumed
decl %edi |
||||
|
js L001x86_break |
||||
|
# second half: same shift-and-fold, then the table entry for the LOW
# nibble of the new byte %edi
movb %bl,%al |
||||
|
shrdl $4,%ecx,%ebx |
||||
|
andb $15,%al |
||||
|
shrdl $4,%edx,%ecx |
||||
|
shrdl $4,%ebp,%edx |
||||
|
shrl $4,%ebp |
||||
|
xorl 16(%esp,%eax,4),%ebp |
||||
|
movb (%esp,%edi,1),%al |
||||
|
shlb $4,%al |
||||
|
xorl 8(%esi,%eax,1),%ebx |
||||
|
xorl 12(%esi,%eax,1),%ecx |
||||
|
xorl (%esi,%eax,1),%edx |
||||
|
xorl 4(%esi,%eax,1),%ebp |
||||
|
jmp L000x86_loop |
||||
|
.align 4,0x90 |
||||
|
L001x86_break: |
||||
|
# byte-swap each dword and store the result back through arg1
bswap %ebx |
||||
|
bswap %ecx |
||||
|
bswap %edx |
||||
|
bswap %ebp |
||||
|
movl 104(%esp),%edi |
||||
|
movl %ebx,12(%edi) |
||||
|
movl %ecx,8(%edi) |
||||
|
movl %edx,4(%edi) |
||||
|
movl %ebp,(%edi) |
||||
|
addl $84,%esp |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
||||
|
# _gcm_ghash_4bit_x86 -- exported entry point, integer-only code path:
# folds a run of 16-byte input blocks into a 16-byte state. Presumably
#   void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16],
#                           const u8 *inp, size_t len);
# (the prototype is not in this file -- confirm against the C caller).
# In:    arg1 -> %ebx : 16-byte state, read at entry, written at exit
#        arg2 -> %esi : table of 16-byte entries, indexed by nibble*16
#        arg3 -> %edi : input pointer
#        arg4 -> %ecx : byte count; arg3 + arg4 is stored back into the
#                       arg4 slot as the end pointer
# The outer loop is bottom-tested: one block is always processed, and
# it repeats while the advanced input pointer is below the end pointer
# (unsigned). The caller therefore has to supply at least 16 bytes.
# Per block: state ^= input block, then the same nibble-by-nibble
# table walk as _gcm_gmult_4bit_x86.
# Stack frame (84 bytes): 0..15 = working copy of state ^ input,
#        16..79 = 16-entry dword table indexed by the shifted-out nibble.
# Saves/restores %ebp, %ebx, %esi, %edi.
.globl _gcm_ghash_4bit_x86 |
||||
|
.align 4 |
||||
|
_gcm_ghash_4bit_x86: |
||||
|
L_gcm_ghash_4bit_x86_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
subl $84,%esp |
||||
|
# 84 bytes of locals + 4 saved registers + return address = 104
movl 104(%esp),%ebx |
||||
|
movl 108(%esp),%esi |
||||
|
movl 112(%esp),%edi |
||||
|
movl 116(%esp),%ecx |
||||
|
# turn the byte count into an end pointer, kept in the arg4 slot
addl %edi,%ecx |
||||
|
movl %ecx,116(%esp) |
||||
|
movl (%ebx),%ebp |
||||
|
movl 4(%ebx),%edx |
||||
|
movl 8(%ebx),%ecx |
||||
|
movl 12(%ebx),%ebx |
||||
|
# build the 16-entry table at 16(%esp)..76(%esp)
movl $0,16(%esp) |
||||
|
movl $471859200,20(%esp) |
||||
|
movl $943718400,24(%esp) |
||||
|
movl $610271232,28(%esp) |
||||
|
movl $1887436800,32(%esp) |
||||
|
movl $1822425088,36(%esp) |
||||
|
movl $1220542464,40(%esp) |
||||
|
movl $1423966208,44(%esp) |
||||
|
movl $3774873600,48(%esp) |
||||
|
movl $4246732800,52(%esp) |
||||
|
movl $3644850176,56(%esp) |
||||
|
movl $3311403008,60(%esp) |
||||
|
movl $2441084928,64(%esp) |
||||
|
movl $2376073216,68(%esp) |
||||
|
movl $2847932416,72(%esp) |
||||
|
movl $3051356160,76(%esp) |
||||
|
.align 4,0x90 |
||||
|
L002x86_outer_loop: |
||||
|
# state ^= next input block (%edi = input pointer here)
xorl 12(%edi),%ebx |
||||
|
xorl 8(%edi),%ecx |
||||
|
xorl 4(%edi),%edx |
||||
|
xorl (%edi),%ebp |
||||
|
# byte-addressable working copy at 0(%esp)
movl %ebx,12(%esp) |
||||
|
movl %ecx,8(%esp) |
||||
|
movl %edx,4(%esp) |
||||
|
movl %ebp,(%esp) |
||||
|
# %ebx = (low nibble of byte 15) * 16 : offset of the first table entry
shrl $20,%ebx |
||||
|
andl $240,%ebx |
||||
|
movl 4(%esi,%ebx,1),%ebp |
||||
|
movl (%esi,%ebx,1),%edx |
||||
|
movl 12(%esi,%ebx,1),%ecx |
||||
|
movl 8(%esi,%ebx,1),%ebx |
||||
|
xorl %eax,%eax |
||||
|
# %edi is reused as the byte index (15 down to 0); the input pointer
# is reloaded from 112(%esp) after the inner loop
movl $15,%edi |
||||
|
jmp L003x86_loop |
||||
|
.align 4,0x90 |
||||
|
L003x86_loop: |
||||
|
# first half: shift the accumulator right by 4, fold the shifted-out
# nibble through the stack table, XOR in the entry for the HIGH nibble
# of byte %edi
movb %bl,%al |
||||
|
shrdl $4,%ecx,%ebx |
||||
|
andb $15,%al |
||||
|
shrdl $4,%edx,%ecx |
||||
|
shrdl $4,%ebp,%edx |
||||
|
shrl $4,%ebp |
||||
|
xorl 16(%esp,%eax,4),%ebp |
||||
|
movb (%esp,%edi,1),%al |
||||
|
andb $240,%al |
||||
|
xorl 8(%esi,%eax,1),%ebx |
||||
|
xorl 12(%esi,%eax,1),%ecx |
||||
|
xorl (%esi,%eax,1),%edx |
||||
|
xorl 4(%esi,%eax,1),%ebp |
||||
|
decl %edi |
||||
|
js L004x86_break |
||||
|
# second half: same shift-and-fold, then the entry for the LOW nibble
# of the new byte %edi
movb %bl,%al |
||||
|
shrdl $4,%ecx,%ebx |
||||
|
andb $15,%al |
||||
|
shrdl $4,%edx,%ecx |
||||
|
shrdl $4,%ebp,%edx |
||||
|
shrl $4,%ebp |
||||
|
xorl 16(%esp,%eax,4),%ebp |
||||
|
movb (%esp,%edi,1),%al |
||||
|
shlb $4,%al |
||||
|
xorl 8(%esi,%eax,1),%ebx |
||||
|
xorl 12(%esi,%eax,1),%ecx |
||||
|
xorl (%esi,%eax,1),%edx |
||||
|
xorl 4(%esi,%eax,1),%ebp |
||||
|
jmp L003x86_loop |
||||
|
.align 4,0x90 |
||||
|
L004x86_break: |
||||
|
bswap %ebx |
||||
|
bswap %ecx |
||||
|
bswap %edx |
||||
|
bswap %ebp |
||||
|
# advance the input pointer; cmpl sets the flags, movl does not
# disturb them, so jb still tests "pointer < end" (unsigned)
movl 112(%esp),%edi |
||||
|
leal 16(%edi),%edi |
||||
|
cmpl 116(%esp),%edi |
||||
|
movl %edi,112(%esp) |
||||
|
jb L002x86_outer_loop |
||||
|
# store the final state back through arg1
movl 104(%esp),%edi |
||||
|
movl %ebx,12(%edi) |
||||
|
movl %ecx,8(%edi) |
||||
|
movl %edx,4(%edi) |
||||
|
movl %ebp,(%edi) |
||||
|
addl $84,%esp |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
||||
|
.align 4 |
||||
|
# __mmx_gmult_4bit_inner -- internal helper shared by
# _gcm_gmult_4bit_mmx and _gcm_ghash_4bit_mmx. It is fully unrolled:
# one stanza per input byte, from byte 14 down to byte 0, with byte 15
# supplied in a register.
# In:    %ebx = value of byte 15 of the block (only the low 8 bits are used)
#        %edi = pointer to the 16-byte block (bytes 14..0 are read)
#        %esi = table of 16-byte entries, indexed by nibble*16
#        %eax = address of Lrem_4bit (8-byte entries, indexed by a
#               4-bit value)
# Out:   %ebp, %edx, %ecx, %ebx = the four result dwords, already
#        byte-swapped; both callers store them at offsets 0, 4, 8, 12
# Clobb: %ecx, %edx, %edi, %mm0-%mm2, flags. MMX state is left live;
#        the callers execute emms.
# %mm0/%mm1 hold the running 128-bit value (low/high qword for the
# 4-bit right shift); %mm2 carries the 4 bits that cross from %mm1
# into %mm0.
__mmx_gmult_4bit_inner: |
||||
|
# %ecx = (low nibble of byte 15) * 16, %edx = (high nibble) * 16
xorl %ecx,%ecx |
||||
|
movl %ebx,%edx |
||||
|
movb %dl,%cl |
||||
|
shlb $4,%cl |
||||
|
andl $240,%edx |
||||
|
movq 8(%esi,%ecx,1),%mm0 |
||||
|
movq (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# byte 14 of the block at (%edi)
movb 14(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# byte 13 of the block at (%edi)
movb 13(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# byte 12 of the block at (%edi)
movb 12(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# byte 11 of the block at (%edi)
movb 11(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# byte 10 of the block at (%edi)
movb 10(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# byte 9 of the block at (%edi)
movb 9(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# byte 8 of the block at (%edi)
movb 8(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# byte 7 of the block at (%edi)
movb 7(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# byte 6 of the block at (%edi)
movb 6(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# byte 5 of the block at (%edi)
movb 5(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# byte 4 of the block at (%edi)
movb 4(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# byte 3 of the block at (%edi)
movb 3(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# byte 2 of the block at (%edi)
movb 2(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# byte 1 of the block at (%edi)
movb 1(%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# byte 0 of the block at (%edi)
movb (%edi),%cl |
||||
|
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movl %ecx,%edx |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
shlb $4,%cl |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%ecx,1),%mm0 |
||||
|
psllq $60,%mm2 |
||||
|
andl $240,%edx |
||||
|
pxor (%eax,%ebp,8),%mm1 |
||||
|
andl $15,%ebx |
||||
|
pxor (%esi,%ecx,1),%mm1 |
||||
|
movd %mm0,%ebp |
||||
|
pxor %mm2,%mm0 |
||||
|
psrlq $4,%mm0 |
||||
|
movq %mm1,%mm2 |
||||
|
psrlq $4,%mm1 |
||||
|
pxor 8(%esi,%edx,1),%mm0 |
||||
|
# tail: apply the last two pending Lrem_4bit folds and unpack
# %mm0/%mm1 into four byte-swapped dwords
psllq $60,%mm2 |
||||
|
pxor (%eax,%ebx,8),%mm1 |
||||
|
andl $15,%ebp |
||||
|
pxor (%esi,%edx,1),%mm1 |
||||
|
movd %mm0,%ebx |
||||
|
pxor %mm2,%mm0 |
||||
|
# the final fold is done in integer registers: the upper dword of the
# selected Lrem_4bit entry, shifted left by 4, is XORed into the top
# result dword
movl 4(%eax,%ebp,8),%edi |
||||
|
psrlq $32,%mm0 |
||||
|
movd %mm1,%edx |
||||
|
psrlq $32,%mm1 |
||||
|
movd %mm0,%ecx |
||||
|
movd %mm1,%ebp |
||||
|
shll $4,%edi |
||||
|
bswap %ebx |
||||
|
bswap %edx |
||||
|
bswap %ecx |
||||
|
xorl %edi,%ebp |
||||
|
bswap %ebp |
||||
|
ret |
||||
|
# _gcm_gmult_4bit_mmx -- exported entry point, MMX code path; a thin
# wrapper around __mmx_gmult_4bit_inner. Presumably
#   void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
# (the prototype is not in this file -- confirm against the C caller).
# In:    arg1 -> %edi : 16-byte block, updated in place
#        arg2 -> %esi : table handed to the inner routine
# Saves/restores %ebp, %ebx, %esi, %edi; executes emms before returning.
.globl _gcm_gmult_4bit_mmx |
||||
|
.align 4 |
||||
|
_gcm_gmult_4bit_mmx: |
||||
|
L_gcm_gmult_4bit_mmx_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
movl 20(%esp),%edi |
||||
|
movl 24(%esp),%esi |
||||
|
# position-independent address of Lrem_4bit: call/pop yields the
# run-time address of L005pic_point, then the link-time offset is added
call L005pic_point |
||||
|
L005pic_point: |
||||
|
popl %eax |
||||
|
leal Lrem_4bit-L005pic_point(%eax),%eax |
||||
|
# the inner routine takes byte 15 in %ebx and reads bytes 14..0 itself
movzbl 15(%edi),%ebx |
||||
|
call __mmx_gmult_4bit_inner |
||||
|
# %edi was clobbered by the inner routine; reload the arg1 pointer
movl 20(%esp),%edi |
||||
|
emms |
||||
|
movl %ebx,12(%edi) |
||||
|
movl %edx,4(%edi) |
||||
|
movl %ecx,8(%edi) |
||||
|
movl %ebp,(%edi) |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
||||
|
# _gcm_ghash_4bit_mmx -- exported entry point, MMX code path: folds a
# run of 16-byte input blocks into a 16-byte state using
# __mmx_gmult_4bit_inner. Presumably
#   void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16],
#                           const u8 *inp, size_t len);
# (the prototype is not in this file -- confirm against the C caller).
# In:    arg1 -> %ebp : 16-byte state, read at entry, written at exit
#        arg2 -> %esi : table handed to the inner routine
#        arg3 -> %edi : input pointer
#        arg4 -> %ecx : byte count; arg3 + arg4 is stored back into the
#                       arg4 slot as the end pointer
# The loop is bottom-tested: one block is always processed, and it
# repeats while the advanced input pointer is below the end pointer
# (unsigned). The caller therefore has to supply at least 16 bytes.
# After `subl $20,%esp`: 0..15(%esp) = scratch block, 40(%esp) = arg1,
# 48(%esp) = arg3 slot (used to save the input pointer),
# 52(%esp) = end pointer.
# Saves/restores %ebp, %ebx, %esi, %edi; executes emms before returning.
.globl _gcm_ghash_4bit_mmx |
||||
|
.align 4 |
||||
|
_gcm_ghash_4bit_mmx: |
||||
|
L_gcm_ghash_4bit_mmx_begin: |
||||
|
pushl %ebp |
||||
|
pushl %ebx |
||||
|
pushl %esi |
||||
|
pushl %edi |
||||
|
movl 20(%esp),%ebp |
||||
|
movl 24(%esp),%esi |
||||
|
movl 28(%esp),%edi |
||||
|
movl 32(%esp),%ecx |
||||
|
# position-independent address of Lrem_4bit: call/pop yields the
# run-time address of L006pic_point, then the link-time offset is added
call L006pic_point |
||||
|
L006pic_point: |
||||
|
popl %eax |
||||
|
leal Lrem_4bit-L006pic_point(%eax),%eax |
||||
|
# turn the byte count into an end pointer, kept in the arg4 slot
addl %edi,%ecx |
||||
|
movl %ecx,32(%esp) |
||||
|
subl $20,%esp |
||||
|
# load the state; %ebp is loaded last because it is the base pointer
movl 12(%ebp),%ebx |
||||
|
movl 4(%ebp),%edx |
||||
|
movl 8(%ebp),%ecx |
||||
|
movl (%ebp),%ebp |
||||
|
jmp L007mmx_outer_loop |
||||
|
.align 4,0x90 |
||||
|
L007mmx_outer_loop: |
||||
|
# state ^= next input block
xorl 12(%edi),%ebx |
||||
|
xorl 4(%edi),%edx |
||||
|
xorl 8(%edi),%ecx |
||||
|
xorl (%edi),%ebp |
||||
|
# save the input pointer, spill the block to the scratch area and
# point %edi at it for the inner routine
movl %edi,48(%esp) |
||||
|
movl %ebx,12(%esp) |
||||
|
movl %edx,4(%esp) |
||||
|
movl %ecx,8(%esp) |
||||
|
movl %ebp,(%esp) |
||||
|
movl %esp,%edi |
||||
|
# %ebx = byte 15 of the block (top byte of the dword at offset 12)
shrl $24,%ebx |
||||
|
call __mmx_gmult_4bit_inner |
||||
|
# advance the input pointer and loop while it is below the end pointer
movl 48(%esp),%edi |
||||
|
leal 16(%edi),%edi |
||||
|
cmpl 52(%esp),%edi |
||||
|
jb L007mmx_outer_loop |
||||
|
# store the final state back through arg1
movl 40(%esp),%edi |
||||
|
emms |
||||
|
movl %ebx,12(%edi) |
||||
|
movl %edx,4(%edi) |
||||
|
movl %ecx,8(%edi) |
||||
|
movl %ebp,(%edi) |
||||
|
addl $20,%esp |
||||
|
popl %edi |
||||
|
popl %esi |
||||
|
popl %ebx |
||||
|
popl %ebp |
||||
|
ret |
||||
|
.align 6,0x90 |
||||
|
# Lrem_4bit -- read-only table of sixteen 8-byte entries, each written
# as two .long values with the first (low) dword zero.
# __mmx_gmult_4bit_inner indexes it as (%eax,idx,8), where idx is a
# value masked to 4 bits. Each non-zero dword equals the corresponding
# constant stored on the stack by the integer-only routines
# (_gcm_gmult_4bit_x86 / _gcm_ghash_4bit_x86) shifted right by 4 bits.
Lrem_4bit: |
||||
|
.long 0,0,0,29491200,0,58982400,0,38141952 |
||||
|
.long 0,117964800,0,113901568,0,76283904,0,88997888 |
||||
|
.long 0,235929600,0,265420800,0,227803136,0,206962688 |
||||
|
.long 0,152567808,0,148504576,0,177995776,0,190709760 |
||||
|
.align 6,0x90 |
||||
|
# L008rem_8bit -- read-only table of 256 16-bit values (32 rows of 8).
# Nothing in the routines visible in this part of the file references
# it; presumably it is the 8-bit counterpart of Lrem_4bit, used by code
# elsewhere -- confirm before removing or reordering.
L008rem_8bit: |
||||
|
.value 0,450,900,582,1800,1738,1164,1358 |
||||
|
.value 3600,4050,3476,3158,2328,2266,2716,2910 |
||||
|
.value 7200,7650,8100,7782,6952,6890,6316,6510 |
||||
|
.value 4656,5106,4532,4214,5432,5370,5820,6014 |
||||
|
.value 14400,14722,15300,14854,16200,16010,15564,15630 |
||||
|
.value 13904,14226,13780,13334,12632,12442,13020,13086 |
||||
|
.value 9312,9634,10212,9766,9064,8874,8428,8494 |
||||
|
.value 10864,11186,10740,10294,11640,11450,12028,12094 |
||||
|
.value 28800,28994,29444,29382,30600,30282,29708,30158 |
||||
|
.value 32400,32594,32020,31958,31128,30810,31260,31710 |
||||
|
.value 27808,28002,28452,28390,27560,27242,26668,27118 |
||||
|
.value 25264,25458,24884,24822,26040,25722,26172,26622 |
||||
|
.value 18624,18690,19268,19078,20424,19978,19532,19854 |
||||
|
.value 18128,18194,17748,17558,16856,16410,16988,17310 |
||||
|
.value 21728,21794,22372,22182,21480,21034,20588,20910 |
||||
|
.value 23280,23346,22900,22710,24056,23610,24188,24510 |
||||
|
.value 57600,57538,57988,58182,58888,59338,58764,58446 |
||||
|
.value 61200,61138,60564,60758,59416,59866,60316,59998 |
||||
|
.value 64800,64738,65188,65382,64040,64490,63916,63598 |
||||
|
.value 62256,62194,61620,61814,62520,62970,63420,63102 |
||||
|
.value 55616,55426,56004,56070,56904,57226,56780,56334 |
||||
|
.value 55120,54930,54484,54550,53336,53658,54236,53790 |
||||
|
.value 50528,50338,50916,50982,49768,50090,49644,49198 |
||||
|
.value 52080,51890,51444,51510,52344,52666,53244,52798 |
||||
|
.value 37248,36930,37380,37830,38536,38730,38156,38094 |
||||
|
.value 40848,40530,39956,40406,39064,39258,39708,39646 |
||||
|
.value 36256,35938,36388,36838,35496,35690,35116,35054 |
||||
|
.value 33712,33394,32820,33270,33976,34170,34620,34558 |
||||
|
.value 43456,43010,43588,43910,44744,44810,44364,44174 |
||||
|
.value 42960,42514,42068,42390,41176,41242,41820,41630 |
||||
|
.value 46560,46114,46692,47014,45800,45866,45420,45230 |
||||
|
.value 48112,47666,47220,47542,48376,48442,49020,48830 |
||||
|
# NUL-terminated ASCII identification string:
# "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 |
||||
|
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 |
||||
|
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 |
||||
|
.byte 0 |
@ -0,0 +1,662 @@ |
|||||
|
TITLE vpaes-x86.asm |
||||
|
IF @Version LT 800 |
||||
|
ECHO MASM version 8.00 or later is strongly recommended. |
||||
|
ENDIF |
||||
|
.686 |
||||
|
.XMM |
||||
|
IF @Version LT 800 |
||||
|
XMMWORD STRUCT 16 |
||||
|
DQ 2 dup (?) |
||||
|
XMMWORD ENDS |
||||
|
ENDIF |
||||
|
|
||||
|
.MODEL FLAT |
||||
|
OPTION DOTNAME |
||||
|
IF @Version LT 800 |
||||
|
.text$ SEGMENT PAGE 'CODE' |
||||
|
ELSE |
||||
|
.text$ SEGMENT ALIGN(64) 'CODE' |
||||
|
ENDIF |
||||
|
ALIGN 64 |
||||
|
; Constant pool shared by every vpaes routine in this file. It is laid out
; as consecutive 16-byte rows. The public entry points load ebp with
; ($L_vpaes_consts+030h - pic_point), so after relocation row k is
; addressed as [ebp + 16*k - 48] (row 0 = [ebp-48], row 3 = [ebp]).
$L_vpaes_consts:: |
||||
|
; Rows 0-1: addressed as [ebp-48] and [ebp-32].
DD 218628480,235210255,168496130,67568393 |
||||
|
DD 252381056,17041926,33884169,51187212 |
||||
|
; Row 2 ([ebp-16]): 252645135 = 0F0F0F0Fh, i.e. 0Fh in every byte; the
; routines use it to split bytes into low/high nibbles.
DD 252645135,252645135,252645135,252645135 |
||||
|
; Row 3 onwards: [ebp], [ebp+16], ...
DD 1512730624,3266504856,1377990664,3401244816 |
||||
|
DD 830229760,1275146365,2969422977,3447763452 |
||||
|
DD 3411033600,2979783055,338359620,2782886510 |
||||
|
DD 4209124096,907596821,221174255,1006095553 |
||||
|
DD 191964160,3799684038,3164090317,1589111125 |
||||
|
DD 182528256,1777043520,2877432650,3265356744 |
||||
|
DD 1874708224,3503451415,3305285752,363511674 |
||||
|
DD 1606117888,3487855781,1093350906,2384367825 |
||||
|
; Next eight rows ([ebp+128] .. [ebp+240]): byte-index permutations
; (every value is a pack of small byte indices, e.g. 197121 = 00030201h).
DD 197121,67569157,134941193,202313229 |
||||
|
DD 67569157,134941193,202313229,197121 |
||||
|
DD 134941193,202313229,197121,67569157 |
||||
|
DD 202313229,197121,67569157,134941193 |
||||
|
DD 33619971,100992007,168364043,235736079 |
||||
|
DD 235736079,33619971,100992007,168364043 |
||||
|
DD 168364043,235736079,33619971,100992007 |
||||
|
DD 100992007,168364043,235736079,33619971 |
||||
|
; Next four rows ([ebp+256] .. [ebp+304]): selected at run time as
; 256[ecx+ebp] with ecx in {0,16,32,48}.
DD 50462976,117835012,185207048,252579084 |
||||
|
DD 252314880,51251460,117574920,184942860 |
||||
|
DD 184682752,252054788,50987272,118359308 |
||||
|
DD 118099200,185467140,251790600,50727180 |
||||
|
; [ebp+320]: initial value of the rotating constant that
; __vpaes_schedule_core copies to the stack for __vpaes_schedule_round.
DD 2946363062,528716217,1300004225,1881839624 |
||||
|
; [ebp+336]: 1532713819 = 5B5B5B5Bh, i.e. 5Bh in every byte.
DD 1532713819,1532713819,1532713819,1532713819 |
||||
|
; [ebp+352] onwards: remaining lookup tables used by the key schedule
; ([ebp+352], [ebp+384], [ebp+416]...) and by __vpaes_decrypt_core
; (based at ebp+608).
DD 3602276352,4288629033,3737020424,4153884961 |
||||
|
DD 1354558464,32357713,2958822624,3775749553 |
||||
|
DD 1201988352,132424512,1572796698,503232858 |
||||
|
DD 2213177600,1597421020,4103937655,675398315 |
||||
|
DD 2749646592,4273543773,1511898873,121693092 |
||||
|
DD 3040248576,1103263732,2871565598,1608280554 |
||||
|
DD 2236667136,2588920351,482954393,64377734 |
||||
|
DD 3069987328,291237287,2117370568,3650299247 |
||||
|
DD 533321216,3573750986,2572112006,1401264716 |
||||
|
DD 1339849704,2721158661,548607111,3445553514 |
||||
|
DD 2128193280,3054596040,2183486460,1257083700 |
||||
|
DD 655635200,1165381986,3923443150,2344132524 |
||||
|
DD 190078720,256924420,290342170,357187870 |
||||
|
DD 1610966272,2263057382,4103205268,309794674 |
||||
|
DD 2592527872,2233205587,1335446729,3402964816 |
||||
|
DD 3973531904,3225098121,3002836325,1918774430 |
||||
|
DD 3870401024,2102906079,2284471353,4117666579 |
||||
|
DD 617007872,1021508343,366931923,691083277 |
||||
|
DD 2528395776,3491914898,2968704004,1613121270 |
||||
|
DD 3445188352,3247741094,844474987,4093578302 |
||||
|
DD 651481088,1190302358,1689581232,574775300 |
||||
|
DD 4289380608,206939853,2555985458,2489840491 |
||||
|
DD 2130264064,327674451,3566485037,3349835193 |
||||
|
DD 2470714624,316102159,3636825756,3393945945 |
||||
|
; NUL-terminated ASCII credit string:
; "Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)"
DB 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 |
||||
|
DB 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83 |
||||
|
DB 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117 |
||||
|
DB 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105 |
||||
|
DB 118,101,114,115,105,116,121,41,0 |
||||
|
ALIGN 64 |
||||
|
ALIGN 16 |
||||
|
; __vpaes_preheat: turn the position-independent offset in ebp into an
; absolute table pointer and preload two constants.
; In:  ebp = ($L_vpaes_consts+030h) minus the address of the label that
;            directly follows the caller's CALL instruction.
; Out: ebp = absolute address of $L_vpaes_consts+030h
;      xmm7 = row at [ebp-48], xmm6 = row at [ebp-16].
__vpaes_preheat PROC PRIVATE |
||||
|
; [esp] holds the return address, which is the caller's pic_point label.
add ebp,DWORD PTR [esp] |
||||
|
; xmm7 = first 16 bytes of $L_vpaes_consts.
movdqa xmm7,XMMWORD PTR [ebp-48] |
||||
|
; xmm6 = 0Fh in every byte (third row of the table): the nibble mask.
movdqa xmm6,XMMWORD PTR [ebp-16] |
||||
|
ret |
||||
|
__vpaes_preheat ENDP |
||||
|
ALIGN 16 |
||||
|
; __vpaes_encrypt_core: transform one 16-byte block held in xmm0.
; In:  xmm0 = input block
;      edx  = key schedule (16-byte round keys; loop count at 240[edx])
;      ebp  = absolute address of $L_vpaes_consts+030h
;      xmm6 = 0Fh nibble mask, xmm7 = row [ebp-48] (see __vpaes_preheat)
; Out: xmm0 = result block
; Clobbers: eax, ebx, ecx, edx, xmm1-xmm5, flags.
; The "DB 102,15,56,0,modrm" lines are hand-encoded PSHUFB (66 0F 38 00 /r)
; instructions; each one is decoded in the comment directly above it.
__vpaes_encrypt_core PROC PRIVATE |
||||
|
; ecx = rotating 0/16/32/48 offset into the tables around ebx.
mov ecx,16 |
||||
|
; eax = loop counter.
mov eax,DWORD PTR 240[edx] |
||||
|
; Input transform: split every byte into nibbles and look each half up in
; the table pair at [ebp] / [ebp+16] (.Lk_ipt in the x86_64 variant of
; this code), then XOR in the first round key.
movdqa xmm1,xmm6 |
||||
|
movdqa xmm2,XMMWORD PTR [ebp] |
||||
|
; xmm1 = bytes with the low nibble cleared.
pandn xmm1,xmm0 |
||||
|
; xmm5 = first round key (unaligned load).
movdqu xmm5,XMMWORD PTR [edx] |
||||
|
; Dword shift is safe here: the low nibbles were cleared, so no bits
; cross a byte boundary. xmm1 = high nibbles moved to the low position.
psrld xmm1,4 |
||||
|
; xmm0 = low nibbles.
pand xmm0,xmm6 |
||||
|
; pshufb xmm2,xmm0
DB 102,15,56,0,208 |
||||
|
movdqa xmm0,XMMWORD PTR 16[ebp] |
||||
|
; pshufb xmm0,xmm1
DB 102,15,56,0,193 |
||||
|
pxor xmm2,xmm5 |
||||
|
pxor xmm0,xmm2 |
||||
|
; Advance to the next round key. This is also the last flag-setting
; instruction before the first "jnz" below (lea and jmp leave flags alone).
add edx,16 |
||||
|
; ebx = base of the permutation rows indexed with ecx
; (.Lk_mc_backward in the x86_64 variant).
lea ebx,DWORD PTR 192[ebp] |
||||
|
jmp $L000enc_entry |
||||
|
ALIGN 16 |
||||
|
$L001enc_loop: |
||||
|
; Middle round: combine the table outputs selected by xmm2/xmm3 (computed
; in the enc_entry section) with the round key in xmm5, then mix using the
; two permutations at [ecx+ebx-64] and [ecx+ebx].
movdqa xmm4,XMMWORD PTR 32[ebp] |
||||
|
; pshufb xmm4,xmm2
DB 102,15,56,0,226 |
||||
|
; Fold in the round key.
pxor xmm4,xmm5 |
||||
|
movdqa xmm0,XMMWORD PTR 48[ebp] |
||||
|
; pshufb xmm0,xmm3
DB 102,15,56,0,195 |
||||
|
pxor xmm0,xmm4 |
||||
|
movdqa xmm5,XMMWORD PTR 64[ebp] |
||||
|
; pshufb xmm5,xmm2
DB 102,15,56,0,234 |
||||
|
movdqa xmm1,XMMWORD PTR [ecx*1+ebx-64] |
||||
|
movdqa xmm2,XMMWORD PTR 80[ebp] |
||||
|
; pshufb xmm2,xmm3
DB 102,15,56,0,211 |
||||
|
pxor xmm2,xmm5 |
||||
|
movdqa xmm4,XMMWORD PTR [ecx*1+ebx] |
||||
|
movdqa xmm3,xmm0 |
||||
|
; pshufb xmm0,xmm1
DB 102,15,56,0,193 |
||||
|
; Next round key.
add edx,16 |
||||
|
pxor xmm0,xmm2 |
||||
|
; pshufb xmm3,xmm4
DB 102,15,56,0,220 |
||||
|
; ecx = (ecx + 16) & 48, completed by the "and" below.
add ecx,16 |
||||
|
pxor xmm3,xmm0 |
||||
|
; pshufb xmm0,xmm1
DB 102,15,56,0,193 |
||||
|
and ecx,48 |
||||
|
pxor xmm0,xmm3 |
||||
|
; Sets ZF for the "jnz" at the end of the enc_entry section; every
; instruction in between is SSE and does not modify EFLAGS.
sub eax,1 |
||||
|
$L000enc_entry: |
||||
|
; Top of every round: nibble split followed by a chain of lookups in
; xmm7 and the row at [ebp-32]. Leaves index vectors in xmm2 and xmm3
; and the next round key in xmm5.
movdqa xmm1,xmm6 |
||||
|
pandn xmm1,xmm0 |
||||
|
; xmm1 = high nibbles.
psrld xmm1,4 |
||||
|
; xmm0 = low nibbles.
pand xmm0,xmm6 |
||||
|
movdqa xmm5,XMMWORD PTR [ebp-32] |
||||
|
; pshufb xmm5,xmm0
DB 102,15,56,0,232 |
||||
|
pxor xmm0,xmm1 |
||||
|
movdqa xmm3,xmm7 |
||||
|
; pshufb xmm3,xmm1
DB 102,15,56,0,217 |
||||
|
pxor xmm3,xmm5 |
||||
|
movdqa xmm4,xmm7 |
||||
|
; pshufb xmm4,xmm0
DB 102,15,56,0,224 |
||||
|
pxor xmm4,xmm5 |
||||
|
movdqa xmm2,xmm7 |
||||
|
; pshufb xmm2,xmm3
DB 102,15,56,0,211 |
||||
|
pxor xmm2,xmm0 |
||||
|
movdqa xmm3,xmm7 |
||||
|
; xmm5 = round key for the round about to run.
movdqu xmm5,XMMWORD PTR [edx] |
||||
|
; pshufb xmm3,xmm4
DB 102,15,56,0,220 |
||||
|
pxor xmm3,xmm1 |
||||
|
; Flags come from "sub eax,1" (or from "add edx,16" on first entry).
jnz $L001enc_loop |
||||
|
; Final round: output tables at [ebp+96] / [ebp+112], last round key, and
; the byte permutation at [ecx+ebx+64].
movdqa xmm4,XMMWORD PTR 96[ebp] |
||||
|
movdqa xmm0,XMMWORD PTR 112[ebp] |
||||
|
; pshufb xmm4,xmm2
DB 102,15,56,0,226 |
||||
|
pxor xmm4,xmm5 |
||||
|
; pshufb xmm0,xmm3
DB 102,15,56,0,195 |
||||
|
movdqa xmm1,XMMWORD PTR 64[ecx*1+ebx] |
||||
|
pxor xmm0,xmm4 |
||||
|
; pshufb xmm0,xmm1
DB 102,15,56,0,193 |
||||
|
ret |
||||
|
__vpaes_encrypt_core ENDP |
||||
|
ALIGN 16 |
||||
|
; __vpaes_decrypt_core: inverse-direction transform of one 16-byte block.
; In:  xmm0 = input block
;      edx  = key schedule (16-byte round keys; loop count at 240[edx])
;      ebp  = absolute address of $L_vpaes_consts+030h
;      xmm6 = 0Fh nibble mask, xmm7 = row [ebp-48] (see __vpaes_preheat)
; Out: xmm0 = result block
; Clobbers: eax, ebx, ecx, edx, xmm1-xmm5, flags.
; "DB 102,15,56,0,modrm" = PSHUFB, "DB 102,15,58,15,modrm,imm" = PALIGNR;
; each is decoded in the comment directly above it.
__vpaes_decrypt_core PROC PRIVATE |
||||
|
; eax = loop counter.
mov eax,DWORD PTR 240[edx] |
||||
|
; ebx = base of the tables used by this routine.
lea ebx,DWORD PTR 608[ebp] |
||||
|
; Input transform with the table pair at [ebx-64] / [ebx-48], interleaved
; with the computation of ecx = ((count << 4) ^ 48) & 48.
movdqa xmm1,xmm6 |
||||
|
movdqa xmm2,XMMWORD PTR [ebx-64] |
||||
|
pandn xmm1,xmm0 |
||||
|
mov ecx,eax |
||||
|
; xmm1 = high nibbles (low nibbles were cleared by pandn, so the dword
; shift cannot leak bits between bytes).
psrld xmm1,4 |
||||
|
; xmm5 = first round key.
movdqu xmm5,XMMWORD PTR [edx] |
||||
|
shl ecx,4 |
||||
|
; xmm0 = low nibbles.
pand xmm0,xmm6 |
||||
|
; pshufb xmm2,xmm0
DB 102,15,56,0,208 |
||||
|
movdqa xmm0,XMMWORD PTR [ebx-48] |
||||
|
xor ecx,48 |
||||
|
; pshufb xmm0,xmm1
DB 102,15,56,0,193 |
||||
|
and ecx,48 |
||||
|
pxor xmm2,xmm5 |
||||
|
; xmm5 = byte permutation applied to xmm0 between the table steps below;
; it is itself rotated at the end of every loop iteration.
movdqa xmm5,XMMWORD PTR 176[ebp] |
||||
|
pxor xmm0,xmm2 |
||||
|
; Next round key; also the last flag-setting instruction before the first
; "jnz" (lea and jmp do not touch flags).
add edx,16 |
||||
|
; ecx = address of the final output permutation = ebp + 256 + offset.
lea ecx,DWORD PTR [ecx*1+ebx-352] |
||||
|
jmp $L002dec_entry |
||||
|
ALIGN 16 |
||||
|
$L003dec_loop: |
||||
|
; Middle round: four table pairs ([ebx-32]/[ebx-16], [ebx]/[ebx+16],
; [ebx+32]/[ebx+48], [ebx+64]/[ebx+80]) looked up with xmm2/xmm3 and
; accumulated into xmm0, with a "pshufb xmm0,xmm5" between the pairs.
; xmm0 enters holding the round key loaded in the dec_entry section.
movdqa xmm4,XMMWORD PTR [ebx-32] |
||||
|
; pshufb xmm4,xmm2
DB 102,15,56,0,226 |
||||
|
pxor xmm4,xmm0 |
||||
|
movdqa xmm0,XMMWORD PTR [ebx-16] |
||||
|
; pshufb xmm0,xmm3
DB 102,15,56,0,195 |
||||
|
pxor xmm0,xmm4 |
||||
|
; Next round key.
add edx,16 |
||||
|
; pshufb xmm0,xmm5
DB 102,15,56,0,197 |
||||
|
movdqa xmm4,XMMWORD PTR [ebx] |
||||
|
; pshufb xmm4,xmm2
DB 102,15,56,0,226 |
||||
|
pxor xmm4,xmm0 |
||||
|
movdqa xmm0,XMMWORD PTR 16[ebx] |
||||
|
; pshufb xmm0,xmm3
DB 102,15,56,0,195 |
||||
|
pxor xmm0,xmm4 |
||||
|
; Sets ZF for the "jnz" at the end of the dec_entry section; everything
; executed in between is SSE and leaves EFLAGS untouched.
sub eax,1 |
||||
|
; pshufb xmm0,xmm5
DB 102,15,56,0,197 |
||||
|
movdqa xmm4,XMMWORD PTR 32[ebx] |
||||
|
; pshufb xmm4,xmm2
DB 102,15,56,0,226 |
||||
|
pxor xmm4,xmm0 |
||||
|
movdqa xmm0,XMMWORD PTR 48[ebx] |
||||
|
; pshufb xmm0,xmm3
DB 102,15,56,0,195 |
||||
|
pxor xmm0,xmm4 |
||||
|
; pshufb xmm0,xmm5
DB 102,15,56,0,197 |
||||
|
movdqa xmm4,XMMWORD PTR 64[ebx] |
||||
|
; pshufb xmm4,xmm2
DB 102,15,56,0,226 |
||||
|
pxor xmm4,xmm0 |
||||
|
movdqa xmm0,XMMWORD PTR 80[ebx] |
||||
|
; pshufb xmm0,xmm3
DB 102,15,56,0,195 |
||||
|
pxor xmm0,xmm4 |
||||
|
; palignr xmm5,xmm5,12 : rotate the permutation in xmm5 by 12 bytes.
DB 102,15,58,15,237,12 |
||||
|
$L002dec_entry: |
||||
|
; Top of every round: nibble split and lookup chain in xmm7 and the row
; at [ebp-32]. Leaves index vectors in xmm2/xmm3 and the next round key
; in xmm0.
movdqa xmm1,xmm6 |
||||
|
pandn xmm1,xmm0 |
||||
|
; xmm1 = high nibbles.
psrld xmm1,4 |
||||
|
; xmm0 = low nibbles.
pand xmm0,xmm6 |
||||
|
movdqa xmm2,XMMWORD PTR [ebp-32] |
||||
|
; pshufb xmm2,xmm0
DB 102,15,56,0,208 |
||||
|
pxor xmm0,xmm1 |
||||
|
movdqa xmm3,xmm7 |
||||
|
; pshufb xmm3,xmm1
DB 102,15,56,0,217 |
||||
|
pxor xmm3,xmm2 |
||||
|
movdqa xmm4,xmm7 |
||||
|
; pshufb xmm4,xmm0
DB 102,15,56,0,224 |
||||
|
pxor xmm4,xmm2 |
||||
|
movdqa xmm2,xmm7 |
||||
|
; pshufb xmm2,xmm3
DB 102,15,56,0,211 |
||||
|
pxor xmm2,xmm0 |
||||
|
movdqa xmm3,xmm7 |
||||
|
; pshufb xmm3,xmm4
DB 102,15,56,0,220 |
||||
|
pxor xmm3,xmm1 |
||||
|
; xmm0 = round key for the round about to run.
movdqu xmm0,XMMWORD PTR [edx] |
||||
|
; Flags come from "sub eax,1" (or from "add edx,16" on first entry).
jnz $L003dec_loop |
||||
|
; Final round: output tables at [ebx+96] / [ebx+112], last round key, and
; the byte permutation whose address was precomputed in ecx.
movdqa xmm4,XMMWORD PTR 96[ebx] |
||||
|
; pshufb xmm4,xmm2
DB 102,15,56,0,226 |
||||
|
pxor xmm4,xmm0 |
||||
|
movdqa xmm0,XMMWORD PTR 112[ebx] |
||||
|
movdqa xmm2,XMMWORD PTR [ecx] |
||||
|
; pshufb xmm0,xmm3
DB 102,15,56,0,195 |
||||
|
pxor xmm0,xmm4 |
||||
|
; pshufb xmm0,xmm2
DB 102,15,56,0,194 |
||||
|
ret |
||||
|
__vpaes_decrypt_core ENDP |
||||
|
ALIGN 16 |
||||
|
; __vpaes_schedule_core: expand a user key into the round-key schedule.
; In:  esi = user key bytes
;      eax = key length in bits (compared against 192 below)
;      edx = where the first round key is written; moved by +/-16 per key
;      ecx = initial offset (0..48) selecting a row at 256[ecx+ebp]
;      edi = 0 for an encryption schedule, non-zero for decryption
;      ebp = ($L_vpaes_consts+030h) minus the caller's pic_point address
;      esp = caller's 16-byte-aligned scratch area minus 4 (return address)
; Out: schedule written through edx; xmm0-xmm7 zeroed on return.
; Stack scratch: 4[esp] (16 bytes) holds the rotating constant used by
; __vpaes_schedule_round; 20[esp] (16 bytes) is a temporary for xmm7.
__vpaes_schedule_core PROC PRIVATE |
||||
|
; [esp] = return address = caller's pic_point, so ebp becomes absolute.
add ebp,DWORD PTR [esp] |
||||
|
; xmm0 = first 16 key bytes (unaligned load).
movdqu xmm0,XMMWORD PTR [esi] |
||||
|
; Initial rotating constant, parked in the scratch slot below.
movdqa xmm2,XMMWORD PTR 320[ebp] |
||||
|
; Keep an untransformed copy of the key for the decrypting path.
movdqa xmm3,xmm0 |
||||
|
; ebx selects the table pair for __vpaes_schedule_transform: [ebp],[ebp+16].
lea ebx,DWORD PTR [ebp] |
||||
|
movdqa XMMWORD PTR 4[esp],xmm2 |
||||
|
call __vpaes_schedule_transform |
||||
|
; xmm7 carries the previous round key between schedule rounds.
movdqa xmm7,xmm0 |
||||
|
test edi,edi |
||||
|
jnz $L004schedule_am_decrypting |
||||
|
; Encrypting: store the transformed key as the first round key.
movdqu XMMWORD PTR [edx],xmm0 |
||||
|
jmp $L005schedule_go |
||||
|
$L004schedule_am_decrypting: |
||||
|
; Decrypting: store the raw key permuted by the row at 256[ecx+ebp].
movdqa xmm1,XMMWORD PTR 256[ecx*1+ebp] |
||||
|
; pshufb xmm3,xmm1
DB 102,15,56,0,217 |
||||
|
movdqu XMMWORD PTR [edx],xmm3 |
||||
|
xor ecx,48 |
||||
|
$L005schedule_go: |
||||
|
; Dispatch on key size: >192 -> 256-bit path, ==192 -> 192-bit path,
; anything else falls through to the 128-bit path.
cmp eax,192 |
||||
|
ja $L006schedule_256 |
||||
|
je $L007schedule_192 |
||||
|
$L008schedule_128: |
||||
|
; 128-bit keys: 10 rounds; each one but the last is written out via
; __vpaes_schedule_mangle.
mov eax,10 |
||||
|
$L009loop_schedule_128: |
||||
|
call __vpaes_schedule_round |
||||
|
dec eax |
||||
|
jz $L010schedule_mangle_last |
||||
|
call __vpaes_schedule_mangle |
||||
|
jmp $L009loop_schedule_128 |
||||
|
ALIGN 16 |
||||
|
$L007schedule_192: |
||||
|
; 192-bit keys: load key bytes 8..23, transform them, and keep only the
; high half in xmm6 (movhlps copies the zeroed high qword of xmm4 into
; the low qword of xmm6).
movdqu xmm0,XMMWORD PTR 8[esi] |
||||
|
call __vpaes_schedule_transform |
||||
|
movdqa xmm6,xmm0 |
||||
|
pxor xmm4,xmm4 |
||||
|
movhlps xmm6,xmm4 |
||||
|
; 4 iterations, each emitting up to three round keys.
mov eax,4 |
||||
|
$L011loop_schedule_192: |
||||
|
call __vpaes_schedule_round |
||||
|
; palignr xmm0,xmm6,8 : xmm0 = (high qword of xmm6) : (low qword of xmm0)
; placed as low : high respectively.
DB 102,15,58,15,198,8 |
||||
|
call __vpaes_schedule_mangle |
||||
|
call __vpaes_schedule_192_smear |
||||
|
call __vpaes_schedule_mangle |
||||
|
call __vpaes_schedule_round |
||||
|
dec eax |
||||
|
jz $L010schedule_mangle_last |
||||
|
call __vpaes_schedule_mangle |
||||
|
call __vpaes_schedule_192_smear |
||||
|
jmp $L011loop_schedule_192 |
||||
|
ALIGN 16 |
||||
|
$L006schedule_256: |
||||
|
; 256-bit keys: load and transform key bytes 16..31.
movdqu xmm0,XMMWORD PTR 16[esi] |
||||
|
call __vpaes_schedule_transform |
||||
|
; 7 iterations, each emitting two round keys (the last emits one here
; and one in schedule_mangle_last).
mov eax,7 |
||||
|
$L012loop_schedule_256: |
||||
|
call __vpaes_schedule_mangle |
||||
|
; Remember the key just written; it becomes the "previous key" for the
; low round below.
movdqa xmm6,xmm0 |
||||
|
call __vpaes_schedule_round |
||||
|
dec eax |
||||
|
jz $L010schedule_mangle_last |
||||
|
call __vpaes_schedule_mangle |
||||
|
; Broadcast the top dword of xmm0, then run only the lower part of
; __vpaes_schedule_round with xmm7 temporarily replaced by xmm6.
pshufd xmm0,xmm0,255 |
||||
|
movdqa XMMWORD PTR 20[esp],xmm7 |
||||
|
movdqa xmm7,xmm6 |
||||
|
call $L_vpaes_schedule_low_round |
||||
|
movdqa xmm7,XMMWORD PTR 20[esp] |
||||
|
jmp $L012loop_schedule_256 |
||||
|
ALIGN 16 |
||||
|
$L010schedule_mangle_last: |
||||
|
; Write the final round key. The output transform uses the table pair at
; [ebp+384] when decrypting and [ebp+352] when encrypting.
lea ebx,DWORD PTR 384[ebp] |
||||
|
test edi,edi |
||||
|
jnz $L013schedule_mangle_last_dec |
||||
|
movdqa xmm1,XMMWORD PTR 256[ecx*1+ebp] |
||||
|
; pshufb xmm0,xmm1
DB 102,15,56,0,193 |
||||
|
lea ebx,DWORD PTR 352[ebp] |
||||
|
; Net effect with the add below: edx += 16 (encrypt), edx -= 16 (decrypt).
add edx,32 |
||||
|
$L013schedule_mangle_last_dec: |
||||
|
add edx,-16 |
||||
|
; XOR with the all-5Bh row before the output transform.
pxor xmm0,XMMWORD PTR 336[ebp] |
||||
|
call __vpaes_schedule_transform |
||||
|
movdqu XMMWORD PTR [edx],xmm0 |
||||
|
; Clear all XMM registers before returning (presumably so no key
; material is left behind in them).
pxor xmm0,xmm0 |
||||
|
pxor xmm1,xmm1 |
||||
|
pxor xmm2,xmm2 |
||||
|
pxor xmm3,xmm3 |
||||
|
pxor xmm4,xmm4 |
||||
|
pxor xmm5,xmm5 |
||||
|
pxor xmm6,xmm6 |
||||
|
pxor xmm7,xmm7 |
||||
|
ret |
||||
|
__vpaes_schedule_core ENDP |
||||
|
ALIGN 16 |
||||
|
; __vpaes_schedule_192_smear: helper for the 192-bit key schedule.
; In:  xmm6 = partial key state, xmm7 = previous round key
; Out: xmm0 = xmm6 XOR two dword shuffles (of xmm6 and of xmm7)
;      xmm6 = the same value with its low qword cleared
; Clobbers: xmm1 (left zeroed).
__vpaes_schedule_192_smear PROC PRIVATE |
||||
|
; Shuffle 128 = 10 00 00 00b: result dwords are (d0,d0,d0,d2) of xmm6.
pshufd xmm0,xmm6,128 |
||||
|
pxor xmm6,xmm0 |
||||
|
; Shuffle 254 = 11 11 11 10b: result dwords are (d2,d3,d3,d3) of xmm7.
pshufd xmm0,xmm7,254 |
||||
|
pxor xmm6,xmm0 |
||||
|
; Return the combined value in xmm0.
movdqa xmm0,xmm6 |
||||
|
pxor xmm1,xmm1 |
||||
|
; movhlps copies the (zero) high qword of xmm1 into the low qword of xmm6.
movhlps xmm6,xmm1 |
||||
|
ret |
||||
|
__vpaes_schedule_192_smear ENDP |
||||
|
ALIGN 16 |
||||
|
; __vpaes_schedule_round: one step of the key schedule.
; In:  xmm0 = value fed through the substitution, xmm7 = previous round key
;      ebp  = absolute address of $L_vpaes_consts+030h
;      8[esp] = 16-byte rotating constant kept by __vpaes_schedule_core
;               (the same slot that routine addresses as 4[esp])
; Out: xmm0 = xmm7 = new round key; the constant at 8[esp] is rotated.
; Clobbers: xmm1-xmm5.
; The second entry point $L_vpaes_schedule_low_round skips the constant
; and rotation steps. "DB 102,15,58,15,..." = PALIGNR and
; "DB 102,15,56,0,..." = PSHUFB, decoded in the comments above them.
__vpaes_schedule_round PROC PRIVATE |
||||
|
movdqa xmm2,XMMWORD PTR 8[esp] |
||||
|
pxor xmm1,xmm1 |
||||
|
; palignr xmm1,xmm2,15 : xmm1 = top byte of xmm2 in byte 0, zeros above.
DB 102,15,58,15,202,15 |
||||
|
; palignr xmm2,xmm2,15 : rotate xmm2 so its top byte wraps to byte 0.
DB 102,15,58,15,210,15 |
||||
|
; Fold the extracted constant byte into the previous key.
pxor xmm7,xmm1 |
||||
|
; Broadcast the top dword of xmm0 to all four dwords.
pshufd xmm0,xmm0,255 |
||||
|
; palignr xmm0,xmm0,1 : rotate xmm0 by one byte (byte i <- byte i+1).
DB 102,15,58,15,192,1 |
||||
|
; Save the rotated constant for the next call.
movdqa XMMWORD PTR 8[esp],xmm2 |
||||
|
$L_vpaes_schedule_low_round:: |
||||
|
; Smear xmm7: xmm7 ^= xmm7 << 32 bits, then xmm7 ^= xmm7 << 64 bits.
movdqa xmm1,xmm7 |
||||
|
pslldq xmm7,4 |
||||
|
pxor xmm7,xmm1 |
||||
|
movdqa xmm1,xmm7 |
||||
|
pslldq xmm7,8 |
||||
|
pxor xmm7,xmm1 |
||||
|
; XOR with the all-5Bh row.
pxor xmm7,XMMWORD PTR 336[ebp] |
||||
|
; Substitute the bytes of xmm0: same nibble-split lookup chain as the
; entry section of the encrypt/decrypt cores, with the mask and table
; loaded into xmm4/xmm5 instead of xmm6/xmm7.
movdqa xmm4,XMMWORD PTR [ebp-16] |
||||
|
movdqa xmm5,XMMWORD PTR [ebp-48] |
||||
|
movdqa xmm1,xmm4 |
||||
|
pandn xmm1,xmm0 |
||||
|
; xmm1 = high nibbles.
psrld xmm1,4 |
||||
|
; xmm0 = low nibbles.
pand xmm0,xmm4 |
||||
|
movdqa xmm2,XMMWORD PTR [ebp-32] |
||||
|
; pshufb xmm2,xmm0
DB 102,15,56,0,208 |
||||
|
pxor xmm0,xmm1 |
||||
|
movdqa xmm3,xmm5 |
||||
|
; pshufb xmm3,xmm1
DB 102,15,56,0,217 |
||||
|
pxor xmm3,xmm2 |
||||
|
movdqa xmm4,xmm5 |
||||
|
; pshufb xmm4,xmm0
DB 102,15,56,0,224 |
||||
|
pxor xmm4,xmm2 |
||||
|
movdqa xmm2,xmm5 |
||||
|
; pshufb xmm2,xmm3
DB 102,15,56,0,211 |
||||
|
pxor xmm2,xmm0 |
||||
|
movdqa xmm3,xmm5 |
||||
|
; pshufb xmm3,xmm4
DB 102,15,56,0,220 |
||||
|
pxor xmm3,xmm1 |
||||
|
; Output lookup through the table pair at [ebp+32] / [ebp+48].
movdqa xmm4,XMMWORD PTR 32[ebp] |
||||
|
; pshufb xmm4,xmm2
DB 102,15,56,0,226 |
||||
|
movdqa xmm0,XMMWORD PTR 48[ebp] |
||||
|
; pshufb xmm0,xmm3
DB 102,15,56,0,195 |
||||
|
pxor xmm0,xmm4 |
||||
|
; Combine with the smeared previous key; result is the new round key.
pxor xmm0,xmm7 |
||||
|
movdqa xmm7,xmm0 |
||||
|
ret |
||||
|
__vpaes_schedule_round ENDP |
||||
|
ALIGN 16 |
||||
|
; __vpaes_schedule_transform: per-byte transform through two 16-entry
; nibble tables.
; In:  xmm0 = input; ebx = address of the table pair (low-nibble table at
;      [ebx], high-nibble table at [ebx+16]); ebp = $L_vpaes_consts+030h
; Out: xmm0 = table_lo[low nibble] XOR table_hi[high nibble] for each byte
; Clobbers: xmm1, xmm2.
__vpaes_schedule_transform PROC PRIVATE |
||||
|
; xmm2 = 0Fh nibble mask.
movdqa xmm2,XMMWORD PTR [ebp-16] |
||||
|
movdqa xmm1,xmm2 |
||||
|
pandn xmm1,xmm0 |
||||
|
; xmm1 = high nibbles (low nibbles already cleared, so the dword shift
; cannot leak bits across bytes).
psrld xmm1,4 |
||||
|
; xmm0 = low nibbles.
pand xmm0,xmm2 |
||||
|
movdqa xmm2,XMMWORD PTR [ebx] |
||||
|
; pshufb xmm2,xmm0  (66 0F 38 00 /r, hand-encoded)
DB 102,15,56,0,208 |
||||
|
movdqa xmm0,XMMWORD PTR 16[ebx] |
||||
|
; pshufb xmm0,xmm1
DB 102,15,56,0,193 |
||||
|
pxor xmm0,xmm2 |
||||
|
ret |
||||
|
__vpaes_schedule_transform ENDP |
||||
|
ALIGN 16 |
||||
|
; __vpaes_schedule_mangle: convert the round key in xmm0 to its stored
; form and write it to the schedule.
; In:  xmm0 = round key (preserved); edx = current schedule pointer
;      ecx  = offset (0..48) selecting the row at 256[ecx+ebp]
;      edi  = 0 when building an encryption schedule, non-zero otherwise
;      ebp  = absolute address of $L_vpaes_consts+030h
; Out: edx advanced by +16 (encrypt) or -16 (decrypt) and the key stored
;      at the new [edx]; ecx = (ecx - 16) & 48.
; Clobbers: xmm1-xmm5, flags, and esi on the decrypt path.
; "DB 102,15,56,0,modrm" = PSHUFB, decoded in the comments above each one.
__vpaes_schedule_mangle PROC PRIVATE |
||||
|
movdqa xmm4,xmm0 |
||||
|
; xmm5 = permutation that rotates the bytes inside every dword.
movdqa xmm5,XMMWORD PTR 128[ebp] |
||||
|
test edi,edi |
||||
|
jnz $L014schedule_mangle_dec |
||||
|
; Encrypt path: x = key XOR 5Bh-row; xmm3 = p(x) ^ p(p(x)) ^ p(p(p(x)))
; where p is the permutation in xmm5.
add edx,16 |
||||
|
pxor xmm4,XMMWORD PTR 336[ebp] |
||||
|
; pshufb xmm4,xmm5
DB 102,15,56,0,229 |
||||
|
movdqa xmm3,xmm4 |
||||
|
; pshufb xmm4,xmm5
DB 102,15,56,0,229 |
||||
|
pxor xmm3,xmm4 |
||||
|
; pshufb xmm4,xmm5
DB 102,15,56,0,229 |
||||
|
pxor xmm3,xmm4 |
||||
|
jmp $L015schedule_mangle_both |
||||
|
ALIGN 16 |
||||
|
$L014schedule_mangle_dec: |
||||
|
; Decrypt path: split the key into nibbles, then run it through four
; table pairs starting at [ebp+416], permuting the running value with
; xmm5 between pairs.
movdqa xmm2,XMMWORD PTR [ebp-16] |
||||
|
; esi is reused as the table pointer from here on.
lea esi,DWORD PTR 416[ebp] |
||||
|
movdqa xmm1,xmm2 |
||||
|
pandn xmm1,xmm4 |
||||
|
; xmm1 = high nibbles.
psrld xmm1,4 |
||||
|
; xmm4 = low nibbles.
pand xmm4,xmm2 |
||||
|
movdqa xmm2,XMMWORD PTR [esi] |
||||
|
; pshufb xmm2,xmm4
DB 102,15,56,0,212 |
||||
|
movdqa xmm3,XMMWORD PTR 16[esi] |
||||
|
; pshufb xmm3,xmm1
DB 102,15,56,0,217 |
||||
|
pxor xmm3,xmm2 |
||||
|
; pshufb xmm3,xmm5
DB 102,15,56,0,221 |
||||
|
movdqa xmm2,XMMWORD PTR 32[esi] |
||||
|
; pshufb xmm2,xmm4
DB 102,15,56,0,212 |
||||
|
pxor xmm2,xmm3 |
||||
|
movdqa xmm3,XMMWORD PTR 48[esi] |
||||
|
; pshufb xmm3,xmm1
DB 102,15,56,0,217 |
||||
|
pxor xmm3,xmm2 |
||||
|
; pshufb xmm3,xmm5
DB 102,15,56,0,221 |
||||
|
movdqa xmm2,XMMWORD PTR 64[esi] |
||||
|
; pshufb xmm2,xmm4
DB 102,15,56,0,212 |
||||
|
pxor xmm2,xmm3 |
||||
|
movdqa xmm3,XMMWORD PTR 80[esi] |
||||
|
; pshufb xmm3,xmm1
DB 102,15,56,0,217 |
||||
|
pxor xmm3,xmm2 |
||||
|
; pshufb xmm3,xmm5
DB 102,15,56,0,221 |
||||
|
movdqa xmm2,XMMWORD PTR 96[esi] |
||||
|
; pshufb xmm2,xmm4
DB 102,15,56,0,212 |
||||
|
pxor xmm2,xmm3 |
||||
|
movdqa xmm3,XMMWORD PTR 112[esi] |
||||
|
; pshufb xmm3,xmm1
DB 102,15,56,0,217 |
||||
|
pxor xmm3,xmm2 |
||||
|
; The decryption schedule is filled from high addresses downwards.
add edx,-16 |
||||
|
$L015schedule_mangle_both: |
||||
|
; Apply the row selected by ecx, step ecx to the previous row
; (wrapping within 0..48), and store the result.
movdqa xmm1,XMMWORD PTR 256[ecx*1+ebp] |
||||
|
; pshufb xmm3,xmm1
DB 102,15,56,0,217 |
||||
|
add ecx,-16 |
||||
|
and ecx,48 |
||||
|
movdqu XMMWORD PTR [edx],xmm3 |
||||
|
ret |
||||
|
__vpaes_schedule_mangle ENDP |
||||
|
ALIGN 16 |
||||
|
; _vpaes_set_encrypt_key: build an encryption key schedule.
; Stack arguments (32-bit, read after four pushes):
;   20[esp] -> esi = user key bytes
;   24[esp] -> eax = key length in bits
;   28[esp] -> edx = output key structure (round count stored at offset 240)
; Returns eax = 0. Preserves ebp, ebx, esi, edi.
_vpaes_set_encrypt_key PROC PUBLIC |
||||
|
$L_vpaes_set_encrypt_key_begin:: |
||||
|
push ebp |
||||
|
push ebx |
||||
|
push esi |
||||
|
push edi |
||||
|
mov esi,DWORD PTR 20[esp] |
||||
|
; Carve out a 16-byte-aligned scratch frame at least 56 bytes below the
; current stack pointer; the arguments are fetched before esp is switched.
lea ebx,DWORD PTR [esp-56] |
||||
|
mov eax,DWORD PTR 24[esp] |
||||
|
and ebx,-16 |
||||
|
mov edx,DWORD PTR 28[esp] |
||||
|
; esp = aligned frame, ebx = original esp.
xchg ebx,esp |
||||
|
; Save the original esp so it can be restored after the call.
mov DWORD PTR 48[esp],ebx |
||||
|
; Round count stored in the key = bits/32 + 5 (9, 11 or 13 for
; 128/192/256-bit keys).
mov ebx,eax |
||||
|
shr ebx,5 |
||||
|
add ebx,5 |
||||
|
mov DWORD PTR 240[edx],ebx |
||||
|
; Initial row offset for the schedule routines.
mov ecx,48 |
||||
|
; edi = 0 selects the encryption path in __vpaes_schedule_core.
mov edi,0 |
||||
|
; Position-independent table address: the callee adds its return address
; (= $L016pic_point) to this link-time difference.
mov ebp,OFFSET ($L_vpaes_consts+030h-$L016pic_point) |
||||
|
call __vpaes_schedule_core |
||||
|
$L016pic_point: |
||||
|
; Restore the original stack pointer.
mov esp,DWORD PTR 48[esp] |
||||
|
; Return 0.
xor eax,eax |
||||
|
pop edi |
||||
|
pop esi |
||||
|
pop ebx |
||||
|
pop ebp |
||||
|
ret |
||||
|
_vpaes_set_encrypt_key ENDP |
||||
|
ALIGN 16 |
||||
|
; _vpaes_set_decrypt_key: build a decryption key schedule.
; Stack arguments (32-bit, read after four pushes):
;   20[esp] -> esi = user key bytes
;   24[esp] -> eax = key length in bits
;   28[esp] -> edx = output key structure (round count stored at offset 240)
; Returns eax = 0. Preserves ebp, ebx, esi, edi.
_vpaes_set_decrypt_key PROC PUBLIC |
||||
|
$L_vpaes_set_decrypt_key_begin:: |
||||
|
push ebp |
||||
|
push ebx |
||||
|
push esi |
||||
|
push edi |
||||
|
mov esi,DWORD PTR 20[esp] |
||||
|
; Carve out a 16-byte-aligned scratch frame; arguments are fetched before
; esp is switched.
lea ebx,DWORD PTR [esp-56] |
||||
|
mov eax,DWORD PTR 24[esp] |
||||
|
and ebx,-16 |
||||
|
mov edx,DWORD PTR 28[esp] |
||||
|
; esp = aligned frame, ebx = original esp.
xchg ebx,esp |
||||
|
; Save the original esp for the epilogue.
mov DWORD PTR 48[esp],ebx |
||||
|
; Round count stored in the key = bits/32 + 5.
mov ebx,eax |
||||
|
shr ebx,5 |
||||
|
add ebx,5 |
||||
|
mov DWORD PTR 240[edx],ebx |
||||
|
; edx = key + 16*count + 16: the schedule is written from its last slot
; backwards (the schedule routines step edx by -16 when edi != 0).
shl ebx,4 |
||||
|
lea edx,DWORD PTR 16[ebx*1+edx] |
||||
|
; edi = 1 selects the decryption path in __vpaes_schedule_core.
mov edi,1 |
||||
|
; ecx = ((bits >> 1) & 32) ^ 32: 32 for 128- and 256-bit keys, 0 for
; 192-bit keys.
mov ecx,eax |
||||
|
shr ecx,1 |
||||
|
and ecx,32 |
||||
|
xor ecx,32 |
||||
|
; Position-independent table address: the callee adds its return address
; (= $L017pic_point) to this link-time difference.
mov ebp,OFFSET ($L_vpaes_consts+030h-$L017pic_point) |
||||
|
call __vpaes_schedule_core |
||||
|
$L017pic_point: |
||||
|
; Restore the original stack pointer.
mov esp,DWORD PTR 48[esp] |
||||
|
; Return 0.
xor eax,eax |
||||
|
pop edi |
||||
|
pop esi |
||||
|
pop ebx |
||||
|
pop ebp |
||||
|
ret |
||||
|
_vpaes_set_decrypt_key ENDP |
||||
|
ALIGN 16 |
||||
|
; _vpaes_encrypt: encrypt a single 16-byte block.
; Stack arguments (32-bit, read after four pushes):
;   20[esp] -> esi = input block
;   24[esp] -> edi = output block
;   28[esp] -> edx = key schedule
; Input and output are accessed with unaligned loads/stores.
; Preserves ebp, ebx, esi, edi.
_vpaes_encrypt PROC PUBLIC |
||||
|
$L_vpaes_encrypt_begin:: |
||||
|
push ebp |
||||
|
push ebx |
||||
|
push esi |
||||
|
push edi |
||||
|
; Position-independent table address, fixed up inside __vpaes_preheat by
; adding the return address (= $L018pic_point). The call also loads
; xmm6/xmm7.
mov ebp,OFFSET ($L_vpaes_consts+030h-$L018pic_point) |
||||
|
call __vpaes_preheat |
||||
|
$L018pic_point: |
||||
|
mov esi,DWORD PTR 20[esp] |
||||
|
; Switch to a 16-byte-aligned frame below the current stack; the
; arguments are fetched before esp changes.
lea ebx,DWORD PTR [esp-56] |
||||
|
mov edi,DWORD PTR 24[esp] |
||||
|
and ebx,-16 |
||||
|
mov edx,DWORD PTR 28[esp] |
||||
|
xchg ebx,esp |
||||
|
; Save the original esp (the core clobbers ebx).
mov DWORD PTR 48[esp],ebx |
||||
|
movdqu xmm0,XMMWORD PTR [esi] |
||||
|
call __vpaes_encrypt_core |
||||
|
movdqu XMMWORD PTR [edi],xmm0 |
||||
|
; Restore the original stack pointer.
mov esp,DWORD PTR 48[esp] |
||||
|
pop edi |
||||
|
pop esi |
||||
|
pop ebx |
||||
|
pop ebp |
||||
|
ret |
||||
|
_vpaes_encrypt ENDP |
||||
|
ALIGN 16 |
||||
|
; _vpaes_decrypt: decrypt a single 16-byte block.
; Stack arguments (32-bit, read after four pushes):
;   20[esp] -> esi = input block
;   24[esp] -> edi = output block
;   28[esp] -> edx = key schedule
; Input and output are accessed with unaligned loads/stores.
; Preserves ebp, ebx, esi, edi.
_vpaes_decrypt PROC PUBLIC |
||||
|
$L_vpaes_decrypt_begin:: |
||||
|
push ebp |
||||
|
push ebx |
||||
|
push esi |
||||
|
push edi |
||||
|
; Position-independent table address, fixed up inside __vpaes_preheat by
; adding the return address (= $L019pic_point). The call also loads
; xmm6/xmm7.
mov ebp,OFFSET ($L_vpaes_consts+030h-$L019pic_point) |
||||
|
call __vpaes_preheat |
||||
|
$L019pic_point: |
||||
|
mov esi,DWORD PTR 20[esp] |
||||
|
; Switch to a 16-byte-aligned frame below the current stack; the
; arguments are fetched before esp changes.
lea ebx,DWORD PTR [esp-56] |
||||
|
mov edi,DWORD PTR 24[esp] |
||||
|
and ebx,-16 |
||||
|
mov edx,DWORD PTR 28[esp] |
||||
|
xchg ebx,esp |
||||
|
; Save the original esp (the core clobbers ebx).
mov DWORD PTR 48[esp],ebx |
||||
|
movdqu xmm0,XMMWORD PTR [esi] |
||||
|
call __vpaes_decrypt_core |
||||
|
movdqu XMMWORD PTR [edi],xmm0 |
||||
|
; Restore the original stack pointer.
mov esp,DWORD PTR 48[esp] |
||||
|
pop edi |
||||
|
pop esi |
||||
|
pop ebx |
||||
|
pop ebp |
||||
|
ret |
||||
|
_vpaes_decrypt ENDP |
||||
|
ALIGN 16 |
||||
|
; _vpaes_cbc_encrypt: chained (CBC) encryption or decryption of a buffer.
; Stack arguments (32-bit, read after four pushes):
;   20[esp] -> esi = input        24[esp] -> edi = output
;   28[esp] -> eax = length in bytes
;   32[esp] -> edx = key schedule
;   36[esp] -> ebp = 16-byte chaining value (IV), updated on return
;   40[esp] -> ecx = direction: 0 selects the decrypt loop, else encrypt
; Only whole 16-byte blocks are processed; if length < 16 the routine
; returns without touching the output or the IV, and a trailing partial
; block is ignored. Preserves ebp, ebx, esi, edi.
; Aligned frame layout: [esp] = out-in, 4[esp] = key, 8[esp] = IV pointer,
; 16[esp]/32[esp] = 16-byte temporaries, 48[esp] = saved original esp.
_vpaes_cbc_encrypt PROC PUBLIC |
||||
|
$L_vpaes_cbc_encrypt_begin:: |
||||
|
push ebp |
||||
|
push ebx |
||||
|
push esi |
||||
|
push edi |
||||
|
mov esi,DWORD PTR 20[esp] |
||||
|
mov edi,DWORD PTR 24[esp] |
||||
|
mov eax,DWORD PTR 28[esp] |
||||
|
mov edx,DWORD PTR 32[esp] |
||||
|
; eax = length - 16; a borrow means fewer than 16 bytes were supplied.
sub eax,16 |
||||
|
jc $L020cbc_abort |
||||
|
; Build the 16-byte-aligned frame; remaining arguments are fetched
; before esp is switched.
lea ebx,DWORD PTR [esp-56] |
||||
|
mov ebp,DWORD PTR 36[esp] |
||||
|
and ebx,-16 |
||||
|
mov ecx,DWORD PTR 40[esp] |
||||
|
xchg ebx,esp |
||||
|
; xmm1 = current chaining value.
movdqu xmm1,XMMWORD PTR [ebp] |
||||
|
; edi = out - in, so the output address is always esi + (out - in).
sub edi,esi |
||||
|
mov DWORD PTR 48[esp],ebx |
||||
|
mov DWORD PTR [esp],edi |
||||
|
mov DWORD PTR 4[esp],edx |
||||
|
mov DWORD PTR 8[esp],ebp |
||||
|
; edi = bytes remaining after the current block.
mov edi,eax |
||||
|
; Position-independent table address, fixed up inside __vpaes_preheat by
; adding the return address (= $L021pic_point).
mov ebp,OFFSET ($L_vpaes_consts+030h-$L021pic_point) |
||||
|
call __vpaes_preheat |
||||
|
$L021pic_point: |
||||
|
cmp ecx,0 |
||||
|
je $L022cbc_dec_loop |
||||
|
jmp $L023cbc_enc_loop |
||||
|
ALIGN 16 |
||||
|
$L023cbc_enc_loop: |
||||
|
; Encrypt: C = E(P xor previous C); the new C becomes the chaining value.
movdqu xmm0,XMMWORD PTR [esi] |
||||
|
pxor xmm0,xmm1 |
||||
|
call __vpaes_encrypt_core |
||||
|
; ebx = out - in.
mov ebx,DWORD PTR [esp] |
||||
|
; Reload the key pointer: the core advances edx while it runs.
mov edx,DWORD PTR 4[esp] |
||||
|
movdqa xmm1,xmm0 |
||||
|
movdqu XMMWORD PTR [esi*1+ebx],xmm0 |
||||
|
lea esi,DWORD PTR 16[esi] |
||||
|
; Continue while at least one more full block remains (no borrow).
sub edi,16 |
||||
|
jnc $L023cbc_enc_loop |
||||
|
jmp $L024cbc_done |
||||
|
ALIGN 16 |
||||
|
$L022cbc_dec_loop: |
||||
|
; Decrypt: P = D(C) xor previous C. The core clobbers xmm1, so both the
; chaining value and the current ciphertext are parked on the stack.
movdqu xmm0,XMMWORD PTR [esi] |
||||
|
movdqa XMMWORD PTR 16[esp],xmm1 |
||||
|
movdqa XMMWORD PTR 32[esp],xmm0 |
||||
|
call __vpaes_decrypt_core |
||||
|
; ebx = out - in.
mov ebx,DWORD PTR [esp] |
||||
|
; Reload the key pointer: the core advances edx while it runs.
mov edx,DWORD PTR 4[esp] |
||||
|
pxor xmm0,XMMWORD PTR 16[esp] |
||||
|
; This block's ciphertext is the next chaining value.
movdqa xmm1,XMMWORD PTR 32[esp] |
||||
|
movdqu XMMWORD PTR [esi*1+ebx],xmm0 |
||||
|
lea esi,DWORD PTR 16[esi] |
||||
|
; Continue while at least one more full block remains (no borrow).
sub edi,16 |
||||
|
jnc $L022cbc_dec_loop |
||||
|
$L024cbc_done: |
||||
|
; Fetch the IV pointer before leaving the aligned frame, then write the
; final chaining value back to the caller's IV buffer.
mov ebx,DWORD PTR 8[esp] |
||||
|
mov esp,DWORD PTR 48[esp] |
||||
|
movdqu XMMWORD PTR [ebx],xmm1 |
||||
|
$L020cbc_abort: |
||||
|
pop edi |
||||
|
pop esi |
||||
|
pop ebx |
||||
|
pop ebp |
||||
|
ret |
||||
|
_vpaes_cbc_encrypt ENDP |
||||
|
.text$ ENDS |
||||
|
END |
@ -0,0 +1,738 @@ |
|||||
|
TITLE ghash-x86.asm |
||||
|
IF @Version LT 800 |
||||
|
ECHO MASM version 8.00 or later is strongly recommended. |
||||
|
ENDIF |
||||
|
.686 |
||||
|
.XMM |
||||
|
IF @Version LT 800 |
||||
|
XMMWORD STRUCT 16 |
||||
|
DQ 2 dup (?) |
||||
|
XMMWORD ENDS |
||||
|
ENDIF |
||||
|
|
||||
|
.MODEL FLAT |
||||
|
OPTION DOTNAME |
||||
|
IF @Version LT 800 |
||||
|
.text$ SEGMENT PAGE 'CODE' |
||||
|
ELSE |
||||
|
.text$ SEGMENT ALIGN(64) 'CODE' |
||||
|
ENDIF |
||||
|
ALIGN 16 |
||||
|
; _gcm_gmult_4bit_x86: in-place update of a 16-byte value using a table of
; 16-byte entries indexed by 4-bit nibbles (presumably the GHASH
; multiplication Xi = Xi * H with a precomputed 4-bit table - confirm
; against the C caller).
; Stack arguments (after four pushes and an 84-byte frame):
;   104[esp] -> edi = 16-byte block, read and overwritten
;   108[esp] -> esi = table base; entries are addressed as [nibble*16+esi]
; Frame: [esp..esp+15] = copy of the block, 16[esp]..76[esp] = 16-entry
; dword reduction table. Preserves ebp, ebx, esi, edi.
; Accumulator layout: ebp:edx:ecx:ebx, with ebx least significant.
_gcm_gmult_4bit_x86 PROC PUBLIC |
||||
|
$L_gcm_gmult_4bit_x86_begin:: |
||||
|
push ebp |
||||
|
push ebx |
||||
|
push esi |
||||
|
push edi |
||||
|
sub esp,84 |
||||
|
mov edi,DWORD PTR 104[esp] |
||||
|
mov esi,DWORD PTR 108[esp] |
||||
|
; Load the 16-byte block as four dwords.
mov ebp,DWORD PTR [edi] |
||||
|
mov edx,DWORD PTR 4[edi] |
||||
|
mov ecx,DWORD PTR 8[edi] |
||||
|
mov ebx,DWORD PTR 12[edi] |
||||
|
; Build the reduction table on the stack: entry i is XORed into the top
; dword whenever nibble value i is shifted out of the bottom.
; (471859200 = 1C200000h, i.e. each constant sits in the upper 16 bits.)
mov DWORD PTR 16[esp],0 |
||||
|
mov DWORD PTR 20[esp],471859200 |
||||
|
mov DWORD PTR 24[esp],943718400 |
||||
|
mov DWORD PTR 28[esp],610271232 |
||||
|
mov DWORD PTR 32[esp],1887436800 |
||||
|
mov DWORD PTR 36[esp],1822425088 |
||||
|
mov DWORD PTR 40[esp],1220542464 |
||||
|
mov DWORD PTR 44[esp],1423966208 |
||||
|
mov DWORD PTR 48[esp],3774873600 |
||||
|
mov DWORD PTR 52[esp],4246732800 |
||||
|
mov DWORD PTR 56[esp],3644850176 |
||||
|
mov DWORD PTR 60[esp],3311403008 |
||||
|
mov DWORD PTR 64[esp],2441084928 |
||||
|
mov DWORD PTR 68[esp],2376073216 |
||||
|
mov DWORD PTR 72[esp],2847932416 |
||||
|
mov DWORD PTR 76[esp],3051356160 |
||||
|
; Keep a byte-addressable copy of the block at [esp].
mov DWORD PTR [esp],ebp |
||||
|
mov DWORD PTR 4[esp],edx |
||||
|
mov DWORD PTR 8[esp],ecx |
||||
|
mov DWORD PTR 12[esp],ebx |
||||
|
; ebx = (low nibble of block byte 15) * 16 = offset of the first entry.
shr ebx,20 |
||||
|
and ebx,240 |
||||
|
; Seed the accumulator from that table entry.
mov ebp,DWORD PTR 4[ebx*1+esi] |
||||
|
mov edx,DWORD PTR [ebx*1+esi] |
||||
|
mov ecx,DWORD PTR 12[ebx*1+esi] |
||||
|
mov ebx,DWORD PTR 8[ebx*1+esi] |
||||
|
; eax is used as a zero-extended byte index (only al is written below).
xor eax,eax |
||||
|
; edi = index of the block byte being consumed, counting down from 15.
mov edi,15 |
||||
|
jmp $L000x86_loop |
||||
|
ALIGN 16 |
||||
|
$L000x86_loop: |
||||
|
; First half: shift the accumulator right by 4 bits, reduce using the
; nibble that fell off, then XOR in the entry for the HIGH nibble of
; block byte edi.
mov al,bl |
||||
|
shrd ebx,ecx,4 |
||||
|
and al,15 |
||||
|
shrd ecx,edx,4 |
||||
|
shrd edx,ebp,4 |
||||
|
shr ebp,4 |
||||
|
xor ebp,DWORD PTR 16[eax*4+esp] |
||||
|
mov al,BYTE PTR [edi*1+esp] |
||||
|
; al = high nibble * 16 = table offset.
and al,240 |
||||
|
xor ebx,DWORD PTR 8[eax*1+esi] |
||||
|
xor ecx,DWORD PTR 12[eax*1+esi] |
||||
|
xor edx,DWORD PTR [eax*1+esi] |
||||
|
xor ebp,DWORD PTR 4[eax*1+esi] |
||||
|
; Move to the previous byte; stop once byte 0 has been fully consumed.
dec edi |
||||
|
js $L001x86_break |
||||
|
; Second half: same shift/reduce step, then XOR in the entry for the LOW
; nibble of the (new) block byte edi.
mov al,bl |
||||
|
shrd ebx,ecx,4 |
||||
|
and al,15 |
||||
|
shrd ecx,edx,4 |
||||
|
shrd edx,ebp,4 |
||||
|
shr ebp,4 |
||||
|
xor ebp,DWORD PTR 16[eax*4+esp] |
||||
|
mov al,BYTE PTR [edi*1+esp] |
||||
|
; al = low nibble * 16 = table offset.
shl al,4 |
||||
|
xor ebx,DWORD PTR 8[eax*1+esi] |
||||
|
xor ecx,DWORD PTR 12[eax*1+esi] |
||||
|
xor edx,DWORD PTR [eax*1+esi] |
||||
|
xor ebp,DWORD PTR 4[eax*1+esi] |
||||
|
jmp $L000x86_loop |
||||
|
ALIGN 16 |
||||
|
$L001x86_break: |
||||
|
; Byte-swap each dword and store the result back over the input block.
bswap ebx |
||||
|
bswap ecx |
||||
|
bswap edx |
||||
|
bswap ebp |
||||
|
; Reload the block pointer (edi was used as the byte counter).
mov edi,DWORD PTR 104[esp] |
||||
|
mov DWORD PTR 12[edi],ebx |
||||
|
mov DWORD PTR 8[edi],ecx |
||||
|
mov DWORD PTR 4[edi],edx |
||||
|
mov DWORD PTR [edi],ebp |
||||
|
add esp,84 |
||||
|
pop edi |
||||
|
pop esi |
||||
|
pop ebx |
||||
|
pop ebp |
||||
|
ret |
||||
|
_gcm_gmult_4bit_x86 ENDP |
||||
|
ALIGN 16 |
||||
|
; _gcm_ghash_4bit_x86: fold a run of 16-byte input blocks into a 16-byte
; accumulator. For each block: accumulator ^= block, then the same
; nibble-table update as _gcm_gmult_4bit_x86 (presumably GHASH over the
; input - confirm against the C caller).
; Stack arguments (after four pushes and an 84-byte frame):
;   104[esp] -> ebx = 16-byte accumulator, read and overwritten
;   108[esp] -> esi = table base; entries addressed as [nibble*16+esi]
;   112[esp] -> edi = input pointer (slot reused as the running pointer)
;   116[esp] -> ecx = input length in bytes (slot reused as end pointer)
; NOTE(review): the outer loop tests its condition at the bottom, so one
; block is always processed, even for length 0; callers presumably pass a
; non-zero multiple of 16 - confirm.
; Preserves ebp, ebx, esi, edi.
_gcm_ghash_4bit_x86 PROC PUBLIC |
||||
|
$L_gcm_ghash_4bit_x86_begin:: |
||||
|
push ebp |
||||
|
push ebx |
||||
|
push esi |
||||
|
push edi |
||||
|
sub esp,84 |
||||
|
mov ebx,DWORD PTR 104[esp] |
||||
|
mov esi,DWORD PTR 108[esp] |
||||
|
mov edi,DWORD PTR 112[esp] |
||||
|
mov ecx,DWORD PTR 116[esp] |
||||
|
; Replace the length argument with the end-of-input pointer.
add ecx,edi |
||||
|
mov DWORD PTR 116[esp],ecx |
||||
|
; Load the accumulator as four dwords (ebx is overwritten last).
mov ebp,DWORD PTR [ebx] |
||||
|
mov edx,DWORD PTR 4[ebx] |
||||
|
mov ecx,DWORD PTR 8[ebx] |
||||
|
mov ebx,DWORD PTR 12[ebx] |
||||
|
; Build the 16-entry dword reduction table at 16[esp]..76[esp]: entry i
; is XORed into the top dword whenever nibble value i is shifted out.
mov DWORD PTR 16[esp],0 |
||||
|
mov DWORD PTR 20[esp],471859200 |
||||
|
mov DWORD PTR 24[esp],943718400 |
||||
|
mov DWORD PTR 28[esp],610271232 |
||||
|
mov DWORD PTR 32[esp],1887436800 |
||||
|
mov DWORD PTR 36[esp],1822425088 |
||||
|
mov DWORD PTR 40[esp],1220542464 |
||||
|
mov DWORD PTR 44[esp],1423966208 |
||||
|
mov DWORD PTR 48[esp],3774873600 |
||||
|
mov DWORD PTR 52[esp],4246732800 |
||||
|
mov DWORD PTR 56[esp],3644850176 |
||||
|
mov DWORD PTR 60[esp],3311403008 |
||||
|
mov DWORD PTR 64[esp],2441084928 |
||||
|
mov DWORD PTR 68[esp],2376073216 |
||||
|
mov DWORD PTR 72[esp],2847932416 |
||||
|
mov DWORD PTR 76[esp],3051356160 |
||||
|
ALIGN 16 |
||||
|
$L002x86_outer_loop: |
||||
|
; XOR the next input block into the accumulator and keep a
; byte-addressable copy of the result at [esp].
xor ebx,DWORD PTR 12[edi] |
||||
|
xor ecx,DWORD PTR 8[edi] |
||||
|
xor edx,DWORD PTR 4[edi] |
||||
|
xor ebp,DWORD PTR [edi] |
||||
|
mov DWORD PTR 12[esp],ebx |
||||
|
mov DWORD PTR 8[esp],ecx |
||||
|
mov DWORD PTR 4[esp],edx |
||||
|
mov DWORD PTR [esp],ebp |
||||
|
; ebx = (low nibble of byte 15) * 16 = offset of the first table entry.
shr ebx,20 |
||||
|
and ebx,240 |
||||
|
; Seed the working value ebp:edx:ecx:ebx (ebx least significant).
mov ebp,DWORD PTR 4[ebx*1+esi] |
||||
|
mov edx,DWORD PTR [ebx*1+esi] |
||||
|
mov ecx,DWORD PTR 12[ebx*1+esi] |
||||
|
mov ebx,DWORD PTR 8[ebx*1+esi] |
||||
|
; eax is used as a zero-extended byte index (only al is written below).
xor eax,eax |
||||
|
; edi = index of the byte being consumed, counting down from 15.
mov edi,15 |
||||
|
jmp $L003x86_loop |
||||
|
ALIGN 16 |
||||
|
$L003x86_loop: |
||||
|
; First half: shift right by 4 bits, reduce with the nibble that fell
; off, then XOR in the entry for the HIGH nibble of byte edi.
mov al,bl |
||||
|
shrd ebx,ecx,4 |
||||
|
and al,15 |
||||
|
shrd ecx,edx,4 |
||||
|
shrd edx,ebp,4 |
||||
|
shr ebp,4 |
||||
|
xor ebp,DWORD PTR 16[eax*4+esp] |
||||
|
mov al,BYTE PTR [edi*1+esp] |
||||
|
; al = high nibble * 16 = table offset.
and al,240 |
||||
|
xor ebx,DWORD PTR 8[eax*1+esi] |
||||
|
xor ecx,DWORD PTR 12[eax*1+esi] |
||||
|
xor edx,DWORD PTR [eax*1+esi] |
||||
|
xor ebp,DWORD PTR 4[eax*1+esi] |
||||
|
; Move to the previous byte; stop once byte 0 has been fully consumed.
dec edi |
||||
|
js $L004x86_break |
||||
|
; Second half: same shift/reduce step, then XOR in the entry for the LOW
; nibble of the (new) byte edi.
mov al,bl |
||||
|
shrd ebx,ecx,4 |
||||
|
and al,15 |
||||
|
shrd ecx,edx,4 |
||||
|
shrd edx,ebp,4 |
||||
|
shr ebp,4 |
||||
|
xor ebp,DWORD PTR 16[eax*4+esp] |
||||
|
mov al,BYTE PTR [edi*1+esp] |
||||
|
; al = low nibble * 16 = table offset.
shl al,4 |
||||
|
xor ebx,DWORD PTR 8[eax*1+esi] |
||||
|
xor ecx,DWORD PTR 12[eax*1+esi] |
||||
|
xor edx,DWORD PTR [eax*1+esi] |
||||
|
xor ebp,DWORD PTR 4[eax*1+esi] |
||||
|
jmp $L003x86_loop |
||||
|
ALIGN 16 |
||||
|
$L004x86_break: |
||||
|
; Byte-swap so the registers again hold the accumulator in memory byte
; order, ready for the XOR at the top of the outer loop or the final store.
bswap ebx |
||||
|
bswap ecx |
||||
|
bswap edx |
||||
|
bswap ebp |
||||
|
; Advance the input pointer by one block and loop while it is below the
; end pointer (unsigned compare; mov does not disturb the flags from cmp).
mov edi,DWORD PTR 112[esp] |
||||
|
lea edi,DWORD PTR 16[edi] |
||||
|
cmp edi,DWORD PTR 116[esp] |
||||
|
mov DWORD PTR 112[esp],edi |
||||
|
jb $L002x86_outer_loop |
||||
|
; Store the final accumulator back to the caller's buffer.
mov edi,DWORD PTR 104[esp] |
||||
|
mov DWORD PTR 12[edi],ebx |
||||
|
mov DWORD PTR 8[edi],ecx |
||||
|
mov DWORD PTR 4[edi],edx |
||||
|
mov DWORD PTR [edi],ebp |
||||
|
add esp,84 |
||||
|
pop edi |
||||
|
pop esi |
||||
|
pop ebx |
||||
|
pop ebp |
||||
|
ret |
||||
|
_gcm_ghash_4bit_x86 ENDP |
||||
|
ALIGN 16 |
||||
|
__mmx_gmult_4bit_inner PROC PRIVATE |
||||
|
xor ecx,ecx |
||||
|
mov edx,ebx |
||||
|
mov cl,dl |
||||
|
shl cl,4 |
||||
|
and edx,240 |
||||
|
movq mm0,QWORD PTR 8[ecx*1+esi] |
||||
|
movq mm1,QWORD PTR [ecx*1+esi] |
||||
|
movd ebp,mm0 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[edx*1+esi] |
||||
|
mov cl,BYTE PTR 14[edi] |
||||
|
psllq mm2,60 |
||||
|
and ebp,15 |
||||
|
pxor mm1,QWORD PTR [edx*1+esi] |
||||
|
mov edx,ecx |
||||
|
movd ebx,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
shl cl,4 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[ecx*1+esi] |
||||
|
psllq mm2,60 |
||||
|
and edx,240 |
||||
|
pxor mm1,QWORD PTR [ebp*8+eax] |
||||
|
and ebx,15 |
||||
|
pxor mm1,QWORD PTR [ecx*1+esi] |
||||
|
movd ebp,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[edx*1+esi] |
||||
|
mov cl,BYTE PTR 13[edi] |
||||
|
psllq mm2,60 |
||||
|
pxor mm1,QWORD PTR [ebx*8+eax] |
||||
|
and ebp,15 |
||||
|
pxor mm1,QWORD PTR [edx*1+esi] |
||||
|
mov edx,ecx |
||||
|
movd ebx,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
shl cl,4 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[ecx*1+esi] |
||||
|
psllq mm2,60 |
||||
|
and edx,240 |
||||
|
pxor mm1,QWORD PTR [ebp*8+eax] |
||||
|
and ebx,15 |
||||
|
pxor mm1,QWORD PTR [ecx*1+esi] |
||||
|
movd ebp,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[edx*1+esi] |
||||
|
mov cl,BYTE PTR 12[edi] |
||||
|
psllq mm2,60 |
||||
|
pxor mm1,QWORD PTR [ebx*8+eax] |
||||
|
and ebp,15 |
||||
|
pxor mm1,QWORD PTR [edx*1+esi] |
||||
|
mov edx,ecx |
||||
|
movd ebx,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
shl cl,4 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[ecx*1+esi] |
||||
|
psllq mm2,60 |
||||
|
and edx,240 |
||||
|
pxor mm1,QWORD PTR [ebp*8+eax] |
||||
|
and ebx,15 |
||||
|
pxor mm1,QWORD PTR [ecx*1+esi] |
||||
|
movd ebp,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[edx*1+esi] |
||||
|
mov cl,BYTE PTR 11[edi] |
||||
|
psllq mm2,60 |
||||
|
pxor mm1,QWORD PTR [ebx*8+eax] |
||||
|
and ebp,15 |
||||
|
pxor mm1,QWORD PTR [edx*1+esi] |
||||
|
mov edx,ecx |
||||
|
movd ebx,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
shl cl,4 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[ecx*1+esi] |
||||
|
psllq mm2,60 |
||||
|
and edx,240 |
||||
|
pxor mm1,QWORD PTR [ebp*8+eax] |
||||
|
and ebx,15 |
||||
|
pxor mm1,QWORD PTR [ecx*1+esi] |
||||
|
movd ebp,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[edx*1+esi] |
||||
|
mov cl,BYTE PTR 10[edi] |
||||
|
psllq mm2,60 |
||||
|
pxor mm1,QWORD PTR [ebx*8+eax] |
||||
|
and ebp,15 |
||||
|
pxor mm1,QWORD PTR [edx*1+esi] |
||||
|
mov edx,ecx |
||||
|
movd ebx,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
shl cl,4 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[ecx*1+esi] |
||||
|
psllq mm2,60 |
||||
|
and edx,240 |
||||
|
pxor mm1,QWORD PTR [ebp*8+eax] |
||||
|
and ebx,15 |
||||
|
pxor mm1,QWORD PTR [ecx*1+esi] |
||||
|
movd ebp,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[edx*1+esi] |
||||
|
mov cl,BYTE PTR 9[edi] |
||||
|
psllq mm2,60 |
||||
|
pxor mm1,QWORD PTR [ebx*8+eax] |
||||
|
and ebp,15 |
||||
|
pxor mm1,QWORD PTR [edx*1+esi] |
||||
|
mov edx,ecx |
||||
|
movd ebx,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
shl cl,4 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[ecx*1+esi] |
||||
|
psllq mm2,60 |
||||
|
and edx,240 |
||||
|
pxor mm1,QWORD PTR [ebp*8+eax] |
||||
|
and ebx,15 |
||||
|
pxor mm1,QWORD PTR [ecx*1+esi] |
||||
|
movd ebp,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[edx*1+esi] |
||||
|
mov cl,BYTE PTR 8[edi] |
||||
|
psllq mm2,60 |
||||
|
pxor mm1,QWORD PTR [ebx*8+eax] |
||||
|
and ebp,15 |
||||
|
pxor mm1,QWORD PTR [edx*1+esi] |
||||
|
mov edx,ecx |
||||
|
movd ebx,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
shl cl,4 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[ecx*1+esi] |
||||
|
psllq mm2,60 |
||||
|
and edx,240 |
||||
|
pxor mm1,QWORD PTR [ebp*8+eax] |
||||
|
and ebx,15 |
||||
|
pxor mm1,QWORD PTR [ecx*1+esi] |
||||
|
movd ebp,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[edx*1+esi] |
||||
|
mov cl,BYTE PTR 7[edi] |
||||
|
psllq mm2,60 |
||||
|
pxor mm1,QWORD PTR [ebx*8+eax] |
||||
|
and ebp,15 |
||||
|
pxor mm1,QWORD PTR [edx*1+esi] |
||||
|
mov edx,ecx |
||||
|
movd ebx,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
shl cl,4 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[ecx*1+esi] |
||||
|
psllq mm2,60 |
||||
|
and edx,240 |
||||
|
pxor mm1,QWORD PTR [ebp*8+eax] |
||||
|
and ebx,15 |
||||
|
pxor mm1,QWORD PTR [ecx*1+esi] |
||||
|
movd ebp,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[edx*1+esi] |
||||
|
mov cl,BYTE PTR 6[edi] |
||||
|
psllq mm2,60 |
||||
|
pxor mm1,QWORD PTR [ebx*8+eax] |
||||
|
and ebp,15 |
||||
|
pxor mm1,QWORD PTR [edx*1+esi] |
||||
|
mov edx,ecx |
||||
|
movd ebx,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
shl cl,4 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[ecx*1+esi] |
||||
|
psllq mm2,60 |
||||
|
and edx,240 |
||||
|
pxor mm1,QWORD PTR [ebp*8+eax] |
||||
|
and ebx,15 |
||||
|
pxor mm1,QWORD PTR [ecx*1+esi] |
||||
|
movd ebp,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[edx*1+esi] |
||||
|
mov cl,BYTE PTR 5[edi] |
||||
|
psllq mm2,60 |
||||
|
pxor mm1,QWORD PTR [ebx*8+eax] |
||||
|
and ebp,15 |
||||
|
pxor mm1,QWORD PTR [edx*1+esi] |
||||
|
mov edx,ecx |
||||
|
movd ebx,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
shl cl,4 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[ecx*1+esi] |
||||
|
psllq mm2,60 |
||||
|
and edx,240 |
||||
|
pxor mm1,QWORD PTR [ebp*8+eax] |
||||
|
and ebx,15 |
||||
|
pxor mm1,QWORD PTR [ecx*1+esi] |
||||
|
movd ebp,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[edx*1+esi] |
||||
|
mov cl,BYTE PTR 4[edi] |
||||
|
psllq mm2,60 |
||||
|
pxor mm1,QWORD PTR [ebx*8+eax] |
||||
|
and ebp,15 |
||||
|
pxor mm1,QWORD PTR [edx*1+esi] |
||||
|
mov edx,ecx |
||||
|
movd ebx,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
shl cl,4 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[ecx*1+esi] |
||||
|
psllq mm2,60 |
||||
|
and edx,240 |
||||
|
pxor mm1,QWORD PTR [ebp*8+eax] |
||||
|
and ebx,15 |
||||
|
pxor mm1,QWORD PTR [ecx*1+esi] |
||||
|
movd ebp,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[edx*1+esi] |
||||
|
mov cl,BYTE PTR 3[edi] |
||||
|
psllq mm2,60 |
||||
|
pxor mm1,QWORD PTR [ebx*8+eax] |
||||
|
and ebp,15 |
||||
|
pxor mm1,QWORD PTR [edx*1+esi] |
||||
|
mov edx,ecx |
||||
|
movd ebx,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
shl cl,4 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[ecx*1+esi] |
||||
|
psllq mm2,60 |
||||
|
and edx,240 |
||||
|
pxor mm1,QWORD PTR [ebp*8+eax] |
||||
|
and ebx,15 |
||||
|
pxor mm1,QWORD PTR [ecx*1+esi] |
||||
|
movd ebp,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[edx*1+esi] |
||||
|
mov cl,BYTE PTR 2[edi] |
||||
|
psllq mm2,60 |
||||
|
pxor mm1,QWORD PTR [ebx*8+eax] |
||||
|
and ebp,15 |
||||
|
pxor mm1,QWORD PTR [edx*1+esi] |
||||
|
mov edx,ecx |
||||
|
movd ebx,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
shl cl,4 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[ecx*1+esi] |
||||
|
psllq mm2,60 |
||||
|
and edx,240 |
||||
|
pxor mm1,QWORD PTR [ebp*8+eax] |
||||
|
and ebx,15 |
||||
|
pxor mm1,QWORD PTR [ecx*1+esi] |
||||
|
movd ebp,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[edx*1+esi] |
||||
|
mov cl,BYTE PTR 1[edi] |
||||
|
psllq mm2,60 |
||||
|
pxor mm1,QWORD PTR [ebx*8+eax] |
||||
|
and ebp,15 |
||||
|
pxor mm1,QWORD PTR [edx*1+esi] |
||||
|
mov edx,ecx |
||||
|
movd ebx,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
shl cl,4 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[ecx*1+esi] |
||||
|
psllq mm2,60 |
||||
|
and edx,240 |
||||
|
pxor mm1,QWORD PTR [ebp*8+eax] |
||||
|
and ebx,15 |
||||
|
pxor mm1,QWORD PTR [ecx*1+esi] |
||||
|
movd ebp,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[edx*1+esi] |
||||
|
mov cl,BYTE PTR [edi] |
||||
|
psllq mm2,60 |
||||
|
pxor mm1,QWORD PTR [ebx*8+eax] |
||||
|
and ebp,15 |
||||
|
pxor mm1,QWORD PTR [edx*1+esi] |
||||
|
mov edx,ecx |
||||
|
movd ebx,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
shl cl,4 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[ecx*1+esi] |
||||
|
psllq mm2,60 |
||||
|
and edx,240 |
||||
|
pxor mm1,QWORD PTR [ebp*8+eax] |
||||
|
and ebx,15 |
||||
|
pxor mm1,QWORD PTR [ecx*1+esi] |
||||
|
movd ebp,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
psrlq mm0,4 |
||||
|
movq mm2,mm1 |
||||
|
psrlq mm1,4 |
||||
|
pxor mm0,QWORD PTR 8[edx*1+esi] |
||||
|
psllq mm2,60 |
||||
|
pxor mm1,QWORD PTR [ebx*8+eax] |
||||
|
and ebp,15 |
||||
|
pxor mm1,QWORD PTR [edx*1+esi] |
||||
|
movd ebx,mm0 |
||||
|
pxor mm0,mm2 |
||||
|
mov edi,DWORD PTR 4[ebp*8+eax] |
||||
|
psrlq mm0,32 |
||||
|
movd edx,mm1 |
||||
|
psrlq mm1,32 |
||||
|
movd ecx,mm0 |
||||
|
movd ebp,mm1 |
||||
|
shl edi,4 |
||||
|
bswap ebx |
||||
|
bswap edx |
||||
|
bswap ecx |
||||
|
xor ebp,edi |
||||
|
bswap ebp |
||||
|
ret |
||||
|
__mmx_gmult_4bit_inner ENDP |
||||
|
ALIGN 16 |
||||
|
; _gcm_gmult_4bit_mmx
; Public 32-bit entry point: runs __mmx_gmult_4bit_inner once over the 16-byte
; buffer addressed by the first stack argument and writes the four result
; dwords back into that same buffer.
;
; In:    [esp+4]  first stack argument  -> 16-byte buffer, read and overwritten
;        [esp+8]  second stack argument -> table handed to the inner routine in esi
;        NOTE(review): presumably the GHASH state (Xi) and the precomputed
;        multiplication table (Htable) - confirm against the C prototype.
; Out:   buffer updated in place; nothing is deliberately returned in eax.
; Saves: ebp, ebx, esi, edi (pushed on entry, popped before ret).
; Clobb: eax, ecx, edx, flags, MMX registers (emms is issued before returning).
; Stack: plain ret, so the caller removes the arguments.
_gcm_gmult_4bit_mmx PROC PUBLIC |
||||
|
$L_gcm_gmult_4bit_mmx_begin:: |
||||
|
; Four pushes (16 bytes) plus the return address put the first stack
; argument at 20[esp] and the second at 24[esp].
push ebp |
||||
|
push ebx |
||||
|
push esi |
||||
|
push edi |
||||
|
; edi = first argument (buffer pointer).
mov edi,DWORD PTR 20[esp] |
||||
|
; esi = second argument (table pointer).
mov esi,DWORD PTR 24[esp] |
||||
|
; Position-independent address lookup: the call pushes the address of
; $L005pic_point, the pop fetches it into eax, and the lea adds the
; assemble-time distance to $Lrem_4bit, leaving eax = address of $Lrem_4bit.
call $L005pic_point |
||||
|
$L005pic_point: |
||||
|
pop eax |
||||
|
lea eax,DWORD PTR ($Lrem_4bit-$L005pic_point)[eax] |
||||
|
; ebx = zero-extended last byte (offset 15) of the buffer, passed in to the
; inner routine.
movzx ebx,BYTE PTR 15[edi] |
||||
|
; The inner routine leaves its four result dwords in ebx, edx, ecx and ebp.
call __mmx_gmult_4bit_inner |
||||
|
; The inner routine overwrites edi, so reload the buffer pointer.
mov edi,DWORD PTR 20[esp] |
||||
|
; Leave MMX state so the caller can use x87 floating point again.
emms |
||||
|
; Write the result back: ebx -> offset 12, edx -> 4, ecx -> 8, ebp -> 0.
mov DWORD PTR 12[edi],ebx |
||||
|
mov DWORD PTR 4[edi],edx |
||||
|
mov DWORD PTR 8[edi],ecx |
||||
|
mov DWORD PTR [edi],ebp |
||||
|
; Restore the saved registers in reverse push order.
pop edi |
||||
|
pop esi |
||||
|
pop ebx |
||||
|
pop ebp |
||||
|
ret |
||||
|
_gcm_gmult_4bit_mmx ENDP |
||||
|
ALIGN 16 |
||||
|
; _gcm_ghash_4bit_mmx
; Public 32-bit entry point: walks an input buffer in 16-byte steps. Each step
; XORs 16 input bytes into a running 16-byte value, passes the result through
; __mmx_gmult_4bit_inner, and the final value is written to the buffer named by
; the first stack argument.
;
; In:    [esp+4]  arg 1 -> 16-byte value, loaded at entry and overwritten at exit
;        [esp+8]  arg 2 -> table handed to the inner routine in esi
;        [esp+12] arg 3 -> input data
;        [esp+16] arg 4 =  value added to arg 3 to form the end-of-input address
;        NOTE(review): presumably (Xi, Htable, inp, len) with len in bytes and a
;        multiple of 16 - confirm against the C prototype.
; Out:   arg 1 buffer updated in place. The arg 3 and arg 4 stack slots are
;        overwritten (used as scratch for the cursor and the end address).
; Saves: ebp, ebx, esi, edi (pushed on entry, popped before ret).
; Clobb: eax, ecx, edx, flags, MMX registers (emms is issued before returning).
; Stack: 20 bytes of locals; plain ret, so the caller removes the arguments.
; NOTE(review): the loop is tested at the bottom, so one 16-byte block is
; always consumed, even when arg 4 is zero - confirm callers never pass 0.
_gcm_ghash_4bit_mmx PROC PUBLIC |
||||
|
$L_gcm_ghash_4bit_mmx_begin:: |
||||
|
; Four pushes (16 bytes) plus the return address put the stack arguments
; at 20, 24, 28 and 32[esp].
push ebp |
||||
|
push ebx |
||||
|
push esi |
||||
|
push edi |
||||
|
; ebp = arg 1, esi = arg 2, edi = arg 3 (input cursor), ecx = arg 4.
mov ebp,DWORD PTR 20[esp] |
||||
|
mov esi,DWORD PTR 24[esp] |
||||
|
mov edi,DWORD PTR 28[esp] |
||||
|
mov ecx,DWORD PTR 32[esp] |
||||
|
; Position-independent address lookup: the call pushes the address of
; $L006pic_point, the pop fetches it into eax, and the lea adds the
; assemble-time distance to $Lrem_4bit, leaving eax = address of $Lrem_4bit.
call $L006pic_point |
||||
|
$L006pic_point: |
||||
|
pop eax |
||||
|
lea eax,DWORD PTR ($Lrem_4bit-$L006pic_point)[eax] |
||||
|
; ecx = arg 3 + arg 4 = end-of-input address; keep it in the arg 4 slot.
add ecx,edi |
||||
|
mov DWORD PTR 32[esp],ecx |
||||
|
; Reserve 20 bytes of locals; only [esp]..[esp+15] are written below. From
; here on every argument offset grows by 20 (arg 1 = 40[esp], arg 3 = 48[esp],
; arg 4 = 52[esp]).
sub esp,20 |
||||
|
; Load the running value from the arg 1 buffer. ebp is read last because this
; final load replaces the pointer it holds.
mov ebx,DWORD PTR 12[ebp] |
||||
|
mov edx,DWORD PTR 4[ebp] |
||||
|
mov ecx,DWORD PTR 8[ebp] |
||||
|
mov ebp,DWORD PTR [ebp] |
||||
|
; Skip over any padding emitted by the ALIGN directive below.
jmp $L007mmx_outer_loop |
||||
|
ALIGN 16 |
||||
|
$L007mmx_outer_loop: |
||||
|
; XOR the next 16 input bytes into the running value.
xor ebx,DWORD PTR 12[edi] |
||||
|
xor edx,DWORD PTR 4[edi] |
||||
|
xor ecx,DWORD PTR 8[edi] |
||||
|
xor ebp,DWORD PTR [edi] |
||||
|
; Park the input cursor in the arg 3 slot; edi is repurposed below.
mov DWORD PTR 48[esp],edi |
||||
|
; Spill the XORed block to the local scratch area so the inner routine can
; read it byte by byte through edi.
mov DWORD PTR 12[esp],ebx |
||||
|
mov DWORD PTR 4[esp],edx |
||||
|
mov DWORD PTR 8[esp],ecx |
||||
|
mov DWORD PTR [esp],ebp |
||||
|
mov edi,esp |
||||
|
; ebx held the dword at offset 12, so shifting right by 24 leaves the byte at
; offset 15 zero-extended in ebx.
shr ebx,24 |
||||
|
; The inner routine leaves its four result dwords in ebx, edx, ecx and ebp,
; which are the registers XORed with the next block at the top of the loop.
call __mmx_gmult_4bit_inner |
||||
|
; Recover the input cursor and advance it by one 16-byte block.
mov edi,DWORD PTR 48[esp] |
||||
|
lea edi,DWORD PTR 16[edi] |
||||
|
; Unsigned compare against the end-of-input address kept in the arg 4 slot.
cmp edi,DWORD PTR 52[esp] |
||||
|
jb $L007mmx_outer_loop |
||||
|
; edi = arg 1 (output buffer).
mov edi,DWORD PTR 40[esp] |
||||
|
; Leave MMX state so the caller can use x87 floating point again.
emms |
||||
|
; Write the result back: ebx -> offset 12, edx -> 4, ecx -> 8, ebp -> 0.
mov DWORD PTR 12[edi],ebx |
||||
|
mov DWORD PTR 4[edi],edx |
||||
|
mov DWORD PTR 8[edi],ecx |
||||
|
mov DWORD PTR [edi],ebp |
||||
|
; Release the locals and restore the saved registers in reverse push order.
add esp,20 |
||||
|
pop edi |
||||
|
pop esi |
||||
|
pop ebx |
||||
|
pop ebp |
||||
|
ret |
||||
|
_gcm_ghash_4bit_mmx ENDP |
||||
|
ALIGN 64 |
||||
|
; $Lrem_4bit: 16 entries of 8 bytes each (one DD pair per entry), 64-byte
; aligned by the preceding ALIGN directive. The first (low) dword of every
; entry is zero and the constant sits in the second (high) dword, e.g.
; 29491200 = 01C20000h. The procedures above load this table's address into
; eax, and __mmx_gmult_4bit_inner indexes it as QWORD PTR [reg*8+eax] with reg
; masked to 0..15.
; NOTE(review): presumably the 4-bit reduction ("remainder") constants of the
; GHASH table-driven multiply - confirm against the generator script.
$Lrem_4bit:: |
||||
|
DD 0,0,0,29491200,0,58982400,0,38141952 |
||||
|
DD 0,117964800,0,113901568,0,76283904,0,88997888 |
||||
|
DD 0,235929600,0,265420800,0,227803136,0,206962688 |
||||
|
DD 0,152567808,0,148504576,0,177995776,0,190709760 |
||||
|
ALIGN 64 |
||||
|
; $L008rem_8bit: 256 16-bit entries (32 rows of 8 words). Nothing in the
; visible part of this listing references this label.
; NOTE(review): presumably the 8-bit counterpart of $Lrem_4bit, used by a
; routine outside this excerpt - confirm.
$L008rem_8bit: |
||||
|
DW 0,450,900,582,1800,1738,1164,1358 |
||||
|
DW 3600,4050,3476,3158,2328,2266,2716,2910 |
||||
|
DW 7200,7650,8100,7782,6952,6890,6316,6510 |
||||
|
DW 4656,5106,4532,4214,5432,5370,5820,6014 |
||||
|
DW 14400,14722,15300,14854,16200,16010,15564,15630 |
||||
|
DW 13904,14226,13780,13334,12632,12442,13020,13086 |
||||
|
DW 9312,9634,10212,9766,9064,8874,8428,8494 |
||||
|
DW 10864,11186,10740,10294,11640,11450,12028,12094 |
||||
|
DW 28800,28994,29444,29382,30600,30282,29708,30158 |
||||
|
DW 32400,32594,32020,31958,31128,30810,31260,31710 |
||||
|
DW 27808,28002,28452,28390,27560,27242,26668,27118 |
||||
|
DW 25264,25458,24884,24822,26040,25722,26172,26622 |
||||
|
DW 18624,18690,19268,19078,20424,19978,19532,19854 |
||||
|
DW 18128,18194,17748,17558,16856,16410,16988,17310 |
||||
|
DW 21728,21794,22372,22182,21480,21034,20588,20910 |
||||
|
DW 23280,23346,22900,22710,24056,23610,24188,24510 |
||||
|
DW 57600,57538,57988,58182,58888,59338,58764,58446 |
||||
|
DW 61200,61138,60564,60758,59416,59866,60316,59998 |
||||
|
DW 64800,64738,65188,65382,64040,64490,63916,63598 |
||||
|
DW 62256,62194,61620,61814,62520,62970,63420,63102 |
||||
|
DW 55616,55426,56004,56070,56904,57226,56780,56334 |
||||
|
DW 55120,54930,54484,54550,53336,53658,54236,53790 |
||||
|
DW 50528,50338,50916,50982,49768,50090,49644,49198 |
||||
|
DW 52080,51890,51444,51510,52344,52666,53244,52798 |
||||
|
DW 37248,36930,37380,37830,38536,38730,38156,38094 |
||||
|
DW 40848,40530,39956,40406,39064,39258,39708,39646 |
||||
|
DW 36256,35938,36388,36838,35496,35690,35116,35054 |
||||
|
DW 33712,33394,32820,33270,33976,34170,34620,34558 |
||||
|
DW 43456,43010,43588,43910,44744,44810,44364,44174 |
||||
|
DW 42960,42514,42068,42390,41176,41242,41820,41630 |
||||
|
DW 46560,46114,46692,47014,45800,45866,45420,45230 |
||||
|
DW 48112,47666,47220,47542,48376,48442,49020,48830 |
||||
|
; NUL-terminated ASCII identification string:
; "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
DB 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 |
||||
|
DB 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 |
||||
|
DB 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 |
||||
|
DB 0 |
||||
|
.text$ ENDS |
||||
|
END |
Loading…
Reference in new issue