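; NOTE: the following lines are repository web-viewer residue, not part of the assembly source.
; You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
;
; 476 lines
; 9.0 KiB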

; Listing title: names the module this file was produced from
; (the $Lnnn-style labels suggest it is machine-generated -- confirm before hand-editing).
TITLE ../openssl/crypto/bn/asm/x86-mont.asm
; Print an assembly-time notice when the assembler is older than MASM 8.00.
IF @Version LT 800
ECHO MASM version 8.00 or later is strongly recommended.
ENDIF
; Enable the .686 instruction set plus the XMM (SSE) extensions.
.686
.XMM
; For assemblers older than 8.00, declare a 16-byte-aligned XMMWORD structure
; made of two quadwords (presumably because those versions lack the built-in type).
IF @Version LT 800
XMMWORD STRUCT 16
DQ 2 dup (?)
XMMWORD ENDS
ENDIF
; Flat memory model; DOTNAME allows identifiers that start with a dot (".text$", ".bss").
.MODEL FLAT
OPTION DOTNAME
; Open the code segment: PAGE alignment on old assemblers, 64-byte alignment otherwise.
IF @Version LT 800
.text$ SEGMENT PAGE 'CODE'
ELSE
.text$ SEGMENT ALIGN(64) 'CODE'
ENDIF
; The capability vector is not imported as an external symbol; it is declared
; as a communal (COMM) variable in the .bss segment at the end of this file.
;EXTERN _OPENSSL_ia32cap_P:NEAR
; Start the procedure that follows on a 16-byte boundary.
ALIGN 16
;-----------------------------------------------------------------------
; _bn_mul_mont -- Montgomery multiplication for 32-bit x86 (per the
; identification string stored after this procedure).
;
; Stack arguments (six DWORDs above the return address):
;   arg0  pointer to the result vector        (copied to  4[esp], written by the tail)
;   arg1  pointer to the first operand        (copied to  8[esp])
;   arg2  pointer to the second operand       (copied to 12[esp])
;   arg3  pointer to the modulus              (copied to 16[esp])
;   arg4  pointer to a DWORD; the DWORD it points at is copied to 20[esp]
;   arg5  signed word count "num"
; NOTE(review): presumably this is OpenSSL's
;   int bn_mul_mont(rp, ap, bp, np, n0, num)
; -- confirm against the C prototype. The names rp/ap/bp/np/n0/tp/m used in
; the comments below follow that assumption.
;
; Returns: eax = 0 (nothing computed) when num < 4, otherwise eax = 1.
; ebp, ebx, esi and edi are saved and restored. The arguments are left on
; the stack (plain ret). The SSE2 path uses mm0-mm7 and ends with emms.
;
; Private frame after setup (esp is 64-byte aligned):
;    0[esp]  scratch; the squaring path keeps num-1 here
;    4..20   copies of arg0..arg3 and the n0 word (see above)
;   24[esp]  esp value to restore in the epilogue
;   28[esp]  multiply path: address one past the last word of bp
;   32[esp]  tp[0 .. num+1], a temporary vector of num+2 words
;-----------------------------------------------------------------------
_bn_mul_mont PROC PUBLIC
$L_bn_mul_mont_begin::
; Save the four registers that are popped again at $L000just_leave.
push ebp
push ebx
push esi
push edi
; eax = 0 is the return value of the early exit.
xor eax,eax
; edi = num: four pushes + return address = 20 bytes, so arg5 sits at 40[esp].
mov edi,DWORD PTR 40[esp]
; Signed compare: any num below 4 (including negative values) leaves immediately.
cmp edi,4
jl $L000just_leave
; esi = address of the arg0 slot, edx = address of the arg1 slot
; (lea: these are addresses of the argument slots, not the pointers stored in them).
lea esi,DWORD PTR 20[esp]
lea edx,DWORD PTR 24[esp]
; ebp = esp before the allocation; stored at 24[esp] below for the epilogue.
mov ebp,esp
; Allocate 32 + 4*(num+2) bytes. After the second neg, edi holds num+2.
add edi,2
neg edi
lea esp,DWORD PTR [edi*4+esp-32]
neg edi
; Lower esp until it is congruent to edx modulo 2048.
mov eax,esp
sub eax,edx
and eax,2047
sub esp,eax
; If esp and edx agree in bit 11, lower esp by another 2048 so they differ in that bit.
; NOTE(review): presumably a cache-aliasing measure -- confirm the intent.
xor edx,esp
and edx,2048
xor edx,2048
sub esp,edx
; Round esp down to a multiple of 64.
and esp,-64
; NOTE(review): esp is lowered in one step without touching the pages in
; between; confirm that callers bound num so stack guard pages are not skipped.
; Load the five pointer arguments through esi; arg4 is dereferenced to get the n0 word.
mov eax,DWORD PTR [esi]
mov ebx,DWORD PTR 4[esi]
mov ecx,DWORD PTR 8[esi]
mov edx,DWORD PTR 12[esi]
mov esi,DWORD PTR 16[esi]
mov esi,DWORD PTR [esi]
; Store them in the private frame: rp, ap, bp, np, n0.
mov DWORD PTR 4[esp],eax
mov DWORD PTR 8[esp],ebx
mov DWORD PTR 12[esp],ecx
mov DWORD PTR 16[esp],edx
mov DWORD PTR 20[esp],esi
; ebx = num-1 (edi holds num+2).
lea ebx,DWORD PTR [edi-3]
; Remember the pre-allocation stack pointer.
mov DWORD PTR 24[esp],ebp
; Test bit 26 of the first DWORD of _OPENSSL_ia32cap_P; when it is clear,
; take the integer-only path (the label name indicates this bit means SSE2).
lea eax,DWORD PTR _OPENSSL_ia32cap_P
bt DWORD PTR [eax],26
jnc $L001non_sse2
; ---- SSE2 path: 32x32->64 multiplies with pmuludq on the MMX registers ----
; mm7 = 0x00000000FFFFFFFF, used to isolate the low word of a 64-bit sum.
mov eax,-1
movd mm7,eax
; esi = ap, edi = bp, ebp = np.
mov esi,DWORD PTR 8[esp]
mov edi,DWORD PTR 12[esp]
mov ebp,DWORD PTR 16[esp]
; edx = outer index i = 0, ecx = inner index j = 0.
xor edx,edx
xor ecx,ecx
; mm4 = bp[0], mm5 = ap[0], mm3 = np[0].
movd mm4,DWORD PTR [edi]
movd mm5,DWORD PTR [esi]
movd mm3,DWORD PTR [ebp]
; mm5 = ap[0]*bp[0]; mm2 keeps the product (ap*bp accumulator), mm0 its low word.
pmuludq mm5,mm4
movq mm2,mm5
movq mm0,mm5
pand mm0,mm7
; mm5 = low word of the product times n0 (pmuludq reads only the low DWORD of
; each operand); the low word of mm5 is the multiplier m used against np below.
pmuludq mm5,QWORD PTR 20[esp]
; mm3 = np[0]*m + low word of ap[0]*bp[0]; only its carry survives the shift below.
pmuludq mm3,mm5
paddq mm3,mm0
; Preload np[1] and ap[1], shift both accumulators down one word, j = 1.
movd mm1,DWORD PTR 4[ebp]
movd mm0,DWORD PTR 4[esi]
psrlq mm2,32
psrlq mm3,32
inc ecx
ALIGN 16
; First pass (i = 0), j = 1 .. num-2:
;   mm2 += ap[j]*bp[0]; mm3 += np[j]*m + low word of mm2; tp[j-1] = low word of mm3.
; The next ap/np words are loaded one iteration ahead.
$L0021st:
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
paddq mm3,mm1
movq mm0,mm2
pand mm0,mm7
movd mm1,DWORD PTR 4[ecx*4+ebp]
paddq mm3,mm0
movd mm0,DWORD PTR 4[ecx*4+esi]
psrlq mm2,32
movd DWORD PTR 28[ecx*4+esp],mm3
psrlq mm3,32
lea ecx,DWORD PTR 1[ecx]
cmp ecx,ebx
jl $L0021st
; Last word of the first pass (j = num-1): store tp[num-2], then write the
; sum of both carries as a quadword into tp[num-1] and tp[num].
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
paddq mm3,mm1
movq mm0,mm2
pand mm0,mm7
paddq mm3,mm0
movd DWORD PTR 28[ecx*4+esp],mm3
psrlq mm2,32
psrlq mm3,32
paddq mm3,mm2
movq QWORD PTR 32[ebx*4+esp],mm3
; i = 1.
inc edx
; Outer loop, i = 1 .. num-1: same computation with bp[i], now also adding
; the previous contents of tp.
$L003outer:
xor ecx,ecx
; mm4 = bp[i], mm5 = ap[0], mm6 = tp[0], mm3 = np[0].
movd mm4,DWORD PTR [edx*4+edi]
movd mm5,DWORD PTR [esi]
movd mm6,DWORD PTR 32[esp]
movd mm3,DWORD PTR [ebp]
; mm5 = ap[0]*bp[i] + tp[0]; mm2 keeps it, mm0 is its low word.
pmuludq mm5,mm4
paddq mm5,mm6
movq mm0,mm5
movq mm2,mm5
pand mm0,mm7
; New multiplier m in the low word of mm5; mm3 = np[0]*m + low word.
pmuludq mm5,QWORD PTR 20[esp]
pmuludq mm3,mm5
paddq mm3,mm0
; mm6 = tp[1]; preload np[1] and ap[1]; shift the accumulators; add tp[1] into mm2.
movd mm6,DWORD PTR 36[esp]
movd mm1,DWORD PTR 4[ebp]
movd mm0,DWORD PTR 4[esi]
psrlq mm2,32
psrlq mm3,32
paddq mm2,mm6
; j = 1; ebx becomes a down-counter starting at num-2.
inc ecx
dec ebx
; Inner loop: runs until ebx reaches zero (ecx counts up in step).
;   mm2 += ap[j]*bp[i] (tp[j] was added beforehand); tp[j-1] = low word of the np*m sum;
;   tp[j+1] is then added into the shifted mm2 for the next iteration.
$L004inner:
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
paddq mm3,mm1
movq mm0,mm2
movd mm6,DWORD PTR 36[ecx*4+esp]
pand mm0,mm7
movd mm1,DWORD PTR 4[ecx*4+ebp]
paddq mm3,mm0
movd mm0,DWORD PTR 4[ecx*4+esi]
psrlq mm2,32
movd DWORD PTR 28[ecx*4+esp],mm3
psrlq mm3,32
paddq mm2,mm6
; dec sets ZF for the jnz below; the lea in between does not modify flags.
dec ebx
lea ecx,DWORD PTR 1[ecx]
jnz $L004inner
; Restore ebx = num-1 (ecx has counted up to that value).
mov ebx,ecx
; Last word of this pass: store tp[num-2], then tp[num-1]:tp[num] = both carries + old tp[num].
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
paddq mm3,mm1
movq mm0,mm2
pand mm0,mm7
paddq mm3,mm0
movd DWORD PTR 28[ecx*4+esp],mm3
psrlq mm2,32
psrlq mm3,32
movd mm6,DWORD PTR 36[ebx*4+esp]
paddq mm3,mm2
paddq mm3,mm6
movq QWORD PTR 32[ebx*4+esp],mm3
; i++ and repeat while i <= num-1.
lea edx,DWORD PTR 1[edx]
cmp edx,ebx
jle $L003outer
; Clear the MMX state before continuing with the integer tail.
emms
jmp $L005common_tail
ALIGN 16
; ---- Integer-only path ----
$L001non_sse2:
; esi = ap, edi = bp, ecx = 0.
; ebp = (num & 1) | (ap - bp): zero only when num is even and ap == bp.
; eax = bp + 4*num, the address one past the last word of bp.
mov esi,DWORD PTR 8[esp]
lea ebp,DWORD PTR 1[ebx]
mov edi,DWORD PTR 12[esp]
xor ecx,ecx
mov edx,esi
and ebp,1
sub edx,edi
lea eax,DWORD PTR 4[ebx*4+edi]
or ebp,edx
; edi = bp[0]; mov leaves the flags of the preceding or intact.
mov edi,DWORD PTR [edi]
; Even-length squaring (ap == bp) is handled by a dedicated path.
jz $L006bn_sqr_mont
; 28[esp] = end-of-bp pointer; eax = ap[0]; carry (edx) = 0.
mov DWORD PTR 28[esp],eax
mov eax,DWORD PTR [esi]
xor edx,edx
ALIGN 16
; tp[j] = low word of ap[j]*bp[0] + carry, for j = 0 .. num-2 (carry kept in edx).
$L007mull:
mov ebp,edx
mul edi
add ebp,eax
lea ecx,DWORD PTR 1[ecx]
adc edx,0
mov eax,DWORD PTR [ecx*4+esi]
cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp
jl $L007mull
; Last word: tp[num-1] = low word, tp[num] = carry, tp[num+1] = 0.
; Meanwhile edi = m = n0 * tp[0] (low 32 bits) and esi = np.
mov ebp,edx
mul edi
mov edi,DWORD PTR 20[esp]
add eax,ebp
mov esi,DWORD PTR 16[esp]
adc edx,0
imul edi,DWORD PTR 32[esp]
mov DWORD PTR 32[ebx*4+esp],eax
xor ecx,ecx
mov DWORD PTR 36[ebx*4+esp],edx
mov DWORD PTR 40[ebx*4+esp],ecx
; np[0]*m + tp[0]: only the carry into edx is kept (eax is reloaded with np[1]).
mov eax,DWORD PTR [esi]
mul edi
add eax,DWORD PTR 32[esp]
mov eax,DWORD PTR 4[esi]
adc edx,0
; j = 1, continue with the reduction loop.
inc ecx
jmp $L0082ndmadd
ALIGN 16
; Multiply-accumulate pass for bp[i]:
;   tp[j] = low word of tp[j] + ap[j]*bp[i] + carry, for j = 0 .. num-2.
$L0091stmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
lea ecx,DWORD PTR 1[ecx]
adc edx,0
add ebp,eax
mov eax,DWORD PTR [ecx*4+esi]
adc edx,0
cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp
jl $L0091stmadd
; Last word (j = num-1): update tp[num-1], add the carry into tp[num] and
; record the carry out of that addition in tp[num+1].
; Meanwhile edi = m = n0 * tp[0] and esi = np.
mov ebp,edx
mul edi
add eax,DWORD PTR 32[ebx*4+esp]
mov edi,DWORD PTR 20[esp]
adc edx,0
mov esi,DWORD PTR 16[esp]
add ebp,eax
adc edx,0
imul edi,DWORD PTR 32[esp]
xor ecx,ecx
add edx,DWORD PTR 36[ebx*4+esp]
mov DWORD PTR 32[ebx*4+esp],ebp
adc ecx,0
mov eax,DWORD PTR [esi]
mov DWORD PTR 36[ebx*4+esp],edx
mov DWORD PTR 40[ebx*4+esp],ecx
; np[0]*m + tp[0]: only the carry is kept; eax = np[1]; j = 1.
mul edi
add eax,DWORD PTR 32[esp]
mov eax,DWORD PTR 4[esi]
adc edx,0
mov ecx,1
ALIGN 16
; Reduction pass, shifting tp down by one word:
;   tp[j-1] = low word of tp[j] + np[j]*m + carry, for j = 1 .. num-2.
$L0082ndmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
lea ecx,DWORD PTR 1[ecx]
adc edx,0
add ebp,eax
mov eax,DWORD PTR [ecx*4+esi]
adc edx,0
cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp
jl $L0082ndmadd
; Last word (j = num-1): store tp[num-2]; then
; tp[num-1] = carry + tp[num] and tp[num] = tp[num+1] + carry out of that sum.
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ebx*4+esp]
adc edx,0
add ebp,eax
adc edx,0
mov DWORD PTR 28[ebx*4+esp],ebp
xor eax,eax
mov ecx,DWORD PTR 12[esp]
add edx,DWORD PTR 36[ebx*4+esp]
adc eax,DWORD PTR 40[ebx*4+esp]
; Advance the bp pointer by one word and compare it with the end pointer at 28[esp];
; the mov between cmp and je does not modify flags.
lea ecx,DWORD PTR 4[ecx]
mov DWORD PTR 32[ebx*4+esp],edx
cmp ecx,DWORD PTR 28[esp]
mov DWORD PTR 36[ebx*4+esp],eax
je $L005common_tail
; More words of bp remain: edi = next bp word, esi = ap, save the advanced
; pointer, reset j and carry, eax = ap[0], and run another multiply pass.
mov edi,DWORD PTR [ecx]
mov esi,DWORD PTR 8[esp]
mov DWORD PTR 12[esp],ecx
xor ecx,ecx
xor edx,edx
mov eax,DWORD PTR [esi]
jmp $L0091stmadd
ALIGN 16
; ---- Squaring path: reached only when ap == bp and num is even ----
$L006bn_sqr_mont:
; 0[esp] = num-1; 12[esp] = outer index i = 0 (the bp slot is reused as a counter).
mov DWORD PTR [esp],ebx
mov DWORD PTR 12[esp],ecx
; edx:eax = ap[0]^2 (edi was loaded with bp[0], and bp == ap here); tp[0] = low word.
mov eax,edi
mul edi
mov DWORD PTR 32[esp],eax
; Split the high word: edx = high >> 1 is the carry fed into the products that
; get doubled below, ebx = high & 1 is the bit put back in by the doubling.
mov ebx,edx
shr edx,1
and ebx,1
; j = 1.
inc ecx
ALIGN 16
; For j = 1 .. num-2: t = ap[j]*ap[0] + carry; tp[j] = 2*low(t) + ebx;
; ebx = bit 31 of low(t) (the bit shifted out by the doubling).
$L010sqr:
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
add eax,ebp
lea ecx,DWORD PTR 1[ecx]
adc edx,0
lea ebp,DWORD PTR [eax*2+ebx]
shr eax,31
cmp ecx,DWORD PTR [esp]
mov ebx,eax
mov DWORD PTR 28[ecx*4+esp],ebp
jl $L010sqr
; Last word (j = num-1): tp[num-1], tp[num] and tp[num+1] receive the doubled
; low word, the doubled high word and its top bit.
; Meanwhile edi = m = n0 * tp[0] and esi = np.
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
add eax,ebp
mov edi,DWORD PTR 20[esp]
adc edx,0
mov esi,DWORD PTR 16[esp]
lea ebp,DWORD PTR [eax*2+ebx]
imul edi,DWORD PTR 32[esp]
shr eax,31
mov DWORD PTR 32[ecx*4+esp],ebp
lea ebp,DWORD PTR [edx*2+eax]
mov eax,DWORD PTR [esi]
shr edx,31
mov DWORD PTR 36[ecx*4+esp],ebp
mov DWORD PTR 40[ecx*4+esp],edx
; np[0]*m + tp[0]: only the carry is kept. ebx = num-1, eax = np[1], j = 1.
mul edi
add eax,DWORD PTR 32[esp]
mov ebx,ecx
adc edx,0
mov eax,DWORD PTR 4[esi]
mov ecx,1
ALIGN 16
; Reduction pass of the squaring path, two words per iteration:
;   tp[j-1] = low word of tp[j]   + np[j]*m   + carry
;   tp[j]   = low word of tp[j+1] + np[j+1]*m + carry
; j starts at 1 and advances by 2 until it equals num-1, which relies on num
; being even (checked at $L001non_sse2).
$L0113rdmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
adc edx,0
add ebp,eax
mov eax,DWORD PTR 4[ecx*4+esi]
adc edx,0
mov DWORD PTR 28[ecx*4+esp],ebp
mov ebp,edx
mul edi
add ebp,DWORD PTR 36[ecx*4+esp]
lea ecx,DWORD PTR 2[ecx]
adc edx,0
add ebp,eax
mov eax,DWORD PTR [ecx*4+esi]
adc edx,0
cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp
jl $L0113rdmadd
; Last word (j = num-1): store tp[num-2]; then
; tp[num-1] = carry + tp[num] and tp[num] = tp[num+1] + carry out of that sum.
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ebx*4+esp]
adc edx,0
add ebp,eax
adc edx,0
mov DWORD PTR 28[ebx*4+esp],ebp
; ecx = outer index i, esi = ap.
mov ecx,DWORD PTR 12[esp]
xor eax,eax
mov esi,DWORD PTR 8[esp]
add edx,DWORD PTR 36[ebx*4+esp]
adc eax,DWORD PTR 40[ebx*4+esp]
mov DWORD PTR 32[ebx*4+esp],edx
; Finished once i == num-1 (the mov between cmp and je does not modify flags).
cmp ecx,ebx
mov DWORD PTR 36[ebx*4+esp],eax
je $L005common_tail
; Next outer step: edi = ap[i+1], i++, store i.
mov edi,DWORD PTR 4[ecx*4+esi]
lea ecx,DWORD PTR 1[ecx]
mov eax,edi
mov DWORD PTR 12[esp],ecx
; tp[i] += low word of ap[i]^2; the high word plus carry stays in edx.
mul edi
add eax,DWORD PTR 32[ecx*4+esp]
adc edx,0
mov DWORD PTR 32[ecx*4+esp],eax
; ebp = 0; when i == num-1 there are no cross products left, so skip to sqrlast
; (ecx is advanced to i+1 by a flag-preserving lea).
xor ebp,ebp
cmp ecx,ebx
lea ecx,DWORD PTR 1[ecx]
je $L012sqrlast
; Same split as before: edx = high >> 1 as carry, ebx = high & 1.
mov ebx,edx
shr edx,1
and ebx,1
ALIGN 16
; For j = i+1 .. num-1: t = ap[j]*ap[i] + carry;
;   tp[j] = low word of 2*low(t) + tp[j] + ebx;
;   ebx   = bit shifted out by the doubling + the carries of both additions.
$L013sqradd:
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
add eax,ebp
lea ebp,DWORD PTR [eax*1+eax]
adc edx,0
shr eax,31
add ebp,DWORD PTR 32[ecx*4+esp]
lea ecx,DWORD PTR 1[ecx]
adc eax,0
add ebp,ebx
adc eax,0
cmp ecx,DWORD PTR [esp]
mov DWORD PTR 28[ecx*4+esp],ebp
mov ebx,eax
jle $L013sqradd
; Double the final carry word: edx = 2*edx + ebx, ebp = bit shifted out + carry.
mov ebp,edx
add edx,edx
shr ebp,31
add edx,ebx
adc ebp,0
; Here ecx = num. Fold edx/ebp into tp[num] and tp[num+1], compute
; m = n0 * tp[0], and set up the reduction pass (esi = np, ebx = num-1, j = 1).
$L012sqrlast:
mov edi,DWORD PTR 20[esp]
mov esi,DWORD PTR 16[esp]
imul edi,DWORD PTR 32[esp]
add edx,DWORD PTR 32[ecx*4+esp]
mov eax,DWORD PTR [esi]
adc ebp,0
mov DWORD PTR 32[ecx*4+esp],edx
mov DWORD PTR 36[ecx*4+esp],ebp
; np[0]*m + tp[0]: only the carry is kept; eax = np[1].
mul edi
add eax,DWORD PTR 32[esp]
lea ebx,DWORD PTR [ecx-1]
adc edx,0
mov ecx,1
mov eax,DWORD PTR 4[esi]
jmp $L0113rdmadd
ALIGN 16
; ---- Common tail: conditional final subtraction and copy-out ----
; ebp = np, edi = rp, esi = &tp[0], eax = tp[0], ecx = num-1 (down-counter),
; edx = 0 (the xor also clears CF for the first sbb).
$L005common_tail:
mov ebp,DWORD PTR 16[esp]
mov edi,DWORD PTR 4[esp]
lea esi,DWORD PTR 32[esp]
mov eax,DWORD PTR [esi]
mov ecx,ebx
xor edx,edx
ALIGN 16
; rp[j] = tp[j] - np[j] - borrow for j = 0 .. num-1.
; dec and lea leave CF untouched, so the borrow chains from one sbb to the next;
; jge tests the sign produced by dec, so the loop ends when ecx reaches -1.
$L014sub:
sbb eax,DWORD PTR [edx*4+ebp]
mov DWORD PTR [edx*4+edi],eax
dec ecx
mov eax,DWORD PTR 4[edx*4+esi]
lea edx,DWORD PTR 1[edx]
jge $L014sub
; eax = tp[num] - borrow.
sbb eax,0
; Select the copy source: esi = (tp & eax) | (rp & ~eax), so an all-ones eax
; picks tp and a zero eax picks rp.
; NOTE(review): presumably eax can only be 0 or -1 here -- confirm.
and esi,eax
not eax
mov ebp,edi
and ebp,eax
or esi,ebp
ALIGN 16
; For ebx = num-1 down to 0: rp[ebx] = source[ebx], then overwrite tp[ebx] with
; ecx (-1 after the subtraction loop) so the temporary no longer holds the value.
$L015copy:
mov eax,DWORD PTR [ebx*4+esi]
mov DWORD PTR [ebx*4+edi],eax
mov DWORD PTR 32[ebx*4+esp],ecx
dec ebx
jge $L015copy
; Release the private frame and report success.
mov esp,DWORD PTR 24[esp]
mov eax,1
; Shared epilogue (eax = 0 when reached from the num < 4 check).
$L000just_leave:
pop edi
pop esi
pop ebx
pop ebp
ret
_bn_mul_mont ENDP
; NUL-terminated ASCII identification string placed in the code segment:
; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
DB 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
DB 111,114,103,62,0
.text$ ENDS
; Communal (COMM) declaration of _OPENSSL_ia32cap_P: four DWORDs in the BSS segment.
; _bn_mul_mont tests bit 26 of its first DWORD to choose between its code paths.
.bss SEGMENT 'BSS'
COMM _OPENSSL_ia32cap_P:DWORD:4
.bss ENDS
END