shithub: riscv

ref: f57df708538093ffb47727a522a0217b9ee81125
dir: /sys/src/libsec/amd64/sha1block.s/

View raw version
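/*
 * BODY: one round including the message-schedule step.
 * wp indices below are in 32-bit words; the macro's off argument is in bytes.
 * x = (wp[off-3] ^ wp[off-8] ^ wp[off-14] ^ wp[off-16]) <<< 1;
 * wp[off] = x;
 * E += V + x;
 * x = A <<< 5;
 * E += x;
 * x = FN(B,C,D);
 * E += x;
 * B = B >>> 2;
 */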
/*
 * BSWAPDI emits the two raw opcode bytes 0x0f 0xcf, the x86 encoding of
 * BSWAP on register code 7 (32-bit DI): it reverses the byte order of DI.
 * BODY0 uses it immediately after loading a message word into DI.
 */
#define BSWAPDI	BYTE $0x0f; BYTE $0xcf;

/*
 * BODY(off,FN,V,A,B,C,D,E)
 *	off	byte offset from BP of the schedule word this round produces
 *	FN	round-function macro (FN1, FN24 or FN3); leaves its result in DI
 *	V	constant added into E
 *	A..E	operands holding the five working values for this round
 * XORs the words at off-64, off-56, off-32 and off-12 from BP, rotates the
 * result left by 1 and stores it at off(BP).  It then adds V, that word,
 * A rotated left by 5, and FN(B,C,D) into E, and rotates B right by 2.
 * DI is scratch and is overwritten.
 */
#define BODY(off,FN,V,A,B,C,D,E)\
	MOVL (off-64)(BP),DI;\
	XORL (off-56)(BP),DI;\
	XORL (off-32)(BP),DI;\
	XORL (off-12)(BP),DI;\
	ROLL $1,DI;\
	MOVL DI,off(BP);\
	LEAL V(DI)(E*1),E;\
	MOVL A,DI;\
	ROLL $5,DI;\
	ADDL DI,E;\
	FN(B,C,D)\
	ADDL DI,E;\
	RORL $2,B;\

/*
 * BODY0(off,FN,V,A,B,C,D,E)
 * Same round step as BODY, but the schedule word is not derived from
 * earlier entries: it is loaded from off(BX), byte-swapped with BSWAPDI,
 * and stored at off(BP).  BX must therefore hold the input pointer while
 * this macro is in use.  The remainder matches BODY: E receives V, the
 * word, A rotated left by 5, and FN(B,C,D); B is rotated right by 2.
 * DI is scratch and is overwritten.
 */
#define BODY0(off,FN,V,A,B,C,D,E)\
	MOVLQZX off(BX),DI;\
	BSWAPDI;\
	MOVL DI,off(BP);\
	LEAL V(DI)(E*1),E;\
	MOVL A,DI;\
	ROLL $5,DI;\
	ADDL DI,E;\
	FN(B,C,D)\
	ADDL DI,E;\
	RORL $2,B;\

/*
 * fn1 = (((C^D)&B)^D);
 * Bitwise select: each result bit is taken from C where B is 1 and from D
 * where B is 0, i.e. the same value as (B&C)|(~B&D).
 * Result is left in DI; B, C and D are only read.
 */
#define FN1(B,C,D)\
	MOVL C,DI;\
	XORL D,DI;\
	ANDL B,DI;\
	XORL D,DI;\

/*
 * fn24 = B ^ C ^ D
 * Three-way XOR (parity), used by both the second and the fourth group of
 * rounds in _sha1block.
 * Result is left in DI; B, C and D are only read.
 */
#define FN24(B,C,D)\
	MOVL B,DI;\
	XORL C,DI;\
	XORL D,DI;\

/*
 * fn3 = ((B ^ C) & (D ^ B)) ^ B
 * Bitwise majority of B, C and D, the same value as (B&C)|(B&D)|(C&D).
 * D is temporarily overwritten with D^B so it can be ANDed in place; the
 * final XORL B,D restores its original value.
 * Result is left in DI; B and C are only read.
 */
#define FN3(B,C,D)\
	MOVL B,DI;\
	XORL C,DI;\
	XORL B,D;\
	ANDL D,DI;\
	XORL B,DI;\
	XORL B,D;\

/*
 * argument offsets, used with the FP pseudo-register
 * void sha1block(uchar *DATA, int LEN, ulong *STATE)
 * Each argument occupies an 8-byte slot.  DATA is defined for completeness
 * but is not referenced by the code in this file: the first argument is
 * read from RARG instead.
 */
#define	DATA	0
#define	LEN	8
#define	STATE	16

/*
 * stack offsets for locals, used with the SP pseudo-register
 * ulong w[80];			WARRAY, 80*4 bytes ending at -8
 * ulong tmp1, tmp2;		TMP1, TMP2: spill slots for 32-bit working values
 * ulong *w15, *w40, *w60, *w80;	W15..W80: loop bounds inside w[]
 * uchar *edata;		EDATA: one past the last input byte
 * Every slot below w[] is 8 bytes wide; the deepest one, EDATA, is at
 * -384, which matches the $384 frame size on the TEXT line.
 *
 * register usage
 * ulong *wp = BP
 * uchar *pdata = R8 (Rpdata), start of the block being processed
 * ulong a = eax, b = ebx, c = ecx, d = edx, e = esi
 *	(at the top of each five-round group; roles rotate within a group,
 *	 and in loop1 BX holds the input pointer instead of b)
 * ulong tmp = edi
 */
#define	Rpdata	R8
#define WARRAY	(-8-(80*4))
#define TMP1	(-16-(80*4))
#define TMP2	(-24-(80*4))
#define W15	(-32-(80*4))
#define W40	(-40-(80*4))
#define W60	(-48-(80*4))
#define W80	(-56-(80*4))
#define EDATA	(-64-(80*4))

/*
 * void _sha1block(uchar *data, int len, ulong *state)
 *
 * Processes the input in 64-byte blocks starting at data, running 80
 * rounds per block and adding the resulting five working values into
 * state[0..4].
 *
 * data arrives in RARG; len and state are read from the argument area.
 * The block loop is tested at the bottom, so one block is always
 * processed even if len is 0; it keeps going while the advanced data
 * pointer is below data+len.  Callers presumably pass a non-zero
 * multiple of 64 -- confirm against callers.
 *
 * Uses AX, BX, CX, DX, SI, DI, BP and R8 (Rpdata).
 */
TEXT	_sha1block+0(SB),$384

	/* Rpdata = data; edata = data + len */
	MOVQ RARG, Rpdata
	MOVLQZX len+LEN(FP),BX
	ADDQ BX, RARG
	MOVQ RARG,edata+EDATA(SP)

	/* precompute the addresses of w[15], w[40], w[60] and w[80] as loop bounds */
	LEAQ aw15+(WARRAY+15*4)(SP),DI
	MOVQ DI,w15+W15(SP)
	LEAQ aw40+(WARRAY+40*4)(SP),DX
	MOVQ DX,w40+W40(SP)
	LEAQ aw60+(WARRAY+60*4)(SP),CX
	MOVQ CX,w60+W60(SP)
	LEAQ aw80+(WARRAY+80*4)(SP),DI
	MOVQ DI,w80+W80(SP)

	/* one pass per 64-byte block */
mainloop:
	/* wp = &w[0] */
	LEAQ warray+WARRAY(SP),BP

	/*
	 * Load a..e from state.  b goes to tmp1 rather than staying in BX,
	 * because BX carries the input pointer for the BODY0 rounds.
	 */
	MOVQ state+STATE(FP),DI
	MOVL (DI),AX
	MOVL 4(DI),BX
	MOVL BX,tmp1+TMP1(SP)
	MOVL 8(DI),CX
	MOVL 12(DI),DX
	MOVL 16(DI),SI

	MOVQ Rpdata,BX

	/*
	 * Rounds 0-14, five per pass, words taken directly from the input.
	 * With BX unavailable, one working value always lives in tmp1 or
	 * tmp2; the MOVLs between rounds move values between SI and those
	 * slots so that each pass starts with the same operand assignment.
	 */
loop1:
	BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI)
	MOVL SI,tmp2+TMP2(SP)
	BODY0(4,FN1,0x5a827999,SI,AX,tmp1+TMP1(SP),CX,DX)
	MOVL tmp1+TMP1(SP),SI
	BODY0(8,FN1,0x5a827999,DX,tmp2+TMP2(SP),AX,SI,CX)
	BODY0(12,FN1,0x5a827999,CX,DX,tmp2+TMP2(SP),AX,SI)
	MOVL SI,tmp1+TMP1(SP)
	BODY0(16,FN1,0x5a827999,SI,CX,DX,tmp2+TMP2(SP),AX)
	MOVL tmp2+TMP2(SP),SI

	ADDQ $20,BX
	ADDQ $20,BP
	CMPQ BP,w15+W15(SP)
	JCS loop1

	/* round 15: the last word that comes straight from the input */
	BODY0(0,FN1,0x5a827999,AX,tmp1+TMP1(SP),CX,DX,SI)
	/* BX has advanced 60 bytes in loop1; 4 more makes it the next block */
	ADDQ $4,BX
	MOVQ BX,Rpdata
	/*
	 * BX is free again: reload the spilled working value.  tmp1 is only
	 * ever written with MOVL, so read it back at the same width.
	 */
	MOVL tmp1+TMP1(SP),BX

	/* rounds 16-19: still FN1, but words now come from the schedule */
	BODY(4,FN1,0x5a827999,SI,AX,BX,CX,DX)
	BODY(8,FN1,0x5a827999,DX,SI,AX,BX,CX)
	BODY(12,FN1,0x5a827999,CX,DX,SI,AX,BX)
	BODY(16,FN1,0x5a827999,BX,CX,DX,SI,AX)

	ADDQ $20,BP

	/* rounds 20-39 */
loop2:
	BODY(0,FN24,0x6ed9eba1,AX,BX,CX,DX,SI)
	BODY(4,FN24,0x6ed9eba1,SI,AX,BX,CX,DX)
	BODY(8,FN24,0x6ed9eba1,DX,SI,AX,BX,CX)
	BODY(12,FN24,0x6ed9eba1,CX,DX,SI,AX,BX)
	BODY(16,FN24,0x6ed9eba1,BX,CX,DX,SI,AX)

	ADDQ $20,BP
	CMPQ BP,w40+W40(SP)
	JCS loop2

	/* rounds 40-59 */
loop3:
	BODY(0,FN3,0x8f1bbcdc,AX,BX,CX,DX,SI)
	BODY(4,FN3,0x8f1bbcdc,SI,AX,BX,CX,DX)
	BODY(8,FN3,0x8f1bbcdc,DX,SI,AX,BX,CX)
	BODY(12,FN3,0x8f1bbcdc,CX,DX,SI,AX,BX)
	BODY(16,FN3,0x8f1bbcdc,BX,CX,DX,SI,AX)

	ADDQ $20,BP
	CMPQ BP,w60+W60(SP)
	JCS loop3

	/* rounds 60-79 */
loop4:
	BODY(0,FN24,0xca62c1d6,AX,BX,CX,DX,SI)
	BODY(4,FN24,0xca62c1d6,SI,AX,BX,CX,DX)
	BODY(8,FN24,0xca62c1d6,DX,SI,AX,BX,CX)
	BODY(12,FN24,0xca62c1d6,CX,DX,SI,AX,BX)
	BODY(16,FN24,0xca62c1d6,BX,CX,DX,SI,AX)

	ADDQ $20,BP
	CMPQ BP,w80+W80(SP)
	JCS loop4

	/* fold this block's result into the caller's state */
	MOVQ state+STATE(FP),DI
	ADDL AX,0(DI)
	ADDL BX,4(DI)
	ADDL CX,8(DI)
	ADDL DX,12(DI)
	ADDL SI,16(DI)

	/* continue while the next block starts before edata */
	MOVQ edata+EDATA(SP),DI
	CMPQ Rpdata,DI
	JCS mainloop

	RET
	END