ref: 90c65adbd50f21bc2802438d03451c7845b8e43f
parent: e2994142e890cd2ac79cc9ac3b76900ffce420b4
author: mia soweli <inbox@tachibana-labs.org>
date: Mon May 13 19:17:31 EDT 2024
atomic: fix 386 atomics. atomics now pass the tests on 386 and i can successfully mount and ream a filesystem on a single core 386. not tested with many cores.
--- a/atomic-386.s
+++ b/atomic-386.s
@@ -2,10 +2,10 @@
BYTE $0x0F; BYTE $0xB1; BYTE $0x11
#define CMPXCHG64 /* (DI) */\
BYTE $0x0F; BYTE $0xC7; BYTE $0x0F
-#define XADDL /* CX, (AX) */ \
- BYTE $0x67; BYTE $0x0F; BYTE $0xC1; BYTE $0x08
-#define MFENCE
- BYTE $0x0F; BYTE $0xAE; BYTE $0xF0
+#define XADDL /* AX, (BX): (BX) += AX, AX = old (BX) */ \
+ BYTE $0x0F; BYTE $0xC1; BYTE $0x03
+#define XADDLSP /* AX, (SP): (SP) += AX, AX = old (SP) */ \
+ BYTE $0x0F; BYTE $0xC1; BYTE $0x04; BYTE $0x24
/* get variants */
TEXT ageti+0(SB),1,$0
@@ -16,10 +16,10 @@
RET
TEXT agetv+0(SB),1,$0
- MOVL p+0(FP),AX
- MOVL p+4(FP),BX
- FMOVD (BX),F0
- FMOVD F0,(AX)
+ MOVL r+0(FP), AX /* AX = address of the 64-bit result slot */
+ MOVL p+4(FP), BX /* BX = address of the 64-bit word to read */
+ FMOVV (BX), F0 /* single 64-bit integer load; FMOVD would read the bits as a double and quiet or trap on NaN-shaped values */
+ FMOVVP F0, (AX) /* store back as a 64-bit integer and pop, leaving the x87 stack as on entry */
RET
/* set variants */
@@ -26,10 +26,9 @@
TEXT aseti+0(SB),1,$0
TEXT asetl+0(SB),1,$0
TEXT asetp+0(SB),1,$0
- MOVL v+0(FP), AX
- MOVL 0(AX), AX
- MOVL v+4(FP), BX
- LOCK; XCHGL (AX), BX
+ MOVL p+0(FP), BX /* BX = p, the address to store through (used as-is, not dereferenced first) */
+ MOVL v+4(FP), AX /* AX = v, the value to store */
+ LOCK; XCHGL (BX), AX /* swap AX with the word at (BX); AX now holds what was there before */
RET
TEXT asetv+0(SB),1,$0
@@ -50,26 +49,27 @@
TEXT ainci+0(SB),1,$0
TEXT aincl+0(SB),1,$0
TEXT aincp+0(SB),1,$0
- MOVL p+0(FP), AX
- MOVL 0(AX), AX
- MOVL v+4(FP), CX
- LOCK; XADDL // AX, (DX)
+ MOVL p+0(FP), BX /* BX = p, the address of the word to update */
+ MOVL v+4(FP), CX /* CX = v, kept so it can be re-added after the xadd */
+ MOVL CX, AX /* the byte-encoded XADDL takes its addend in AX */
+ LOCK; XADDL /* (BX) += AX; AX = contents of (BX) before the add */
+ ADDL CX, AX /* AX = previous contents + v, i.e. the updated value */
RET
TEXT aincv+0(SB),1,$0
MOVL p+4(FP), DI
retry:
- MOVL 0x0(DI), AX // ov = *p
- MOVL 0x4(DI), DX
- MOVL AX, BX // nv = ov
+ MOVL 0(DI), AX /* DX:AX = ov = *p (two loads; the CMPXCHG64 below rejects a torn or stale pair) */
+ MOVL 4(DI), DX
+ MOVL AX, BX /* CX:BX = nv = ov */
MOVL DX, CX
- ADDL dv+0x8(FP), BX // nv += dv
- ADCL dv+0x12(FP), CX
+ ADDL v+8(FP), BX /* nv += v: low word first, */
+ ADCL v+12(FP), CX /* then the high word plus the carry (offset 12 decimal, not 0x12) */
LOCK; CMPXCHG64
JNE retry
MOVL r+0(FP), DI
- MOVL BX, (DI)
- MOVL CX, (DI)
+ MOVL BX, 0x0(DI) /* *r = nv, low word */
+ MOVL CX, 0x4(DI) /* high word goes to the next slot rather than on top of the low word */
RET
/* cas variants */
@@ -79,7 +79,7 @@
MOVL p+0(FP), CX
MOVL ov+4(FP), AX
MOVL nv+8(FP), DX
- LOCK; CMPXCHG // (CX)
+ LOCK; CMPXCHG
JNE fail32
MOVL $1,AX
RET
@@ -89,13 +89,14 @@
TEXT acasv+0(SB),1,$0
MOVL p+0(FP), DI
- MOVL ov+0x4(FP), AX
- MOVL ov+0x8(FP), DX
- MOVL nv+0xc(FP), BX
- MOVL nv+0x10(FP), CX
- LOCK; CMPXCHG64 // (DI)
+ MOVL ov+4(FP), AX /* DX:AX = ov, the value expected at (DI) */
+ MOVL ov+8(FP), DX
+ MOVL nv+12(FP), BX /* CX:BX = nv, the replacement value */
+ MOVL nv+16(FP), CX
+ LOCK; CMPXCHG64 /* operates on (DI): if it equals DX:AX, store CX:BX there; otherwise the JNE below is taken */
JNE fail64
MOVL $1,AX
+ RET /* success returns 1 here instead of running on into fail64 and returning 0 */
fail64:
MOVL $0,AX
RET
@@ -102,5 +103,7 @@
/* barriers (do we want to distinguish types?) */
TEXT coherence+0(SB),1,$0
- MFENCE
+ /* a locked read-modify-write is used as the barrier; it is essentially mfence, but mfence requires sse2 */
+ XORL AX, AX /* addend of zero, so the word at (SP) is written back unchanged */
+ LOCK; XADDLSP /* AX is overwritten with the word at (SP) */
RET