ref: 1b51d5683a5d8adcde03bbd277e6331f23c2f723
parent: ad5c6c0dfabd5feb39a85eec7c95a703c91feabe
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Sun Mar 31 15:55:02 EDT 2024
pc64: use chain of IDIVQ for delayloop() (formerly aamloop()) On modern machines, doing an empty loop is too fast, resulting in us using the maximum loopconst of 1000000 and getting wrong delay() timings. To fix the timings, use a chain of IDIVQ instructions instead. The loop timing is going to be measured using the TSC, so the exact timing doesn't need to be known. Rename aamloop() to delayloop() as it does different things depending on 386 or amd64.
--- a/sys/src/9/pc/archgeneric.c
+++ b/sys/src/9/pc/archgeneric.c
@@ -46,7 +46,7 @@
millisecs *= m->loopconst;
if(millisecs <= 0)
millisecs = 1;
- aamloop(millisecs);
+ delayloop(millisecs);
}
void
@@ -56,7 +56,7 @@
microsecs /= 1000;
if(microsecs <= 0)
microsecs = 1;
- aamloop(microsecs);
+ delayloop(microsecs);
}
/*
--- a/sys/src/9/pc/dat.h
+++ b/sys/src/9/pc/dat.h
@@ -208,7 +208,7 @@
int lastintr;
int loopconst;
- int aalcycles;
+ int delaylcycles;
int cpumhz;
uvlong cpuhz;
--- a/sys/src/9/pc/devarch.c
+++ b/sys/src/9/pc/devarch.c
@@ -546,7 +546,16 @@
|| (t->family == -1))
break;
- m->aalcycles = t->aalcycles;
+ /*
+ * This is only meaningful for old archs on the 386 kernel
+ * where we use LOOP+AAM instruction in delayloop()
+ * which has documented cycle times.
+ *
+ * On AMD64, we use a chain of IDIVQ instructions but
+ * hopefully, we have the TSC instruction available
+ * to actually measure the delay.
+ */
+ m->delaylcycles = t->aalcycles;
m->cpuidtype = t->name;
/*
--- a/sys/src/9/pc/fns.h
+++ b/sys/src/9/pc/fns.h
@@ -1,6 +1,5 @@
#include "../port/portfns.h"
-void aamloop(int);
Dirtab* addarchfile(char*, int, long(*)(Chan*,void*,long,vlong), long(*)(Chan*,void*,long,vlong));
void archinit(void);
void archreset(void);
@@ -25,6 +24,7 @@
void cpuidprint(void);
void (*cycles)(uvlong*);
void delay(int);
+void delayloop(int);
void* dmabva(int);
#define dmaflush(clean, addr, len)
int dmacount(int);
--- a/sys/src/9/pc/hpet.c
+++ b/sys/src/9/pc/hpet.c
@@ -63,7 +63,7 @@
for(loops = 1000;;loops += 1000){
cycles(&a);
x = hpet.mmio[Ctrlo];
- aamloop(loops);
+ delayloop(loops);
cycles(&b);
y = hpet.mmio[Ctrlo] - x;
if(y >= hpet.freq/HZ || loops >= 1000000)
@@ -74,10 +74,10 @@
if(m->havetsc && b > a){
b -= a;
m->cyclefreq = b * hpet.freq / y;
- m->aalcycles = (b + loops-1) / loops;
+ m->delaylcycles = (b + loops-1) / loops;
return m->cyclefreq;
}
- return (vlong)loops*m->aalcycles * hpet.freq / y;
+ return (vlong)loops*m->delaylcycles * hpet.freq / y;
}
void
@@ -99,7 +99,7 @@
/* measure loopconst for delay() and tsc frequencies */
cpufreq = hpetcpufreq();
- m->loopconst = (cpufreq/1000)/m->aalcycles; /* AAM+LOOP's for 1 ms */
+ m->loopconst = (cpufreq/1000)/m->delaylcycles; /* delayloop()'s for 1 ms */
m->cpuhz = cpufreq;
/* round to the nearest megahz */
--- a/sys/src/9/pc/i8253.c
+++ b/sys/src/9/pc/i8253.c
@@ -123,23 +123,12 @@
ilock(&i8253);
for(loops = 1000;;loops += 1000) {
- /*
- * measure time for the loop
- *
- * MOVL loops,CX
- * aaml1: AAM
- * LOOP aaml1
- *
- * the time for the loop should be independent of external
- * cache and memory system since it fits in the execution
- * prefetch buffer.
- *
- */
+ /* measure time for the delayloop() */
outb(Tmode, Latch2);
cycles(&a);
x = inb(T2cntr);
x |= inb(T2cntr)<<8;
- aamloop(loops);
+ delayloop(loops);
outb(Tmode, Latch2);
cycles(&b);
y = inb(T2cntr);
@@ -161,12 +150,12 @@
if(m->havetsc && b > a){
b -= a;
m->cyclefreq = b * 2*Freq / x;
- m->aalcycles = (b + loops-1) / loops;
+ m->delaylcycles = (b + loops-1) / loops;
return m->cyclefreq;
}
- return (vlong)loops*m->aalcycles * 2*Freq / x;
+ return (vlong)loops*m->delaylcycles * 2*Freq / x;
}
void
@@ -189,7 +178,7 @@
cpufreq = i8253cpufreq();
- m->loopconst = (cpufreq/1000)/m->aalcycles; /* AAM+LOOP's for 1 ms */
+ m->loopconst = (cpufreq/1000)/m->delaylcycles; /* delayloop()'s for 1 ms */
m->cpuhz = cpufreq;
/*
--- a/sys/src/9/pc/l.s
+++ b/sys/src/9/pc/l.s
@@ -566,7 +566,7 @@
/*
* Basic timing loop to determine CPU frequency.
*/
-TEXT aamloop(SB), $0
+TEXT delayloop(SB), $0
MOVL count+0(FP), CX
_aamloop:
AAM
--- a/sys/src/9/pc64/dat.h
+++ b/sys/src/9/pc64/dat.h
@@ -188,7 +188,7 @@
int lastintr;
int loopconst;
- int aalcycles;
+ int delaylcycles;
int cpumhz;
uvlong cpuhz;
--- a/sys/src/9/pc64/fns.h
+++ b/sys/src/9/pc64/fns.h
@@ -1,6 +1,5 @@
#include "../port/portfns.h"
-void aamloop(int);
Dirtab* addarchfile(char*, int, long(*)(Chan*,void*,long,vlong), long(*)(Chan*,void*,long,vlong));
void archinit(void);
void archreset(void);
@@ -21,6 +20,7 @@
void cpuidprint(void);
void (*cycles)(uvlong*);
void delay(int);
+void delayloop(int);
void* dmabva(int);
#define dmaflush(clean, addr, len)
int dmacount(int);
--- a/sys/src/9/pc64/l.s
+++ b/sys/src/9/pc64/l.s
@@ -430,12 +430,23 @@
RET
/*
- * Basic timing loop to determine CPU frequency.
+ * Basic timing loop.
*/
-TEXT aamloop(SB), 1, $-4
+TEXT delayloop(SB), 1, $-4
MOVL RARG, CX
-_aamloop:
- LOOP _aamloop
+ MOVL $1, BX
+ MOVL $0, DX
+ MOVL $-1, AX
+_delayloop:
+ IDIVQ BX
+ IDIVQ BX
+ IDIVQ BX
+ IDIVQ BX
+ IDIVQ BX
+ IDIVQ BX
+ IDIVQ BX
+ IDIVQ BX
+ LOOP _delayloop
RET
TEXT _cycles(SB), 1, $-4 /* time stamp counter */