shithub: riscv

Download patch

ref: 1b51d5683a5d8adcde03bbd277e6331f23c2f723
parent: ad5c6c0dfabd5feb39a85eec7c95a703c91feabe
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Sun Mar 31 15:55:02 EDT 2024

pc64: use chain of IDIVQ for delayloop() (former aamloop())

On modern machines, doing an empty loop
is too fast, resulting in us using the
maximum loopconst of 1000000 and getting
wrong delay() timings.

To fix the timings, use a chain of IDIVQ
instructions instead.

The loop timing is going to be measured
using the TSC, so the exact timing doesn't
need to be known.

Rename aamloop() to delayloop() as it does
different things depending on 386 or amd64.

--- a/sys/src/9/pc/archgeneric.c
+++ b/sys/src/9/pc/archgeneric.c
@@ -46,7 +46,7 @@
 	millisecs *= m->loopconst;
 	if(millisecs <= 0)
 		millisecs = 1;
-	aamloop(millisecs);
+	delayloop(millisecs);
 }
 
 void
@@ -56,7 +56,7 @@
 	microsecs /= 1000;
 	if(microsecs <= 0)
 		microsecs = 1;
-	aamloop(microsecs);
+	delayloop(microsecs);
 }
 
 /*  
--- a/sys/src/9/pc/dat.h
+++ b/sys/src/9/pc/dat.h
@@ -208,7 +208,7 @@
 	int	lastintr;
 
 	int	loopconst;
-	int	aalcycles;
+	int	delaylcycles;
 	int	cpumhz;
 	uvlong	cpuhz;
 
--- a/sys/src/9/pc/devarch.c
+++ b/sys/src/9/pc/devarch.c
@@ -546,7 +546,16 @@
 		|| (t->family == -1))
 			break;
 
-	m->aalcycles = t->aalcycles;
+	/*
+	 * This is only meaningful for old archs on 386 kernel
+	 * where we use LOOP+AAM instruction in delayloop()
+	 * which has documented cycle times.
+	 *
+	 * On AMD64, we use a chain of IDIVQ instructions but
+	 * hopefully, we have the TSC instruction available
+	 * to actually measure the delay.
+	 */
+	m->delaylcycles = t->aalcycles;
 	m->cpuidtype = t->name;
 
 	/*
--- a/sys/src/9/pc/fns.h
+++ b/sys/src/9/pc/fns.h
@@ -1,6 +1,5 @@
 #include "../port/portfns.h"
 
-void	aamloop(int);
 Dirtab*	addarchfile(char*, int, long(*)(Chan*,void*,long,vlong), long(*)(Chan*,void*,long,vlong));
 void	archinit(void);
 void	archreset(void);
@@ -25,6 +24,7 @@
 void	cpuidprint(void);
 void	(*cycles)(uvlong*);
 void	delay(int);
+void	delayloop(int);
 void*	dmabva(int);
 #define	dmaflush(clean, addr, len)
 int	dmacount(int);
--- a/sys/src/9/pc/hpet.c
+++ b/sys/src/9/pc/hpet.c
@@ -63,7 +63,7 @@
 	for(loops = 1000;;loops += 1000){
 		cycles(&a);
 		x = hpet.mmio[Ctrlo];
-		aamloop(loops);
+		delayloop(loops);
 		cycles(&b);
 		y = hpet.mmio[Ctrlo] - x;
 		if(y >= hpet.freq/HZ || loops >= 1000000)
@@ -74,10 +74,10 @@
 	if(m->havetsc && b > a){
 		b -= a;
 		m->cyclefreq = b * hpet.freq / y;
-		m->aalcycles = (b + loops-1) / loops;
+		m->delaylcycles = (b + loops-1) / loops;
 		return m->cyclefreq;
 	}
-	return (vlong)loops*m->aalcycles * hpet.freq / y;
+	return (vlong)loops*m->delaylcycles * hpet.freq / y;
 }
 
 void
@@ -99,7 +99,7 @@
 	/* measure loopconst for delay() and tsc frequencies */
 	cpufreq = hpetcpufreq();
 
-	m->loopconst = (cpufreq/1000)/m->aalcycles;	/* AAM+LOOP's for 1 ms */
+	m->loopconst = (cpufreq/1000)/m->delaylcycles;	/* delayloop()'s for 1 ms */
 	m->cpuhz = cpufreq;
 
 	/* round to the nearest megahz */
--- a/sys/src/9/pc/i8253.c
+++ b/sys/src/9/pc/i8253.c
@@ -123,23 +123,12 @@
 
 	ilock(&i8253);
 	for(loops = 1000;;loops += 1000) {
-		/*
-		 *  measure time for the loop
-		 *
-		 *			MOVL	loops,CX
-		 *	aaml1:	 	AAM
-		 *			LOOP	aaml1
-		 *
-		 *  the time for the loop should be independent of external
-		 *  cache and memory system since it fits in the execution
-		 *  prefetch buffer.
-		 *
-		 */
+		/* measure time for the delayloop() */
 		outb(Tmode, Latch2);
 		cycles(&a);
 		x = inb(T2cntr);
 		x |= inb(T2cntr)<<8;
-		aamloop(loops);
+		delayloop(loops);
 		outb(Tmode, Latch2);
 		cycles(&b);
 		y = inb(T2cntr);
@@ -161,12 +150,12 @@
 	if(m->havetsc && b > a){
 		b -= a;
 		m->cyclefreq = b * 2*Freq / x;
-		m->aalcycles = (b + loops-1) / loops;
+		m->delaylcycles = (b + loops-1) / loops;
 
 		return m->cyclefreq;
 	}
 
-	return (vlong)loops*m->aalcycles * 2*Freq / x;
+	return (vlong)loops*m->delaylcycles * 2*Freq / x;
 }
 
 void
@@ -189,7 +178,7 @@
 
 	cpufreq = i8253cpufreq();
 
-	m->loopconst = (cpufreq/1000)/m->aalcycles;	/* AAM+LOOP's for 1 ms */
+	m->loopconst = (cpufreq/1000)/m->delaylcycles;	/* delayloop()'s for 1 ms */
 	m->cpuhz = cpufreq;
 
 	/*
--- a/sys/src/9/pc/l.s
+++ b/sys/src/9/pc/l.s
@@ -566,7 +566,7 @@
 /*
  * Basic timing loop to determine CPU frequency.
  */
-TEXT aamloop(SB), $0
+TEXT delayloop(SB), $0
 	MOVL	count+0(FP), CX
 _aamloop:
 	AAM
--- a/sys/src/9/pc64/dat.h
+++ b/sys/src/9/pc64/dat.h
@@ -188,7 +188,7 @@
 	int	lastintr;
 
 	int	loopconst;
-	int	aalcycles;
+	int	delaylcycles;
 	int	cpumhz;
 	uvlong	cpuhz;
 
--- a/sys/src/9/pc64/fns.h
+++ b/sys/src/9/pc64/fns.h
@@ -1,6 +1,5 @@
 #include "../port/portfns.h"
 
-void	aamloop(int);
 Dirtab*	addarchfile(char*, int, long(*)(Chan*,void*,long,vlong), long(*)(Chan*,void*,long,vlong));
 void	archinit(void);
 void	archreset(void);
@@ -21,6 +20,7 @@
 void	cpuidprint(void);
 void	(*cycles)(uvlong*);
 void	delay(int);
+void	delayloop(int);
 void*	dmabva(int);
 #define	dmaflush(clean, addr, len)
 int	dmacount(int);
--- a/sys/src/9/pc64/l.s
+++ b/sys/src/9/pc64/l.s
@@ -430,12 +430,23 @@
 	RET
 
 /*
- * Basic timing loop to determine CPU frequency.
+ * Basic timing loop.
  */
-TEXT aamloop(SB), 1, $-4
+TEXT delayloop(SB), 1, $-4
 	MOVL	RARG, CX
-_aamloop:
-	LOOP	_aamloop
+	MOVL	$1, BX
+	MOVL	$0, DX
+	MOVL	$-1, AX
+_delayloop:
+	IDIVQ	BX
+	IDIVQ	BX
+	IDIVQ	BX
+	IDIVQ	BX
+	IDIVQ	BX
+	IDIVQ	BX
+	IDIVQ	BX
+	IDIVQ	BX
+	LOOP	_delayloop
 	RET
 
 TEXT _cycles(SB), 1, $-4			/* time stamp counter */