shithub: amd64-simd

Download patch

ref: ed1a42eb5035b0ba8d0dab7c17a6372995b403a2
parent: 0c51b567258d7b826e65976c7a72b081e30c2ccd
author: rodri <rgl@antares-labs.eu>
date: Sun Nov 26 16:36:41 EST 2023

corrected tests and reimplemented some functions using 6a's own instructions.

some tests were reporting wrong results when the functions didn't write to the correct memory addresses, because that memory was reused from the previous test. now it is reset before every run.

--- a/bench/main.c
+++ b/bench/main.c
@@ -13,6 +13,7 @@
 Point3 crossvec3_sse(Point3, Point3);
 double hsubpd(double, double);
 double fma(double, double, double);
+Point2 addpt2_sse(Point2, Point2);
 Point2 addpt2_avx(Point2, Point2);
 Point3 addpt3_avx(Point3, Point3);
 
@@ -232,13 +233,14 @@
 baddpt2(int fd)
 {
 	Bgr g;
-	B *b0, *b1;
+	B *b0, *b1, *b2;
 	Point2 a, b;
 	int i;
 
 	benchinitgr(&g, "2d point sum");
 	b0 = benchadd(&g, "addpt2");
-	b1 = benchadd(&g, "addpt2_avx");
+	b1 = benchadd(&g, "addpt2_sse");
+	b2 = benchadd(&g, "addpt2_avx");
 
 	while(b0->n > 0 || b1->n > 0){
 		a = Pt2(truerand()*frand(), truerand()*frand(), truerand()*frand());
@@ -251,8 +253,13 @@
 
 		benchin(b1);
 		for(i = 0; i < 1e6; i++)
-			addpt2_avx(a, b);
+			addpt2_sse(a, b);
 		benchout(b1);
+
+		benchin(b2);
+		for(i = 0; i < 1e6; i++)
+			addpt2_avx(a, b);
+		benchout(b2);
 	}
 
 	benchprintgr(&g, fd);