shithub: qk1

Download patch

ref: 970cd854ede3f5bdc553c18e871b2de902dfa7bd
parent: 27db2993ef83fa6d425700b25d0e2b0570dc974c
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Tue Oct 31 08:26:07 EDT 2023

turn DotProduct into a C function; provide arm64 neon implementation

--- a/cmd.c
+++ b/cmd.c
@@ -621,7 +621,7 @@
 	
 // check cvars
 	if (!Cvar_Command ())
-		Con_Printf ("Unknown command \"%s\"\n", Cmd_Argv(0));
+		Con_Printf ("Unknown command \"%s\": %s\n", Cmd_Argv(0), text);
 	
 }
 
--- /dev/null
+++ b/dotproduct.c
@@ -1,0 +1,5 @@
+float	// portable C version: dot product of two 3-element float vectors
+DotProduct(float v1[3], float v2[3])	// only elements 0..2 of each argument are read
+{
+	return v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2];	// sum of the three component-wise products
+}
--- /dev/null
+++ b/dotproduct_arm64.s
@@ -1,0 +1,8 @@
+TEXT DotProduct+0(SB), 1, $-4	// arm64 NEON DotProduct; instructions are emitted as raw WORDs, decoded in the comments
+	MOV	v2+8(FP), R1	// R1 = second argument (pointer to the second vector)
+	WORD $0x0d40a000 // ld3 {v0.s, v1.s, v2.s}[0], [x0]; lane 0 of v0..v2 = 3 floats at x0 (presumably the first argument, already in R0; confirm)
+	WORD $0x0d40a023 // ld3 {v3.s, v4.s, v5.s}[0], [x1]; lane 0 of v3..v5 = 3 floats at x1 (second vector, loaded into R1 above)
+	WORD $0x1e230800 // fmul s0, s0, s3; s0 = a[0]*b[0]
+	WORD $0x1f040020 // fmadd s0, s1, s4, s0; s0 = a[1]*b[1] + s0 (fused, rounded once, so low bits may differ from the C version)
+	WORD $0x1f050040 // fmadd s0, s2, s5, s0; s0 = a[2]*b[2] + s0
+	RETURN	// result is left in s0 (presumably the float return register; confirm)
--- a/mathlib.c
+++ b/mathlib.c
@@ -293,12 +293,6 @@
 	vecc[2] = veca[2] + scale*vecb[2];
 }
 
-
-vec_t _DotProduct (vec3_t v1, vec3_t v2)
-{
-	return v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2];
-}
-
 void _VectorSubtract (vec3_t veca, vec3_t vecb, vec3_t out)
 {
 	out[0] = veca[0]-vecb[0];
--- a/mathlib.h
+++ b/mathlib.h
@@ -21,7 +21,7 @@
 
 #define	IS_NAN(x) (((*(int *)&x)&nanmask)==nanmask)
 
-#define DotProduct(x,y) (x[0]*y[0]+x[1]*y[1]+x[2]*y[2])
+#define DotProduct_(x,y) (x[0]*y[0]+x[1]*y[1]+x[2]*y[2])
 #define DotProductDouble(x,y) ((double)x[0]*y[0]+(double)x[1]*y[1]+(double)x[2]*y[2])
 #define VectorSubtract(a,b,c) {c[0]=a[0]-b[0];c[1]=a[1]-b[1];c[2]=a[2]-b[2];}
 #define VectorAdd(a,b,c) {c[0]=a[0]+b[0];c[1]=a[1]+b[1];c[2]=a[2]+b[2];}
@@ -29,7 +29,7 @@
 
 void VectorMA (vec3_t veca, float scale, vec3_t vecb, vec3_t vecc);
 
-vec_t _DotProduct (vec3_t v1, vec3_t v2);
+vec_t DotProduct (vec3_t v1, vec3_t v2);
 void _VectorSubtract (vec3_t veca, vec3_t vecb, vec3_t out);
 void _VectorAdd (vec3_t veca, vec3_t vecb, vec3_t out);
 void _VectorCopy (vec3_t in, vec3_t out);
--- a/mkfile
+++ b/mkfile
@@ -8,6 +8,7 @@
 	pal`{test -f pal_$objtype.s && echo -n _$objtype}.$O\
 	span`{test -f span_$objtype.s && echo -n _$objtype}.$O\
 	span_alpha.$O\
+	dotproduct`{test -f dotproduct_$objtype.s && echo -n _$objtype}.$O\
 	cd.$O\
 	cl_demo.$O\
 	cl_input.$O\