shithub: kwa

Download patch

ref: 5086f3453d6716d6dcd782a45831c1ebb0f2b115
parent: 8e3c92b4eae6ff958d8af946caf6fb7d7f3bfbf7
author: qwx <qwx@sciops.net>
date: Wed Sep 24 21:41:19 EDT 2025

to_number: try to strtoll before strtod

our awk is already stricter than onetrueawk and considers
anything with trailing characters to be a string.  we'll
be stricter still and attempt to capture all edge cases.
prerequisite for ints.

--- a/lib.c
+++ b/lib.c
@@ -635,47 +635,74 @@
 	return *s == '=' && s > os && *(s+1) != '=';
 }
 
-/* strtod is supposed to be a proper test of what's a valid number */
+static int is_float(char *s, Awkfloat *fp)	/* strtod s into *fp; returns 1 only for a finite value followed by nothing but whitespace */
+{
+	char c, *p, *q;
+	Awkfloat f;
 
+	f = *fp = strtod(s, &p);	/* *fp is stored even on the paths below that return 0 */
+	if (p == s)	/* strtod consumed nothing */
+		return 0;
+	else if (isInf(f, 1) || isInf(f, -1) || isNaN(f))	/* infinities and NaN are not accepted */
+		return 0;
+	else if (f == 0.0 && ((q = strchr(s, '0')) == nil || q > p))	/* a zero result is rejected unless s has a '0' at or before p */
+		return 0;
+	for (; (c = *p) != '\0'; p++) {	/* whatever strtod left over must be whitespace */
+		switch(c) {
+		case ' ':
+		case '\t':
+		case '\n':
+		case '\f':
+		case '\r':
+		case '\v':
+			continue;
+		case '\0':	/* not reachable: the loop condition already excludes '\0' */
+			return 1;
+		default:
+			return 0;
+		}
+	}
+	return 1;
+}
+
 int to_number(char *s, Awkfloat *fp)
 {
-	double r;
-	char *ep;
+	vlong v;	/* integer value from strtoll */
+	char *p, *q;	/* p: where strtoll stopped; q: scratch for strchr */
 
-	/*
-	 * fast could-it-be-a-number check before calling strtod,
-	 * which takes a surprisingly long time to reject non-numbers.
-	 */
-	switch (*s) {
-	case '0': case '1': case '2': case '3': case '4':
-	case '5': case '6': case '7': case '8': case '9':
-	case '\t':
-	case '\n':
-	case '\v':
-	case '\f':
-	case '\r':
-	case ' ':
-	case '-':
-	case '+':
+	v = strtoll(s, &p, 0);	/* NOTE(review): base 0 lets strtoll pick octal/hex from a 0/0x prefix ("010" would give 8), and no overflow check follows - confirm both are intended */
+	*fp = (Awkfloat)v;	/* NOTE(review): fp is dereferenced unconditionally; the removed code tested fp != nil first - confirm no caller passes nil */
+	switch(*p){	/* dispatch on the first character strtoll did not consume */
 	case '.':
-	case 'n':		/* nans */
-	case 'N':
-	case 'i':		/* infs */
-	case 'I':
+	case 'E':
+	case 'I':	/* inf */
+	case 'N':	/* nan */
+	case 'e':
+	case 'i':
+	case 'n':
+		if (is_float(s, fp))	/* reparse the whole string from s as a float; is_float overwrites *fp */
+			return NUM;
+		return 0;
+	case '\0':	/* strtoll stopped at the end of the string */
+		if (p == s)	/* nothing consumed and already at the terminator: s is empty */
+			return 0;
+		else if (v != 0 || (q = strchr(s, '0')) != nil && q < p)	/* nonzero value, or a zero backed by a '0' located before p */
+			return NUM;
 		break;
-	default:
-		return 0;	/* can't be a number */
 	}
-
-	r = strtod(s, &ep);
-	if (fp != nil)	/* relied upon by getfval */
-		*fp = r;
-	if (ep == s || isInf(r, 1) || isInf(r, -1) || isNaN(r))
-		return 0;
-	while (*ep == ' ' || *ep == '\t' || *ep == '\n')
-		ep++;
-	if (*ep == '\0')
-		return 1;
-	else
-		return 0;
+	for (;; p++) {	/* only whitespace may remain; NOTE(review): also entered when strtoll consumed nothing (p == s), so a whitespace-only string returns NUM - confirm intended */
+		switch(*p) {
+		case ' ':
+		case '\t':
+		case '\n':
+		case '\f':
+		case '\r':
+		case '\v':
+			continue;
+		case '\0':	/* reached the terminator through whitespace only */
+			return NUM;
+		default:	/* any other trailing character makes s a non-number */
+			return 0;
+		}
+	}
 }
--- /dev/null
+++ b/test/to_number.c
@@ -1,0 +1,118 @@
+#include <u.h>
+#include <libc.h>
+#include <ctype.h>
+
+typedef double Awkfloat;
+
+enum{	/* bit flags returned by to_number in this test file */
+	NUM = 1<<0,	/* set on every accepting return of to_number */
+	FLT = 1<<1,	/* or'ed in when is_float accepted the string */
+};
+
+static int is_float(char *s, Awkfloat *fp)	/* strtod s into *fp; returns 1 only for a finite value followed by nothing but whitespace */
+{
+	char c, *p, *q;
+	Awkfloat f;
+
+	f = *fp = strtod(s, &p);	/* *fp is stored even on the paths below that return 0 */
+	if (p == s)	/* strtod consumed nothing */
+		return 0;
+	else if (isInf(f, 1) || isInf(f, -1) || isNaN(f))	/* infinities and NaN are not accepted */
+		return 0;
+	else if (f == 0.0 && ((q = strchr(s, '0')) == nil || q > p))	/* a zero result is rejected unless s has a '0' at or before p */
+		return 0;
+	for (; (c = *p) != '\0'; p++) {	/* whatever strtod left over must be whitespace */
+		switch(c) {
+		case ' ':
+		case '\t':
+		case '\n':
+		case '\f':
+		case '\r':
+		case '\v':
+			continue;
+		case '\0':	/* not reachable: the loop condition already excludes '\0' */
+			return 1;
+		default:
+			return 0;
+		}
+	}
+	return 1;
+}
+
+int to_number(char *s, Awkfloat *fp)	/* classify s: 0 if not a number, NUM for an integer, NUM | FLT for a float; *fp receives the parsed value */
+{
+	vlong v;	/* integer value from strtoll */
+	char *p, *q;	/* p: where strtoll stopped; q: scratch for strchr */
+
+	v = strtoll(s, &p, 0);	/* NOTE(review): base 0 lets strtoll pick octal/hex from a 0/0x prefix ("010" would give 8), and no overflow check follows - confirm both are intended */
+	*fp = (Awkfloat)v;	/* fp is dereferenced unconditionally, so it must not be nil */
+	switch(*p){	/* dispatch on the first character strtoll did not consume */
+	case '.':
+	case 'E':
+	case 'I':	/* inf */
+	case 'N':	/* nan */
+	case 'e':
+	case 'i':
+	case 'n':
+		if (is_float(s, fp))	/* reparse the whole string from s as a float; is_float overwrites *fp */
+			return NUM | FLT;	/* NOTE(review): the lib.c hunk of this patch returns plain NUM at this point */
+		return 0;
+	case '\0':	/* strtoll stopped at the end of the string */
+		if (p == s)	/* nothing consumed and already at the terminator: s is empty */
+			return 0;
+		else if (v != 0 || (q = strchr(s, '0')) != nil && q < p)	/* nonzero value, or a zero backed by a '0' located before p */
+			return NUM;
+		break;
+	}
+	for (;; p++) {	/* only whitespace may remain; NOTE(review): also entered when strtoll consumed nothing (p == s), so a whitespace-only string returns NUM - confirm intended */
+		switch(*p) {
+		case ' ':
+		case '\t':
+		case '\n':
+		case '\f':
+		case '\r':
+		case '\v':
+			continue;
+		case '\0':	/* reached the terminator through whitespace only */
+			return NUM;
+		default:	/* any other trailing character makes s a non-number */
+			return 0;
+		}
+	}
+}
+
+void
+main(void)	/* asserts the to_number return flags for a fixed list of strings, then exits with a nil status */
+{
+	int r;
+	Awkfloat f;
+
+	r = to_number("3", &f); assert(r == NUM);	/* integers, with leading and/or trailing blanks */
+	r = to_number(" 3", &f); assert(r == NUM);
+	r = to_number("3 ", &f); assert(r == NUM);
+	r = to_number(" 3 ", &f); assert(r == NUM);
+	r = to_number("3x", &f); assert(r == 0);	/* trailing non-blank characters make it a string */
+	r = to_number(" 3x", &f); assert(r == 0);
+	r = to_number("3x ", &f); assert(r == 0);
+	r = to_number("3x tyu", &f); assert(r == 0);
+	r = to_number("3e1", &f); assert(r == (NUM | FLT));	/* complete exponent */
+	r = to_number("3e", &f); assert(r == 0);	/* dangling exponent marker */
+	r = to_number("-3e", &f); assert(r == 0);
+	r = to_number(" -3e ", &f); assert(r == 0);
+	r = to_number("3.1", &f); assert(r == (NUM | FLT));	/* decimals, with leading and/or trailing blanks */
+	r = to_number(" 3.1", &f); assert(r == (NUM | FLT));
+	r = to_number("3.1 ", &f); assert(r == (NUM | FLT));
+	r = to_number(" 3.1 ", &f); assert(r == (NUM | FLT));
+	r = to_number("x ", &f); assert(r == 0);
+	r = to_number(".1 ", &f); assert(r == (NUM | FLT));
+	r = to_number("-1e4 ", &f); assert(r == (NUM | FLT));
+	r = to_number(".", &f); assert(r == 0);	/* lone decimal point */
+	r = to_number("", &f); assert(r == 0);	/* NOTE(review): no case covers whitespace-only input such as " ", nor "010"/"0x10" or out-of-range integers */
+	r = to_number("0", &f); assert(r == NUM);	/* zero in its integer and float spellings */
+	r = to_number("0.", &f); assert(r == (NUM | FLT));
+	r = to_number("0.0", &f); assert(r == (NUM | FLT));
+	r = to_number("0e", &f); assert(r == 0);
+	r = to_number("nan", &f); assert(r == 0);	/* nan and inf are expected to be rejected */
+	r = to_number("inf", &f); assert(r == 0);
+	exits(nil);
+}
--