ref: 5086f3453d6716d6dcd782a45831c1ebb0f2b115
parent: 8e3c92b4eae6ff958d8af946caf6fb7d7f3bfbf7
author: qwx <qwx@sciops.net>
date: Wed Sep 24 21:41:19 EDT 2025
to_number: try to strtoll before strtod

our awk is already stricter than onetrueawk and considers anything with trailing characters to be a string. we'll be stricter still and attempt to capture all edge cases. prerequisite for ints.
--- a/lib.c
+++ b/lib.c
@@ -635,47 +635,74 @@
return *s == '=' && s > os && *(s+1) != '=';
}
-/* strtod is supposed to be a proper test of what's a valid number */
+static int is_float(char *s, Awkfloat *fp)
+{+ char c, *p, *q;
+ Awkfloat f;
+ f = *fp = strtod(s, &p);
+ if (p == s)
+ return 0;
+ else if (isInf(f, 1) || isInf(f, -1) || isNaN(f))
+ return 0;
+ else if (f == 0.0 && ((q = strchr(s, '0')) == nil || q > p))
+ return 0;
+ for (; (c = *p) != '\0'; p++) {+ switch(c) {+ case ' ':
+ case '\t':
+ case '\n':
+ case '\f':
+ case '\r':
+ case '\v':
+ continue;
+ case '\0':
+ return 1;
+ default:
+ return 0;
+ }
+ }
+ return 1;
+}
+
int to_number(char *s, Awkfloat *fp)
{- double r;
- char *ep;
+ vlong v;
+ char *p, *q;
- /*
- * fast could-it-be-a-number check before calling strtod,
- * which takes a surprisingly long time to reject non-numbers.
- */
- switch (*s) {- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- case '\t':
- case '\n':
- case '\v':
- case '\f':
- case '\r':
- case ' ':
- case '-':
- case '+':
+ v = strtoll(s, &p, 0);
+ *fp = (Awkfloat)v;
+ switch(*p){case '.':
- case 'n': /* nans */
- case 'N':
- case 'i': /* infs */
- case 'I':
+ case 'E':
+ case 'I': /* inf */
+ case 'N': /* nan */
+ case 'e':
+ case 'i':
+ case 'n':
+ if (is_float(s, fp))
+ return NUM;
+ return 0;
+ case '\0':
+ if (p == s)
+ return 0;
+ else if (v != 0 || (q = strchr(s, '0')) != nil && q < p)
+ return NUM;
break;
- default:
- return 0; /* can't be a number */
}
-
- r = strtod(s, &ep);
- if (fp != nil) /* relied upon by getfval */
- *fp = r;
- if (ep == s || isInf(r, 1) || isInf(r, -1) || isNaN(r))
- return 0;
- while (*ep == ' ' || *ep == '\t' || *ep == '\n')
- ep++;
- if (*ep == '\0')
- return 1;
- else
- return 0;
+ for (;; p++) {+ switch(*p) {+ case ' ':
+ case '\t':
+ case '\n':
+ case '\f':
+ case '\r':
+ case '\v':
+ continue;
+ case '\0':
+ return NUM;
+ default:
+ return 0;
+ }
+ }
}
--- /dev/null
+++ b/test/to_number.c
@@ -1,0 +1,118 @@
+#include <u.h>
+#include <libc.h>
+#include <ctype.h>
+
+typedef double Awkfloat;
+
+enum{+ NUM = 1<<0,
+ FLT = 1<<1,
+};
+
+static int is_float(char *s, Awkfloat *fp)
+{+ char c, *p, *q;
+ Awkfloat f;
+
+ f = *fp = strtod(s, &p);
+ if (p == s)
+ return 0;
+ else if (isInf(f, 1) || isInf(f, -1) || isNaN(f))
+ return 0;
+ else if (f == 0.0 && ((q = strchr(s, '0')) == nil || q > p))
+ return 0;
+ for (; (c = *p) != '\0'; p++) {+ switch(c) {+ case ' ':
+ case '\t':
+ case '\n':
+ case '\f':
+ case '\r':
+ case '\v':
+ continue;
+ case '\0':
+ return 1;
+ default:
+ return 0;
+ }
+ }
+ return 1;
+}
+
+int to_number(char *s, Awkfloat *fp)
+{+ vlong v;
+ char *p, *q;
+
+ v = strtoll(s, &p, 0);
+ *fp = (Awkfloat)v;
+ switch(*p){+ case '.':
+ case 'E':
+ case 'I': /* inf */
+ case 'N': /* nan */
+ case 'e':
+ case 'i':
+ case 'n':
+ if (is_float(s, fp))
+ return NUM | FLT;
+ return 0;
+ case '\0':
+ if (p == s)
+ return 0;
+ else if (v != 0 || (q = strchr(s, '0')) != nil && q < p)
+ return NUM;
+ break;
+ }
+ for (;; p++) {+ switch(*p) {+ case ' ':
+ case '\t':
+ case '\n':
+ case '\f':
+ case '\r':
+ case '\v':
+ continue;
+ case '\0':
+ return NUM;
+ default:
+ return 0;
+ }
+ }
+}
+
+void
+main(void)
+{+ int r;
+ Awkfloat f;
+
+ r = to_number("3", &f); assert(r == NUM);+ r = to_number(" 3", &f); assert(r == NUM);+ r = to_number("3 ", &f); assert(r == NUM);+ r = to_number(" 3 ", &f); assert(r == NUM);+ r = to_number("3x", &f); assert(r == 0);+ r = to_number(" 3x", &f); assert(r == 0);+ r = to_number("3x ", &f); assert(r == 0);+ r = to_number("3x tyu", &f); assert(r == 0);+ r = to_number("3e1", &f); assert(r == (NUM | FLT));+ r = to_number("3e", &f); assert(r == 0);+ r = to_number("-3e", &f); assert(r == 0);+ r = to_number(" -3e ", &f); assert(r == 0);+ r = to_number("3.1", &f); assert(r == (NUM | FLT));+ r = to_number(" 3.1", &f); assert(r == (NUM | FLT));+ r = to_number("3.1 ", &f); assert(r == (NUM | FLT));+ r = to_number(" 3.1 ", &f); assert(r == (NUM | FLT));+ r = to_number("x ", &f); assert(r == 0);+ r = to_number(".1 ", &f); assert(r == (NUM | FLT));+ r = to_number("-1e4 ", &f); assert(r == (NUM | FLT));+ r = to_number(".", &f); assert(r == 0);+ r = to_number("", &f); assert(r == 0);+ r = to_number("0", &f); assert(r == NUM);+ r = to_number("0.", &f); assert(r == (NUM | FLT));+ r = to_number("0.0", &f); assert(r == (NUM | FLT));+ r = to_number("0e", &f); assert(r == 0);+ r = to_number("nan", &f); assert(r == 0);+ r = to_number("inf", &f); assert(r == 0);+ exits(nil);
+}
--
⑨