From 84895f1a210d0037a86887f0f647570bdf40afa2 Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Mon, 27 Nov 2017 12:51:52 +1100
Subject: [PATCH] py/parsenum: Improve parsing of floating point numbers.

This patch improves parsing of floating point numbers by converting all the
digits (integer and fractional) together into a number 1 or greater, and
then applying the correct power of 10 at the very end.  In particular the
multiple "multiply by 0.1" operations to build a fraction are now combined
together and applied at the same time as the exponent, at the very end.

This helps to retain precision during parsing of floats, and also includes
a check that the number doesn't overflow during the parsing.  One benefit
is that a float will have the same value no matter where the decimal point
is located, e.g. 1.23 == 123e-2.
---
 py/parsenum.c                         | 27 +++++++++++++++++++++------
 tests/float/float_parse.py            | 22 ++++++++++++++++++++++
 tests/float/float_parse_doubleprec.py | 16 ++++++++++++++++
 tests/run-tests                       |  1 +
 4 files changed, 60 insertions(+), 6 deletions(-)
 create mode 100644 tests/float/float_parse.py
 create mode 100644 tests/float/float_parse_doubleprec.py

diff --git a/py/parsenum.c b/py/parsenum.c
index b62029f7c7..98e7736851 100644
--- a/py/parsenum.c
+++ b/py/parsenum.c
@@ -170,6 +170,14 @@ typedef enum {
 
 mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool force_complex, mp_lexer_t *lex) {
 #if MICROPY_PY_BUILTINS_FLOAT
+
+// DEC_VAL_MAX only needs to be rough and is used to retain precision while not overflowing
+#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+#define DEC_VAL_MAX 1e20F
+#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
+#define DEC_VAL_MAX 1e200
+#endif
+
     const char *top = str + len;
     mp_float_t dec_val = 0;
     bool dec_neg = false;
@@ -214,8 +222,8 @@ mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool
         // string should be a decimal number
         parse_dec_in_t in = PARSE_DEC_IN_INTG;
         bool exp_neg = false;
-        mp_float_t frac_mult = 0.1;
         mp_int_t exp_val = 0;
+        mp_int_t exp_extra = 0;
         while (str < top) {
             mp_uint_t dig = *str++;
             if ('0' <= dig && dig <= '9') {
@@ -223,11 +231,18 @@ mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool
                 if (in == PARSE_DEC_IN_EXP) {
                     exp_val = 10 * exp_val + dig;
                 } else {
-                    if (in == PARSE_DEC_IN_FRAC) {
-                        dec_val += dig * frac_mult;
-                        frac_mult *= MICROPY_FLOAT_CONST(0.1);
-                    } else {
+                    if (dec_val < DEC_VAL_MAX) {
+                        // dec_val won't overflow so keep accumulating
                         dec_val = 10 * dec_val + dig;
+                        if (in == PARSE_DEC_IN_FRAC) {
+                            --exp_extra;
+                        }
+                    } else {
+                        // dec_val might overflow and we can't represent more digits of
+                        // precision anyway, so ignore the digit and just adjust the exponent
+                        if (in == PARSE_DEC_IN_INTG) {
+                            ++exp_extra;
+                        }
                     }
                 }
             } else if (in == PARSE_DEC_IN_INTG && dig == '.') {
@@ -261,7 +276,7 @@ mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool
         }
 
         // apply the exponent
-        dec_val *= MICROPY_FLOAT_C_FUN(pow)(10, exp_val);
+        dec_val *= MICROPY_FLOAT_C_FUN(pow)(10, exp_val + exp_extra);
     }
 
     // negate value if needed
diff --git a/tests/float/float_parse.py b/tests/float/float_parse.py
new file mode 100644
index 0000000000..448eff3bc9
--- /dev/null
+++ b/tests/float/float_parse.py
@@ -0,0 +1,22 @@
+# test parsing of floats
+
+inf = float('inf')
+
+# it shouldn't matter where the decimal point is if the exponent balances the value
+print(float('1234') - float('0.1234e4'))
+print(float('1.015625') - float('1015625e-6'))
+
+# very large integer part with a very negative exponent should cancel out
+print(float('9' * 60 + 'e-60'))
+print(float('9' * 60 + 'e-40'))
+print(float('9' * 60 + 'e-20') == float('1e40'))
+
+# many fractional digits
+print(float('.' + '9' * 70))
+print(float('.' + '9' * 70 + 'e20'))
+print(float('.' + '9' * 70 + 'e-50') == float('1e-50'))
+
+# tiny fraction with large exponent
+print(float('.' + '0' * 60 + '1e10') == float('1e-51'))
+print(float('.' + '0' * 60 + '9e25'))
+print(float('.' + '0' * 60 + '9e40'))
diff --git a/tests/float/float_parse_doubleprec.py b/tests/float/float_parse_doubleprec.py
new file mode 100644
index 0000000000..3566011309
--- /dev/null
+++ b/tests/float/float_parse_doubleprec.py
@@ -0,0 +1,16 @@
+# test parsing of floats, requiring double-precision
+
+# very large integer part with a very negative exponent should cancel out
+print(float('9' * 400 + 'e-100'))
+print(float('9' * 400 + 'e-200'))
+print(float('9' * 400 + 'e-400'))
+
+# many fractional digits
+print(float('.' + '9' * 400))
+print(float('.' + '9' * 400 + 'e100'))
+print(float('.' + '9' * 400 + 'e-100'))
+
+# tiny fraction with large exponent
+print(float('.' + '0' * 400 + '9e100'))
+print(float('.' + '0' * 400 + '9e200'))
+print(float('.' + '0' * 400 + '9e400'))
diff --git a/tests/run-tests b/tests/run-tests
index 6280a5182b..3c763512c0 100755
--- a/tests/run-tests
+++ b/tests/run-tests
@@ -271,6 +271,7 @@ def run_tests(pyb, tests, args, base_path="."):
     if upy_float_precision < 64:
         skip_tests.add('float/float_divmod.py') # tested by float/float_divmod_relaxed.py instead
         skip_tests.add('float/float2int_doubleprec_intbig.py')
+        skip_tests.add('float/float_parse_doubleprec.py')
 
     if not has_complex:
         skip_tests.add('float/complex1.py')