From 84895f1a210d0037a86887f0f647570bdf40afa2 Mon Sep 17 00:00:00 2001
From: Damien George
Date: Mon, 27 Nov 2017 12:51:52 +1100
Subject: [PATCH] py/parsenum: Improve parsing of floating point numbers.

This patch improves parsing of floating point numbers by converting all the
digits (integer and fractional) together into a number 1 or greater, and
then applying the correct power of 10 at the very end. In particular the
multiple "multiply by 0.1" operations to build a fraction are now combined
together and applied at the same time as the exponent, at the very end.

This helps to retain precision during parsing of floats, and also includes
a check that the number doesn't overflow during the parsing. One benefit is
that a float will have the same value no matter where the decimal point is
located, eg 1.23 == 123e-2.
---
 py/parsenum.c                         | 27 +++++++++++++++++++++------
 tests/float/float_parse.py            | 22 ++++++++++++++++++++++
 tests/float/float_parse_doubleprec.py | 16 ++++++++++++++++
 tests/run-tests                       |  1 +
 4 files changed, 60 insertions(+), 6 deletions(-)
 create mode 100644 tests/float/float_parse.py
 create mode 100644 tests/float/float_parse_doubleprec.py

diff --git a/py/parsenum.c b/py/parsenum.c
index b62029f7c7..98e7736851 100644
--- a/py/parsenum.c
+++ b/py/parsenum.c
@@ -170,6 +170,14 @@ typedef enum {
 
 mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool force_complex, mp_lexer_t *lex) {
 #if MICROPY_PY_BUILTINS_FLOAT
+
+// DEC_VAL_MAX only needs to be rough and is used to retain precision while not overflowing
+#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+#define DEC_VAL_MAX 1e20F
+#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
+#define DEC_VAL_MAX 1e200
+#endif
+
     const char *top = str + len;
     mp_float_t dec_val = 0;
     bool dec_neg = false;
@@ -214,8 +222,8 @@ mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool
         // string should be a decimal number
         parse_dec_in_t in = PARSE_DEC_IN_INTG;
         bool exp_neg = false;
-        mp_float_t frac_mult = 0.1;
         mp_int_t exp_val = 0;
+        mp_int_t exp_extra = 0;
         while (str < top) {
             mp_uint_t dig = *str++;
             if ('0' <= dig && dig <= '9') {
@@ -223,11 +231,18 @@ mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool
                 if (in == PARSE_DEC_IN_EXP) {
                     exp_val = 10 * exp_val + dig;
                 } else {
-                    if (in == PARSE_DEC_IN_FRAC) {
-                        dec_val += dig * frac_mult;
-                        frac_mult *= MICROPY_FLOAT_CONST(0.1);
-                    } else {
+                    if (dec_val < DEC_VAL_MAX) {
+                        // dec_val won't overflow so keep accumulating
                         dec_val = 10 * dec_val + dig;
+                        if (in == PARSE_DEC_IN_FRAC) {
+                            --exp_extra;
+                        }
+                    } else {
+                        // dec_val might overflow and we anyway can't represent more digits
+                        // of precision, so ignore the digit and just adjust the exponent
+                        if (in == PARSE_DEC_IN_INTG) {
+                            ++exp_extra;
+                        }
                     }
                 }
             } else if (in == PARSE_DEC_IN_INTG && dig == '.') {
@@ -261,7 +276,7 @@ mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool
         }
 
         // apply the exponent
-        dec_val *= MICROPY_FLOAT_C_FUN(pow)(10, exp_val);
+        dec_val *= MICROPY_FLOAT_C_FUN(pow)(10, exp_val + exp_extra);
     }
 
     // negate value if needed
diff --git a/tests/float/float_parse.py b/tests/float/float_parse.py
new file mode 100644
index 0000000000..448eff3bc9
--- /dev/null
+++ b/tests/float/float_parse.py
@@ -0,0 +1,22 @@
+# test parsing of floats
+
+inf = float('inf')
+
+# it shouldn't matter where the decimal point is if the exponent balances the value
+print(float('1234') - float('0.1234e4'))
+print(float('1.015625') - float('1015625e-6'))
+
+# very large integer part with a very negative exponent should cancel out
+print(float('9' * 60 + 'e-60'))
+print(float('9' * 60 + 'e-40'))
+print(float('9' * 60 + 'e-20') == float('1e40'))
+
+# many fractional digits
+print(float('.' + '9' * 70))
+print(float('.' + '9' * 70 + 'e20'))
+print(float('.' + '9' * 70 + 'e-50') == float('1e-50'))
+
+# tiny fraction with large exponent
+print(float('.' + '0' * 60 + '1e10') == float('1e-51'))
+print(float('.' + '0' * 60 + '9e25'))
+print(float('.' + '0' * 60 + '9e40'))
diff --git a/tests/float/float_parse_doubleprec.py b/tests/float/float_parse_doubleprec.py
new file mode 100644
index 0000000000..3566011309
--- /dev/null
+++ b/tests/float/float_parse_doubleprec.py
@@ -0,0 +1,16 @@
+# test parsing of floats, requiring double-precision
+
+# very large integer part with a very negative exponent should cancel out
+print(float('9' * 400 + 'e-100'))
+print(float('9' * 400 + 'e-200'))
+print(float('9' * 400 + 'e-400'))
+
+# many fractional digits
+print(float('.' + '9' * 400))
+print(float('.' + '9' * 400 + 'e100'))
+print(float('.' + '9' * 400 + 'e-100'))
+
+# tiny fraction with large exponent
+print(float('.' + '0' * 400 + '9e100'))
+print(float('.' + '0' * 400 + '9e200'))
+print(float('.' + '0' * 400 + '9e400'))
diff --git a/tests/run-tests b/tests/run-tests
index 6280a5182b..3c763512c0 100755
--- a/tests/run-tests
+++ b/tests/run-tests
@@ -271,6 +271,7 @@ def run_tests(pyb, tests, args, base_path="."):
     if upy_float_precision < 64:
         skip_tests.add('float/float_divmod.py') # tested by float/float_divmod_relaxed.py instead
         skip_tests.add('float/float2int_doubleprec_intbig.py')
+        skip_tests.add('float/float_parse_doubleprec.py')
 
     if not has_complex:
         skip_tests.add('float/complex1.py')