From 783b1a868fb0f3c1fd6cf7231311c24801c33505 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 24 Mar 2020 23:54:45 -0500 Subject: [PATCH] py/runtime: Allow multiple *args in a function call. This is a partial implementation of PEP 448 to allow unpacking multiple star args in a function or method call. This is implemented by changing the emitted bytecodes so that both positional args and star args are stored as positional args. A bitmap is added to indicate if an argument at a given position is a positional argument or a star arg. In the generated code, this new bitmap takes the place of the old star arg. It is stored as a small int, so this means only the first N arguments can be star args where N is the number of bits in a small int. The runtime is modified to interpret this new bytecode format while still trying to perform as few memory reallocations as possible. Signed-off-by: David Lechner --- docs/differences/python_35.rst | 2 +- py/compile.c | 37 +++++---- py/runtime.c | 103 ++++++++++++++++---------- py/vm.c | 4 +- tests/basics/fun_callstar.py | 12 +++ tests/basics/fun_callstardblstar.py | 10 +++ tests/basics/fun_kwvarargs.py | 13 ++++ tests/basics/python34.py | 11 +-- tests/basics/python34.py.exp | 5 -- tests/cpydiff/syntax_arg_unpacking.py | 23 ++++++ 10 files changed, 151 insertions(+), 69 deletions(-) create mode 100644 tests/cpydiff/syntax_arg_unpacking.py diff --git a/docs/differences/python_35.rst b/docs/differences/python_35.rst index e88df25f95..06cfbfc03c 100644 --- a/docs/differences/python_35.rst +++ b/docs/differences/python_35.rst @@ -8,7 +8,7 @@ Below is a list of finalised/accepted PEPs for Python 3.5 grouped into their imp +----------------------------------------------------------------------------------------------------------+---------------+ | **Extensions to the syntax:** | **Status** | +--------------------------------------------------------+-------------------------------------------------+---------------+ - | `PEP 448 `_ | additional unpacking generalizations | | + | `PEP 448 `_ | additional unpacking generalizations | Partial | +--------------------------------------------------------+-------------------------------------------------+---------------+ | `PEP 465 `_ | a new matrix multiplication operator | Completed | +--------------------------------------------------------+-------------------------------------------------+---------------+ diff --git a/py/compile.c b/py/compile.c index 1636bd157d..fc11062705 100644 --- a/py/compile.c +++ b/py/compile.c @@ -37,6 +37,7 @@ #include "py/asmbase.h" #include "py/nativeglue.h" #include "py/persistentcode.h" +#include "py/smallint.h" #if MICROPY_ENABLE_COMPILER @@ -2397,17 +2398,30 @@ STATIC void compile_trailer_paren_helper(compiler_t *comp, mp_parse_node_t pn_ar int n_positional = n_positional_extra; uint n_keyword = 0; uint star_flags = 0; - mp_parse_node_struct_t *star_args_node = NULL; + mp_uint_t star_args = 0; for (size_t i = 0; i < n_args; i++) { if (MP_PARSE_NODE_IS_STRUCT(args[i])) { mp_parse_node_struct_t *pns_arg = (mp_parse_node_struct_t *)args[i]; if (MP_PARSE_NODE_STRUCT_KIND(pns_arg) == PN_arglist_star) { - if (star_flags & MP_EMIT_STAR_FLAG_SINGLE) { - compile_syntax_error(comp, (mp_parse_node_t)pns_arg, MP_ERROR_TEXT("can't have multiple *x")); + if (star_flags & MP_EMIT_STAR_FLAG_DOUBLE) { + compile_syntax_error(comp, (mp_parse_node_t)pns_arg, MP_ERROR_TEXT("* arg after **")); + return; + } + #if MICROPY_DYNAMIC_COMPILER + if (i > mp_dynamic_compiler.small_int_bits) + #else + if (i > MP_SMALL_INT_BITS) + #endif + { + // If there are not enough bits in a small int to fit the flag, then we consider + // it a syntax error. It should be unlikely to have this many args in practice. + compile_syntax_error(comp, (mp_parse_node_t)pns_arg, MP_ERROR_TEXT("too many args")); return; } star_flags |= MP_EMIT_STAR_FLAG_SINGLE; - star_args_node = pns_arg; + star_args |= 1 << i; + compile_node(comp, pns_arg->nodes[0]); + n_positional++; } else if (MP_PARSE_NODE_STRUCT_KIND(pns_arg) == PN_arglist_dbl_star) { star_flags |= MP_EMIT_STAR_FLAG_DOUBLE; // double-star args are stored as kw arg with key of None @@ -2438,12 +2452,12 @@ STATIC void compile_trailer_paren_helper(compiler_t *comp, mp_parse_node_t pn_ar } } else { normal_argument: - if (star_flags) { - compile_syntax_error(comp, args[i], MP_ERROR_TEXT("non-keyword arg after */**")); + if (star_flags & MP_EMIT_STAR_FLAG_DOUBLE) { + compile_syntax_error(comp, args[i], MP_ERROR_TEXT("positional arg after **")); return; } if (n_keyword > 0) { - compile_syntax_error(comp, args[i], MP_ERROR_TEXT("non-keyword arg after keyword arg")); + compile_syntax_error(comp, args[i], MP_ERROR_TEXT("positional arg after keyword arg")); return; } compile_node(comp, args[i]); @@ -2451,14 +2465,9 @@ STATIC void compile_trailer_paren_helper(compiler_t *comp, mp_parse_node_t pn_ar } } - // compile the star/double-star arguments if we had them - // if we had one but not the other then we load "null" as a place holder if (star_flags != 0) { - if (star_args_node == NULL) { - EMIT(load_null); - } else { - compile_node(comp, star_args_node->nodes[0]); - } + // one extra object that contains the star_args map + EMIT_ARG(load_const_small_int, star_args); } // emit the function/method call diff --git a/py/runtime.c b/py/runtime.c index e6bfbda58d..90722ee18a 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -701,9 +701,9 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_ } uint n_args = n_args_n_kw & 0xff; uint n_kw = (n_args_n_kw >> 8) & 0xff; - mp_obj_t pos_seq = args[n_args + 2 * n_kw]; // may be MP_OBJ_NULL + mp_uint_t star_args = mp_obj_get_int_truncated(args[n_args + 2 * n_kw]); - DEBUG_OP_printf("call method var (fun=%p, self=%p, n_args=%u, n_kw=%u, args=%p, seq=%p)\n", fun, self, n_args, n_kw, args, pos_seq); + DEBUG_OP_printf("call method var (fun=%p, self=%p, n_args=%u, n_kw=%u, args=%p, map=%u)\n", fun, self, n_args, n_kw, args, star_args); // We need to create the following array of objects: // args[0 .. n_args] unpacked(pos_seq) args[n_args .. n_args + 2 * n_kw] unpacked(kw_dict) @@ -714,6 +714,20 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_ uint args2_alloc; uint args2_len = 0; + // Try to get a hint for unpacked * args length + uint list_len = 0; + + if (star_args != 0) { + for (uint i = 0; i < n_args; i++) { + if (star_args & (1 << i)) { + mp_obj_t len = mp_obj_len_maybe(args[i]); + if (len != MP_OBJ_NULL) { + list_len += mp_obj_get_int(len); + } + } + } + } + // Try to get a hint for the size of the kw_dict uint kw_dict_len = 0; @@ -727,8 +741,8 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_ // Extract the pos_seq sequence to the new args array. // Note that it can be arbitrary iterator. - if (pos_seq == MP_OBJ_NULL) { - // no sequence + if (star_args == 0) { + // no star args to unpack // allocate memory for the new array of args args2_alloc = 1 + n_args + 2 * (n_kw + kw_dict_len); @@ -742,33 +756,11 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_ // copy the fixed pos args mp_seq_copy(args2 + args2_len, args, n_args, mp_obj_t); args2_len += n_args; - - } else if (mp_obj_is_type(pos_seq, &mp_type_tuple) || mp_obj_is_type(pos_seq, &mp_type_list)) { - // optimise the case of a tuple and list - - // get the items - size_t len; - mp_obj_t *items; - mp_obj_get_array(pos_seq, &len, &items); - - // allocate memory for the new array of args - args2_alloc = 1 + n_args + len + 2 * (n_kw + kw_dict_len); - args2 = mp_nonlocal_alloc(args2_alloc * sizeof(mp_obj_t)); - - // copy the self - if (self != MP_OBJ_NULL) { - args2[args2_len++] = self; - } - - // copy the fixed and variable position args - mp_seq_cat(args2 + args2_len, args, n_args, items, len, mp_obj_t); - args2_len += n_args + len; - } else { - // generic iterator + // at least one star arg to unpack // allocate memory for the new array of args - args2_alloc = 1 + n_args + 2 * (n_kw + kw_dict_len) + 3; + args2_alloc = 1 + n_args + list_len + 2 * (n_kw + kw_dict_len); args2 = mp_nonlocal_alloc(args2_alloc * sizeof(mp_obj_t)); // copy the self @@ -776,26 +768,57 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_ args2[args2_len++] = self; } - // copy the fixed position args - mp_seq_copy(args2 + args2_len, args, n_args, mp_obj_t); - args2_len += n_args; + for (uint i = 0; i < n_args; i++) { + mp_obj_t arg = args[i]; + if (star_args & (1 << i)) { + // star arg + if (mp_obj_is_type(arg, &mp_type_tuple) || mp_obj_is_type(arg, &mp_type_list)) { + // optimise the case of a tuple and list - // extract the variable position args from the iterator - mp_obj_iter_buf_t iter_buf; - mp_obj_t iterable = mp_getiter(pos_seq, &iter_buf); - mp_obj_t item; - while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) { - if (args2_len >= args2_alloc) { - args2 = mp_nonlocal_realloc(args2, args2_alloc * sizeof(mp_obj_t), args2_alloc * 2 * sizeof(mp_obj_t)); - args2_alloc *= 2; + // get the items + size_t len; + mp_obj_t *items; + mp_obj_get_array(arg, &len, &items); + + // copy the items + assert(args2_len + len <= args2_alloc); + mp_seq_copy(args2 + args2_len, items, len, mp_obj_t); + args2_len += len; + } else { + // generic iterator + + // extract the variable position args from the iterator + mp_obj_iter_buf_t iter_buf; + mp_obj_t iterable = mp_getiter(arg, &iter_buf); + mp_obj_t item; + while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) { + if (args2_len >= args2_alloc) { + args2 = mp_nonlocal_realloc(args2, args2_alloc * sizeof(mp_obj_t), + args2_alloc * 2 * sizeof(mp_obj_t)); + args2_alloc *= 2; + } + args2[args2_len++] = item; + } + } + } else { + // normal argument + assert(args2_len < args2_alloc); + args2[args2_len++] = arg; } - args2[args2_len++] = item; } } // The size of the args2 array now is the number of positional args. uint pos_args_len = args2_len; + // ensure there is still enough room for kw args + if (args2_len + 2 * (n_kw + kw_dict_len) > args2_alloc) { + uint new_alloc = args2_len + 2 * (n_kw + kw_dict_len); + args2 = mp_nonlocal_realloc(args2, args2_alloc * sizeof(mp_obj_t), + new_alloc * sizeof(mp_obj_t)); + args2_alloc = new_alloc; + } + // Copy the kw args. for (uint i = 0; i < n_kw; i++) { mp_obj_t kw_key = args[n_args + i * 2]; diff --git a/py/vm.c b/py/vm.c index 1450004838..02f8bc88c9 100644 --- a/py/vm.c +++ b/py/vm.c @@ -949,7 +949,7 @@ unwind_jump:; // unum & 0xff == n_positional // (unum >> 8) & 0xff == n_keyword // We have following stack layout here: - // fun arg0 arg1 ... kw0 val0 kw1 val1 ... seq <- TOS + // fun arg0 arg1 ... kw0 val0 kw1 val1 ... bitmap <- TOS sp -= (unum & 0xff) + ((unum >> 7) & 0x1fe) + 1; #if MICROPY_STACKLESS if (mp_obj_get_type(*sp) == &mp_type_fun_bc) { @@ -1034,7 +1034,7 @@ unwind_jump:; // unum & 0xff == n_positional // (unum >> 8) & 0xff == n_keyword // We have following stack layout here: - // fun self arg0 arg1 ... kw0 val0 kw1 val1 ... seq <- TOS + // fun self arg0 arg1 ... kw0 val0 kw1 val1 ... bitmap <- TOS sp -= (unum & 0xff) + ((unum >> 7) & 0x1fe) + 2; #if MICROPY_STACKLESS if (mp_obj_get_type(*sp) == &mp_type_fun_bc) { diff --git a/tests/basics/fun_callstar.py b/tests/basics/fun_callstar.py index a27a288a3c..53d2ece3e1 100644 --- a/tests/basics/fun_callstar.py +++ b/tests/basics/fun_callstar.py @@ -3,10 +3,16 @@ def foo(a, b, c): print(a, b, c) +foo(*(), 1, 2, 3) +foo(*(1,), 2, 3) +foo(*(1, 2), 3) foo(*(1, 2, 3)) foo(1, *(2, 3)) foo(1, 2, *(3,)) foo(1, 2, 3, *()) +foo(*(1,), 2, *(3,)) +foo(*(1, 2), *(3,)) +foo(*(1,), *(2, 3)) # Another sequence type foo(1, 2, *[100]) @@ -29,10 +35,16 @@ class A: print(a, b, c) a = A() +a.foo(*(), 1, 2, 3) +a.foo(*(1,), 2, 3) +a.foo(*(1, 2), 3) a.foo(*(1, 2, 3)) a.foo(1, *(2, 3)) a.foo(1, 2, *(3,)) a.foo(1, 2, 3, *()) +a.foo(*(1,), 2, *(3,)) +a.foo(*(1, 2), *(3,)) +a.foo(*(1,), *(2, 3)) # Another sequence type a.foo(1, 2, *[100]) diff --git a/tests/basics/fun_callstardblstar.py b/tests/basics/fun_callstardblstar.py index f2fd29107e..aceb04a843 100644 --- a/tests/basics/fun_callstardblstar.py +++ b/tests/basics/fun_callstardblstar.py @@ -6,6 +6,11 @@ def f(a, b, c, d): f(*(1, 2), **{'c':3, 'd':4}) f(*(1, 2), **{['c', 'd'][i]:(3 + i) for i in range(2)}) +try: + eval("f(**{'a': 1}, *(2, 3, 4))") +except SyntaxError: + print("SyntaxError") + # test calling a method with *tuple and **dict class A: @@ -15,3 +20,8 @@ class A: a = A() a.f(*(1, 2), **{'c':3, 'd':4}) a.f(*(1, 2), **{['c', 'd'][i]:(3 + i) for i in range(2)}) + +try: + eval("a.f(**{'a': 1}, *(2, 3, 4))") +except SyntaxError: + print("SyntaxError") diff --git a/tests/basics/fun_kwvarargs.py b/tests/basics/fun_kwvarargs.py index bdc10fcf14..e9fd0720e9 100644 --- a/tests/basics/fun_kwvarargs.py +++ b/tests/basics/fun_kwvarargs.py @@ -23,3 +23,16 @@ def f4(*vargs, **kwargs): f4(*(1, 2)) f4(kw_arg=3) f4(*(1, 2), kw_arg=3) + + +# test evaluation order of arguments +def f5(*vargs, **kwargs): + print(vargs, kwargs) + + +def print_ret(x): + print(x) + return x + + +f5(*print_ret(["a", "b"]), kw_arg=print_ret(None)) diff --git a/tests/basics/python34.py b/tests/basics/python34.py index 609a8b6b84..36e25e20dd 100644 --- a/tests/basics/python34.py +++ b/tests/basics/python34.py @@ -6,26 +6,23 @@ except NameError: print("SKIP") raise SystemExit -# from basics/fun_kwvarargs.py -# test evaluation order of arguments (in 3.4 it's backwards, 3.5 it's fixed) -def f4(*vargs, **kwargs): - print(vargs, kwargs) + def print_ret(x): print(x) return x -f4(*print_ret(['a', 'b']), kw_arg=print_ret(None)) # test evaluation order of dictionary key/value pair (in 3.4 it's backwards) {print_ret(1):print_ret(2)} + # from basics/syntaxerror.py def test_syntax(code): try: exec(code) except SyntaxError: print("SyntaxError") -test_syntax("f(*a, *b)") # can't have multiple * (in 3.5 we can) -test_syntax("f(*a, b)") # can't have positional after * + + test_syntax("f(**a, b)") # can't have positional after ** test_syntax("() = []") # can't assign to empty tuple (in 3.6 we can) test_syntax("del ()") # can't delete empty tuple (in 3.6 we can) diff --git a/tests/basics/python34.py.exp b/tests/basics/python34.py.exp index 75f1c2c056..a56c1a50b6 100644 --- a/tests/basics/python34.py.exp +++ b/tests/basics/python34.py.exp @@ -1,13 +1,8 @@ -None -['a', 'b'] -('a', 'b') {'kw_arg': None} 2 1 SyntaxError SyntaxError SyntaxError -SyntaxError -SyntaxError 3.4 3 4 IndexError('foo',) diff --git a/tests/cpydiff/syntax_arg_unpacking.py b/tests/cpydiff/syntax_arg_unpacking.py new file mode 100644 index 0000000000..e54832ddb9 --- /dev/null +++ b/tests/cpydiff/syntax_arg_unpacking.py @@ -0,0 +1,23 @@ +""" +categories: Syntax +description: Argument unpacking does not work if the argument being unpacked is the nth or greater argument where n is the number of bits in an MP_SMALL_INT. +cause: The implementation uses an MP_SMALL_INT to flag args that need to be unpacked. +workaround: Use fewer arguments. +""" + + +def example(*args): + print(len(args)) + + +MORE = ["a", "b", "c"] + +# fmt: off +example( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + *MORE, +) +# fmt: on