py/runtime: Allow multiple *args in a function call.

This is a partial implementation of PEP 448 that allows unpacking multiple star args in a function or method call. It is implemented by changing the emitted bytecode so that both ordinary positional args and star args are stored as positional args. A bitmap is added to indicate whether the argument at a given position is an ordinary positional argument or a star arg to be unpacked. In the generated code, this new bitmap takes the place of the old star arg. The bitmap is stored as a small int, so only the first N arguments can be star args, where N is the number of bits in a small int. The runtime is modified to interpret this new bytecode format while still trying to perform as few memory reallocations as possible.

Signed-off-by: David Lechner <david@pybricks.com>
2020-03-24 23:54:45 -05:00 · 2020-03-24 23:54:45 -05:00 · 783b1a868f
commit 783b1a868f
--- a/docs/differences/python_35.rst
+++ b/docs/differences/python_35.rst
@ -8,7 +8,7 @@ Below is a list of finalised/accepted PEPs for Python 3.5 grouped into their imp
   +----------------------------------------------------------------------------------------------------------+---------------+
   | **Extensions to the syntax:**                                                                            | **Status**    |
   +--------------------------------------------------------+-------------------------------------------------+---------------+
-   | `PEP 448 <https://www.python.org/dev/peps/pep-0448/>`_ | additional unpacking generalizations            |               |
+   | `PEP 448 <https://www.python.org/dev/peps/pep-0448/>`_ | additional unpacking generalizations            | Partial       |
   +--------------------------------------------------------+-------------------------------------------------+---------------+
   | `PEP 465 <https://www.python.org/dev/peps/pep-0465/>`_ | a new matrix multiplication operator            | Completed     |
   +--------------------------------------------------------+-------------------------------------------------+---------------+
--- a/py/compile.c
+++ b/py/compile.c
@ -37,6 +37,7 @@
 #include "py/asmbase.h"
 #include "py/nativeglue.h"
 #include "py/persistentcode.h"
+#include "py/smallint.h"

 #if MICROPY_ENABLE_COMPILER

@ -2397,17 +2398,30 @@ STATIC void compile_trailer_paren_helper(compiler_t *comp, mp_parse_node_t pn_ar
    int n_positional = n_positional_extra;
    uint n_keyword = 0;
    uint star_flags = 0;
-    mp_parse_node_struct_t *star_args_node = NULL;
+    mp_uint_t star_args = 0;
    for (size_t i = 0; i < n_args; i++) {
        if (MP_PARSE_NODE_IS_STRUCT(args[i])) {
            mp_parse_node_struct_t *pns_arg = (mp_parse_node_struct_t *)args[i];
            if (MP_PARSE_NODE_STRUCT_KIND(pns_arg) == PN_arglist_star) {
-                if (star_flags & MP_EMIT_STAR_FLAG_SINGLE) {
-                    compile_syntax_error(comp, (mp_parse_node_t)pns_arg, MP_ERROR_TEXT("can't have multiple *x"));
+                if (star_flags & MP_EMIT_STAR_FLAG_DOUBLE) {
+                    compile_syntax_error(comp, (mp_parse_node_t)pns_arg, MP_ERROR_TEXT("* arg after **"));
+                    return;
+                }
+                #if MICROPY_DYNAMIC_COMPILER
+                if (i > mp_dynamic_compiler.small_int_bits)
+                #else
+                if (i > MP_SMALL_INT_BITS)
+                #endif
+                {
+                    // If there are not enough bits in a small int to fit the flag, then we consider
+                    // it a syntax error. It should be unlikely to have this many args in practice.
+                    compile_syntax_error(comp, (mp_parse_node_t)pns_arg, MP_ERROR_TEXT("too many args"));
                    return;
                }
                star_flags |= MP_EMIT_STAR_FLAG_SINGLE;
-                star_args_node = pns_arg;
+                star_args |= 1 << i;
+                compile_node(comp, pns_arg->nodes[0]);
+                n_positional++;
            } else if (MP_PARSE_NODE_STRUCT_KIND(pns_arg) == PN_arglist_dbl_star) {
                star_flags |= MP_EMIT_STAR_FLAG_DOUBLE;
                // double-star args are stored as kw arg with key of None
@ -2438,12 +2452,12 @@ STATIC void compile_trailer_paren_helper(compiler_t *comp, mp_parse_node_t pn_ar
            }
        } else {
        normal_argument:
-            if (star_flags) {
-                compile_syntax_error(comp, args[i], MP_ERROR_TEXT("non-keyword arg after */**"));
+            if (star_flags & MP_EMIT_STAR_FLAG_DOUBLE) {
+                compile_syntax_error(comp, args[i], MP_ERROR_TEXT("positional arg after **"));
                return;
            }
            if (n_keyword > 0) {
-                compile_syntax_error(comp, args[i], MP_ERROR_TEXT("non-keyword arg after keyword arg"));
+                compile_syntax_error(comp, args[i], MP_ERROR_TEXT("positional arg after keyword arg"));
                return;
            }
            compile_node(comp, args[i]);
@ -2451,14 +2465,9 @@ STATIC void compile_trailer_paren_helper(compiler_t *comp, mp_parse_node_t pn_ar
        }
    }

-    // compile the star/double-star arguments if we had them
-    // if we had one but not the other then we load "null" as a place holder
    if (star_flags != 0) {
-        if (star_args_node == NULL) {
-            EMIT(load_null);
-        } else {
-            compile_node(comp, star_args_node->nodes[0]);
-        }
+        // one extra object that contains the star_args map
+        EMIT_ARG(load_const_small_int, star_args);
    }

    // emit the function/method call
--- a/py/runtime.c
+++ b/py/runtime.c
@ -701,9 +701,9 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_
    }
    uint n_args = n_args_n_kw & 0xff;
    uint n_kw = (n_args_n_kw >> 8) & 0xff;
-    mp_obj_t pos_seq = args[n_args + 2 * n_kw]; // may be MP_OBJ_NULL
+    mp_uint_t star_args = mp_obj_get_int_truncated(args[n_args + 2 * n_kw]);

-    DEBUG_OP_printf("call method var (fun=%p, self=%p, n_args=%u, n_kw=%u, args=%p, seq=%p)\n", fun, self, n_args, n_kw, args, pos_seq);
+    DEBUG_OP_printf("call method var (fun=%p, self=%p, n_args=%u, n_kw=%u, args=%p, map=%u)\n", fun, self, n_args, n_kw, args, star_args);

    // We need to create the following array of objects:
    //     args[0 .. n_args]  unpacked(pos_seq)  args[n_args .. n_args + 2 * n_kw]  unpacked(kw_dict)
@ -714,6 +714,20 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_
    uint args2_alloc;
    uint args2_len = 0;

+    // Try to get a hint for unpacked * args length
+    uint list_len = 0;
+
+    if (star_args != 0) {
+        for (uint i = 0; i < n_args; i++) {
+            if (star_args & (1 << i)) {
+                mp_obj_t len = mp_obj_len_maybe(args[i]);
+                if (len != MP_OBJ_NULL) {
+                    list_len += mp_obj_get_int(len);
+                }
+            }
+        }
+    }
+
    // Try to get a hint for the size of the kw_dict
    uint kw_dict_len = 0;

@ -727,8 +741,8 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_

    // Extract the pos_seq sequence to the new args array.
    // Note that it can be arbitrary iterator.
-    if (pos_seq == MP_OBJ_NULL) {
-        // no sequence
+    if (star_args == 0) {
+        // no star args to unpack

        // allocate memory for the new array of args
        args2_alloc = 1 + n_args + 2 * (n_kw + kw_dict_len);
@ -742,33 +756,11 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_
        // copy the fixed pos args
        mp_seq_copy(args2 + args2_len, args, n_args, mp_obj_t);
        args2_len += n_args;
-
-    } else if (mp_obj_is_type(pos_seq, &mp_type_tuple) || mp_obj_is_type(pos_seq, &mp_type_list)) {
-        // optimise the case of a tuple and list
-
-        // get the items
-        size_t len;
-        mp_obj_t *items;
-        mp_obj_get_array(pos_seq, &len, &items);
-
-        // allocate memory for the new array of args
-        args2_alloc = 1 + n_args + len + 2 * (n_kw + kw_dict_len);
-        args2 = mp_nonlocal_alloc(args2_alloc * sizeof(mp_obj_t));
-
-        // copy the self
-        if (self != MP_OBJ_NULL) {
-            args2[args2_len++] = self;
-        }
-
-        // copy the fixed and variable position args
-        mp_seq_cat(args2 + args2_len, args, n_args, items, len, mp_obj_t);
-        args2_len += n_args + len;
-
    } else {
-        // generic iterator
+        // at least one star arg to unpack

        // allocate memory for the new array of args
-        args2_alloc = 1 + n_args + 2 * (n_kw + kw_dict_len) + 3;
+        args2_alloc = 1 + n_args + list_len + 2 * (n_kw + kw_dict_len);
        args2 = mp_nonlocal_alloc(args2_alloc * sizeof(mp_obj_t));

        // copy the self
@ -776,26 +768,57 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_
            args2[args2_len++] = self;
        }

-        // copy the fixed position args
-        mp_seq_copy(args2 + args2_len, args, n_args, mp_obj_t);
-        args2_len += n_args;
+        for (uint i = 0; i < n_args; i++) {
+            mp_obj_t arg = args[i];
+            if (star_args & (1 << i)) {
+                // star arg
+                if (mp_obj_is_type(arg, &mp_type_tuple) || mp_obj_is_type(arg, &mp_type_list)) {
+                    // optimise the case of a tuple and list

-        // extract the variable position args from the iterator
-        mp_obj_iter_buf_t iter_buf;
-        mp_obj_t iterable = mp_getiter(pos_seq, &iter_buf);
-        mp_obj_t item;
-        while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
-            if (args2_len >= args2_alloc) {
-                args2 = mp_nonlocal_realloc(args2, args2_alloc * sizeof(mp_obj_t), args2_alloc * 2 * sizeof(mp_obj_t));
-                args2_alloc *= 2;
+                    // get the items
+                    size_t len;
+                    mp_obj_t *items;
+                    mp_obj_get_array(arg, &len, &items);
+
+                    // copy the items
+                    assert(args2_len + len <= args2_alloc);
+                    mp_seq_copy(args2 + args2_len, items, len, mp_obj_t);
+                    args2_len += len;
+                } else {
+                    // generic iterator
+
+                    // extract the variable position args from the iterator
+                    mp_obj_iter_buf_t iter_buf;
+                    mp_obj_t iterable = mp_getiter(arg, &iter_buf);
+                    mp_obj_t item;
+                    while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
+                        if (args2_len >= args2_alloc) {
+                            args2 = mp_nonlocal_realloc(args2, args2_alloc * sizeof(mp_obj_t),
+                                args2_alloc * 2 * sizeof(mp_obj_t));
+                            args2_alloc *= 2;
+                        }
+                        args2[args2_len++] = item;
+                    }
+                }
+            } else {
+                // normal argument
+                assert(args2_len < args2_alloc);
+                args2[args2_len++] = arg;
            }
-            args2[args2_len++] = item;
        }
    }

    // The size of the args2 array now is the number of positional args.
    uint pos_args_len = args2_len;

+    // ensure there is still enough room for kw args
+    if (args2_len + 2 * (n_kw + kw_dict_len) > args2_alloc) {
+        uint new_alloc = args2_len + 2 * (n_kw + kw_dict_len);
+        args2 = mp_nonlocal_realloc(args2, args2_alloc * sizeof(mp_obj_t),
+            new_alloc * sizeof(mp_obj_t));
+        args2_alloc = new_alloc;
+    }
+
    // Copy the kw args.
    for (uint i = 0; i < n_kw; i++) {
        mp_obj_t kw_key = args[n_args + i * 2];
--- a/py/vm.c
+++ b/py/vm.c
@ -949,7 +949,7 @@ unwind_jump:;
                    // unum & 0xff == n_positional
                    // (unum >> 8) & 0xff == n_keyword
                    // We have following stack layout here:
-                    // fun arg0 arg1 ... kw0 val0 kw1 val1 ... seq <- TOS
+                    // fun arg0 arg1 ... kw0 val0 kw1 val1 ... bitmap <- TOS
                    sp -= (unum & 0xff) + ((unum >> 7) & 0x1fe) + 1;
                    #if MICROPY_STACKLESS
                    if (mp_obj_get_type(*sp) == &mp_type_fun_bc) {
@ -1034,7 +1034,7 @@ unwind_jump:;
                    // unum & 0xff == n_positional
                    // (unum >> 8) & 0xff == n_keyword
                    // We have following stack layout here:
-                    // fun self arg0 arg1 ... kw0 val0 kw1 val1 ... seq <- TOS
+                    // fun self arg0 arg1 ... kw0 val0 kw1 val1 ... bitmap <- TOS
                    sp -= (unum & 0xff) + ((unum >> 7) & 0x1fe) + 2;
                    #if MICROPY_STACKLESS
                    if (mp_obj_get_type(*sp) == &mp_type_fun_bc) {
--- a/tests/basics/fun_callstar.py
+++ b/tests/basics/fun_callstar.py
@ -3,10 +3,16 @@
 def foo(a, b, c):
    print(a, b, c)

+foo(*(), 1, 2, 3)
+foo(*(1,), 2, 3)
+foo(*(1, 2), 3)
 foo(*(1, 2, 3))
 foo(1, *(2, 3))
 foo(1, 2, *(3,))
 foo(1, 2, 3, *())
+foo(*(1,), 2, *(3,))
+foo(*(1, 2), *(3,))
+foo(*(1,), *(2, 3))

 # Another sequence type
 foo(1, 2, *[100])
@ -29,10 +35,16 @@ class A:
        print(a, b, c)

 a = A()
+a.foo(*(), 1, 2, 3)
+a.foo(*(1,), 2, 3)
+a.foo(*(1, 2), 3)
 a.foo(*(1, 2, 3))
 a.foo(1, *(2, 3))
 a.foo(1, 2, *(3,))
 a.foo(1, 2, 3, *())
+a.foo(*(1,), 2, *(3,))
+a.foo(*(1, 2), *(3,))
+a.foo(*(1,), *(2, 3))

 # Another sequence type
 a.foo(1, 2, *[100])
--- a/tests/basics/fun_callstardblstar.py
+++ b/tests/basics/fun_callstardblstar.py
@ -6,6 +6,11 @@ def f(a, b, c, d):
 f(*(1, 2), **{'c':3, 'd':4})
 f(*(1, 2), **{['c', 'd'][i]:(3 + i) for i in range(2)})

+try:
+    eval("f(**{'a': 1}, *(2, 3, 4))")
+except SyntaxError:
+    print("SyntaxError")
+
 # test calling a method with *tuple and **dict

 class A:
@ -15,3 +20,8 @@ class A:
 a = A()
 a.f(*(1, 2), **{'c':3, 'd':4})
 a.f(*(1, 2), **{['c', 'd'][i]:(3 + i) for i in range(2)})
+
+try:
+    eval("a.f(**{'a': 1}, *(2, 3, 4))")
+except SyntaxError:
+    print("SyntaxError")
--- a/tests/basics/fun_kwvarargs.py
+++ b/tests/basics/fun_kwvarargs.py
@ -23,3 +23,16 @@ def f4(*vargs, **kwargs):
 f4(*(1, 2))
 f4(kw_arg=3)
 f4(*(1, 2), kw_arg=3)
+
+
+# test evaluation order of arguments
+def f5(*vargs, **kwargs):
+    print(vargs, kwargs)
+
+
+def print_ret(x):
+    print(x)
+    return x
+
+
+f5(*print_ret(["a", "b"]), kw_arg=print_ret(None))
--- a/tests/basics/python34.py
+++ b/tests/basics/python34.py
@ -6,26 +6,23 @@ except NameError:
    print("SKIP")
    raise SystemExit

-# from basics/fun_kwvarargs.py
-# test evaluation order of arguments (in 3.4 it's backwards, 3.5 it's fixed)
-def f4(*vargs, **kwargs):
-    print(vargs, kwargs)
+
 def print_ret(x):
    print(x)
    return x
-f4(*print_ret(['a', 'b']), kw_arg=print_ret(None))

 # test evaluation order of dictionary key/value pair (in 3.4 it's backwards)
 {print_ret(1):print_ret(2)}

+
 # from basics/syntaxerror.py
 def test_syntax(code):
    try:
        exec(code)
    except SyntaxError:
        print("SyntaxError")
-test_syntax("f(*a, *b)") # can't have multiple * (in 3.5 we can)
-test_syntax("f(*a, b)") # can't have positional after *
+
+
 test_syntax("f(**a, b)") # can't have positional after **
 test_syntax("() = []") # can't assign to empty tuple (in 3.6 we can)
 test_syntax("del ()") # can't delete empty tuple (in 3.6 we can)
--- a/tests/basics/python34.py.exp
+++ b/tests/basics/python34.py.exp
@ -1,13 +1,8 @@
-None
-['a', 'b']
-('a', 'b') {'kw_arg': None}
 2
 1
 SyntaxError
 SyntaxError
 SyntaxError
-SyntaxError
-SyntaxError
 3.4
 3 4
 IndexError('foo',)
--- a/tests/cpydiff/syntax_arg_unpacking.py
+++ b/tests/cpydiff/syntax_arg_unpacking.py
@ -0,0 +1,23 @@
+"""
+categories: Syntax
+description: Argument unpacking does not work if the argument being unpacked is the nth or greater argument where n is the number of bits in an MP_SMALL_INT.
+cause: The implementation uses an MP_SMALL_INT to flag args that need to be unpacked.
+workaround: Use fewer arguments.
+"""
+
+
+def example(*args):
+    print(len(args))
+
+
+MORE = ["a", "b", "c"]
+
+# fmt: off
+example(
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+    *MORE,
+)
+# fmt: on