From 3f2afe92bf2ed416e77de466bf90a978a947dbc1 Mon Sep 17 00:00:00 2001 From: Rayane Chatrieux Date: Fri, 4 Mar 2022 10:26:08 -0500 Subject: [PATCH 1/2] py/parse.c: Implement function keyword argument syntax restriction. Implemented function keyword argument syntax restriction added with Python 3.8 (listed under issue #7899). This is done by adding a switch in the push_result_rule function in parse.c to turn off the parentheses removal optimization for atom_paren rules encountered while parsing an arglist rule (meaning this atom_paren rule is a function argument) and when the enclosed node is a lone ID. Keeping the atom_paren rule in the parse tree allows the compiler to catch the syntax error when the rule happens to be a keyword argument. In order to determine whether an atom_paren rule is a function argument, a bit is added to the parser structure, which is set in mp_parse when an arglist rule is being parsed (unset once done with it). Other options were considered: hard-coding the distance on the rule stack between an arglist rule and an atom_paren rule in the added switch, using a global boolean variable, using a local boolean variable (by expanding the function signature of the push_result_rule function). Adding a field to the parser structure seemed to provide the best balance between code size increase, code changes, memory usage overhead, code maintainability, etc. Because the compile_atom_paren function did not expect this rule to be a parent for a lone ID, a switch is added in that function to handle this. A new test file fun_kwargs_syntax.py is added to make sure the syntax restriction is in effect, and a test is added to fun2.py to make sure extraneous parenthese around ID function arguments does not cause any issues. Signed-off-by: Rayane Chatrieux --- py/compile.c | 3 +++ py/parse.c | 16 ++++++++++++++++ py/parse.h | 3 +++ tests/basics/fun2.py | 2 ++ tests/basics/fun_kwargs_syntax.py | 18 ++++++++++++++++++ 5 files changed, 42 insertions(+) create mode 100644 tests/basics/fun_kwargs_syntax.py diff --git a/py/compile.c b/py/compile.c index 92736e22e5..48b864281f 100644 --- a/py/compile.c +++ b/py/compile.c @@ -2505,6 +2505,9 @@ STATIC void compile_atom_paren(compiler_t *comp, mp_parse_node_struct_t *pns) { if (MP_PARSE_NODE_IS_NULL(pns->nodes[0])) { // an empty tuple EMIT_ARG(build, 0, MP_EMIT_BUILD_TUPLE); + } else if (MP_PARSE_NODE_IS_ID(pns->nodes[0])) { + // a lone ID + compile_node(comp, pns->nodes[0]); } else { assert(MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp)); pns = (mp_parse_node_struct_t *)pns->nodes[0]; diff --git a/py/parse.c b/py/parse.c index 68c761734a..19ccf86365 100644 --- a/py/parse.c +++ b/py/parse.c @@ -237,6 +237,8 @@ typedef struct _parser_t { mp_parse_tree_t tree; mp_parse_chunk_t *cur_chunk; + uint32_t cur_view; + #if MICROPY_COMP_CONST mp_map_t consts; #endif @@ -806,6 +808,9 @@ STATIC void push_result_rule(parser_t *parser, size_t src_line, uint8_t rule_id, // need to keep parenthesis for () } else if (MP_PARSE_NODE_IS_STRUCT_KIND(pn, RULE_testlist_comp)) { // need to keep parenthesis for (a, b, ...) + } else if (MP_PARSE_NODE_IS_ID(pn) && MP_PARSE_IS_PARSING_ARGLIST(parser)) { + // Keep parentheses around single IDs that are function arguments + // since they may be keyword arguments. } else { // parenthesis around a single expression, so it's just the expression return; @@ -883,6 +888,8 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { parser.tree.chunk = NULL; parser.cur_chunk = NULL; + parser.cur_view = 0x0; + #if MICROPY_COMP_CONST mp_map_init(&parser.consts, 0); #endif @@ -1073,12 +1080,21 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { default: { assert((rule_act & RULE_ACT_KIND_MASK) == RULE_ACT_LIST); + if (rule_id == RULE_arglist && !MP_PARSE_IS_PARSING_ARGLIST(&parser)) { + parser.cur_view |= MP_PARSE_PARSING_ARGLIST; + } + // n=2 is: item item* // n=1 is: item (sep item)* // n=3 is: item (sep item)* [sep] bool had_trailing_sep; if (backtrack) { list_backtrack: + + if (rule_id == RULE_arglist) { + parser.cur_view &= ~MP_PARSE_PARSING_ARGLIST; + } + had_trailing_sep = false; if (n == 2) { if (i == 1) { diff --git a/py/parse.h b/py/parse.h index a6eb380047..4f2fe2fb50 100644 --- a/py/parse.h +++ b/py/parse.h @@ -31,6 +31,9 @@ #include "py/obj.h" +#define MP_PARSE_PARSING_ARGLIST (0x1) +#define MP_PARSE_IS_PARSING_ARGLIST(parser) (((parser)->cur_view) & MP_PARSE_PARSING_ARGLIST) + struct _mp_lexer_t; // a mp_parse_node_t is: diff --git a/tests/basics/fun2.py b/tests/basics/fun2.py index a3c3e7babf..f7560e71c0 100644 --- a/tests/basics/fun2.py +++ b/tests/basics/fun2.py @@ -8,3 +8,5 @@ def g(x): f(4 * x) g(3) +a = 3 +g((a)) # Making sure the extra parentheses cause no issues diff --git a/tests/basics/fun_kwargs_syntax.py b/tests/basics/fun_kwargs_syntax.py new file mode 100644 index 0000000000..955903882e --- /dev/null +++ b/tests/basics/fun_kwargs_syntax.py @@ -0,0 +1,18 @@ +# Test function call keyword argument syntax + +def test_syntax(code): + try: + eval(code) + except SyntaxError: + print("SyntaxError in '{}'".format(code)) + +def f(a): + return a + +# Should throw syntax errors. +test_syntax("f((a)=2)") +test_syntax("f(a()=2)") +test_syntax("f(a or b=2)") +test_syntax("f(2, (a)=3)") +test_syntax("f((a) = 1, (b) = 5)") +test_syntax("f((a, b) = 1)") From 3622ed5cb160f219a1299e6b2cc7857ad7eb564e Mon Sep 17 00:00:00 2001 From: Rayane Chatrieux Date: Sat, 2 Apr 2022 10:07:19 -0400 Subject: [PATCH 2/2] tests/basics/fun_kwargs_syntax: Added a nested call test. --- tests/basics/fun_kwargs_syntax.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/basics/fun_kwargs_syntax.py b/tests/basics/fun_kwargs_syntax.py index 955903882e..4550ec556a 100644 --- a/tests/basics/fun_kwargs_syntax.py +++ b/tests/basics/fun_kwargs_syntax.py @@ -16,3 +16,4 @@ test_syntax("f(a or b=2)") test_syntax("f(2, (a)=3)") test_syntax("f((a) = 1, (b) = 5)") test_syntax("f((a, b) = 1)") +test_syntax("f(f(), (a)=2)")