From bd25445a82c896752dd735f54e3495f0e5e7350a Mon Sep 17 00:00:00 2001
From: Damien
Date: Wed, 16 Oct 2013 20:39:12 +0100
Subject: [PATCH] Implement BC & runtime support for generator/yielding.

---
 py/bc.h      |   3 +-
 py/emitbc.c  |   2 +-
 py/runtime.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++----
 py/runtime.h |   2 +-
 py/vm.c      |  66 +++++++++++++++++-----------
 5 files changed, 155 insertions(+), 37 deletions(-)

diff --git a/py/bc.h b/py/bc.h
index 1a5bcd9b35..7ea2235f70 100644
--- a/py/bc.h
+++ b/py/bc.h
@@ -93,4 +93,5 @@
 #define PYBC_IMPORT_FROM (0xe1)
 #define PYBC_IMPORT_STAR (0xe2)
 
-py_obj_t py_execute_byte_code(const byte *code, uint len, const py_obj_t *args, uint n_args);
+py_obj_t py_execute_byte_code(const byte *code, const py_obj_t *args, uint n_args);
+bool py_execute_byte_code_2(const byte *code, const byte **ip_in_out, py_obj_t *fastn, py_obj_t **sp_in_out);
diff --git a/py/emitbc.c b/py/emitbc.c
index 8f28910c08..06c52a0dc1 100644
--- a/py/emitbc.c
+++ b/py/emitbc.c
@@ -76,7 +76,7 @@ static void emit_bc_end_pass(emit_t *emit) {
         printf("code_size: %u\n", emit->code_size);
     }
     else if (emit->pass == PASS_3) {
-        rt_assign_byte_code(emit->scope->unique_code_id, emit->code_base, emit->code_size, emit->scope->num_params);
+        rt_assign_byte_code(emit->scope->unique_code_id, emit->code_base, emit->code_size, emit->scope->num_params, emit->scope->num_locals, emit->scope->stack_size, (emit->scope->flags & SCOPE_FLAG_GENERATOR) != 0);
     }
 }
diff --git a/py/runtime.c b/py/runtime.c
index e80791df33..0b76df8a5b 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -1,5 +1,6 @@
 // in principle, rt_xxx functions are called only by vm/native/viper and make assumptions about args
 // py_xxx functions are safer and can be called by anyone
+// note that rt_assign_xxx are called only from emit*, and maybe we can rename them to reflect this
 
 #include <stdio.h>
 #include <assert.h>
@@ -50,6 +51,8 @@ typedef enum {
     O_FUN_N,
     O_FUN_BC,
     O_FUN_ASM,
+    O_GEN_WRAP,
+    O_GEN_INSTANCE,
     O_BOUND_METH,
     O_TUPLE,
     O_LIST,
@@ -123,6 +126,15 @@ struct _py_obj_base_t {
             int n_args;
             void *fun;
         } u_fun_asm;
+        struct { // for O_GEN_WRAP
+            int n_state;
+            py_obj_base_t *fun;
+        } u_gen_wrap;
+        struct { // for O_GEN_INSTANCE
+            py_obj_t *state;
+            const byte *ip;
+            py_obj_t *sp;
+        } u_gen_instance;
         struct { // for O_BOUND_METH
             py_obj_t meth;
             py_obj_t self;
@@ -367,10 +379,20 @@ py_obj_t rt_list_append(py_obj_t self_in, py_obj_t arg) {
     return arg;
 }
 
+py_obj_t rt_gen_instance_next(py_obj_t self_in) {
+    py_obj_t ret = rt_iternext(self_in);
+    if (ret == py_const_stop_iteration) {
+        nlr_jump(py_obj_new_exception_0(qstr_from_str_static("StopIteration")));
+    } else {
+        return ret;
+    }
+}
+
 static qstr q_append;
 static qstr q_print;
 static qstr q_len;
 static qstr q___build_class__;
+static qstr q___next__;
 static qstr q_AttributeError;
 static qstr q_IndexError;
 static qstr q_NameError;
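
Note on the two exhaustion signals above: rt_iternext reports exhaustion with the cheap py_const_stop_iteration sentinel, which the VM's iteration opcodes understand, while a Python-level gen.__next__() call goes through rt_gen_instance_next, which promotes the sentinel to a real StopIteration exception via nlr_jump. A minimal sketch of a caller driving an iterator at the sentinel level (illustrative glue, not part of the patch; it assumes only the rt_* API declared in this tree):

    // Walk any iterable to exhaustion and return the last value produced.
    py_obj_t drive_iterator(py_obj_t iterable) {
        py_obj_t it = rt_getiter(iterable);   // identity for O_GEN_INSTANCE (see below)
        py_obj_t last = py_const_none;
        for (;;) {
            py_obj_t v = rt_iternext(it);
            if (v == py_const_stop_iteration) {
                return last;                  // sentinel: no exception is raised
            }
            last = v;
        }
    }
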
qstr_from_str_static("IndexError"); q_NameError = qstr_from_str_static("NameError"); @@ -487,6 +514,7 @@ void rt_init() { unique_codes = NULL; fun_list_append = rt_make_function_2(rt_list_append); + fun_gen_instance_next = rt_make_function_1(rt_gen_instance_next); #ifdef WRITE_NATIVE fp_native = fopen("out-native", "wb"); @@ -514,12 +542,15 @@ static void alloc_unique_codes() { } } -void rt_assign_byte_code(int unique_code_id, byte *code, uint len, int n_args) { +void rt_assign_byte_code(int unique_code_id, byte *code, uint len, int n_args, int n_locals, int n_stack, bool is_generator) { alloc_unique_codes(); assert(unique_code_id < next_unique_code_id); unique_codes[unique_code_id].kind = PY_CODE_BYTE; unique_codes[unique_code_id].n_args = n_args; + unique_codes[unique_code_id].n_locals = n_locals; + unique_codes[unique_code_id].n_stack = n_stack; + unique_codes[unique_code_id].is_generator = is_generator; unique_codes[unique_code_id].u_byte.code = code; unique_codes[unique_code_id].u_byte.len = len; @@ -532,6 +563,9 @@ void rt_assign_native_code(int unique_code_id, py_fun_t fun, uint len, int n_arg assert(1 <= unique_code_id && unique_code_id < next_unique_code_id); unique_codes[unique_code_id].kind = PY_CODE_NATIVE; unique_codes[unique_code_id].n_args = n_args; + unique_codes[unique_code_id].n_locals = 0; + unique_codes[unique_code_id].n_stack = 0; + unique_codes[unique_code_id].is_generator = false; unique_codes[unique_code_id].u_native.fun = fun; #ifdef DEBUG_PRINT @@ -560,6 +594,9 @@ void rt_assign_inline_asm_code(int unique_code_id, py_fun_t fun, uint len, int n assert(1 <= unique_code_id && unique_code_id < next_unique_code_id); unique_codes[unique_code_id].kind = PY_CODE_INLINE_ASM; unique_codes[unique_code_id].n_args = n_args; + unique_codes[unique_code_id].n_locals = 0; + unique_codes[unique_code_id].n_stack = 0; + unique_codes[unique_code_id].is_generator = false; unique_codes[unique_code_id].u_inline_asm.fun = fun; #ifdef DEBUG_PRINT @@ -625,6 +662,8 @@ const char *py_obj_get_type_str(py_obj_t o_in) { case O_FUN_N: case O_FUN_BC: return "function"; + case O_GEN_INSTANCE: + return "generator"; case O_TUPLE: return "tuple"; case O_LIST: @@ -669,10 +708,16 @@ void py_obj_print(py_obj_t o_in) { printf("%f", o->u_flt); break; #endif + case O_EXCEPTION_0: + printf("%s", qstr_str(o->u_exc0.id)); + break; case O_EXCEPTION_2: printf("%s: ", qstr_str(o->u_exc2.id)); printf(o->u_exc2.fmt, o->u_exc2.s1, o->u_exc2.s2); break; + case O_GEN_INSTANCE: + printf("", o); + break; case O_TUPLE: printf("("); for (int i = 0; i < o->u_tuple_list.len; i++) { @@ -861,7 +906,8 @@ py_obj_t rt_binary_op(int op, py_obj_t lhs, py_obj_t rhs) { switch (op) { case RT_BINARY_OP_ADD: case RT_BINARY_OP_INPLACE_ADD: val = FROM_SMALL_INT(lhs) + FROM_SMALL_INT(rhs); break; - case RT_BINARY_OP_SUBTRACT: val = FROM_SMALL_INT(lhs) - FROM_SMALL_INT(rhs); break; + case RT_BINARY_OP_SUBTRACT: + case RT_BINARY_OP_INPLACE_SUBTRACT: val = FROM_SMALL_INT(lhs) - FROM_SMALL_INT(rhs); break; case RT_BINARY_OP_MULTIPLY: val = FROM_SMALL_INT(lhs) * FROM_SMALL_INT(rhs); break; case RT_BINARY_OP_FLOOR_DIVIDE: val = FROM_SMALL_INT(lhs) / FROM_SMALL_INT(rhs); break; #if MICROPY_ENABLE_FLOAT @@ -938,6 +984,17 @@ py_obj_t rt_make_function_from_id(int unique_code_id) { default: assert(0); } + + // check for generator functions and if so wrap in generator object + if (c->is_generator) { + py_obj_base_t *o2 = m_new(py_obj_base_t, 1); + o2->kind = O_GEN_WRAP; + // we have at least 3 locals so the bc can write back fast[0,1,2] safely; 
@@ -1071,7 +1128,7 @@ py_obj_t rt_call_function_n(py_obj_t fun, int n_args, const py_obj_t *args) {
             goto bad_n_args;
         }
         DEBUG_OP_printf("calling byte code %p(n_args=%d)\n", o->u_fun_bc.code, n_args);
-        return py_execute_byte_code(o->u_fun_bc.code, o->u_fun_bc.len, args, n_args);
+        return py_execute_byte_code(o->u_fun_bc.code, args, n_args);
 
     } else if (IS_O(fun, O_FUN_ASM)) {
         py_obj_base_t *o = fun;
@@ -1095,6 +1152,28 @@ py_obj_t rt_call_function_n(py_obj_t fun, int n_args, const py_obj_t *args) {
         }
         return rt_convert_val_from_inline_asm(ret);
 
+    } else if (IS_O(fun, O_GEN_WRAP)) {
+        py_obj_base_t *o = fun;
+        py_obj_base_t *o_fun = o->u_gen_wrap.fun;
+        assert(o_fun->kind == O_FUN_BC); // TODO
+        if (n_args != o_fun->u_fun_bc.n_args) {
+            n_args_fun = o_fun->u_fun_bc.n_args;
+            goto bad_n_args;
+        }
+        py_obj_t *state = m_new(py_obj_t, 1 + o->u_gen_wrap.n_state);
+        // put function object at first slot in state (to keep u_gen_instance small)
+        state[0] = o_fun;
+        // init args
+        for (int i = 0; i < n_args; i++) {
+            state[1 + i] = args[n_args - 1 - i];
+        }
+        py_obj_base_t *o2 = m_new(py_obj_base_t, 1);
+        o2->kind = O_GEN_INSTANCE;
+        o2->u_gen_instance.state = state;
+        o2->u_gen_instance.ip = o_fun->u_fun_bc.code;
+        o2->u_gen_instance.sp = state + o->u_gen_wrap.n_state;
+        return o2;
+
     } else if (IS_O(fun, O_BOUND_METH)) {
         py_obj_base_t *o = fun;
         DEBUG_OP_printf("calling bound method %p(self=%p, n_args=%d)\n", o->u_bound_meth.meth, o->u_bound_meth.self, n_args);
@@ -1132,9 +1211,7 @@ py_obj_t rt_call_function_n(py_obj_t fun, int n_args, const py_obj_t *args) {
     }
 
 bad_n_args:
-    printf("TypeError: function takes %d positional arguments but %d were given\n", n_args_fun, n_args);
-    assert(0);
-    return py_const_none;
+    nlr_jump(py_obj_new_exception_2(q_TypeError, "function takes %d positional arguments but %d were given", (const char*)(machine_int_t)n_args_fun, (const char*)(machine_int_t)n_args));
 }
 
 // args contains: arg(n_args-1) arg(n_args-2) ... arg(0) self/NULL fun
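
Note on the O_GEN_WRAP branch above: calling a generator function executes no byte code; it only allocates the state block and records a paused frame. Since args[] arrives in reverse order, state[1 + i] = args[n_args - 1 - i] lands the arguments in the locals in normal order. For a hypothetical call gen_fn(a, b) the freshly built O_GEN_INSTANCE looks like this (worked example only, not code from the patch):

    // args[0] == b, args[1] == a (reverse order), n_args == 2:
    //   state[0] = o_fun                  // the wrapped O_FUN_BC
    //   state[1] = a                      // local/fast 0
    //   state[2] = b                      // local/fast 1
    //   ip       = o_fun->u_fun_bc.code   // body not yet entered
    //   sp       = state + n_state        // empty value stack (grows down)
    // The body first runs when rt_iternext/__next__ resumes this frame.
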
@@ -1287,7 +1364,11 @@ no_attr:
 
 void rt_load_method(py_obj_t base, qstr attr, py_obj_t *dest) {
     DEBUG_OP_printf("load method %s\n", qstr_str(attr));
-    if (IS_O(base, O_LIST) && attr == q_append) {
+    if (IS_O(base, O_GEN_INSTANCE) && attr == q___next__) {
+        dest[1] = fun_gen_instance_next;
+        dest[0] = base;
+        return;
+    } else if (IS_O(base, O_LIST) && attr == q_append) {
         dest[1] = fun_list_append;
         dest[0] = base;
         return;
@@ -1354,7 +1435,9 @@ void rt_store_subscr(py_obj_t base, py_obj_t index, py_obj_t value) {
 }
 
 py_obj_t rt_getiter(py_obj_t o_in) {
-    if (IS_O(o_in, O_RANGE)) {
+    if (IS_O(o_in, O_GEN_INSTANCE)) {
+        return o_in;
+    } else if (IS_O(o_in, O_RANGE)) {
         py_obj_base_t *o = o_in;
         return py_obj_new_range_iterator(o->u_range.start, o->u_range.stop, o->u_range.step);
     } else if (IS_O(o_in, O_TUPLE)) {
@@ -1367,7 +1450,23 @@ py_obj_t rt_iternext(py_obj_t o_in) {
-    if (IS_O(o_in, O_RANGE_IT)) {
+    if (IS_O(o_in, O_GEN_INSTANCE)) {
+        py_obj_base_t *self = o_in;
+        py_obj_base_t *fun = self->u_gen_instance.state[0];
+        assert(fun->kind == O_FUN_BC);
+        bool yield = py_execute_byte_code_2(fun->u_fun_bc.code, &self->u_gen_instance.ip, &self->u_gen_instance.state[1], &self->u_gen_instance.sp);
+        if (yield) {
+            return *self->u_gen_instance.sp;
+        } else {
+            if (*self->u_gen_instance.sp == py_const_none) {
+                return py_const_stop_iteration;
+            } else {
+                // TODO return StopIteration with value *self->u_gen_instance.sp
+                return py_const_stop_iteration;
+            }
+        }
+
+    } else if (IS_O(o_in, O_RANGE_IT)) {
         py_obj_base_t *o = o_in;
         if ((o->u_range_it.step > 0 && o->u_range_it.cur < o->u_range_it.stop) || (o->u_range_it.step < 0 && o->u_range_it.cur > o->u_range_it.stop)) {
             py_obj_t o_out = TO_SMALL_INT(o->u_range_it.cur);
@@ -1376,6 +1475,7 @@
         } else {
             return py_const_stop_iteration;
         }
+
     } else if (IS_O(o_in, O_TUPLE_IT) || IS_O(o_in, O_LIST_IT)) {
         py_obj_base_t *o = o_in;
         if (o->u_tuple_list_it.cur < o->u_tuple_list_it.obj->u_tuple_list.len) {
@@ -1385,6 +1485,7 @@
         } else {
             return py_const_stop_iteration;
         }
+
     } else {
         nlr_jump(py_obj_new_exception_2(q_TypeError, "? '%s' object is not iterable", py_obj_get_type_str(o_in), NULL));
     }
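
Note on the resume contract used by the O_GEN_INSTANCE case above: py_execute_byte_code_2 runs until the frame yields or returns, writing ip and sp back through the in/out pointers, so the next call continues right after the yield. It returns true on PYBC_YIELD_VALUE and false on PYBC_RETURN_VALUE, and in both cases the value of interest is at *sp. A condensed view of the boundary (illustrative wrapper assuming the structs from this patch; not itself part of the patch):

    static bool gen_resume(py_obj_base_t *self, py_obj_t *value_out) {
        py_obj_base_t *fun = self->u_gen_instance.state[0];
        bool yielded = py_execute_byte_code_2(fun->u_fun_bc.code,
                                              &self->u_gen_instance.ip,
                                              &self->u_gen_instance.state[1],
                                              &self->u_gen_instance.sp);
        *value_out = *self->u_gen_instance.sp; // yielded value, or return value
        return yielded;                        // false: the generator finished
    }
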
diff --git a/py/runtime.h b/py/runtime.h
index e9adbe1f0e..7a806eb55a 100644
--- a/py/runtime.h
+++ b/py/runtime.h
@@ -87,7 +87,7 @@ extern py_obj_t py_const_stop_iteration; // special object indicating end of ite
 void rt_init();
 void rt_deinit();
 int rt_get_new_unique_code_id();
-void rt_assign_byte_code(int unique_code_id, byte *code, uint len, int n_args);
+void rt_assign_byte_code(int unique_code_id, byte *code, uint len, int n_args, int n_locals, int n_stack, bool is_generator);
 void rt_assign_native_code(int unique_code_id, py_fun_t f, uint len, int n_args);
 void rt_assign_inline_asm_code(int unique_code_id, py_fun_t f, uint len, int n_args);
 py_fun_t rt_get_code(qstr id);
diff --git a/py/vm.c b/py/vm.c
index 2821d40470..a4fbf2f16c 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -10,43 +10,50 @@
 #include "runtime.h"
 #include "bc.h"
 
+// (value) stack grows down (to be compatible with native code when passing pointers to the stack), top element is pointed to
+// exception stack grows up, top element is pointed to
+
 #define DECODE_UINT do { unum = *ip++; if (unum > 127) { unum = ((unum & 0x3f) << 8) | (*ip++); } } while (0)
 #define DECODE_QSTR do { qstr = *ip++; if (qstr > 127) { qstr = ((qstr & 0x3f) << 8) | (*ip++); } } while (0)
 #define PUSH(val) *--sp = (val)
 #define POP() (*sp++)
 
 // args are in reverse order in array
-py_obj_t py_execute_byte_code(const byte *code, uint len, const py_obj_t *args, uint n_args) {
+py_obj_t py_execute_byte_code(const byte *code, const py_obj_t *args, uint n_args) {
+    py_obj_t state[18]; // TODO allocate properly
+    // init args
+    for (int i = 0; i < n_args; i++) {
+        assert(i < 8);
+        state[i] = args[n_args - 1 - i];
+    }
+    py_obj_t *sp = &state[18];
+    const byte *ip = code;
+    if (py_execute_byte_code_2(code, &ip, &state[0], &sp)) {
+        // it shouldn't yield
+        assert(0);
+    }
+    assert(sp == &state[17]);
+    return *sp;
+}
+
+// fastn has items in normal order
+// sp points to top of stack which grows down
+bool py_execute_byte_code_2(const byte *code, const byte **ip_in_out, py_obj_t *fastn, py_obj_t **sp_in_out) {
     // careful: be sure to declare volatile any variables read in the exception handler (written is ok, I think)
-    const byte *ip = code;
-    py_obj_t stack[10];
-    py_obj_t *sp = &stack[10]; // stack grows down, sp points to top of stack
+    const byte *ip = *ip_in_out;
+    py_obj_t *sp = *sp_in_out;
     machine_uint_t unum;
     machine_int_t snum;
     qstr qstr;
     py_obj_t obj1, obj2;
-    py_obj_t fast0 = NULL, fast1 = NULL, fast2 = NULL, fastn[4] = {NULL, NULL, NULL, NULL};
+    py_obj_t fast0 = fastn[0], fast1 = fastn[1], fast2 = fastn[2];
     nlr_buf_t nlr;
 
     // on the exception stack we store (ip, sp) for each block
     machine_uint_t exc_stack[8];
     machine_uint_t *volatile exc_sp = &exc_stack[-1]; // stack grows up, exc_sp points to top of stack
 
-    // init args
-    for (int i = 0; i < n_args; i++) {
-        if (i == 0) {
-            fast0 = args[n_args - 1];
-        } else if (i == 1) {
-            fast1 = args[n_args - 2];
-        } else if (i == 2) {
-            fast2 = args[n_args - 3];
-        } else {
-            assert(i - 3 < 4);
-            fastn[i - 3] = args[n_args - 1 - i];
-        }
-    }
-
     // outer exception handling loop
     for (;;) {
         if (nlr_push(&nlr) == 0) {
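
Note on the split above: py_execute_byte_code is now a thin wrapper that owns a temporary frame (the fixed state[18], flagged TODO), and py_execute_byte_code_2 interprets byte code against caller-owned ip/fastn/sp. Keeping the frame outside the interpreter's C call is what lets a generator suspend: its O_GEN_INSTANCE owns the state and re-enters the VM once per resume. A hypothetical driver using the two-level API directly (not in the patch):

    py_obj_t run_to_completion(const byte *code, int n_state) {
        py_obj_t *state = m_new(py_obj_t, n_state); // locals + value stack
        py_obj_t *sp = state + n_state;             // empty stack, grows down
        const byte *ip = code;
        while (py_execute_byte_code_2(code, &ip, state, &sp)) {
            // true means the code yielded; simply resume it
        }
        return *sp; // PYBC_RETURN_VALUE left the result on top of the stack
    }
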
@@ -99,7 +106,7 @@
 
                 case PYBC_LOAD_FAST_N:
                     DECODE_UINT;
-                    PUSH(fastn[unum - 3]);
+                    PUSH(fastn[unum]);
                     break;
 
                 case PYBC_LOAD_NAME:
@@ -141,7 +148,7 @@
 
                 case PYBC_STORE_FAST_N:
                     DECODE_UINT;
-                    fastn[unum - 3] = POP();
+                    fastn[unum] = POP();
                     break;
 
                 case PYBC_STORE_NAME:
@@ -251,7 +258,6 @@
                     //exc_sp--; // discard ip
                     exc_sp -= 2;
                     //sp += 3; // pop 3 exception values
-                    assert(sp <= &stack[10]);
                     break;
 
                 case PYBC_BINARY_OP:
@@ -330,15 +336,24 @@
 
                 case PYBC_RETURN_VALUE:
                     nlr_pop();
-                    assert(sp == &stack[9]);
+                    *sp_in_out = sp;
                     assert(exc_sp == &exc_stack[-1]);
-                    return *sp;
+                    return false;
+
+                case PYBC_YIELD_VALUE:
+                    nlr_pop();
+                    *ip_in_out = ip;
+                    fastn[0] = fast0;
+                    fastn[1] = fast1;
+                    fastn[2] = fast2;
+                    *sp_in_out = sp;
+                    return true;
 
                 default:
                     printf("code %p, offset %u, byte code 0x%02x not implemented\n", code, (uint)(ip - code), op);
                     assert(0);
                     nlr_pop();
-                    return py_const_none;
+                    return false;
                 }
             }
@@ -355,6 +370,7 @@
                 PUSH(py_const_none);
             } else {
                 // re-raise exception
+                // TODO what to do if this is a generator??
                 nlr_jump(nlr.ret_val);
             }
         }
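
Note on the fast-local indexing change above (fastn[unum] instead of fastn[unum - 3]): fastn now points at the complete locals array inside the state block, so local i lives at fastn[i]; locals 0-2 are still cached in the C variables fast0/fast1/fast2 and flushed back only at PYBC_YIELD_VALUE, which is why the state block reserves at least 3 local slots. An end-to-end sketch of the whole feature (hypothetical harness, not part of the patch; assumes unique_code_id was compiled from a generator such as "def gen(): yield 1; yield 2"):

    void example(int unique_code_id) {
        py_obj_t gen_fn = rt_make_function_from_id(unique_code_id); // O_GEN_WRAP
        py_obj_t gen = rt_call_function_n(gen_fn, 0, NULL);         // O_GEN_INSTANCE
        py_obj_t it = rt_getiter(gen);                              // identity
        py_obj_t v;
        while ((v = rt_iternext(it)) != py_const_stop_iteration) {
            py_obj_print(v); // prints 1, then 2
        }
    }
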