From c8e9c0d89afa90694790fd48e1bf527487a5beb7 Mon Sep 17 00:00:00 2001 From: Damien George Date: Mon, 2 Nov 2015 17:27:18 +0000 Subject: [PATCH] py: Add MICROPY_PERSISTENT_CODE so code can persist beyond the runtime. Main changes when MICROPY_PERSISTENT_CODE is enabled are: - qstrs are encoded as 2-byte fixed width in the bytecode - all pointers are removed from bytecode and put in const_table (this includes const objects and raw code pointers) Ultimately this option will enable persistence for not just bytecode but also native code. --- py/emitbc.c | 98 ++++++++++++++++++++++++++++++++++++++++--------- py/emitnative.c | 12 ++++-- py/mpconfig.h | 5 +++ py/objfun.c | 4 ++ py/showbc.c | 21 +++++++++++ py/vm.c | 21 +++++++++++ 6 files changed, 140 insertions(+), 21 deletions(-) diff --git a/py/emitbc.c b/py/emitbc.c index abe782b0d3..4cc0ca646a 100644 --- a/py/emitbc.c +++ b/py/emitbc.c @@ -56,7 +56,14 @@ struct _emit_t { mp_uint_t bytecode_offset; mp_uint_t bytecode_size; byte *code_base; // stores both byte code and code info + + #if MICROPY_PERSISTENT_CODE + uint16_t ct_cur_obj; + uint16_t ct_num_obj; + uint16_t ct_cur_raw_code; + #endif mp_uint_t *const_table; + // Accessed as mp_uint_t, so must be aligned as such byte dummy_data[DUMMY_DATA_SIZE]; }; @@ -108,10 +115,6 @@ STATIC byte *emit_get_cur_to_write_code_info(emit_t *emit, int num_bytes_to_writ } } -STATIC void emit_align_code_info_to_machine_word(emit_t *emit) { - emit->code_info_offset = (emit->code_info_offset + sizeof(mp_uint_t) - 1) & (~(sizeof(mp_uint_t) - 1)); -} - STATIC void emit_write_code_info_byte(emit_t* emit, byte val) { *emit_get_cur_to_write_code_info(emit, 1) = val; } @@ -121,7 +124,14 @@ STATIC void emit_write_code_info_uint(emit_t* emit, mp_uint_t val) { } STATIC void emit_write_code_info_qstr(emit_t *emit, qstr qst) { + #if MICROPY_PERSISTENT_CODE + assert((qst >> 16) == 0); + byte *c = emit_get_cur_to_write_code_info(emit, 2); + c[0] = qst; + c[1] = qst >> 8; + #else emit_write_uint(emit, emit_get_cur_to_write_code_info, qst); + #endif } #if MICROPY_ENABLE_SOURCE_LINE @@ -163,10 +173,6 @@ STATIC byte *emit_get_cur_to_write_bytecode(emit_t *emit, int num_bytes_to_write } } -STATIC void emit_align_bytecode_to_machine_word(emit_t *emit) { - emit->bytecode_offset = (emit->bytecode_offset + sizeof(mp_uint_t) - 1) & (~(sizeof(mp_uint_t) - 1)); -} - STATIC void emit_write_bytecode_byte(emit_t *emit, byte b1) { byte *c = emit_get_cur_to_write_bytecode(emit, 1); c[0] = b1; @@ -211,18 +217,55 @@ STATIC void emit_write_bytecode_byte_uint(emit_t *emit, byte b, mp_uint_t val) { emit_write_uint(emit, emit_get_cur_to_write_bytecode, val); } -// aligns the pointer so it is friendly to GC +#if MICROPY_PERSISTENT_CODE +STATIC void emit_write_bytecode_byte_const(emit_t *emit, byte b, mp_uint_t n, mp_uint_t c) { + if (emit->pass == MP_PASS_EMIT) { + emit->const_table[n] = c; + } + emit_write_bytecode_byte_uint(emit, b, n); +} +#else STATIC void emit_write_bytecode_byte_ptr(emit_t *emit, byte b, void *ptr) { + // aligns the pointer so it is friendly to GC emit_write_bytecode_byte(emit, b); - emit_align_bytecode_to_machine_word(emit); + emit->bytecode_offset = (mp_uint_t)MP_ALIGN(emit->bytecode_offset, sizeof(mp_uint_t)); mp_uint_t *c = (mp_uint_t*)emit_get_cur_to_write_bytecode(emit, sizeof(mp_uint_t)); // Verify thar c is already uint-aligned assert(c == MP_ALIGN(c, sizeof(mp_uint_t))); *c = (mp_uint_t)ptr; } +#endif STATIC void emit_write_bytecode_byte_qstr(emit_t* emit, byte b, qstr qst) { + #if MICROPY_PERSISTENT_CODE + assert((qst >> 16) == 0); + byte *c = emit_get_cur_to_write_bytecode(emit, 3); + c[0] = b; + c[1] = qst; + c[2] = qst >> 8; + #else emit_write_bytecode_byte_uint(emit, b, qst); + #endif +} + +STATIC void emit_write_bytecode_byte_obj(emit_t *emit, byte b, void *ptr) { + #if MICROPY_PERSISTENT_CODE + emit_write_bytecode_byte_const(emit, b, + emit->scope->num_pos_args + emit->scope->num_kwonly_args + + emit->ct_cur_obj++, (mp_uint_t)ptr); + #else + emit_write_bytecode_byte_ptr(emit, b, ptr); + #endif +} + +STATIC void emit_write_bytecode_byte_raw_code(emit_t *emit, byte b, mp_raw_code_t *rc) { + #if MICROPY_PERSISTENT_CODE + emit_write_bytecode_byte_const(emit, b, + emit->scope->num_pos_args + emit->scope->num_kwonly_args + + emit->ct_num_obj + emit->ct_cur_raw_code++, (mp_uint_t)rc); + #else + emit_write_bytecode_byte_ptr(emit, b, rc); + #endif } // unsigned labels are relative to ip following this instruction, stored as 16 bits @@ -318,6 +361,11 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { } emit_write_bytecode_byte(emit, 255); // end of list sentinel + #if MICROPY_PERSISTENT_CODE + emit->ct_cur_obj = 0; + emit->ct_cur_raw_code = 0; + #endif + if (pass == MP_PASS_EMIT) { // Write argument names (needed to resolve positional args passed as // keywords). We store them as full word-sized objects for efficient access @@ -360,16 +408,30 @@ void mp_emit_bc_end_pass(emit_t *emit) { emit_write_code_info_byte(emit, 0); // end of line number info + #if MICROPY_PERSISTENT_CODE + assert(emit->pass <= MP_PASS_STACK_SIZE || (emit->ct_num_obj == emit->ct_cur_obj)); + emit->ct_num_obj = emit->ct_cur_obj; + #endif + if (emit->pass == MP_PASS_CODE_SIZE) { + #if !MICROPY_PERSISTENT_CODE // so bytecode is aligned - emit_align_code_info_to_machine_word(emit); + emit->code_info_offset = (mp_uint_t)MP_ALIGN(emit->code_info_offset, sizeof(mp_uint_t)); + #endif // calculate size of total code-info + bytecode, in bytes emit->code_info_size = emit->code_info_offset; emit->bytecode_size = emit->bytecode_offset; emit->code_base = m_new0(byte, emit->code_info_size + emit->bytecode_size); - emit->const_table = m_new0(mp_uint_t, emit->scope->num_pos_args + emit->scope->num_kwonly_args); + #if MICROPY_PERSISTENT_CODE + emit->const_table = m_new0(mp_uint_t, + emit->scope->num_pos_args + emit->scope->num_kwonly_args + + emit->ct_cur_obj + emit->ct_cur_raw_code); + #else + emit->const_table = m_new0(mp_uint_t, + emit->scope->num_pos_args + emit->scope->num_kwonly_args); + #endif } else if (emit->pass == MP_PASS_EMIT) { mp_emit_glue_assign_bytecode(emit->scope->raw_code, emit->code_base, @@ -457,7 +519,7 @@ void mp_emit_bc_load_const_tok(emit_t *emit, mp_token_kind_t tok) { case MP_TOKEN_KW_NONE: emit_write_bytecode_byte(emit, MP_BC_LOAD_CONST_NONE); break; case MP_TOKEN_KW_TRUE: emit_write_bytecode_byte(emit, MP_BC_LOAD_CONST_TRUE); break; no_other_choice: - case MP_TOKEN_ELLIPSIS: emit_write_bytecode_byte_ptr(emit, MP_BC_LOAD_CONST_OBJ, (void*)&mp_const_ellipsis_obj); break; + case MP_TOKEN_ELLIPSIS: emit_write_bytecode_byte_obj(emit, MP_BC_LOAD_CONST_OBJ, (void*)&mp_const_ellipsis_obj); break; default: assert(0); goto no_other_choice; // to help flow control analysis } } @@ -478,7 +540,7 @@ void mp_emit_bc_load_const_str(emit_t *emit, qstr qst) { void mp_emit_bc_load_const_obj(emit_t *emit, void *obj) { emit_bc_pre(emit, 1); - emit_write_bytecode_byte_ptr(emit, MP_BC_LOAD_CONST_OBJ, obj); + emit_write_bytecode_byte_obj(emit, MP_BC_LOAD_CONST_OBJ, obj); } void mp_emit_bc_load_null(emit_t *emit) { @@ -821,22 +883,22 @@ void mp_emit_bc_unpack_ex(emit_t *emit, mp_uint_t n_left, mp_uint_t n_right) { void mp_emit_bc_make_function(emit_t *emit, scope_t *scope, mp_uint_t n_pos_defaults, mp_uint_t n_kw_defaults) { if (n_pos_defaults == 0 && n_kw_defaults == 0) { emit_bc_pre(emit, 1); - emit_write_bytecode_byte_ptr(emit, MP_BC_MAKE_FUNCTION, scope->raw_code); + emit_write_bytecode_byte_raw_code(emit, MP_BC_MAKE_FUNCTION, scope->raw_code); } else { emit_bc_pre(emit, -1); - emit_write_bytecode_byte_ptr(emit, MP_BC_MAKE_FUNCTION_DEFARGS, scope->raw_code); + emit_write_bytecode_byte_raw_code(emit, MP_BC_MAKE_FUNCTION_DEFARGS, scope->raw_code); } } void mp_emit_bc_make_closure(emit_t *emit, scope_t *scope, mp_uint_t n_closed_over, mp_uint_t n_pos_defaults, mp_uint_t n_kw_defaults) { if (n_pos_defaults == 0 && n_kw_defaults == 0) { emit_bc_pre(emit, -n_closed_over + 1); - emit_write_bytecode_byte_ptr(emit, MP_BC_MAKE_CLOSURE, scope->raw_code); + emit_write_bytecode_byte_raw_code(emit, MP_BC_MAKE_CLOSURE, scope->raw_code); emit_write_bytecode_byte(emit, n_closed_over); } else { assert(n_closed_over <= 255); emit_bc_pre(emit, -2 - n_closed_over + 1); - emit_write_bytecode_byte_ptr(emit, MP_BC_MAKE_CLOSURE_DEFARGS, scope->raw_code); + emit_write_bytecode_byte_raw_code(emit, MP_BC_MAKE_CLOSURE_DEFARGS, scope->raw_code); emit_write_bytecode_byte(emit, n_closed_over); } } diff --git a/py/emitnative.c b/py/emitnative.c index d8f1640c0f..2abc46c936 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -830,10 +830,16 @@ STATIC void emit_native_end_pass(emit_t *emit) { ASM_DATA(emit->as, 1, emit->scope->num_kwonly_args); ASM_DATA(emit->as, 1, emit->scope->num_def_pos_args); - // write code info (just contains block name and source file) + // write code info + #if MICROPY_PERSISTENT_CODE ASM_DATA(emit->as, 1, 5); - ASM_DATA(emit->as, 2, emit->scope->simple_name); - ASM_DATA(emit->as, 2, emit->scope->source_file); + ASM_DATA(emit->as, 1, emit->scope->simple_name); + ASM_DATA(emit->as, 1, emit->scope->simple_name >> 8); + ASM_DATA(emit->as, 1, emit->scope->source_file); + ASM_DATA(emit->as, 1, emit->scope->source_file >> 8); + #else + ASM_DATA(emit->as, 1, 1); + #endif // bytecode prelude: initialise closed over variables for (int i = 0; i < emit->scope->id_info_len; i++) { diff --git a/py/mpconfig.h b/py/mpconfig.h index 4f023934e6..8019771e51 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -192,6 +192,11 @@ /*****************************************************************************/ /* Micro Python emitters */ +// Whether generated code can persist independently of the VM/runtime instance +#ifndef MICROPY_PERSISTENT_CODE +#define MICROPY_PERSISTENT_CODE (0) +#endif + // Whether to emit x64 native code #ifndef MICROPY_EMIT_X64 #define MICROPY_EMIT_X64 (0) diff --git a/py/objfun.c b/py/objfun.c index f55d44ca29..a39508e2b9 100644 --- a/py/objfun.c +++ b/py/objfun.c @@ -106,7 +106,11 @@ const mp_obj_type_t mp_type_fun_builtin = { qstr mp_obj_code_get_name(const byte *code_info) { mp_decode_uint(&code_info); // skip code_info_size entry + #if MICROPY_PERSISTENT_CODE + return code_info[0] | (code_info[1] << 8); + #else return mp_decode_uint(&code_info); + #endif } #if MICROPY_EMIT_NATIVE diff --git a/py/showbc.c b/py/showbc.c index 62c6168b78..0e1edb60d7 100644 --- a/py/showbc.c +++ b/py/showbc.c @@ -40,6 +40,18 @@ } #define DECODE_ULABEL do { unum = (ip[0] | (ip[1] << 8)); ip += 2; } while (0) #define DECODE_SLABEL do { unum = (ip[0] | (ip[1] << 8)) - 0x8000; ip += 2; } while (0) + +#if MICROPY_PERSISTENT_CODE + +#define DECODE_QSTR \ + qst = ip[0] | ip[1] << 8; \ + ip += 2; +#define DECODE_PTR \ + DECODE_UINT; \ + unum = mp_showbc_const_table[unum] + +#else + #define DECODE_QSTR { \ qst = 0; \ do { \ @@ -52,10 +64,14 @@ ip += sizeof(mp_uint_t); \ } while (0) +#endif + const byte *mp_showbc_code_start; +const mp_uint_t *mp_showbc_const_table; void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const mp_uint_t *const_table) { mp_showbc_code_start = ip; + mp_showbc_const_table = const_table; // get bytecode parameters mp_uint_t n_state = mp_decode_uint(&ip); @@ -69,8 +85,13 @@ void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const m mp_uint_t code_info_size = mp_decode_uint(&code_info); ip += code_info_size; + #if MICROPY_PERSISTENT_CODE + qstr block_name = code_info[0] | (code_info[1] << 8); + qstr source_file = code_info[2] | (code_info[3] << 8); + #else qstr block_name = mp_decode_uint(&code_info); qstr source_file = mp_decode_uint(&code_info); + #endif printf("File %s, code block '%s' (descriptor: %p, bytecode @%p " UINT_FMT " bytes)\n", qstr_str(source_file), qstr_str(block_name), descr, mp_showbc_code_start, len); diff --git a/py/vm.c b/py/vm.c index 393b8a1db7..f9aa0f9b3c 100644 --- a/py/vm.c +++ b/py/vm.c @@ -65,6 +65,18 @@ typedef enum { } while ((*ip++ & 0x80) != 0) #define DECODE_ULABEL mp_uint_t ulab = (ip[0] | (ip[1] << 8)); ip += 2 #define DECODE_SLABEL mp_uint_t slab = (ip[0] | (ip[1] << 8)) - 0x8000; ip += 2 + +#if MICROPY_PERSISTENT_CODE + +#define DECODE_QSTR \ + qstr qst = ip[0] | ip[1] << 8; \ + ip += 2; +#define DECODE_PTR \ + DECODE_UINT; \ + void *ptr = (void*)code_state->const_table[unum] + +#else + #define DECODE_QSTR qstr qst = 0; \ do { \ qst = (qst << 7) + (*ip & 0x7f); \ @@ -73,6 +85,9 @@ typedef enum { ip = (byte*)(((mp_uint_t)ip + sizeof(mp_uint_t) - 1) & (~(sizeof(mp_uint_t) - 1))); /* align ip */ \ void *ptr = (void*)*(mp_uint_t*)ip; \ ip += sizeof(mp_uint_t) + +#endif + #define PUSH(val) *++sp = (val) #define POP() (*sp--) #define TOP() (*sp) @@ -1280,8 +1295,14 @@ unwind_loop: if (mp_obj_is_exception_instance(nlr.ret_val) && nlr.ret_val != &mp_const_GeneratorExit_obj && nlr.ret_val != &mp_const_MemoryError_obj) { const byte *ip = code_state->code_info; mp_uint_t code_info_size = mp_decode_uint(&ip); + #if MICROPY_PERSISTENT_CODE + qstr block_name = ip[0] | (ip[1] << 8); + qstr source_file = ip[2] | (ip[3] << 8); + ip += 4; + #else qstr block_name = mp_decode_uint(&ip); qstr source_file = mp_decode_uint(&ip); + #endif mp_uint_t bc = code_state->ip - code_state->code_info - code_info_size; mp_uint_t source_line = 1; mp_uint_t c;