py: Compress first part of bytecode prelude.

The start of the bytecode prelude contains 6 numbers telling the amount of
stack needed for the Python values and exceptions, and the signature of the
function.  Prior to this patch these numbers were all encoded one after the
other (2x variable unsigned integers, then 4x bytes), but using so many
bytes is unnecessary.

An entropy analysis of around 150,000 bytecode functions from the CPython
standard library showed that the optimal Shannon coding would need about
7.1 bits on average to encode these 6 numbers, compared to the existing 48
bits.

This patch attempts to get close to this optimal value by packing the 6
numbers into a single, variable-length unsigned integer via bit-wise
interleaving.  The interleaving scheme is chosen to minimise the average
number of bytes needed, and at the same time keep the scheme simple enough
so it can be implemented without too much overhead in code size or speed.
The scheme requires about 10.5 bits on average to store the 6 numbers.

As a result most functions which originally took 6 bytes to encode these 6
numbers now need only 1 byte (in 80% of cases).
pull/5143/head
Damien George 2019-09-16 22:12:59 +10:00
rodzic 81d04a0200
commit b5ebfadbd6
14 zmienionych plików z 171 dodań i 106 usunięć

15
py/bc.c
Wyświetl plik

@ -124,13 +124,14 @@ void mp_setup_code_state(mp_code_state_t *code_state, size_t n_args, size_t n_kw
code_state->frame = NULL;
#endif
// get params
size_t n_state = mp_decode_uint(&code_state->ip);
code_state->ip = mp_decode_uint_skip(code_state->ip); // skip n_exc_stack
size_t scope_flags = *code_state->ip++;
size_t n_pos_args = *code_state->ip++;
size_t n_kwonly_args = *code_state->ip++;
size_t n_def_pos_args = *code_state->ip++;
// Get cached n_state (rather than decode it again)
size_t n_state = code_state->n_state;
// Decode prelude
size_t n_state_unused, n_exc_stack_unused, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args;
MP_BC_PRELUDE_SIG_DECODE_INTO(code_state->ip, n_state_unused, n_exc_stack_unused, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args);
(void)n_state_unused;
(void)n_exc_stack_unused;
code_state->sp = &code_state->state[0] - 1;
code_state->exc_sp_idx = 0;

74
py/bc.h
Wyświetl plik

@ -32,12 +32,15 @@
// bytecode layout:
//
// n_state : var uint
// n_exc_stack : var uint
// scope_flags : byte
// n_pos_args : byte number of arguments this function takes
// n_kwonly_args : byte number of keyword-only arguments this function takes
// n_def_pos_args : byte number of default positional arguments
// func signature : var uint
// contains six values interleaved bit-wise as: xSSSSEAA [xFSSKAED repeated]
// x = extension another byte follows
// S = n_state - 1 number of entries in Python value stack
// E = n_exc_stack number of entries in exception stack
// F = scope_flags four bits of flags, MP_SCOPE_FLAG_xxx
// A = n_pos_args number of arguments this function takes
// K = n_kwonly_args number of keyword-only arguments this function takes
// D = n_def_pos_args number of default positional arguments
//
// code_info_size : var uint | code_info_size counts bytes in this chunk
// simple_name : var qstr |
@ -60,6 +63,65 @@
// const0 : obj
// constN : obj
// Encode the six prelude signature values as a variable-length byte sequence
// and emit each byte by calling out_byte(out_env, b).
//
//   S        modifiable lvalue holding n_state; 1 is subtracted before
//            encoding, so it is expected to be >= 1.  Clobbered by the macro.
//   E        modifiable lvalue holding n_exc_stack.  Clobbered by the macro.
//   scope    pointer expression with members scope_flags, num_pos_args,
//            num_kwonly_args and num_def_pos_args (evaluated more than once).
//   out_byte function or function-like macro invoked as out_byte(out_env, b).
//   out_env  first argument forwarded unchanged to out_byte.
//
// The first byte is laid out as xSSSSEAA and every following byte as
// xFSSKAED; x is set on every byte except the last one.  Only the low four
// bits of scope_flags are stored.
#define MP_BC_PRELUDE_SIG_ENCODE(S, E, scope, out_byte, out_env) \
    do { \
        /* Get values to store in prelude */ \
        size_t F = (scope)->scope_flags & 0x0f; /* only need to store lower 4 flag bits */ \
        size_t A = (scope)->num_pos_args; \
        size_t K = (scope)->num_kwonly_args; \
        size_t D = (scope)->num_def_pos_args; \
        \
        /* Adjust S to shrink range, to compress better */ \
        (S) -= 1; \
        \
        /* Encode prelude */ \
        /* xSSSSEAA */ \
        uint8_t z = ((S) & 0xf) << 3 | ((E) & 1) << 2 | (A & 3); \
        (S) >>= 4; \
        (E) >>= 1; \
        A >>= 2; \
        /* Keep emitting continuation bytes while any value has bits left */ \
        while ((S) | (E) | F | A | K | D) { \
            out_byte(out_env, 0x80 | z); \
            /* xFSSKAED */ \
            z = (F & 1) << 6 | ((S) & 3) << 4 | (K & 1) << 3 \
                | (A & 1) << 2 | ((E) & 1) << 1 | (D & 1); \
            (S) >>= 2; \
            (E) >>= 1; \
            F >>= 1; \
            A >>= 1; \
            K >>= 1; \
            D >>= 1; \
        } \
        /* Final byte has the extension bit clear */ \
        out_byte(out_env, z); \
    } while (0)
// Decode a prelude signature starting at ip into the six given lvalues and
// advance ip past every byte consumed.
//
//   ip       modifiable pointer to bytes; post-incremented once per byte read.
//   S        receives n_state (the stored value plus 1).
//   E        receives n_exc_stack.
//   F        receives the stored scope flag bits.
//   A        receives n_pos_args.
//   K        receives n_kwonly_args.
//   D        receives n_def_pos_args.
//
// The first byte is xSSSSEAA and each following byte is xFSSKAED; decoding
// continues while the x bit (0x80) of the byte just read is set.
#define MP_BC_PRELUDE_SIG_DECODE_INTO(ip, S, E, F, A, K, D) \
    do { \
        uint8_t z = *(ip)++; \
        /* xSSSSEAA */ \
        (S) = (z >> 3) & 0xf; \
        (E) = (z >> 2) & 0x1; \
        (F) = 0; \
        (A) = z & 0x3; \
        (K) = 0; \
        (D) = 0; \
        for (unsigned n = 0; z & 0x80; ++n) { \
            z = *(ip)++; \
            /* xFSSKAED */ \
            /* Shift in size_t: z promotes to signed int, and shifting */ \
            /* an int into its sign bit is undefined behaviour. */ \
            (S) |= (size_t)(z & 0x30) << (2 * n); \
            (E) |= (size_t)(z & 0x02) << n; \
            (F) |= (size_t)((z & 0x40) >> 6) << n; \
            (A) |= (size_t)(z & 0x4) << n; \
            (K) |= (size_t)((z & 0x08) >> 3) << n; \
            (D) |= (size_t)(z & 0x1) << n; \
        } \
        /* Undo the "minus one" applied to n_state when it was encoded */ \
        (S) += 1; \
    } while (0)
// Declare the six prelude signature variables in the enclosing scope
// (n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args,
// n_def_pos_args, all size_t) and fill them by decoding from ip with
// MP_BC_PRELUDE_SIG_DECODE_INTO.  Because it introduces declarations, this
// macro is deliberately not wrapped in a do/while block.
#define MP_BC_PRELUDE_SIG_DECODE(ip) \
    size_t n_state; \
    size_t n_exc_stack; \
    size_t scope_flags; \
    size_t n_pos_args; \
    size_t n_kwonly_args; \
    size_t n_def_pos_args; \
    MP_BC_PRELUDE_SIG_DECODE_INTO(ip, n_state, n_exc_stack, scope_flags, \
        n_pos_args, n_kwonly_args, n_def_pos_args)
// Sentinel value for mp_code_state_t.exc_sp_idx
#define MP_CODE_STATE_EXC_SP_IDX_SENTINEL ((uint16_t)-1)

Wyświetl plik

@ -328,7 +328,7 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) {
emit->bytecode_offset = 0;
emit->code_info_offset = 0;
// Write local state size and exception stack size.
// Write local state size, exception stack size, scope flags and number of arguments
{
mp_uint_t n_state = scope->num_locals + scope->stack_size;
if (n_state == 0) {
@ -341,16 +341,10 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) {
// An extra slot in the stack is needed to detect VM stack overflow
n_state += 1;
#endif
emit_write_code_info_uint(emit, n_state);
emit_write_code_info_uint(emit, scope->exc_stack_size);
}
// Write scope flags and number of arguments.
// TODO check that num args all fit in a byte
emit_write_code_info_byte(emit, emit->scope->scope_flags);
emit_write_code_info_byte(emit, emit->scope->num_pos_args);
emit_write_code_info_byte(emit, emit->scope->num_kwonly_args);
emit_write_code_info_byte(emit, emit->scope->num_def_pos_args);
size_t n_exc_stack = scope->exc_stack_size;
MP_BC_PRELUDE_SIG_ENCODE(n_state, n_exc_stack, scope, emit_write_code_info_byte, emit);
}
// Write size of the rest of the code info. We don't know how big this
// variable uint will be on the MP_PASS_CODE_SIZE pass so we reserve 2 bytes

Wyświetl plik

@ -573,18 +573,19 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
}
// Byte-output helper for the native emitter: forwards the single byte `val`
// to mp_asm_base_data() on this emitter's assembler base.  Its (emit, val)
// argument order is the out_byte(out_env, b) shape that
// MP_BC_PRELUDE_SIG_ENCODE calls, so it can be passed as that callback.
static inline void emit_native_write_code_info_byte(emit_t *emit, byte val) {
// NOTE(review): the literal 1 is presumably the size in bytes of the datum -- confirm against mp_asm_base_data
mp_asm_base_data(&emit->as->base, 1, val);
}
STATIC void emit_native_end_pass(emit_t *emit) {
emit_native_global_exc_exit(emit);
if (!emit->do_viper_types) {
emit->prelude_offset = mp_asm_base_get_code_pos(&emit->as->base);
mp_asm_base_data(&emit->as->base, 1, 0x80 | ((emit->n_state >> 7) & 0x7f));
mp_asm_base_data(&emit->as->base, 1, emit->n_state & 0x7f);
mp_asm_base_data(&emit->as->base, 1, 0); // n_exc_stack
mp_asm_base_data(&emit->as->base, 1, emit->scope->scope_flags);
mp_asm_base_data(&emit->as->base, 1, emit->scope->num_pos_args);
mp_asm_base_data(&emit->as->base, 1, emit->scope->num_kwonly_args);
mp_asm_base_data(&emit->as->base, 1, emit->scope->num_def_pos_args);
size_t n_state = emit->n_state;
size_t n_exc_stack = 0; // exc-stack not needed for native code
MP_BC_PRELUDE_SIG_ENCODE(n_state, n_exc_stack, emit->scope, emit_native_write_code_info_byte, emit);
// write code info
#if MICROPY_PERSISTENT_CODE

Wyświetl plik

@ -161,12 +161,7 @@ qstr mp_obj_fun_get_name(mp_const_obj_t fun_in) {
#endif
const byte *bc = fun->bytecode;
bc = mp_decode_uint_skip(bc); // skip n_state
bc = mp_decode_uint_skip(bc); // skip n_exc_stack
bc++; // skip scope_params
bc++; // skip n_pos_args
bc++; // skip n_kwonly_args
bc++; // skip n_def_pos_args
MP_BC_PRELUDE_SIG_DECODE(bc);
return mp_obj_code_get_name(bc);
}
@ -197,10 +192,10 @@ STATIC void dump_args(const mp_obj_t *a, size_t sz) {
#define DECODE_CODESTATE_SIZE(bytecode, n_state_out_var, state_size_out_var) \
{ \
/* bytecode prelude: state size and exception stack size */ \
n_state_out_var = mp_decode_uint_value(bytecode); \
size_t n_exc_stack = mp_decode_uint_value(mp_decode_uint_skip(bytecode)); \
\
const uint8_t *ip = bytecode; \
size_t n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_args; \
MP_BC_PRELUDE_SIG_DECODE_INTO(ip, n_state_out_var, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_args); \
\
/* state size in bytes */ \
state_size_out_var = n_state_out_var * sizeof(mp_obj_t) \
+ n_exc_stack * sizeof(mp_exc_stack_t); \
@ -295,9 +290,11 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const
assert(0);
}
}
const byte *bytecode_ptr = mp_decode_uint_skip(mp_decode_uint_skip(self->bytecode));
size_t n_pos_args = bytecode_ptr[1];
size_t n_kwonly_args = bytecode_ptr[2];
const byte *bytecode_ptr = self->bytecode;
size_t n_state_unused, n_exc_stack_unused, scope_flags_unused;
size_t n_pos_args, n_kwonly_args, n_def_args_unused;
MP_BC_PRELUDE_SIG_DECODE_INTO(bytecode_ptr, n_state_unused, n_exc_stack_unused,
scope_flags_unused, n_pos_args, n_kwonly_args, n_def_args_unused);
// We can't check the case when an exception is returned in state[0]
// and there are no arguments, because in this case our detection slot may have
// been overwritten by the returned exception (which is allowed).

Wyświetl plik

@ -51,8 +51,8 @@ STATIC mp_obj_t gen_wrap_call(mp_obj_t self_in, size_t n_args, size_t n_kw, cons
mp_obj_fun_bc_t *self_fun = MP_OBJ_TO_PTR(self_in);
// bytecode prelude: get state size and exception stack size
size_t n_state = mp_decode_uint_value(self_fun->bytecode);
size_t n_exc_stack = mp_decode_uint_value(mp_decode_uint_skip(self_fun->bytecode));
const uint8_t *ip = self_fun->bytecode;
MP_BC_PRELUDE_SIG_DECODE(ip);
// allocate the generator object, with room for local stack and exception stack
mp_obj_gen_instance_t *o = m_new_obj_var(mp_obj_gen_instance_t, byte,
@ -88,7 +88,9 @@ STATIC mp_obj_t native_gen_wrap_call(mp_obj_t self_in, size_t n_args, size_t n_k
// Determine start of prelude, and extract n_state from it
uintptr_t prelude_offset = ((uintptr_t*)self_fun->bytecode)[0];
size_t n_state = mp_decode_uint_value(self_fun->bytecode + prelude_offset);
const uint8_t *ip = self_fun->bytecode + prelude_offset;
size_t n_state, n_exc_stack_unused, scope_flags, n_pos_args, n_kwonly_args, n_def_args;
MP_BC_PRELUDE_SIG_DECODE_INTO(ip, n_state, n_exc_stack_unused, scope_flags, n_pos_args, n_kwonly_args, n_def_args);
size_t n_exc_stack = 0;
// Allocate the generator object, with room for local stack and exception stack

Wyświetl plik

@ -157,17 +157,16 @@ typedef struct _bytecode_prelude_t {
uint code_info_size;
} bytecode_prelude_t;
#if MICROPY_PERSISTENT_CODE_SAVE || MICROPY_EMIT_MACHINE_CODE
// ip will point to start of opcodes
// ip2 will point to simple_name, source_file qstrs
STATIC void extract_prelude(const byte **ip, const byte **ip2, bytecode_prelude_t *prelude) {
prelude->n_state = mp_decode_uint(ip);
prelude->n_exc_stack = mp_decode_uint(ip);
prelude->scope_flags = *(*ip)++;
prelude->n_pos_args = *(*ip)++;
prelude->n_kwonly_args = *(*ip)++;
prelude->n_def_pos_args = *(*ip)++;
MP_BC_PRELUDE_SIG_DECODE(*ip);
prelude->n_state = n_state;
prelude->n_exc_stack = n_exc_stack;
prelude->scope_flags = scope_flags;
prelude->n_pos_args = n_pos_args;
prelude->n_kwonly_args = n_kwonly_args;
prelude->n_def_pos_args = n_def_pos_args;
*ip2 = *ip;
prelude->code_info_size = mp_decode_uint(ip2);
*ip += prelude->code_info_size;
@ -175,8 +174,6 @@ STATIC void extract_prelude(const byte **ip, const byte **ip2, bytecode_prelude_
}
}
#endif
#endif // MICROPY_PERSISTENT_CODE_LOAD || MICROPY_PERSISTENT_CODE_SAVE
#if MICROPY_PERSISTENT_CODE_LOAD
@ -285,19 +282,19 @@ STATIC mp_obj_t load_obj(mp_reader_t *reader) {
}
STATIC void load_prelude(mp_reader_t *reader, byte **ip, byte **ip2, bytecode_prelude_t *prelude) {
prelude->n_state = read_uint(reader, ip);
prelude->n_exc_stack = read_uint(reader, ip);
read_bytes(reader, *ip, 4);
prelude->scope_flags = *(*ip)++;
prelude->n_pos_args = *(*ip)++;
prelude->n_kwonly_args = *(*ip)++;
prelude->n_def_pos_args = *(*ip)++;
*ip2 = *ip;
prelude->code_info_size = read_uint(reader, ip2);
read_bytes(reader, *ip2, prelude->code_info_size - (*ip2 - *ip));
*ip += prelude->code_info_size;
while ((*(*ip)++ = read_byte(reader)) != 255) {
// Read in the prelude
byte *ip_read = *ip;
read_uint(reader, &ip_read); // read in n_state/etc (is effectively a var-uint)
byte *ip_read_save = ip_read;
size_t code_info_size = read_uint(reader, &ip_read); // read in code_info_size
code_info_size -= ip_read - ip_read_save; // subtract bytes taken by code_info_size itself
read_bytes(reader, ip_read, code_info_size); // read remaining code info
ip_read += code_info_size;
while ((*ip_read++ = read_byte(reader)) != 255) {
}
// Entire prelude has been read into *ip, now decode and extract values from it
extract_prelude((const byte**)ip, (const byte**)ip2, prelude);
}
STATIC void load_bytecode(mp_reader_t *reader, qstr_window_t *qw, byte *ip, byte *ip_top) {

Wyświetl plik

@ -40,12 +40,13 @@ STATIC uint mp_prof_bytecode_lineno(const mp_raw_code_t *rc, size_t bc) {
void mp_prof_extract_prelude(const byte *bytecode, mp_bytecode_prelude_t *prelude) {
const byte *ip = bytecode;
prelude->n_state = mp_decode_uint(&ip);
prelude->n_exc_stack = mp_decode_uint(&ip);
prelude->scope_flags = *ip++;
prelude->n_pos_args = *ip++;
prelude->n_kwonly_args = *ip++;
prelude->n_def_pos_args = *ip++;
MP_BC_PRELUDE_SIG_DECODE(ip);
prelude->n_state = n_state;
prelude->n_exc_stack = n_exc_stack;
prelude->scope_flags = scope_flags;
prelude->n_pos_args = n_pos_args;
prelude->n_kwonly_args = n_kwonly_args;
prelude->n_def_pos_args = n_def_pos_args;
const byte *code_info = ip;
size_t code_info_size = mp_decode_uint(&ip);

Wyświetl plik

@ -28,9 +28,9 @@
// The first four must fit in 8 bits, see emitbc.c
// The remaining must fit in 16 bits, see scope.h
#define MP_SCOPE_FLAG_VARARGS (0x01)
#define MP_SCOPE_FLAG_GENERATOR (0x01)
#define MP_SCOPE_FLAG_VARKEYWORDS (0x02)
#define MP_SCOPE_FLAG_GENERATOR (0x04)
#define MP_SCOPE_FLAG_VARARGS (0x04)
#define MP_SCOPE_FLAG_DEFKWARGS (0x08)
#define MP_SCOPE_FLAG_REFGLOBALS (0x10) // used only if native emitter enabled
#define MP_SCOPE_FLAG_HASCONSTS (0x20) // used only if native emitter enabled

Wyświetl plik

@ -83,13 +83,8 @@ const mp_uint_t *mp_showbc_const_table;
void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const mp_uint_t *const_table) {
mp_showbc_code_start = ip;
// get bytecode parameters
mp_uint_t n_state = mp_decode_uint(&ip);
mp_uint_t n_exc_stack = mp_decode_uint(&ip);
/*mp_uint_t scope_flags =*/ ip++;
mp_uint_t n_pos_args = *ip++;
mp_uint_t n_kwonly_args = *ip++;
/*mp_uint_t n_def_pos_args =*/ ip++;
// Decode prelude
MP_BC_PRELUDE_SIG_DECODE(ip);
const byte *code_info = ip;
mp_uint_t code_info_size = mp_decode_uint(&code_info);
@ -123,8 +118,8 @@ void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const m
}
printf("\n");
printf("(N_STATE " UINT_FMT ")\n", n_state);
printf("(N_EXC_STACK " UINT_FMT ")\n", n_exc_stack);
printf("(N_STATE %u)\n", (unsigned)n_state);
printf("(N_EXC_STACK %u)\n", (unsigned)n_exc_stack);
// for printing line number info
const byte *bytecode_start = ip;

Wyświetl plik

@ -1440,12 +1440,7 @@ unwind_loop:
&& *code_state->ip != MP_BC_END_FINALLY
&& *code_state->ip != MP_BC_RAISE_LAST) {
const byte *ip = code_state->fun_bc->bytecode;
ip = mp_decode_uint_skip(ip); // skip n_state
ip = mp_decode_uint_skip(ip); // skip n_exc_stack
ip++; // skip scope_params
ip++; // skip n_pos_args
ip++; // skip n_kwonly_args
ip++; // skip n_def_pos_args
MP_BC_PRELUDE_SIG_DECODE(ip);
size_t bc = code_state->ip - ip;
size_t code_info_size = mp_decode_uint_value(ip);
ip = mp_decode_uint_skip(ip); // skip code_info_size

Wyświetl plik

@ -1,6 +1,6 @@
File cmdline/cmd_verbose.py, code block '<module>' (descriptor: \.\+, bytecode \.\+ bytes)
Raw bytecode (code_info_size=\\d\+, bytecode_size=\\d\+):
02 \.\+
08 \.\+
########
\.\+63
arg names:

Wyświetl plik

@ -56,8 +56,8 @@ user_files = {
'/mod1.mpy': (
b'M\x05\x0b\x1f\x20' # header
b'\x38' # n bytes, bytecode
b'\x01\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\xff' # prelude
b'\x24' # n bytes, bytecode
b'\x00\x05\x00\x00\x00\x00\xff' # prelude
b'\x51' # LOAD_CONST_NONE
b'\x63' # RETURN_VALUE

Wyświetl plik

@ -152,13 +152,38 @@ def decode_uint(bytecode, ip):
break
return ip, unum
def read_prelude_sig(read_byte):
    """Decode the bit-interleaved prelude signature.

    read_byte is a zero-argument callable returning the next byte as an int
    each time it is called.  The first byte is laid out as xSSSSEAA and every
    following byte as xFSSKAED; bytes are consumed while the x bit (0x80) of
    the byte just read is set.

    Returns the tuple (S, E, F, A, K, D), where S has had 1 added back to
    undo the offset applied when it was encoded.
    """
    z = read_byte()
    # xSSSSEAA
    S = (z >> 3) & 0xf
    E = (z >> 2) & 0x1
    F = 0
    A = z & 0x3
    K = 0
    D = 0
    n = 0
    while z & 0x80:
        z = read_byte()
        # xFSSKAED
        S |= (z & 0x30) << (2 * n)
        E |= (z & 0x02) << n
        F |= ((z & 0x40) >> 6) << n
        A |= (z & 0x4) << n
        K |= ((z & 0x08) >> 3) << n
        D |= (z & 0x1) << n
        n += 1
    S += 1
    return S, E, F, A, K, D
def extract_prelude(bytecode, ip):
ip, n_state = decode_uint(bytecode, ip)
ip, n_exc_stack = decode_uint(bytecode, ip)
scope_flags = bytecode[ip]; ip += 1
n_pos_args = bytecode[ip]; ip += 1
n_kwonly_args = bytecode[ip]; ip += 1
n_def_pos_args = bytecode[ip]; ip += 1
def local_read_byte():
b = bytecode[ip_ref[0]]
ip_ref[0] += 1
return b
ip_ref = [ip] # to close over ip in Python 2 and 3
n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args = read_prelude_sig(local_read_byte)
ip = ip_ref[0]
ip2, code_info_size = decode_uint(bytecode, ip)
ip += code_info_size
while bytecode[ip] != 0xff:
@ -557,12 +582,7 @@ def read_obj(f):
assert 0
def read_prelude(f, bytecode):
n_state = read_uint(f, bytecode)
n_exc_stack = read_uint(f, bytecode)
scope_flags = read_byte(f, bytecode)
n_pos_args = read_byte(f, bytecode)
n_kwonly_args = read_byte(f, bytecode)
n_def_pos_args = read_byte(f, bytecode)
n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args = read_prelude_sig(lambda: read_byte(f, bytecode))
l1 = bytecode.idx
code_info_size = read_uint(f, bytecode)
l2 = bytecode.idx