py: Improve encoding scheme for line-number to bytecode map.

Reduces the amount of RAM needed to store the line-number to bytecode
map in the bytecode prelude by about a factor of 10 on average.

Using CPython 3.4's stdlib for statistics: previously, an average of
13 bytes was used per (bytecode offset, line-number offset) pair; with
this improvement, that is down to 1.3 bytes on average.

The large RAM usage before was due to some very large steps in line
numbers: both the step from the start of the file to the first line of
a function located far down in the file, and the steps across big
comments and/or big strings inside functions (both cases were
significant).

Although the savings are large on average for the CPython stdlib, it
won't have such a big effect for small scripts used in embedded
programming.

Addresses issue #648.
pull/781/head
Damien George 2014-07-31 16:12:01 +00:00
rodzic 8cc2018d47
commit 4747becc64
3 zmienionych plików z 48 dodań i 10 usunięć

Wyświetl plik

@ -115,12 +115,24 @@ STATIC void emit_write_code_info_qstr(emit_t* emit, qstr qstr) {
#if MICROPY_ENABLE_SOURCE_LINE
// Append entries to the line-number map in the code info so that the map
// advances by bytes_to_skip bytes of bytecode and lines_to_skip source lines.
// At least one of the two counts must be non-zero.
//
// One entry is written per loop iteration, in one of two formats, until both
// counts have been fully consumed:
//   0b0LLBBBBB             up to 31 bytes (5 bits) and up to 3 lines (2 bits)
//   0b1LLLBBBB 0bLLLLLLLL  up to 15 bytes (4 bits) and up to 2047 lines
//                          (11 bits: top 3 in the first byte, low 8 in the
//                          second byte)
// The first byte of an entry is never zero: a short entry is only written
// while b or l is non-zero, and a long entry always has bit 7 set.
STATIC void emit_write_code_info_bytes_lines(emit_t* emit, uint bytes_to_skip, uint lines_to_skip) {
    assert(bytes_to_skip > 0 || lines_to_skip > 0);
    //printf(" %d %d\n", bytes_to_skip, lines_to_skip);
    while (bytes_to_skip > 0 || lines_to_skip > 0) {
        mp_uint_t b, l;
        if (lines_to_skip <= 6) {
            // use 0b0LLBBBBB encoding
            // (6 lines or fewer need at most two short entries for the line
            // count, which is no larger than a single long entry)
            b = MIN(bytes_to_skip, 0x1f);
            l = MIN(lines_to_skip, 0x3);
            *emit_get_cur_to_write_code_info(emit, 1) = b | (l << 5);
        } else {
            // use 0b1LLLBBBB 0bLLLLLLLL encoding (l's LSB in second byte)
            b = MIN(bytes_to_skip, 0xf);
            l = MIN(lines_to_skip, 0x7ff);
            byte *ci = emit_get_cur_to_write_code_info(emit, 2);
            ci[0] = 0x80 | b | ((l >> 4) & 0x70); // bits 8..10 of l go to bits 4..6
            ci[1] = l & 0xff;                     // low 8 bits of l
        }
        // whatever did not fit in this entry is carried into the next one
        bytes_to_skip -= b;
        lines_to_skip -= l;
    }
}
#endif
@ -363,7 +375,6 @@ STATIC void emit_bc_set_source_line(emit_t *emit, int source_line) {
uint bytes_to_skip = emit->bytecode_offset - emit->last_source_line_offset;
uint lines_to_skip = source_line - emit->last_source_line;
emit_write_code_info_bytes_lines(emit, bytes_to_skip, lines_to_skip);
//printf(" %d %d\n", bytes_to_skip, lines_to_skip);
emit->last_source_line_offset = emit->bytecode_offset;
emit->last_source_line = source_line;
}

Wyświetl plik

@ -95,9 +95,18 @@ void mp_bytecode_print(const void *descr, const byte *ip, int len) {
mp_int_t bc = (code_info + code_info_size) - ip;
mp_uint_t source_line = 1;
printf(" bc=" INT_FMT " line=" UINT_FMT "\n", bc, source_line);
for (const byte* ci = code_info + 12; *ci; ci++) {
bc += *ci & 31;
source_line += *ci >> 5;
for (const byte* ci = code_info + 12; *ci;) {
if ((ci[0] & 0x80) == 0) {
// 0b0LLBBBBB encoding
bc += ci[0] & 0x1f;
source_line += ci[0] >> 5;
ci += 1;
} else {
// 0b1LLLBBBB 0bLLLLLLLL encoding (l's LSB in second byte)
bc += ci[0] & 0xf;
source_line += ((ci[0] << 4) & 0x700) | ci[1];
ci += 2;
}
printf(" bc=" INT_FMT " line=" UINT_FMT "\n", bc, source_line);
}
}

24
py/vm.c
Wyświetl plik

@ -931,9 +931,27 @@ exception_handler:
const byte* ci = code_info + 12;
if (*ci) {
source_line = 1;
for (; *ci && bc >= ((*ci) & 31); ci++) {
bc -= *ci & 31;
source_line += *ci >> 5;
mp_uint_t c;
while ((c = *ci)) {
mp_uint_t b, l;
if ((c & 0x80) == 0) {
// 0b0LLBBBBB encoding
b = c & 0x1f;
l = c >> 5;
ci += 1;
} else {
// 0b1LLLBBBB 0bLLLLLLLL encoding (l's LSB in second byte)
b = c & 0xf;
l = ((c << 4) & 0x700) | ci[1];
ci += 2;
}
if (bc >= b) {
bc -= b;
source_line += l;
} else {
// found source line corresponding to bytecode offset
break;
}
}
}
mp_obj_exception_add_traceback(nlr.ret_val, source_file, source_line, block_name);