From 18b1ba086c0e5547ca81030bf13b026961f80720 Mon Sep 17 00:00:00 2001 From: Artyom Skrobov Date: Mon, 3 May 2021 14:17:36 -0400 Subject: [PATCH] py/qstr: Separate hash and len from string data. This allows the compiler to merge strings: e.g. "update", "difference_update" and "symmetric_difference_update" will all point to the same memory. No functional change. The size reduction depends on the number of qstrs in the build. The change this commit brings is: bare-arm: -4 -0.007% minimal x86: +150 +0.092% [incl +48(data)] unix x64: -608 -0.118% unix nanbox: -572 -0.126% [incl +32(data)] stm32: -1392 -0.352% PYBV10 cc3200: -448 -0.244% esp8266: -1208 -0.173% GENERIC esp32: -1028 -0.068% GENERIC[incl -1020(data)] nrf: -440 -0.252% pca10040 rp2: -1072 -0.217% PICO samd: -368 -0.264% ADAFRUIT_ITSYBITSY_M4_EXPRESS Performance is also improved (on bare metal at least) for the core_import_mpy_multi.py, core_import_mpy_single.py and core_qstr.py performance benchmarks. Originally at adafruit#4583 Signed-off-by: Artyom Skrobov --- py/makeqstrdata.py | 29 +++++----- py/mpstate.h | 2 +- py/qstr.c | 132 +++++++++++++++++++++++---------------------- py/qstr.h | 22 +++++++- tools/mpy-tool.py | 29 +++++++--- 5 files changed, 121 insertions(+), 93 deletions(-) diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py index 403c406888..e332ab94ed 100644 --- a/py/makeqstrdata.py +++ b/py/makeqstrdata.py @@ -317,26 +317,24 @@ def parse_input_headers(infiles): return qcfgs, qstrs +def escape_bytes(qstr, qbytes): + if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in qstr): + # qstr is all printable ASCII so render it as-is (for easier debugging) + return qstr + else: + # qstr contains non-printable codes so render entire thing as hex pairs + return "".join(("\\x%02x" % b) for b in qbytes) + + def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr): qbytes = bytes_cons(qstr, "utf8") qlen = len(qbytes) qhash = compute_hash(qbytes, cfg_bytes_hash) - if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in qstr): - # qstr is all printable ASCII so render it as-is (for easier debugging) - qdata = qstr - else: - # qstr contains non-printable codes so render entire thing as hex pairs - qdata = "".join(("\\x%02x" % b) for b in qbytes) if qlen >= (1 << (8 * cfg_bytes_len)): print("qstr is too long:", qstr) assert False - qlen_str = ("\\x%02x" * cfg_bytes_len) % tuple( - ((qlen >> (8 * i)) & 0xFF) for i in range(cfg_bytes_len) - ) - qhash_str = ("\\x%02x" * cfg_bytes_hash) % tuple( - ((qhash >> (8 * i)) & 0xFF) for i in range(cfg_bytes_hash) - ) - return '(const byte*)"%s%s" "%s"' % (qhash_str, qlen_str, qdata) + qdata = escape_bytes(qstr, qbytes) + return '%d, %d, "%s"' % (qhash, qlen, qdata) def print_qstr_data(qcfgs, qstrs): @@ -349,10 +347,7 @@ def print_qstr_data(qcfgs, qstrs): print("") # add NULL qstr with no hash or data - print( - 'QDEF(MP_QSTRnull, (const byte*)"%s%s" "")' - % ("\\x00" * cfg_bytes_hash, "\\x00" * cfg_bytes_len) - ) + print('QDEF(MP_QSTRnull, 0, 0, "")') # go through each qstr and print it out for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]): diff --git a/py/mpstate.h b/py/mpstate.h index 8ece151663..7f86399f57 100644 --- a/py/mpstate.h +++ b/py/mpstate.h @@ -202,7 +202,7 @@ typedef struct _mp_state_vm_t { // pointer and sizes to store interned string data // (qstr_last_chunk can be root pointer but is also stored in qstr pool) - byte *qstr_last_chunk; + char *qstr_last_chunk; size_t qstr_last_alloc; size_t qstr_last_used; diff --git a/py/qstr.c b/py/qstr.c index e5b13b700d..848a583304 100644 --- a/py/qstr.c +++ b/py/qstr.c @@ -35,7 +35,6 @@ // NOTE: we are using linear arrays to store and search for qstr's (unique strings, interned strings) // ultimately we will replace this with a static hash table of some kind -// also probably need to include the length in the string data, to allow null bytes in the string #if MICROPY_DEBUG_VERBOSE // print debugging info #define DEBUG_printf DEBUG_printf @@ -44,34 +43,9 @@ #endif // A qstr is an index into the qstr pool. -// The data for a qstr contains (hash, length, data): -// - hash (configurable number of bytes) -// - length (configurable number of bytes) -// - data ("length" number of bytes) -// - \0 terminated (so they can be printed using printf) +// The data for a qstr is \0 terminated (so they can be printed using printf) -#if MICROPY_QSTR_BYTES_IN_HASH == 1 - #define Q_HASH_MASK (0xff) - #define Q_GET_HASH(q) ((mp_uint_t)(q)[0]) - #define Q_SET_HASH(q, hash) do { (q)[0] = (hash); } while (0) -#elif MICROPY_QSTR_BYTES_IN_HASH == 2 - #define Q_HASH_MASK (0xffff) - #define Q_GET_HASH(q) ((mp_uint_t)(q)[0] | ((mp_uint_t)(q)[1] << 8)) - #define Q_SET_HASH(q, hash) do { (q)[0] = (hash); (q)[1] = (hash) >> 8; } while (0) -#else - #error unimplemented qstr hash decoding -#endif -#define Q_GET_ALLOC(q) (MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + Q_GET_LENGTH(q) + 1) -#define Q_GET_DATA(q) ((q) + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN) -#if MICROPY_QSTR_BYTES_IN_LEN == 1 - #define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH]) - #define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); } while (0) -#elif MICROPY_QSTR_BYTES_IN_LEN == 2 - #define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH] | ((q)[MICROPY_QSTR_BYTES_IN_HASH + 1] << 8)) - #define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); (q)[MICROPY_QSTR_BYTES_IN_HASH + 1] = (len) >> 8; } while (0) -#else - #error unimplemented qstr length decoding -#endif +#define Q_HASH_MASK ((1 << (8 * MICROPY_QSTR_BYTES_IN_HASH)) - 1) #if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL #define QSTR_ENTER() mp_thread_mutex_lock(&MP_STATE_VM(qstr_mutex), 1) @@ -100,14 +74,32 @@ mp_uint_t qstr_compute_hash(const byte *data, size_t len) { return hash; } +const qstr_hash_t mp_qstr_const_hashes[] = { + #ifndef NO_QSTR +#define QDEF(id, hash, len, str) hash, + #include "genhdr/qstrdefs.generated.h" +#undef QDEF + #endif +}; + +const qstr_len_t mp_qstr_const_lengths[] = { + #ifndef NO_QSTR +#define QDEF(id, hash, len, str) len, + #include "genhdr/qstrdefs.generated.h" +#undef QDEF + #endif +}; + const qstr_pool_t mp_qstr_const_pool = { NULL, // no previous pool 0, // no previous pool MICROPY_ALLOC_QSTR_ENTRIES_INIT, MP_QSTRnumber_of, // corresponds to number of strings in array just below + (qstr_hash_t *)mp_qstr_const_hashes, + (qstr_len_t *)mp_qstr_const_lengths, { #ifndef NO_QSTR -#define QDEF(id, str) str, +#define QDEF(id, hash, len, str) str, #include "genhdr/qstrdefs.generated.h" #undef QDEF #endif @@ -130,19 +122,21 @@ void qstr_init(void) { #endif } -STATIC const byte *find_qstr(qstr q) { +STATIC qstr_pool_t *find_qstr(qstr *q) { // search pool for this qstr // total_prev_len==0 in the final pool, so the loop will always terminate qstr_pool_t *pool = MP_STATE_VM(last_pool); - while (q < pool->total_prev_len) { + while (*q < pool->total_prev_len) { pool = pool->prev; } - return pool->qstrs[q - pool->total_prev_len]; + *q -= pool->total_prev_len; + assert(*q < pool->len); + return pool; } // qstr_mutex must be taken while in this function -STATIC qstr qstr_add(const byte *q_ptr) { - DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", Q_GET_HASH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_DATA(q_ptr)); +STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) { + DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", hash, len, len, q_ptr); // make sure we have room in the pool for a new qstr if (MP_STATE_VM(last_pool)->len >= MP_STATE_VM(last_pool)->alloc) { @@ -151,7 +145,9 @@ STATIC qstr qstr_add(const byte *q_ptr) { // Put a lower bound on the allocation size in case the extra qstr pool has few entries new_alloc = MAX(MICROPY_ALLOC_QSTR_ENTRIES_INIT, new_alloc); #endif - qstr_pool_t *pool = m_new_obj_var_maybe(qstr_pool_t, const char *, new_alloc); + mp_uint_t pool_size = sizeof(qstr_pool_t) + + (sizeof(const char *) + sizeof(qstr_hash_t) + sizeof(qstr_len_t)) * new_alloc; + qstr_pool_t *pool = (qstr_pool_t *)m_malloc_maybe(pool_size); if (pool == NULL) { // Keep qstr_last_chunk consistent with qstr_pool_t: qstr_last_chunk is not scanned // at garbage collection since it's reachable from a qstr_pool_t. And the caller of @@ -162,6 +158,8 @@ STATIC qstr qstr_add(const byte *q_ptr) { QSTR_EXIT(); m_malloc_fail(new_alloc); } + pool->hashes = (qstr_hash_t *)(pool->qstrs + new_alloc); + pool->lengths = (qstr_len_t *)(pool->hashes + new_alloc); pool->prev = MP_STATE_VM(last_pool); pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len; pool->alloc = new_alloc; @@ -171,10 +169,14 @@ STATIC qstr qstr_add(const byte *q_ptr) { } // add the new qstr - MP_STATE_VM(last_pool)->qstrs[MP_STATE_VM(last_pool)->len++] = q_ptr; + mp_uint_t at = MP_STATE_VM(last_pool)->len; + MP_STATE_VM(last_pool)->hashes[at] = hash; + MP_STATE_VM(last_pool)->lengths[at] = len; + MP_STATE_VM(last_pool)->qstrs[at] = q_ptr; + MP_STATE_VM(last_pool)->len++; // return id for the newly-added qstr - return MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len - 1; + return MP_STATE_VM(last_pool)->total_prev_len + at; } qstr qstr_find_strn(const char *str, size_t str_len) { @@ -183,9 +185,10 @@ qstr qstr_find_strn(const char *str, size_t str_len) { // search pools for the data for (qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) { - for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) { - if (Q_GET_HASH(*q) == str_hash && Q_GET_LENGTH(*q) == str_len && memcmp(Q_GET_DATA(*q), str, str_len) == 0) { - return pool->total_prev_len + (q - pool->qstrs); + for (mp_uint_t at = 0, top = pool->len; at < top; at++) { + if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len + && memcmp(pool->qstrs[at], str, str_len) == 0) { + return pool->total_prev_len + at; } } } @@ -211,14 +214,14 @@ qstr qstr_from_strn(const char *str, size_t len) { } // compute number of bytes needed to intern this string - size_t n_bytes = MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len + 1; + size_t n_bytes = len + 1; if (MP_STATE_VM(qstr_last_chunk) != NULL && MP_STATE_VM(qstr_last_used) + n_bytes > MP_STATE_VM(qstr_last_alloc)) { // not enough room at end of previously interned string so try to grow - byte *new_p = m_renew_maybe(byte, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_alloc) + n_bytes, false); + char *new_p = m_renew_maybe(char, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_alloc) + n_bytes, false); if (new_p == NULL) { // could not grow existing memory; shrink it to fit previous - (void)m_renew_maybe(byte, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_used), false); + (void)m_renew_maybe(char, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_used), false); MP_STATE_VM(qstr_last_chunk) = NULL; } else { // could grow existing memory @@ -232,10 +235,10 @@ qstr qstr_from_strn(const char *str, size_t len) { if (al < MICROPY_ALLOC_QSTR_CHUNK_INIT) { al = MICROPY_ALLOC_QSTR_CHUNK_INIT; } - MP_STATE_VM(qstr_last_chunk) = m_new_maybe(byte, al); + MP_STATE_VM(qstr_last_chunk) = m_new_maybe(char, al); if (MP_STATE_VM(qstr_last_chunk) == NULL) { // failed to allocate a large chunk so try with exact size - MP_STATE_VM(qstr_last_chunk) = m_new_maybe(byte, n_bytes); + MP_STATE_VM(qstr_last_chunk) = m_new_maybe(char, n_bytes); if (MP_STATE_VM(qstr_last_chunk) == NULL) { QSTR_EXIT(); m_malloc_fail(n_bytes); @@ -247,40 +250,38 @@ qstr qstr_from_strn(const char *str, size_t len) { } // allocate memory from the chunk for this new interned string's data - byte *q_ptr = MP_STATE_VM(qstr_last_chunk) + MP_STATE_VM(qstr_last_used); + char *q_ptr = MP_STATE_VM(qstr_last_chunk) + MP_STATE_VM(qstr_last_used); MP_STATE_VM(qstr_last_used) += n_bytes; // store the interned strings' data mp_uint_t hash = qstr_compute_hash((const byte *)str, len); - Q_SET_HASH(q_ptr, hash); - Q_SET_LENGTH(q_ptr, len); - memcpy(q_ptr + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN, str, len); - q_ptr[MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0'; - q = qstr_add(q_ptr); + memcpy(q_ptr, str, len); + q_ptr[len] = '\0'; + q = qstr_add(hash, len, q_ptr); } QSTR_EXIT(); return q; } mp_uint_t qstr_hash(qstr q) { - const byte *qd = find_qstr(q); - return Q_GET_HASH(qd); + qstr_pool_t *pool = find_qstr(&q); + return pool->hashes[q]; } size_t qstr_len(qstr q) { - const byte *qd = find_qstr(q); - return Q_GET_LENGTH(qd); + qstr_pool_t *pool = find_qstr(&q); + return pool->lengths[q]; } const char *qstr_str(qstr q) { - const byte *qd = find_qstr(q); - return (const char *)Q_GET_DATA(qd); + qstr_pool_t *pool = find_qstr(&q); + return pool->qstrs[q]; } const byte *qstr_data(qstr q, size_t *len) { - const byte *qd = find_qstr(q); - *len = Q_GET_LENGTH(qd); - return Q_GET_DATA(qd); + qstr_pool_t *pool = find_qstr(&q); + *len = pool->lengths[q]; + return (byte *)pool->qstrs[q]; } void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, size_t *n_total_bytes) { @@ -292,13 +293,14 @@ void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, si for (qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL && pool != &CONST_POOL; pool = pool->prev) { *n_pool += 1; *n_qstr += pool->len; - for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) { - *n_str_data_bytes += Q_GET_ALLOC(*q); + for (qstr_len_t *l = pool->lengths, *l_top = pool->lengths + pool->len; l < l_top; l++) { + *n_str_data_bytes += *l + 1; } #if MICROPY_ENABLE_GC *n_total_bytes += gc_nbytes(pool); // this counts actual bytes used in heap #else - *n_total_bytes += sizeof(qstr_pool_t) + sizeof(qstr) * pool->alloc; + *n_total_bytes += sizeof(qstr_pool_t) + + (sizeof(const char *) + sizeof(qstr_hash_t) + sizeof(qstr_len_t)) * pool->alloc; #endif } *n_total_bytes += *n_str_data_bytes; @@ -309,8 +311,8 @@ void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, si void qstr_dump_data(void) { QSTR_ENTER(); for (qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL && pool != &CONST_POOL; pool = pool->prev) { - for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) { - mp_printf(&mp_plat_print, "Q(%s)\n", Q_GET_DATA(*q)); + for (const char **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) { + mp_printf(&mp_plat_print, "Q(%s)\n", *q); } } QSTR_EXIT(); diff --git a/py/qstr.h b/py/qstr.h index 0b6fb12b08..19672afc0d 100644 --- a/py/qstr.h +++ b/py/qstr.h @@ -38,7 +38,7 @@ // first entry in enum will be MP_QSTRnull=0, which indicates invalid/no qstr enum { #ifndef NO_QSTR -#define QDEF(id, str) id, +#define QDEF(id, hash, len, str) id, #include "genhdr/qstrdefs.generated.h" #undef QDEF #endif @@ -47,12 +47,30 @@ enum { typedef size_t qstr; +#if MICROPY_QSTR_BYTES_IN_HASH == 1 +typedef uint8_t qstr_hash_t; +#elif MICROPY_QSTR_BYTES_IN_HASH == 2 +typedef uint16_t qstr_hash_t; +#else +#error unimplemented qstr hash decoding +#endif + +#if MICROPY_QSTR_BYTES_IN_LEN == 1 +typedef uint8_t qstr_len_t; +#elif MICROPY_QSTR_BYTES_IN_LEN == 2 +typedef uint16_t qstr_len_t; +#else +#error unimplemented qstr length decoding +#endif + typedef struct _qstr_pool_t { struct _qstr_pool_t *prev; size_t total_prev_len; size_t alloc; size_t len; - const byte *qstrs[]; + qstr_hash_t *hashes; + qstr_len_t *lengths; + const char *qstrs[]; } qstr_pool_t; #define QSTR_TOTAL() (MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len) diff --git a/tools/mpy-tool.py b/tools/mpy-tool.py index aa0272111c..337f580b6e 100755 --- a/tools/mpy-tool.py +++ b/tools/mpy-tool.py @@ -814,7 +814,7 @@ def freeze_mpy(base_qstrs, raw_codes): # don't add duplicates if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new: continue - new[q.qstr_esc] = (len(new), q.qstr_esc, q.str) + new[q.qstr_esc] = (len(new), q.qstr_esc, q.str, bytes_cons(q.str, "utf8")) new = sorted(new.values(), key=lambda x: x[0]) print('#include "py/mpconfig.h"') @@ -864,6 +864,22 @@ def freeze_mpy(base_qstrs, raw_codes): # As in qstr.c, set so that the first dynamically allocated pool is twice this size; must be <= the len qstr_pool_alloc = min(len(new), 10) + print() + print("const qstr_hash_t mp_qstr_frozen_const_hashes[] = {") + qstr_size = {"metadata": 0, "data": 0} + for _, _, _, qbytes in new: + qhash = qstrutil.compute_hash(qbytes, config.MICROPY_QSTR_BYTES_IN_HASH) + print(" %d," % qhash) + print("};") + print() + print("const qstr_len_t mp_qstr_frozen_const_lengths[] = {") + for _, _, _, qbytes in new: + print(" %d," % len(qbytes)) + qstr_size["metadata"] += ( + config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH + ) + qstr_size["data"] += len(qbytes) + print("};") print() print("extern const qstr_pool_t mp_qstr_const_pool;") print("const qstr_pool_t mp_qstr_frozen_const_pool = {") @@ -871,14 +887,11 @@ def freeze_mpy(base_qstrs, raw_codes): print(" MP_QSTRnumber_of, // previous pool size") print(" %u, // allocated entries" % qstr_pool_alloc) print(" %u, // used entries" % len(new)) + print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,") + print(" (qstr_len_t *)mp_qstr_frozen_const_lengths,") print(" {") - for _, _, qstr in new: - print( - " %s," - % qstrutil.make_bytes( - config.MICROPY_QSTR_BYTES_IN_LEN, config.MICROPY_QSTR_BYTES_IN_HASH, qstr - ) - ) + for _, _, qstr, qbytes in new: + print(' "%s",' % qstrutil.escape_bytes(qstr, qbytes)) print(" },") print("};")