From 64c79a54239d700ecb8d7f07b13388dc79f28993 Mon Sep 17 00:00:00 2001 From: Jim Mussared Date: Wed, 15 Feb 2023 16:09:04 +1100 Subject: [PATCH] py/qstr: Add support for sorted qstr pools. This provides a significant performance boost for qstr_find_strn, which is called a lot during parsing and loading of .mpy files, as well as interning of string objects (which happens in most string methods that return new strings). Also adds comments to explain the "static" qstrs. These are part of the .mpy ABI and avoid needing to duplicate string data for QSTRs known to already be in the firmware. The static pool isn't currently sorted, but in the future we could either split the static pool into the sorted regions, or in the next .mpy version just sort them. Based on initial work done by @amirgon in #6896. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared --- py/makeqstrdata.py | 114 ++++++++++++++++++++++++++++++++------------ py/persistentcode.c | 5 ++ py/qstr.c | 98 +++++++++++++++++++++++++++++++------ py/qstr.h | 19 ++++++-- tools/mpy-tool.py | 18 ++++--- 5 files changed, 200 insertions(+), 54 deletions(-) diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py index 71f529fb62..2350fe04d2 100644 --- a/py/makeqstrdata.py +++ b/py/makeqstrdata.py @@ -52,7 +52,8 @@ codepoint2name[ord("^")] = "caret" codepoint2name[ord("|")] = "pipe" codepoint2name[ord("~")] = "tilde" -# static qstrs, should be sorted +# static qstrs, these must maintain a specific order for .mpy compatibility +# See QSTR_LAST_STATIC at the top of py/persistentcode.c static_qstr_list = [ "", @@ -222,6 +223,71 @@ static_qstr_list = [ "zip", ] +# Additional QSTRs that must have index <255 because they are stored in +# `mp_binary_op_method_name` and `mp_unary_op_method_name` (see py/objtype.c). +# These are not part of the .mpy compatibility list, but we place them in the +# fixed unsorted pool (i.e. QDEF0) to ensure their indices are small. +operator_qstr_list = { + "__bool__", + "__pos__", + "__neg__", + "__invert__", + "__abs__", + "__float__", + "__complex__", + "__sizeof__", + "__lt__", + "__gt__", + "__eq__", + "__le__", + "__ge__", + "__ne__", + "__contains__", + "__iadd__", + "__isub__", + "__imul__", + "__imatmul__", + "__ifloordiv__", + "__itruediv__", + "__imod__", + "__ipow__", + "__ior__", + "__ixor__", + "__iand__", + "__ilshift__", + "__irshift__", + "__add__", + "__sub__", + "__mul__", + "__matmul__", + "__floordiv__", + "__truediv__", + "__mod__", + "__divmod__", + "__pow__", + "__or__", + "__xor__", + "__and__", + "__lshift__", + "__rshift__", + "__radd__", + "__rsub__", + "__rmul__", + "__rmatmul__", + "__rfloordiv__", + "__rtruediv__", + "__rmod__", + "__rpow__", + "__ror__", + "__rxor__", + "__rand__", + "__rlshift__", + "__rrshift__", + "__get__", + "__set__", + "__delete__", +} + # this must match the equivalent function in qstr.c def compute_hash(qstr, bytes_hash): @@ -244,22 +310,13 @@ def qstr_escape(qst): return re.sub(r"[^A-Za-z0-9_]", esc_char, qst) +static_qstr_list_ident = list(map(qstr_escape, static_qstr_list)) + + def parse_input_headers(infiles): qcfgs = {} qstrs = {} - # add static qstrs - for qstr in static_qstr_list: - # work out the corresponding qstr name - ident = qstr_escape(qstr) - - # don't add duplicates - assert ident not in qstrs - - # add the qstr to the list, with order number to retain original order in file - order = len(qstrs) - 300000 - qstrs[ident] = (order, ident, qstr) - # read the qstrs in from the input files for infile in infiles: with open(infile, "rt") as f: @@ -294,22 +351,12 @@ def parse_input_headers(infiles): ident = qstr_escape(qstr) # don't add duplicates + if ident in static_qstr_list_ident: + continue if ident in qstrs: continue - # add the qstr to the list, with order number to retain original order in file - order = len(qstrs) - # but put special method names like __add__ at the top of list, so - # that their id's fit into a byte - if ident == "": - # Sort empty qstr above all still - order = -200000 - elif ident == "__dir__": - # Put __dir__ after empty qstr for builtin dir() to work - order = -190000 - elif ident.startswith("__"): - order -= 100000 - qstrs[ident] = (order, ident, qstr) + qstrs[ident] = (ident, qstr) if not qcfgs: sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n") @@ -348,12 +395,19 @@ def print_qstr_data(qcfgs, qstrs): print("") # add NULL qstr with no hash or data - print('QDEF(MP_QSTRnull, 0, 0, "")') + print('QDEF0(MP_QSTRnull, 0, 0, "")') - # go through each qstr and print it out - for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]): + # add static qstrs to the first unsorted pool + for qstr in static_qstr_list: qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr) - print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes)) + print("QDEF0(MP_QSTR_%s, %s)" % (qstr_escape(qstr), qbytes)) + + # add remaining qstrs to the sorted (by value) pool (unless they're in + # operator_qstr_list, in which case add them to the unsorted pool) + for ident, qstr in sorted(qstrs.values(), key=lambda x: x[1]): + qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr) + pool = 0 if qstr in operator_qstr_list else 1 + print("QDEF%d(MP_QSTR_%s, %s)" % (pool, ident, qbytes)) def do_work(infiles): diff --git a/py/persistentcode.c b/py/persistentcode.c index 0feb93bdbe..df1003babf 100644 --- a/py/persistentcode.c +++ b/py/persistentcode.c @@ -40,6 +40,11 @@ #include "py/smallint.h" +// makeqstrdata.py has a fixed list of qstrs at the start that we can assume +// are available with know indices on all MicroPython implementations, and +// avoid needing to duplicate the string data in the .mpy file. This is the +// last one in that list (anything with a qstr less than or equal to this is +// assumed to be in the list). #define QSTR_LAST_STATIC MP_QSTR_zip #if MICROPY_DYNAMIC_COMPILER diff --git a/py/qstr.c b/py/qstr.c index ea700566f4..0ae08354a5 100644 --- a/py/qstr.c +++ b/py/qstr.c @@ -33,9 +33,6 @@ #include "py/gc.h" #include "py/runtime.h" -// NOTE: we are using linear arrays to store and search for qstr's (unique strings, interned strings) -// ultimately we will replace this with a static hash table of some kind - #if MICROPY_DEBUG_VERBOSE // print debugging info #define DEBUG_printf DEBUG_printf #else // don't print debugging info @@ -74,38 +71,94 @@ size_t qstr_compute_hash(const byte *data, size_t len) { return hash; } +// The first pool is the static qstr table. The contents must remain stable as +// it is part of the .mpy ABI. See the top of py/persistentcode.c and +// static_qstr_list in makeqstrdata.py. This pool is unsorted (although in a +// future .mpy version we could re-order them and make it sorted). It also +// contains additional qstrs that must have IDs <256, see operator_qstr_list +// in makeqstrdata.py. +const qstr_hash_t mp_qstr_const_hashes_static[] = { + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) hash, +#define QDEF1(id, hash, len, str) + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 + #endif +}; + +const qstr_len_t mp_qstr_const_lengths_static[] = { + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) len, +#define QDEF1(id, hash, len, str) + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 + #endif +}; + +const qstr_pool_t mp_qstr_const_pool_static = { + NULL, // no previous pool + 0, // no previous pool + false, // is_sorted + MICROPY_ALLOC_QSTR_ENTRIES_INIT, + MP_QSTRnumber_of_static, // corresponds to number of strings in array just below + (qstr_hash_t *)mp_qstr_const_hashes_static, + (qstr_len_t *)mp_qstr_const_lengths_static, + { + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) str, +#define QDEF1(id, hash, len, str) + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 + #endif + }, +}; + +// The next pool is the remainder of the qstrs defined in the firmware. This +// is sorted. const qstr_hash_t mp_qstr_const_hashes[] = { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) hash, +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) hash, #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 #endif }; const qstr_len_t mp_qstr_const_lengths[] = { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) len, +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) len, #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 #endif }; const qstr_pool_t mp_qstr_const_pool = { - NULL, // no previous pool - 0, // no previous pool + &mp_qstr_const_pool_static, + MP_QSTRnumber_of_static, + true, // is_sorted MICROPY_ALLOC_QSTR_ENTRIES_INIT, - MP_QSTRnumber_of, // corresponds to number of strings in array just below + MP_QSTRnumber_of - MP_QSTRnumber_of_static, // corresponds to number of strings in array just below (qstr_hash_t *)mp_qstr_const_hashes, (qstr_len_t *)mp_qstr_const_lengths, { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) str, +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) str, #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 #endif }, }; +// If frozen code is enabled, then there is an additional, sorted, ROM pool +// containing additional qstrs required by the frozen code. #ifdef MICROPY_QSTR_EXTRA_POOL extern const qstr_pool_t MICROPY_QSTR_EXTRA_POOL; #define CONST_POOL MICROPY_QSTR_EXTRA_POOL @@ -185,7 +238,24 @@ qstr qstr_find_strn(const char *str, size_t str_len) { // search pools for the data for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) { - for (mp_uint_t at = 0, top = pool->len; at < top; at++) { + size_t low = 0; + size_t high = pool->len - 1; + + // binary search inside the pool + if (pool->is_sorted) { + while (high - low > 1) { + size_t mid = (low + high) / 2; + int cmp = strncmp(str, pool->qstrs[mid], str_len); + if (cmp <= 0) { + high = mid; + } else { + low = mid; + } + } + } + + // sequential search for the remaining strings + for (mp_uint_t at = low; at < high + 1; at++) { if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len && memcmp(pool->qstrs[at], str, str_len) == 0) { return pool->total_prev_len + at; @@ -194,7 +264,7 @@ qstr qstr_find_strn(const char *str, size_t str_len) { } // not found; return null qstr - return 0; + return MP_QSTRnull; } qstr qstr_from_str(const char *str) { diff --git a/py/qstr.h b/py/qstr.h index 0ef861f33e..0c4fc4632c 100644 --- a/py/qstr.h +++ b/py/qstr.h @@ -38,9 +38,21 @@ // first entry in enum will be MP_QSTRnull=0, which indicates invalid/no qstr enum { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) id, +#define QDEF0(id, hash, len, str) id, +#define QDEF1(id, hash, len, str) #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 + #endif + MP_QSTRnumber_of_static, + MP_QSTRstart_of_main = MP_QSTRnumber_of_static - 1, // unused but shifts the enum counter back one + + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) id, + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 #endif MP_QSTRnumber_of, // no underscore so it can't clash with any of the above }; @@ -66,7 +78,8 @@ typedef uint16_t qstr_len_t; typedef struct _qstr_pool_t { const struct _qstr_pool_t *prev; - size_t total_prev_len; + size_t total_prev_len : (8 * sizeof(size_t) - 1); + size_t is_sorted : 1; size_t alloc; size_t len; qstr_hash_t *hashes; diff --git a/tools/mpy-tool.py b/tools/mpy-tool.py index fc3518d359..95b2c492d9 100755 --- a/tools/mpy-tool.py +++ b/tools/mpy-tool.py @@ -1396,15 +1396,16 @@ def disassemble_mpy(compiled_modules): cm.disassemble() -def freeze_mpy(base_qstrs, compiled_modules): +def freeze_mpy(firmware_qstr_idents, compiled_modules): # add to qstrs new = {} for q in global_qstrs.qstrs: - # don't add duplicates - if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new: + # don't add duplicates that are already in the firmware + if q is None or q.qstr_esc in firmware_qstr_idents or q.qstr_esc in new: continue new[q.qstr_esc] = (len(new), q.qstr_esc, q.str, bytes_cons(q.str, "utf8")) - new = sorted(new.values(), key=lambda x: x[0]) + # Sort by string value (because this is a sorted pool). + new = sorted(new.values(), key=lambda x: x[2]) print('#include "py/mpconfig.h"') print('#include "py/objint.h"') @@ -1485,6 +1486,7 @@ def freeze_mpy(base_qstrs, compiled_modules): print("const qstr_pool_t mp_qstr_frozen_const_pool = {") print(" &mp_qstr_const_pool, // previous pool") print(" MP_QSTRnumber_of, // previous pool size") + print(" true, // is_sorted") print(" %u, // allocated entries" % qstr_pool_alloc) print(" %u, // used entries" % len(new)) print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,") @@ -1779,14 +1781,16 @@ def main(): config.native_arch = MP_NATIVE_ARCH_NONE # set config values for qstrs, and get the existing base set of qstrs + # already in the firmware if args.qstr_header: - qcfgs, base_qstrs = qstrutil.parse_input_headers([args.qstr_header]) + qcfgs, extra_qstrs = qstrutil.parse_input_headers([args.qstr_header]) + firmware_qstr_idents = set(qstrutil.static_qstr_list_ident) | set(extra_qstrs.keys()) config.MICROPY_QSTR_BYTES_IN_LEN = int(qcfgs["BYTES_IN_LEN"]) config.MICROPY_QSTR_BYTES_IN_HASH = int(qcfgs["BYTES_IN_HASH"]) else: config.MICROPY_QSTR_BYTES_IN_LEN = 1 config.MICROPY_QSTR_BYTES_IN_HASH = 1 - base_qstrs = list(qstrutil.static_qstr_list) + firmware_qstr_idents = set(qstrutil.static_qstr_list) # Create initial list of global qstrs. global_qstrs = GlobalQStrList() @@ -1808,7 +1812,7 @@ def main(): if args.freeze: try: - freeze_mpy(base_qstrs, compiled_modules) + freeze_mpy(firmware_qstr_idents, compiled_modules) except FreezeError as er: print(er, file=sys.stderr) sys.exit(1)