From c3bd9415cc8e6ba3619fe4b7a2bdac20260b0236 Mon Sep 17 00:00:00 2001 From: Damien George Date: Mon, 20 Jul 2015 11:03:13 +0000 Subject: [PATCH] py: Make qstr hash size configurable, defaults to 2 bytes. This patch makes configurable, via MICROPY_QSTR_BYTES_IN_HASH, the number of bytes used for a qstr hash. It was originally fixed at 2 bytes, and now defaults to 2 bytes. Setting it to 1 byte will save ROM and RAM at a small expense of hash collisions. --- bare-arm/mpconfigport.h | 1 + py/makeqstrdata.py | 12 ++++++---- py/mpconfig.h | 5 ++++ py/qstr.c | 53 +++++++++++++++++++++++------------------ py/qstrdefs.h | 1 + 5 files changed, 44 insertions(+), 28 deletions(-) diff --git a/bare-arm/mpconfigport.h b/bare-arm/mpconfigport.h index cae27cfd9b..b35f9ab463 100644 --- a/bare-arm/mpconfigport.h +++ b/bare-arm/mpconfigport.h @@ -2,6 +2,7 @@ // options to control how Micro Python is built +#define MICROPY_QSTR_BYTES_IN_HASH (1) #define MICROPY_ALLOC_PATH_MAX (512) #define MICROPY_EMIT_X64 (0) #define MICROPY_EMIT_THUMB (0) diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py index 6b7df39cb7..234e65d46f 100644 --- a/py/makeqstrdata.py +++ b/py/makeqstrdata.py @@ -37,12 +37,12 @@ codepoint2name[ord('!')] = 'bang' codepoint2name[ord('\\')] = 'backslash' # this must match the equivalent function in qstr.c -def compute_hash(qstr): +def compute_hash(qstr, bytes_hash): hash = 5381 for char in qstr: hash = (hash * 33) ^ ord(char) # Make sure that valid hash is never zero, zero means "hash not computed" - return (hash & 0xffff) or 1 + return (hash & ((1 << (8 * bytes_hash)) - 1)) or 1 def do_work(infiles): # read the qstrs in from the input files @@ -81,6 +81,7 @@ def do_work(infiles): # get config variables cfg_bytes_len = int(qcfgs['BYTES_IN_LEN']) + cfg_bytes_hash = int(qcfgs['BYTES_IN_HASH']) cfg_max_len = 1 << (8 * cfg_bytes_len) # print out the starte of the generated C header file @@ -88,11 +89,11 @@ def do_work(infiles): print('') # add NULL qstr with no hash or data - print('QDEF(MP_QSTR_NULL, (const byte*)"\\x00\\x00%s" "")' % ('\\x00' * cfg_bytes_len)) + print('QDEF(MP_QSTR_NULL, (const byte*)"%s%s" "")' % ('\\x00' * cfg_bytes_hash, '\\x00' * cfg_bytes_len)) # go through each qstr and print it out for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]): - qhash = compute_hash(qstr) + qhash = compute_hash(qstr, cfg_bytes_hash) # Calculate len of str, taking escapes into account qlen = len(qstr.replace("\\\\", "-").replace("\\", "")) qdata = qstr.replace('"', '\\"') @@ -100,7 +101,8 @@ def do_work(infiles): print('qstr is too long:', qstr) assert False qlen_str = ('\\x%02x' * cfg_bytes_len) % tuple(((qlen >> (8 * i)) & 0xff) for i in range(cfg_bytes_len)) - print('QDEF(MP_QSTR_%s, (const byte*)"\\x%02x\\x%02x%s" "%s")' % (ident, qhash & 0xff, (qhash >> 8) & 0xff, qlen_str, qdata)) + qhash_str = ('\\x%02x' * cfg_bytes_hash) % tuple(((qhash >> (8 * i)) & 0xff) for i in range(cfg_bytes_hash)) + print('QDEF(MP_QSTR_%s, (const byte*)"%s%s" "%s")' % (ident, qhash_str, qlen_str, qdata)) if __name__ == "__main__": do_work(sys.argv[1:]) diff --git a/py/mpconfig.h b/py/mpconfig.h index 0b86ec4350..9f0e909f53 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -152,6 +152,11 @@ #define MICROPY_QSTR_BYTES_IN_LEN (1) #endif +// Number of bytes used to store qstr hash +#ifndef MICROPY_QSTR_BYTES_IN_HASH +#define MICROPY_QSTR_BYTES_IN_HASH (2) +#endif + // Avoid using C stack when making Python function calls. C stack still // may be used if there's no free heap. #ifndef MICROPY_STACKLESS diff --git a/py/qstr.c b/py/qstr.c index dd04ae8f75..afbf0ff9b6 100644 --- a/py/qstr.c +++ b/py/qstr.c @@ -43,22 +43,31 @@ #endif // A qstr is an index into the qstr pool. -// The data for a qstr contains (hash, length, data). -// For now we use very simple encoding, just to get the framework correct: -// - hash is 2 bytes (see function below) -// - length is 2 bytes -// - data follows -// - \0 terminated (for now, so they can be printed using printf) +// The data for a qstr contains (hash, length, data): +// - hash (configurable number of bytes) +// - length (configurable number of bytes) +// - data ("length" number of bytes) +// - \0 terminated (so they can be printed using printf) -#define Q_GET_HASH(q) ((mp_uint_t)(q)[0] | ((mp_uint_t)(q)[1] << 8)) -#define Q_GET_ALLOC(q) (2 + MICROPY_QSTR_BYTES_IN_LEN + Q_GET_LENGTH(q) + 1) -#define Q_GET_DATA(q) ((q) + 2 + MICROPY_QSTR_BYTES_IN_LEN) +#if MICROPY_QSTR_BYTES_IN_HASH == 1 + #define Q_HASH_MASK (0xff) + #define Q_GET_HASH(q) ((mp_uint_t)(q)[0]) + #define Q_SET_HASH(q, hash) do { (q)[0] = (hash); } while (0) +#elif MICROPY_QSTR_BYTES_IN_HASH == 2 + #define Q_HASH_MASK (0xffff) + #define Q_GET_HASH(q) ((mp_uint_t)(q)[0] | ((mp_uint_t)(q)[1] << 8)) + #define Q_SET_HASH(q, hash) do { (q)[0] = (hash); (q)[1] = (hash) >> 8; } while (0) +#else + #error unimplemented qstr hash decoding +#endif +#define Q_GET_ALLOC(q) (MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + Q_GET_LENGTH(q) + 1) +#define Q_GET_DATA(q) ((q) + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN) #if MICROPY_QSTR_BYTES_IN_LEN == 1 - #define Q_GET_LENGTH(q) ((q)[2]) - #define Q_SET_LENGTH(q, len) do { (q)[2] = (len); } while (0) + #define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH]) + #define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); } while (0) #elif MICROPY_QSTR_BYTES_IN_LEN == 2 - #define Q_GET_LENGTH(q) ((q)[2] | ((q)[3] << 8)) - #define Q_SET_LENGTH(q, len) do { (q)[2] = (len); (q)[3] = (len) >> 8; } while (0) + #define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH] | ((q)[MICROPY_QSTR_BYTES_IN_HASH + 1] << 8)) + #define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); (q)[MICROPY_QSTR_BYTES_IN_HASH + 1] = (len) >> 8; } while (0) #else #error unimplemented qstr length decoding #endif @@ -70,7 +79,7 @@ mp_uint_t qstr_compute_hash(const byte *data, mp_uint_t len) { for (const byte *top = data + len; data < top; data++) { hash = ((hash << 5) + hash) ^ (*data); // hash * 33 ^ data } - hash &= 0xffff; + hash &= Q_HASH_MASK; // Make sure that valid hash is never zero, zero means "hash not computed" if (hash == 0) { hash++; @@ -156,7 +165,7 @@ qstr qstr_from_strn(const char *str, mp_uint_t len) { // qstr does not exist in interned pool so need to add it // compute number of bytes needed to intern this string - mp_uint_t n_bytes = 2 + MICROPY_QSTR_BYTES_IN_LEN + len + 1; + mp_uint_t n_bytes = MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len + 1; if (MP_STATE_VM(qstr_last_chunk) != NULL && MP_STATE_VM(qstr_last_used) + n_bytes > MP_STATE_VM(qstr_last_alloc)) { // not enough room at end of previously interned string so try to grow @@ -193,11 +202,10 @@ qstr qstr_from_strn(const char *str, mp_uint_t len) { // store the interned strings' data mp_uint_t hash = qstr_compute_hash((const byte*)str, len); - q_ptr[0] = hash; - q_ptr[1] = hash >> 8; + Q_SET_HASH(q_ptr, hash); Q_SET_LENGTH(q_ptr, len); - memcpy(q_ptr + 2 + MICROPY_QSTR_BYTES_IN_LEN, str, len); - q_ptr[2 + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0'; + memcpy(q_ptr + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN, str, len); + q_ptr[MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0'; q = qstr_add(q_ptr); } return q; @@ -205,7 +213,7 @@ qstr qstr_from_strn(const char *str, mp_uint_t len) { byte *qstr_build_start(mp_uint_t len, byte **q_ptr) { assert(len < (1 << (8 * MICROPY_QSTR_BYTES_IN_LEN))); - *q_ptr = m_new(byte, 2 + MICROPY_QSTR_BYTES_IN_LEN + len + 1); + *q_ptr = m_new(byte, MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len + 1); Q_SET_LENGTH(*q_ptr, len); return Q_GET_DATA(*q_ptr); } @@ -215,9 +223,8 @@ qstr qstr_build_end(byte *q_ptr) { if (q == 0) { mp_uint_t len = Q_GET_LENGTH(q_ptr); mp_uint_t hash = qstr_compute_hash(Q_GET_DATA(q_ptr), len); - q_ptr[0] = hash; - q_ptr[1] = hash >> 8; - q_ptr[2 + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0'; + Q_SET_HASH(q_ptr, hash); + q_ptr[MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0'; q = qstr_add(q_ptr); } else { m_del(byte, q_ptr, Q_GET_ALLOC(q_ptr)); diff --git a/py/qstrdefs.h b/py/qstrdefs.h index 48131636f1..0f066e3249 100644 --- a/py/qstrdefs.h +++ b/py/qstrdefs.h @@ -31,6 +31,7 @@ // qstr configuration passed to makeqstrdata.py of the form QCFG(key, value) QCFG(BYTES_IN_LEN, MICROPY_QSTR_BYTES_IN_LEN) +QCFG(BYTES_IN_HASH, MICROPY_QSTR_BYTES_IN_HASH) Q() Q(*)