From 0699c6bf9e7a1754002948063390b68590fc3e96 Mon Sep 17 00:00:00 2001 From: Damien George Date: Sun, 31 Jan 2016 21:45:22 +0000 Subject: [PATCH] tools: Add mpy-tool.py, to work with .mpy files. Currently it can freeze .mpy files. --- tools/mpy-tool.py | 515 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 515 insertions(+) create mode 100755 tools/mpy-tool.py diff --git a/tools/mpy-tool.py b/tools/mpy-tool.py new file mode 100755 index 0000000000..fd79b56f6c --- /dev/null +++ b/tools/mpy-tool.py @@ -0,0 +1,515 @@ +#!/usr/bin/env python3 +# +# This file is part of the MicroPython project, http://micropython.org/ +# +# The MIT License (MIT) +# +# Copyright (c) 2016 Damien P. George +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
import os
import sys
from collections import namedtuple

# makeqstrdata is expected in ../py.  The original cwd-relative entry is kept
# first for backward compatibility; the second entry resolves the same
# directory relative to this script so the tool also works when it is not
# launched from within its own directory.
sys.path.append('../py')
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'py'))
import makeqstrdata as qstrutil

class FreezeError(Exception):
    """Raised when a raw code object cannot be frozen.

    rawcode is the RawCode being frozen and msg describes the problem.
    """

    def __init__(self, rawcode, msg):
        super().__init__(rawcode, msg)
        self.rawcode = rawcode
        self.msg = msg

    def __str__(self):
        # source_file is a qstr namedtuple; report its plain string form
        return 'error while freezing %s: %s' % (self.rawcode.source_file.str, self.msg)

class Config:
    """Holder for target/.mpy configuration values.

    main() sets MICROPY_LONGINT_IMPL and MPZ_DIG_SIZE; read_mpy() sets the
    feature-flag derived values from the .mpy header.
    """
    MICROPY_LONGINT_IMPL_NONE = 0
    MICROPY_LONGINT_IMPL_LONGLONG = 1
    MICROPY_LONGINT_IMPL_MPZ = 2
    # default until read_mpy() overwrites it from the .mpy feature flags
    MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE = False
config = Config()

# opcode argument formats, as returned by mp_opcode_format
MP_OPCODE_BYTE = 0
MP_OPCODE_QSTR = 1
MP_OPCODE_VAR_UINT = 2
MP_OPCODE_OFFSET = 3

# extra bytes:
MP_BC_MAKE_CLOSURE = 0x62
MP_BC_MAKE_CLOSURE_DEFARGS = 0x63
MP_BC_RAISE_VARARGS = 0x5c
# extra byte if caching enabled:
MP_BC_LOAD_NAME = 0x1c
MP_BC_LOAD_GLOBAL = 0x1d
MP_BC_LOAD_ATTR = 0x1e
MP_BC_STORE_ATTR = 0x26

def make_opcode_format():
    """Build the opcode format table.

    Returns a bytes object of 64 entries; each entry packs the 2-bit format
    codes (MP_OPCODE_*) of four consecutive opcodes, lowest opcode in the
    lowest bits.
    """
    def OC4(a, b, c, d):
        return a | (b << 2) | (c << 4) | (d << 6)
    U = 0
    B = 0
    Q = 1
    V = 2
    O = 3
    return bytes((
        # this table is taken verbatim from py/bc.c
        OC4(U, U, U, U), # 0x00-0x03
        OC4(U, U, U, U), # 0x04-0x07
        OC4(U, U, U, U), # 0x08-0x0b
        OC4(U, U, U, U), # 0x0c-0x0f
        OC4(B, B, B, U), # 0x10-0x13
        OC4(V, U, Q, V), # 0x14-0x17
        OC4(B, U, V, V), # 0x18-0x1b
        OC4(Q, Q, Q, Q), # 0x1c-0x1f
        OC4(B, B, V, V), # 0x20-0x23
        OC4(Q, Q, Q, B), # 0x24-0x27
        OC4(V, V, Q, Q), # 0x28-0x2b
        OC4(U, U, U, U), # 0x2c-0x2f
        OC4(B, B, B, B), # 0x30-0x33
        OC4(B, O, O, O), # 0x34-0x37
        OC4(O, O, U, U), # 0x38-0x3b
        OC4(U, O, B, O), # 0x3c-0x3f
        OC4(O, B, B, O), # 0x40-0x43
        OC4(B, B, O, U), # 0x44-0x47
        OC4(U, U, U, U), # 0x48-0x4b
        OC4(U, U, U, U), # 0x4c-0x4f
        OC4(V, V, V, V), # 0x50-0x53
        OC4(B, V, V, V), # 0x54-0x57
        OC4(V, V, V, B), # 0x58-0x5b
        OC4(B, B, B, U), # 0x5c-0x5f
        OC4(V, V, V, V), # 0x60-0x63
        OC4(V, V, V, V), # 0x64-0x67
        OC4(Q, Q, B, U), # 0x68-0x6b
        OC4(U, U, U, U), # 0x6c-0x6f

        OC4(B, B, B, B), # 0x70-0x73
        OC4(B, B, B, B), # 0x74-0x77
        OC4(B, B, B, B), # 0x78-0x7b
        OC4(B, B, B, B), # 0x7c-0x7f
        OC4(B, B, B, B), # 0x80-0x83
        OC4(B, B, B, B), # 0x84-0x87
        OC4(B, B, B, B), # 0x88-0x8b
        OC4(B, B, B, B), # 0x8c-0x8f
        OC4(B, B, B, B), # 0x90-0x93
        OC4(B, B, B, B), # 0x94-0x97
        OC4(B, B, B, B), # 0x98-0x9b
        OC4(B, B, B, B), # 0x9c-0x9f
        OC4(B, B, B, B), # 0xa0-0xa3
        OC4(B, B, B, B), # 0xa4-0xa7
        OC4(B, B, B, B), # 0xa8-0xab
        OC4(B, B, B, B), # 0xac-0xaf

        OC4(B, B, B, B), # 0xb0-0xb3
        OC4(B, B, B, B), # 0xb4-0xb7
        OC4(B, B, B, B), # 0xb8-0xbb
        OC4(B, B, B, B), # 0xbc-0xbf

        OC4(B, B, B, B), # 0xc0-0xc3
        OC4(B, B, B, B), # 0xc4-0xc7
        OC4(B, B, B, B), # 0xc8-0xcb
        OC4(B, B, B, B), # 0xcc-0xcf

        OC4(B, B, B, B), # 0xd0-0xd3
        OC4(B, B, B, B), # 0xd4-0xd7
        OC4(B, B, B, B), # 0xd8-0xdb
        OC4(B, B, B, B), # 0xdc-0xdf

        OC4(B, B, B, B), # 0xe0-0xe3
        OC4(B, B, B, B), # 0xe4-0xe7
        OC4(B, B, B, B), # 0xe8-0xeb
        OC4(B, B, B, B), # 0xec-0xef

        OC4(B, B, B, B), # 0xf0-0xf3
        OC4(B, B, B, B), # 0xf4-0xf7
        OC4(B, B, B, U), # 0xf8-0xfb
        OC4(U, U, U, U), # 0xfc-0xff
    ))

# this function is modelled on the one in py/bc.c
def mp_opcode_format(bytecode, ip, opcode_format=make_opcode_format()):
    """Return (format, size) of the opcode starting at bytecode[ip].

    format is one of the MP_OPCODE_* codes and size is the total number of
    bytes occupied by the opcode and its arguments.  The opcode_format table
    is built once, when the function is defined.
    """
    opcode = bytecode[ip]
    ip_start = ip
    f = (opcode_format[opcode >> 2] >> (2 * (opcode & 3))) & 3
    if f == MP_OPCODE_QSTR:
        ip += 3
        # The cached-lookup opcodes all take a qstr argument, so their
        # trailing cache byte has to be accounted for in this branch.
        if config.MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE and opcode in (
                MP_BC_LOAD_NAME,
                MP_BC_LOAD_GLOBAL,
                MP_BC_LOAD_ATTR,
                MP_BC_STORE_ATTR):
            ip += 1
    else:
        extra_byte = opcode in (
            MP_BC_RAISE_VARARGS,
            MP_BC_MAKE_CLOSURE,
            MP_BC_MAKE_CLOSURE_DEFARGS,
        )
        ip += 1
        if f == MP_OPCODE_VAR_UINT:
            # skip continuation bytes (top bit set), then the final byte
            while bytecode[ip] & 0x80 != 0:
                ip += 1
            ip += 1
        elif f == MP_OPCODE_OFFSET:
            ip += 2
        ip += extra_byte
    return f, ip - ip_start

def decode_uint(bytecode, ip):
    """Decode a variable-length unsigned int (7 bits per byte, most
    significant group first, top bit set on all but the last byte).

    Returns (ip, value) where ip is the index just past the encoded number.
    """
    unum = 0
    while True:
        val = bytecode[ip]
        ip += 1
        unum = (unum << 7) | (val & 0x7f)
        if not (val & 0x80):
            break
    return ip, unum

def extract_prelude(bytecode):
    """Parse the prelude at the start of a bytecode blob.

    Returns (ip, ip2, prelude) where ip is the index of the first opcode,
    ip2 is the index of the simple_name qstr, and prelude is the tuple
    (n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args,
    n_def_pos_args, code_info_size).
    """
    ip = 0
    ip, n_state = decode_uint(bytecode, ip)
    ip, n_exc_stack = decode_uint(bytecode, ip)
    scope_flags = bytecode[ip]; ip += 1
    n_pos_args = bytecode[ip]; ip += 1
    n_kwonly_args = bytecode[ip]; ip += 1
    n_def_pos_args = bytecode[ip]; ip += 1
    ip2, code_info_size = decode_uint(bytecode, ip)
    ip += code_info_size
    # skip forward to the 0xff terminator byte
    while bytecode[ip] != 0xff:
        ip += 1
    ip += 1
    # ip now points to first opcode
    # ip2 points to simple_name qstr
    return ip, ip2, (n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args, code_info_size)

class RawCode:
    """One code object read from a .mpy file, together with its children.

    bytecode is the mutable bytecode blob (qstr slots hold indices into
    global_qstrs), qstrs is a list of global_qstrs indices, objs the list of
    constant objects and raw_codes the list of child RawCode instances.
    """

    def __init__(self, bytecode, qstrs, objs, raw_codes):
        # set core variables
        self.bytecode = bytecode
        self.qstrs = qstrs
        self.objs = objs
        self.raw_codes = raw_codes

        # extract prelude
        self.ip, self.ip2, self.prelude = extract_prelude(self.bytecode)
        self.simple_name = self._unpack_qstr(self.ip2)
        self.source_file = self._unpack_qstr(self.ip2 + 2)

    def _unpack_qstr(self, ip):
        """Return the global_qstrs entry whose 16-bit little-endian index is
        stored at bytecode[ip]."""
        qst = self.bytecode[ip] | self.bytecode[ip + 1] << 8
        return global_qstrs[qst]

    def dump(self):
        """Dump this code object (not yet implemented beyond recursion)."""
        # dump children first
        for rc in self.raw_codes:
            rc.dump()
        # TODO

    def freeze(self, parent_name):
        """Print C source for this code object and its children to stdout.

        parent_name is the escaped-name prefix used to build unique C
        identifiers.  Sets self.escaped_name.  Raises FreezeError for a
        constant object that cannot be frozen.
        """
        self.escaped_name = parent_name + self.simple_name.qstr_esc

        # emit children first
        for rc in self.raw_codes:
            rc.freeze(self.escaped_name + '_')

        # generate bytecode data
        print()
        print('// frozen bytecode for file %s, scope %s%s' % (self.source_file.str, parent_name, self.simple_name.str))
        print('STATIC const byte bytecode_data_%s[%u] = {' % (self.escaped_name, len(self.bytecode)))
        print('   ', end='')
        for i in range(self.ip2):
            print(' 0x%02x,' % self.bytecode[i], end='')
        print()
        # the two prelude qstrs are emitted symbolically, not as raw bytes
        print('   ', self.simple_name.qstr_id, '& 0xff,', self.simple_name.qstr_id, '>> 8,')
        print('   ', self.source_file.qstr_id, '& 0xff,', self.source_file.qstr_id, '>> 8,')
        print('   ', end='')
        for i in range(self.ip2 + 4, self.ip):
            print(' 0x%02x,' % self.bytecode[i], end='')
        print()
        ip = self.ip
        while ip < len(self.bytecode):
            f, sz = mp_opcode_format(self.bytecode, ip)
            if f == MP_OPCODE_QSTR:
                qst = self._unpack_qstr(ip + 1).qstr_id
                # any bytes beyond opcode + qstr (the cache byte) are emitted verbatim
                extra = ''.join(' 0x%02x,' % b for b in self.bytecode[ip + 3:ip + sz])
                print('   ', '0x%02x,' % self.bytecode[ip], qst, '& 0xff,', qst, '>> 8,' + extra)
            else:
                print('   ', ''.join('0x%02x, ' % self.bytecode[ip + i] for i in range(sz)))
            ip += sz
        print('};')

        # generate constant objects
        for i, obj in enumerate(self.objs):
            obj_name = 'const_obj_%s_%u' % (self.escaped_name, i)
            if type(obj) is str:
                obj = bytes(obj, 'utf8')
                print('STATIC const mp_obj_str_t %s = '
                    '{{&mp_type_str}, 0, %u, (const byte*)"%s"};'
                    % (obj_name, len(obj), ''.join(('\\x%02x' % b) for b in obj)))
            elif type(obj) is bytes:
                print('STATIC const mp_obj_str_t %s = '
                    '{{&mp_type_bytes}, 0, %u, (const byte*)"%s"};'
                    % (obj_name, len(obj), ''.join(('\\x%02x' % b) for b in obj)))
            elif type(obj) is int:
                if config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_NONE:
                    # TODO check if we can actually fit this long-int into a small-int
                    raise FreezeError(self, 'target does not support long int')
                elif config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_LONGLONG:
                    # TODO
                    raise FreezeError(self, 'freezing int to long-long is not implemented')
                elif config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_MPZ:
                    neg = 0
                    if obj < 0:
                        obj = -obj
                        neg = 1
                    bits_per_dig = config.MPZ_DIG_SIZE
                    # split the magnitude into digits, least significant first
                    digs = []
                    z = obj
                    while z:
                        digs.append(z & ((1 << bits_per_dig) - 1))
                        z >>= bits_per_dig
                    ndigs = len(digs)
                    digs = ','.join(('%#x' % d) for d in digs)
                    print('STATIC const mp_obj_int_t %s = {{&mp_type_int}, '
                        '{.neg=%u, .fixed_dig=1, .alloc=%u, .len=%u, .dig=(uint%u_t[]){%s}}};'
                        % (obj_name, neg, ndigs, ndigs, bits_per_dig, digs))
            elif type(obj) is float:
                # works for REPR A and B only
                print('STATIC const mp_obj_float_t %s = {{&mp_type_float}, %.16g};'
                    % (obj_name, obj))
            else:
                # TODO
                raise FreezeError(self, 'freezing of object %r is not implemented' % (obj,))

        # generate constant table
        print('STATIC const mp_uint_t const_table_data_%s[%u] = {'
            % (self.escaped_name, len(self.qstrs) + len(self.objs) + len(self.raw_codes)))
        for qst in self.qstrs:
            print('    (mp_uint_t)MP_OBJ_NEW_QSTR(%s),' % global_qstrs[qst].qstr_id)
        for i in range(len(self.objs)):
            print('    (mp_uint_t)&const_obj_%s_%u,' % (self.escaped_name, i))
        for rc in self.raw_codes:
            print('    (mp_uint_t)&raw_code_%s,' % rc.escaped_name)
        print('};')

        # generate module
        # only a code object with an empty simple name is emitted without STATIC
        if self.simple_name.str != '':
            print('STATIC ', end='')
        print('const mp_raw_code_t raw_code_%s = {' % self.escaped_name)
        print('    .kind = MP_CODE_BYTECODE,')
        print('    .scope_flags = 0x%02x,' % self.prelude[2])
        print('    .n_pos_args = %u,' % self.prelude[3])
        print('    .data.u_byte = {')
        print('        .bytecode = bytecode_data_%s,' % self.escaped_name)
        print('        .const_table = const_table_data_%s,' % self.escaped_name)
        print('        #if MICROPY_PERSISTENT_CODE_SAVE')
        print('        .bc_len = %u,' % len(self.bytecode))
        print('        .n_obj = %u,' % len(self.objs))
        print('        .n_raw_code = %u,' % len(self.raw_codes))
        print('        #endif')
        print('    },')
        print('};')

def read_uint(f):
    """Read a variable-length unsigned int from binary file f (same encoding
    as decode_uint) and return it."""
    i = 0
    while True:
        b = f.read(1)[0]
        i = (i << 7) | (b & 0x7f)
        if b & 0x80 == 0:
            break
    return i

# every qstr read from any input file, in order of appearance
global_qstrs = []
qstr_type = namedtuple('qstr', ('str', 'qstr_esc', 'qstr_id'))
def read_qstr(f):
    """Read a length-prefixed UTF-8 qstr from f, append it to global_qstrs
    and return its index there."""
    ln = read_uint(f)
    data = str(f.read(ln), 'utf8')
    qstr_esc = qstrutil.qstr_escape(data)
    global_qstrs.append(qstr_type(data, qstr_esc, 'MP_QSTR_' + qstr_esc))
    return len(global_qstrs) - 1

def read_obj(f):
    """Read one constant object from f and return it as a Python value.

    The object is introduced by a one-byte type code: 'e' (Ellipsis), or
    's', 'b', 'i', 'f', 'c' followed by a length-prefixed payload.
    Raises Exception for any other type code.
    """
    obj_type = f.read(1)
    if obj_type == b'e':
        return Ellipsis
    buf = f.read(read_uint(f))
    if obj_type == b's':
        return str(buf, 'utf8')
    elif obj_type == b'b':
        return buf
    elif obj_type == b'i':
        return int(str(buf, 'ascii'), 10)
    elif obj_type == b'f':
        return float(str(buf, 'ascii'))
    elif obj_type == b'c':
        return complex(str(buf, 'ascii'))
    # an explicit raise, so the check survives running under python -O
    raise Exception('unknown object type %r in .mpy file' % obj_type)

def read_qstr_and_pack(f, bytecode, ip):
    """Read a qstr from f and store its global_qstrs index as a 16-bit
    little-endian value at bytecode[ip]."""
    qst = read_qstr(f)
    bytecode[ip] = qst & 0xff
    bytecode[ip + 1] = qst >> 8

def read_bytecode_qstrs(file, bytecode, ip):
    """Walk the opcodes from ip to the end of bytecode, reading one qstr
    from file for every qstr-format opcode and packing it into place."""
    while ip < len(bytecode):
        f, sz = mp_opcode_format(bytecode, ip)
        if f == MP_OPCODE_QSTR:
            read_qstr_and_pack(file, bytecode, ip + 1)
        ip += sz

def read_raw_code(f):
    """Read one raw code object (recursively including its children) from f
    and return it as a RawCode."""
    bc_len = read_uint(f)
    bytecode = bytearray(f.read(bc_len))
    ip, ip2, prelude = extract_prelude(bytecode)
    read_qstr_and_pack(f, bytecode, ip2) # simple_name
    read_qstr_and_pack(f, bytecode, ip2 + 2) # source_file
    read_bytecode_qstrs(f, bytecode, ip)
    n_obj = read_uint(f)
    n_raw_code = read_uint(f)
    # one qstr per positional and keyword-only argument
    qstrs = [read_qstr(f) for _ in range(prelude[3] + prelude[4])]
    objs = [read_obj(f) for _ in range(n_obj)]
    raw_codes = [read_raw_code(f) for _ in range(n_raw_code)]
    return RawCode(bytecode, qstrs, objs, raw_codes)

def read_mpy(filename):
    """Read the .mpy file at filename and return its top-level RawCode.

    Also stores the header's feature flags and small-int size in config.
    Raises Exception if the header is missing/invalid or the version is
    not 0.
    """
    with open(filename, 'rb') as f:
        header = f.read(4)
        if len(header) != 4 or header[0] != ord('M'):
            raise Exception('not a valid .mpy file')
        if header[1] != 0:
            raise Exception('incompatible version')
        feature_flags = header[2]
        config.MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE = (feature_flags & 1) != 0
        config.MICROPY_PY_BUILTINS_STR_UNICODE = (feature_flags & 2) != 0
        config.mp_small_int_bits = header[3]
        return read_raw_code(f)

def dump_mpy(raw_codes):
    """Call dump() on each RawCode in raw_codes."""
    for rc in raw_codes:
        rc.dump()

def freeze_mpy(qcfgs, base_qstrs, raw_codes):
    """Print a C source file freezing raw_codes to stdout.

    qcfgs supplies BYTES_IN_LEN and BYTES_IN_HASH; base_qstrs is the
    collection of escaped qstr names that already exist and must not be
    emitted again.  May raise FreezeError (from RawCode.freeze).
    """
    cfg_bytes_len = int(qcfgs['BYTES_IN_LEN'])
    cfg_bytes_hash = int(qcfgs['BYTES_IN_HASH'])

    # add to qstrs
    new = {}
    for q in global_qstrs:
        # don't add duplicates
        if q.qstr_esc in base_qstrs or q.qstr_esc in new:
            continue
        new[q.qstr_esc] = (len(new), q.qstr_esc, q.str)
    new = sorted(new.values(), key=lambda x: x[0])

    print('#include "py/mpconfig.h"')
    print('#include "py/objint.h"')
    print('#include "py/objstr.h"')
    print('#include "py/emitglue.h"')
    print()

    print('#if MICROPY_PY_BUILTINS_FLOAT')
    print('typedef struct _mp_obj_float_t {')
    print('    mp_obj_base_t base;')
    print('    mp_float_t value;')
    print('} mp_obj_float_t;')
    print('#endif')
    print()

    print('enum {')
    for i, (_, qstr_esc, _) in enumerate(new):
        if i == 0:
            # new qstr ids continue on from the end of the existing pool
            print('    MP_QSTR_%s = MP_QSTRnumber_of,' % qstr_esc)
        else:
            print('    MP_QSTR_%s,' % qstr_esc)
    print('};')

    print()
    print('extern const qstr_pool_t mp_qstr_const_pool;')
    print('const qstr_pool_t mp_qstr_frozen_const_pool = {')
    print('    (qstr_pool_t*)&mp_qstr_const_pool, // previous pool')
    print('    MP_QSTRnumber_of, // previous pool size')
    print('    %u, // allocated entries' % len(new))
    print('    %u, // used entries' % len(new))
    print('    {')
    for _, _, qstr in new:
        print('        %s,' % qstrutil.make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr))
    print('    },')
    print('};')

    for rc in raw_codes:
        # strip the last three characters (the '.py' suffix) from the source name
        rc.freeze(rc.source_file.str.replace('/', '_')[:-3] + '_')

    print()
    print('const char mp_frozen_mpy_names[] = {')
    for rc in raw_codes:
        module_name = rc.source_file.str[:-len(".py")]
        slash = module_name.rfind('/')
        if slash != -1:
            module_name = module_name[slash + 1:]
        print('"%s\\0"' % module_name)
    print('"\\0"};')

    print('const mp_raw_code_t *const mp_frozen_mpy_content[] = {')
    for rc in raw_codes:
        print('    &raw_code_%s,' % rc.escaped_name)
    print('};')

def main():
    """Command-line entry point: parse arguments, read the .mpy files and
    dump or freeze them.  Exits with status 1 on a FreezeError."""
    import argparse
    cmd_parser = argparse.ArgumentParser(description='A tool to work with MicroPython .mpy files.')
    cmd_parser.add_argument('-d', '--dump', action='store_true',
        help='dump contents of files')
    cmd_parser.add_argument('-f', '--freeze', action='store_true',
        help='freeze files')
    cmd_parser.add_argument('-q', '--qstr-header',
        help='qstr header file to freeze against')
    cmd_parser.add_argument('-mlongint-impl', choices=['none', 'longlong', 'mpz'], default='mpz',
        help='long-int implementation used by target (default mpz)')
    cmd_parser.add_argument('-mmpz-dig-size', metavar='N', type=int, default=16,
        help='mpz digit size used by target (default 16)')
    cmd_parser.add_argument('files', nargs='+',
        help='input .mpy files')
    args = cmd_parser.parse_args()

    # set config values relevant to target machine
    config.MICROPY_LONGINT_IMPL = {
        'none':config.MICROPY_LONGINT_IMPL_NONE,
        'longlong':config.MICROPY_LONGINT_IMPL_LONGLONG,
        'mpz':config.MICROPY_LONGINT_IMPL_MPZ,
    }[args.mlongint_impl]
    config.MPZ_DIG_SIZE = args.mmpz_dig_size

    if args.qstr_header:
        qcfgs, base_qstrs = qstrutil.parse_input_headers([args.qstr_header])
    else:
        qcfgs, base_qstrs = {'BYTES_IN_LEN':1, 'BYTES_IN_HASH':1}, {}

    raw_codes = [read_mpy(file) for file in args.files]

    if args.dump:
        dump_mpy(raw_codes)
    elif args.freeze:
        try:
            freeze_mpy(qcfgs, base_qstrs, raw_codes)
        except FreezeError as er:
            print(er, file=sys.stderr)
            sys.exit(1)

if __name__ == '__main__':
    main()