#!/usr/bin/env python3

"""
This is a middle-processor for MicroPython source files.  It takes the output
of the C preprocessor, has the option to change it, then feeds this into the
C compiler.

It currently has the ability to reorder static hash tables so they are actually
hashed, resulting in faster lookup times at runtime.

To use, configure the Python variables below, and add the following line to the
Makefile:

CFLAGS += -no-integrated-cpp -B$(shell pwd)/../tools
"""

import sys
import os
import re
import subprocess

################################################################################
# these are the configuration variables
# TODO somehow make them externally configurable

# this is the path to the true C compiler
cc1_path = '/usr/lib/gcc/x86_64-unknown-linux-gnu/5.3.0/cc1'
#cc1_path = '/usr/lib/gcc/arm-none-eabi/5.3.0/cc1'

# this must be the same as MICROPY_QSTR_BYTES_IN_HASH
bytes_in_qstr_hash = 2

# this must be 1 or more (can be a decimal)
# larger uses more code size but yields faster lookups
table_size_mult = 1

# these control output during processing
print_stats = True
print_debug = False

# end configuration variables
################################################################################

# precompile regexs
re_preproc_line = re.compile(r'# [0-9]+ ')
re_map_entry = re.compile(r'\{.+?\(MP_QSTR_([A-Za-z0-9_]+)\).+\},')
# The two definitions below are split into three named groups so that the
# literal "1" of ".is_ordered = 1" (between 'head' and 'tail') can be replaced.
re_mp_obj_dict_t = re.compile(r'(?P<head>(static )?const mp_obj_dict_t (?P<id>[a-z0-9_]+) = \{ \.base = \{&mp_type_dict\}, \.map = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$')
re_mp_map_t = re.compile(r'(?P<head>(static )?const mp_map_t (?P<id>[a-z0-9_]+) = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$')
re_mp_rom_map_elem_t = re.compile(r'static const mp_rom_map_elem_t [a-z_0-9]+\[\] = {$')


# this must match the equivalent function in qstr.c
def compute_hash(qstr):
    """Return the hash of the string *qstr*, masked to bytes_in_qstr_hash bytes.

    The result is never 0; a masked value of 0 is replaced by 1.
    """
    value = 5381
    for char in qstr:
        value = (value * 33) ^ ord(char)
    # Make sure that valid hash is never zero, zero means "hash not computed"
    return (value & ((1 << (8 * bytes_in_qstr_hash)) - 1)) or 1


# this algo must match the equivalent in map.c
def hash_insert(map, key, value):
    """Insert ``(key, value)`` into the open-addressed table *map* in place.

    *map* is a list whose free slots are None.  Probing starts at
    ``compute_hash(key) % len(map)`` and advances linearly with wrap-around.

    Raises AssertionError if *key* is already present or no free slot exists.
    """
    pos = compute_hash(key) % len(map)
    start_pos = pos
    if print_debug:
        print(' insert %s: start at %u/%u -- ' % (key, pos, len(map)), end='')
    while True:
        if map[pos] is None:
            # found empty slot, so key is not in table
            if print_debug:
                print('put at %u' % pos)
            map[pos] = (key, value)
            return
        # not yet found, keep searching
        if map[pos][0] == key:
            raise AssertionError("duplicate key '%s'" % (key,))
        pos = (pos + 1) % len(map)
        if pos == start_pos:
            # Raised explicitly (not via `assert`) so that running under -O
            # cannot turn a full table into an endless loop.
            raise AssertionError('hash table is full')


def hash_find(map, key):
    """Look up *key* in the table *map* built by hash_insert.

    Returns ``(attempts, value)`` where *attempts* is the number of slots
    probed and *value* is the stored value, or None if the key is absent.
    """
    pos = compute_hash(key) % len(map)
    start_pos = pos
    attempts = 0
    while True:
        attempts += 1
        if map[pos] is None:
            return attempts, None
        if map[pos][0] == key:
            return attempts, map[pos][1]
        pos = (pos + 1) % len(map)
        if pos == start_pos:
            # wrapped all the way around without finding the key
            return attempts, None


def _read_nonblank_line(file):
    """Return the next non-blank line of *file*, stripped; exit(1) on EOF."""
    while True:
        line = file.readline()
        if not line:
            print('unexpected end of input')
            sys.exit(1)
        line = line.strip()
        if line:
            return line


def _read_table_body(file):
    """Return all table entry text up to the closing '};' as one string.

    Preprocessor line-number comments are dropped.  Lines are concatenated
    because there can be multiple entries on one line.
    """
    contents = []
    while True:
        line = _read_nonblank_line(file)
        if re_preproc_line.match(line):
            # preprocessor line number comment
            continue
        if line == '};':
            # end of table (we assume it appears on a single line)
            return ''.join(contents)
        contents.append(line)


def _split_entries(entries_str):
    """Split concatenated table text into a list of ``(qstr, entry_text)``.

    Each entry is located by matching nested braces; exits with status 1 if
    the text at the current position is not a recognisable entry.
    """
    entries = []
    while entries_str:
        match = None
        if entries_str[0] == '{':
            depth = 0
            for i, char in enumerate(entries_str):
                if char == '{':
                    depth += 1
                elif char == '}':
                    depth -= 1
                    if depth == 0:
                        # i + 2 so the slice also holds the character after
                        # the closing brace (the ',' the regex requires)
                        match = re_map_entry.match(entries_str[:i + 2])
                        break
        if not match:
            print('unknown line in table:', entries_str)
            sys.exit(1)
        entry = match.group(0)
        entries.append((match.group(1), entry))
        entries_str = entries_str[len(entry):].lstrip()
    return entries


def process_map_table(file, line, output):
    """Rewrite one ``mp_rom_map_elem_t`` table as a hashed table.

    *line* is the already-read line that opens the table; the entries, the
    closing '};' and the following mp_obj_dict_t / mp_map_t definition are
    read from *file*.  The rewritten lines are appended to *output*: the
    opening line, one line per hash slot ('{ 0, 0 },' for empty slots), '};',
    and the definition with ``.is_ordered`` changed from 1 to 0.

    Returns ``(id, table_size, load, avg_lookups)`` where *id* is the name of
    the dict/map definition.  Exits with status 1 on malformed input.
    """
    output.append(line)

    entries = _split_entries(_read_table_body(file))

    # sort entries so hash table construction is deterministic
    entries.sort()

    # create hash table
    table = [None] * int(len(entries) * table_size_mult)
    for qstr, entry in entries:
        # We assume that qstr does not have any escape sequences in it.
        # This is reasonably safe, since keys in a module or class dict
        # should be standard identifiers.
        # TODO verify this and raise an error if escape sequence found
        hash_insert(table, qstr, entry)

    # compute statistics
    total_attempts = 0
    for qstr, _ in entries:
        attempts, found = hash_find(table, qstr)
        if found is None:
            raise AssertionError("key '%s' missing after insert" % (qstr,))
        if print_debug:
            print(' %s lookup took %u attempts' % (qstr, attempts))
        total_attempts += attempts
    if entries:
        stats = len(table), len(entries) / len(table), total_attempts / len(entries)
    else:
        stats = 0, 0, 0
    if print_debug:
        print(' table stats: size=%d, load=%.2f, avg_lookups=%.1f' % stats)

    # output hash table
    for row in table:
        if row is None:
            output.append('{ 0, 0 },\n')
        else:
            output.append(row[1] + '\n')
    output.append('};\n')

    # skip to next non-blank line
    line = _read_nonblank_line(file)

    # transform the is_ordered param from 1 to 0
    match = re_mp_obj_dict_t.match(line) or re_mp_map_t.match(line)
    if match is None:
        print('expecting mp_obj_dict_t or mp_map_t definition')
        print(output[0])
        print(line)
        sys.exit(1)
    output.append(match.group('head') + '0' + match.group('tail') + '\n')

    return (match.group('id'),) + stats


def process_file(filename):
    """Hash every static map table found in *filename*.

    The file is rewritten in place, and only if at least one table was found.
    """
    output = []
    file_changed = False
    with open(filename, 'rt') as f:
        while True:
            # readline() rather than iteration: process_map_table consumes
            # further lines from the same file object
            line = f.readline()
            if not line:
                break
            if re_mp_rom_map_elem_t.match(line):
                file_changed = True
                stats = process_map_table(f, line, output)
                if print_stats:
                    print(' [%s: size=%d, load=%.2f, avg_lookups=%.1f]' % stats)
            else:
                output.append(line)

    if file_changed:
        if print_debug:
            print(' modifying static maps in', output[0].strip())
        with open(filename, 'wt') as f:
            f.writelines(output)


def main():
    """Run the real compiler with our arguments, then post-process its output.

    When the first argument is '-E' the file named after '-o' is processed;
    when it is '-fpreprocessed' nothing more is done.  Exits non-zero if the
    compiler fails or the arguments are not recognised.
    """
    if len(sys.argv) < 2:
        print('%s: missing compiler arguments' % (sys.argv[0],))
        sys.exit(1)

    # run actual C compiler
    # the arguments are passed as a list, so no shell is involved and
    # arguments with special characters need no quoting
    try:
        ret = subprocess.call([cc1_path] + sys.argv[1:])
    except OSError as er:
        print('%s: could not run %s: %s' % (sys.argv[0], cc1_path, er))
        sys.exit(127)
    if ret != 0:
        # make it in range 1-127 (a negative value means killed by a signal)
        sys.exit(ret if 0 < ret <= 127 else 127)

    if sys.argv[1] == '-E':
        # CPP has been run, now do our processing stage
        for i, arg in enumerate(sys.argv):
            if arg == '-o' and i + 1 < len(sys.argv):
                return process_file(sys.argv[i + 1])

        print('%s: could not find "-o" option' % (sys.argv[0],))
        sys.exit(1)
    elif sys.argv[1] == '-fpreprocessed':
        # compiler has been run, nothing more to do
        return
    else:
        # unknown processing stage
        print('%s: unknown first option "%s"' % (sys.argv[0], sys.argv[1]))
        sys.exit(1)


if __name__ == '__main__':
    main()