diff --git a/tools/cc1 b/tools/cc1 new file mode 100755 index 0000000000..827d5886a0 --- /dev/null +++ b/tools/cc1 @@ -0,0 +1,262 @@ +#!/usr/bin/env python3 + +""" +This is a middle-processor for MicroPython source files. It takes the output +of the C preprocessor, has the option to change it, then feeds this into the +C compiler. + +It currently has the ability to reorder static hash tables so they are actually +hashed, resulting in faster lookup times at runtime. + +To use, configure the Python variables below, and add the following line to the +Makefile: + +CFLAGS += -no-integrated-cpp -B$(shell pwd)/../tools +""" + +import sys +import os +import re + +################################################################################ +# these are the configuration variables +# TODO somehow make them externally configurable + +# this is the path to the true C compiler +cc1_path = '/usr/lib/gcc/x86_64-unknown-linux-gnu/5.3.0/cc1' +#cc1_path = '/usr/lib/gcc/arm-none-eabi/5.3.0/cc1' + +# this must be the same as MICROPY_QSTR_BYTES_IN_HASH +bytes_in_qstr_hash = 2 + +# this must be 1 or more (can be a decimal) +# larger uses more code size but yields faster lookups +table_size_mult = 1 + +# these control output during processing +print_stats = True +print_debug = False + +# end configuration variables +################################################################################ + +# precompile regexs +re_preproc_line = re.compile(r'# [0-9]+ ') +re_map_entry = re.compile(r'\{.+?\(MP_QSTR_([A-Za-z0-9_]+)\).+\},') +re_mp_obj_dict_t = re.compile(r'(?P(static )?const mp_obj_dict_t (?P[a-z0-9_]+) = \{ \.base = \{&mp_type_dict\}, \.map = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P, \.used = .+ };)$') +re_mp_map_t = re.compile(r'(?P(static )?const mp_map_t (?P[a-z0-9_]+) = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P, \.used = .+ };)$') +re_mp_rom_map_elem_t = re.compile(r'static const mp_rom_map_elem_t [a-z_0-9]+\[\] = {$') + +# this must match the equivalent function in qstr.c +def compute_hash(qstr): + hash = 5381 + for char in qstr: + hash = (hash * 33) ^ ord(char) + # Make sure that valid hash is never zero, zero means "hash not computed" + return (hash & ((1 << (8 * bytes_in_qstr_hash)) - 1)) or 1 + +# this algo must match the equivalent in map.c +def hash_insert(map, key, value): + hash = compute_hash(key) + pos = hash % len(map) + start_pos = pos + if print_debug: + print(' insert %s: start at %u/%u -- ' % (key, pos, len(map)), end='') + while True: + if map[pos] is None: + # found empty slot, so key is not in table + if print_debug: + print('put at %u' % pos) + map[pos] = (key, value) + return + else: + # not yet found, keep searching + if map[pos][0] == key: + raise AssertionError("duplicate key '%s'" % (key,)) + pos = (pos + 1) % len(map) + assert pos != start_pos + +def hash_find(map, key): + hash = compute_hash(key) + pos = hash % len(map) + start_pos = pos + attempts = 0 + while True: + attempts += 1 + if map[pos] is None: + return attempts, None + elif map[pos][0] == key: + return attempts, map[pos][1] + else: + pos = (pos + 1) % len(map) + if pos == start_pos: + return attempts, None + +def process_map_table(file, line, output): + output.append(line) + + # consume all lines that are entries of the table and concat them + # (we do it this way because there can be multiple entries on one line) + table_contents = [] + while True: + line = file.readline() + if len(line) == 0: + print('unexpected end of input') + sys.exit(1) + line = line.strip() + if len(line) == 0: + # empty line + continue + if re_preproc_line.match(line): + # preprocessor line number comment + continue + if line == '};': + # end of table (we assume it appears on a single line) + break + table_contents.append(line) + + # make combined string of entries + entries_str = ''.join(table_contents) + + # split into individual entries + entries = [] + while entries_str: + # look for single entry, by matching nested braces + match = None + if entries_str[0] == '{': + nested_braces = 0 + for i in range(len(entries_str)): + if entries_str[i] == '{': + nested_braces += 1 + elif entries_str[i] == '}': + nested_braces -= 1 + if nested_braces == 0: + match = re_map_entry.match(entries_str[:i + 2]) + break + + if not match: + print('unknown line in table:', entries_str) + sys.exit(1) + + # extract single entry + line = match.group(0) + qstr = match.group(1) + entries_str = entries_str[len(line):].lstrip() + + # add the qstr and the whole line to list of all entries + entries.append((qstr, line)) + + # sort entries so hash table construction is deterministic + entries.sort() + + # create hash table + map = [None] * int(len(entries) * table_size_mult) + for qstr, line in entries: + # We assume that qstr does not have any escape sequences in it. + # This is reasonably safe, since keys in a module or class dict + # should be standard identifiers. + # TODO verify this and raise an error if escape sequence found + hash_insert(map, qstr, line) + + # compute statistics + total_attempts = 0 + for qstr, _ in entries: + attempts, line = hash_find(map, qstr) + assert line is not None + if print_debug: + print(' %s lookup took %u attempts' % (qstr, attempts)) + total_attempts += attempts + if len(entries): + stats = len(map), len(entries) / len(map), total_attempts / len(entries) + else: + stats = 0, 0, 0 + if print_debug: + print(' table stats: size=%d, load=%.2f, avg_lookups=%.1f' % stats) + + # output hash table + for row in map: + if row is None: + output.append('{ 0, 0 },\n') + else: + output.append(row[1] + '\n') + output.append('};\n') + + # skip to next non-blank line + while True: + line = file.readline() + if len(line) == 0: + print('unexpected end of input') + sys.exit(1) + line = line.strip() + if len(line) == 0: + continue + break + + # transform the is_ordered param from 1 to 0 + match = re_mp_obj_dict_t.match(line) + if match is None: + match = re_mp_map_t.match(line) + if match is None: + print('expecting mp_obj_dict_t or mp_map_t definition') + print(output[0]) + print(line) + sys.exit(1) + line = match.group('head') + '0' + match.group('tail') + '\n' + output.append(line) + + return (match.group('id'),) + stats + +def process_file(filename): + output = [] + file_changed = False + with open(filename, 'rt') as f: + while True: + line = f.readline() + if not line: + break + if re_mp_rom_map_elem_t.match(line): + file_changed = True + stats = process_map_table(f, line, output) + if print_stats: + print(' [%s: size=%d, load=%.2f, avg_lookups=%.1f]' % stats) + else: + output.append(line) + + if file_changed: + if print_debug: + print(' modifying static maps in', output[0].strip()) + with open(filename, 'wt') as f: + for line in output: + f.write(line) + +def main(): + # run actual C compiler + # need to quote args that have special characters in them + def quote(s): + if s.find('<') != -1 or s.find('>') != -1: + return "'" + s + "'" + else: + return s + ret = os.system(cc1_path + ' ' + ' '.join(quote(s) for s in sys.argv[1:])) + if ret != 0: + ret = (ret & 0x7f) or 127 # make it in range 0-127, but non-zero + sys.exit(ret) + + if sys.argv[1] == '-E': + # CPP has been run, now do our processing stage + for i, arg in enumerate(sys.argv): + if arg == '-o': + return process_file(sys.argv[i + 1]) + + print('%s: could not find "-o" option' % (sys.argv[0],)) + sys.exit(1) + elif sys.argv[1] == '-fpreprocessed': + # compiler has been run, nothing more to do + return + else: + # unknown processing stage + print('%s: unknown first option "%s"' % (sys.argv[0], sys.argv[1])) + sys.exit(1) + +if __name__ == '__main__': + main()