commit 429d71943d6b94c7dc3c40a39ff1a09742c77dc2
Author: Damien
Date:   Fri Oct 4 19:53:11 2013 +0100

    Initial commit.

diff --git a/py/.gitignore b/py/.gitignore
new file mode 100644
index 0000000000..5761abcfdf
--- /dev/null
+++ b/py/.gitignore
@@ -0,0 +1 @@
+*.o
diff --git a/py/Makefile b/py/Makefile
new file mode 100644
index 0000000000..669453dba9
--- /dev/null
+++ b/py/Makefile
@@ -0,0 +1,50 @@
+CC = gcc
+CFLAGS = -Wall -ansi -std=gnu99 -Os #-DNDEBUG
+LDFLAGS =
+
+SRC = \
+	malloc.c \
+	misc.c \
+	qstr.c \
+	lexer.c \
+	lexerfile.c \
+	parse.c \
+	scope.c \
+	compile.c \
+	emitcommon.c \
+	emitcpy.c \
+	emitbc.c \
+	asmx64.c \
+	emitx64v2.c \
+	emitthumb.c \
+	asmthumb.c \
+	runtime.c \
+	bc.c \
+	main.c \
+
+SRC_ASM = \
+	runtime1.s \
+
+OBJ = $(SRC:.c=.o) $(SRC_ASM:.s=.o)
+LIB =
+PROG = py
+
+$(PROG): $(OBJ)
+	$(CC) -o $@ $(OBJ) $(LIB) $(LDFLAGS)
+
+runtime.o: runtime.c
+	$(CC) $(CFLAGS) -O3 -c -o $@ $<
+
+bc.o: bc.c
+	$(CC) $(CFLAGS) -O3 -c -o $@ $<
+
+parse.o: grammar.h
+compile.o: grammar.h
+emitcpy.o: emit.h
+emitbc.o: emit.h
+emitx64.o: emit.h
+emitx64v2.o: emit.h
+emitthumb.o: emit.h
+
+clean:
+	/bin/rm -f $(OBJ) $(PROG)
diff --git a/py/asmthumb.c b/py/asmthumb.c
new file mode 100644
index 0000000000..ea7547d4b1
--- /dev/null
+++ b/py/asmthumb.c
@@ -0,0 +1,421 @@
+// NOTE(review): the names of the four system headers below are missing from this dump;
+// the code in this file visibly uses printf, assert, memset and NULL - restore the
+// header names from the repository before building
+#include
+#include
+#include
+#include
+
+#include "misc.h"
+#include "machine.h"
+#include "asmthumb.h"
+
+#define UNSIGNED_FIT8(x) (((x) & 0xffffff00) == 0)
+#define UNSIGNED_FIT16(x) (((x) & 0xffff0000) == 0)
+// each SIGNED_FITn is wrapped in an outer pair of parentheses so that it expands to a
+// single operand; without them !SIGNED_FIT8(x) or SIGNED_FIT8(x) && y would bind wrongly
+#define SIGNED_FIT8(x) ((((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80))
+#define SIGNED_FIT9(x) ((((x) & 0xffffff00) == 0) || (((x) & 0xffffff00) == 0xffffff00))
+#define SIGNED_FIT12(x) ((((x) & 0xfffff800) == 0) || (((x) & 0xfffff800) == 0xfffff800))
+
+struct _asm_thumb_t {
+    int pass;               // current pass, one of ASM_THUMB_PASS_x
+    uint code_offset;       // number of bytes emitted so far in the current pass
+    uint code_size;         // total size of the code in bytes, recorded at the end of pass 2
+    byte *code_base;        // output buffer, allocated at the end of pass 2
+    byte dummy_data[8];     // scratch target handed out for writes made before pass 3
+
+    int next_label;         // next label id returned by asm_thumb_label_new
+    int max_num_labels;     // number of entries in label_offsets, recorded at the end of pass 1
+    int *label_offsets;     // byte offset of each label, -1 while unassigned
+    int num_locals;         // number of locals given to asm_thumb_entry
+    uint push_reglist;      // register list pushed by asm_thumb_entry and popped by asm_thumb_exit
+    uint stack_adjust;      // number of words taken off sp by asm_thumb_entry
+};
+
+// allocate a new assembler; the label table and code buffer are only created by the passes
+asm_thumb_t *asm_thumb_new() {
+    asm_thumb_t *as;
+
+    as = m_new(asm_thumb_t, 1);
+    // set every field explicitly: nothing visible here guarantees that m_new zeroes memory
+    as->pass = 0;
+    as->code_offset = 0;
+    as->code_size = 0;
+    as->code_base = NULL;
+    as->next_label = 1;
+    as->max_num_labels = 0;
+    as->label_offsets = NULL;
+    as->num_locals = 0;
+    as->push_reglist = 0;
+    as->stack_adjust = 0;
+
+    return as;
+}
+
+// free the assembler and its label table; the generated code is freed only if free_code
+// is true, otherwise the code buffer is left allocated for the caller
+void asm_thumb_free(asm_thumb_t *as, bool free_code) {
+    if (free_code) {
+        m_free(as->code_base);
+    }
+    // the label table is only ever used inside the assembler, so always release it
+    if (as->label_offsets != NULL) {
+        m_free(as->label_offsets);
+    }
+    m_free(as);
+}
+
+// begin a pass: rewind the output position and restart the label numbering
+void asm_thumb_start_pass(asm_thumb_t *as, int pass) {
+    as->pass = pass;
+    as->code_offset = 0;
+    as->next_label = 1;
+    if (pass == ASM_THUMB_PASS_1) {
+        as->max_num_labels = 0;
+    } else if (pass == ASM_THUMB_PASS_2) {
+        // mark every label as unassigned (all bytes 0xff reads back as -1)
+        memset(as->label_offsets, -1, as->max_num_labels * sizeof(int));
+    }
+}
+
+// finish a pass: pass 1 sizes and allocates the label table,
+// pass 2 records the code size and allocates the code buffer
+void asm_thumb_end_pass(asm_thumb_t *as) {
+    if (as->pass == ASM_THUMB_PASS_1) {
+        // calculate number of labels needed
+        if (as->next_label > as->max_num_labels) {
+            as->max_num_labels = as->next_label;
+        }
+        // drop the table of an earlier run so that repeating pass 1 does not leak it
+        if (as->label_offsets != NULL) {
+            m_free(as->label_offsets);
+        }
+        as->label_offsets = m_new(int, as->max_num_labels);
+    } else if (as->pass == ASM_THUMB_PASS_2) {
+        // calculate size of code in bytes
+        as->code_size = as->code_offset;
+        as->code_base = m_new(byte, as->code_size);
+        printf("code_size: %u\n", as->code_size);
+    }
+}
+
+// all functions must go through this one to emit bytes
+// before pass 3 only the offset advances and the caller writes into a scratch buffer;
+// in pass 3 the returned pointer is the real position in the code buffer
+static byte *asm_thumb_get_cur_to_write_bytes(asm_thumb_t *as, int num_bytes_to_write) {
+    //printf("emit %d\n", num_bytes_to_write);
+    // the scratch buffer must be able to absorb a complete write
+    assert(num_bytes_to_write <= (int)sizeof(as->dummy_data));
+    if (as->pass < ASM_THUMB_PASS_3) {
+        as->code_offset += num_bytes_to_write;
+        return as->dummy_data;
+    } else {
+        assert(as->code_offset + num_bytes_to_write <= as->code_size);
+        byte *c = as->code_base + as->code_offset;
+        as->code_offset += num_bytes_to_write;
+        return c;
+    }
+}
+
+// size of the generated code in bytes, valid once pass 2 has ended
+uint asm_thumb_get_code_size(asm_thumb_t *as) {
+    return as->code_size;
+}
+
+// address of the generated code, suitable for branching to
+void *asm_thumb_get_code(asm_thumb_t *as) {
+    // need to set low bit to indicate that it's thumb code
+    return (void *)(((machine_uint_t)as->code_base) | 1);
+}
+
+/*
+static void asm_thumb_write_byte_1(asm_thumb_t *as, byte b1) {
+    byte *c = asm_thumb_get_cur_to_write_bytes(as, 1);
+    c[0] = b1;
+}
+*/
+
+// emit one 16-bit instruction
+static void asm_thumb_write_op16(asm_thumb_t *as, uint op) {
+    byte *c = asm_thumb_get_cur_to_write_bytes(as, 2);
+    // little endian
+    c[0] = op;
+    c[1] = op >> 8;
+}
+
+// emit one 32-bit instruction given as two 16-bit halves
+static void asm_thumb_write_op32(asm_thumb_t *as, uint op1, uint op2) {
+    byte *c = asm_thumb_get_cur_to_write_bytes(as, 4);
+    // little endian, op1 then op2
+    c[0] = op1;
+    c[1] = op1 >> 8;
+    c[2] = op2;
+    c[3] = op2 >> 8;
+}
+
+/*
+#define IMM32_L0(x) ((x) & 0xff)
+#define IMM32_L1(x) (((x) >> 8) & 0xff)
+#define IMM32_L2(x) (((x) >> 16) & 0xff)
+#define IMM32_L3(x) (((x) >> 24) & 0xff)
+
+static void asm_thumb_write_word32(asm_thumb_t *as, int w32) {
+    byte *c = asm_thumb_get_cur_to_write_bytes(as, 4);
+    c[0] = IMM32_L0(w32);
+    c[1] = IMM32_L1(w32);
+    c[2] = IMM32_L2(w32);
+    c[3] = IMM32_L3(w32);
+}
+*/
+
+// rlolist is a bit map indicating desired lo-registers
+#define OP_PUSH_RLIST(rlolist) (0xb400 | (rlolist))
+#define OP_PUSH_RLIST_LR(rlolist) (0xb400 | 0x0100 | (rlolist))
+#define OP_POP_RLIST(rlolist) (0xbc00 | (rlolist))
+#define OP_POP_RLIST_PC(rlolist) (0xbc00 | 0x0100 | (rlolist))
+
+#define OP_ADD_SP(num_words) (0xb000 | (num_words))
+#define OP_SUB_SP(num_words) (0xb080 | (num_words))
+
+// emit the function prologue: push registers (and lr) and reserve stack for the locals;
+// a negative num_locals is treated as 0
+void asm_thumb_entry(asm_thumb_t *as, int num_locals) {
+    // work out what to push and how much extra space to reserve on the stack
+    // so that we have enough for all locals and it's aligned to an 8-byte boundary
+    uint reglist;
+    uint stack_adjust;
+    if (num_locals < 0) {
+        num_locals = 0;
+    }
+    // don't push/pop r0 because it's used for return value
+    switch (num_locals) {
+        case 0:
+        case 1:
+            reglist = 0xf2;
+            stack_adjust = 0;
+            break;
+
+        case 2:
+        case 3:
+            reglist = 0xfe;
+            stack_adjust = 0;
+            break;
+
+        default:
+            reglist = 0xfe;
+            // round the remaining locals up to an even number of words
+            stack_adjust = ((num_locals - 3) + 1) & (~1);
+            break;
+    }
+    // the word count shares the opcode with bit 7, which is all that separates
+    // OP_SUB_SP from OP_ADD_SP, so it has to stay below 0x80
+    assert(stack_adjust < 0x80);
+    asm_thumb_write_op16(as, OP_PUSH_RLIST_LR(reglist));
+    if (stack_adjust > 0) {
+        asm_thumb_write_op16(as, OP_SUB_SP(stack_adjust));
+    }
+    // remembered so that asm_thumb_exit can undo exactly what was done here
+    as->push_reglist = reglist;
+    as->stack_adjust = stack_adjust;
+    as->num_locals = num_locals;
+}
+
+// emit the function epilogue: release the stack taken by asm_thumb_entry and
+// pop the saved registers together with pc
+void asm_thumb_exit(asm_thumb_t *as) {
+    if (as->stack_adjust > 0) {
+        asm_thumb_write_op16(as, OP_ADD_SP(as->stack_adjust));
+    }
+    asm_thumb_write_op16(as, OP_POP_RLIST_PC(as->push_reglist));
+}
+
+// hand out the next label id; ids restart at 1 at the beginning of every pass
+int asm_thumb_label_new(asm_thumb_t *as) {
+    return as->next_label++;
+}
+
+// bind label to the current code offset; does nothing in pass 1, records the offset
+// in pass 2 and checks in pass 3 that it has not moved
+void asm_thumb_label_assign(asm_thumb_t *as, int label) {
+    if (as->pass > ASM_THUMB_PASS_1) {
+        assert(0 <= label && label < as->max_num_labels);
+        if (as->pass == ASM_THUMB_PASS_2) {
+            // assign label offset
+            assert(as->label_offsets[label] == -1);
+            as->label_offsets[label] = as->code_offset;
+        } else if (as->pass == ASM_THUMB_PASS_3) {
+            // ensure label offset has not changed from PASS_2 to PASS_3
+            //printf("l%d: (at %d=%ld)\n", label, as->label_offsets[label], as->code_offset);
+            assert(as->label_offsets[label] == as->code_offset);
+        }
+    }
+}
+
+// the i8 value will be zero extended into the r32 register!
+void asm_thumb_mov_reg_i8(asm_thumb_t *as, uint rlo_dest, int i8) {
+    assert(rlo_dest < REG_R8);
+    // the immediate occupies the low 8 bits of the opcode; a wider value would
+    // overwrite the register and opcode bits above it
+    assert(UNSIGNED_FIT8(i8));
+    // movs rlo_dest, #i8
+    asm_thumb_write_op16(as, 0x2000 | (rlo_dest << 8) | (i8 & 0xff));
+}
+
+// if loading lo half, the i16 value will be zero extended into the r32 register!
+// emit movw (load_hi_half false) or movt (load_hi_half true) with the low 16 bits of i16
+void asm_thumb_mov_i16_to_reg(asm_thumb_t *as, int i16, uint reg_dest, bool load_hi_half) {
+    assert(reg_dest < REG_R15);
+    // only the low 16 bits are encoded; taking them as an unsigned value keeps the
+    // shifts below well defined when i16 arrives with its top bit set
+    uint imm = (uint)i16 & 0xffff;
+    uint op;
+    if (load_hi_half) {
+        // movt reg_dest, #i16
+        op = 0xf2c0;
+    } else {
+        // movw reg_dest, #i16
+        op = 0xf240;
+    }
+    asm_thumb_write_op32(as, op | ((imm >> 1) & 0x0400) | ((imm >> 12) & 0xf), ((imm << 4) & 0x7000) | (reg_dest << 8) | (imm & 0xff));
+}
+
+// load the low 32 bits of i32 into reg_dest; always emits 8 bytes
+void asm_thumb_mov_reg_i32(asm_thumb_t *as, uint reg_dest, machine_uint_t i32) {
+    // movw, movt does it in 8 bytes
+    // ldr [pc, #], dw does it in 6 bytes, but we might not reach to end of code for dw
+
+    // each half is masked to 16 bits so that the conversion to int cannot change its value
+    asm_thumb_mov_i16_to_reg(as, (int)(i32 & 0xffff), reg_dest, false);
+    asm_thumb_mov_i16_to_reg(as, (int)((i32 >> 16) & 0xffff), reg_dest, true);
+}
+
+// load i32 into reg_dest using the shortest of the encodings available in this file
+void asm_thumb_mov_reg_i32_optimised(asm_thumb_t *as, uint reg_dest, int i32) {
+    if (reg_dest < 8 && UNSIGNED_FIT8(i32)) {
+        asm_thumb_mov_reg_i8(as, reg_dest, i32);
+    } else if (UNSIGNED_FIT16(i32)) {
+        asm_thumb_mov_i16_to_reg(as, i32, reg_dest, false);
+    } else {
+        asm_thumb_mov_reg_i32(as, reg_dest, i32);
+    }
+}
+
+// emit a 16-bit register to register move; both registers may be lo or hi registers
+void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src) {
+    // each register is encoded in 4 bits
+    assert(reg_dest <= REG_R15);
+    assert(reg_src <= REG_R15);
+    uint op_lo;
+    if (reg_src < 8) {
+        op_lo = reg_src << 3;
+    } else {
+        op_lo = 0x40 | ((reg_src - 8) << 3);
+    }
+    if (reg_dest < 8) {
+        op_lo |= reg_dest;
+    } else {
+        op_lo |= 0x80 | (reg_dest - 8);
+    }
+    asm_thumb_write_op16(as, 0x4600 | op_lo);
+}
+
+#define OP_STR_TO_SP_OFFSET(rlo_dest, word_offset) (0x9000 | ((rlo_dest) << 8) | ((word_offset) & 0x00ff))
+#define OP_LDR_FROM_SP_OFFSET(rlo_dest, word_offset) (0x9800 | ((rlo_dest) << 8) | ((word_offset) & 0x00ff))
+
+// store rlo_src into the stack slot of local number local_num
+void asm_thumb_mov_local_reg(asm_thumb_t *as, int local_num, uint rlo_src) {
+    assert(rlo_src < REG_R8);
+    int word_offset = as->num_locals - local_num - 1;
+    // the opcode keeps only 8 bits of the offset, so check both ends of the range
+    assert(as->pass < ASM_THUMB_PASS_3 || (0 <= word_offset && word_offset <= 0xff));
+    asm_thumb_write_op16(as, OP_STR_TO_SP_OFFSET(rlo_src, word_offset));
+}
+
+// load rlo_dest from the stack slot of local number local_num
+void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num) {
+    assert(rlo_dest < REG_R8);
+    int word_offset = as->num_locals - local_num - 1;
+    // the opcode keeps only 8 bits of the offset, so check both ends of the range
+    assert(as->pass < ASM_THUMB_PASS_3 || (0 <= word_offset && word_offset <= 0xff));
+    asm_thumb_write_op16(as, OP_LDR_FROM_SP_OFFSET(rlo_dest, word_offset));
+}
+
+// not implemented: asserts unconditionally, the opcode written below is a placeholder
+void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint reg_dest, int local_num) {
+    assert(0);
+    // see format 12, load address
+    asm_thumb_write_op16(as, 0x0000);
+}
+
+#define OP_ADD_REG_REG_REG(rlo_dest, rlo_src_a, rlo_src_b) (0x1800 | ((rlo_src_b) << 6) | ((rlo_src_a) << 3) | (rlo_dest))
+
+// emit rlo_dest = rlo_src_a + rlo_src_b
+void asm_thumb_add_reg_reg_reg(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, uint rlo_src_b) {
+    // every register field is 3 bits wide; a hi register would run into the next field
+    assert(rlo_dest < REG_R8);
+    assert(rlo_src_a < REG_R8);
+    assert(rlo_src_b < REG_R8);
+    asm_thumb_write_op16(as, OP_ADD_REG_REG_REG(rlo_dest, rlo_src_a, rlo_src_b));
+}
+
+#define OP_CMP_REG_REG(rlo_a, rlo_b) (0x4280 | ((rlo_b) << 3) | (rlo_a))
+
+// emit a compare of rlo_a with rlo_b
+void asm_thumb_cmp_reg_reg(asm_thumb_t *as, uint rlo_a, uint rlo_b) {
+    // every register field is 3 bits wide; a hi register would run into the next field
+    assert(rlo_a < REG_R8);
+    assert(rlo_b < REG_R8);
+    asm_thumb_write_op16(as, OP_CMP_REG_REG(rlo_a, rlo_b));
+}
+
+// emit the fixed 16-bit opcode 0xbfac (named "ite ge" by this function)
+void asm_thumb_ite_ge(asm_thumb_t *as) {
+    asm_thumb_write_op16(as, 0xbfac);
+}
+
+#define OP_B(byte_offset) (0xe000 | (((byte_offset) >> 1) & 0x07ff))
+// this could be wrong, because it should have a range of +/- 16MiB...
+#define OP_BW_HI(byte_offset) (0xf000 | (((byte_offset) >> 12) & 0x07ff)) +#define OP_BW_LO(byte_offset) (0xb800 | (((byte_offset) >> 1) & 0x07ff)) + +void asm_thumb_b_label(asm_thumb_t *as, int label) { + if (as->pass > ASM_THUMB_PASS_1) { + int dest = as->label_offsets[label]; + int rel = dest - as->code_offset; + rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction + if (dest >= 0 && rel <= -4) { + // is a backwards jump, so we know the size of the jump on the first pass + // calculate rel assuming 12 bit relative jump + if (SIGNED_FIT12(rel)) { + asm_thumb_write_op16(as, OP_B(rel)); + } else { + goto large_jump; + } + } else { + // is a forwards jump, so need to assume it's large + large_jump: + asm_thumb_write_op32(as, OP_BW_HI(rel), OP_BW_LO(rel)); + } + } +} + +#define OP_CMP_REG_IMM(rlo, i8) (0x2800 | ((rlo) << 8) | (i8)) +// all these bit arithmetics need coverage testing! +#define OP_BEQ(byte_offset) (0xd000 | (((byte_offset) >> 1) & 0x00ff)) +#define OP_BEQW_HI(byte_offset) (0xf000 | (((byte_offset) >> 10) & 0x0400) | (((byte_offset) >> 14) & 0x003f)) +#define OP_BEQW_LO(byte_offset) (0x8000 | ((byte_offset) & 0x2000) | (((byte_offset) >> 1) & 0x0fff)) + +void asm_thumb_cmp_reg_bz_label(asm_thumb_t *as, uint rlo, int label) { + assert(rlo < REG_R8); + + // compare reg with 0 + asm_thumb_write_op16(as, OP_CMP_REG_IMM(rlo, 0)); + + // branch if equal + if (as->pass > ASM_THUMB_PASS_1) { + int dest = as->label_offsets[label]; + int rel = dest - as->code_offset; + rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction + if (dest >= 0 && rel <= -4) { + // is a backwards jump, so we know the size of the jump on the first pass + // calculate rel assuming 12 bit relative jump + if (SIGNED_FIT9(rel)) { + asm_thumb_write_op16(as, OP_BEQ(rel)); + } else { + goto large_jump; + } + } else { + // is a forwards jump, so need to assume it's large + large_jump: + asm_thumb_write_op32(as, OP_BEQW_HI(rel), 
OP_BEQW_LO(rel)); + } + } +} + +#define OP_BLX(reg) (0x4780 | ((reg) << 3)) +#define OP_SVC(arg) (0xdf00 | (arg)) +#define OP_LDR_FROM_BASE_OFFSET(rlo_dest, rlo_base, word_offset) (0x6800 | (((word_offset) << 6) & 0x07c0) | ((rlo_base) << 3) | (rlo_dest)) + +void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp) { + /* TODO make this use less bytes + uint rlo_base = REG_R3; + uint rlo_dest = REG_R7; + uint word_offset = 4; + asm_thumb_write_op16(as, 0x0000); + asm_thumb_write_op16(as, 0x6800 | (word_offset << 6) | (rlo_base << 3) | rlo_dest); // ldr rlo_dest, [rlo_base, #offset] + asm_thumb_write_op16(as, 0x4780 | (REG_R9 << 3)); // blx reg + */ + + if (0) { + // load ptr to function into register using immediate, then branch + // not relocatable + asm_thumb_mov_reg_i32(as, reg_temp, (machine_uint_t)fun_ptr); + asm_thumb_write_op16(as, OP_BLX(reg_temp)); + } else if (1) { + asm_thumb_write_op16(as, OP_LDR_FROM_BASE_OFFSET(reg_temp, REG_R7, fun_id)); + asm_thumb_write_op16(as, OP_BLX(reg_temp)); + } else { + // use SVC + asm_thumb_write_op16(as, OP_SVC(fun_id)); + } +} diff --git a/py/asmthumb.h b/py/asmthumb.h new file mode 100644 index 0000000000..d3ffb9a003 --- /dev/null +++ b/py/asmthumb.h @@ -0,0 +1,60 @@ +#define ASM_THUMB_PASS_1 (1) +#define ASM_THUMB_PASS_2 (2) +#define ASM_THUMB_PASS_3 (3) + +#define REG_R0 (0) +#define REG_R1 (1) +#define REG_R2 (2) +#define REG_R3 (3) +#define REG_R4 (4) +#define REG_R5 (5) +#define REG_R6 (6) +#define REG_R7 (7) +#define REG_R8 (8) +#define REG_R9 (9) +#define REG_R10 (10) +#define REG_R11 (11) +#define REG_R12 (12) +#define REG_R13 (13) +#define REG_R14 (14) +#define REG_R15 (15) +#define REG_LR (REG_R14) + +#define REG_RET REG_R0 +#define REG_ARG_1 REG_R0 +#define REG_ARG_2 REG_R1 +#define REG_ARG_3 REG_R2 +#define REG_ARG_4 REG_R3 + +typedef struct _asm_thumb_t asm_thumb_t; + +asm_thumb_t *asm_thumb_new(); +void asm_thumb_free(asm_thumb_t *as, bool free_code); +void 
asm_thumb_start_pass(asm_thumb_t *as, int pass); +void asm_thumb_end_pass(asm_thumb_t *as); +uint asm_thumb_get_code_size(asm_thumb_t *as); +void *asm_thumb_get_code(asm_thumb_t *as); + +void asm_thumb_entry(asm_thumb_t *as, int num_locals); +void asm_thumb_exit(asm_thumb_t *as); + +int asm_thumb_label_new(asm_thumb_t *as); +void asm_thumb_label_assign(asm_thumb_t *as, int label); + +// argument order follows ARM, in general dest is first + +void asm_thumb_mov_reg_i8(asm_thumb_t *as, uint rlo_dest, int i8_src); +void asm_thumb_mov_reg_i32(asm_thumb_t *as, uint reg_dest, machine_uint_t i32_src); +void asm_thumb_mov_reg_i32_optimised(asm_thumb_t *as, uint reg_dest, int i32_src); +void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src); +void asm_thumb_mov_local_reg(asm_thumb_t *as, int local_num_dest, uint rlo_src); +void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num); +void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint reg_dest, int local_num); + +void asm_thumb_add_reg_reg_reg(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, uint rlo_src_b); +void asm_thumb_cmp_reg_reg(asm_thumb_t *as, uint rlo_a, uint rlo_b); +void asm_thumb_ite_ge(asm_thumb_t *as); + +void asm_thumb_b_label(asm_thumb_t *as, int label); +void asm_thumb_cmp_reg_bz_label(asm_thumb_t *as, uint rlo, int label); +void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp); diff --git a/py/asmx64.c b/py/asmx64.c new file mode 100644 index 0000000000..59c8113bc4 --- /dev/null +++ b/py/asmx64.c @@ -0,0 +1,621 @@ +#include +#include +#include +#include +#include + +#include "misc.h" +#include "asmx64.h" + +/* all offsets are measured in multiples of 8 bytes */ +#define WORD_SIZE (8) + +#define OPCODE_NOP (0x90) +#define OPCODE_PUSH_R64 (0x50) +#define OPCODE_PUSH_I64 (0x68) +#define OPCODE_PUSH_M64 (0xff) /* /6 */ +#define OPCODE_POP_R64 (0x58) +#define OPCODE_RET (0xc3) +#define OPCODE_MOV_I8_TO_R8 (0xb0) /* +rb */ +#define 
OPCODE_MOV_I64_TO_R64 (0xb8) +#define OPCODE_MOV_I32_TO_RM32 (0xc7) +#define OPCODE_MOV_R64_TO_RM64 (0x89) +#define OPCODE_MOV_RM64_TO_R64 (0x8b) +#define OPCODE_LEA_MEM_TO_R64 (0x8d) /* /r */ +#define OPCODE_XOR_R64_TO_RM64 (0x31) /* /r */ +#define OPCODE_ADD_R64_TO_RM64 (0x01) +#define OPCODE_ADD_I32_TO_RM32 (0x81) /* /0 */ +#define OPCODE_ADD_I8_TO_RM32 (0x83) /* /0 */ +#define OPCODE_SUB_R64_FROM_RM64 (0x29) +#define OPCODE_SUB_I32_FROM_RM64 (0x81) /* /5 */ +#define OPCODE_SUB_I8_FROM_RM64 (0x83) /* /5 */ +#define OPCODE_SHL_RM32_BY_I8 (0xc1) /* /4 */ +#define OPCODE_SHR_RM32_BY_I8 (0xc1) /* /5 */ +#define OPCODE_SAR_RM32_BY_I8 (0xc1) /* /7 */ +#define OPCODE_CMP_I32_WITH_RM32 (0x81) /* /7 */ +#define OPCODE_CMP_I8_WITH_RM32 (0x83) /* /7 */ +#define OPCODE_CMP_R64_WITH_RM64 (0x39) +#define OPCODE_CMP_RM32_WITH_R32 (0x3b) +#define OPCODE_TEST_R8_WITH_RM8 (0x84) /* /r */ +#define OPCODE_JMP_REL8 (0xeb) +#define OPCODE_JMP_REL32 (0xe9) +#define OPCODE_JCC_REL8 (0x70) /* | jcc type */ +#define OPCODE_JCC_REL32_A (0x0f) +#define OPCODE_JCC_REL32_B (0x80) /* | jcc type */ +#define OPCODE_SETCC_RM8_A (0x0f) +#define OPCODE_SETCC_RM8_B (0x90) /* | jcc type, /0 */ +#define OPCODE_CALL_REL32 (0xe8) +#define OPCODE_CALL_RM32 (0xff) /* /2 */ +#define OPCODE_LEAVE (0xc9) + +#define MODRM_R64(x) ((x) << 3) +#define MODRM_RM_DISP0 (0x00) +#define MODRM_RM_DISP8 (0x40) +#define MODRM_RM_DISP32 (0x80) +#define MODRM_RM_REG (0xc0) +#define MODRM_RM_R64(x) (x) + +#define REX_PREFIX (0x40) +#define REX_W (0x08) // width +#define REX_R (0x04) // register +#define REX_X (0x02) // index +#define REX_B (0x01) // base + +#define IMM32_L0(x) ((x) & 0xff) +#define IMM32_L1(x) (((x) >> 8) & 0xff) +#define IMM32_L2(x) (((x) >> 16) & 0xff) +#define IMM32_L3(x) (((x) >> 24) & 0xff) +#define IMM64_L4(x) (((x) >> 32) & 0xff) +#define IMM64_L5(x) (((x) >> 40) & 0xff) +#define IMM64_L6(x) (((x) >> 48) & 0xff) +#define IMM64_L7(x) (((x) >> 56) & 0xff) + +#define UNSIGNED_FIT8(x) (((x) & 
0xffffffffffffff00) == 0) +#define UNSIGNED_FIT32(x) (((x) & 0xffffffff00000000) == 0) +#define SIGNED_FIT8(x) (((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80) + +struct _asm_x64_t { + int pass; + uint code_offset; + uint code_size; + byte *code_base; + byte dummy_data[8]; + + int next_label; + int max_num_labels; + int *label_offsets; +}; + +// for allocating memory, see src/v8/src/platform-linux.cc +void *alloc_mem(uint req_size, uint *alloc_size, bool is_exec) { + req_size = (req_size + 0xfff) & (~0xfff); + int prot = PROT_READ | PROT_WRITE | (is_exec ? PROT_EXEC : 0); + void *ptr = mmap(NULL, req_size, prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ptr == MAP_FAILED) { + assert(0); + } + *alloc_size = req_size; + return ptr; +} + +asm_x64_t* asm_x64_new() { + asm_x64_t* as; + + as = m_new(asm_x64_t, 1); + as->pass = 0; + as->code_offset = 0; + as->code_size = 0; + as->code_base = NULL; + as->label_offsets = NULL; + + return as; +} + +void asm_x64_free(asm_x64_t* as, bool free_code) { + if (free_code) { + m_free(as->code_base); + } + /* + if (as->label != NULL) { + int i; + for (i = 0; i < as->label->len; ++i) + { + Label* lab = &g_array_index(as->label, Label, i); + if (lab->unresolved != NULL) + g_array_free(lab->unresolved, true); + } + g_array_free(as->label, true); + } + */ + m_free(as); +} + +void asm_x64_start_pass(asm_x64_t *as, int pass) { + as->pass = pass; + as->code_offset = 0; + as->next_label = 1; + if (pass == ASM_X64_PASS_1) { + as->max_num_labels = 0; + } else { + if (pass == ASM_X64_PASS_2) { + memset(as->label_offsets, -1, as->max_num_labels * sizeof(int)); + } + } +} + +void asm_x64_end_pass(asm_x64_t *as) { + if (as->pass == ASM_X64_PASS_1) { + // calculate number of labels need + if (as->next_label > as->max_num_labels) { + as->max_num_labels = as->next_label; + } + as->label_offsets = m_new(int, as->max_num_labels); + } else if (as->pass == ASM_X64_PASS_2) { + // calculate size of code in bytes + as->code_size = 
as->code_offset; + as->code_base = m_new(byte, as->code_size); + printf("code_size: %u\n", as->code_size); + } + + /* + // check labels are resolved + if (as->label != NULL) + { + int i; + for (i = 0; i < as->label->len; ++i) + if (g_array_index(as->label, Label, i).unresolved != NULL) + return false; + } + */ +} + +// all functions must go through this one to emit bytes +static byte* asm_x64_get_cur_to_write_bytes(asm_x64_t* as, int num_bytes_to_write) { + //printf("emit %d\n", num_bytes_to_write); + if (as->pass < ASM_X64_PASS_3) { + as->code_offset += num_bytes_to_write; + return as->dummy_data; + } else { + assert(as->code_offset + num_bytes_to_write <= as->code_size); + byte *c = as->code_base + as->code_offset; + as->code_offset += num_bytes_to_write; + return c; + } +} + +uint asm_x64_get_code_size(asm_x64_t* as) { + return as->code_size; +} + +void* asm_x64_get_code(asm_x64_t* as) { + return as->code_base; +} + +static void asm_x64_write_byte_1(asm_x64_t* as, byte b1) { + byte* c = asm_x64_get_cur_to_write_bytes(as, 1); + c[0] = b1; +} + +static void asm_x64_write_byte_2(asm_x64_t* as, byte b1, byte b2) { + byte* c = asm_x64_get_cur_to_write_bytes(as, 2); + c[0] = b1; + c[1] = b2; +} + +static void asm_x64_write_byte_3(asm_x64_t* as, byte b1, byte b2, byte b3) { + byte* c = asm_x64_get_cur_to_write_bytes(as, 3); + c[0] = b1; + c[1] = b2; + c[2] = b3; +} + +static void asm_x64_write_word32(asm_x64_t* as, int w32) { + byte* c = asm_x64_get_cur_to_write_bytes(as, 4); + c[0] = IMM32_L0(w32); + c[1] = IMM32_L1(w32); + c[2] = IMM32_L2(w32); + c[3] = IMM32_L3(w32); +} + +static void asm_x64_write_word64(asm_x64_t* as, int64_t w64) { + byte* c = asm_x64_get_cur_to_write_bytes(as, 8); + c[0] = IMM32_L0(w64); + c[1] = IMM32_L1(w64); + c[2] = IMM32_L2(w64); + c[3] = IMM32_L3(w64); + c[4] = IMM64_L4(w64); + c[5] = IMM64_L5(w64); + c[6] = IMM64_L6(w64); + c[7] = IMM64_L7(w64); +} + +/* unused +static void asm_x64_write_word32_to(asm_x64_t* as, int offset, int w32) { + 
byte* c; + assert(offset + 4 <= as->code_size); + c = as->code_base + offset; + c[0] = IMM32_L0(w32); + c[1] = IMM32_L1(w32); + c[2] = IMM32_L2(w32); + c[3] = IMM32_L3(w32); +} +*/ + +static void asm_x64_write_r64_disp(asm_x64_t* as, int r64, int disp_r64, int disp_offset) { + assert(disp_r64 != REG_RSP); + + if (disp_offset == 0 && disp_r64 != REG_RBP) { + asm_x64_write_byte_1(as, MODRM_R64(r64) | MODRM_RM_DISP0 | MODRM_RM_R64(disp_r64)); + } else if (SIGNED_FIT8(disp_offset)) { + asm_x64_write_byte_2(as, MODRM_R64(r64) | MODRM_RM_DISP8 | MODRM_RM_R64(disp_r64), IMM32_L0(disp_offset)); + } else { + asm_x64_write_byte_1(as, MODRM_R64(r64) | MODRM_RM_DISP32 | MODRM_RM_R64(disp_r64)); + asm_x64_write_word32(as, disp_offset); + } +} + +void asm_x64_nop(asm_x64_t* as) +{ + asm_x64_write_byte_1(as, OPCODE_NOP); +} + +void asm_x64_push_r64(asm_x64_t* as, int src_r64) +{ + asm_x64_write_byte_1(as, OPCODE_PUSH_R64 | src_r64); +} + +void asm_x64_push_i32(asm_x64_t* as, int src_i32) +{ + asm_x64_write_byte_1(as, OPCODE_PUSH_I64); + asm_x64_write_word32(as, src_i32); // will be sign extended to 64 bits +} + +void asm_x64_push_disp(asm_x64_t* as, int src_r64, int src_offset) { + asm_x64_write_byte_1(as, OPCODE_PUSH_M64); + asm_x64_write_r64_disp(as, 6, src_r64, src_offset); +} + +void asm_x64_pop_r64(asm_x64_t* as, int dest_r64) +{ + asm_x64_write_byte_1(as, OPCODE_POP_R64 | dest_r64); +} + +static void asm_x64_ret(asm_x64_t* as) +{ + asm_x64_write_byte_1(as, OPCODE_RET); +} + +void asm_x64_mov_r32_to_r32(asm_x64_t* as, int src_r32, int dest_r32) { + // defaults to 32 bit operation + asm_x64_write_byte_2(as, OPCODE_MOV_R64_TO_RM64, MODRM_R64(src_r32) | MODRM_RM_REG | MODRM_RM_R64(dest_r32)); +} + +void asm_x64_mov_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64) { + // use REX prefix for 64 bit operation + asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_MOV_R64_TO_RM64, MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64)); +} + +void 
asm_x64_mov_r64_to_disp(asm_x64_t* as, int src_r64, int dest_r64, int dest_disp) { + // use REX prefix for 64 bit operation + asm_x64_write_byte_2(as, REX_PREFIX | REX_W, OPCODE_MOV_R64_TO_RM64); + asm_x64_write_r64_disp(as, src_r64, dest_r64, dest_disp); +} + +void asm_x64_mov_disp_to_r64(asm_x64_t* as, int src_r64, int src_disp, int dest_r64) { + // use REX prefix for 64 bit operation + asm_x64_write_byte_2(as, REX_PREFIX | REX_W, OPCODE_MOV_RM64_TO_R64); + asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp); +} + +void asm_x64_lea_disp_to_r64(asm_x64_t* as, int src_r64, int src_disp, int dest_r64) { + // use REX prefix for 64 bit operation + asm_x64_write_byte_2(as, REX_PREFIX | REX_W, OPCODE_LEA_MEM_TO_R64); + asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp); +} + +void asm_x64_mov_i8_to_r8(asm_x64_t *as, int src_i8, int dest_r64) { + asm_x64_write_byte_2(as, OPCODE_MOV_I8_TO_R8 | dest_r64, src_i8); +} + +void asm_x64_mov_i32_to_r64(asm_x64_t* as, int src_i32, int dest_r64) { + // cpu defaults to i32 to r64, with zero extension + asm_x64_write_byte_1(as, OPCODE_MOV_I64_TO_R64 | dest_r64); + asm_x64_write_word32(as, src_i32); +} + +void asm_x64_mov_i64_to_r64(asm_x64_t* as, int64_t src_i64, int dest_r64) { + // cpu defaults to i32 to r64 + // to mov i64 to r64 need to use REX prefix + asm_x64_write_byte_2(as, REX_PREFIX | REX_W, OPCODE_MOV_I64_TO_R64 | dest_r64); + asm_x64_write_word64(as, src_i64); +} + +void asm_x64_mov_i64_to_r64_optimised(asm_x64_t *as, int64_t src_i64, int dest_r64) { + if (UNSIGNED_FIT32(src_i64)) { + // 5 bytes + asm_x64_mov_i32_to_r64(as, src_i64 & 0xffffffff, dest_r64); + } else { + // 10 bytes + asm_x64_mov_i64_to_r64(as, src_i64, dest_r64); + } +} + +void asm_x64_mov_i32_to_disp(asm_x64_t* as, int src_i32, int dest_r32, int dest_disp) +{ + assert(0); + asm_x64_write_byte_1(as, OPCODE_MOV_I32_TO_RM32); + //asm_x64_write_r32_disp(as, 0, dest_r32, dest_disp); + asm_x64_write_word32(as, src_i32); +} + +void 
asm_x64_xor_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64) { + asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_XOR_R64_TO_RM64, MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64)); +} + +void asm_x64_add_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64) { + asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_ADD_R64_TO_RM64, MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64)); +} + +void asm_x64_add_i32_to_r32(asm_x64_t* as, int src_i32, int dest_r32) +{ + assert(dest_r32 != REG_RSP); // in this case i think src_i32 must be 64 bits + if (SIGNED_FIT8(src_i32)) + { + asm_x64_write_byte_2(as, OPCODE_ADD_I8_TO_RM32, MODRM_R64(0) | MODRM_RM_REG | MODRM_RM_R64(dest_r32)); + asm_x64_write_byte_1(as, src_i32 & 0xff); + } + else + { + asm_x64_write_byte_2(as, OPCODE_ADD_I32_TO_RM32, MODRM_R64(0) | MODRM_RM_REG | MODRM_RM_R64(dest_r32)); + asm_x64_write_word32(as, src_i32); + } +} + +void asm_x64_sub_r32_from_r32(asm_x64_t* as, int src_r32, int dest_r32) { + // defaults to 32 bit operation + asm_x64_write_byte_2(as, OPCODE_SUB_R64_FROM_RM64, MODRM_R64(src_r32) | MODRM_RM_REG | MODRM_RM_R64(dest_r32)); +} + +void asm_x64_sub_r64_from_r64(asm_x64_t* as, int src_r64, int dest_r64) { + // use REX prefix for 64 bit operation + asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_SUB_R64_FROM_RM64, MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64)); +} + +void asm_x64_sub_i32_from_r32(asm_x64_t* as, int src_i32, int dest_r32) { + if (SIGNED_FIT8(src_i32)) { + // defaults to 32 bit operation + asm_x64_write_byte_2(as, OPCODE_SUB_I8_FROM_RM64, MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r32)); + asm_x64_write_byte_1(as, src_i32 & 0xff); + } else { + // defaults to 32 bit operation + asm_x64_write_byte_2(as, OPCODE_SUB_I32_FROM_RM64, MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r32)); + asm_x64_write_word32(as, src_i32); + } +} + +void asm_x64_sub_i32_from_r64(asm_x64_t* as, int src_i32, int dest_r64) { + if (SIGNED_FIT8(src_i32)) { + // 
use REX prefix for 64 bit operation + asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_SUB_I8_FROM_RM64, MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r64)); + asm_x64_write_byte_1(as, src_i32 & 0xff); + } else { + // use REX prefix for 64 bit operation + asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_SUB_I32_FROM_RM64, MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r64)); + asm_x64_write_word32(as, src_i32); + } +} + +/* shifts not tested */ +void asm_x64_shl_r32_by_imm(asm_x64_t* as, int r32, int imm) { /* the three shift emitters differ only in the opcode macro and the constant placed in the ModRM reg field (4, 5, 7); imm is written as a single byte */ + asm_x64_write_byte_2(as, OPCODE_SHL_RM32_BY_I8, MODRM_R64(4) | MODRM_RM_REG | MODRM_RM_R64(r32)); + asm_x64_write_byte_1(as, imm); +} + +void asm_x64_shr_r32_by_imm(asm_x64_t* as, int r32, int imm) { + asm_x64_write_byte_2(as, OPCODE_SHR_RM32_BY_I8, MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(r32)); + asm_x64_write_byte_1(as, imm); +} + +void asm_x64_sar_r32_by_imm(asm_x64_t* as, int r32, int imm) { + asm_x64_write_byte_2(as, OPCODE_SAR_RM32_BY_I8, MODRM_R64(7) | MODRM_RM_REG | MODRM_RM_R64(r32)); + asm_x64_write_byte_1(as, imm); +} + +void asm_x64_cmp_r64_with_r64(asm_x64_t* as, int src_r64_a, int src_r64_b) { + asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_CMP_R64_WITH_RM64, MODRM_R64(src_r64_a) | MODRM_RM_REG | MODRM_RM_R64(src_r64_b)); +} + +void asm_x64_cmp_r32_with_disp(asm_x64_t* as, int src_r32_a, int src_r32_b, int src_disp_b) { /* not implemented: asserts unconditionally; operand encoding is commented out */ + assert(0); + asm_x64_write_byte_1(as, OPCODE_CMP_R64_WITH_RM64); + //asm_x64_write_r32_disp(as, src_r32_a, src_r32_b, src_disp_b); +} + +void asm_x64_cmp_disp_with_r32(asm_x64_t* as, int src_r32_a, int src_disp_a, int src_r32_b) { /* not implemented: asserts unconditionally; operand encoding is commented out */ + assert(0); + asm_x64_write_byte_1(as, OPCODE_CMP_RM32_WITH_R32); + //asm_x64_write_r32_disp(as, src_r32_b, src_r32_a, src_disp_a); +} + +void asm_x64_cmp_i32_with_r32(asm_x64_t* as, int src_i32, int src_r32) { + if (SIGNED_FIT8(src_i32)) { + asm_x64_write_byte_2(as, OPCODE_CMP_I8_WITH_RM32, MODRM_R64(7) | MODRM_RM_REG | MODRM_RM_R64(src_r32)); + asm_x64_write_byte_1(as, src_i32 & 
0xff); + } else { + asm_x64_write_byte_2(as, OPCODE_CMP_I32_WITH_RM32, MODRM_R64(7) | MODRM_RM_REG | MODRM_RM_R64(src_r32)); + asm_x64_write_word32(as, src_i32); + } +} + +void asm_x64_test_r8_with_r8(asm_x64_t* as, int src_r64_a, int src_r64_b) { + asm_x64_write_byte_2(as, OPCODE_TEST_R8_WITH_RM8, MODRM_R64(src_r64_a) | MODRM_RM_REG | MODRM_RM_R64(src_r64_b)); +} + +void asm_x64_setcc_r8(asm_x64_t* as, int jcc_type, int dest_r8) { /* jcc_type is OR-ed into the second opcode byte */ + asm_x64_write_byte_3(as, OPCODE_SETCC_RM8_A, OPCODE_SETCC_RM8_B | jcc_type, MODRM_R64(0) | MODRM_RM_REG | MODRM_RM_R64(dest_r8)); +} + +int asm_x64_label_new(asm_x64_t* as) { /* hands out consecutive label numbers */ + return as->next_label++; +} + +void asm_x64_label_assign(asm_x64_t* as, int label) { /* pass 1 ignores labels, pass 2 records the current code offset, pass 3 only checks that the offset is unchanged */ + if (as->pass > ASM_X64_PASS_1) { + assert(label < as->max_num_labels); + if (as->pass == ASM_X64_PASS_2) { + // assign label offset + assert(as->label_offsets[label] == -1); + as->label_offsets[label] = as->code_offset; + } else if (as->pass == ASM_X64_PASS_3) { + // ensure label offset has not changed from PASS_2 to PASS_3 + //printf("l%d: (at %d=%ld)\n", label, as->label_offsets[label], as->code_offset); + assert(as->label_offsets[label] == as->code_offset); + } + } +} + +void asm_x64_jmp_label(asm_x64_t* as, int label) { /* emits nothing in pass 1; rel is measured from the current offset and then reduced by the instruction length (2 for the short form, 5 for the long form) */ + if (as->pass > ASM_X64_PASS_1) { + int dest = as->label_offsets[label]; + int rel = dest - as->code_offset; + if (dest >= 0 && rel < 0) { + // is a backwards jump, so we know the size of the jump on the first pass + // calculate rel assuming 8 bit relative jump + rel -= 2; + if (SIGNED_FIT8(rel)) { + asm_x64_write_byte_2(as, OPCODE_JMP_REL8, rel & 0xff); + } else { + rel += 2; + goto large_jump; + } + } else { + // is a forwards jump, so need to assume it's large + large_jump: + rel -= 5; + asm_x64_write_byte_1(as, OPCODE_JMP_REL32); + asm_x64_write_word32(as, rel); + } + } +} + +void asm_x64_jcc_label(asm_x64_t* as, int jcc_type, int label) { + if (as->pass > ASM_X64_PASS_1) { + int dest = as->label_offsets[label]; + int rel = dest - as->code_offset; + 
if (dest >= 0 && rel < 0) { + // is a backwards jump, so we know the size of the jump on the first pass + // calculate rel assuming 8 bit relative jump + rel -= 2; + if (SIGNED_FIT8(rel)) { + asm_x64_write_byte_2(as, OPCODE_JCC_REL8 | jcc_type, rel & 0xff); + } else { + rel += 2; + goto large_jump; + } + } else { + // is a forwards jump, so need to assume it's large + large_jump: + rel -= 6; + asm_x64_write_byte_2(as, OPCODE_JCC_REL32_A, OPCODE_JCC_REL32_B | jcc_type); + asm_x64_write_word32(as, rel); + } + } +} + +void asm_x64_entry(asm_x64_t* as, int num_locals) { /* prologue: push RBP, copy RSP into RBP, reserve the local slots, then push RBX; negative num_locals is treated as 0 */ + asm_x64_push_r64(as, REG_RBP); + asm_x64_mov_r64_to_r64(as, REG_RSP, REG_RBP); + if (num_locals < 0) { + num_locals = 0; + } + num_locals |= 1; // make it odd so stack is aligned on 16 byte boundary + asm_x64_sub_i32_from_r64(as, num_locals * WORD_SIZE, REG_RSP); + asm_x64_push_r64(as, REG_RBX); +} + +void asm_x64_exit(asm_x64_t* as) { /* epilogue matching asm_x64_entry: pop RBX, OPCODE_LEAVE, return */ + asm_x64_pop_r64(as, REG_RBX); + asm_x64_write_byte_1(as, OPCODE_LEAVE); + asm_x64_ret(as); +} + +void asm_x64_push_arg(asm_x64_t* as, int src_arg_num) { /* not implemented: asserts unconditionally */ + assert(0); + asm_x64_push_disp(as, REG_RBP, 8 + src_arg_num * WORD_SIZE); +} + +void asm_x64_mov_arg_to_r32(asm_x64_t* as, int src_arg_num, int dest_r32) { /* not implemented: asserts unconditionally */ + assert(0); + //asm_x64_mov_disp_to_r32(as, REG_RBP, 8 + src_arg_num * WORD_SIZE, dest_r32); +} + +void asm_x64_mov_r32_to_arg(asm_x64_t* as, int src_r32, int dest_arg_num) { /* not implemented: asserts unconditionally */ + assert(0); + //asm_x64_mov_r32_to_disp(as, src_r32, REG_RBP, 8 + dest_arg_num * WORD_SIZE); +} + +static int asm_x64_local_offset_from_ebp(int local_num) +{ /* local 0 is at -WORD_SIZE from the frame pointer, local 1 at -2*WORD_SIZE, and so on */ + return -(local_num + 1) * WORD_SIZE; +} + +void asm_x64_mov_local_to_r64(asm_x64_t* as, int src_local_num, int dest_r64) { + asm_x64_mov_disp_to_r64(as, REG_RBP, asm_x64_local_offset_from_ebp(src_local_num), dest_r64); +} + +void asm_x64_mov_r64_to_local(asm_x64_t* as, int src_r64, int dest_local_num) { + asm_x64_mov_r64_to_disp(as, src_r64, REG_RBP, asm_x64_local_offset_from_ebp(dest_local_num)); +} + +void 
asm_x64_mov_local_addr_to_r64(asm_x64_t* as, int local_num, int dest_r64) { /* loads the address of a local slot: a plain register copy when the offset is 0, otherwise a 64-bit LEA relative to RBP */ + int offset = asm_x64_local_offset_from_ebp(local_num); + if (offset == 0) { + asm_x64_mov_r64_to_r64(as, REG_RBP, dest_r64); + } else { + asm_x64_lea_disp_to_r64(as, REG_RBP, offset, dest_r64); + } +} + +void asm_x64_push_local(asm_x64_t* as, int local_num) { + asm_x64_push_disp(as, REG_RBP, asm_x64_local_offset_from_ebp(local_num)); +} + +void asm_x64_push_local_addr(asm_x64_t* as, int local_num, int temp_r64) +{ /* pushes the address of local slot local_num; temp_r64 is overwritten with that address */ + // build the address with the 64-bit helper: a 32-bit add (no REX.W) would zero-extend temp_r64 and lose the upper 32 bits of the frame address + asm_x64_mov_local_addr_to_r64(as, local_num, temp_r64); + asm_x64_push_r64(as, temp_r64); +} + +/* + can't use these because code might be relocated when resized + +void asm_x64_call(asm_x64_t* as, void* func) +{ + asm_x64_sub_i32_from_r32(as, 8, REG_RSP); + asm_x64_write_byte_1(as, OPCODE_CALL_REL32); + asm_x64_write_word32(as, func - (void*)(as->code_cur + 4)); + asm_x64_mov_r64_to_r64(as, REG_RBP, REG_RSP); +} + +void asm_x64_call_i1(asm_x64_t* as, void* func, int i1) +{ + asm_x64_sub_i32_from_r32(as, 8, REG_RSP); + asm_x64_sub_i32_from_r32(as, 12, REG_RSP); + asm_x64_push_i32(as, i1); + asm_x64_write_byte_1(as, OPCODE_CALL_REL32); + asm_x64_write_word32(as, func - (void*)(as->code_cur + 4)); + asm_x64_add_i32_to_r32(as, 16, REG_RSP); + asm_x64_mov_r64_to_r64(as, REG_RBP, REG_RSP); +} +*/ + +void asm_x64_call_ind(asm_x64_t* as, void *ptr, int temp_r64) { + /* + asm_x64_mov_i64_to_r64_optimised(as, (int64_t)ptr, temp_r64); + asm_x64_write_byte_2(as, OPCODE_CALL_RM32, MODRM_R64(2) | MODRM_RM_REG | MODRM_RM_R64(temp_r64)); + */ + // this reduces code size by 2 bytes per call, but doesn't seem to speed it up at all + asm_x64_write_byte_1(as, OPCODE_CALL_REL32); + asm_x64_write_word32(as, ptr - (void*)(as->code_base + as->code_offset + 4)); +} diff --git a/py/asmx64.h b/py/asmx64.h new file mode 100644 index 0000000000..4871dbff8a --- /dev/null +++ b/py/asmx64.h @@ -0,0 +1,76 @@ +#define ASM_X64_PASS_1 (1) 
+#define ASM_X64_PASS_2 (2) +#define ASM_X64_PASS_3 (3) + +#define REG_RAX (0) +#define REG_RCX (1) +#define REG_RDX (2) +#define REG_RBX (3) +#define REG_RSP (4) +#define REG_RBP (5) +#define REG_RSI (6) +#define REG_RDI (7) + +// condition codes, used for jcc and setcc (despite their j-name!) +#define JCC_JB (0x2) // below, unsigned +#define JCC_JZ (0x4) +#define JCC_JE (0x4) +#define JCC_JNZ (0x5) +#define JCC_JNE (0x5) +#define JCC_JL (0xc) // less, signed + +#define REG_RET REG_RAX +#define REG_ARG_1 REG_RDI +#define REG_ARG_2 REG_RSI +#define REG_ARG_3 REG_RDX + +typedef struct _asm_x64_t asm_x64_t; + +asm_x64_t* asm_x64_new(); +void asm_x64_free(asm_x64_t* as, bool free_code); +void asm_x64_start_pass(asm_x64_t *as, int pass); +void asm_x64_end_pass(asm_x64_t *as); +uint asm_x64_get_code_size(asm_x64_t* as); +void* asm_x64_get_code(asm_x64_t* as); + +void asm_x64_nop(asm_x64_t* as); +void asm_x64_push_r64(asm_x64_t* as, int src_r64); +void asm_x64_push_i32(asm_x64_t* as, int src_i32); // will be sign extended to 64 bits +void asm_x64_push_disp(asm_x64_t* as, int src_r32, int src_offset); +void asm_x64_pop_r64(asm_x64_t* as, int dest_r64); +void asm_x64_mov_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64); +void asm_x64_mov_r32_to_disp(asm_x64_t* as, int src_r32, int dest_r32, int dest_disp); +void asm_x64_mov_disp_to_r32(asm_x64_t* as, int src_r32, int src_disp, int dest_r32); +void asm_x64_mov_i32_to_r64(asm_x64_t* as, int src_i32, int dest_r64); +void asm_x64_mov_i64_to_r64(asm_x64_t* as, int64_t src_i64, int dest_r64); +void asm_x64_mov_i32_to_disp(asm_x64_t* as, int src_i32, int dest_r32, int dest_disp); +void asm_x64_mov_i64_to_r64_optimised(asm_x64_t *as, int64_t src_i64, int dest_r64); +void asm_x64_xor_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64); +void asm_x64_add_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64); +void asm_x64_add_i32_to_r32(asm_x64_t* as, int src_i32, int dest_r32); +void asm_x64_sub_r32_from_r32(asm_x64_t* as, int 
src_r32, int dest_r32); +void asm_x64_sub_i32_from_r32(asm_x64_t* as, int src_i32, int dest_r32); +void asm_x64_shl_r32_by_imm(asm_x64_t* as, int r32, int imm); +void asm_x64_shr_r32_by_imm(asm_x64_t* as, int r32, int imm); +void asm_x64_sar_r32_by_imm(asm_x64_t* as, int r32, int imm); +void asm_x64_cmp_r64_with_r64(asm_x64_t* as, int src_r64_a, int src_r64_b); +void asm_x64_cmp_r32_with_disp(asm_x64_t* as, int src_r32_a, int src_r32_b, int src_disp_b); +void asm_x64_cmp_disp_with_r32(asm_x64_t* as, int src_r32_a, int src_disp_a, int src_r32_b); +void asm_x64_cmp_i32_with_r32(asm_x64_t* as, int src_i32, int src_r32); +void asm_x64_test_r8_with_r8(asm_x64_t* as, int src_r64_a, int src_r64_b); +void asm_x64_setcc_r8(asm_x64_t* as, int jcc_type, int dest_r8); +int asm_x64_label_new(asm_x64_t* as); +void asm_x64_label_assign(asm_x64_t* as, int label); +void asm_x64_jmp_label(asm_x64_t* as, int label); +void asm_x64_jcc_label(asm_x64_t* as, int jcc_type, int label); +void asm_x64_entry(asm_x64_t* as, int num_locals); +void asm_x64_exit(asm_x64_t* as); +void asm_x64_push_arg(asm_x64_t* as, int src_arg_num); +void asm_x64_mov_arg_to_r32(asm_x64_t* as, int src_arg_num, int dest_r32); +void asm_x64_mov_r32_to_arg(asm_x64_t* as, int src_r32, int dest_arg_num); +void asm_x64_mov_local_to_r64(asm_x64_t* as, int src_local_num, int dest_r64); +void asm_x64_mov_r64_to_local(asm_x64_t* as, int src_r64, int dest_local_num); +void asm_x64_mov_local_addr_to_r64(asm_x64_t* as, int local_num, int dest_r64); +void asm_x64_push_local(asm_x64_t* as, int local_num); +void asm_x64_push_local_addr(asm_x64_t* as, int local_num, int temp_r32); +void asm_x64_call_ind(asm_x64_t* as, void* ptr, int temp_r32); /* NOTE(review): the asmx64.c definitions of this and asm_x64_push_local_addr name the last parameter temp_r64, and the active code of asm_x64_call_ind does not reference it - confirm which name is intended */ diff --git a/py/bc.c b/py/bc.c new file mode 100644 index 0000000000..1edd911ab6 --- /dev/null +++ b/py/bc.c @@ -0,0 +1,272 @@ +#include +#include +#include +#include +#include + +#include "misc.h" +#include "machine.h" +#include "runtime.h" +#include "bc.h" + +#define DECODE_UINT do { unum 
= *ip++; if (unum > 127) { unum = ((unum & 0x3f) << 8) | (*ip++); } } while (0) +#define DECODE_QSTR do { qstr = *ip++; if (qstr > 127) { qstr = ((qstr & 0x3f) << 8) | (*ip++); } } while (0) +#define PUSH(val) *--sp = (val) +#define POP() (*sp++) + +py_obj_t py_execute_byte_code(byte *code, uint len, py_obj_t *args, uint n_args) { /* Interprets the byte code starting at code until PYBC_RETURN_VALUE. The first three arguments go to fast0..fast2, further ones to fastn[]. NOTE(review): len is not read in the visible body and pushes onto the 10-slot stack are not bounds-checked */ + byte *ip = code; + py_obj_t stack[10]; + py_obj_t *sp = &stack[10]; // stack grows down, sp points to top of stack + machine_uint_t unum; + machine_int_t snum; + qstr qstr; + py_obj_t obj1, obj2; + py_obj_t fast0 = NULL, fast1 = NULL, fast2 = NULL, fastn[4] = {NULL, NULL, NULL, NULL}; + + // init args + for (int i = 0; i < n_args; i++) { + if (i == 0) { + fast0 = args[0]; + } else if (i == 1) { + fast1 = args[1]; + } else if (i == 2) { + fast2 = args[2]; + } else { + assert(i - 3 < 4); + fastn[i - 3] = args[i]; + } + } + + // execute byte code + for (;;) { + int op = *ip++; + switch (op) { + case PYBC_LOAD_CONST_FALSE: + PUSH(py_const_false); + break; + + case PYBC_LOAD_CONST_NONE: + PUSH(py_const_none); + break; + + case PYBC_LOAD_CONST_TRUE: + PUSH(py_const_true); + break; + + case PYBC_LOAD_CONST_SMALL_INT: /* 16-bit little-endian operand, sign-extended, then tagged as (value << 1) | 1 */ + snum = ip[0] | (ip[1] << 8); + if (snum & 0x8000) { + snum |= ~0xffff; + } + ip += 2; + PUSH((py_obj_t)(snum << 1 | 1)); + break; + + case PYBC_LOAD_CONST_ID: + DECODE_QSTR; + PUSH(rt_load_const_str(qstr)); // TODO + break; + + case PYBC_LOAD_CONST_STRING: + DECODE_QSTR; + PUSH(rt_load_const_str(qstr)); + break; + + case PYBC_LOAD_FAST_0: + PUSH(fast0); + break; + + case PYBC_LOAD_FAST_1: + PUSH(fast1); + break; + + case PYBC_LOAD_FAST_2: + PUSH(fast2); + break; + + case PYBC_LOAD_FAST_N: /* operand is the absolute local number, hence the - 3; it is not range-checked against fastn[4] */ + DECODE_UINT; + PUSH(fastn[unum - 3]); + break; + + case PYBC_LOAD_NAME: + DECODE_QSTR; + PUSH(rt_load_name(qstr)); + break; + + case PYBC_LOAD_GLOBAL: + DECODE_QSTR; + PUSH(rt_load_global(qstr)); + break; + + case PYBC_LOAD_ATTR: + DECODE_QSTR; + *sp = rt_load_attr(*sp, qstr); + break; + + case PYBC_LOAD_METHOD: + DECODE_QSTR; + sp -= 1; + 
rt_load_method(sp[1], qstr, sp); + break; + + case PYBC_LOAD_BUILD_CLASS: + PUSH(rt_load_build_class()); + break; + + case PYBC_STORE_FAST_0: + fast0 = POP(); + break; + + case PYBC_STORE_FAST_1: + fast1 = POP(); + break; + + case PYBC_STORE_FAST_2: + fast2 = POP(); + break; + + case PYBC_STORE_FAST_N: + DECODE_UINT; + fastn[unum - 3] = POP(); + break; + + case PYBC_STORE_NAME: + DECODE_QSTR; + rt_store_name(qstr, POP()); + break; + + case PYBC_STORE_SUBSCR: /* consumes the three topmost stack entries */ + rt_store_subscr(sp[1], sp[0], sp[2]); + sp += 3; + break; + + case PYBC_DUP_TOP: + obj1 = *sp; + PUSH(obj1); + break; + + case PYBC_DUP_TOP_TWO: + sp -= 2; + sp[0] = sp[2]; + sp[1] = sp[3]; + break; + + case PYBC_POP_TOP: + ++sp; + break; + + case PYBC_ROT_THREE: /* the top entry moves down two places; the two entries below it each move up one */ + obj1 = sp[0]; + sp[0] = sp[1]; + sp[1] = sp[2]; + sp[2] = obj1; + break; + + case PYBC_JUMP: /* the operand is an offset from the start of the code, not from ip */ + DECODE_UINT; + ip = code + unum; + break; + + case PYBC_POP_JUMP_IF_FALSE: + DECODE_UINT; + if (!rt_is_true(POP())) { + ip = code + unum; + } + break; + + case PYBC_SETUP_LOOP: /* operand is decoded and discarded */ + DECODE_UINT; + break; + + case PYBC_POP_BLOCK: + break; + + case PYBC_BINARY_OP: /* one-byte operand selects the operation; the right operand is popped and the left one is overwritten with the result */ + unum = *ip++; + obj2 = POP(); + obj1 = *sp; + *sp = rt_binary_op(unum, obj1, obj2); + break; + + case PYBC_COMPARE_OP: + unum = *ip++; + obj2 = POP(); + obj1 = *sp; + *sp = rt_compare_op(unum, obj1, obj2); + break; + + case PYBC_BUILD_LIST: /* the unum items are replaced by the single result (net stack change unum - 1) */ + DECODE_UINT; + obj1 = rt_build_list(unum, sp); + sp += unum - 1; + *sp = obj1; + break; + + case PYBC_BUILD_MAP: + DECODE_UINT; + PUSH(rt_build_map(unum)); + break; + + case PYBC_STORE_MAP: + sp += 2; + rt_store_map(sp[0], sp[-2], sp[-1]); + break; + + case PYBC_BUILD_SET: + DECODE_UINT; + obj1 = rt_build_set(unum, sp); + sp += unum - 1; + *sp = obj1; + break; + + case PYBC_MAKE_FUNCTION: + DECODE_UINT; + PUSH(rt_make_function_from_id(unum)); + break; + + case PYBC_CALL_FUNCTION: + DECODE_UINT; + assert((unum & 0xff00) == 0); // n_keyword + // switch on n_positional + if ((unum & 0xff) == 0) { + *sp = rt_call_function_0(*sp); + } else if ((unum & 0xff) == 1) { 
+ obj1 = *sp++; // the single argument + *sp = rt_call_function_1(*sp, obj1); + } else if ((unum & 0xff) == 2) { + obj2 = *sp++; // the second argument + obj1 = *sp++; // the first argument + *sp = rt_call_function_2(*sp, obj1, obj2); + } else { + assert(0); + } + break; + + case PYBC_CALL_METHOD: /* the callee sits below the self slot; self may be NULL, in which case it must not be passed on as an argument */ + DECODE_UINT; + assert((unum & 0xff00) == 0); // n_keyword + // switch on n_positional + if ((unum & 0xff) == 0) { + obj1 = *sp++; // the self object (or NULL) + *sp = rt_call_method_1(*sp, obj1); + } else if ((unum & 0xff) == 1) { + obj2 = *sp++; // the first argument + obj1 = *sp++; // the self object (or NULL) + *sp = (obj1 == NULL) ? rt_call_function_1(*sp, obj2) : rt_call_function_2(*sp, obj1, obj2); // a NULL self means a plain function: call it with the argument only + } else { + assert(0); + } + break; + + case PYBC_RETURN_VALUE: + return *sp; + + default: + printf("code %p, offset %u, byte code 0x%02x not implemented\n", code, (uint)(ip - code), op); + assert(0); + return py_const_none; + } + } +} diff --git a/py/bc.h b/py/bc.h new file mode 100644 index 0000000000..f09843a960 --- /dev/null +++ b/py/bc.h @@ -0,0 +1,97 @@ +#define PYBC_LOAD_CONST_FALSE (0x10) +#define PYBC_LOAD_CONST_NONE (0x11) +#define PYBC_LOAD_CONST_TRUE (0x12) +#define PYBC_LOAD_CONST_SMALL_INT (0x13) // int +#define PYBC_LOAD_CONST_INT (0x14) // qstr +#define PYBC_LOAD_CONST_DEC (0x15) // qstr +#define PYBC_LOAD_CONST_ID (0x16) // qstr +#define PYBC_LOAD_CONST_BYTES (0x17) // qstr +#define PYBC_LOAD_CONST_STRING (0x18) // qstr + +#define PYBC_LOAD_FAST_0 (0x20) +#define PYBC_LOAD_FAST_1 (0x21) +#define PYBC_LOAD_FAST_2 (0x22) +#define PYBC_LOAD_FAST_N (0x23) // uint +#define PYBC_LOAD_NAME (0x24) // qstr +#define PYBC_LOAD_GLOBAL (0x25) // qstr +#define PYBC_LOAD_ATTR (0x26) // qstr +#define PYBC_LOAD_METHOD (0x27) // qstr +#define PYBC_LOAD_BUILD_CLASS (0x28) + +#define PYBC_STORE_FAST_0 (0x30) +#define PYBC_STORE_FAST_1 (0x31) +#define PYBC_STORE_FAST_2 (0x32) +#define PYBC_STORE_FAST_N (0x33) // uint +#define PYBC_STORE_NAME (0x34) // qstr +#define PYBC_STORE_GLOBAL (0x35) // qstr +#define 
PYBC_STORE_ATTR (0x36) // qstr +#define PYBC_STORE_LOCALS (0x37) +#define PYBC_STORE_SUBSCR (0x38) + +#define PYBC_DELETE_FAST_N (0x39) // uint +#define PYBC_DELETE_NAME (0x3a) // qstr +#define PYBC_DELETE_GLOBAL (0x3b) // qstr +#define PYBC_DELETE_DEREF (0x3c) // qstr +#define PYBC_DELETE_ATTR (0x3d) // qstr +#define PYBC_DELETE_SUBSCR (0x3e) +/* stack shuffling and control flow; bc.c decodes the operand of PYBC_JUMP and PYBC_POP_JUMP_IF_FALSE with DECODE_UINT and uses it as an offset from the start of the code */ +#define PYBC_DUP_TOP (0x40) +#define PYBC_DUP_TOP_TWO (0x41) +#define PYBC_POP_TOP (0x42) +#define PYBC_ROT_TWO (0x43) +#define PYBC_ROT_THREE (0x44) +#define PYBC_JUMP (0x45) // pos +#define PYBC_POP_JUMP_IF_TRUE (0x46) // pos +#define PYBC_POP_JUMP_IF_FALSE (0x47) // pos +#define PYBC_JUMP_IF_TRUE_OR_POP (0x48) // pos +#define PYBC_JUMP_IF_FALSE_OR_POP (0x49) // pos +#define PYBC_SETUP_LOOP (0x4a) // pos +#define PYBC_BREAK_LOOP (0x4b) // pos +#define PYBC_CONTINUE_LOOP (0x4c) // pos +#define PYBC_SETUP_WITH (0x4d) // pos +#define PYBC_WITH_CLEANUP (0x4e) +#define PYBC_SETUP_EXCEPT (0x4f) // pos +#define PYBC_SETUP_FINALLY (0x50) // pos +#define PYBC_END_FINALLY (0x51) +#define PYBC_GET_ITER (0x52) +#define PYBC_FOR_ITER (0x53) // pos +#define PYBC_POP_BLOCK (0x54) +#define PYBC_POP_EXCEPT (0x55) +/* PYBC_BINARY_OP and PYBC_COMPARE_OP take a single raw operand byte in bc.c */ +#define PYBC_UNARY_OP (0x60) // byte +#define PYBC_BINARY_OP (0x61) // byte +#define PYBC_COMPARE_OP (0x62) // byte + +#define PYBC_BUILD_TUPLE (0x70) // uint +#define PYBC_BUILD_LIST (0x71) // uint +#define PYBC_LIST_APPEND (0x72) // uint +#define PYBC_BUILD_MAP (0x73) // uint +#define PYBC_STORE_MAP (0x74) +#define PYBC_MAP_ADD (0x75) // uint +#define PYBC_BUILD_SET (0x76) // uint +#define PYBC_SET_ADD (0x77) // uint +#define PYBC_BUILD_SLICE (0x78) // uint +#define PYBC_UNPACK_SEQUENCE (0x79) // uint +#define PYBC_UNPACK_EX (0x7a) // uint + +#define PYBC_RETURN_VALUE (0x80) +#define PYBC_RAISE_VARARGS (0x81) // uint +#define PYBC_YIELD_VALUE (0x82) +#define PYBC_YIELD_FROM (0x83) + +#define PYBC_MAKE_FUNCTION (0x90) // uint +#define PYBC_MAKE_CLOSURE (0x91) // uint? 
+#define PYBC_CALL_FUNCTION (0x92) // uint +#define PYBC_CALL_FUNCTION_VAR (0x93) // uint +#define PYBC_CALL_FUNCTION_KW (0x94) // uint +#define PYBC_CALL_FUNCTION_VAR_KW (0x95) // uint +#define PYBC_CALL_METHOD (0x96) // uint +#define PYBC_CALL_METHOD_VAR (0x97) // uint +#define PYBC_CALL_METHOD_KW (0x98) // uint +#define PYBC_CALL_METHOD_VAR_KW (0x99) // uint + +#define PYBC_IMPORT_NAME (0xe0) +#define PYBC_IMPORT_FROM (0xe1) +#define PYBC_IMPORT_STAR (0xe2) +/* runs the byte code at code with n_args arguments taken from args and returns the top of stack when PYBC_RETURN_VALUE is reached */ +py_obj_t py_execute_byte_code(byte *code, uint len, py_obj_t *args, uint n_args); diff --git a/py/compile.c b/py/compile.c new file mode 100644 index 0000000000..0e6ce4443b --- /dev/null +++ b/py/compile.c @@ -0,0 +1,2510 @@ +#include +#include +#include +#include +#include +#include + +#include "misc.h" +#include "lexer.h" +#include "machine.h" +#include "parse.h" +#include "scope.h" +#include "compile.h" +#include "runtime.h" +#include "emit.h" + +// TODO need to mangle __attr names + +typedef enum { /* one PN_<rule> enumerator per DEF_RULE entry in grammar.h, bracketed by PN_none and PN_maximum_number_of */ + PN_none = 0, +#define DEF_RULE(rule, comp, kind, arg...) PN_##rule, +#include "grammar.h" +#undef DEF_RULE + PN_maximum_number_of, +} pn_kind_t; + +#define EMIT(fun, arg...) 
(emit_##fun(comp->emit, ##arg)) + +typedef struct _compiler_t { /* per-compilation state: interned names, loop/except bookkeeping, argument-parsing flags, the scope list and the active emitter */ + qstr qstr___class__; + qstr qstr___locals__; + qstr qstr___name__; + qstr qstr___module__; + qstr qstr___qualname__; + qstr qstr___doc__; + qstr qstr_assertion_error; + + pass_kind_t pass; + + int break_label; + int continue_label; + int except_nest_level; + + int n_arg_keyword; + bool have_star_arg; + bool have_dbl_star_arg; + bool have_bare_star; + int param_pass; + int param_pass_num_dict_params; + int param_pass_num_default_params; + + scope_t *scope_head; + scope_t *scope_cur; + + emitter_t *emit; +} compiler_t; + +py_parse_node_t fold_constants(py_parse_node_t pn) { /* Folds small-int constant sub-expressions bottom-up: children are folded in place first, then this node is replaced by a new small-int leaf when its operands allow it. Returns the replacement, or pn unchanged. */ + if (PY_PARSE_NODE_IS_STRUCT(pn)) { + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; + int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + + // fold arguments first + for (int i = 0; i < n; i++) { + pns->nodes[i] = fold_constants(pns->nodes[i]); + } + + switch (PY_PARSE_NODE_STRUCT_KIND(pns)) { + case PN_shift_expr: + if (n == 3 && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) { + int arg0 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); + int arg1 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[2]); + if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_DBL_LESS)) { + pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 << arg1); // XXX can overflow; enabled only to compare with CPython + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_DBL_MORE)) { + pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 >> arg1); + } else { + // shouldn't happen + assert(0); + } + } + break; + + case PN_arith_expr: + // XXX can overflow; enabled only to compare with CPython + if (n == 3 && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) { + int arg0 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); + int arg1 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[2]); + if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_PLUS)) { + pn = 
py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 + arg1); + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_MINUS)) { + pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 - arg1); + } else { + // shouldn't happen + assert(0); + } + } + break; + + case PN_term: + // XXX can overflow; enabled only to compare with CPython + if (n == 3 && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[2])) { + int arg0 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); + int arg1 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[2]); + if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_STAR)) { + pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 * arg1); + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_SLASH)) { + ; // pass + //} else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_)) { + //pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg0 - arg1); + } else { + // any other term operator (e.g. % or //) is valid input but is not folded: leave the node as it is instead of aborting + ; // pass + } + } + break; + + case PN_factor_2: + if (PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[1])) { + machine_int_t arg = PY_PARSE_NODE_LEAF_ARG(pns->nodes[1]); + if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], PY_TOKEN_OP_PLUS)) { + pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, arg); + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], PY_TOKEN_OP_MINUS)) { + pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, -arg); + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], PY_TOKEN_OP_TILDE)) { + pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, ~arg); + } else { + // shouldn't happen + assert(0); + } + } + break; + + case PN_power: + // XXX can overflow; enabled only to compare with CPython + if (PY_PARSE_NODE_IS_SMALL_INT(pns->nodes[0]) && PY_PARSE_NODE_IS_NULL(pns->nodes[1]) && !PY_PARSE_NODE_IS_NULL(pns->nodes[2])) { + py_parse_node_struct_t* pns2 = (py_parse_node_struct_t*)pns->nodes[2]; + if (PY_PARSE_NODE_IS_SMALL_INT(pns2->nodes[0])) { + int power = PY_PARSE_NODE_LEAF_ARG(pns2->nodes[0]); + 
if (power >= 0) { + int ans = 1; + int base = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); + for (; power > 0; power--) { + ans *= base; + } + pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, ans); + } + } + } + break; + } + } + + return pn; +} + +void compile_node(compiler_t *comp, py_parse_node_t pn); + +scope_t *scope_new_and_link(compiler_t *comp, scope_kind_t kind, py_parse_node_t pn) { /* creates a scope whose parent is comp->scope_cur and appends it to the end of the list starting at comp->scope_head */ + scope_t *scope = scope_new(kind, pn); + scope->parent = comp->scope_cur; + scope->next = NULL; + if (comp->scope_head == NULL) { + comp->scope_head = scope; + } else { + scope_t *s = comp->scope_head; + while (s->next != NULL) { + s = s->next; + } + s->next = scope; + } + return scope; +} + +int list_len(py_parse_node_t pn, int pn_kind) { /* 0 for a null node, 1 for a leaf or a struct of another kind, otherwise the number of child nodes */ + if (PY_PARSE_NODE_IS_NULL(pn)) { + return 0; + } else if (PY_PARSE_NODE_IS_LEAF(pn)) { + return 1; + } else { + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; + if (PY_PARSE_NODE_STRUCT_KIND(pns) != pn_kind) { + return 1; + } else { + return PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + } + } +} + +void apply_to_single_or_list(compiler_t *comp, py_parse_node_t pn, int pn_list_kind, void (*f)(compiler_t*, py_parse_node_t)) { /* calls f on every child when pn is a struct of kind pn_list_kind, on pn itself when it is any other non-null node, and does nothing for a null node */ + if (PY_PARSE_NODE_IS_STRUCT(pn) && PY_PARSE_NODE_STRUCT_KIND((py_parse_node_struct_t*)pn) == pn_list_kind) { + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; + int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + for (int i = 0; i < num_nodes; i++) { + f(comp, pns->nodes[i]); + } + } else if (!PY_PARSE_NODE_IS_NULL(pn)) { + f(comp, pn); + } +} + +int list_get(py_parse_node_t *pn, int pn_kind, py_parse_node_t **nodes) { /* same counting rule as list_len; additionally points *nodes at the element array (pn itself for a single element, NULL for none) */ + if (PY_PARSE_NODE_IS_NULL(*pn)) { + *nodes = NULL; + return 0; + } else if (PY_PARSE_NODE_IS_LEAF(*pn)) { + *nodes = pn; + return 1; + } else { + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)(*pn); + if (PY_PARSE_NODE_STRUCT_KIND(pns) != pn_kind) { + *nodes = pn; + return 1; + } else { + *nodes = pns->nodes; + return PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + } + } +} + +void 
compile_do_nothing(compiler_t *comp, py_parse_node_struct_t *pns) { /* intentionally empty */ +} + +void compile_generic_all_nodes(compiler_t *comp, py_parse_node_struct_t *pns) { /* compiles every child node in order */ + int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + for (int i = 0; i < num_nodes; i++) { + compile_node(comp, pns->nodes[i]); + } +} + +bool c_tuple_is_const(py_parse_node_t pn) { /* a node counts as constant when it is a leaf that is not an identifier */ + if (!PY_PARSE_NODE_IS_LEAF(pn)) { + return false; + } + if (PY_PARSE_NODE_IS_ID(pn)) { + return false; + } + return true; +} + +void c_tuple_emit_const(compiler_t *comp, py_parse_node_t pn) { /* emits the verbatim text for one constant leaf; identifiers and unknown kinds or tokens hit assert(0) */ + assert(PY_PARSE_NODE_IS_LEAF(pn)); + int arg = PY_PARSE_NODE_LEAF_ARG(pn); + switch (PY_PARSE_NODE_LEAF_KIND(pn)) { + case PY_PARSE_NODE_ID: assert(0); + case PY_PARSE_NODE_SMALL_INT: EMIT(load_const_verbatim_int, arg); break; + case PY_PARSE_NODE_INTEGER: EMIT(load_const_verbatim_str, qstr_str(arg)); break; + case PY_PARSE_NODE_DECIMAL: EMIT(load_const_verbatim_str, qstr_str(arg)); break; + case PY_PARSE_NODE_STRING: EMIT(load_const_verbatim_quoted_str, arg, false); break; + case PY_PARSE_NODE_BYTES: EMIT(load_const_verbatim_quoted_str, arg, true); break; + case PY_PARSE_NODE_TOKEN: + switch (arg) { + case PY_TOKEN_KW_FALSE: EMIT(load_const_verbatim_str, "False"); break; + case PY_TOKEN_KW_NONE: EMIT(load_const_verbatim_str, "None"); break; + case PY_TOKEN_KW_TRUE: EMIT(load_const_verbatim_str, "True"); break; + default: assert(0); + } + break; + default: assert(0); + } +} + +// funnelling all tuple creations through this function and all this constant stuff is purely to agree with CPython +void c_tuple(compiler_t *comp, py_parse_node_t pn, py_parse_node_struct_t *pns_list) { /* the tuple consists of the optional leading node pn followed by the children of the optional pns_list */ + int n = 0; + if (pns_list != NULL) { + n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns_list); + } + int total = n; + bool is_const = true; + if (!PY_PARSE_NODE_IS_NULL(pn)) { + total += 1; + if (!c_tuple_is_const(pn)) { + is_const = false; + } + } + for (int i = 0; i < n; i++) { + if (!c_tuple_is_const(pns_list->nodes[i])) { + is_const = false; + break; + } + } + if (total 
> 0 && is_const) { + bool need_comma = false; + EMIT(load_const_verbatim_start); + EMIT(load_const_verbatim_str, "("); + if (!PY_PARSE_NODE_IS_NULL(pn)) { + c_tuple_emit_const(comp, pn); + need_comma = true; + } + for (int i = 0; i < n; i++) { + if (need_comma) { + EMIT(load_const_verbatim_str, ", "); + } + c_tuple_emit_const(comp, pns_list->nodes[i]); + need_comma = true; + } + if (total == 1) { + EMIT(load_const_verbatim_str, ",)"); + } else { + EMIT(load_const_verbatim_str, ")"); + } + EMIT(load_const_verbatim_end); + } else { + if (!PY_PARSE_NODE_IS_NULL(pn)) { + compile_node(comp, pn); + } + for (int i = 0; i < n; i++) { + compile_node(comp, pns_list->nodes[i]); + } + EMIT(build_tuple, total); + } +} + +void compile_generic_tuple(compiler_t *comp, py_parse_node_struct_t *pns) { + // a simple tuple expression + /* + int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + for (int i = 0; i < n; i++) { + compile_node(comp, pns->nodes[i]); + } + EMIT(build_tuple, n); + */ + c_tuple(comp, PY_PARSE_NODE_NULL, pns); +} + +bool node_is_const_false(py_parse_node_t pn) { + return PY_PARSE_NODE_IS_TOKEN_KIND(pn, PY_TOKEN_KW_FALSE); + // untested: || (PY_PARSE_NODE_IS_SMALL_INT(pn) && PY_PARSE_NODE_LEAF_ARG(pn) == 0); +} + +bool node_is_const_true(py_parse_node_t pn) { + return PY_PARSE_NODE_IS_TOKEN_KIND(pn, PY_TOKEN_KW_TRUE) || (PY_PARSE_NODE_IS_SMALL_INT(pn) && PY_PARSE_NODE_LEAF_ARG(pn) == 1); +} + +// having c_if_cond_2 and the is_nested variable is purely to match with CPython, which doesn't fully optimise not's +void c_if_cond_2(compiler_t *comp, py_parse_node_t pn, bool jump_if, int label, bool is_nested) { + if (node_is_const_false(pn)) { + if (jump_if == false) { + EMIT(jump, label); + } + return; + } else if (node_is_const_true(pn)) { + if (jump_if == true) { + EMIT(jump, label); + } + return; + } else if (PY_PARSE_NODE_IS_STRUCT(pn)) { + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; + int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + if 
(PY_PARSE_NODE_STRUCT_KIND(pns) == PN_or_test) { + if (jump_if == false) { + int label2 = EMIT(label_new); + for (int i = 0; i < n - 1; i++) { + c_if_cond_2(comp, pns->nodes[i], true, label2, true); + } + c_if_cond_2(comp, pns->nodes[n - 1], false, label, true); + EMIT(label_assign, label2); + } else { + for (int i = 0; i < n; i++) { + c_if_cond_2(comp, pns->nodes[i], true, label, true); + } + } + return; + } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_and_test) { + if (jump_if == false) { + for (int i = 0; i < n; i++) { + c_if_cond_2(comp, pns->nodes[i], false, label, true); + } + } else { + int label2 = EMIT(label_new); + for (int i = 0; i < n - 1; i++) { + c_if_cond_2(comp, pns->nodes[i], false, label2, true); + } + c_if_cond_2(comp, pns->nodes[n - 1], true, label, true); + EMIT(label_assign, label2); + } + return; + } else if (!is_nested && PY_PARSE_NODE_STRUCT_KIND(pns) == PN_not_test_2) { + c_if_cond_2(comp, pns->nodes[0], !jump_if, label, true); + return; + } + } + + // nothing special, fall back to default compiling for node and jump + compile_node(comp, pn); + if (jump_if == false) { + EMIT(pop_jump_if_false, label); + } else { + EMIT(pop_jump_if_true, label); + } +} + +void c_if_cond(compiler_t *comp, py_parse_node_t pn, bool jump_if, int label) { /* entry point: emits code that jumps to label when the truth value of pn equals jump_if; delegates to c_if_cond_2 with is_nested = false */ + c_if_cond_2(comp, pn, jump_if, label, false); +} + +typedef enum { ASSIGN_STORE, ASSIGN_AUG_LOAD, ASSIGN_AUG_STORE } assign_kind_t; +void c_assign(compiler_t *comp, py_parse_node_t pn, assign_kind_t kind); + +void c_assign_power(compiler_t *comp, py_parse_node_struct_t *pns, assign_kind_t assign_kind) { /* handles a subscript or attribute target; the base and leading trailers are compiled unless this is the store half of an augmented assignment */ + if (assign_kind != ASSIGN_AUG_STORE) { + compile_node(comp, pns->nodes[0]); + } + + if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) { + py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1]; + if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_power_trailers) { + int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns1); + if (assign_kind != ASSIGN_AUG_STORE) { + for (int i = 0; i < n - 1; i++) { + compile_node(comp, 
pns1->nodes[i]); + } + } + assert(PY_PARSE_NODE_IS_STRUCT(pns1->nodes[n - 1])); + pns1 = (py_parse_node_struct_t*)pns1->nodes[n - 1]; + } + if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_paren) { + printf("SyntaxError: can't assign to function call\n"); + return; + } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_bracket) { + if (assign_kind == ASSIGN_AUG_STORE) { + EMIT(rot_three); + EMIT(store_subscr); + } else { + compile_node(comp, pns1->nodes[0]); + if (assign_kind == ASSIGN_AUG_LOAD) { + EMIT(dup_top_two); + EMIT(binary_op, RT_BINARY_OP_SUBSCR); + } else { + EMIT(store_subscr); + } + } + } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_period) { + assert(PY_PARSE_NODE_IS_ID(pns1->nodes[0])); + if (assign_kind == ASSIGN_AUG_LOAD) { + EMIT(dup_top); + EMIT(load_attr, PY_PARSE_NODE_LEAF_ARG(pns1->nodes[0])); + } else { + if (assign_kind == ASSIGN_AUG_STORE) { + EMIT(rot_two); + } + EMIT(store_attr, PY_PARSE_NODE_LEAF_ARG(pns1->nodes[0])); + } + } else { + // shouldn't happen + assert(0); + } + } else { + // shouldn't happen + assert(0); + } + + if (!PY_PARSE_NODE_IS_NULL(pns->nodes[2])) { + // SyntaxError, cannot assign + assert(0); + } +} + +void c_assign_tuple(compiler_t *comp, int n, py_parse_node_t *nodes) { + assert(n >= 0); + int have_star_index = -1; + for (int i = 0; i < n; i++) { + if (PY_PARSE_NODE_IS_STRUCT_KIND(nodes[i], PN_star_expr)) { + if (have_star_index < 0) { + EMIT(unpack_ex, i, n - i - 1); + have_star_index = i; + } else { + printf("SyntaxError: two starred expressions in assignment\n"); + return; + } + } + } + if (have_star_index < 0) { + EMIT(unpack_sequence, n); + } + for (int i = 0; i < n; i++) { + if (i == have_star_index) { + c_assign(comp, ((py_parse_node_struct_t*)nodes[i])->nodes[0], ASSIGN_STORE); + } else { + c_assign(comp, nodes[i], ASSIGN_STORE); + } + } +} + +// assigns top of stack to pn +void c_assign(compiler_t *comp, py_parse_node_t pn, assign_kind_t assign_kind) { + tail_recursion: + if 
(PY_PARSE_NODE_IS_NULL(pn)) { + assert(0); + } else if (PY_PARSE_NODE_IS_LEAF(pn)) { + if (PY_PARSE_NODE_IS_ID(pn)) { + int arg = PY_PARSE_NODE_LEAF_ARG(pn); + switch (assign_kind) { + case ASSIGN_STORE: + case ASSIGN_AUG_STORE: + emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, arg); + break; + case ASSIGN_AUG_LOAD: + emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, arg); + break; + } + } else { + printf("SyntaxError: can't assign to literal\n"); + return; + } + } else { + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; + switch (PY_PARSE_NODE_STRUCT_KIND(pns)) { + case PN_power: + // lhs is an index or attribute + c_assign_power(comp, pns, assign_kind); + break; + + case PN_testlist_star_expr: + case PN_exprlist: + // lhs is a tuple + if (assign_kind != ASSIGN_STORE) { + goto bad_aug; + } + c_assign_tuple(comp, PY_PARSE_NODE_STRUCT_NUM_NODES(pns), pns->nodes); + break; + + case PN_atom_paren: + // lhs is something in parenthesis + if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { + // empty tuple + printf("SyntaxError: can't assign to ()\n"); + return; + } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp)) { + pns = (py_parse_node_struct_t*)pns->nodes[0]; + goto testlist_comp; + } else { + // parenthesis around 1 item, is just that item + pn = pns->nodes[0]; + goto tail_recursion; + } + break; + + case PN_atom_bracket: + // lhs is something in brackets + if (assign_kind != ASSIGN_STORE) { + goto bad_aug; + } + if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { + // empty list, assignment allowed + c_assign_tuple(comp, 0, NULL); + } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp)) { + pns = (py_parse_node_struct_t*)pns->nodes[0]; + goto testlist_comp; + } else { + // brackets around 1 item + c_assign_tuple(comp, 1, &pns->nodes[0]); + } + break; + + default: + printf("unknown assign, %u\n", (uint)PY_PARSE_NODE_STRUCT_KIND(pns)); + assert(0); + } + return; + + testlist_comp: + 
// lhs is a sequence + // augmented assignment cannot target a sequence; the bracket case checks this before jumping here, the parenthesis case does not + if (assign_kind != ASSIGN_STORE) { + goto bad_aug; + } + if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) { + py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pns->nodes[1]; + if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_testlist_comp_3b) { + // sequence of one item, with trailing comma + assert(PY_PARSE_NODE_IS_NULL(pns2->nodes[0])); + c_assign_tuple(comp, 1, &pns->nodes[0]); + } else if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_testlist_comp_3c) { + // sequence of many items + // TODO call c_assign_tuple instead + int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns2); + EMIT(unpack_sequence, 1 + n); + c_assign(comp, pns->nodes[0], ASSIGN_STORE); + for (int i = 0; i < n; i++) { + c_assign(comp, pns2->nodes[i], ASSIGN_STORE); + } + } else if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_comp_for) { + // comprehension used as a target; the kind to test is that of pns2, since pns is always the enclosing PN_testlist_comp + // TODO not implemented + assert(0); + } else { + // sequence with 2 items + goto sequence_with_2_items; + } + } else { + // sequence with 2 items + sequence_with_2_items: + c_assign_tuple(comp, 2, pns->nodes); + } + return; + } + return; + + bad_aug: + printf("SyntaxError: illegal expression for augmented assignment\n"); +} + +// stuff for lambda and comprehensions and generators +// emits load_closure for every ID_INFO_KIND_FREE id of this_scope (skipped when the current scope is the module scope) +// and packs them with build_tuple; then emits make_closure, or make_function when there are no free ids +// n_dict_params and n_default_params are passed through unchanged to make_function/make_closure +void close_over_variables_etc(compiler_t *comp, scope_t *this_scope, int n_dict_params, int n_default_params) { + // make closed over variables, if any + int nfree = 0; + if (comp->scope_cur->kind != SCOPE_MODULE) { + for (int i = 0; i < this_scope->id_info_len; i++) { + id_info_t *id_info = &this_scope->id_info[i]; + if (id_info->kind == ID_INFO_KIND_FREE) { + EMIT(load_closure, id_info->qstr); + nfree += 1; + } + } + } + if (nfree > 0) { + EMIT(build_tuple, nfree); + } + + // make the function/closure + if (nfree == 0) { + EMIT(make_function, this_scope, n_dict_params, n_default_params); + } else { + EMIT(make_closure, this_scope, n_dict_params, n_default_params); + } +} + +// handles one entry of a function's parameter list; pn must be a struct node +void compile_funcdef_param(compiler_t *comp, py_parse_node_t pn) { + assert(PY_PARSE_NODE_IS_STRUCT(pn)); + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; + if
(PY_PARSE_NODE_STRUCT_KIND(pns) == PN_typedargslist_name) { + if (!PY_PARSE_NODE_IS_NULL(pns->nodes[2])) { + // this parameter has a default value + // in CPython, None (and True, False?) as default parameters are loaded with LOAD_NAME; don't understand why + if (comp->have_bare_star) { + // default after a bare star: counted on every pass, but its name and value are only emitted on param_pass 1 + comp->param_pass_num_dict_params += 1; + if (comp->param_pass == 1) { + EMIT(load_const_id, PY_PARSE_NODE_LEAF_ARG(pns->nodes[0])); + compile_node(comp, pns->nodes[2]); + } + } else { + // default before any bare star: counted on every pass, but its value is only emitted on param_pass 2 + comp->param_pass_num_default_params += 1; + if (comp->param_pass == 2) { + compile_node(comp, pns->nodes[2]); + } + } + } + } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_typedargslist_star) { + if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { + // bare star + comp->have_bare_star = true; + } + } +} + +// leaves function object on stack +// returns function name +// on PASS_1 this also creates the function's scope and caches it in pns->nodes[4] for the later passes +qstr compile_funcdef_helper(compiler_t *comp, py_parse_node_struct_t *pns) { + if (comp->pass == PASS_1) { + // create a new scope for this function + scope_t *s = scope_new_and_link(comp, SCOPE_FUNCTION, (py_parse_node_t)pns); + // store the function scope so the compiling function can use it at each pass + pns->nodes[4] = (py_parse_node_t)s; + } + + // save variables (probably don't need to do this, since we can't have nested definitions..?)
+ bool old_have_bare_star = comp->have_bare_star; + int old_param_pass = comp->param_pass; + int old_param_pass_num_dict_params = comp->param_pass_num_dict_params; + int old_param_pass_num_default_params = comp->param_pass_num_default_params; + + // compile default parameters + comp->have_bare_star = false; + comp->param_pass = 1; // pass 1 does any default parameters after bare star + comp->param_pass_num_dict_params = 0; + comp->param_pass_num_default_params = 0; + apply_to_single_or_list(comp, pns->nodes[1], PN_typedargslist, compile_funcdef_param); + comp->have_bare_star = false; + comp->param_pass = 2; // pass 2 does any default parameters before bare star + comp->param_pass_num_dict_params = 0; + comp->param_pass_num_default_params = 0; + apply_to_single_or_list(comp, pns->nodes[1], PN_typedargslist, compile_funcdef_param); + + // get the scope for this function + scope_t *fscope = (scope_t*)pns->nodes[4]; + + // make the function + close_over_variables_etc(comp, fscope, comp->param_pass_num_dict_params, comp->param_pass_num_default_params); + + // restore variables + comp->have_bare_star = old_have_bare_star; + comp->param_pass = old_param_pass; + comp->param_pass_num_dict_params = old_param_pass_num_dict_params; + comp->param_pass_num_default_params = old_param_pass_num_default_params; + + // return its name (the 'f' in "def f(...):") + return fscope->simple_name; +} + +// leaves class object on stack +// returns class name +qstr compile_classdef_helper(compiler_t *comp, py_parse_node_struct_t *pns) { + if (comp->pass == PASS_1) { + // create a new scope for this class + scope_t *s = scope_new_and_link(comp, SCOPE_CLASS, (py_parse_node_t)pns); + // store the class scope so the compiling function can use it at each pass + pns->nodes[3] = (py_parse_node_t)s; + } + + EMIT(load_build_class); + + // scope for this class + scope_t *cscope = (scope_t*)pns->nodes[3]; + + // compile the class + close_over_variables_etc(comp, cscope, 0, 0); + + // get its name + 
EMIT(load_const_id, cscope->simple_name); + + // nodes[1] has parent classes, if any + if (PY_PARSE_NODE_IS_NULL(pns->nodes[1])) { + // no parent classes + EMIT(call_function, 2, 0, false, false); + } else { + // have a parent class or classes + // TODO what if we have, eg, *a or **a in the parent list? + compile_node(comp, pns->nodes[1]); + EMIT(call_function, 2 + list_len(pns->nodes[1], PN_arglist), 0, false, false); + } + + // return its name (the 'C' in "class C(...):") + return cscope->simple_name; +} + +// compiles a decorated funcdef or classdef: loads every decorator, builds the function/class object, +// emits one single-argument call_function per decorator and stores the result under the body's name +void compile_decorated(compiler_t *comp, py_parse_node_struct_t *pns) { + // get the list of decorators + py_parse_node_t *nodes; + int n = list_get(&pns->nodes[0], PN_decorators, &nodes); + + // load each decorator + for (int i = 0; i < n; i++) { + assert(PY_PARSE_NODE_IS_STRUCT_KIND(nodes[i], PN_decorator)); // should be + py_parse_node_struct_t *pns_decorator = (py_parse_node_struct_t*)nodes[i]; + py_parse_node_t *nodes2; + int n2 = list_get(&pns_decorator->nodes[0], PN_dotted_name, &nodes2); + compile_node(comp, nodes2[0]); + // remaining components of a dotted decorator name are attribute loads + // (index named j so that it does not shadow the decorator index i) + for (int j = 1; j < n2; j++) { + EMIT(load_attr, PY_PARSE_NODE_LEAF_ARG(nodes2[j])); + } + if (!PY_PARSE_NODE_IS_NULL(pns_decorator->nodes[1])) { + // first call the function with these arguments + compile_node(comp, pns_decorator->nodes[1]); + } + } + + // compile the body (funcdef or classdef) and get its name + py_parse_node_struct_t *pns_body = (py_parse_node_struct_t*)pns->nodes[1]; + qstr body_name = 0; + if (PY_PARSE_NODE_STRUCT_KIND(pns_body) == PN_funcdef) { + body_name = compile_funcdef_helper(comp, pns_body); + } else if (PY_PARSE_NODE_STRUCT_KIND(pns_body) == PN_classdef) { + body_name = compile_classdef_helper(comp, pns_body); + } else { + // shouldn't happen + assert(0); + } + + // call each decorator + for (int i = 0; i < n; i++) { + EMIT(call_function, 1, 0, false, false); + } + + // store func/class object into name + emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, body_name); +} + +void
compile_funcdef(compiler_t *comp, py_parse_node_struct_t *pns) { + qstr fname = compile_funcdef_helper(comp, pns); + // store function object into function name + emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, fname); +} + +void c_del_stmt(compiler_t *comp, py_parse_node_t pn) { + if (PY_PARSE_NODE_IS_ID(pn)) { + emit_common_delete_id(comp->pass, comp->scope_cur, comp->emit, PY_PARSE_NODE_LEAF_ARG(pn)); + } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_power)) { + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; + + compile_node(comp, pns->nodes[0]); // base of the power node + + if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) { + py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1]; + if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_power_trailers) { + int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns1); + for (int i = 0; i < n - 1; i++) { + compile_node(comp, pns1->nodes[i]); + } + assert(PY_PARSE_NODE_IS_STRUCT(pns1->nodes[n - 1])); + pns1 = (py_parse_node_struct_t*)pns1->nodes[n - 1]; + } + if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_paren) { + // SyntaxError: can't delete a function call + assert(0); + } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_bracket) { + compile_node(comp, pns1->nodes[0]); + EMIT(delete_subscr); + } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_trailer_period) { + assert(PY_PARSE_NODE_IS_ID(pns1->nodes[0])); + EMIT(delete_attr, PY_PARSE_NODE_LEAF_ARG(pns1->nodes[0])); + } else { + // shouldn't happen + assert(0); + } + } else { + // shouldn't happen + assert(0); + } + + if (!PY_PARSE_NODE_IS_NULL(pns->nodes[2])) { + // SyntaxError, cannot delete + assert(0); + } + } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_atom_paren)) { + pn = ((py_parse_node_struct_t*)pn)->nodes[0]; + if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_testlist_comp)) { + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; + // TODO perhaps factorise testlist_comp code with other uses of PN_testlist_comp + + if 
(PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) { + py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1]; + if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_testlist_comp_3b) { + // sequence of one item, with trailing comma + assert(PY_PARSE_NODE_IS_NULL(pns1->nodes[0])); + c_del_stmt(comp, pns->nodes[0]); + } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_testlist_comp_3c) { + // sequence of many items + int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns1); + c_del_stmt(comp, pns->nodes[0]); + for (int i = 0; i < n; i++) { + c_del_stmt(comp, pns1->nodes[i]); + } + } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_comp_for) { + // TODO not implemented; can't del comprehension? + assert(0); + } else { + // sequence with 2 items + goto sequence_with_2_items; + } + } else { + // sequence with 2 items + sequence_with_2_items: + c_del_stmt(comp, pns->nodes[0]); + c_del_stmt(comp, pns->nodes[1]); + } + } else { + // tuple with 1 element + c_del_stmt(comp, pn); + } + } else { + // not implemented + assert(0); + } +} + +void compile_del_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { + apply_to_single_or_list(comp, pns->nodes[0], PN_exprlist, c_del_stmt); +} + +void compile_break_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { + if (comp->break_label == 0) { + printf("ERROR: cannot break from here\n"); + } + EMIT(break_loop, comp->break_label); +} + +void compile_continue_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { + if (comp->continue_label == 0) { + printf("ERROR: cannot continue from here\n"); + } + if (comp->except_nest_level > 0) { + EMIT(continue_loop, comp->continue_label); + } else { + EMIT(jump, comp->continue_label); + } +} + +void compile_return_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { + if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { + EMIT(load_const_tok, PY_TOKEN_KW_NONE); + } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_test_if_expr)) { + // special case when returning an if-expression; to match CPython optimisation + 
py_parse_node_struct_t *pns_test_if_expr = (py_parse_node_struct_t*)pns->nodes[0]; + py_parse_node_struct_t *pns_test_if_else = (py_parse_node_struct_t*)pns_test_if_expr->nodes[1]; + + int l_fail = EMIT(label_new); + c_if_cond(comp, pns_test_if_else->nodes[0], false, l_fail); // condition + compile_node(comp, pns_test_if_expr->nodes[0]); // success value + EMIT(return_value); + EMIT(label_assign, l_fail); + compile_node(comp, pns_test_if_else->nodes[1]); // failure value + } else { + compile_node(comp, pns->nodes[0]); + } + EMIT(return_value); +} + +void compile_yield_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { + compile_node(comp, pns->nodes[0]); + EMIT(pop_top); +} + +void compile_raise_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { + if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { + // raise + EMIT(raise_varargs, 0); + } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_raise_stmt_arg)) { + // raise x from y + pns = (py_parse_node_struct_t*)pns->nodes[0]; + compile_node(comp, pns->nodes[0]); + compile_node(comp, pns->nodes[1]); + EMIT(raise_varargs, 2); + } else { + // raise x + compile_node(comp, pns->nodes[0]); + EMIT(raise_varargs, 1); + } +} + +// q1 holds the base, q2 the full name +// eg a -> q1=q2=a +// a.b.c -> q1=a, q2=a.b.c +void do_import_name(compiler_t *comp, py_parse_node_t pn, qstr *q1, qstr *q2) { + bool is_as = false; + if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_dotted_as_name)) { + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; + // a name of the form x as y; unwrap it + *q1 = PY_PARSE_NODE_LEAF_ARG(pns->nodes[1]); + pn = pns->nodes[0]; + is_as = true; + } + if (PY_PARSE_NODE_IS_ID(pn)) { + // just a simple name + *q2 = PY_PARSE_NODE_LEAF_ARG(pn); + if (!is_as) { + *q1 = *q2; + } + EMIT(import_name, *q2); + } else if (PY_PARSE_NODE_IS_STRUCT(pn)) { + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; + if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_dotted_name) { + // a name of the form a.b.c + if (!is_as) { + *q1 
= PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); + } + int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + int len = n - 1; + for (int i = 0; i < n; i++) { + len += strlen(qstr_str(PY_PARSE_NODE_LEAF_ARG(pns->nodes[i]))); + } + char *str = m_new(char, len + 1); + str[0] = 0; + for (int i = 0; i < n; i++) { + if (i > 0) { + strcat(str, "."); + } + strcat(str, qstr_str(PY_PARSE_NODE_LEAF_ARG(pns->nodes[i]))); + } + *q2 = qstr_from_str_take(str); + EMIT(import_name, *q2); + if (is_as) { + for (int i = 1; i < n; i++) { + EMIT(load_attr, PY_PARSE_NODE_LEAF_ARG(pns->nodes[i])); + } + } + } else { + // TODO not implemented + assert(0); + } + } else { + // TODO not implemented + assert(0); + } +} + +void compile_dotted_as_name(compiler_t *comp, py_parse_node_t pn) { + EMIT(load_const_small_int, 0); // ?? + EMIT(load_const_tok, PY_TOKEN_KW_NONE); + qstr q1, q2; + do_import_name(comp, pn, &q1, &q2); + emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, q1); +} + +void compile_import_name(compiler_t *comp, py_parse_node_struct_t *pns) { + apply_to_single_or_list(comp, pns->nodes[0], PN_dotted_as_names, compile_dotted_as_name); +} + +void compile_import_from(compiler_t *comp, py_parse_node_struct_t *pns) { + if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[1], PY_TOKEN_OP_STAR)) { + EMIT(load_const_small_int, 0); // what's this for?? + EMIT(load_const_verbatim_start); + EMIT(load_const_verbatim_str, "('*',)"); + EMIT(load_const_verbatim_end); + qstr dummy_q, id1; + do_import_name(comp, pns->nodes[0], &dummy_q, &id1); + EMIT(import_star); + } else { + py_parse_node_t *pn_nodes; + int n = list_get(&pns->nodes[1], PN_import_as_names, &pn_nodes); + + EMIT(load_const_small_int, 0); // what's this for?? 
+ EMIT(load_const_verbatim_start); + EMIT(load_const_verbatim_str, "("); + for (int i = 0; i < n; i++) { + assert(PY_PARSE_NODE_IS_STRUCT_KIND(pn_nodes[i], PN_import_as_name)); + py_parse_node_struct_t *pns3 = (py_parse_node_struct_t*)pn_nodes[i]; + qstr id2 = PY_PARSE_NODE_LEAF_ARG(pns3->nodes[0]); // should be id + if (i > 0) { + EMIT(load_const_verbatim_str, ", "); + } + EMIT(load_const_verbatim_str, "'"); + EMIT(load_const_verbatim_str, qstr_str(id2)); + EMIT(load_const_verbatim_str, "'"); + } + if (n == 1) { + EMIT(load_const_verbatim_str, ","); + } + EMIT(load_const_verbatim_str, ")"); + EMIT(load_const_verbatim_end); + qstr dummy_q, id1; + do_import_name(comp, pns->nodes[0], &dummy_q, &id1); + for (int i = 0; i < n; i++) { + assert(PY_PARSE_NODE_IS_STRUCT_KIND(pn_nodes[i], PN_import_as_name)); + py_parse_node_struct_t *pns3 = (py_parse_node_struct_t*)pn_nodes[i]; + qstr id2 = PY_PARSE_NODE_LEAF_ARG(pns3->nodes[0]); // should be id + EMIT(import_from, id2); + if (PY_PARSE_NODE_IS_NULL(pns3->nodes[1])) { + emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, id2); + } else { + emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, PY_PARSE_NODE_LEAF_ARG(pns3->nodes[1])); + } + } + EMIT(pop_top); + } +} + +void compile_global_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { + if (PY_PARSE_NODE_IS_LEAF(pns->nodes[0])) { + emit_common_declare_global(comp->pass, comp->scope_cur, PY_PARSE_NODE_LEAF_ARG(pns->nodes[0])); + } else { + pns = (py_parse_node_struct_t*)pns->nodes[0]; + int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + for (int i = 0; i < num_nodes; i++) { + emit_common_declare_global(comp->pass, comp->scope_cur, PY_PARSE_NODE_LEAF_ARG(pns->nodes[i])); + } + } +} + +void compile_nonlocal_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { + if (PY_PARSE_NODE_IS_LEAF(pns->nodes[0])) { + emit_common_declare_nonlocal(comp->pass, comp->scope_cur, PY_PARSE_NODE_LEAF_ARG(pns->nodes[0])); + } else { + pns = 
(py_parse_node_struct_t*)pns->nodes[0]; + int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + for (int i = 0; i < num_nodes; i++) { + emit_common_declare_nonlocal(comp->pass, comp->scope_cur, PY_PARSE_NODE_LEAF_ARG(pns->nodes[i])); + } + } +} + +void compile_assert_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { + int l_end = EMIT(label_new); + c_if_cond(comp, pns->nodes[0], true, l_end); + emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, comp->qstr_assertion_error); + if (!PY_PARSE_NODE_IS_NULL(pns->nodes[1])) { + // assertion message + compile_node(comp, pns->nodes[1]); + EMIT(call_function, 1, 0, false, false); + } + EMIT(raise_varargs, 1); + EMIT(label_assign, l_end); +} + +void compile_if_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { + // TODO proper and/or short circuiting + + int l_end = EMIT(label_new); + + int l_fail = EMIT(label_new); + c_if_cond(comp, pns->nodes[0], false, l_fail); // if condition + + compile_node(comp, pns->nodes[1]); // if block + //if (!(PY_PARSE_NODE_IS_NULL(pns->nodes[2]) && PY_PARSE_NODE_IS_NULL(pns->nodes[3]))) { // optimisation; doesn't align with CPython + // jump over elif/else blocks if they exist + if (!emit_last_emit_was_return_value(comp->emit)) { // simple optimisation to align with CPython + EMIT(jump, l_end); + } + //} + EMIT(label_assign, l_fail); + + if (!PY_PARSE_NODE_IS_NULL(pns->nodes[2])) { + // compile elif blocks + + py_parse_node_struct_t *pns_elif = (py_parse_node_struct_t*)pns->nodes[2]; + + if (PY_PARSE_NODE_STRUCT_KIND(pns_elif) == PN_if_stmt_elif_list) { + // multiple elif blocks + + int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns_elif); + for (int i = 0; i < n; i++) { + py_parse_node_struct_t *pns_elif2 = (py_parse_node_struct_t*)pns_elif->nodes[i]; + l_fail = EMIT(label_new); + c_if_cond(comp, pns_elif2->nodes[0], false, l_fail); // elif condition + + compile_node(comp, pns_elif2->nodes[1]); // elif block + if (!emit_last_emit_was_return_value(comp->emit)) { 
// simple optimisation to align with CPython + EMIT(jump, l_end); + } + EMIT(label_assign, l_fail); + } + + } else { + // a single elif block + + l_fail = EMIT(label_new); + c_if_cond(comp, pns_elif->nodes[0], false, l_fail); // elif condition + + compile_node(comp, pns_elif->nodes[1]); // elif block + if (!emit_last_emit_was_return_value(comp->emit)) { // simple optimisation to align with CPython + EMIT(jump, l_end); + } + EMIT(label_assign, l_fail); + } + } + + // compile else block + compile_node(comp, pns->nodes[3]); // can be null + + EMIT(label_assign, l_end); +} + +// compiles a while loop: nodes[0] is the condition, nodes[1] the body, nodes[2] the else block +// the enclosing loop's break/continue labels are saved on entry and restored before the else block is compiled +void compile_while_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { + int old_break_label = comp->break_label; + int old_continue_label = comp->continue_label; + + int done_label = EMIT(label_new); + int end_label = EMIT(label_new); + int break_label = EMIT(label_new); + int continue_label = EMIT(label_new); + + comp->break_label = break_label; + comp->continue_label = continue_label; + + EMIT(setup_loop, end_label); + EMIT(label_assign, continue_label); + c_if_cond(comp, pns->nodes[0], false, done_label); // condition + compile_node(comp, pns->nodes[1]); // body + if (!emit_last_emit_was_return_value(comp->emit)) { + EMIT(jump, continue_label); + } + EMIT(label_assign, done_label); + + // break/continue apply to outer loop (if any) in the else block + comp->break_label = old_break_label; + comp->continue_label = old_continue_label; + + // CPython does not emit POP_BLOCK if the condition was a constant; don't understand why + // this is a small hack to agree with CPython (pop_block is skipped when node_is_const_true holds for the condition) + if (!node_is_const_true(pns->nodes[0])) { + EMIT(pop_block); + } + + compile_node(comp, pns->nodes[2]); // else + + EMIT(label_assign, break_label); + EMIT(label_assign, end_label); +} + +// compiles a for loop; the enclosing loop's break/continue labels are saved here first +void compile_for_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { + int old_break_label = comp->break_label; + int old_continue_label = comp->continue_label; + + int for_label = EMIT(label_new); + int pop_label = EMIT(label_new); + int
end_label = EMIT(label_new); + + int break_label = EMIT(label_new); + + comp->continue_label = for_label; + comp->break_label = break_label; + + EMIT(setup_loop, end_label); + compile_node(comp, pns->nodes[1]); // iterator + EMIT(get_iter); + EMIT(label_assign, for_label); + EMIT(for_iter, pop_label); + c_assign(comp, pns->nodes[0], ASSIGN_STORE); // variable + compile_node(comp, pns->nodes[2]); // body + if (!emit_last_emit_was_return_value(comp->emit)) { + EMIT(jump, for_label); + } + EMIT(label_assign, pop_label); + EMIT(for_iter_end); + + // break/continue apply to outer loop (if any) in the else block + comp->break_label = old_break_label; + comp->continue_label = old_continue_label; + + EMIT(pop_block); + + compile_node(comp, pns->nodes[3]); // else (not tested) + + EMIT(label_assign, break_label); + EMIT(label_assign, end_label); +} + +// compiles the try body followed by each of the n_except handlers in pn_excepts and the else block pn_else +// every handler node must be a PN_try_stmt_except; a handler without an exception expression is a catch-all and must come last +void compile_try_except(compiler_t *comp, py_parse_node_t pn_body, int n_except, py_parse_node_t *pn_excepts, py_parse_node_t pn_else) { + // this function is a bit of a hack at the moment + // don't understand how the stack works with exceptions, so we force it to return to the correct value + + // setup code + int stack_size = EMIT(get_stack_size); + int l1 = EMIT(label_new); + int success_label = EMIT(label_new); + comp->except_nest_level += 1; // for correct handling of continue + EMIT(setup_except, l1); + compile_node(comp, pn_body); // body + EMIT(pop_block); + EMIT(jump, success_label); + EMIT(label_assign, l1); + int l2 = EMIT(label_new); + + for (int i = 0; i < n_except; i++) { + assert(PY_PARSE_NODE_IS_STRUCT_KIND(pn_excepts[i], PN_try_stmt_except)); // should be + py_parse_node_struct_t *pns_except = (py_parse_node_struct_t*)pn_excepts[i]; + + qstr qstr_exception_local = 0; + int end_finally_label = EMIT(label_new); + + if (PY_PARSE_NODE_IS_NULL(pns_except->nodes[0])) { + // this is a catch all exception handler + if (i + 1 != n_except) { + printf("SyntaxError: default 'except:' must be last\n"); + // undo the increment made in the setup code, so that the nesting level is not left raised by this early exit + comp->except_nest_level -= 1; + return; + } + }
else { + // this exception handler requires a match to a certain type of exception + py_parse_node_t pns_exception_expr = pns_except->nodes[0]; + if (PY_PARSE_NODE_IS_STRUCT(pns_exception_expr)) { + py_parse_node_struct_t *pns3 = (py_parse_node_struct_t*)pns_exception_expr; + if (PY_PARSE_NODE_STRUCT_KIND(pns3) == PN_try_stmt_as_name) { + // handler binds the exception to a local + pns_exception_expr = pns3->nodes[0]; + qstr_exception_local = PY_PARSE_NODE_LEAF_ARG(pns3->nodes[1]); + } + } + EMIT(dup_top); + compile_node(comp, pns_exception_expr); + EMIT(compare_op, RT_COMPARE_OP_EXCEPTION_MATCH); + EMIT(pop_jump_if_false, end_finally_label); + } + + EMIT(pop_top); + + if (qstr_exception_local == 0) { + EMIT(pop_top); + } else { + emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, qstr_exception_local); + } + + EMIT(pop_top); + + int l3; + if (qstr_exception_local != 0) { + l3 = EMIT(label_new); + EMIT(setup_finally, l3); + } + compile_node(comp, pns_except->nodes[1]); + if (qstr_exception_local != 0) { + EMIT(pop_block); + } + EMIT(pop_except); + if (qstr_exception_local != 0) { + EMIT(load_const_tok, PY_TOKEN_KW_NONE); + EMIT(label_assign, l3); + EMIT(load_const_tok, PY_TOKEN_KW_NONE); + emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, qstr_exception_local); + emit_common_delete_id(comp->pass, comp->scope_cur, comp->emit, qstr_exception_local); + EMIT(end_finally); + } + EMIT(jump, l2); + EMIT(label_assign, end_finally_label); + } + + EMIT(end_finally); + EMIT(label_assign, success_label); + comp->except_nest_level -= 1; + compile_node(comp, pn_else); // else block, can be null + EMIT(label_assign, l2); + EMIT(set_stack_size, stack_size); +} + +void compile_try_finally(compiler_t *comp, py_parse_node_t pn_body, int n_except, py_parse_node_t *pn_except, py_parse_node_t pn_else, py_parse_node_t pn_finally) { + // don't understand how the stack works with exceptions, so we force it to return to the correct value + int stack_size = 
EMIT(get_stack_size); + int l_finally_block = EMIT(label_new); + EMIT(setup_finally, l_finally_block); + if (n_except == 0) { + assert(PY_PARSE_NODE_IS_NULL(pn_else)); + compile_node(comp, pn_body); + } else { + compile_try_except(comp, pn_body, n_except, pn_except, pn_else); + } + EMIT(pop_block); + EMIT(load_const_tok, PY_TOKEN_KW_NONE); + EMIT(label_assign, l_finally_block); + compile_node(comp, pn_finally); + EMIT(end_finally); + EMIT(set_stack_size, stack_size); +} + +void compile_try_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { + if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) { + py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pns->nodes[1]; + if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_try_stmt_finally) { + // just try-finally + compile_try_finally(comp, pns->nodes[0], 0, NULL, PY_PARSE_NODE_NULL, pns2->nodes[0]); + } else if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_try_stmt_except_and_more) { + // try-except and possibly else and/or finally + py_parse_node_t *pn_excepts; + int n_except = list_get(&pns2->nodes[0], PN_try_stmt_except_list, &pn_excepts); + if (PY_PARSE_NODE_IS_NULL(pns2->nodes[2])) { + // no finally + compile_try_except(comp, pns->nodes[0], n_except, pn_excepts, pns2->nodes[1]); + } else { + // have finally + compile_try_finally(comp, pns->nodes[0], n_except, pn_excepts, pns2->nodes[1], ((py_parse_node_struct_t*)pns2->nodes[2])->nodes[0]); + } + } else { + // just try-except + py_parse_node_t *pn_excepts; + int n_except = list_get(&pns->nodes[1], PN_try_stmt_except_list, &pn_excepts); + compile_try_except(comp, pns->nodes[0], n_except, pn_excepts, PY_PARSE_NODE_NULL); + } + } else { + // shouldn't happen + assert(0); + } +} + +void compile_with_stmt_helper(compiler_t *comp, int n, py_parse_node_t *nodes, py_parse_node_t body) { + if (n == 0) { + // no more pre-bits, compile the body of the with + compile_node(comp, body); + } else { + int l_end = EMIT(label_new); + if (PY_PARSE_NODE_IS_STRUCT_KIND(nodes[0], PN_with_item)) { + // this 
pre-bit is of the form "a as b" + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)nodes[0]; + compile_node(comp, pns->nodes[0]); + EMIT(setup_with, l_end); + c_assign(comp, pns->nodes[1], ASSIGN_STORE); + } else { + // this pre-bit is just an expression + compile_node(comp, nodes[0]); + EMIT(setup_with, l_end); + EMIT(pop_top); + } + // compile additional pre-bits and the body + compile_with_stmt_helper(comp, n - 1, nodes + 1, body); + // finish this with block + EMIT(pop_block); + EMIT(load_const_tok, PY_TOKEN_KW_NONE); + EMIT(label_assign, l_end); + EMIT(with_cleanup); + EMIT(end_finally); + } +} + +void compile_with_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { + // get the nodes for the pre-bit of the with (the a as b, c as d, ... bit) + py_parse_node_t *nodes; + int n = list_get(&pns->nodes[0], PN_with_stmt_list, &nodes); + assert(n > 0); + + // compile in a nested fashion + compile_with_stmt_helper(comp, n, nodes, pns->nodes[1]); +} + +void compile_expr_stmt(compiler_t *comp, py_parse_node_struct_t *pns) { + if (PY_PARSE_NODE_IS_NULL(pns->nodes[1])) { + if (PY_PARSE_NODE_IS_LEAF(pns->nodes[0]) && !PY_PARSE_NODE_IS_ID(pns->nodes[0])) { + // do nothing with a lonely constant + } else { + compile_node(comp, pns->nodes[0]); // just an expression + EMIT(pop_top); // discard last result since this is a statement and leaves nothing on the stack + } + } else { + py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1]; + int kind = PY_PARSE_NODE_STRUCT_KIND(pns1); + if (kind == PN_expr_stmt_augassign) { + c_assign(comp, pns->nodes[0], ASSIGN_AUG_LOAD); // lhs load for aug assign + compile_node(comp, pns1->nodes[1]); // rhs + assert(PY_PARSE_NODE_IS_TOKEN(pns1->nodes[0])); + // note that we don't really need to implement separate inplace ops, just normal binary ops will suffice + switch (PY_PARSE_NODE_LEAF_ARG(pns1->nodes[0])) { + case PY_TOKEN_DEL_PIPE_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_OR); break; + case PY_TOKEN_DEL_CARET_EQUAL: 
EMIT(binary_op, RT_BINARY_OP_INPLACE_XOR); break; + case PY_TOKEN_DEL_AMPERSAND_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_AND); break; + case PY_TOKEN_DEL_DBL_LESS_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_LSHIFT); break; + case PY_TOKEN_DEL_DBL_MORE_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_RSHIFT); break; + case PY_TOKEN_DEL_PLUS_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_ADD); break; + case PY_TOKEN_DEL_MINUS_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_SUBTRACT); break; + case PY_TOKEN_DEL_STAR_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_MULTIPLY); break; + case PY_TOKEN_DEL_DBL_SLASH_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_FLOOR_DIVIDE); break; + case PY_TOKEN_DEL_SLASH_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_TRUE_DIVIDE); break; + case PY_TOKEN_DEL_PERCENT_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_MODULO); break; + case PY_TOKEN_DEL_DBL_STAR_EQUAL: EMIT(binary_op, RT_BINARY_OP_INPLACE_POWER); break; + default: assert(0); // shouldn't happen + } + c_assign(comp, pns->nodes[0], ASSIGN_AUG_STORE); // lhs store for aug assign + } else if (kind == PN_expr_stmt_assign_list) { + int rhs = PY_PARSE_NODE_STRUCT_NUM_NODES(pns1) - 1; + compile_node(comp, ((py_parse_node_struct_t*)pns1->nodes[rhs])->nodes[0]); // rhs + // following CPython, we store left-most first + if (rhs > 0) { + EMIT(dup_top); + } + c_assign(comp, pns->nodes[0], ASSIGN_STORE); // lhs store + for (int i = 0; i < rhs; i++) { + if (i + 1 < rhs) { + EMIT(dup_top); + } + c_assign(comp, ((py_parse_node_struct_t*)pns1->nodes[i])->nodes[0], ASSIGN_STORE); // middle store + } + } else if (kind == PN_expr_stmt_assign) { + if (PY_PARSE_NODE_IS_STRUCT_KIND(pns1->nodes[0], PN_testlist_star_expr) + && PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_star_expr) + && PY_PARSE_NODE_STRUCT_NUM_NODES((py_parse_node_struct_t*)pns1->nodes[0]) == 2 + && PY_PARSE_NODE_STRUCT_NUM_NODES((py_parse_node_struct_t*)pns->nodes[0]) == 2) { + // optimisation for a, b = c, d; to match CPython's optimisation + 
py_parse_node_struct_t* pns10 = (py_parse_node_struct_t*)pns1->nodes[0]; + py_parse_node_struct_t* pns0 = (py_parse_node_struct_t*)pns->nodes[0]; + compile_node(comp, pns10->nodes[0]); // rhs + compile_node(comp, pns10->nodes[1]); // rhs + EMIT(rot_two); + c_assign(comp, pns0->nodes[0], ASSIGN_STORE); // lhs store + c_assign(comp, pns0->nodes[1], ASSIGN_STORE); // lhs store + } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns1->nodes[0], PN_testlist_star_expr) + && PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_star_expr) + && PY_PARSE_NODE_STRUCT_NUM_NODES((py_parse_node_struct_t*)pns1->nodes[0]) == 3 + && PY_PARSE_NODE_STRUCT_NUM_NODES((py_parse_node_struct_t*)pns->nodes[0]) == 3) { + // optimisation for a, b, c = d, e, f; to match CPython's optimisation + py_parse_node_struct_t* pns10 = (py_parse_node_struct_t*)pns1->nodes[0]; + py_parse_node_struct_t* pns0 = (py_parse_node_struct_t*)pns->nodes[0]; + compile_node(comp, pns10->nodes[0]); // rhs + compile_node(comp, pns10->nodes[1]); // rhs + compile_node(comp, pns10->nodes[2]); // rhs + EMIT(rot_three); + EMIT(rot_two); + c_assign(comp, pns0->nodes[0], ASSIGN_STORE); // lhs store + c_assign(comp, pns0->nodes[1], ASSIGN_STORE); // lhs store + c_assign(comp, pns0->nodes[2], ASSIGN_STORE); // lhs store + } else { + compile_node(comp, pns1->nodes[0]); // rhs + c_assign(comp, pns->nodes[0], ASSIGN_STORE); // lhs store + } + } else { + // shouldn't happen + assert(0); + } + } +} + +void c_binary_op(compiler_t *comp, py_parse_node_struct_t *pns, rt_binary_op_t binary_op) { + int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + compile_node(comp, pns->nodes[0]); + for (int i = 1; i < num_nodes; i += 1) { + compile_node(comp, pns->nodes[i]); + EMIT(binary_op, binary_op); + } +} + +void compile_test_if_expr(compiler_t *comp, py_parse_node_struct_t *pns) { + assert(PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[1], PN_test_if_else)); + py_parse_node_struct_t *pns_test_if_else = (py_parse_node_struct_t*)pns->nodes[1]; + + int 
stack_size = EMIT(get_stack_size); + int l_fail = EMIT(label_new); + int l_end = EMIT(label_new); + c_if_cond(comp, pns_test_if_else->nodes[0], false, l_fail); // condition + compile_node(comp, pns->nodes[0]); // success value + EMIT(jump, l_end); + EMIT(label_assign, l_fail); + EMIT(set_stack_size, stack_size); // force stack size reset + compile_node(comp, pns_test_if_else->nodes[1]); // failure value + EMIT(label_assign, l_end); +} + +void compile_lambdef(compiler_t *comp, py_parse_node_struct_t *pns) { + // TODO default params etc for lambda; possibly just use funcdef code + //py_parse_node_t pn_params = pns->nodes[0]; + //py_parse_node_t pn_body = pns->nodes[1]; + + if (comp->pass == PASS_1) { + // create a new scope for this lambda + scope_t *s = scope_new_and_link(comp, SCOPE_LAMBDA, (py_parse_node_t)pns); + // store the lambda scope so the compiling function (this one) can use it at each pass + pns->nodes[2] = (py_parse_node_t)s; + } + + // get the scope for this lambda + scope_t *this_scope = (scope_t*)pns->nodes[2]; + + // make the lambda + close_over_variables_etc(comp, this_scope, 0, 0); +} + +void compile_or_test(compiler_t *comp, py_parse_node_struct_t *pns) { + int l_end = EMIT(label_new); + int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + for (int i = 0; i < n; i += 1) { + compile_node(comp, pns->nodes[i]); + if (i + 1 < n) { + EMIT(jump_if_true_or_pop, l_end); + } + } + EMIT(label_assign, l_end); +} + +void compile_and_test(compiler_t *comp, py_parse_node_struct_t *pns) { + int l_end = EMIT(label_new); + int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + for (int i = 0; i < n; i += 1) { + compile_node(comp, pns->nodes[i]); + if (i + 1 < n) { + EMIT(jump_if_false_or_pop, l_end); + } + } + EMIT(label_assign, l_end); +} + +void compile_not_test_2(compiler_t *comp, py_parse_node_struct_t *pns) { + compile_node(comp, pns->nodes[0]); + EMIT(unary_op, RT_UNARY_OP_NOT); +} + +void compile_comparison(compiler_t *comp, py_parse_node_struct_t *pns) { + int 
stack_size = EMIT(get_stack_size); + int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + compile_node(comp, pns->nodes[0]); + bool multi = (num_nodes > 3); + int l_fail = 0; + if (multi) { + l_fail = EMIT(label_new); + } + for (int i = 1; i + 1 < num_nodes; i += 2) { + compile_node(comp, pns->nodes[i + 1]); + if (i + 2 < num_nodes) { + EMIT(dup_top); + EMIT(rot_three); + } + if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_LESS)) { + EMIT(compare_op, RT_COMPARE_OP_LESS); + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_MORE)) { + EMIT(compare_op, RT_COMPARE_OP_MORE); + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_DBL_EQUAL)) { + EMIT(compare_op, RT_COMPARE_OP_EQUAL); + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_LESS_EQUAL)) { + EMIT(compare_op, RT_COMPARE_OP_LESS_EQUAL); + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_MORE_EQUAL)) { + EMIT(compare_op, RT_COMPARE_OP_MORE_EQUAL); + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_NOT_EQUAL)) { + EMIT(compare_op, RT_COMPARE_OP_NOT_EQUAL); + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_KW_IN)) { + EMIT(compare_op, RT_COMPARE_OP_IN); + } else if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[i])) { + py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pns->nodes[i]; + int kind = PY_PARSE_NODE_STRUCT_KIND(pns2); + if (kind == PN_comp_op_not_in) { + EMIT(compare_op, RT_COMPARE_OP_NOT_IN); + } else if (kind == PN_comp_op_is) { + if (PY_PARSE_NODE_IS_NULL(pns2->nodes[0])) { + EMIT(compare_op, RT_COMPARE_OP_IS); + } else { + EMIT(compare_op, RT_COMPARE_OP_IS_NOT); + } + } else { + // shouldn't happen + assert(0); + } + } else { + // shouldn't happen + assert(0); + } + if (i + 2 < num_nodes) { + EMIT(jump_if_false_or_pop, l_fail); + } + } + if (multi) { + int l_end = EMIT(label_new); + EMIT(jump, l_end); + EMIT(label_assign, l_fail); + EMIT(rot_two); + EMIT(pop_top); + EMIT(label_assign, l_end); + 
EMIT(set_stack_size, stack_size + 1); // force stack size + } +} + +void compile_star_expr(compiler_t *comp, py_parse_node_struct_t *pns) { + // TODO + assert(0); + compile_node(comp, pns->nodes[0]); + //EMIT(unary_op, "UNARY_STAR"); +} + +void compile_expr(compiler_t *comp, py_parse_node_struct_t *pns) { + c_binary_op(comp, pns, RT_BINARY_OP_OR); +} + +void compile_xor_expr(compiler_t *comp, py_parse_node_struct_t *pns) { + c_binary_op(comp, pns, RT_BINARY_OP_XOR); +} + +void compile_and_expr(compiler_t *comp, py_parse_node_struct_t *pns) { + c_binary_op(comp, pns, RT_BINARY_OP_AND); +} + +void compile_shift_expr(compiler_t *comp, py_parse_node_struct_t *pns) { + int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + compile_node(comp, pns->nodes[0]); + for (int i = 1; i + 1 < num_nodes; i += 2) { + compile_node(comp, pns->nodes[i + 1]); + if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_DBL_LESS)) { + EMIT(binary_op, RT_BINARY_OP_LSHIFT); + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_DBL_MORE)) { + EMIT(binary_op, RT_BINARY_OP_RSHIFT); + } else { + // shouldn't happen + assert(0); + } + } +} + +void compile_arith_expr(compiler_t *comp, py_parse_node_struct_t *pns) { + int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + compile_node(comp, pns->nodes[0]); + for (int i = 1; i + 1 < num_nodes; i += 2) { + compile_node(comp, pns->nodes[i + 1]); + if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_PLUS)) { + EMIT(binary_op, RT_BINARY_OP_ADD); + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_MINUS)) { + EMIT(binary_op, RT_BINARY_OP_SUBTRACT); + } else { + // shouldn't happen + assert(0); + } + } +} + +void compile_term(compiler_t *comp, py_parse_node_struct_t *pns) { + int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + compile_node(comp, pns->nodes[0]); + for (int i = 1; i + 1 < num_nodes; i += 2) { + compile_node(comp, pns->nodes[i + 1]); + if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], 
PY_TOKEN_OP_STAR)) { + EMIT(binary_op, RT_BINARY_OP_MULTIPLY); + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_DBL_SLASH)) { + EMIT(binary_op, RT_BINARY_OP_FLOOR_DIVIDE); + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_SLASH)) { + EMIT(binary_op, RT_BINARY_OP_TRUE_DIVIDE); + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[i], PY_TOKEN_OP_PERCENT)) { + EMIT(binary_op, RT_BINARY_OP_MODULO); + } else { + // shouldn't happen + assert(0); + } + } +} + +void compile_factor_2(compiler_t *comp, py_parse_node_struct_t *pns) { + compile_node(comp, pns->nodes[1]); + if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], PY_TOKEN_OP_PLUS)) { + EMIT(unary_op, RT_UNARY_OP_POSITIVE); + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], PY_TOKEN_OP_MINUS)) { + EMIT(unary_op, RT_UNARY_OP_NEGATIVE); + } else if (PY_PARSE_NODE_IS_TOKEN_KIND(pns->nodes[0], PY_TOKEN_OP_TILDE)) { + EMIT(unary_op, RT_UNARY_OP_INVERT); + } else { + // shouldn't happen + assert(0); + } +} + +void compile_trailer_paren_helper(compiler_t *comp, py_parse_node_struct_t *pns, bool is_method_call) { + // function to call is on top of stack + + int old_n_arg_keyword = comp->n_arg_keyword; + bool old_have_star_arg = comp->have_star_arg; + bool old_have_dbl_star_arg = comp->have_dbl_star_arg; + comp->n_arg_keyword = 0; + comp->have_star_arg = false; + comp->have_dbl_star_arg = false; + + compile_node(comp, pns->nodes[0]); // arguments to function call; can be null + + // compute number of positional arguments + int n_positional = list_len(pns->nodes[0], PN_arglist) - comp->n_arg_keyword; + if (comp->have_star_arg) { + n_positional -= 1; + } + if (comp->have_dbl_star_arg) { + n_positional -= 1; + } + + if (is_method_call) { + EMIT(call_method, n_positional, comp->n_arg_keyword, comp->have_star_arg, comp->have_dbl_star_arg); + } else { + EMIT(call_function, n_positional, comp->n_arg_keyword, comp->have_star_arg, comp->have_dbl_star_arg); + } + + comp->n_arg_keyword = 
old_n_arg_keyword; + comp->have_star_arg = old_have_star_arg; + comp->have_dbl_star_arg = old_have_dbl_star_arg; +} + +void compile_power_trailers(compiler_t *comp, py_parse_node_struct_t *pns) { + int num_nodes = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + for (int i = 0; i < num_nodes; i++) { + if (i + 1 < num_nodes && PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[i], PN_trailer_period) && PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[i + 1], PN_trailer_paren)) { + // optimisation for method calls a.f(...), following PyPy + py_parse_node_struct_t *pns_period = (py_parse_node_struct_t*)pns->nodes[i]; + py_parse_node_struct_t *pns_paren = (py_parse_node_struct_t*)pns->nodes[i + 1]; + EMIT(load_method, PY_PARSE_NODE_LEAF_ARG(pns_period->nodes[0])); // get the method + compile_trailer_paren_helper(comp, pns_paren, true); + i += 1; + } else { + compile_node(comp, pns->nodes[i]); + } + } +} + +void compile_power_dbl_star(compiler_t *comp, py_parse_node_struct_t *pns) { + compile_node(comp, pns->nodes[0]); + EMIT(binary_op, RT_BINARY_OP_POWER); +} + +void compile_atom_string(compiler_t *comp, py_parse_node_struct_t *pns) { + // a list of strings + EMIT(load_const_verbatim_start); + EMIT(load_const_verbatim_str, "'"); + int n = PY_PARSE_NODE_STRUCT_NUM_NODES(pns); + for (int i = 0; i < n; i++) { + // TODO allow concatenation of either strings or bytes, but not mixed + assert(PY_PARSE_NODE_IS_LEAF(pns->nodes[i])); + assert(PY_PARSE_NODE_LEAF_KIND(pns->nodes[i]) == PY_PARSE_NODE_STRING); + const char *str = qstr_str(PY_PARSE_NODE_LEAF_ARG(pns->nodes[i])); + EMIT(load_const_verbatim_strn, str, strlen(str)); + } + EMIT(load_const_verbatim_str, "'"); + EMIT(load_const_verbatim_end); +} + +// pns needs to have 2 nodes, first is lhs of comprehension, second is PN_comp_for node +void compile_comprehension(compiler_t *comp, py_parse_node_struct_t *pns, scope_kind_t kind) { + assert(PY_PARSE_NODE_STRUCT_NUM_NODES(pns) == 2); + assert(PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[1], PN_comp_for)); + 
py_parse_node_struct_t *pns_comp_for = (py_parse_node_struct_t*)pns->nodes[1]; + + if (comp->pass == PASS_1) { + // create a new scope for this comprehension + scope_t *s = scope_new_and_link(comp, kind, (py_parse_node_t)pns); + // store the comprehension scope so the compiling function (this one) can use it at each pass + pns_comp_for->nodes[3] = (py_parse_node_t)s; + } + + // get the scope for this comprehension + scope_t *this_scope = (scope_t*)pns_comp_for->nodes[3]; + + // compile the comprehension + close_over_variables_etc(comp, this_scope, 0, 0); + + compile_node(comp, pns_comp_for->nodes[1]); // source of the iterator + EMIT(get_iter); + EMIT(call_function, 1, 0, false, false); +} + +void compile_atom_paren(compiler_t *comp, py_parse_node_struct_t *pns) { + if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { + // an empty tuple + /* + EMIT(build_tuple, 0); + */ + c_tuple(comp, PY_PARSE_NODE_NULL, NULL); + } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp)) { + pns = (py_parse_node_struct_t*)pns->nodes[0]; + assert(!PY_PARSE_NODE_IS_NULL(pns->nodes[1])); + if (PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])) { + py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pns->nodes[1]; + if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_testlist_comp_3b) { + // tuple of one item, with trailing comma + assert(PY_PARSE_NODE_IS_NULL(pns2->nodes[0])); + /* + compile_node(comp, pns->nodes[0]); + EMIT(build_tuple, 1); + */ + c_tuple(comp, pns->nodes[0], NULL); + } else if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_testlist_comp_3c) { + // tuple of many items + /* + compile_node(comp, pns->nodes[0]); + compile_generic_all_nodes(comp, pns2); + EMIT(build_tuple, 1 + PY_PARSE_NODE_STRUCT_NUM_NODES(pns2)); + */ + c_tuple(comp, pns->nodes[0], pns2); + } else if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_comp_for) { + // generator expression + compile_comprehension(comp, pns, SCOPE_GEN_EXPR); + } else { + // tuple with 2 items + goto tuple_with_2_items; + } + } else { + // tuple with 
2 items + tuple_with_2_items: + /* + compile_node(comp, pns->nodes[0]); + compile_node(comp, pns->nodes[1]); + EMIT(build_tuple, 2); + */ + c_tuple(comp, PY_PARSE_NODE_NULL, pns); + } + } else { + // parenthesis around a single item, is just that item + compile_node(comp, pns->nodes[0]); + } +} + +void compile_atom_bracket(compiler_t *comp, py_parse_node_struct_t *pns) { + if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { + // empty list + EMIT(build_list, 0); + } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp)) { + py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pns->nodes[0]; + if (PY_PARSE_NODE_IS_STRUCT(pns2->nodes[1])) { + py_parse_node_struct_t *pns3 = (py_parse_node_struct_t*)pns2->nodes[1]; + if (PY_PARSE_NODE_STRUCT_KIND(pns3) == PN_testlist_comp_3b) { + // list of one item, with trailing comma + assert(PY_PARSE_NODE_IS_NULL(pns3->nodes[0])); + compile_node(comp, pns2->nodes[0]); + EMIT(build_list, 1); + } else if (PY_PARSE_NODE_STRUCT_KIND(pns3) == PN_testlist_comp_3c) { + // list of many items + compile_node(comp, pns2->nodes[0]); + compile_generic_all_nodes(comp, pns3); + EMIT(build_list, 1 + PY_PARSE_NODE_STRUCT_NUM_NODES(pns3)); + } else if (PY_PARSE_NODE_STRUCT_KIND(pns3) == PN_comp_for) { + // list comprehension + compile_comprehension(comp, pns2, SCOPE_LIST_COMP); + } else { + // list with 2 items + goto list_with_2_items; + } + } else { + // list with 2 items + list_with_2_items: + compile_node(comp, pns2->nodes[0]); + compile_node(comp, pns2->nodes[1]); + EMIT(build_list, 2); + } + } else { + // list with 1 item + compile_node(comp, pns->nodes[0]); + EMIT(build_list, 1); + } +} + +void compile_atom_brace(compiler_t *comp, py_parse_node_struct_t *pns) { + py_parse_node_t pn = pns->nodes[0]; + if (PY_PARSE_NODE_IS_NULL(pn)) { + // empty dict + EMIT(build_map, 0); + } else if (PY_PARSE_NODE_IS_STRUCT(pn)) { + pns = (py_parse_node_struct_t*)pn; + if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_dictorsetmaker_item) { + // dict with 
one element + EMIT(build_map, 1); + compile_node(comp, pn); + EMIT(store_map); + } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_dictorsetmaker) { + assert(PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])); // should succeed + py_parse_node_struct_t *pns1 = (py_parse_node_struct_t*)pns->nodes[1]; + if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_dictorsetmaker_list) { + // dict/set with multiple elements + + // get tail elements (2nd, 3rd, ...) + py_parse_node_t *nodes; + int n = list_get(&pns1->nodes[0], PN_dictorsetmaker_list2, &nodes); + + // first element sets whether it's a dict or set + bool is_dict; + if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_dictorsetmaker_item)) { + // a dictionary + EMIT(build_map, 1 + n); + compile_node(comp, pns->nodes[0]); + EMIT(store_map); + is_dict = true; + } else { + // a set + compile_node(comp, pns->nodes[0]); // 1st value of set + is_dict = false; + } + + // process rest of elements + for (int i = 0; i < n; i++) { + py_parse_node_t pn = nodes[i]; + bool is_key_value = PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_dictorsetmaker_item); + compile_node(comp, pn); + if (is_dict) { + if (!is_key_value) { + printf("SyntaxError?: expecting key:value for dictionary"); + return; + } + EMIT(store_map); + } else { + if (is_key_value) { + printf("SyntaxError?: expecting just a value for set"); + return; + } + } + } + + // if it's a set, build it + if (!is_dict) { + EMIT(build_set, 1 + n); + } + } else if (PY_PARSE_NODE_STRUCT_KIND(pns1) == PN_comp_for) { + // dict/set comprehension + if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_dictorsetmaker_item)) { + // a dictionary comprehension + compile_comprehension(comp, pns, SCOPE_DICT_COMP); + } else { + // a set comprehension + compile_comprehension(comp, pns, SCOPE_SET_COMP); + } + } else { + // shouldn't happen + assert(0); + } + } else { + // set with one element + goto set_with_one_element; + } + } else { + // set with one element + set_with_one_element: + compile_node(comp, pn); + EMIT(build_set, 1); 
+ } +} + +void compile_trailer_paren(compiler_t *comp, py_parse_node_struct_t *pns) { + compile_trailer_paren_helper(comp, pns, false); +} + +void compile_trailer_bracket(compiler_t *comp, py_parse_node_struct_t *pns) { + // object who's index we want is on top of stack + compile_node(comp, pns->nodes[0]); // the index + EMIT(binary_op, RT_BINARY_OP_SUBSCR); +} + +void compile_trailer_period(compiler_t *comp, py_parse_node_struct_t *pns) { + // object who's attribute we want is on top of stack + EMIT(load_attr, PY_PARSE_NODE_LEAF_ARG(pns->nodes[0])); // attribute to get +} + +void compile_subscript_3_helper(compiler_t *comp, py_parse_node_struct_t *pns) { + assert(PY_PARSE_NODE_STRUCT_KIND(pns) == PN_subscript_3); // should always be + py_parse_node_t pn = pns->nodes[0]; + if (PY_PARSE_NODE_IS_NULL(pn)) { + // [?:] + EMIT(load_const_tok, PY_TOKEN_KW_NONE); + EMIT(build_slice, 2); + } else if (PY_PARSE_NODE_IS_STRUCT(pn)) { + pns = (py_parse_node_struct_t*)pn; + if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_subscript_3c) { + EMIT(load_const_tok, PY_TOKEN_KW_NONE); + pn = pns->nodes[0]; + if (PY_PARSE_NODE_IS_NULL(pn)) { + // [?::] + EMIT(build_slice, 2); + } else { + // [?::x] + compile_node(comp, pn); + EMIT(build_slice, 3); + } + } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == PN_subscript_3d) { + compile_node(comp, pns->nodes[0]); + assert(PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])); // should always be + pns = (py_parse_node_struct_t*)pns->nodes[1]; + assert(PY_PARSE_NODE_STRUCT_KIND(pns) == PN_sliceop); // should always be + if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { + // [?:x:] + EMIT(build_slice, 2); + } else { + // [?:x:x] + compile_node(comp, pns->nodes[0]); + EMIT(build_slice, 3); + } + } else { + // [?:x] + compile_node(comp, pn); + EMIT(build_slice, 2); + } + } else { + // [?:x] + compile_node(comp, pn); + EMIT(build_slice, 2); + } +} + +void compile_subscript_2(compiler_t *comp, py_parse_node_struct_t *pns) { + compile_node(comp, pns->nodes[0]); // start of slice 
+ assert(PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])); // should always be + compile_subscript_3_helper(comp, (py_parse_node_struct_t*)pns->nodes[1]); +} + +void compile_subscript_3(compiler_t *comp, py_parse_node_struct_t *pns) { + EMIT(load_const_tok, PY_TOKEN_KW_NONE); + compile_subscript_3_helper(comp, pns); +} + +void compile_dictorsetmaker_item(compiler_t *comp, py_parse_node_struct_t *pns) { + // if this is called then we are compiling a dict key:value pair + compile_node(comp, pns->nodes[1]); // value + compile_node(comp, pns->nodes[0]); // key +} + +void compile_classdef(compiler_t *comp, py_parse_node_struct_t *pns) { + qstr cname = compile_classdef_helper(comp, pns); + // store class object into class name + emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, cname); +} + +void compile_arglist_star(compiler_t *comp, py_parse_node_struct_t *pns) { + if (comp->have_star_arg) { + printf("SyntaxError?: can't have multiple *x\n"); + return; + } + comp->have_star_arg = true; + compile_node(comp, pns->nodes[0]); +} + +void compile_arglist_dbl_star(compiler_t *comp, py_parse_node_struct_t *pns) { + if (comp->have_dbl_star_arg) { + printf("SyntaxError?: can't have multiple **x\n"); + return; + } + comp->have_dbl_star_arg = true; + compile_node(comp, pns->nodes[0]); +} + +void compile_argument(compiler_t *comp, py_parse_node_struct_t *pns) { + assert(PY_PARSE_NODE_IS_STRUCT(pns->nodes[1])); // should always be + py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pns->nodes[1]; + if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_argument_3) { + if (!PY_PARSE_NODE_IS_ID(pns->nodes[0])) { + printf("SyntaxError?: lhs of keyword argument must be an id\n"); + return; + } + EMIT(load_const_id, PY_PARSE_NODE_LEAF_ARG(pns->nodes[0])); + compile_node(comp, pns2->nodes[0]); + comp->n_arg_keyword += 1; + } else if (PY_PARSE_NODE_STRUCT_KIND(pns2) == PN_comp_for) { + compile_comprehension(comp, pns, SCOPE_GEN_EXPR); + } else { + // shouldn't happen + assert(0); + } +} + 
+void compile_yield_expr(compiler_t *comp, py_parse_node_struct_t *pns) { + if (comp->scope_cur->kind != SCOPE_FUNCTION) { + printf("SyntaxError: 'yield' outside function\n"); + return; + } + if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { + EMIT(load_const_tok, PY_TOKEN_KW_NONE); + EMIT(yield_value); + } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_yield_arg_from)) { + pns = (py_parse_node_struct_t*)pns->nodes[0]; + compile_node(comp, pns->nodes[0]); + EMIT(get_iter); + EMIT(load_const_tok, PY_TOKEN_KW_NONE); + EMIT(yield_from); + } else { + compile_node(comp, pns->nodes[0]); + EMIT(yield_value); + } +} + +typedef void (*compile_function_t)(compiler_t*, py_parse_node_struct_t*); +static compile_function_t compile_function[] = { + NULL, +#define nc NULL +#define c(f) compile_##f +#define DEF_RULE(rule, comp, kind, arg...) comp, +#include "grammar.h" +#undef nc +#undef c +#undef DEF_RULE +}; + +void compile_node(compiler_t *comp, py_parse_node_t pn) { + if (PY_PARSE_NODE_IS_NULL(pn)) { + // pass + } else if (PY_PARSE_NODE_IS_LEAF(pn)) { + int arg = PY_PARSE_NODE_LEAF_ARG(pn); + switch (PY_PARSE_NODE_LEAF_KIND(pn)) { + case PY_PARSE_NODE_ID: emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, arg); break; + case PY_PARSE_NODE_SMALL_INT: EMIT(load_const_small_int, arg); break; + case PY_PARSE_NODE_INTEGER: EMIT(load_const_int, arg); break; + case PY_PARSE_NODE_DECIMAL: EMIT(load_const_dec, arg); break; + case PY_PARSE_NODE_STRING: EMIT(load_const_str, arg, false); break; + case PY_PARSE_NODE_BYTES: EMIT(load_const_str, arg, true); break; + case PY_PARSE_NODE_TOKEN: EMIT(load_const_tok, arg); break; + default: assert(0); + } + } else { + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; + compile_function_t f = compile_function[PY_PARSE_NODE_STRUCT_KIND(pns)]; + if (f == NULL) { + printf("node %u cannot be compiled\n", (uint)PY_PARSE_NODE_STRUCT_KIND(pns)); + parse_node_show(pn, 0); + assert(0); + } else { + f(comp, pns); 
+ } + } +} + +void compile_scope_func_lambda_param(compiler_t *comp, py_parse_node_t pn, pn_kind_t pn_name, pn_kind_t pn_star, pn_kind_t pn_dbl_star, bool allow_annotations) { + // TODO verify that *k and **k are last etc + assert(PY_PARSE_NODE_IS_STRUCT(pn)); + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)pn; + qstr param_name = 0; + py_parse_node_t pn_annotation = PY_PARSE_NODE_NULL; + if (PY_PARSE_NODE_STRUCT_KIND(pns) == pn_name) { + param_name = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); + //int node_index = 1; unused + if (allow_annotations) { + if (!PY_PARSE_NODE_IS_NULL(pns->nodes[1])) { + // this parameter has an annotation + pn_annotation = pns->nodes[1]; + } + //node_index = 2; unused + } + /* this is obsolete now that num dict/default params are calculated in compile_funcdef_param + if (!PY_PARSE_NODE_IS_NULL(pns->nodes[node_index])) { + // this parameter has a default value + if (comp->have_bare_star) { + comp->scope_cur->num_dict_params += 1; + } else { + comp->scope_cur->num_default_params += 1; + } + } + */ + if (comp->have_bare_star) { + // comes after a bare star, so doesn't count as a parameter + } else { + comp->scope_cur->num_params += 1; + } + } else if (PY_PARSE_NODE_STRUCT_KIND(pns) == pn_star) { + if (PY_PARSE_NODE_IS_NULL(pns->nodes[0])) { + // bare star + // TODO see http://www.python.org/dev/peps/pep-3102/ + comp->have_bare_star = true; + //assert(comp->scope_cur->num_dict_params == 0); + } else if (PY_PARSE_NODE_IS_ID(pns->nodes[0])) { + // named star + comp->scope_cur->flags |= SCOPE_FLAG_VARARGS; + param_name = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); + } else if (allow_annotations && PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_tfpdef)) { + // named star with annotation + comp->scope_cur->flags |= SCOPE_FLAG_VARARGS; + pns = (py_parse_node_struct_t*)pns->nodes[0]; + param_name = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); + pn_annotation = pns->nodes[1]; + } else { + // shouldn't happen + assert(0); + } + } else if 
(PY_PARSE_NODE_STRUCT_KIND(pns) == pn_dbl_star) { + param_name = PY_PARSE_NODE_LEAF_ARG(pns->nodes[0]); + if (allow_annotations && !PY_PARSE_NODE_IS_NULL(pns->nodes[1])) { + // this parameter has an annotation + pn_annotation = pns->nodes[1]; + } + comp->scope_cur->flags |= SCOPE_FLAG_VARKEYWORDS; + } else { + // TODO anything to implement? + assert(0); + } + + if (param_name != 0) { + if (!PY_PARSE_NODE_IS_NULL(pn_annotation)) { + // TODO this parameter has an annotation + } + bool added; + id_info_t *id_info = scope_find_or_add_id(comp->scope_cur, param_name, &added); + if (!added) { + printf("SyntaxError?: same name used for parameter; %s\n", qstr_str(param_name)); + return; + } + id_info->param = true; + id_info->kind = ID_INFO_KIND_LOCAL; + } +} + +void compile_scope_func_param(compiler_t *comp, py_parse_node_t pn) { + compile_scope_func_lambda_param(comp, pn, PN_typedargslist_name, PN_typedargslist_star, PN_typedargslist_dbl_star, true); +} + +void compile_scope_lambda_param(compiler_t *comp, py_parse_node_t pn) { + compile_scope_func_lambda_param(comp, pn, PN_varargslist_name, PN_varargslist_star, PN_varargslist_dbl_star, false); +} + +void compile_scope_comp_iter(compiler_t *comp, py_parse_node_t pn_iter, py_parse_node_t pn_inner_expr, int l_top, int for_depth) { + tail_recursion: + if (PY_PARSE_NODE_IS_NULL(pn_iter)) { + // no more nested if/for; compile inner expression + compile_node(comp, pn_inner_expr); + if (comp->scope_cur->kind == SCOPE_LIST_COMP) { + EMIT(list_append, for_depth + 2); + } else if (comp->scope_cur->kind == SCOPE_DICT_COMP) { + EMIT(map_add, for_depth + 2); + } else if (comp->scope_cur->kind == SCOPE_SET_COMP) { + EMIT(set_add, for_depth + 2); + } else { + EMIT(yield_value); + EMIT(pop_top); + } + } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pn_iter, PN_comp_if)) { + // if condition + py_parse_node_struct_t *pns_comp_if = (py_parse_node_struct_t*)pn_iter; + c_if_cond(comp, pns_comp_if->nodes[0], false, l_top); + pn_iter = 
pns_comp_if->nodes[1]; + goto tail_recursion; + } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pn_iter, PN_comp_for)) { + // for loop + py_parse_node_struct_t *pns_comp_for2 = (py_parse_node_struct_t*)pn_iter; + compile_node(comp, pns_comp_for2->nodes[1]); + int l_end2 = EMIT(label_new); + int l_top2 = EMIT(label_new); + EMIT(get_iter); + EMIT(label_assign, l_top2); + EMIT(for_iter, l_end2); + c_assign(comp, pns_comp_for2->nodes[0], ASSIGN_STORE); + compile_scope_comp_iter(comp, pns_comp_for2->nodes[2], pn_inner_expr, l_top2, for_depth + 1); + EMIT(jump, l_top2); + EMIT(label_assign, l_end2); + EMIT(for_iter_end); + } else { + // shouldn't happen + assert(0); + } +} + +void check_for_doc_string(compiler_t *comp, py_parse_node_t pn) { + // see http://www.python.org/dev/peps/pep-0257/ + + // look for the first statement + if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_expr_stmt)) { + // fall through + } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_file_input_2)) { + pn = ((py_parse_node_struct_t*)pn)->nodes[0]; + } else if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_suite_block_stmts)) { + pn = ((py_parse_node_struct_t*)pn)->nodes[0]; + } else { + return; + } + + // check the first statement for a doc string + if (PY_PARSE_NODE_IS_STRUCT_KIND(pn, PN_expr_stmt)) { + py_parse_node_struct_t* pns = (py_parse_node_struct_t*)pn; + if (PY_PARSE_NODE_IS_LEAF(pns->nodes[0])) { + int kind = PY_PARSE_NODE_LEAF_KIND(pns->nodes[0]); + if (kind == PY_PARSE_NODE_STRING) { + compile_node(comp, pns->nodes[0]); // a doc string + // store doc string + emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, comp->qstr___doc__); + } + } + } +} + +void compile_scope(compiler_t *comp, scope_t *scope, pass_kind_t pass) { + comp->pass = pass; + comp->scope_cur = scope; + emit_start_pass(comp->emit, pass, scope); + + if (comp->pass == PASS_1) { + scope->stack_size = 0; + } + + if (comp->pass == PASS_3) { + //printf("----\n"); + scope_print_info(scope); + } + + // compile + if (scope->kind == SCOPE_MODULE) { 
+ check_for_doc_string(comp, scope->pn); + compile_node(comp, scope->pn); + EMIT(load_const_tok, PY_TOKEN_KW_NONE); + EMIT(return_value); + } else if (scope->kind == SCOPE_FUNCTION) { + assert(PY_PARSE_NODE_IS_STRUCT(scope->pn)); + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)scope->pn; + assert(PY_PARSE_NODE_STRUCT_KIND(pns) == PN_funcdef); + + // work out number of parameters, keywords and default parameters, and add them to the id_info array + if (comp->pass == PASS_1) { + comp->have_bare_star = false; + apply_to_single_or_list(comp, pns->nodes[1], PN_typedargslist, compile_scope_func_param); + } + + assert(pns->nodes[2] == 0); // 2 is something... + + compile_node(comp, pns->nodes[3]); // 3 is function body + // emit return if it wasn't the last opcode + if (!emit_last_emit_was_return_value(comp->emit)) { + EMIT(load_const_tok, PY_TOKEN_KW_NONE); + EMIT(return_value); + } + } else if (scope->kind == SCOPE_LAMBDA) { + assert(PY_PARSE_NODE_IS_STRUCT(scope->pn)); + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)scope->pn; + assert(PY_PARSE_NODE_STRUCT_NUM_NODES(pns) == 3); + + // work out number of parameters, keywords and default parameters, and add them to the id_info array + if (comp->pass == PASS_1) { + comp->have_bare_star = false; + apply_to_single_or_list(comp, pns->nodes[0], PN_varargslist, compile_scope_lambda_param); + } + + compile_node(comp, pns->nodes[1]); // 1 is lambda body + EMIT(return_value); + } else if (scope->kind == SCOPE_LIST_COMP || scope->kind == SCOPE_DICT_COMP || scope->kind == SCOPE_SET_COMP || scope->kind == SCOPE_GEN_EXPR) { + // a bit of a hack at the moment + + assert(PY_PARSE_NODE_IS_STRUCT(scope->pn)); + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)scope->pn; + assert(PY_PARSE_NODE_STRUCT_NUM_NODES(pns) == 2); + assert(PY_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[1], PN_comp_for)); + py_parse_node_struct_t *pns_comp_for = (py_parse_node_struct_t*)pns->nodes[1]; + + qstr qstr_arg = qstr_from_strn_copy(".0", 
2); + if (comp->pass == PASS_1) { + bool added; + id_info_t *id_info = scope_find_or_add_id(comp->scope_cur, qstr_arg, &added); + assert(added); + id_info->kind = ID_INFO_KIND_LOCAL; + scope->num_params = 1; + } + + if (scope->kind == SCOPE_LIST_COMP) { + EMIT(build_list, 0); + } else if (scope->kind == SCOPE_DICT_COMP) { + EMIT(build_map, 0); + } else if (scope->kind == SCOPE_SET_COMP) { + EMIT(build_set, 0); + } + + int l_end = EMIT(label_new); + int l_top = EMIT(label_new); + emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, qstr_arg); + EMIT(label_assign, l_top); + EMIT(for_iter, l_end); + c_assign(comp, pns_comp_for->nodes[0], ASSIGN_STORE); + compile_scope_comp_iter(comp, pns_comp_for->nodes[2], pns->nodes[0], l_top, 0); + EMIT(jump, l_top); + EMIT(label_assign, l_end); + EMIT(for_iter_end); + + if (scope->kind == SCOPE_GEN_EXPR) { + EMIT(load_const_tok, PY_TOKEN_KW_NONE); + } + EMIT(return_value); + } else { + assert(scope->kind == SCOPE_CLASS); + assert(PY_PARSE_NODE_IS_STRUCT(scope->pn)); + py_parse_node_struct_t *pns = (py_parse_node_struct_t*)scope->pn; + assert(PY_PARSE_NODE_STRUCT_KIND(pns) == PN_classdef); + + if (comp->pass == PASS_1) { + bool added; + id_info_t *id_info = scope_find_or_add_id(scope, comp->qstr___class__, &added); + assert(added); + id_info->kind = ID_INFO_KIND_LOCAL; + id_info = scope_find_or_add_id(scope, comp->qstr___locals__, &added); + assert(added); + id_info->kind = ID_INFO_KIND_LOCAL; + id_info->param = true; + scope->num_params = 1; // __locals__ is the parameter + } + + emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, comp->qstr___locals__); + EMIT(store_locals); + emit_common_load_id(comp->pass, comp->scope_cur, comp->qstr___class__, comp->emit, comp->qstr___name__); + emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, comp->qstr___module__); + EMIT(load_const_id, PY_PARSE_NODE_LEAF_ARG(pns->nodes[0])); // 0 is class name + 
emit_common_store_id(comp->pass, comp->scope_cur, comp->emit, comp->qstr___qualname__); + + check_for_doc_string(comp, pns->nodes[2]); + compile_node(comp, pns->nodes[2]); // 2 is class body + + id_info_t *id = scope_find(scope, comp->qstr___class__); + assert(id != NULL); + if (id->kind == ID_INFO_KIND_LOCAL) { + EMIT(load_const_tok, PY_TOKEN_KW_NONE); + } else { + EMIT(load_closure, comp->qstr___class__); + } + EMIT(return_value); + } + + emit_end_pass(comp->emit); +} + +void compile_scope_compute_things(compiler_t *comp, scope_t *scope) { + // in functions, turn implicit globals into explicit globals + // compute num_locals, and the index of each local + scope->num_locals = 0; + for (int i = 0; i < scope->id_info_len; i++) { + id_info_t *id = &scope->id_info[i]; + if (scope->kind == SCOPE_CLASS && id->qstr == comp->qstr___class__) { + // __class__ is not counted as a local; if it's used then it becomes a ID_INFO_KIND_CELL + continue; + } + if (scope->kind >= SCOPE_FUNCTION && scope->kind <= SCOPE_GEN_EXPR && id->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) { + id->kind = ID_INFO_KIND_GLOBAL_EXPLICIT; + } + if (id->param || id->kind == ID_INFO_KIND_LOCAL) { + id->local_num = scope->num_locals; + scope->num_locals += 1; + } + } + + // compute flags + //scope->flags = 0; since we set some things in parameters + if (scope->kind != SCOPE_MODULE) { + scope->flags |= SCOPE_FLAG_NEWLOCALS; + } + if (scope->kind == SCOPE_FUNCTION || scope->kind == SCOPE_LAMBDA || scope->kind == SCOPE_LIST_COMP || scope->kind == SCOPE_DICT_COMP || scope->kind == SCOPE_SET_COMP || scope->kind == SCOPE_GEN_EXPR) { + assert(scope->parent != NULL); + scope->flags |= SCOPE_FLAG_OPTIMISED; + + // TODO possibly other ways it can be nested + if (scope->parent->kind == SCOPE_FUNCTION || (scope->parent->kind == SCOPE_CLASS && scope->parent->parent->kind == SCOPE_FUNCTION)) { + scope->flags |= SCOPE_FLAG_NESTED; + } + } + int num_free = 0; + for (int i = 0; i < scope->id_info_len; i++) { + id_info_t *id = 
&scope->id_info[i]; + if (id->kind == ID_INFO_KIND_CELL || id->kind == ID_INFO_KIND_FREE) { + num_free += 1; + } + } + if (num_free == 0) { + scope->flags |= SCOPE_FLAG_NOFREE; + } +} + +void py_compile(py_parse_node_t pn) { + compiler_t *comp = m_new(compiler_t, 1); + + comp->qstr___class__ = qstr_from_strn_copy("__class__", 9); + comp->qstr___locals__ = qstr_from_strn_copy("__locals__", 10); + comp->qstr___name__ = qstr_from_strn_copy("__name__", 8); + comp->qstr___module__ = qstr_from_strn_copy("__module__", 10); + comp->qstr___qualname__ = qstr_from_strn_copy("__qualname__", 12); + comp->qstr___doc__ = qstr_from_strn_copy("__doc__", 7); + comp->qstr_assertion_error = qstr_from_strn_copy("AssertionError", 14); + + comp->break_label = 0; + comp->continue_label = 0; + comp->except_nest_level = 0; + comp->scope_head = NULL; + comp->scope_cur = NULL; + + comp->emit = emit_new(comp->qstr___class__); + + pn = fold_constants(pn); + scope_new_and_link(comp, SCOPE_MODULE, pn); + + for (scope_t *s = comp->scope_head; s != NULL; s = s->next) { + compile_scope(comp, s, PASS_1); + } + + for (scope_t *s = comp->scope_head; s != NULL; s = s->next) { + compile_scope_compute_things(comp, s); + } + + for (scope_t *s = comp->scope_head; s != NULL; s = s->next) { + compile_scope(comp, s, PASS_2); + compile_scope(comp, s, PASS_3); + } + + m_free(comp); +} diff --git a/py/compile.h b/py/compile.h new file mode 100644 index 0000000000..339acca0c0 --- /dev/null +++ b/py/compile.h @@ -0,0 +1 @@ +void py_compile(py_parse_node_t pn); diff --git a/py/emit.h b/py/emit.h new file mode 100644 index 0000000000..8cad745dde --- /dev/null +++ b/py/emit.h @@ -0,0 +1,120 @@ +//#define EMIT_DO_CPY +#define EMIT_DO_BC +//#define EMIT_DO_X64 +//#define EMIT_DO_THUMB + +/* Notes on passes: + * We don't know exactly the opcodes in pass 1 because they depend on the + * closing over of variables (LOAD_CLOSURE, BUILD_TUPLE, MAKE_CLOSURE), which + * depends on determining the scope of variables in each 
function, and this + * is not known until the end of pass 1. + * As a consequence, we don't know the maximum stack size until the end of pass 2. + * This is problematic for some emitters (x64) since they need to know the maximum + * stack size to compile the entry to the function, and this affects code size. + */ + +typedef enum { + PASS_1 = 1, // work out id's and their kind, and number of labels + PASS_2 = 2, // work out stack size and code size and label offsets + PASS_3 = 3, // emit code +} pass_kind_t; + +typedef struct _emitter_t emitter_t; + +void emit_common_declare_global(pass_kind_t pass, scope_t *scope, qstr qstr); +void emit_common_declare_nonlocal(pass_kind_t pass, scope_t *scope, qstr qstr); +void emit_common_load_id(pass_kind_t pass, scope_t *scope, qstr qstr___class__, emitter_t *emit, qstr qstr); +void emit_common_store_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr); +void emit_common_delete_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr); + +emitter_t *emit_new(); +void emit_set_native_types(emitter_t *emit, bool do_native_types); +void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope); +void emit_end_pass(emitter_t *emit); +bool emit_last_emit_was_return_value(emitter_t *emit); +int emit_get_stack_size(emitter_t *emit); +void emit_set_stack_size(emitter_t *emit, int size); + +int emit_label_new(emitter_t *emit); +void emit_label_assign(emitter_t *emit, int l); +void emit_import_name(emitter_t *emit, qstr qstr); +void emit_import_from(emitter_t *emit, qstr qstr); +void emit_import_star(emitter_t *emit); +void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok); +void emit_load_const_small_int(emitter_t *emit, int arg); +void emit_load_const_int(emitter_t *emit, qstr qstr); +void emit_load_const_dec(emitter_t *emit, qstr qstr); +void emit_load_const_id(emitter_t *emit, qstr qstr); +void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes); +void 
emit_load_const_verbatim_start(emitter_t *emit); +void emit_load_const_verbatim_int(emitter_t *emit, int val); +void emit_load_const_verbatim_str(emitter_t *emit, const char *str); +void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len); +void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes); +void emit_load_const_verbatim_end(emitter_t *emit); +void emit_load_fast(emitter_t *emit, qstr qstr, int local_num); +void emit_load_name(emitter_t *emit, qstr qstr); +void emit_load_global(emitter_t *emit, qstr qstr); +void emit_load_deref(emitter_t *emit, qstr qstr); +void emit_load_closure(emitter_t *emit, qstr qstr); +void emit_load_attr(emitter_t *emit, qstr qstr); +void emit_load_method(emitter_t *emit, qstr qstr); +void emit_load_build_class(emitter_t *emit); +void emit_store_fast(emitter_t *emit, qstr qstr, int local_num); +void emit_store_name(emitter_t *emit, qstr qstr); +void emit_store_global(emitter_t *emit, qstr qstr); +void emit_store_deref(emitter_t *emit, qstr qstr); +void emit_store_attr(emitter_t *emit, qstr qstr); +void emit_store_locals(emitter_t *emit); +void emit_store_subscr(emitter_t *emit); +void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num); +void emit_delete_name(emitter_t *emit, qstr qstr); +void emit_delete_global(emitter_t *emit, qstr qstr); +void emit_delete_deref(emitter_t *emit, qstr qstr); +void emit_delete_attr(emitter_t *emit, qstr qstr); +void emit_delete_subscr(emitter_t *emit); +void emit_dup_top(emitter_t *emit); +void emit_dup_top_two(emitter_t *emit); +void emit_pop_top(emitter_t *emit); +void emit_rot_two(emitter_t *emit); +void emit_rot_three(emitter_t *emit); +void emit_jump(emitter_t *emit, int label); +void emit_pop_jump_if_true(emitter_t *emit, int label); +void emit_pop_jump_if_false(emitter_t *emit, int label); +void emit_jump_if_true_or_pop(emitter_t *emit, int label); +void emit_jump_if_false_or_pop(emitter_t *emit, int label); +void emit_setup_loop(emitter_t 
*emit, int label); +void emit_break_loop(emitter_t *emit, int label); +void emit_continue_loop(emitter_t *emit, int label); +void emit_setup_with(emitter_t *emit, int label); +void emit_with_cleanup(emitter_t *emit); +void emit_setup_except(emitter_t *emit, int label); +void emit_setup_finally(emitter_t *emit, int label); +void emit_end_finally(emitter_t *emit); +void emit_get_iter(emitter_t *emit); // tos = getiter(tos) +void emit_for_iter(emitter_t *emit, int label); +void emit_for_iter_end(emitter_t *emit); +void emit_pop_block(emitter_t *emit); +void emit_pop_except(emitter_t *emit); +void emit_unary_op(emitter_t *emit, rt_unary_op_t op); +void emit_binary_op(emitter_t *emit, rt_binary_op_t op); +void emit_compare_op(emitter_t *emit, rt_compare_op_t op); +void emit_build_tuple(emitter_t *emit, int n_args); +void emit_build_list(emitter_t *emit, int n_args); +void emit_list_append(emitter_t *emit, int list_stack_index); +void emit_build_map(emitter_t *emit, int n_args); +void emit_store_map(emitter_t *emit); +void emit_map_add(emitter_t *emit, int map_stack_index); +void emit_build_set(emitter_t *emit, int n_args); +void emit_set_add(emitter_t *emit, int set_stack_index); +void emit_build_slice(emitter_t *emit, int n_args); +void emit_unpack_sequence(emitter_t *emit, int n_args); +void emit_unpack_ex(emitter_t *emit, int n_left, int n_right); +void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params); +void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params); +void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg); +void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg); +void emit_return_value(emitter_t *emit); +void emit_raise_varargs(emitter_t *emit, int n_args); +void emit_yield_value(emitter_t *emit); +void emit_yield_from(emitter_t *emit); diff --git 
a/py/emitbc.c b/py/emitbc.c new file mode 100644 index 0000000000..9d159ae605 --- /dev/null +++ b/py/emitbc.c @@ -0,0 +1,692 @@ +#include +#include +#include +#include +#include +#include + +#include "misc.h" +#include "lexer.h" +#include "machine.h" +#include "parse.h" +#include "compile.h" +#include "scope.h" +#include "runtime.h" +#include "emit.h" +#include "bc.h" + +#ifdef EMIT_DO_BC + +struct _emitter_t { + int pass; + int next_label; + int stack_size; + bool last_emit_was_return_value; + + scope_t *scope; + + int max_num_labels; + uint *label_offsets; + + uint code_offset; + uint code_size; + byte *code_base; + byte dummy_data[8]; +}; + +emitter_t *emit_new() { + emitter_t *emit = m_new(emitter_t, 1); + emit->max_num_labels = 0; + emit->label_offsets = NULL; + emit->code_offset = 0; + emit->code_size = 0; + emit->code_base = NULL; + return emit; +} + +uint emit_get_code_size(emitter_t* emit) { + return emit->code_size; +} + +void* emit_get_code(emitter_t* emit) { + return emit->code_base; +} + +void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) { + emit->pass = pass; + emit->next_label = 1; + emit->stack_size = 0; + emit->last_emit_was_return_value = false; + emit->scope = scope; + if (pass == PASS_1) { + scope->unique_code_id = rt_get_new_unique_code_id(); + } else if (pass > PASS_1) { + if (emit->label_offsets == NULL) { + emit->label_offsets = m_new(uint, emit->max_num_labels); + } + if (pass == PASS_2) { + memset(emit->label_offsets, -1, emit->max_num_labels * sizeof(uint)); + } + } + emit->code_offset = 0; +} + +void emit_end_pass(emitter_t *emit) { + // check stack is back to zero size + if (emit->stack_size != 0) { + printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size); + } + + if (emit->pass == PASS_1) { + // calculate number of labels needed + if (emit->next_label > emit->max_num_labels) { + emit->max_num_labels = emit->next_label; + } + + } else if (emit->pass == PASS_2) { + // calculate size of code in bytes 
+ emit->code_size = emit->code_offset; + emit->code_base = m_new(byte, emit->code_size); + printf("code_size: %u\n", emit->code_size); + + } else if (emit->pass == PASS_3) { + rt_assign_byte_code(emit->scope->unique_code_id, emit->code_base, emit->code_size, emit->scope->num_params); + } +} + +// all functions must go through this one to emit bytes +static byte* emit_get_cur_to_write_bytes(emitter_t* emit, int num_bytes_to_write) { + //printf("emit %d\n", num_bytes_to_write); + if (emit->pass < PASS_3) { + emit->code_offset += num_bytes_to_write; + return emit->dummy_data; + } else { + assert(emit->code_offset + num_bytes_to_write <= emit->code_size); + byte *c = emit->code_base + emit->code_offset; + emit->code_offset += num_bytes_to_write; + return c; + } +} + +static void emit_write_byte_1(emitter_t* emit, byte b1) { + byte* c = emit_get_cur_to_write_bytes(emit, 1); + c[0] = b1; +} + +static void emit_write_byte_1_byte(emitter_t* emit, byte b1, uint b2) { + assert((b2 & (~0xff)) == 0); + byte* c = emit_get_cur_to_write_bytes(emit, 2); + c[0] = b1; + c[1] = b2; +} + +static void emit_write_byte_1_int(emitter_t* emit, byte b1, int num) { + assert((num & (~0x7fff)) == 0 || (num & (~0x7fff)) == (~0x7fff)); + byte* c = emit_get_cur_to_write_bytes(emit, 3); + c[0] = b1; + c[1] = num; + c[2] = num >> 8; +} + +static void emit_write_byte_1_uint(emitter_t* emit, byte b1, uint num) { + if (num <= 127) { // fits in 0x7f + // fit argument in single byte + byte* c = emit_get_cur_to_write_bytes(emit, 2); + c[0] = b1; + c[1] = num; + } else if (num <= 16383) { // fits in 0x3fff + // fit argument in two bytes + byte* c = emit_get_cur_to_write_bytes(emit, 3); + c[0] = b1; + c[1] = (num >> 8) | 0x80; + c[2] = num; + } else { + // larger numbers not implemented/supported + assert(0); + } +} + +static void emit_write_byte_1_qstr(emitter_t* emit, byte b1, qstr qstr) { + emit_write_byte_1_uint(emit, b1, qstr); +} + +// emit opcode b1 followed by the code offset of the given label +// the offset is always written in the two-byte uint form (0x80 set in the first +// operand byte), so the instruction is 3 bytes long in every pass; going through +// emit_write_byte_1_uint would size it with the placeholder 0 before PASS_3 (2 bytes) +// but write 3 bytes in PASS_3 for any offset above 127, making the PASS_3 code +// larger than the code_size computed in PASS_2 +// NOTE(review): assumes the byte code reader picks the uint width from the high bit +// of the first operand byte, as the encoding in emit_write_byte_1_uint implies +static void emit_write_byte_1_label(emitter_t* emit, byte b1, int 
label) { + uint code_offset; + if (emit->pass < PASS_3) { + // value is not kept before PASS_3 (bytes go to dummy_data), only the size matters + code_offset = 0; + } else { + code_offset = emit->label_offsets[label]; + } + assert(code_offset <= 16383); // must fit in 0x3fff, same limit as emit_write_byte_1_uint + byte* c = emit_get_cur_to_write_bytes(emit, 3); + c[0] = b1; + c[1] = (code_offset >> 8) | 0x80; + c[2] = code_offset; +} + +bool emit_last_emit_was_return_value(emitter_t *emit) { + return emit->last_emit_was_return_value; +} + +int emit_get_stack_size(emitter_t *emit) { + return emit->stack_size; +} + +void emit_set_stack_size(emitter_t *emit, int size) { + if (emit->pass > PASS_1) { + emit->stack_size = size; + } +} + +static void emit_pre(emitter_t *emit, int stack_size_delta) { + if (emit->pass > PASS_1) { + emit->stack_size += stack_size_delta; + if (emit->stack_size > emit->scope->stack_size) { + emit->scope->stack_size = emit->stack_size; + } + } + emit->last_emit_was_return_value = false; +} + +int emit_label_new(emitter_t *emit) { + return emit->next_label++; +} + +void emit_label_assign(emitter_t *emit, int l) { + emit_pre(emit, 0); + if (emit->pass > PASS_1) { + assert(l < emit->max_num_labels); + if (emit->pass == PASS_2) { + // assign label offset + assert(emit->label_offsets[l] == -1); + emit->label_offsets[l] = emit->code_offset; + } else if (emit->pass == PASS_3) { + // ensure label offset has not changed from PASS_2 to PASS_3 + assert(emit->label_offsets[l] == emit->code_offset); + //printf("l%d: (at %d)\n", l, emit->code_offset); + } + } +} + +void emit_import_name(emitter_t *emit, qstr qstr) { + emit_pre(emit, -1); + emit_write_byte_1_qstr(emit, PYBC_IMPORT_NAME, qstr); +} + +void emit_import_from(emitter_t *emit, qstr qstr) { + emit_pre(emit, 1); + emit_write_byte_1_qstr(emit, PYBC_IMPORT_FROM, qstr); +} + +void emit_import_star(emitter_t *emit) { + emit_pre(emit, -1); + emit_write_byte_1(emit, PYBC_IMPORT_STAR); +} + +void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) { + emit_pre(emit, 1); + switch (tok) { + case PY_TOKEN_KW_FALSE: emit_write_byte_1(emit, PYBC_LOAD_CONST_FALSE); break; + case PY_TOKEN_KW_NONE: emit_write_byte_1(emit, PYBC_LOAD_CONST_NONE); break; + 
case PY_TOKEN_KW_TRUE: emit_write_byte_1(emit, PYBC_LOAD_CONST_TRUE); break; + default: assert(0); + } +} + +void emit_load_const_small_int(emitter_t *emit, int arg) { + emit_pre(emit, 1); + emit_write_byte_1_int(emit, PYBC_LOAD_CONST_SMALL_INT, arg); +} + +void emit_load_const_int(emitter_t *emit, qstr qstr) { + emit_pre(emit, 1); + emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_INT, qstr); +} + +void emit_load_const_dec(emitter_t *emit, qstr qstr) { + emit_pre(emit, 1); + emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_DEC, qstr); +} + +void emit_load_const_id(emitter_t *emit, qstr qstr) { + emit_pre(emit, 1); + emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_ID, qstr); +} + +void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) { + emit_pre(emit, 1); + if (bytes) { + emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_BYTES, qstr); + } else { + emit_write_byte_1_qstr(emit, PYBC_LOAD_CONST_STRING, qstr); + } +} + +void emit_load_const_verbatim_start(emitter_t *emit) { + emit_pre(emit, 1); + assert(0); +} + +void emit_load_const_verbatim_int(emitter_t *emit, int val) { + assert(0); +} + +void emit_load_const_verbatim_str(emitter_t *emit, const char *str) { + assert(0); +} + +void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) { + assert(0); +} + +void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) { + assert(0); +} + +void emit_load_const_verbatim_end(emitter_t *emit) { + assert(0); +} + +void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) { + assert(local_num >= 0); + emit_pre(emit, 1); + switch (local_num) { + case 0: emit_write_byte_1(emit, PYBC_LOAD_FAST_0); break; + case 1: emit_write_byte_1(emit, PYBC_LOAD_FAST_1); break; + case 2: emit_write_byte_1(emit, PYBC_LOAD_FAST_2); break; + default: emit_write_byte_1_uint(emit, PYBC_LOAD_FAST_N, local_num); break; + } +} + +void emit_load_name(emitter_t *emit, qstr qstr) { + emit_pre(emit, 1); + emit_write_byte_1_qstr(emit, PYBC_LOAD_NAME, qstr); +} + 
+void emit_load_global(emitter_t *emit, qstr qstr) { + emit_pre(emit, 1); + emit_write_byte_1_qstr(emit, PYBC_LOAD_GLOBAL, qstr); +} + +void emit_load_deref(emitter_t *emit, qstr qstr) { + emit_pre(emit, 1); + assert(0); +} + +void emit_load_closure(emitter_t *emit, qstr qstr) { + emit_pre(emit, 1); + assert(0); +} + +void emit_load_attr(emitter_t *emit, qstr qstr) { + emit_pre(emit, 0); + emit_write_byte_1_qstr(emit, PYBC_LOAD_ATTR, qstr); +} + +void emit_load_method(emitter_t *emit, qstr qstr) { + emit_pre(emit, 0); + emit_write_byte_1_qstr(emit, PYBC_LOAD_METHOD, qstr); +} + +void emit_load_build_class(emitter_t *emit) { + emit_pre(emit, 1); + emit_write_byte_1(emit, PYBC_LOAD_BUILD_CLASS); +} + +void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) { + assert(local_num >= 0); + emit_pre(emit, -1); + switch (local_num) { + case 0: emit_write_byte_1(emit, PYBC_STORE_FAST_0); break; + case 1: emit_write_byte_1(emit, PYBC_STORE_FAST_1); break; + case 2: emit_write_byte_1(emit, PYBC_STORE_FAST_2); break; + default: emit_write_byte_1_uint(emit, PYBC_STORE_FAST_N, local_num); break; + } +} + +void emit_store_name(emitter_t *emit, qstr qstr) { + emit_pre(emit, -1); + emit_write_byte_1_qstr(emit, PYBC_STORE_NAME, qstr); +} + +void emit_store_global(emitter_t *emit, qstr qstr) { + emit_pre(emit, -1); + emit_write_byte_1_qstr(emit, PYBC_STORE_GLOBAL, qstr); +} + +void emit_store_deref(emitter_t *emit, qstr qstr) { + emit_pre(emit, -1); + assert(0); +} + +void emit_store_attr(emitter_t *emit, qstr qstr) { + emit_pre(emit, -2); + emit_write_byte_1_qstr(emit, PYBC_STORE_ATTR, qstr); +} + +void emit_store_locals(emitter_t *emit) { + emit_pre(emit, -1); + emit_write_byte_1(emit, PYBC_STORE_LOCALS); +} + +void emit_store_subscr(emitter_t *emit) { + emit_pre(emit, -3); + emit_write_byte_1(emit, PYBC_STORE_SUBSCR); +} + +void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) { + assert(local_num >= 0); + emit_pre(emit, 0); + emit_write_byte_1_uint(emit, 
PYBC_DELETE_FAST_N, local_num); +} + +void emit_delete_name(emitter_t *emit, qstr qstr) { + emit_pre(emit, 0); + emit_write_byte_1_qstr(emit, PYBC_DELETE_NAME, qstr); +} + +void emit_delete_global(emitter_t *emit, qstr qstr) { + emit_pre(emit, 0); + emit_write_byte_1_qstr(emit, PYBC_DELETE_GLOBAL, qstr); +} + +void emit_delete_deref(emitter_t *emit, qstr qstr) { + emit_pre(emit, 0); + emit_write_byte_1_qstr(emit, PYBC_DELETE_DEREF, qstr); +} + +void emit_delete_attr(emitter_t *emit, qstr qstr) { + emit_pre(emit, -1); + emit_write_byte_1_qstr(emit, PYBC_DELETE_ATTR, qstr); +} + +void emit_delete_subscr(emitter_t *emit) { + emit_pre(emit, -2); + emit_write_byte_1(emit, PYBC_DELETE_SUBSCR); +} + +void emit_dup_top(emitter_t *emit) { + emit_pre(emit, 1); + emit_write_byte_1(emit, PYBC_DUP_TOP); +} + +void emit_dup_top_two(emitter_t *emit) { + emit_pre(emit, 2); + emit_write_byte_1(emit, PYBC_DUP_TOP_TWO); +} + +void emit_pop_top(emitter_t *emit) { + emit_pre(emit, -1); + emit_write_byte_1(emit, PYBC_POP_TOP); +} + +void emit_rot_two(emitter_t *emit) { + emit_pre(emit, 0); + emit_write_byte_1(emit, PYBC_ROT_TWO); +} + +void emit_rot_three(emitter_t *emit) { + emit_pre(emit, 0); + emit_write_byte_1(emit, PYBC_ROT_THREE); +} + +void emit_jump(emitter_t *emit, int label) { + emit_pre(emit, 0); + emit_write_byte_1_label(emit, PYBC_JUMP, label); +} + +void emit_pop_jump_if_true(emitter_t *emit, int label) { + emit_pre(emit, -1); + emit_write_byte_1_label(emit, PYBC_POP_JUMP_IF_TRUE, label); +} + +void emit_pop_jump_if_false(emitter_t *emit, int label) { + emit_pre(emit, -1); + emit_write_byte_1_label(emit, PYBC_POP_JUMP_IF_FALSE, label); +} + +void emit_jump_if_true_or_pop(emitter_t *emit, int label) { + emit_pre(emit, -1); + emit_write_byte_1_label(emit, PYBC_JUMP_IF_TRUE_OR_POP, label); +} + +void emit_jump_if_false_or_pop(emitter_t *emit, int label) { + emit_pre(emit, -1); + emit_write_byte_1_label(emit, PYBC_JUMP_IF_FALSE_OR_POP, label); +} + +void 
emit_setup_loop(emitter_t *emit, int label) { + emit_pre(emit, 0); + emit_write_byte_1_label(emit, PYBC_SETUP_LOOP, label); +} + +void emit_break_loop(emitter_t *emit, int label) { + emit_pre(emit, 0); + emit_write_byte_1_label(emit, PYBC_BREAK_LOOP, label); +} + +void emit_continue_loop(emitter_t *emit, int label) { + emit_pre(emit, 0); + emit_write_byte_1_label(emit, PYBC_CONTINUE_LOOP, label); +} + +void emit_setup_with(emitter_t *emit, int label) { + emit_pre(emit, 7); + emit_write_byte_1_label(emit, PYBC_SETUP_WITH, label); +} + +void emit_with_cleanup(emitter_t *emit) { + emit_pre(emit, -7); + emit_write_byte_1(emit, PYBC_WITH_CLEANUP); +} + +void emit_setup_except(emitter_t *emit, int label) { + emit_pre(emit, 6); + emit_write_byte_1_label(emit, PYBC_SETUP_EXCEPT, label); +} + +void emit_setup_finally(emitter_t *emit, int label) { + emit_pre(emit, 6); + emit_write_byte_1_label(emit, PYBC_SETUP_FINALLY, label); +} + +void emit_end_finally(emitter_t *emit) { + emit_pre(emit, -1); + emit_write_byte_1(emit, PYBC_END_FINALLY); +} + +void emit_get_iter(emitter_t *emit) { + emit_pre(emit, 0); + emit_write_byte_1(emit, PYBC_GET_ITER); +} + +void emit_for_iter(emitter_t *emit, int label) { + emit_pre(emit, 1); + emit_write_byte_1_label(emit, PYBC_FOR_ITER, label); +} + +void emit_for_iter_end(emitter_t *emit) { + emit_pre(emit, -1); +} + +void emit_pop_block(emitter_t *emit) { + emit_pre(emit, 0); + emit_write_byte_1(emit, PYBC_POP_BLOCK); +} + +void emit_pop_except(emitter_t *emit) { + emit_pre(emit, 0); + emit_write_byte_1(emit, PYBC_POP_EXCEPT); +} + +void emit_unary_op(emitter_t *emit, rt_unary_op_t op) { + emit_pre(emit, 0); + emit_write_byte_1_byte(emit, PYBC_UNARY_OP, op); +} + +void emit_binary_op(emitter_t *emit, rt_binary_op_t op) { + emit_pre(emit, -1); + emit_write_byte_1_byte(emit, PYBC_BINARY_OP, op); +} + +void emit_compare_op(emitter_t *emit, rt_compare_op_t op) { + emit_pre(emit, -1); + emit_write_byte_1_byte(emit, PYBC_COMPARE_OP, op); +} + +void 
emit_build_tuple(emitter_t *emit, int n_args) { + assert(n_args >= 0); + emit_pre(emit, 1 - n_args); + emit_write_byte_1_uint(emit, PYBC_BUILD_TUPLE, n_args); +} + +void emit_build_list(emitter_t *emit, int n_args) { + assert(n_args >= 0); + emit_pre(emit, 1 - n_args); + emit_write_byte_1_uint(emit, PYBC_BUILD_LIST, n_args); +} + +void emit_list_append(emitter_t *emit, int list_stack_index) { + assert(list_stack_index >= 0); + emit_pre(emit, -1); + emit_write_byte_1_uint(emit, PYBC_LIST_APPEND, list_stack_index); +} + +void emit_build_map(emitter_t *emit, int n_args) { + assert(n_args >= 0); + emit_pre(emit, 1); + emit_write_byte_1_uint(emit, PYBC_BUILD_MAP, n_args); +} + +void emit_store_map(emitter_t *emit) { + emit_pre(emit, -2); + emit_write_byte_1(emit, PYBC_STORE_MAP); +} + +void emit_map_add(emitter_t *emit, int map_stack_index) { + assert(map_stack_index >= 0); + emit_pre(emit, -2); + emit_write_byte_1_uint(emit, PYBC_MAP_ADD, map_stack_index); +} + +void emit_build_set(emitter_t *emit, int n_args) { + assert(n_args >= 0); + emit_pre(emit, 1 - n_args); + emit_write_byte_1_uint(emit, PYBC_BUILD_SET, n_args); +} + +void emit_set_add(emitter_t *emit, int set_stack_index) { + assert(set_stack_index >= 0); + emit_pre(emit, -1); + emit_write_byte_1_uint(emit, PYBC_SET_ADD, set_stack_index); +} + +void emit_build_slice(emitter_t *emit, int n_args) { + assert(n_args >= 0); + emit_pre(emit, 1 - n_args); + emit_write_byte_1_uint(emit, PYBC_BUILD_SLICE, n_args); +} + +void emit_unpack_sequence(emitter_t *emit, int n_args) { + assert(n_args >= 0); + emit_pre(emit, -1 + n_args); + emit_write_byte_1_uint(emit, PYBC_UNPACK_SEQUENCE, n_args); +} + +void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) { + assert(n_left >=0 && n_right >= 0); + emit_pre(emit, -1 + n_left + n_right + 1); + emit_write_byte_1_uint(emit, PYBC_UNPACK_EX, n_left | (n_right << 8)); +} + +void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { + 
assert(n_default_params == 0 && n_dict_params == 0); + emit_pre(emit, 1); + emit_write_byte_1_uint(emit, PYBC_MAKE_FUNCTION, scope->unique_code_id); +} + +void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { + assert(0); + emit_pre(emit, -2 - n_default_params - 2 * n_dict_params); + if (emit->pass == PASS_3) { + printf("MAKE_CLOSURE %d\n", (n_dict_params << 8) | n_default_params); + } +} + +void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { + int s = 0; + if (have_star_arg) { + s += 1; + } + if (have_dbl_star_arg) { + s += 1; + } + emit_pre(emit, -n_positional - 2 * n_keyword - s); + int op; + if (have_star_arg) { + if (have_dbl_star_arg) { + op = PYBC_CALL_FUNCTION_VAR_KW; + } else { + op = PYBC_CALL_FUNCTION_VAR; + } + } else { + if (have_dbl_star_arg) { + op = PYBC_CALL_FUNCTION_KW; + } else { + op = PYBC_CALL_FUNCTION; + } + } + emit_write_byte_1_uint(emit, op, (n_keyword << 8) | n_positional); // TODO make it 2 separate uints +} + +void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { + int s = 0; + if (have_star_arg) { + s += 1; + } + if (have_dbl_star_arg) { + s += 1; + } + emit_pre(emit, -n_positional - 2 * n_keyword - s); + int op; + if (have_star_arg) { + if (have_dbl_star_arg) { + op = PYBC_CALL_METHOD_VAR_KW; + } else { + op = PYBC_CALL_METHOD_VAR; + } + } else { + if (have_dbl_star_arg) { + op = PYBC_CALL_METHOD_KW; + } else { + op = PYBC_CALL_METHOD; + } + } + emit_write_byte_1_uint(emit, op, (n_keyword << 8) | n_positional); // TODO make it 2 separate uints +} + +void emit_return_value(emitter_t *emit) { + emit_pre(emit, -1); + emit->last_emit_was_return_value = true; + emit_write_byte_1(emit, PYBC_RETURN_VALUE); +} + +void emit_raise_varargs(emitter_t *emit, int n_args) { + assert(n_args >= 0); + emit_pre(emit, -n_args); + emit_write_byte_1_uint(emit, PYBC_RAISE_VARARGS, 
n_args); +} + +void emit_yield_value(emitter_t *emit) { + emit_pre(emit, 0); + if (emit->pass == PASS_2) { + emit->scope->flags |= SCOPE_FLAG_GENERATOR; + } + emit_write_byte_1(emit, PYBC_YIELD_VALUE); +} + +void emit_yield_from(emitter_t *emit) { + emit_pre(emit, -1); + if (emit->pass == PASS_2) { + emit->scope->flags |= SCOPE_FLAG_GENERATOR; + } + emit_write_byte_1(emit, PYBC_YIELD_FROM); +} + +#endif // EMIT_DO_BC diff --git a/py/emitcommon.c b/py/emitcommon.c new file mode 100644 index 0000000000..1fd8697c3a --- /dev/null +++ b/py/emitcommon.c @@ -0,0 +1,171 @@ +#include +#include +#include +#include +#include + +#include "misc.h" +#include "lexer.h" +#include "machine.h" +#include "parse.h" +#include "scope.h" +#include "runtime.h" +#include "emit.h" + +#define EMIT(fun, arg...) (emit_##fun(emit, ##arg)) + +void emit_common_declare_global(pass_kind_t pass, scope_t *scope, qstr qstr) { + if (pass == PASS_1) { + if (scope->kind == SCOPE_MODULE) { + printf("SyntaxError?: can't declare global in outer code\n"); + return; + } + bool added; + id_info_t *id_info = scope_find_or_add_id(scope, qstr, &added); + if (!added) { + printf("SyntaxError?: identifier already declared something\n"); + return; + } + id_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT; + + // if the id exists in the global scope, set its kind to EXPLICIT_GLOBAL + id_info = scope_find_global(scope, qstr); + if (id_info != NULL) { + id_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT; + } + } +} + +void emit_common_declare_nonlocal(pass_kind_t pass, scope_t *scope, qstr qstr) { + if (pass == PASS_1) { + if (scope->kind == SCOPE_MODULE) { + printf("SyntaxError?: can't declare nonlocal in outer code\n"); + return; + } + bool added; + id_info_t *id_info = scope_find_or_add_id(scope, qstr, &added); + if (!added) { + printf("SyntaxError?: identifier already declared something\n"); + return; + } + id_info_t *id_info2 = scope_find_local_in_parent(scope, qstr); + if (id_info2 == NULL || !(id_info2->kind == 
ID_INFO_KIND_LOCAL || id_info2->kind == ID_INFO_KIND_CELL || id_info2->kind == ID_INFO_KIND_FREE)) { + printf("SyntaxError: no binding for nonlocal '%s' found\n", qstr_str(qstr)); + return; + } + id_info->kind = ID_INFO_KIND_FREE; + scope_close_over_in_parents(scope, qstr); + } +} + +void emit_common_load_id(pass_kind_t pass, scope_t *scope, qstr qstr___class__, emitter_t *emit, qstr qstr) { + id_info_t *id_info = NULL; + if (pass == PASS_1) { + // name adding/lookup + bool added; + id_info = scope_find_or_add_id(scope, qstr, &added); + if (added) { + if (strcmp(qstr_str(qstr), "AssertionError") == 0) { + id_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT; + // TODO how much of a hack is this? + } else if (strcmp(qstr_str(qstr), "super") == 0 && scope->kind == SCOPE_FUNCTION) { + // special case, super is a global, and also counts as use of __class__ + id_info->kind = ID_INFO_KIND_GLOBAL_EXPLICIT; + id_info_t *id_info2 = scope_find_local_in_parent(scope, qstr___class__); + if (id_info2 != NULL) { + id_info2 = scope_find_or_add_id(scope, qstr___class__, &added); + if (added) { + id_info2->kind = ID_INFO_KIND_FREE; + scope_close_over_in_parents(scope, qstr___class__); + } + } + } else { + id_info_t *id_info2 = scope_find_local_in_parent(scope, qstr); + if (id_info2 != NULL && (id_info2->kind == ID_INFO_KIND_LOCAL || id_info2->kind == ID_INFO_KIND_CELL || id_info2->kind == ID_INFO_KIND_FREE)) { + id_info->kind = ID_INFO_KIND_FREE; + scope_close_over_in_parents(scope, qstr); + } else { + id_info->kind = ID_INFO_KIND_GLOBAL_IMPLICIT; + } + } + } + } else { + id_info = scope_find(scope, qstr); + } + + assert(id_info != NULL); // TODO can this ever fail? 
+ + // call the emit backend with the correct code + if (id_info == NULL || id_info->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) { + EMIT(load_name, qstr); + } else if (id_info->kind == ID_INFO_KIND_GLOBAL_EXPLICIT) { + EMIT(load_global, qstr); + } else if (id_info->kind == ID_INFO_KIND_LOCAL) { + EMIT(load_fast, qstr, id_info->local_num); + } else if (id_info->kind == ID_INFO_KIND_CELL || id_info->kind == ID_INFO_KIND_FREE) { + EMIT(load_deref, qstr); + } else { + assert(0); + } +} + +static id_info_t *get_id_for_modification(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr) { + id_info_t *id_info = NULL; + if (pass == PASS_1) { + // name adding/lookup + bool added; + id_info = scope_find_or_add_id(scope, qstr, &added); + if (added) { + if (scope->kind == SCOPE_MODULE || scope->kind == SCOPE_CLASS) { + id_info->kind = ID_INFO_KIND_GLOBAL_IMPLICIT; + } else { + id_info->kind = ID_INFO_KIND_LOCAL; + } + } else if (scope->kind >= SCOPE_FUNCTION && scope->kind <= SCOPE_GEN_EXPR && id_info->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) { + // rebind as a local variable + id_info->kind = ID_INFO_KIND_LOCAL; + } + } else { + id_info = scope_find(scope, qstr); + } + + assert(id_info != NULL); // TODO can this ever fail? 
+ + return id_info; +} + +void emit_common_store_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr) { + // create/get the id info + id_info_t *id = get_id_for_modification(pass, scope, emit, qstr); + + // call the emit backend with the correct code + if (id == NULL || id->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) { + EMIT(store_name, qstr); + } else if (id->kind == ID_INFO_KIND_GLOBAL_EXPLICIT) { + EMIT(store_global, qstr); + } else if (id->kind == ID_INFO_KIND_LOCAL) { + EMIT(store_fast, qstr, id->local_num); + } else if (id->kind == ID_INFO_KIND_CELL || id->kind == ID_INFO_KIND_FREE) { + EMIT(store_deref, qstr); + } else { + assert(0); + } +} + +void emit_common_delete_id(pass_kind_t pass, scope_t *scope, emitter_t *emit, qstr qstr) { + // create/get the id info + id_info_t *id = get_id_for_modification(pass, scope, emit, qstr); + + // call the emit backend with the correct code + if (id == NULL || id->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) { + EMIT(delete_name, qstr); + } else if (id->kind == ID_INFO_KIND_GLOBAL_EXPLICIT) { + EMIT(delete_global, qstr); + } else if (id->kind == ID_INFO_KIND_LOCAL) { + EMIT(delete_fast, qstr, id->local_num); + } else if (id->kind == ID_INFO_KIND_CELL || id->kind == ID_INFO_KIND_FREE) { + EMIT(delete_deref, qstr); + } else { + assert(0); + } +} diff --git a/py/emitcpy.c b/py/emitcpy.c new file mode 100644 index 0000000000..637abd772a --- /dev/null +++ b/py/emitcpy.c @@ -0,0 +1,834 @@ +#include +#include +#include +#include +#include +#include + +#include "misc.h" +#include "lexer.h" +#include "machine.h" +#include "parse.h" +#include "compile.h" +#include "scope.h" +#include "runtime.h" +#include "emit.h" + +#ifdef EMIT_DO_CPY + +struct _emitter_t { + int pass; + int next_label; + int byte_code_offset; + int stack_size; + bool last_emit_was_return_value; + + scope_t *scope; + + int max_num_labels; + int *label_offsets; +}; + +emitter_t *emit_new() { + emitter_t *emit = m_new(emitter_t, 1); + emit->max_num_labels = 0; + 
emit->label_offsets = NULL; + return emit; +} + +void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) { + emit->pass = pass; + emit->next_label = 1; + emit->byte_code_offset = 0; + emit->stack_size = 0; + emit->last_emit_was_return_value = false; + emit->scope = scope; + if (pass > PASS_1) { + if (emit->label_offsets == NULL) { + emit->label_offsets = m_new(int, emit->max_num_labels); + } + if (pass == PASS_2) { + memset(emit->label_offsets, -1, emit->max_num_labels * sizeof(int)); + } + } +} + +void emit_end_pass(emitter_t *emit) { + // check stack is back to zero size + if (emit->stack_size != 0) { + printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size); + } + + // calculate number of labels need + if (emit->pass == PASS_1) { + if (emit->next_label > emit->max_num_labels) { + emit->max_num_labels = emit->next_label; + } + } +} + +bool emit_last_emit_was_return_value(emitter_t *emit) { + return emit->last_emit_was_return_value; +} + +int emit_get_stack_size(emitter_t *emit) { + return emit->stack_size; +} + +void emit_set_stack_size(emitter_t *emit, int size) { + emit->stack_size = size; +} + +static void emit_pre(emitter_t *emit, int stack_size_delta, int byte_code_size) { + emit->stack_size += stack_size_delta; + if (emit->pass > PASS_1 && emit->stack_size > emit->scope->stack_size) { + emit->scope->stack_size = emit->stack_size; + } + emit->last_emit_was_return_value = false; + if (emit->pass == PASS_3 && byte_code_size > 0) { + if (emit->byte_code_offset >= 1000) { + printf("%d ", emit->byte_code_offset); + } else { + printf("% 4d ", emit->byte_code_offset); + } + } + emit->byte_code_offset += byte_code_size; +} + +int emit_label_new(emitter_t *emit) { + return emit->next_label++; +} + +void emit_label_assign(emitter_t *emit, int l) { + emit_pre(emit, 0, 0); + if (emit->pass > PASS_1) { + assert(l < emit->max_num_labels); + if (emit->pass == PASS_2) { + // assign label offset + assert(emit->label_offsets[l] == -1); + 
emit->label_offsets[l] = emit->byte_code_offset; + } else if (emit->pass == PASS_3) { + // ensure label offset has not changed from PASS_2 to PASS_3 + assert(emit->label_offsets[l] == emit->byte_code_offset); + //printf("l%d: (at %d)\n", l, emit->byte_code_offset); + } + } +} + +void emit_import_name(emitter_t *emit, qstr qstr) { + emit_pre(emit, -1, 3); + if (emit->pass == PASS_3) { + printf("IMPORT_NAME %s\n", qstr_str(qstr)); + } +} + +void emit_import_from(emitter_t *emit, qstr qstr) { + emit_pre(emit, 1, 3); + if (emit->pass == PASS_3) { + printf("IMPORT_FROM %s\n", qstr_str(qstr)); + } +} + +void emit_import_star(emitter_t *emit) { + emit_pre(emit, -1, 1); + if (emit->pass == PASS_3) { + printf("IMPORT_STAR\n"); + } +} + +void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) { + emit_pre(emit, 1, 3); + if (emit->pass == PASS_3) { + printf("LOAD_CONST "); + switch (tok) { + case PY_TOKEN_KW_FALSE: printf("False"); break; + case PY_TOKEN_KW_NONE: printf("None"); break; + case PY_TOKEN_KW_TRUE: printf("True"); break; + default: printf("?=%d\n", tok); return; assert(0); + } + printf("\n"); + } +} + +void emit_load_const_small_int(emitter_t *emit, int arg) { + emit_pre(emit, 1, 3); + if (emit->pass == PASS_3) { + printf("LOAD_CONST %d\n", arg); + } +} + +void emit_load_const_int(emitter_t *emit, qstr qstr) { + emit_pre(emit, 1, 3); + if (emit->pass == PASS_3) { + printf("LOAD_CONST %s\n", qstr_str(qstr)); + } +} + +void emit_load_const_dec(emitter_t *emit, qstr qstr) { + emit_pre(emit, 1, 3); + if (emit->pass == PASS_3) { + printf("LOAD_CONST %s\n", qstr_str(qstr)); + } +} + +void emit_load_const_id(emitter_t *emit, qstr qstr) { + emit_pre(emit, 1, 3); + if (emit->pass == PASS_3) { + printf("LOAD_CONST '%s'\n", qstr_str(qstr)); + } +} + +void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) { + emit_pre(emit, 1, 3); + if (emit->pass == PASS_3) { + printf("LOAD_CONST "); + emit_load_const_verbatim_quoted_str(emit, qstr, bytes); + printf("\n"); + 
} +} + +void emit_load_const_verbatim_start(emitter_t *emit) { + emit_pre(emit, 1, 3); + if (emit->pass == PASS_3) { + printf("LOAD_CONST "); + } +} + +void emit_load_const_verbatim_int(emitter_t *emit, int val) { + if (emit->pass == PASS_3) { + printf("%d", val); + } +} + +void emit_load_const_verbatim_str(emitter_t *emit, const char *str) { + if (emit->pass == PASS_3) { + printf("%s", str); + } +} + +void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) { + if (emit->pass == PASS_3) { + printf("%.*s", len, str); + } +} + +void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) { + // TODO strings should be escaped before we get here + if (emit->pass == PASS_3) { + const char *str = qstr_str(qstr); + int len = strlen(str); + bool has_single_quote = false; + bool has_double_quote = false; + for (int i = 0; i < len; i++) { + if (str[i] == '\'') { + has_single_quote = true; + } else if (str[i] == '"') { + has_double_quote = true; + } + } + if (bytes) { + printf("b"); + } + bool quote_single = false; + if (has_single_quote && !has_double_quote) { + printf("\""); + } else { + quote_single = true; + printf("'"); + } + for (int i = 0; i < len; i++) { + if (str[i] == '\n') { + printf("\\n"); + } else if (str[i] == '\\' && str[i + 1] == '\'') { + i += 1; + if (quote_single) { + printf("\\'"); + } else { + printf("'"); + } + } else if (str[i] == '\'' && quote_single) { + printf("\\'"); + } else { + printf("%c", str[i]); + } + } + if (has_single_quote && !has_double_quote) { + printf("\""); + } else { + printf("'"); + } + } +} + +void emit_load_const_verbatim_end(emitter_t *emit) { + if (emit->pass == PASS_3) { + printf("\n"); + } +} + +void emit_load_name(emitter_t *emit, qstr qstr) { + emit_pre(emit, 1, 3); + if (emit->pass == PASS_3) { + printf("LOAD_NAME %s\n", qstr_str(qstr)); + } +} + +void emit_load_global(emitter_t *emit, qstr qstr) { + emit_pre(emit, 1, 3); + if (emit->pass == PASS_3) { + printf("LOAD_GLOBAL %s\n", 
qstr_str(qstr)); + } +} + +void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) { + emit_pre(emit, 1, 3); + if (emit->pass == PASS_3) { + printf("LOAD_FAST %s\n", qstr_str(qstr)); + } +} + +void emit_load_deref(emitter_t *emit, qstr qstr) { + emit_pre(emit, 1, 3); + if (emit->pass == PASS_3) { + printf("LOAD_DEREF %s\n", qstr_str(qstr)); + } +} + +void emit_load_closure(emitter_t *emit, qstr qstr) { + emit_pre(emit, 1, 3); + if (emit->pass == PASS_3) { + printf("LOAD_CLOSURE %s\n", qstr_str(qstr)); + } +} + +void emit_load_attr(emitter_t *emit, qstr qstr) { + emit_pre(emit, 0, 3); + if (emit->pass == PASS_3) { + printf("LOAD_ATTR %s\n", qstr_str(qstr)); + } +} + +void emit_load_method(emitter_t *emit, qstr qstr) { + emit_load_attr(emit, qstr); +} + +void emit_load_build_class(emitter_t *emit) { + emit_pre(emit, 1, 1); + if (emit->pass == PASS_3) { + printf("LOAD_BUILD_CLASS\n"); + } +} + +void emit_store_name(emitter_t *emit, qstr qstr) { + emit_pre(emit, -1, 3); + if (emit->pass == PASS_3) { + printf("STORE_NAME %s\n", qstr_str(qstr)); + } +} + +void emit_store_global(emitter_t *emit, qstr qstr) { + emit_pre(emit, -1, 3); + if (emit->pass == PASS_3) { + printf("STORE_GLOBAL %s\n", qstr_str(qstr)); + } +} + +void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) { + emit_pre(emit, -1, 3); + if (emit->pass == PASS_3) { + printf("STORE_FAST %s\n", qstr_str(qstr)); + } +} + +void emit_store_deref(emitter_t *emit, qstr qstr) { + emit_pre(emit, -1, 3); + if (emit->pass == PASS_3) { + printf("STORE_DEREF %s\n", qstr_str(qstr)); + } +} + +void emit_store_attr(emitter_t *emit, qstr qstr) { + emit_pre(emit, -2, 3); + if (emit->pass == PASS_3) { + printf("STORE_ATTR %s\n", qstr_str(qstr)); + } +} + +void emit_store_locals(emitter_t *emit) { + emit_pre(emit, -1, 1); + if (emit->pass == PASS_3) { + printf("STORE_LOCALS\n"); + } +} + +void emit_store_subscr(emitter_t *emit) { + emit_pre(emit, -3, 1); + if (emit->pass == PASS_3) { + printf("STORE_SUBSCR\n"); + 
} +} + +void emit_delete_name(emitter_t *emit, qstr qstr) { + emit_pre(emit, 0, 3); + if (emit->pass == PASS_3) { + printf("DELETE_NAME %s\n", qstr_str(qstr)); + } +} + +void emit_delete_global(emitter_t *emit, qstr qstr) { + emit_pre(emit, 0, 3); + if (emit->pass == PASS_3) { + printf("DELETE_GLOBAL %s\n", qstr_str(qstr)); + } +} + +void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) { + emit_pre(emit, 0, 3); + if (emit->pass == PASS_3) { + printf("DELETE_FAST %s\n", qstr_str(qstr)); + } +} + +void emit_delete_deref(emitter_t *emit, qstr qstr) { + emit_pre(emit, 0, 3); + if (emit->pass == PASS_3) { + printf("DELETE_DEREF %s\n", qstr_str(qstr)); + } +} + +void emit_delete_attr(emitter_t *emit, qstr qstr) { + emit_pre(emit, -1, 3); + if (emit->pass == PASS_3) { + printf("DELETE_ATTR %s\n", qstr_str(qstr)); + } +} + +void emit_delete_subscr(emitter_t *emit) { + emit_pre(emit, -2, 1); + if (emit->pass == PASS_3) { + printf("DELETE_SUBSCR\n"); + } +} + +void emit_dup_top(emitter_t *emit) { + emit_pre(emit, 1, 1); + if (emit->pass == PASS_3) { + printf("DUP_TOP\n"); + } +} + +void emit_dup_top_two(emitter_t *emit) { + emit_pre(emit, 2, 1); + if (emit->pass == PASS_3) { + printf("DUP_TOP_TWO\n"); + } +} + +void emit_pop_top(emitter_t *emit) { + emit_pre(emit, -1, 1); + if (emit->pass == PASS_3) { + printf("POP_TOP\n"); + } +} + +void emit_rot_two(emitter_t *emit) { + emit_pre(emit, 0, 1); + if (emit->pass == PASS_3) { + printf("ROT_TWO\n"); + } +} + +void emit_rot_three(emitter_t *emit) { + emit_pre(emit, 0, 1); + if (emit->pass == PASS_3) { + printf("ROT_THREE\n"); + } +} + +void emit_jump(emitter_t *emit, int label) { + emit_pre(emit, 0, 3); + if (emit->pass == PASS_3) { + int dest = emit->label_offsets[label]; + if (dest < emit->byte_code_offset) { + printf("JUMP_ABSOLUTE %d\n", emit->label_offsets[label]); + } else { + printf("JUMP_FORWARD %d\n", emit->label_offsets[label]); + } + } +} + +void emit_pop_jump_if_true(emitter_t *emit, int label) { + 
emit_pre(emit, -1, 3); + if (emit->pass == PASS_3) { + printf("POP_JUMP_IF_TRUE %d\n", emit->label_offsets[label]); + } +} + +void emit_pop_jump_if_false(emitter_t *emit, int label) { + emit_pre(emit, -1, 3); + if (emit->pass == PASS_3) { + printf("POP_JUMP_IF_FALSE %d\n", emit->label_offsets[label]); + } +} + +void emit_jump_if_true_or_pop(emitter_t *emit, int label) { + emit_pre(emit, -1, 3); + if (emit->pass == PASS_3) { + printf("JUMP_IF_TRUE_OR_POP %d\n", emit->label_offsets[label]); + } +} + +void emit_jump_if_false_or_pop(emitter_t *emit, int label) { + emit_pre(emit, -1, 3); + if (emit->pass == PASS_3) { + printf("JUMP_IF_FALSE_OR_POP %d\n", emit->label_offsets[label]); + } +} + +void emit_setup_loop(emitter_t *emit, int label) { + emit_pre(emit, 0, 3); + if (emit->pass == PASS_3) { + printf("SETUP_LOOP %d\n", emit->label_offsets[label]); + } +} + +void emit_break_loop(emitter_t *emit, int label) { + emit_pre(emit, 0, 1); + if (emit->pass == PASS_3) { + printf("BREAK_LOOP\n"); // CPython doesn't have label + //printf("BREAK_LOOP %d\n", emit->label_offsets[label]); + } +} + +void emit_continue_loop(emitter_t *emit, int label) { + emit_pre(emit, 0, 3); + if (emit->pass == PASS_3) { + printf("CONTINUE_LOOP %d\n", emit->label_offsets[label]); + } +} + +void emit_setup_with(emitter_t *emit, int label) { + emit_pre(emit, 7, 3); + if (emit->pass == PASS_3) { + printf("SETUP_WITH %d\n", emit->label_offsets[label]); + } +} + +void emit_with_cleanup(emitter_t *emit) { + emit_pre(emit, -7, 1); + if (emit->pass == PASS_3) { + printf("WITH_CLEANUP\n"); + } +} + +void emit_setup_except(emitter_t *emit, int label) { + emit_pre(emit, 6, 3); + if (emit->pass == PASS_3) { + printf("SETUP_EXCEPT %d\n", emit->label_offsets[label]); + } +} + +void emit_setup_finally(emitter_t *emit, int label) { + emit_pre(emit, 6, 3); + if (emit->pass == PASS_3) { + printf("SETUP_FINALLY %d\n", emit->label_offsets[label]); + } +} + +void emit_end_finally(emitter_t *emit) { + emit_pre(emit, -1, 
1); + if (emit->pass == PASS_3) { + printf("END_FINALLY\n"); + } +} + +void emit_get_iter(emitter_t *emit) { + emit_pre(emit, 0, 1); + if (emit->pass == PASS_3) { + printf("GET_ITER\n"); + } +} + +void emit_for_iter(emitter_t *emit, int label) { + emit_pre(emit, 1, 3); + if (emit->pass == PASS_3) { + printf("FOR_ITER %d\n", emit->label_offsets[label]); + } +} + +void emit_for_iter_end(emitter_t *emit) { + emit_pre(emit, -1, 0); +} + +void emit_pop_block(emitter_t *emit) { + emit_pre(emit, 0, 1); + if (emit->pass == PASS_3) { + printf("POP_BLOCK\n"); + } +} + +void emit_pop_except(emitter_t *emit) { + emit_pre(emit, 0, 1); + if (emit->pass == PASS_3) { + printf("POP_EXCEPT\n"); + } +} + +void emit_unary_op(emitter_t *emit, rt_unary_op_t op) { + emit_pre(emit, 0, 1); + if (emit->pass == PASS_3) { + switch (op) { + case RT_UNARY_OP_NOT: printf("UNARY_NOT\n"); break; + case RT_UNARY_OP_POSITIVE: printf("UNARY_POSITIVE\n"); break; + case RT_UNARY_OP_NEGATIVE: printf("UNARY_NEGATIVE\n"); break; + case RT_UNARY_OP_INVERT: printf("UNARY_INVERT\n"); break; + default: assert(0); + } + } +} + +void emit_binary_op(emitter_t *emit, rt_binary_op_t op) { + emit_pre(emit, -1, 1); + if (emit->pass == PASS_3) { + switch (op) { + case RT_BINARY_OP_SUBSCR: printf("BINARY_SUBSCR\n"); break; + case RT_BINARY_OP_OR: printf("BINARY_OR\n"); break; + case RT_BINARY_OP_XOR: printf("BINARY_XOR\n"); break; + case RT_BINARY_OP_AND: printf("BINARY_AND\n"); break; + case RT_BINARY_OP_LSHIFT: printf("BINARY_LSHIFT\n"); break; + case RT_BINARY_OP_RSHIFT: printf("BINARY_RSHIFT\n"); break; + case RT_BINARY_OP_ADD: printf("BINARY_ADD\n"); break; + case RT_BINARY_OP_SUBTRACT: printf("BINARY_SUBTRACT\n"); break; + case RT_BINARY_OP_MULTIPLY: printf("BINARY_MULTIPLY\n"); break; + case RT_BINARY_OP_FLOOR_DIVIDE: printf("BINARY_FLOOR_DIVIDE\n"); break; + case RT_BINARY_OP_TRUE_DIVIDE: printf("BINARY_TRUE_DIVIDE\n"); break; + case RT_BINARY_OP_MODULO: printf("BINARY_MODULO\n"); break; + case 
RT_BINARY_OP_POWER: printf("BINARY_POWER\n"); break; + case RT_BINARY_OP_INPLACE_OR: printf("INPLACE_OR\n"); break; + case RT_BINARY_OP_INPLACE_XOR: printf("INPLACE_XOR\n"); break; + case RT_BINARY_OP_INPLACE_AND: printf("INPLACE_AND\n"); break; + case RT_BINARY_OP_INPLACE_LSHIFT: printf("INPLACE_LSHIFT\n"); break; + case RT_BINARY_OP_INPLACE_RSHIFT: printf("INPLACE_RSHIFT\n"); break; + case RT_BINARY_OP_INPLACE_ADD: printf("INPLACE_ADD\n"); break; + case RT_BINARY_OP_INPLACE_SUBTRACT: printf("INPLACE_SUBTRACT\n"); break; + case RT_BINARY_OP_INPLACE_MULTIPLY: printf("INPLACE_MULTIPLY\n"); break; + case RT_BINARY_OP_INPLACE_FLOOR_DIVIDE: printf("INPLACE_FLOOR_DIVIDE\n"); break; + case RT_BINARY_OP_INPLACE_TRUE_DIVIDE: printf("INPLACE_TRUE_DIVIDE\n"); break; + case RT_BINARY_OP_INPLACE_MODULO: printf("INPLACE_MODULO\n"); break; + case RT_BINARY_OP_INPLACE_POWER: printf("INPLACE_POWER\n"); break; + default: assert(0); + } + } +} + +void emit_compare_op(emitter_t *emit, rt_compare_op_t op) { + emit_pre(emit, -1, 3); + if (emit->pass == PASS_3) { + switch (op) { + case RT_COMPARE_OP_LESS: printf("COMPARE_OP <\n"); break; + case RT_COMPARE_OP_MORE: printf("COMPARE_OP >\n"); break; + case RT_COMPARE_OP_EQUAL: printf("COMPARE_OP ==\n"); break; + case RT_COMPARE_OP_LESS_EQUAL: printf("COMPARE_OP <=\n"); break; + case RT_COMPARE_OP_MORE_EQUAL: printf("COMPARE_OP >=\n"); break; + case RT_COMPARE_OP_NOT_EQUAL: printf("COMPARE_OP !=\n"); break; + case RT_COMPARE_OP_IN: printf("COMPARE_OP in\n"); break; + case RT_COMPARE_OP_NOT_IN: printf("COMPARE_OP not in\n"); break; + case RT_COMPARE_OP_IS: printf("COMPARE_OP is\n"); break; + case RT_COMPARE_OP_IS_NOT: printf("COMPARE_OP is not\n"); break; + case RT_COMPARE_OP_EXCEPTION_MATCH: printf("COMPARE_OP exception match\n"); break; + default: assert(0); + } + } +} + +void emit_build_tuple(emitter_t *emit, int n_args) { + emit_pre(emit, 1 - n_args, 3); + if (emit->pass == PASS_3) { + printf("BUILD_TUPLE %d\n", n_args); + } +} + +void 
emit_build_list(emitter_t *emit, int n_args) { + emit_pre(emit, 1 - n_args, 3); + if (emit->pass == PASS_3) { + printf("BUILD_LIST %d\n", n_args); + } +} + +void emit_list_append(emitter_t *emit, int list_index) { + emit_pre(emit, -1, 3); + if (emit->pass == PASS_3) { + printf("LIST_APPEND %d\n", list_index); + } +} + +void emit_build_map(emitter_t *emit, int n_args) { + emit_pre(emit, 1, 3); + if (emit->pass == PASS_3) { + printf("BUILD_MAP %d\n", n_args); + } +} + +void emit_store_map(emitter_t *emit) { + emit_pre(emit, -2, 1); + if (emit->pass == PASS_3) { + printf("STORE_MAP\n"); + } +} + +void emit_map_add(emitter_t *emit, int map_index) { + emit_pre(emit, -2, 3); + if (emit->pass == PASS_3) { + printf("MAP_ADD %d\n", map_index); + } +} + +void emit_build_set(emitter_t *emit, int n_args) { + emit_pre(emit, 1 - n_args, 3); + if (emit->pass == PASS_3) { + printf("BUILD_SET %d\n", n_args); + } +} + +void emit_set_add(emitter_t *emit, int set_index) { + emit_pre(emit, -1, 3); + if (emit->pass == PASS_3) { + printf("SET_ADD %d\n", set_index); + } +} + +void emit_build_slice(emitter_t *emit, int n_args) { + emit_pre(emit, 1 - n_args, 3); + if (emit->pass == PASS_3) { + printf("BUILD_SLICE %d\n", n_args); + } +} + +void emit_unpack_sequence(emitter_t *emit, int n_args) { + emit_pre(emit, -1 + n_args, 3); + if (emit->pass == PASS_3) { + printf("UNPACK_SEQUENCE %d\n", n_args); + } +} + +void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) { + emit_pre(emit, -1 + n_left + n_right + 1, 3); + if (emit->pass == PASS_3) { + printf("UNPACK_EX %d\n", n_left | (n_right << 8)); + } +} + +void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { + int s = 0; + if (have_star_arg) { + s += 1; + } + if (have_dbl_star_arg) { + s += 1; + } + emit_pre(emit, -n_positional - 2 * n_keyword - s, 3); + if (emit->pass == PASS_3) { + if (have_star_arg) { + if (have_dbl_star_arg) { + printf("CALL_FUNCTION_VAR_KW"); + } 
else { + printf("CALL_FUNCTION_VAR"); + } + } else { + if (have_dbl_star_arg) { + printf("CALL_FUNCTION_KW"); + } else { + printf("CALL_FUNCTION"); + } + } + printf(" %d, %d\n", n_positional, n_keyword); + } +} + +void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { + emit_call_function(emit, n_positional, n_keyword, have_star_arg, have_dbl_star_arg); +} + +void emit_return_value(emitter_t *emit) { + emit_pre(emit, -1, 1); + emit->last_emit_was_return_value = true; + if (emit->pass == PASS_3) { + printf("RETURN_VALUE\n"); + } +} + +void emit_raise_varargs(emitter_t *emit, int n_args) { + emit_pre(emit, -n_args, 3); + if (emit->pass == PASS_3) { + printf("RAISE_VARARGS %d\n", n_args); + } +} + +void load_const_code_and_name(emitter_t *emit, qstr qstr) { + emit_pre(emit, 1, 3); + if (emit->pass == PASS_3) { + printf("LOAD_CONST code %s\n", qstr_str(qstr)); + } + // load qualified name + emit_pre(emit, 1, 3); + if (emit->pass == PASS_3) { + printf("LOAD_CONST '"); + // code just to work out the qualname (or whatever it is) + { + int depth = 0; + for (scope_t *s = emit->scope; s->parent != NULL; s = s->parent) { + depth += 1; + } + for (int wanted_depth = depth; wanted_depth >= 0; wanted_depth--) { + scope_t *s = emit->scope; + for (int i = 0; i < wanted_depth; i++) { + s = s->parent; + } + if (s->kind == SCOPE_FUNCTION) { + printf("%s..", qstr_str(s->simple_name)); + } else if (s->kind == SCOPE_CLASS) { + printf("%s.", qstr_str(s->simple_name)); + } + } + } + printf("%s'\n", qstr_str(qstr)); + } +} + +void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { + load_const_code_and_name(emit, scope->simple_name); + emit_pre(emit, -1 - n_default_params - 2 * n_dict_params, 3); + if (emit->pass == PASS_3) { + printf("MAKE_FUNCTION %d\n", (n_dict_params << 8) | n_default_params); + } +} + +void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int 
n_default_params) { + load_const_code_and_name(emit, scope->simple_name); + emit_pre(emit, -2 - n_default_params - 2 * n_dict_params, 3); + if (emit->pass == PASS_3) { + printf("MAKE_CLOSURE %d\n", (n_dict_params << 8) | n_default_params); + } +} + +void emit_yield_value(emitter_t *emit) { + emit_pre(emit, 0, 1); + if (emit->pass == PASS_2) { + emit->scope->flags |= SCOPE_FLAG_GENERATOR; + } + if (emit->pass == PASS_3) { + printf("YIELD_VALUE\n"); + } +} + +void emit_yield_from(emitter_t *emit) { + emit_pre(emit, -1, 1); + if (emit->pass == PASS_2) { + emit->scope->flags |= SCOPE_FLAG_GENERATOR; + } + if (emit->pass == PASS_3) { + printf("YIELD_FROM\n"); + } +} + +#endif // EMIT_DO_CPY diff --git a/py/emitthumb.c b/py/emitthumb.c new file mode 100644 index 0000000000..cad6b65044 --- /dev/null +++ b/py/emitthumb.c @@ -0,0 +1,673 @@ +#include +#include +#include +#include +#include +#include + +#include "misc.h" +#include "lexer.h" +#include "machine.h" +#include "parse.h" +#include "scope.h" +#include "runtime.h" +#include "emit.h" +#include "asmthumb.h" + +#ifdef EMIT_DO_THUMB + +#define REG_LOCAL_1 (REG_R4) +#define REG_LOCAL_2 (REG_R5) +#define REG_LOCAL_3 (REG_R6) +#define REG_TEMP (REG_R7) +#define REG_LOCAL_NUM (3) + +typedef enum { + NEED_TO_PUSH_NOTHING, + NEED_TO_PUSH_REG, + NEED_TO_PUSH_I32, +} need_to_push_t; + +struct _emitter_t { + int pass; + int stack_start; + int stack_size; + bool last_emit_was_return_value; + need_to_push_t need_to_push; + int last_reg; + int32_t last_i32; + + scope_t *scope; + + asm_thumb_t *as; + bool do_native_types; +}; + +emitter_t *emit_new() { + emitter_t *emit = m_new(emitter_t, 1); + emit->as = asm_thumb_new(); + emit->do_native_types = true; + return emit; +} + +void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) { + emit->pass = pass; + emit->stack_start = 0; + emit->stack_size = 0; + emit->last_emit_was_return_value = false; + emit->need_to_push = NEED_TO_PUSH_NOTHING; + emit->scope = scope; + if 
(pass == PASS_1) { + scope->unique_code_id = rt_get_new_unique_code_id(); + } + + asm_thumb_start_pass(emit->as, pass); + + // entry to function + int num_locals = 0; + if (pass > PASS_1) { + num_locals = scope->num_locals - REG_LOCAL_NUM; + if (num_locals < 0) { + num_locals = 0; + } + emit->stack_start = num_locals; + num_locals += scope->stack_size; + } + asm_thumb_entry(emit->as, num_locals); + + // initialise locals from parameters + for (int i = 0; i < scope->num_params; i++) { + if (i == 0) { + asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_1, REG_ARG_1); + } else if (i == 1) { + asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_2, REG_ARG_2); + } else if (i == 2) { + asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_3, REG_ARG_3); + } else if (i == 3) { + asm_thumb_mov_local_reg(emit->as, i - REG_LOCAL_NUM, REG_ARG_4); + } else { + // TODO not implemented + assert(0); + } + } + + asm_thumb_mov_reg_i32(emit->as, REG_R7, (machine_uint_t)rt_fun_table); +} + +void emit_end_pass(emitter_t *emit) { + if (!emit->last_emit_was_return_value) { + asm_thumb_exit(emit->as); + } + asm_thumb_end_pass(emit->as); + + // check stack is back to zero size + if (emit->stack_size != 0) { + printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size); + } + + if (emit->pass == PASS_3) { + py_fun_t f = asm_thumb_get_code(emit->as); + rt_assign_native_code(emit->scope->unique_code_id, f, asm_thumb_get_code_size(emit->as), emit->scope->num_params); + } +} + +bool emit_last_emit_was_return_value(emitter_t *emit) { + return emit->last_emit_was_return_value; +} + +int emit_get_stack_size(emitter_t *emit) { + return emit->stack_size; +} + +void emit_set_stack_size(emitter_t *emit, int size) { + emit->stack_size = size; +} + +static void adjust_stack(emitter_t *emit, int stack_size_delta) { + emit->stack_size += stack_size_delta; + assert(emit->stack_size >= 0); + if (emit->pass > PASS_1 && emit->stack_size > emit->scope->stack_size) { + emit->scope->stack_size = emit->stack_size; + } +} + 
+static void stack_settle(emitter_t *emit) { + switch (emit->need_to_push) { + case NEED_TO_PUSH_NOTHING: + break; + + case NEED_TO_PUSH_REG: + asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, emit->last_reg); + adjust_stack(emit, 1); + break; + + case NEED_TO_PUSH_I32: + asm_thumb_mov_reg_i32_optimised(emit->as, REG_R0, emit->last_i32); + asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, REG_R0); + adjust_stack(emit, 1); + break; + } + emit->need_to_push = NEED_TO_PUSH_NOTHING; +} + +static void emit_pre_raw(emitter_t *emit, int stack_size_delta) { + adjust_stack(emit, stack_size_delta); + emit->last_emit_was_return_value = false; +} + +static void emit_pre(emitter_t *emit) { + stack_settle(emit); + emit_pre_raw(emit, 0); +} + +static void emit_pre_pop_reg(emitter_t *emit, int reg_dest) { + switch (emit->need_to_push) { + case NEED_TO_PUSH_NOTHING: + asm_thumb_mov_reg_local(emit->as, reg_dest, emit->stack_start + emit->stack_size - 1); + emit_pre_raw(emit, -1); + break; + + case NEED_TO_PUSH_REG: + emit_pre_raw(emit, 0); + if (emit->last_reg != reg_dest) { + asm_thumb_mov_reg_reg(emit->as, reg_dest, emit->last_reg); + } + break; + + case NEED_TO_PUSH_I32: + emit_pre_raw(emit, 0); + asm_thumb_mov_reg_i32_optimised(emit->as, reg_dest, emit->last_i32); + break; + } + emit->need_to_push = NEED_TO_PUSH_NOTHING; +} + +static void emit_pre_pop_reg_reg(emitter_t *emit, int rega, int regb) { + emit_pre_pop_reg(emit, rega); + asm_thumb_mov_reg_local(emit->as, regb, emit->stack_start + emit->stack_size - 1); + adjust_stack(emit, -1); +} + +static void emit_pre_pop_reg_reg_reg(emitter_t *emit, int rega, int regb, int regc) { + emit_pre_pop_reg(emit, rega); + asm_thumb_mov_reg_local(emit->as, regb, emit->stack_start + emit->stack_size - 1); + asm_thumb_mov_reg_local(emit->as, regc, emit->stack_start + emit->stack_size - 2); + adjust_stack(emit, -2); +} + +static void emit_post(emitter_t *emit) { +} + +static void 
emit_post_push_reg(emitter_t *emit, int reg) { + emit->need_to_push = NEED_TO_PUSH_REG; + emit->last_reg = reg; +} + +static void emit_post_push_i32(emitter_t *emit, int32_t i32) { + emit->need_to_push = NEED_TO_PUSH_I32; + emit->last_i32 = i32; +} + +static void emit_post_push_reg_reg(emitter_t *emit, int rega, int regb) { + asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, rega); + emit->need_to_push = NEED_TO_PUSH_REG; + emit->last_reg = regb; + adjust_stack(emit, 1); +} + +static void emit_post_push_reg_reg_reg(emitter_t *emit, int rega, int regb, int regc) { + asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, rega); + asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size + 1, regb); + asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size + 2, regc); + adjust_stack(emit, 3); +} + +static void emit_post_push_reg_reg_reg_reg(emitter_t *emit, int rega, int regb, int regc, int regd) { + asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size, rega); + asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size + 1, regb); + asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size + 2, regc); + asm_thumb_mov_local_reg(emit->as, emit->stack_start + emit->stack_size + 3, regd); + adjust_stack(emit, 4); +} + +static void emit_get_stack_pointer_to_reg_for_pop(emitter_t *emit, int reg_dest, int n_pop) { + asm_thumb_mov_reg_local_addr(emit->as, reg_dest, emit->stack_start + emit->stack_size - 1); + adjust_stack(emit, -n_pop); +} + +static void emit_get_stack_pointer_to_reg_for_push(emitter_t *emit, int reg_dest, int n_push) { + asm_thumb_mov_reg_local_addr(emit->as, reg_dest, emit->stack_start + emit->stack_size + n_push - 1); + adjust_stack(emit, n_push); +} + +static void emit_call(emitter_t *emit, rt_fun_kind_t fun_kind) { + asm_thumb_bl_ind(emit->as, rt_fun_table[fun_kind], fun_kind, REG_R3); +} + +static void emit_call_with_i32_arg(emitter_t *emit, 
rt_fun_kind_t fun_kind, int32_t arg_val, int arg_reg) { + asm_thumb_mov_reg_i32_optimised(emit->as, arg_reg, arg_val); + asm_thumb_bl_ind(emit->as, rt_fun_table[fun_kind], fun_kind, REG_R3); +} + +int emit_label_new(emitter_t *emit) { + return asm_thumb_label_new(emit->as); +} + +void emit_label_assign(emitter_t *emit, int l) { + asm_thumb_label_assign(emit->as, l); +} + +void emit_import_name(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_import_from(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_import_star(emitter_t *emit) { + assert(0); +} + +void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) { + emit_pre(emit); + py_obj_t o; + switch (tok) { + case PY_TOKEN_KW_NONE: o = py_const_none; break; + case PY_TOKEN_KW_FALSE: o = py_const_false; break; + case PY_TOKEN_KW_TRUE: o = py_const_true; break; + default: assert(0); o = 0; // shouldn't happen + } + emit_post_push_i32(emit, (machine_uint_t)o); +} + +void emit_load_const_small_int(emitter_t *emit, int arg) { + emit_pre(emit); + if (emit->do_native_types) { + emit_post_push_i32(emit, arg); + } else { + emit_post_push_i32(emit, (arg << 1) | 1); + } +} + +void emit_load_const_int(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_load_const_dec(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_load_const_id(emitter_t *emit, qstr qstr) { + assert(0); +} + +void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) { + emit_pre(emit); + emit_call_with_i32_arg(emit, RT_F_LOAD_CONST_STR, qstr, REG_ARG_1); + emit_post_push_reg(emit, REG_RET); +} + +void emit_load_const_verbatim_start(emitter_t *emit) { + assert(0); +} +void emit_load_const_verbatim_int(emitter_t *emit, int val) { + assert(0); +} +void emit_load_const_verbatim_str(emitter_t *emit, const char *str) { + assert(0); +} +void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) { + assert(0); +} +void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) { + assert(0); 
+} +void emit_load_const_verbatim_end(emitter_t *emit) { + assert(0); +} + +void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) { + emit_pre(emit); + if (local_num == 0) { + emit_post_push_reg(emit, REG_LOCAL_1); + } else if (local_num == 1) { + emit_post_push_reg(emit, REG_LOCAL_2); + } else if (local_num == 2) { + emit_post_push_reg(emit, REG_LOCAL_3); + } else { + asm_thumb_mov_reg_local(emit->as, REG_R0, local_num - 1); + emit_post_push_reg(emit, REG_R0); + } +} + +void emit_load_name(emitter_t *emit, qstr qstr) { + emit_pre(emit); + emit_call_with_i32_arg(emit, RT_F_LOAD_NAME, qstr, REG_ARG_1); + emit_post_push_reg(emit, REG_RET); +} + +void emit_load_global(emitter_t *emit, qstr qstr) { + emit_pre(emit); + emit_call_with_i32_arg(emit, RT_F_LOAD_GLOBAL, qstr, REG_ARG_1); + emit_post_push_reg(emit, REG_RET); +} + +void emit_load_deref(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_load_closure(emitter_t *emit, qstr qstr) { + assert(0); +} + +void emit_load_attr(emitter_t *emit, qstr qstr) { + emit_pre_pop_reg(emit, REG_ARG_1); // arg1 = base + emit_call_with_i32_arg(emit, RT_F_LOAD_ATTR, qstr, REG_ARG_2); // arg2 = attribute name + emit_post_push_reg(emit, REG_RET); +} + +void emit_load_method(emitter_t *emit, qstr qstr) { + emit_pre_pop_reg(emit, REG_ARG_1); // arg1 = base + emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_3, 2); // arg3 = dest ptr + emit_call_with_i32_arg(emit, RT_F_LOAD_METHOD, qstr, REG_ARG_2); // arg2 = method name +} + +void emit_load_build_class(emitter_t *emit) { + assert(0); +} // basically load __build_class__ from builtins + +void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) { + if (local_num == 0) { + emit_pre_pop_reg(emit, REG_LOCAL_1); + } else if (local_num == 1) { + emit_pre_pop_reg(emit, REG_LOCAL_2); + } else if (local_num == 2) { + emit_pre_pop_reg(emit, REG_LOCAL_3); + } else { + emit_pre_pop_reg(emit, REG_R0); + asm_thumb_mov_local_reg(emit->as, local_num - 1, REG_R0); + } + 
emit_post(emit); +} + +void emit_store_name(emitter_t *emit, qstr qstr) { + emit_pre_pop_reg(emit, REG_ARG_2); + emit_call_with_i32_arg(emit, RT_F_STORE_NAME, qstr, REG_ARG_1); // arg1 = name + emit_post(emit); +} + +void emit_store_global(emitter_t *emit, qstr qstr) { + assert(0); +} + +void emit_store_deref(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_store_attr(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_store_locals(emitter_t *emit) { + assert(0); +} + +void emit_store_subscr(emitter_t *emit) { + emit_pre_pop_reg_reg_reg(emit, REG_ARG_2, REG_ARG_1, REG_ARG_3); // index, base, value to store + emit_call(emit, RT_F_STORE_SUBSCR); +} + +void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) { + assert(0); +} +void emit_delete_name(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_delete_global(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_delete_deref(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_delete_attr(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_delete_subscr(emitter_t *emit) { + assert(0); +} + +void emit_dup_top(emitter_t *emit) { + emit_pre_pop_reg(emit, REG_R0); + emit_post_push_reg_reg(emit, REG_R0, REG_R0); +} + +void emit_dup_top_two(emitter_t *emit) { + emit_pre_pop_reg_reg(emit, REG_R0, REG_R1); + emit_post_push_reg_reg_reg_reg(emit, REG_R1, REG_R0, REG_R1, REG_R0); +} + +void emit_pop_top(emitter_t *emit) { + emit_pre_pop_reg(emit, REG_R0); + emit_post(emit); +} + +void emit_rot_two(emitter_t *emit) { + assert(0); +} + +void emit_rot_three(emitter_t *emit) { + emit_pre_pop_reg_reg_reg(emit, REG_R0, REG_R1, REG_R2); + emit_post_push_reg_reg_reg(emit, REG_R0, REG_R2, REG_R1); +} + +void emit_jump(emitter_t *emit, int label) { + emit_pre(emit); + asm_thumb_b_label(emit->as, label); + emit_post(emit); +} + +void emit_pop_jump_if_false(emitter_t *emit, int label) { + if (emit->do_native_types) { + emit_pre_pop_reg(emit, REG_RET); + asm_thumb_cmp_reg_bz_label(emit->as, REG_RET, 
label); + emit_post(emit); + } else { + emit_pre_pop_reg(emit, REG_ARG_1); + emit_call(emit, RT_F_IS_TRUE); + asm_thumb_cmp_reg_bz_label(emit->as, REG_RET, label); + emit_post(emit); + } +} + +void emit_pop_jump_if_true(emitter_t *emit, int label) { + assert(0); +} +void emit_jump_if_true_or_pop(emitter_t *emit, int label) { + assert(0); +} +void emit_jump_if_false_or_pop(emitter_t *emit, int label) { + assert(0); +} + +void emit_setup_loop(emitter_t *emit, int label) { + emit_pre(emit); + emit_post(emit); +} + +void emit_break_loop(emitter_t *emit, int label) { + assert(0); +} +void emit_continue_loop(emitter_t *emit, int label) { + assert(0); +} +void emit_setup_with(emitter_t *emit, int label) { + assert(0); +} +void emit_with_cleanup(emitter_t *emit) { + assert(0); +} +void emit_setup_except(emitter_t *emit, int label) { + assert(0); +} +void emit_setup_finally(emitter_t *emit, int label) { + assert(0); +} +void emit_end_finally(emitter_t *emit) { + assert(0); +} +void emit_get_iter(emitter_t *emit) { + assert(0); +} // tos = getiter(tos) +void emit_for_iter(emitter_t *emit, int label) { + assert(0); +} +void emit_for_iter_end(emitter_t *emit) { + assert(0); +} +void emit_pop_except(emitter_t *emit) { + assert(0); +} + +void emit_unary_op(emitter_t *emit, rt_unary_op_t op) { + emit_pre_pop_reg(emit, REG_ARG_2); + emit_call_with_i32_arg(emit, RT_F_UNARY_OP, op, REG_ARG_1); + emit_post_push_reg(emit, REG_RET); +} + +void emit_build_tuple(emitter_t *emit, int n_args) { + assert(0); +} + +void emit_build_list(emitter_t *emit, int n_args) { + emit_pre(emit); + emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_2, n_args); // pointer to items in reverse order + emit_call_with_i32_arg(emit, RT_F_BUILD_LIST, n_args, REG_ARG_1); + emit_post_push_reg(emit, REG_RET); // new list +} + +void emit_list_append(emitter_t *emit, int list_index) { + assert(0); +} + +void emit_build_map(emitter_t *emit, int n_args) { + emit_pre(emit); + emit_call_with_i32_arg(emit, 
RT_F_BUILD_MAP, n_args, REG_ARG_1); + emit_post_push_reg(emit, REG_RET); // new map +} + +void emit_store_map(emitter_t *emit) { + emit_pre_pop_reg_reg_reg(emit, REG_ARG_2, REG_ARG_3, REG_ARG_1); // key, value, map + emit_call(emit, RT_F_STORE_MAP); + emit_post_push_reg(emit, REG_RET); // map +} + +void emit_map_add(emitter_t *emit, int map_index) { + assert(0); +} + +void emit_build_set(emitter_t *emit, int n_args) { + emit_pre(emit); + emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_2, n_args); // pointer to items in reverse order + emit_call_with_i32_arg(emit, RT_F_BUILD_SET, n_args, REG_ARG_1); + emit_post_push_reg(emit, REG_RET); // new set +} + +void emit_set_add(emitter_t *emit, int set_index) { + assert(0); +} +void emit_build_slice(emitter_t *emit, int n_args) { + assert(0); +} +void emit_unpack_sequence(emitter_t *emit, int n_args) { + assert(0); +} +void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) { + assert(0); +} + +void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { + assert(n_default_params == 0 && n_dict_params == 0); + emit_pre(emit); + emit_call_with_i32_arg(emit, RT_F_MAKE_FUNCTION_FROM_ID, scope->unique_code_id, REG_ARG_1); + emit_post_push_reg(emit, REG_RET); +} + +void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { + assert(0); +} + +void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { + assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg); + if (n_positional == 0) { + emit_pre_pop_reg(emit, REG_ARG_1); // the function + emit_call(emit, RT_F_CALL_FUNCTION_0); + } else if (n_positional == 1) { + emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1); // the single argument, the function + emit_call(emit, RT_F_CALL_FUNCTION_1); + } else if (n_positional == 2) { + emit_pre_pop_reg_reg_reg(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1); // the second argument, the first 
argument, the function + emit_call(emit, RT_F_CALL_FUNCTION_2); + } else { + assert(0); + } + emit_post_push_reg(emit, REG_RET); +} + +void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { + assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg); + if (n_positional == 0) { + emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1); // the self object (or NULL), the method + emit_call(emit, RT_F_CALL_METHOD_1); + } else if (n_positional == 1) { + emit_pre_pop_reg_reg_reg(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1); // the first argument, the self object (or NULL), the method + emit_call(emit, RT_F_CALL_METHOD_2); + } else { + assert(0); + } + emit_post_push_reg(emit, REG_RET); +} + +void emit_pop_block(emitter_t *emit) { + emit_pre(emit); + emit_post(emit); +} + +void emit_binary_op(emitter_t *emit, rt_binary_op_t op) { + if (emit->do_native_types) { + emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1); + asm_thumb_add_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, REG_ARG_2); + emit_post_push_reg(emit, REG_RET); + } else { + emit_pre_pop_reg_reg(emit, REG_ARG_3, REG_ARG_2); + emit_call_with_i32_arg(emit, RT_F_BINARY_OP, op, REG_ARG_1); + emit_post_push_reg(emit, REG_RET); + } +} + +void emit_compare_op(emitter_t *emit, rt_compare_op_t op) { + if (emit->do_native_types) { + emit_pre_pop_reg_reg(emit, REG_ARG_2, REG_ARG_1); + asm_thumb_cmp_reg_reg(emit->as, REG_ARG_1, REG_ARG_2); + asm_thumb_ite_ge(emit->as); + asm_thumb_mov_reg_i8(emit->as, REG_RET, 0); // if r0 >= r1 + asm_thumb_mov_reg_i8(emit->as, REG_RET, 1); // if r0 < r1 + emit_post_push_reg(emit, REG_RET); + } else { + emit_pre_pop_reg_reg(emit, REG_ARG_3, REG_ARG_2); + emit_call_with_i32_arg(emit, RT_F_COMPARE_OP, op, REG_ARG_1); + emit_post_push_reg(emit, REG_RET); + } +} + +void emit_return_value(emitter_t *emit) { + emit_pre_pop_reg(emit, REG_RET); + emit->last_emit_was_return_value = true; + //asm_thumb_call_ind(emit->as, 0, REG_R0); to seg fault for 
debugging with gdb + asm_thumb_exit(emit->as); +} + +void emit_raise_varargs(emitter_t *emit, int n_args) { + assert(0); +} +void emit_yield_value(emitter_t *emit) { + assert(0); +} +void emit_yield_from(emitter_t *emit) { + assert(0); +} + +#endif // EMIT_DO_THUMB diff --git a/py/emitx64.c b/py/emitx64.c new file mode 100644 index 0000000000..da4c7e333d --- /dev/null +++ b/py/emitx64.c @@ -0,0 +1,680 @@ +/* This code is equivalent to emitx64.c but pre-allocates stack + * space and uses mov instead of push/pop instructions to access + * the temporary stack. It runs in similar time, but uses 3*n + * more bytes, where n is number of push/pop instructions. + * + * This code is preferred because it keeps the stack aligned on a + * 16 byte boundary. + * + * Improvements: + * Doesn't call stub functions, does all the work inline. + * Has optimisations for loading i64s to stack. + */ + +#include +#include +#include +#include +#include +#include + +#include "misc.h" +#include "lexer.h" +#include "machine.h" +#include "parse.h" +#include "scope.h" +#include "runtime.h" +#include "emit.h" +#include "asmx64.h" + +#ifdef EMIT_DO_X64 + +#define REG_LOCAL_1 (REG_RBX) +#define REG_LOCAL_NUM (1) + +typedef enum { + NEED_TO_PUSH_NOTHING, + NEED_TO_PUSH_R64, + NEED_TO_PUSH_I64, +} need_to_push_t; + +struct _emitter_t { + int pass; + int stack_start; + int stack_size; + bool last_emit_was_return_value; + need_to_push_t need_to_push; + int last_r64; + int64_t last_i64; + + scope_t *scope; + + asm_x64_t *as; + bool do_native_types; +}; + +emitter_t *emit_new() { + emitter_t *emit = m_new(emitter_t, 1); + emit->as = asm_x64_new(); + emit->do_native_types = false; + return emit; +} + +void emit_set_native_types(emitter_t *emit, bool do_native_types) { + emit->do_native_types = do_native_types; +} + +void emit_start_pass(emitter_t *emit, pass_kind_t pass, scope_t *scope) { + emit->pass = pass; + emit->stack_start = 0; + emit->stack_size = 0; + emit->last_emit_was_return_value = false; + 
emit->need_to_push = NEED_TO_PUSH_NOTHING; + emit->scope = scope; + if (pass == PASS_1) { + scope->unique_code_id = rt_get_new_unique_code_id(); + } + + asm_x64_start_pass(emit->as, pass); + + // entry to function + int num_locals = 0; + if (pass > PASS_1) { + num_locals = scope->num_locals - REG_LOCAL_NUM; + if (num_locals < 0) { + num_locals = 0; + } + emit->stack_start = num_locals; + num_locals += scope->stack_size; + } + asm_x64_entry(emit->as, num_locals); + + // initialise locals from parameters + for (int i = 0; i < scope->num_params; i++) { + if (i == 0) { + asm_x64_mov_r64_to_r64(emit->as, REG_ARG_1, REG_LOCAL_1); + } else if (i == 1) { + asm_x64_mov_r64_to_local(emit->as, REG_ARG_2, i - 1); + } else if (i == 2) { + asm_x64_mov_r64_to_local(emit->as, REG_ARG_3, i - 1); + } else { + // TODO not implemented + assert(0); + } + } +} + +void emit_end_pass(emitter_t *emit) { + if (!emit->last_emit_was_return_value) { + asm_x64_exit(emit->as); + } + asm_x64_end_pass(emit->as); + + // check stack is back to zero size + if (emit->stack_size != 0) { + printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size); + } + + if (emit->pass == PASS_3) { + py_fun_t f = asm_x64_get_code(emit->as); + rt_assign_native_code(emit->scope->unique_code_id, f, asm_x64_get_code_size(emit->as), emit->scope->num_params); + } +} + +bool emit_last_emit_was_return_value(emitter_t *emit) { + return emit->last_emit_was_return_value; +} + +int emit_get_stack_size(emitter_t *emit) { + return emit->stack_size; +} + +void emit_set_stack_size(emitter_t *emit, int size) { + emit->stack_size = size; +} + +static void adjust_stack(emitter_t *emit, int stack_size_delta) { + emit->stack_size += stack_size_delta; + assert(emit->stack_size >= 0); + if (emit->pass > PASS_1 && emit->stack_size > emit->scope->stack_size) { + emit->scope->stack_size = emit->stack_size; + } +} + +static void stack_settle(emitter_t *emit) { + switch (emit->need_to_push) { + case NEED_TO_PUSH_NOTHING: + break; + 
+ case NEED_TO_PUSH_R64: + asm_x64_mov_r64_to_local(emit->as, emit->last_r64, emit->stack_start + emit->stack_size); + adjust_stack(emit, 1); + break; + + case NEED_TO_PUSH_I64: + asm_x64_mov_i64_to_r64_optimised(emit->as, emit->last_i64, REG_RAX); + asm_x64_mov_r64_to_local(emit->as, REG_RAX, emit->stack_start + emit->stack_size); + adjust_stack(emit, 1); + break; + } + emit->need_to_push = NEED_TO_PUSH_NOTHING; +} + +static void emit_pre_raw(emitter_t *emit, int stack_size_delta) { + adjust_stack(emit, stack_size_delta); + emit->last_emit_was_return_value = false; +} + +static void emit_pre(emitter_t *emit) { + stack_settle(emit); + emit_pre_raw(emit, 0); +} + +static void emit_pre_pop_r64(emitter_t *emit, int r64) { + switch (emit->need_to_push) { + case NEED_TO_PUSH_NOTHING: + asm_x64_mov_local_to_r64(emit->as, emit->stack_start + emit->stack_size - 1, r64); + emit_pre_raw(emit, -1); + break; + + case NEED_TO_PUSH_R64: + emit_pre_raw(emit, 0); + if (emit->last_r64 != r64) { + asm_x64_mov_r64_to_r64(emit->as, emit->last_r64, r64); + } + break; + + case NEED_TO_PUSH_I64: + emit_pre_raw(emit, 0); + asm_x64_mov_i64_to_r64_optimised(emit->as, emit->last_i64, r64); + break; + } + emit->need_to_push = NEED_TO_PUSH_NOTHING; +} + +static void emit_pre_pop_r64_r64(emitter_t *emit, int r64a, int r64b) { + emit_pre_pop_r64(emit, r64a); + asm_x64_mov_local_to_r64(emit->as, emit->stack_start + emit->stack_size - 1, r64b); + adjust_stack(emit, -1); +} + +static void emit_pre_pop_r64_r64_r64(emitter_t *emit, int r64a, int r64b, int r64c) { + emit_pre_pop_r64(emit, r64a); + asm_x64_mov_local_to_r64(emit->as, emit->stack_start + emit->stack_size - 1, r64b); + asm_x64_mov_local_to_r64(emit->as, emit->stack_start + emit->stack_size - 2, r64c); + adjust_stack(emit, -2); +} + +static void emit_post(emitter_t *emit) { +} + +static void emit_post_push_r64(emitter_t *emit, int r64) { + emit->need_to_push = NEED_TO_PUSH_R64; + emit->last_r64 = r64; +} + +static void 
emit_post_push_i64(emitter_t *emit, int64_t i64) { + emit->need_to_push = NEED_TO_PUSH_I64; + emit->last_i64 = i64; +} + +static void emit_post_push_r64_r64(emitter_t *emit, int r64a, int r64b) { + asm_x64_mov_r64_to_local(emit->as, r64a, emit->stack_start + emit->stack_size); + emit->need_to_push = NEED_TO_PUSH_R64; + emit->last_r64 = r64b; + adjust_stack(emit, 1); +} + +static void emit_post_push_r64_r64_r64(emitter_t *emit, int r64a, int r64b, int r64c) { + asm_x64_mov_r64_to_local(emit->as, r64a, emit->stack_start + emit->stack_size); + asm_x64_mov_r64_to_local(emit->as, r64b, emit->stack_start + emit->stack_size + 1); + asm_x64_mov_r64_to_local(emit->as, r64c, emit->stack_start + emit->stack_size + 2); + adjust_stack(emit, 3); +} + +static void emit_post_push_r64_r64_r64_r64(emitter_t *emit, int r64a, int r64b, int r64c, int r64d) { + asm_x64_mov_r64_to_local(emit->as, r64a, emit->stack_start + emit->stack_size); + asm_x64_mov_r64_to_local(emit->as, r64b, emit->stack_start + emit->stack_size + 1); + asm_x64_mov_r64_to_local(emit->as, r64c, emit->stack_start + emit->stack_size + 2); + asm_x64_mov_r64_to_local(emit->as, r64d, emit->stack_start + emit->stack_size + 3); + adjust_stack(emit, 4); +} + +static void emit_get_stack_pointer_to_r64_for_pop(emitter_t *emit, int r64, int n_pop) { + asm_x64_mov_local_addr_to_r64(emit->as, emit->stack_start + emit->stack_size - 1, r64); + adjust_stack(emit, -n_pop); +} + +static void emit_get_stack_pointer_to_r64_for_push(emitter_t *emit, int r64, int n_push) { + asm_x64_mov_local_addr_to_r64(emit->as, emit->stack_start + emit->stack_size + n_push - 1, r64); + adjust_stack(emit, n_push); +} + +static void emit_call(emitter_t *emit, void *fun) { + asm_x64_call_ind(emit->as, fun, REG_RAX); +} + +static void emit_call_with_i64_arg(emitter_t *emit, void *fun, int64_t arg_val, int arg_r64) { + asm_x64_mov_i64_to_r64_optimised(emit->as, arg_val, arg_r64); + asm_x64_call_ind(emit->as, fun, REG_RAX); +} + +int 
emit_label_new(emitter_t *emit) { + return asm_x64_label_new(emit->as); +} + +void emit_label_assign(emitter_t *emit, int l) { + asm_x64_label_assign(emit->as, l); +} + +void emit_import_name(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_import_from(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_import_star(emitter_t *emit) { + assert(0); +} + +void emit_load_const_tok(emitter_t *emit, py_token_kind_t tok) { + emit_pre(emit); + py_obj_t o; + switch (tok) { + case PY_TOKEN_KW_NONE: o = py_const_none; break; + case PY_TOKEN_KW_FALSE: o = py_const_false; break; + case PY_TOKEN_KW_TRUE: o = py_const_true; break; + default: assert(0); // shouldn't happen + } + emit_post_push_i64(emit, (uint64_t)o); +} + +void emit_load_const_small_int(emitter_t *emit, int arg) { + emit_pre(emit); + if (emit->do_native_types) { + emit_post_push_i64(emit, arg); + } else { + emit_post_push_i64(emit, (arg << 1) | 1); + } +} + +void emit_load_const_int(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_load_const_dec(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_load_const_id(emitter_t *emit, qstr qstr) { + assert(0); +} + +void emit_load_const_str(emitter_t *emit, qstr qstr, bool bytes) { + emit_pre(emit); + emit_call_with_i64_arg(emit, rt_load_const_str, qstr, REG_ARG_1); + emit_post_push_r64(emit, REG_RET); +} + +void emit_load_const_verbatim_start(emitter_t *emit) { + assert(0); +} +void emit_load_const_verbatim_int(emitter_t *emit, int val) { + assert(0); +} +void emit_load_const_verbatim_str(emitter_t *emit, const char *str) { + assert(0); +} +void emit_load_const_verbatim_strn(emitter_t *emit, const char *str, int len) { + assert(0); +} +void emit_load_const_verbatim_quoted_str(emitter_t *emit, qstr qstr, bool bytes) { + assert(0); +} +void emit_load_const_verbatim_end(emitter_t *emit) { + assert(0); +} + +void emit_load_fast(emitter_t *emit, qstr qstr, int local_num) { + if (local_num == 0) { + emit_pre(emit); + emit_post_push_r64(emit, 
REG_LOCAL_1); + } else { + emit_pre(emit); + asm_x64_mov_local_to_r64(emit->as, local_num - 1, REG_RAX); + emit_post_push_r64(emit, REG_RAX); + } +} + +void emit_load_name(emitter_t *emit, qstr qstr) { + emit_pre(emit); + emit_call_with_i64_arg(emit, rt_load_name, qstr, REG_ARG_1); + emit_post_push_r64(emit, REG_RET); +} + +void emit_load_global(emitter_t *emit, qstr qstr) { + emit_pre(emit); + emit_call_with_i64_arg(emit, rt_load_global, qstr, REG_ARG_1); + emit_post_push_r64(emit, REG_RET); +} + +void emit_load_deref(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_load_closure(emitter_t *emit, qstr qstr) { + assert(0); +} + +void emit_load_attr(emitter_t *emit, qstr qstr) { + emit_pre_pop_r64(emit, REG_ARG_1); // arg1 = base + emit_call_with_i64_arg(emit, rt_load_attr, qstr, REG_ARG_2); // arg2 = attribute name + emit_post_push_r64(emit, REG_RET); +} + +void emit_load_method(emitter_t *emit, qstr qstr) { + emit_pre_pop_r64(emit, REG_ARG_1); // arg1 = base + emit_get_stack_pointer_to_r64_for_push(emit, REG_ARG_3, 2); // arg3 = dest ptr + emit_call_with_i64_arg(emit, rt_load_method, qstr, REG_ARG_2); // arg2 = method name +} + +void emit_load_build_class(emitter_t *emit) { + assert(0); +} // basically load __build_class__ from builtins + +void emit_store_fast(emitter_t *emit, qstr qstr, int local_num) { + if (local_num == 0) { + emit_pre_pop_r64(emit, REG_LOCAL_1); + emit_post(emit); + } else { + emit_pre_pop_r64(emit, REG_RAX); + asm_x64_mov_r64_to_local(emit->as, REG_RAX, local_num - 1); + emit_post(emit); + } +} + +void emit_store_name(emitter_t *emit, qstr qstr) { + emit_pre_pop_r64(emit, REG_ARG_2); + emit_call_with_i64_arg(emit, rt_store_name, qstr, REG_ARG_1); // arg1 = name + emit_post(emit); +} + +void emit_store_global(emitter_t *emit, qstr qstr) { + assert(0); +} + +void emit_store_deref(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_store_attr(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_store_locals(emitter_t *emit) { + 
assert(0); +} + +void emit_store_subscr(emitter_t *emit) { + emit_pre_pop_r64_r64_r64(emit, REG_ARG_2, REG_ARG_1, REG_ARG_3); // index, base, value to store + emit_call(emit, rt_store_subscr); +} + +void emit_delete_fast(emitter_t *emit, qstr qstr, int local_num) { + assert(0); +} +void emit_delete_name(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_delete_global(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_delete_deref(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_delete_attr(emitter_t *emit, qstr qstr) { + assert(0); +} +void emit_delete_subscr(emitter_t *emit) { + assert(0); +} + +void emit_dup_top(emitter_t *emit) { + emit_pre_pop_r64(emit, REG_RAX); + emit_post_push_r64_r64(emit, REG_RAX, REG_RAX); +} + +void emit_dup_top_two(emitter_t *emit) { + emit_pre_pop_r64_r64(emit, REG_RAX, REG_RDI); + emit_post_push_r64_r64_r64_r64(emit, REG_RDI, REG_RAX, REG_RDI, REG_RAX); +} + +void emit_pop_top(emitter_t *emit) { + emit_pre_pop_r64(emit, REG_RAX); + emit_post(emit); +} + +void emit_rot_two(emitter_t *emit) { + assert(0); +} + +void emit_rot_three(emitter_t *emit) { + emit_pre_pop_r64_r64_r64(emit, REG_RAX, REG_RDI, REG_RSI); + emit_post_push_r64_r64_r64(emit, REG_RAX, REG_RSI, REG_RDI); +} + +void emit_jump(emitter_t *emit, int label) { + emit_pre(emit); + asm_x64_jmp_label(emit->as, label); + emit_post(emit); +} + +void emit_pop_jump_if_false(emitter_t *emit, int label) { + if (emit->do_native_types) { + emit_pre_pop_r64(emit, REG_RET); + asm_x64_test_r8_with_r8(emit->as, REG_RET, REG_RET); + asm_x64_jcc_label(emit->as, JCC_JZ, label); + emit_post(emit); + } else { + emit_pre_pop_r64(emit, REG_ARG_1); + emit_call(emit, rt_is_true); + asm_x64_test_r8_with_r8(emit->as, REG_RET, REG_RET); + asm_x64_jcc_label(emit->as, JCC_JZ, label); + emit_post(emit); + } +} + +void emit_pop_jump_if_true(emitter_t *emit, int label) { + assert(0); +} +void emit_jump_if_true_or_pop(emitter_t *emit, int label) { + assert(0); +} +void 
emit_jump_if_false_or_pop(emitter_t *emit, int label) { + assert(0); +} + +void emit_setup_loop(emitter_t *emit, int label) { + emit_pre(emit); + emit_post(emit); +} + +void emit_break_loop(emitter_t *emit, int label) { + assert(0); +} +void emit_continue_loop(emitter_t *emit, int label) { + assert(0); +} +void emit_setup_with(emitter_t *emit, int label) { + assert(0); +} +void emit_with_cleanup(emitter_t *emit) { + assert(0); +} +void emit_setup_except(emitter_t *emit, int label) { + assert(0); +} +void emit_setup_finally(emitter_t *emit, int label) { + assert(0); +} +void emit_end_finally(emitter_t *emit) { + assert(0); +} +void emit_get_iter(emitter_t *emit) { + assert(0); +} // tos = getiter(tos) +void emit_for_iter(emitter_t *emit, int label) { + assert(0); +} +void emit_for_iter_end(emitter_t *emit) { + assert(0); +} +void emit_pop_except(emitter_t *emit) { + assert(0); +} + +void emit_unary_op(emitter_t *emit, rt_unary_op_t op) { + emit_pre_pop_r64(emit, REG_ARG_2); + emit_call_with_i64_arg(emit, rt_unary_op, op, REG_ARG_1); + emit_post_push_r64(emit, REG_RET); +} + +void emit_build_tuple(emitter_t *emit, int n_args) { + assert(0); +} + +void emit_build_list(emitter_t *emit, int n_args) { + emit_pre(emit); + emit_get_stack_pointer_to_r64_for_pop(emit, REG_ARG_2, n_args); // pointer to items in reverse order + emit_call_with_i64_arg(emit, rt_build_list, n_args, REG_ARG_1); + emit_post_push_r64(emit, REG_RET); // new list +} + +void emit_list_append(emitter_t *emit, int list_index) { + assert(0); +} + +void emit_build_map(emitter_t *emit, int n_args) { + emit_pre(emit); + emit_call_with_i64_arg(emit, rt_build_map, n_args, REG_ARG_1); + emit_post_push_r64(emit, REG_RET); // new map +} + +void emit_store_map(emitter_t *emit) { + emit_pre_pop_r64_r64_r64(emit, REG_ARG_2, REG_ARG_3, REG_ARG_1); // key, value, map + emit_call(emit, rt_store_map); + emit_post_push_r64(emit, REG_RET); // map +} + +void emit_map_add(emitter_t *emit, int map_index) { + assert(0); +} + 
+void emit_build_set(emitter_t *emit, int n_args) { + emit_pre(emit); + emit_get_stack_pointer_to_r64_for_pop(emit, REG_ARG_2, n_args); // pointer to items in reverse order + emit_call_with_i64_arg(emit, rt_build_set, n_args, REG_ARG_1); + emit_post_push_r64(emit, REG_RET); // new set +} + +void emit_set_add(emitter_t *emit, int set_index) { + assert(0); +} +void emit_build_slice(emitter_t *emit, int n_args) { + assert(0); +} +void emit_unpack_sequence(emitter_t *emit, int n_args) { + assert(0); +} +void emit_unpack_ex(emitter_t *emit, int n_left, int n_right) { + assert(0); +} + +void emit_make_function(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { + assert(n_default_params == 0 && n_dict_params == 0); + emit_pre(emit); + emit_call_with_i64_arg(emit, rt_make_function_from_id, scope->unique_code_id, REG_ARG_1); + emit_post_push_r64(emit, REG_RET); +} + +void emit_make_closure(emitter_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { + assert(0); +} + +void emit_call_function(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { + assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg); + if (n_positional == 0) { + emit_pre_pop_r64(emit, REG_ARG_1); // the function + emit_call(emit, rt_call_function_0); + } else if (n_positional == 1) { + emit_pre_pop_r64_r64(emit, REG_ARG_2, REG_ARG_1); // the single argument, the function + emit_call(emit, rt_call_function_1); + } else if (n_positional == 2) { + emit_pre_pop_r64_r64_r64(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1); // the second argument, the first argument, the function + emit_call(emit, rt_call_function_2); + } else { + assert(0); + } + emit_post_push_r64(emit, REG_RET); +} + +void emit_call_method(emitter_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { + assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg); + if (n_positional == 0) { + emit_pre_pop_r64_r64(emit, REG_ARG_2, 
REG_ARG_1); // the self object (or NULL), the method + emit_call(emit, rt_call_method_1); + } else if (n_positional == 1) { + emit_pre_pop_r64_r64_r64(emit, REG_ARG_3, REG_ARG_2, REG_ARG_1); // the first argument, the self object (or NULL), the method + emit_call(emit, rt_call_method_2); + } else { + assert(0); + } + emit_post_push_r64(emit, REG_RET); +} + +void emit_pop_block(emitter_t *emit) { + emit_pre(emit); + emit_post(emit); +} + +void emit_binary_op(emitter_t *emit, rt_binary_op_t op) { + if (emit->do_native_types) { + assert(op == RT_BINARY_OP_ADD); + emit_pre_pop_r64_r64(emit, REG_ARG_2, REG_RET); + asm_x64_add_r64_to_r64(emit->as, REG_ARG_2, REG_RET); + emit_post_push_r64(emit, REG_RET); + } else { + emit_pre_pop_r64_r64(emit, REG_ARG_3, REG_ARG_2); + emit_call_with_i64_arg(emit, rt_binary_op, op, REG_ARG_1); + emit_post_push_r64(emit, REG_RET); + } +} + +void emit_compare_op(emitter_t *emit, rt_compare_op_t op) { + if (emit->do_native_types) { + assert(op == RT_COMPARE_OP_LESS); + emit_pre_pop_r64_r64(emit, REG_ARG_3, REG_ARG_2); + asm_x64_xor_r64_to_r64(emit->as, REG_RET, REG_RET); + asm_x64_cmp_r64_with_r64(emit->as, REG_ARG_3, REG_ARG_2); + asm_x64_setcc_r8(emit->as, JCC_JL, REG_RET); + emit_post_push_r64(emit, REG_RET); + } else { + emit_pre_pop_r64_r64(emit, REG_ARG_3, REG_ARG_2); + emit_call_with_i64_arg(emit, rt_compare_op, op, REG_ARG_1); + emit_post_push_r64(emit, REG_RET); + } +} + +void emit_return_value(emitter_t *emit) { + emit_pre_pop_r64(emit, REG_RAX); + emit->last_emit_was_return_value = true; + //asm_x64_call_ind(emit->as, 0, REG_RAX); to seg fault for debugging with gdb + asm_x64_exit(emit->as); +} + +void emit_raise_varargs(emitter_t *emit, int n_args) { + assert(0); +} +void emit_yield_value(emitter_t *emit) { + assert(0); +} +void emit_yield_from(emitter_t *emit) { + assert(0); +} + +#endif // EMIT_DO_X64 diff --git a/py/grammar.h b/py/grammar.h new file mode 100644 index 0000000000..05bb237a52 --- /dev/null +++ b/py/grammar.h @@ 
-0,0 +1,300 @@ +// rules for writing rules: +// - zero_or_more is implemented using opt_rule around a one_or_more rule +// - don't put opt_rule in arguments of or rule; instead, wrap the call to this or rule in opt_rule + +// # Start symbols for the grammar: +// # single_input is a single interactive statement; +// # file_input is a module or sequence of commands read from an input file; +// # eval_input is the input for the eval() functions. +// # NB: compound_stmt in single_input is followed by extra NEWLINE! +// single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +// file_input: (NEWLINE | stmt)* ENDMARKER +// eval_input: testlist NEWLINE* ENDMARKER + +DEF_RULE(file_input, nc, and(1), opt_rule(file_input_2)) +DEF_RULE(file_input_2, c(generic_all_nodes), one_or_more, rule(file_input_3)) +DEF_RULE(file_input_3, nc, or(2), tok(NEWLINE), rule(stmt)) + +// decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +// decorators: decorator+ +// decorated: decorators (classdef | funcdef) +// funcdef: 'def' NAME parameters ['->' test] ':' suite +// parameters: '(' [typedargslist] ')' +// typedargslist: tfpdef ['=' test] (',' tfpdef ['=' test])* [',' ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef +// tfpdef: NAME [':' test] +// varargslist: vfpdef ['=' test] (',' vfpdef ['=' test])* [',' ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef +// vfpdef: NAME + +DEF_RULE(decorator, nc, and(4), tok(DEL_AT), rule(dotted_name), opt_rule(trailer_paren), tok(NEWLINE)) +//DEF_RULE(decorator_2, nc, and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE)) +DEF_RULE(decorators, nc, one_or_more, rule(decorator)) +DEF_RULE(decorated, c(decorated), and(2), rule(decorators), rule(decorated_body)) +DEF_RULE(decorated_body, nc, or(2), rule(classdef), rule(funcdef)) 
+DEF_RULE(funcdef, c(funcdef), and(8), tok(KW_DEF), tok(NAME), tok(DEL_PAREN_OPEN), opt_rule(typedargslist), tok(DEL_PAREN_CLOSE), opt_rule(funcdef_2), tok(DEL_COLON), rule(suite)) +DEF_RULE(funcdef_2, nc, and(2), tok(DEL_MINUS_MORE), rule(test)) +// TODO typedargslist lets through more than is allowed +DEF_RULE(typedargslist, nc, list_with_end, rule(typedargslist_item), tok(DEL_COMMA)) +DEF_RULE(typedargslist_item, nc, or(3), rule(typedargslist_name), rule(typedargslist_star), rule(typedargslist_dbl_star)) +DEF_RULE(typedargslist_name, nc, and(3), tok(NAME), opt_rule(typedargslist_colon), opt_rule(typedargslist_equal)) +DEF_RULE(typedargslist_star, nc, and(2), tok(OP_STAR), opt_rule(tfpdef)) +DEF_RULE(typedargslist_dbl_star, nc, and(3), tok(OP_DBL_STAR), tok(NAME), opt_rule(typedargslist_colon)) +DEF_RULE(typedargslist_colon, nc, and(2), tok(DEL_COLON), rule(test)) +DEF_RULE(typedargslist_equal, nc, and(2), tok(DEL_EQUAL), rule(test)) +DEF_RULE(tfpdef, nc, and(2), tok(NAME), opt_rule(typedargslist_colon)) +// TODO varargslist lets through more than is allowed +DEF_RULE(varargslist, nc, list_with_end, rule(varargslist_item), tok(DEL_COMMA)) +DEF_RULE(varargslist_item, nc, or(3), rule(varargslist_name), rule(varargslist_star), rule(varargslist_dbl_star)) +DEF_RULE(varargslist_name, nc, and(2), tok(NAME), opt_rule(varargslist_equal)) +DEF_RULE(varargslist_star, nc, and(2), tok(OP_STAR), opt_rule(vfpdef)) +DEF_RULE(varargslist_dbl_star, nc, and(2), tok(OP_DBL_STAR), tok(NAME)) +DEF_RULE(varargslist_equal, nc, and(2), tok(DEL_EQUAL), rule(test)) +DEF_RULE(vfpdef, nc, and(1), tok(NAME)) + +// stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | simple_stmt + +DEF_RULE(stmt, nc, or(9), rule(if_stmt), rule(while_stmt), rule(for_stmt), rule(try_stmt), rule(with_stmt), rule(funcdef), rule(classdef), rule(decorated), rule(simple_stmt)) + +// simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE + +DEF_RULE(simple_stmt, nc, and(2), 
rule(simple_stmt_2), tok(NEWLINE)) +DEF_RULE(simple_stmt_2, c(generic_all_nodes), list_with_end, rule(small_stmt), tok(DEL_SEMICOLON)) + +// small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt +// expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | ('=' (yield_expr|testlist_star_expr))*) +// testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +// augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=' | '**=' | '//=' +// # For normal assignments, additional restrictions enforced by the interpreter + +DEF_RULE(small_stmt, nc, or(8), rule(del_stmt), rule(pass_stmt), rule(flow_stmt), rule(import_stmt), rule(global_stmt), rule(nonlocal_stmt), rule(assert_stmt), rule(expr_stmt)) +DEF_RULE(expr_stmt, c(expr_stmt), and(2), rule(testlist_star_expr), opt_rule(expr_stmt_2)) +DEF_RULE(expr_stmt_2, nc, or(2), rule(expr_stmt_augassign), rule(expr_stmt_assign_list)) +DEF_RULE(expr_stmt_augassign, nc, and(2), rule(augassign), rule(expr_stmt_6)) +DEF_RULE(expr_stmt_assign_list, nc, one_or_more, rule(expr_stmt_assign)) +DEF_RULE(expr_stmt_assign, nc, and(2), tok(DEL_EQUAL), rule(expr_stmt_6)) +DEF_RULE(expr_stmt_6, nc, or(2), rule(yield_expr), rule(testlist_star_expr)) +DEF_RULE(testlist_star_expr, c(generic_tuple), list_with_end, rule(testlist_star_expr_2), tok(DEL_COMMA)) +DEF_RULE(testlist_star_expr_2, nc, or(2), rule(star_expr), rule(test)) +DEF_RULE(augassign, nc, or(12), tok(DEL_PLUS_EQUAL), tok(DEL_MINUS_EQUAL), tok(DEL_STAR_EQUAL), tok(DEL_SLASH_EQUAL), tok(DEL_PERCENT_EQUAL), tok(DEL_AMPERSAND_EQUAL), tok(DEL_PIPE_EQUAL), tok(DEL_CARET_EQUAL), tok(DEL_DBL_LESS_EQUAL), tok(DEL_DBL_MORE_EQUAL), tok(DEL_DBL_STAR_EQUAL), tok(DEL_DBL_SLASH_EQUAL)) + +// del_stmt: 'del' exprlist +// pass_stmt: 'pass' +// flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +// break_stmt: 'break' +// continue_stmt: 'continue' +// return_stmt: 'return' 
[testlist] +// yield_stmt: yield_expr +// raise_stmt: 'raise' [test ['from' test]] + +DEF_RULE(del_stmt, c(del_stmt), and(2), tok(KW_DEL), rule(exprlist)) +DEF_RULE(pass_stmt, c(generic_all_nodes), and(1), tok(KW_PASS)) +DEF_RULE(flow_stmt, nc, or(5), rule(break_stmt), rule(continue_stmt), rule(return_stmt), rule(raise_stmt), rule(yield_stmt)) +DEF_RULE(break_stmt, c(break_stmt), and(1), tok(KW_BREAK)) +DEF_RULE(continue_stmt, c(continue_stmt), and(1), tok(KW_CONTINUE)) +DEF_RULE(return_stmt, c(return_stmt), and(2), tok(KW_RETURN), opt_rule(testlist)) +DEF_RULE(yield_stmt, c(yield_stmt), and(1), rule(yield_expr)) +DEF_RULE(raise_stmt, c(raise_stmt), and(2), tok(KW_RAISE), opt_rule(raise_stmt_arg)) +DEF_RULE(raise_stmt_arg, nc, and(2), rule(test), opt_rule(raise_stmt_from)) +DEF_RULE(raise_stmt_from, nc, and(2), tok(KW_FROM), rule(test)) + +// import_stmt: import_name | import_from +// import_name: 'import' dotted_as_names +// import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+) 'import' ('*' | '(' import_as_names ')' | import_as_names) +// import_as_name: NAME ['as' NAME] +// dotted_as_name: dotted_name ['as' NAME] +// import_as_names: import_as_name (',' import_as_name)* [','] +// dotted_as_names: dotted_as_name (',' dotted_as_name)* +// dotted_name: NAME ('.' 
NAME)* +// global_stmt: 'global' NAME (',' NAME)* +// nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +// assert_stmt: 'assert' test [',' test] + +DEF_RULE(import_stmt, nc, or(2), rule(import_name), rule(import_from)) +DEF_RULE(import_name, c(import_name), and(2), tok(KW_IMPORT), rule(dotted_as_names)) +DEF_RULE(import_from, c(import_from), and(4), tok(KW_FROM), rule(import_from_2), tok(KW_IMPORT), rule(import_from_3)) +DEF_RULE(import_from_2, nc, or(2), rule(dotted_name), rule(import_from_2b)) +DEF_RULE(import_from_2b, nc, and(2), rule(one_or_more_period_or_ellipses), opt_rule(dotted_name)) +DEF_RULE(import_from_3, nc, or(3), tok(OP_STAR), rule(import_as_names_paren), rule(import_as_names)) +DEF_RULE(import_as_names_paren, nc, and(3), tok(DEL_PAREN_OPEN), rule(import_as_names), tok(DEL_PAREN_CLOSE)) +DEF_RULE(one_or_more_period_or_ellipses, nc, one_or_more, rule(period_or_ellipses)) +DEF_RULE(period_or_ellipses, nc, or(2), tok(DEL_PERIOD), tok(ELLIPSES)) +DEF_RULE(import_as_name, nc, and(2), tok(NAME), opt_rule(as_name)) +DEF_RULE(dotted_as_name, nc, and(2), rule(dotted_name), opt_rule(as_name)) +DEF_RULE(as_name, nc, and(2), tok(KW_AS), tok(NAME)) +DEF_RULE(import_as_names, nc, list_with_end, rule(import_as_name), tok(DEL_COMMA)) +DEF_RULE(dotted_as_names, nc, list, rule(dotted_as_name), tok(DEL_COMMA)) +DEF_RULE(dotted_name, nc, list, tok(NAME), tok(DEL_PERIOD)) +DEF_RULE(global_stmt, c(global_stmt), and(2), tok(KW_GLOBAL), rule(name_list)) +DEF_RULE(nonlocal_stmt, c(nonlocal_stmt), and(2), tok(KW_NONLOCAL), rule(name_list)) +DEF_RULE(name_list, nc, list, tok(NAME), tok(DEL_COMMA)) +DEF_RULE(assert_stmt, c(assert_stmt), and(3), tok(KW_ASSERT), rule(test), opt_rule(assert_stmt_extra)) +DEF_RULE(assert_stmt_extra, nc, and(2), tok(DEL_COMMA), rule(test)) + +// if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +// while_stmt: 'while' test ':' suite ['else' ':' suite] +// for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +// 
try_stmt: 'try' ':' suite ((except_clause ':' suite)+ ['else' ':' suite] ['finally' ':' suite] | 'finally' ':' suite) +// # NB compile.c makes sure that the default except clause is last +// except_clause: 'except' [test ['as' NAME]] +// with_stmt: 'with' with_item (',' with_item)* ':' suite +// with_item: test ['as' expr] +// suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +DEF_RULE(if_stmt, c(if_stmt), and(6), tok(KW_IF), rule(test), tok(DEL_COLON), rule(suite), opt_rule(if_stmt_elif_list), opt_rule(else_stmt)) +DEF_RULE(if_stmt_elif_list, nc, one_or_more, rule(if_stmt_elif)) +DEF_RULE(if_stmt_elif, nc, and(4), tok(KW_ELIF), rule(test), tok(DEL_COLON), rule(suite)) +DEF_RULE(while_stmt, c(while_stmt), and(5), tok(KW_WHILE), rule(test), tok(DEL_COLON), rule(suite), opt_rule(else_stmt)) +DEF_RULE(for_stmt, c(for_stmt), and(7), tok(KW_FOR), rule(exprlist), tok(KW_IN), rule(testlist), tok(DEL_COLON), rule(suite), opt_rule(else_stmt)) +DEF_RULE(try_stmt, c(try_stmt), and(4), tok(KW_TRY), tok(DEL_COLON), rule(suite), rule(try_stmt_2)) +DEF_RULE(try_stmt_2, nc, or(2), rule(try_stmt_except_and_more), rule(try_stmt_finally)) +DEF_RULE(try_stmt_except_and_more, nc, and(3), rule(try_stmt_except_list), opt_rule(else_stmt), opt_rule(try_stmt_finally)) +DEF_RULE(try_stmt_except, nc, and(4), tok(KW_EXCEPT), opt_rule(try_stmt_as_name), tok(DEL_COLON), rule(suite)) +DEF_RULE(try_stmt_as_name, nc, and(2), rule(test), opt_rule(as_name)) +DEF_RULE(try_stmt_except_list, nc, one_or_more, rule(try_stmt_except)) +DEF_RULE(try_stmt_finally, nc, and(3), tok(KW_FINALLY), tok(DEL_COLON), rule(suite)) +DEF_RULE(else_stmt, nc, and(3), tok(KW_ELSE), tok(DEL_COLON), rule(suite)) +DEF_RULE(with_stmt, c(with_stmt), and(4), tok(KW_WITH), rule(with_stmt_list), tok(DEL_COLON), rule(suite)) +DEF_RULE(with_stmt_list, nc, list, rule(with_item), tok(DEL_COMMA)) +DEF_RULE(with_item, nc, and(2), rule(test), opt_rule(with_item_as)) +DEF_RULE(with_item_as, nc, and(2), tok(KW_AS), rule(expr)) 
+DEF_RULE(suite, nc, or(2), rule(suite_block), rule(simple_stmt)) +DEF_RULE(suite_block, nc, and(4), tok(NEWLINE), tok(INDENT), rule(suite_block_stmts), tok(DEDENT)) +DEF_RULE(suite_block_stmts, c(generic_all_nodes), one_or_more, rule(stmt)) + +// test: or_test ['if' or_test 'else' test] | lambdef +// test_nocond: or_test | lambdef_nocond +// lambdef: 'lambda' [varargslist] ':' test +// lambdef_nocond: 'lambda' [varargslist] ':' test_nocond + +DEF_RULE(test, nc, or(2), rule(lambdef), rule(test_if_expr)) +DEF_RULE(test_if_expr, c(test_if_expr), and(2), rule(or_test), opt_rule(test_if_else)) +DEF_RULE(test_if_else, nc, and(4), tok(KW_IF), rule(or_test), tok(KW_ELSE), rule(test)) +DEF_RULE(test_nocond, nc, or(2), rule(lambdef_nocond), rule(or_test)) +DEF_RULE(lambdef, c(lambdef), and(4), tok(KW_LAMBDA), opt_rule(varargslist), tok(DEL_COLON), rule(test)) +DEF_RULE(lambdef_nocond, c(lambdef), and(4), tok(KW_LAMBDA), opt_rule(varargslist), tok(DEL_COLON), rule(test_nocond)) + +// or_test: and_test ('or' and_test)* +// and_test: not_test ('and' not_test)* +// not_test: 'not' not_test | comparison +// comparison: expr (comp_op expr)* +// comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'|'is' 'not' +// star_expr: '*' expr +// expr: xor_expr ('|' xor_expr)* +// xor_expr: and_expr ('^' and_expr)* +// and_expr: shift_expr ('&' shift_expr)* +// shift_expr: arith_expr (('<<'|'>>') arith_expr)* +// arith_expr: term (('+'|'-') term)* +// term: factor (('*'|'/'|'%'|'//') factor)* +// factor: ('+'|'-'|'~') factor | power +// power: atom trailer* ['**' factor] + +DEF_RULE(or_test, c(or_test), list, rule(and_test), tok(KW_OR)) +DEF_RULE(and_test, c(and_test), list, rule(not_test), tok(KW_AND)) +DEF_RULE(not_test, nc, or(2), rule(not_test_2), rule(comparison)) +DEF_RULE(not_test_2, c(not_test_2), and(2), tok(KW_NOT), rule(not_test)) +DEF_RULE(comparison, c(comparison), list, rule(expr), rule(comp_op)) +DEF_RULE(comp_op, nc, or(9), tok(OP_LESS), tok(OP_MORE), tok(OP_DBL_EQUAL), 
tok(OP_LESS_EQUAL), tok(OP_MORE_EQUAL), tok(OP_NOT_EQUAL), tok(KW_IN), rule(comp_op_not_in), rule(comp_op_is)) +DEF_RULE(comp_op_not_in, nc, and(2), tok(KW_NOT), tok(KW_IN)) +DEF_RULE(comp_op_is, nc, and(2), tok(KW_IS), opt_rule(comp_op_is_not)) +DEF_RULE(comp_op_is_not, nc, and(1), tok(KW_NOT)) +DEF_RULE(star_expr, c(star_expr), and(2), tok(OP_STAR), rule(expr)) +DEF_RULE(expr, c(expr), list, rule(xor_expr), tok(OP_PIPE)) +DEF_RULE(xor_expr, c(xor_expr), list, rule(and_expr), tok(OP_CARET)) +DEF_RULE(and_expr, c(and_expr), list, rule(shift_expr), tok(OP_AMPERSAND)) +DEF_RULE(shift_expr, c(shift_expr), list, rule(arith_expr), rule(shift_op)) +DEF_RULE(shift_op, nc, or(2), tok(OP_DBL_LESS), tok(OP_DBL_MORE)) +DEF_RULE(arith_expr, c(arith_expr), list, rule(term), rule(arith_op)) +DEF_RULE(arith_op, nc, or(2), tok(OP_PLUS), tok(OP_MINUS)) +DEF_RULE(term, c(term), list, rule(factor), rule(term_op)) +DEF_RULE(term_op, nc, or(4), tok(OP_STAR), tok(OP_SLASH), tok(OP_PERCENT), tok(OP_DBL_SLASH)) +DEF_RULE(factor, nc, or(2), rule(factor_2), rule(power)) +DEF_RULE(factor_2, c(factor_2), and(2), rule(factor_op), rule(factor)) +DEF_RULE(factor_op, nc, or(3), tok(OP_PLUS), tok(OP_MINUS), tok(OP_TILDE)) +DEF_RULE(power, c(generic_all_nodes), and(3), rule(atom), opt_rule(power_trailers), opt_rule(power_dbl_star)) +DEF_RULE(power_trailers, c(power_trailers), one_or_more, rule(trailer)) +DEF_RULE(power_dbl_star, c(power_dbl_star), and(2), tok(OP_DBL_STAR), rule(factor)) + +// atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' | '{' [dictorsetmaker] '}' | NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False' +// testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) +// trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' 
NAME + +DEF_RULE(atom, nc, or(10), tok(NAME), tok(NUMBER), rule(atom_string), tok(ELLIPSES), tok(KW_NONE), tok(KW_TRUE), tok(KW_FALSE), rule(atom_paren), rule(atom_bracket), rule(atom_brace)) +DEF_RULE(atom_string, c(atom_string), one_or_more, rule(string_or_bytes)) +DEF_RULE(string_or_bytes, nc, or(2), tok(STRING), tok(BYTES)) +DEF_RULE(atom_paren, c(atom_paren), and(3), tok(DEL_PAREN_OPEN), opt_rule(atom_2b), tok(DEL_PAREN_CLOSE)) +DEF_RULE(atom_2b, nc, or(2), rule(yield_expr), rule(testlist_comp)) +DEF_RULE(atom_bracket, c(atom_bracket), and(3), tok(DEL_BRACKET_OPEN), opt_rule(testlist_comp), tok(DEL_BRACKET_CLOSE)) +DEF_RULE(atom_brace, c(atom_brace), and(3), tok(DEL_BRACE_OPEN), opt_rule(dictorsetmaker), tok(DEL_BRACE_CLOSE)) +DEF_RULE(testlist_comp, nc, and(2), rule(testlist_comp_2), opt_rule(testlist_comp_3)) +DEF_RULE(testlist_comp_2, nc, or(2), rule(star_expr), rule(test)) +DEF_RULE(testlist_comp_3, nc, or(2), rule(comp_for), rule(testlist_comp_3b)) +DEF_RULE(testlist_comp_3b, nc, and(2), tok(DEL_COMMA), opt_rule(testlist_comp_3c)) +DEF_RULE(testlist_comp_3c, nc, list_with_end, rule(testlist_comp_2), tok(DEL_COMMA)) +DEF_RULE(trailer, nc, or(3), rule(trailer_paren), rule(trailer_bracket), rule(trailer_period)) +DEF_RULE(trailer_paren, c(trailer_paren), and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE)) +DEF_RULE(trailer_bracket, c(trailer_bracket), and(3), tok(DEL_BRACKET_OPEN), rule(subscriptlist), tok(DEL_BRACKET_CLOSE)) +DEF_RULE(trailer_period, c(trailer_period), and(2), tok(DEL_PERIOD), tok(NAME)) + +// subscriptlist: subscript (',' subscript)* [','] +// subscript: test | [test] ':' [test] [sliceop] +// sliceop: ':' [test] + +DEF_RULE(subscriptlist, c(generic_tuple), list_with_end, rule(subscript), tok(DEL_COMMA)) +DEF_RULE(subscript, nc, or(2), rule(subscript_3), rule(subscript_2)) +DEF_RULE(subscript_2, c(subscript_2), and(2), rule(test), opt_rule(subscript_3)) +DEF_RULE(subscript_3, c(subscript_3), and(2), tok(DEL_COLON), 
opt_rule(subscript_3b)) +DEF_RULE(subscript_3b, nc, or(2), rule(subscript_3c), rule(subscript_3d)) +DEF_RULE(subscript_3c, nc, and(2), tok(DEL_COLON), opt_rule(test)) +DEF_RULE(subscript_3d, nc, and(2), rule(test), opt_rule(sliceop)) +DEF_RULE(sliceop, nc, and(2), tok(DEL_COLON), opt_rule(test)) + +// exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +// testlist: test (',' test)* [','] +// dictorsetmaker: (test ':' test (comp_for | (',' test ':' test)* [','])) | (test (comp_for | (',' test)* [','])) + +DEF_RULE(exprlist, nc, list_with_end, rule(exprlist_2), tok(DEL_COMMA)) +DEF_RULE(exprlist_2, nc, or(2), rule(star_expr), rule(expr)) +DEF_RULE(testlist, c(generic_tuple), list_with_end, rule(test), tok(DEL_COMMA)) +// TODO dictorsetmaker lets through more than is allowed +DEF_RULE(dictorsetmaker, nc, and(2), rule(dictorsetmaker_item), opt_rule(dictorsetmaker_tail)) +DEF_RULE(dictorsetmaker_item, c(dictorsetmaker_item), and(2), rule(test), opt_rule(dictorsetmaker_colon)) +DEF_RULE(dictorsetmaker_colon, nc, and(2), tok(DEL_COLON), rule(test)) +DEF_RULE(dictorsetmaker_tail, nc, or(2), rule(comp_for), rule(dictorsetmaker_list)) +DEF_RULE(dictorsetmaker_list, nc, and(2), tok(DEL_COMMA), opt_rule(dictorsetmaker_list2)) +DEF_RULE(dictorsetmaker_list2, nc, list_with_end, rule(dictorsetmaker_item), tok(DEL_COMMA)) + +// classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +DEF_RULE(classdef, c(classdef), and(5), tok(KW_CLASS), tok(NAME), opt_rule(classdef_2), tok(DEL_COLON), rule(suite)) +DEF_RULE(classdef_2, nc, and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE)) + +// arglist: (argument ',')* (argument [','] | '*' test (',' argument)* [',' '**' test] | '**' test) + +// TODO arglist lets through more than is allowed, compiler needs to do further verification +DEF_RULE(arglist, c(generic_all_nodes), list_with_end, rule(arglist_2), tok(DEL_COMMA)) +DEF_RULE(arglist_2, nc, or(3), rule(arglist_star), rule(arglist_dbl_star), rule(argument)) 
+DEF_RULE(arglist_star, c(arglist_star), and(2), tok(OP_STAR), rule(test))
+DEF_RULE(arglist_dbl_star, c(arglist_dbl_star), and(2), tok(OP_DBL_STAR), rule(test))
+
+// # The reason that keywords are test nodes instead of NAME is that using NAME
+// # results in an ambiguity. ast.c makes sure it's a NAME.
+// argument: test [comp_for] | test '=' test # Really [keyword '='] test
+// comp_iter: comp_for | comp_if
+// comp_for: 'for' exprlist 'in' or_test [comp_iter]
+// comp_if: 'if' test_nocond [comp_iter]
+
+DEF_RULE(argument, c(argument), and(2), rule(test), opt_rule(argument_2))
+DEF_RULE(argument_2, nc, or(2), rule(comp_for), rule(argument_3))
+DEF_RULE(argument_3, nc, and(2), tok(DEL_EQUAL), rule(test))
+DEF_RULE(comp_iter, nc, or(2), rule(comp_for), rule(comp_if))
+DEF_RULE(comp_for, nc, and(5), tok(KW_FOR), rule(exprlist), tok(KW_IN), rule(or_test), opt_rule(comp_iter))
+DEF_RULE(comp_if, nc, and(3), tok(KW_IF), rule(test_nocond), opt_rule(comp_iter))
+
+// # not used in grammar, but may appear in "node" passed from Parser to Compiler
+// encoding_decl: NAME
+
+// yield_expr: 'yield' [yield_arg]
+// yield_arg: 'from' test | testlist
+
+DEF_RULE(yield_expr, c(yield_expr), and(2), tok(KW_YIELD), opt_rule(yield_arg))
+DEF_RULE(yield_arg, nc, or(2), rule(yield_arg_from), rule(testlist))
+DEF_RULE(yield_arg_from, nc, and(2), tok(KW_FROM), rule(test))
diff --git a/py/lexer.c b/py/lexer.c
new file mode 100644
index 0000000000..9c2195ef5b
--- /dev/null
+++ b/py/lexer.c
@@ -0,0 +1,677 @@
+/* lexer.c -- simple tokeniser for Python implementation
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <assert.h>
+
+#include "misc.h"
+#include "lexer.h"
+
+#define TAB_SIZE (8)
+// sentinel held in chr0/chr1/chr2 once the source has been exhausted
+#define CHR_EOF (-1)
+
+// Lexer state: a 3-character lookahead window over the source buffer, the
+// current line/column, the indentation stack and a one-token lookahead.
+struct _py_lexer_t {
+    const char *name;           // (file) name of source
+    bool free;                  // free source when done with it
+
+    const char *src_beg;        // beginning of source
+    const char *src_cur;        // current location in source; points to chr0
+    const char *src_end;        // end (exclusive) of source
+    unichar chr0, chr1, chr2;   // current characters from source
+
+    uint line;                  // source line
+    uint column;                // source column
+
+    uint cont_line;             // continued line
+
+    int emit_dent;              // pending INDENT (> 0) or DEDENT (< 0) tokens still to be emitted
+    int nested_bracket_level;   // open (, [, { not yet closed; NEWLINE tokens are only emitted at level 0
+
+    uint alloc_indent_level;    // allocated entries in indent_level
+    uint num_indent_level;      // used entries in indent_level
+    uint16_t *indent_level;     // stack of indentation widths, innermost block last
+
+    py_token_t tok_cur;         // current token
+    py_token_t tok_next;        // one token of lookahead
+};
+
+// Return true when the text of tok (tok->len bytes at tok->str, not
+// NUL-terminated) is exactly the NUL-terminated string str.
+static bool py_token_is_str(const py_token_t *tok, const char *str) {
+    uint i = 0;
+    const char *tstr = tok->str;
+
+    // stop at the end of str as well, so a NUL byte inside the token can never
+    // make the comparison walk past the terminator of str
+    while (i < tok->len && *str != 0 && *tstr == *str) {
+        ++i;
+        ++tstr;
+        ++str;
+    }
+
+    return i == tok->len && *str == 0;
+}
+
+// Debug helper: print the position, kind and text of tok on one line of stdout.
+// Characters that g_unichar_isprint rejects are printed as '?'.
+void py_token_show(const py_token_t *tok) {
+    printf("(%s:%d:%d) kind:%d cont_line:%d str:%p len:%d", tok->src_name, tok->src_line, tok->src_column, tok->kind, tok->cont_line, tok->str, tok->len);
+    if (tok->str != NULL && tok->len > 0) {
+        const char *i = tok->str;
+        const char *j = i + tok->len;
+        printf(" ");
+        while (i < j) {
+            unichar c = g_utf8_get_char(i);
+            i = g_utf8_next_char(i);
+            if (g_unichar_isprint(c)) {
+                printf("%c", c);
+            } else {
+                printf("?");
+            }
+        }
+    }
+    printf("\n");
+}
+
+// Print the "(name:line:column) " prefix for tok, without a trailing newline.
+void py_token_show_error_prefix(const py_token_t *tok) {
+    printf("(%s:%d:%d) ", tok->src_name, tok->src_line, tok->src_column);
+}
+
+// Print msg prefixed with the position of tok; always returns false so that
+// callers can write "return py_token_show_error(...)".
+bool py_token_show_error(const py_token_t *tok, const char *msg) {
+    printf("(%s:%d:%d) %s\n", tok->src_name, tok->src_line, tok->src_column, msg);
+    return false;
+}
+
+// Character-class predicates on the lookahead window: the unqualified ones
+// test chr0, "following" tests chr1 and "following_following" tests chr2.
+
+static bool is_end(py_lexer_t *lex) {
+    return lex->chr0 == CHR_EOF;
+}
+
+static bool is_physical_newline(py_lexer_t *lex) {
+    return lex->chr0 == '\n' || lex->chr0 == '\r';
+}
+
+static bool is_char(py_lexer_t *lex, char c) {
+    return lex->chr0 == c;
+}
+
+static bool is_char_or(py_lexer_t *lex, char c1, char c2) {
+    return lex->chr0 == c1 || lex->chr0 == c2;
+}
+
+static bool is_char_or3(py_lexer_t *lex, char c1, char c2, char c3) {
+    return lex->chr0 == c1 || lex->chr0 == c2 || lex->chr0 == c3;
+}
+
+/*
+static bool is_char_following(py_lexer_t *lex, char c) {
+    return lex->chr1 == c;
+}
+*/
+
+static bool is_char_following_or(py_lexer_t *lex, char c1, char c2) {
+    return lex->chr1 == c1 || lex->chr1 == c2;
+}
+
+static bool is_char_following_following_or(py_lexer_t *lex, char c1, char c2) {
+    return lex->chr2 == c1 || lex->chr2 == c2;
+}
+
+// true when chr0 == c1 and chr1 == c2
+static bool is_char_and(py_lexer_t *lex, char c1, char c2) {
+    return lex->chr0 == c1 && lex->chr1 == c2;
+}
+
+static bool is_whitespace(py_lexer_t *lex) {
+    return g_unichar_isspace(lex->chr0);
+}
+
+static bool is_letter(py_lexer_t *lex) {
+    return g_unichar_isalpha(lex->chr0);
+}
+
+static bool is_digit(py_lexer_t *lex) {
+    return g_unichar_isdigit(lex->chr0);
+}
+
+static bool is_following_digit(py_lexer_t *lex) {
+    return g_unichar_isdigit(lex->chr1);
+}
+
+// TODO UNICODE include unicode characters in definition of identifiers
+static bool is_head_of_identifier(py_lexer_t *lex) {
+    return is_letter(lex) || lex->chr0 == '_';
+}
+
+// TODO UNICODE include unicode characters in definition of identifiers
+static bool is_tail_of_identifier(py_lexer_t *lex) {
+    return is_head_of_identifier(lex) || is_digit(lex);
+}
+
+// Consume chr0 and shift the lookahead window by one character (by two for a
+// CR LF pair, which counts as a single new line).  Updates line, column and
+// cont_line: LF and CR start a new line, a tab moves to the next multiple of
+// TAB_SIZE, anything else is one column wide.  When the source runs out and
+// the last character loaded was not a newline, one '\n' is synthesised before
+// CHR_EOF.  Does nothing once chr0 is CHR_EOF.
+static void next_char(py_lexer_t *lex) {
+    if (lex->chr0 == CHR_EOF) {
+        return;
+    }
+
+    int advance = 1;
+
+    if (lex->chr0 == '\n') {
+        // LF is a new line
+        ++lex->line;
+        lex->column = 1;
+        lex->cont_line = lex->line;
+    } else if (lex->chr0 == '\r') {
+        // CR is a new line
+        ++lex->line;
+        lex->column = 1;
+        lex->cont_line = lex->line;
+        if (lex->chr1 == '\n') {
+            // CR LF is a single new line
+            advance = 2;
+        }
+    } else if (lex->chr0 == '\t') {
+        // a tab
+        lex->column = (((lex->column - 1 + TAB_SIZE) / TAB_SIZE) * TAB_SIZE) + 1;
+    } else {
+        // a character worth one column
+        ++lex->column;
+    }
+
+    for (; advance > 0; advance--) {
+        lex->chr0 = lex->chr1;
+        lex->chr1 = lex->chr2;
+        lex->src_cur++;
+        if (lex->src_cur + 2 < lex->src_end) {
+            // load the byte as unsigned: where plain char is signed, a 0xFF
+            // byte would otherwise convert to the same value as CHR_EOF and
+            // end the input early
+            lex->chr2 = (unsigned char)lex->src_cur[2];
+        } else {
+            // EOF
+            if (lex->chr1 != '\n' && lex->chr1 != '\r') {
+                lex->chr2 = '\n'; // insert newline at end of file
+            } else {
+                lex->chr2 = CHR_EOF;
+            }
+        }
+    }
+}
+
+// Push an indentation width onto the indent stack, doubling the allocation
+// when it is full.  The value is stored as uint16_t.
+void indent_push(py_lexer_t *lex, uint indent) {
+    if (lex->num_indent_level >= lex->alloc_indent_level) {
+        lex->alloc_indent_level *= 2;
+        lex->indent_level = m_renew(uint16_t, lex->indent_level, lex->alloc_indent_level);
+    }
+    lex->indent_level[lex->num_indent_level++] = indent;
+}
+
+// Return the innermost indentation width; the stack must not be empty.
+uint indent_top(py_lexer_t *lex) {
+    return lex->indent_level[lex->num_indent_level - 1];
+}
+
+// Drop the innermost indentation width; the stack must not be empty.
+void indent_pop(py_lexer_t *lex) {
+    lex->num_indent_level -= 1;
+}
+
+// some tricky operator encoding:
+//     <op>  = begin with <op>, if this opchar matches then begin here
+//     e<op> = end with <op>, if this opchar matches then end
+//     E<op> = mandatory end with <op>, this opchar must match, then end
+//     c<op> = continue with <op>, if this opchar matches then continue matching
+// this means if the start of two ops are the same then they are equal til the last char
+
+static const char *tok_enc =
+    "()[]{},:;@~" // singles
+    "<e=c<e="     // < <= << <<=
+    ">e=c>e="     // > >= >> >>=
+    "*e=c*e="     // * *= ** **=
+    "+e="         // + +=
+    "-e=e>"       // - -= ->
+    "&e="         // & &=
+    "|e="         // | |=
+    "/e=c/e="     // / /= // //=
+    "%e="         // % %=
+    "^e="         // ^ ^=
+    "=e="         // = ==
+    "!E="         // !=
+    ".c.E.";      // . ...
+
+// Token kind for each operator/delimiter, in the order they are encoded in tok_enc.
+// TODO static assert that number of tokens is less than 256 so we can safely make this table with byte sized entries
+static const uint8_t tok_enc_kind[] = {
+    PY_TOKEN_DEL_PAREN_OPEN, PY_TOKEN_DEL_PAREN_CLOSE,
+    PY_TOKEN_DEL_BRACKET_OPEN, PY_TOKEN_DEL_BRACKET_CLOSE,
+    PY_TOKEN_DEL_BRACE_OPEN, PY_TOKEN_DEL_BRACE_CLOSE,
+    PY_TOKEN_DEL_COMMA, PY_TOKEN_DEL_COLON, PY_TOKEN_DEL_SEMICOLON, PY_TOKEN_DEL_AT, PY_TOKEN_OP_TILDE,
+
+    PY_TOKEN_OP_LESS, PY_TOKEN_OP_LESS_EQUAL, PY_TOKEN_OP_DBL_LESS, PY_TOKEN_DEL_DBL_LESS_EQUAL,
+    PY_TOKEN_OP_MORE, PY_TOKEN_OP_MORE_EQUAL, PY_TOKEN_OP_DBL_MORE, PY_TOKEN_DEL_DBL_MORE_EQUAL,
+    PY_TOKEN_OP_STAR, PY_TOKEN_DEL_STAR_EQUAL, PY_TOKEN_OP_DBL_STAR, PY_TOKEN_DEL_DBL_STAR_EQUAL,
+    PY_TOKEN_OP_PLUS, PY_TOKEN_DEL_PLUS_EQUAL,
+    PY_TOKEN_OP_MINUS, PY_TOKEN_DEL_MINUS_EQUAL, PY_TOKEN_DEL_MINUS_MORE,
+    PY_TOKEN_OP_AMPERSAND, PY_TOKEN_DEL_AMPERSAND_EQUAL,
+    PY_TOKEN_OP_PIPE, PY_TOKEN_DEL_PIPE_EQUAL,
+    PY_TOKEN_OP_SLASH, PY_TOKEN_DEL_SLASH_EQUAL, PY_TOKEN_OP_DBL_SLASH, PY_TOKEN_DEL_DBL_SLASH_EQUAL,
+    PY_TOKEN_OP_PERCENT, PY_TOKEN_DEL_PERCENT_EQUAL,
+    PY_TOKEN_OP_CARET, PY_TOKEN_DEL_CARET_EQUAL,
+    PY_TOKEN_DEL_EQUAL, PY_TOKEN_OP_DBL_EQUAL,
+    PY_TOKEN_OP_NOT_EQUAL,
+    PY_TOKEN_DEL_PERIOD, PY_TOKEN_ELLIPSES,
+};
+
+// must have the same order as enum in lexer.h
+static const char *tok_kw[] = {
+    "False",
+    "None",
+    "True",
+    "and",
+    "as",
+    "assert",
+    "break",
+    "class",
+    "continue",
+    "def",
+    "del",
+    "elif",
+    "else",
+    "except",
+    "finally",
+    "for",
+    "from",
+    "global",
+    "if",
+    "import",
+    "in",
+    "is",
+    "lambda",
+    "nonlocal",
+    "not",
+    "or",
+    "pass",
+    "raise",
+    "return",
+    "try",
+    "while",
+    "with",
+    "yield",
+    NULL,
+};
+
+// Lex the next token from lex into *tok.
+//
+// Leading newlines, whitespace, comments and backslash-newline continuations
+// are skipped first.  Then, in priority order: a pending DEDENT or INDENT is
+// emitted; a NEWLINE is emitted if a physical newline was skipped outside of
+// brackets (the indentation stack is updated from the current column and any
+// resulting INDENT/DEDENT tokens are queued in emit_dent); END is emitted at
+// end of input once all open indents have been closed; otherwise a string or
+// bytes literal, a name or keyword, a number, or an operator/delimiter is
+// lexed.  For literals tok->str/tok->len cover the text between the quotes;
+// for every other token they cover the characters consumed.
+static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok) {
+    bool had_physical_newline = false;
+
+    while (!is_end(lex)) {
+        if (is_physical_newline(lex)) {
+            had_physical_newline = true;
+            next_char(lex);
+        } else if (is_whitespace(lex)) {
+            next_char(lex);
+        } else if (is_char(lex, '#')) {
+            next_char(lex);
+            while (!is_end(lex) && !is_physical_newline(lex)) {
+                next_char(lex);
+            }
+            // had_physical_newline will be set on next loop
+        } else if (is_char(lex, '\\')) {
+            // backslash (outside string literals) must appear just before a physical newline
+            next_char(lex);
+            if (!is_physical_newline(lex)) {
+                // TODO SyntaxError
+                assert(0);
+            } else {
+                next_char(lex);
+            }
+        } else {
+            break;
+        }
+    }
+
+    tok->src_name = lex->name;
+    tok->src_line = lex->line;
+    tok->src_column = lex->column;
+    tok->kind = PY_TOKEN_INVALID;
+    tok->cont_line = lex->cont_line;
+    tok->str = lex->src_cur;
+    tok->len = 0;
+
+    if (lex->emit_dent < 0) {
+        tok->kind = PY_TOKEN_DEDENT;
+        lex->emit_dent += 1;
+
+    } else if (lex->emit_dent > 0) {
+        tok->kind = PY_TOKEN_INDENT;
+        lex->emit_dent -= 1;
+
+    } else if (had_physical_newline && lex->nested_bracket_level == 0
+               && tok != &lex->tok_cur // so that we don't emit a newline if file starts with a comment
+               ) {
+        tok->kind = PY_TOKEN_NEWLINE;
+
+        // the column of the first token on the new line gives its indentation
+        uint num_spaces = lex->column - 1;
+        lex->emit_dent = 0;
+        if (num_spaces == indent_top(lex)) {
+        } else if (num_spaces > indent_top(lex)) {
+            indent_push(lex, num_spaces);
+            lex->emit_dent += 1;
+        } else {
+            while (num_spaces < indent_top(lex)) {
+                indent_pop(lex);
+                lex->emit_dent -= 1;
+            }
+            if (num_spaces != indent_top(lex)) {
+                //SyntaxError
+            }
+        }
+
+    } else if (is_end(lex)) {
+        // TODO emit a newline if file does not end in one
+        if (indent_top(lex) > 0) {
+            // close every block that is still open before reporting END
+            tok->kind = PY_TOKEN_NEWLINE;
+            lex->emit_dent = 0;
+            while (indent_top(lex) > 0) {
+                indent_pop(lex);
+                lex->emit_dent -= 1;
+            }
+        } else {
+            tok->kind = PY_TOKEN_END;
+        }
+
+    } else if (is_char_or(lex, '\'', '\"')
+               || (is_char_or3(lex, 'r', 'u', 'b') && is_char_following_or(lex, '\'', '\"'))
+               || ((is_char_and(lex, 'r', 'b') || is_char_and(lex, 'b', 'r')) && is_char_following_following_or(lex, '\'', '\"'))) {
+        // a string or bytes literal
+
+        // parse type codes
+        bool is_raw = false;
+        bool is_bytes = false;
+        if (is_char(lex, 'u')) {
+            next_char(lex);
+        } else if (is_char(lex, 'b')) {
+            is_bytes = true;
+            next_char(lex);
+            if (is_char(lex, 'r')) {
+                is_raw = true;
+                next_char(lex);
+            }
+        } else if (is_char(lex, 'r')) {
+            is_raw = true;
+            next_char(lex);
+            if (is_char(lex, 'b')) {
+                is_bytes = true;
+                next_char(lex);
+            }
+        }
+        // rawness does not change where the literal ends (see below), and this
+        // lexer does not decode escapes, so the flag is currently informational
+        (void)is_raw;
+
+        // set token kind
+        if (is_bytes) {
+            tok->kind = PY_TOKEN_BYTES;
+        } else {
+            tok->kind = PY_TOKEN_STRING;
+        }
+
+        // get first quoting character
+        char quote_char = '\'';
+        if (is_char(lex, '\"')) {
+            quote_char = '\"';
+        }
+        next_char(lex);
+
+        // work out if it's a single or triple quoted literal
+        int num_quotes;
+        if (is_char_and(lex, quote_char, quote_char)) {
+            // triple quotes
+            next_char(lex);
+            next_char(lex);
+            num_quotes = 3;
+        } else {
+            // single quotes
+            num_quotes = 1;
+        }
+
+        // set start of token
+        tok->str = lex->src_cur;
+
+        // parse the literal
+        // TODO proper escaping
+        // a single-quoted literal ends at any physical newline (LF or CR); CR
+        // must be tested too because next_char consumes CR LF as one step
+        int n_closing = 0;
+        while (!is_end(lex) && (num_quotes > 1 || !is_physical_newline(lex)) && n_closing < num_quotes) {
+            if (is_char(lex, quote_char)) {
+                n_closing += 1;
+            } else {
+                n_closing = 0;
+                // a backslash protects the next character from ending the
+                // literal; this also holds inside raw literals (r'\'' is valid)
+                if (is_char(lex, '\\')) {
+                    next_char(lex);
+                }
+            }
+            next_char(lex);
+        }
+
+        // check we got the required end quotes
+        if (n_closing < num_quotes) {
+            tok->kind = PY_TOKEN_LONELY_STRING_OPEN;
+        }
+
+        // set token string (byte) length
+        tok->len = lex->src_cur - tok->str - n_closing;
+
+        // we set the length, return now so it's not set incorrectly below
+        return;
+
+    } else if (is_head_of_identifier(lex)) {
+        tok->kind = PY_TOKEN_NAME;
+
+        next_char(lex);
+
+        while (!is_end(lex) && is_tail_of_identifier(lex)) {
+            next_char(lex);
+        }
+
+    } else if (is_digit(lex) || (is_char(lex, '.') && is_following_digit(lex))) {
+        tok->kind = PY_TOKEN_NUMBER;
+
+        next_char(lex);
+
+        // consume everything that can be part of a numeric literal; an
+        // exponent marker may be followed by a sign
+        while (!is_end(lex)) {
+            if (is_char_or(lex, 'e', 'E')) {
+                next_char(lex);
+                if (is_char(lex, '+') || is_char(lex, '-')) {
+                    next_char(lex);
+                }
+            } else if (is_letter(lex) || is_digit(lex) || is_char_or(lex, '_', '.')) {
+                next_char(lex);
+            } else {
+                break;
+            }
+        }
+
+    } else {
+        // search for encoded delimiter or operator
+
+        // find the entry of tok_enc that begins with the current character,
+        // counting entries in tok_enc_index as they are skipped
+        const char *t = tok_enc;
+        uint tok_enc_index = 0;
+        for (; *t != 0 && !is_char(lex, *t); t += 1) {
+            if (*t == 'e' || *t == 'c') {
+                t += 1;
+            } else if (*t == 'E') {
+                // a mandatory end does not add an entry of its own
+                tok_enc_index -= 1;
+                t += 1;
+            }
+            tok_enc_index += 1;
+        }
+
+        next_char(lex);
+
+        if (*t == 0) {
+            // didn't match any delimiter or operator characters
+            tok->kind = PY_TOKEN_INVALID;
+
+        } else {
+            // matched a delimiter or operator character
+
+            // get the maximum characters for a valid token
+            t += 1;
+            uint t_index = tok_enc_index;
+            bool matched = true; // cleared when a mandatory end character is missing
+            for (;;) {
+                for (; *t == 'e'; t += 1) {
+                    t += 1;
+                    t_index += 1;
+                    if (is_char(lex, *t)) {
+                        next_char(lex);
+                        tok_enc_index = t_index;
+                        break;
+                    }
+                }
+
+                if (*t == 'E') {
+                    t += 1;
+                    if (is_char(lex, *t)) {
+                        next_char(lex);
+                        tok_enc_index = t_index;
+                    } else {
+                        // e.g. a lone '!' or '..': not a token
+                        matched = false;
+                    }
+                    break;
+                }
+
+                if (*t == 'c') {
+                    t += 1;
+                    t_index += 1;
+                    if (is_char(lex, *t)) {
+                        next_char(lex);
+                        tok_enc_index = t_index;
+                        t += 1;
+                    } else {
+                        break;
+                    }
+                } else {
+                    break;
+                }
+            }
+
+            if (!matched) {
+                // keep the token invalid; it must not be replaced by the table
+                // entry of the operator that failed to complete
+                tok->kind = PY_TOKEN_INVALID;
+
+            } else {
+                // set token kind
+                tok->kind = tok_enc_kind[tok_enc_index];
+
+                // compute bracket level for implicit line joining
+                if (tok->kind == PY_TOKEN_DEL_PAREN_OPEN || tok->kind == PY_TOKEN_DEL_BRACKET_OPEN || tok->kind == PY_TOKEN_DEL_BRACE_OPEN) {
+                    lex->nested_bracket_level += 1;
+                } else if (tok->kind == PY_TOKEN_DEL_PAREN_CLOSE || tok->kind == PY_TOKEN_DEL_BRACKET_CLOSE || tok->kind == PY_TOKEN_DEL_BRACE_CLOSE) {
+                    lex->nested_bracket_level -= 1;
+                }
+            }
+        }
+    }
+
+    // set token string (byte) length
+    tok->len = lex->src_cur - tok->str;
+
+    // check for keywords (must be done after setting token string length)
+    if (tok->kind == PY_TOKEN_NAME) {
+        for (int i = 0; tok_kw[i] != NULL; i++) {
+            if (py_token_is_str(tok, tok_kw[i])) {
+                tok->kind = PY_TOKEN_KW_FALSE + i;
+                break;
+            }
+        }
+    }
+}
+
+// Create a lexer over the len bytes at str and lex the first two tokens.
+// src_name is stored by pointer (not copied) and reported in token positions.
+// When free_str is true, py_lexer_free releases str with m_free.
+py_lexer_t *py_lexer_from_str_len(const char *src_name, const char *str, uint len, bool free_str) {
+    py_lexer_t *lex;
+
+    lex = m_new(py_lexer_t, 1);
+
+    //lex->name = g_strdup(src_name); // TODO
+    lex->name = src_name;
+    lex->free = free_str;
+    lex->src_beg = str;
+    lex->src_cur = str;
+    lex->src_end = str + len;
+    lex->line = 1;
+    lex->column = 1;
+    lex->cont_line = lex->line;
+    lex->emit_dent = 0;
+    lex->nested_bracket_level = 0;
+    lex->alloc_indent_level = 16;
+    lex->num_indent_level = 1;
+    lex->indent_level = m_new(uint16_t, lex->alloc_indent_level);
+    lex->indent_level[0] = 0;
+
+    // preload characters
+    // bytes are loaded as unsigned so that, where plain char is signed, a 0xFF
+    // byte cannot be mistaken for CHR_EOF
+    // TODO unicode
+    if (len == 0) {
+        lex->chr0 = '\n'; // insert newline at end of file
+        lex->chr1 = CHR_EOF;
+        lex->chr2 = CHR_EOF;
+    } else if (len == 1) {
+        lex->chr0 = (unsigned char)str[0];
+        if (lex->chr0 != '\n' && lex->chr0 != '\r') {
+            lex->chr1 = '\n'; // insert newline at end of file
+        } else {
+            lex->chr1 = CHR_EOF;
+        }
+        lex->chr2 = CHR_EOF;
+    } else if (len == 2) {
+        lex->chr0 = (unsigned char)str[0];
+        lex->chr1 = (unsigned char)str[1];
+        if (lex->chr1 != '\n' && lex->chr1 != '\r') {
+            lex->chr2 = '\n'; // insert newline at end of file
+        } else {
+            lex->chr2 = CHR_EOF;
+        }
+    } else {
+        lex->chr0 = (unsigned char)str[0];
+        lex->chr1 = (unsigned char)str[1];
+        lex->chr2 = (unsigned char)str[2];
+    }
+
+    py_lexer_next_token_into(lex, &lex->tok_cur);
+    py_lexer_next_token_into(lex, &lex->tok_next);
+
+    return lex;
+}
+
+// Release a lexer created by py_lexer_from_str_len; NULL is accepted.  The
+// source buffer is released only if the lexer was created with free_str set.
+void py_lexer_free(py_lexer_t *lex) {
+    if (lex == NULL) {
+        return;
+    }
+    //m_free(lex->name);
+    if (lex->free) {
+        m_free((char*)lex->src_beg);
+    }
+    // the indentation stack is owned by the lexer
+    m_free(lex->indent_level);
+    m_free(lex);
+}
+
+// Advance by one token: the lookahead becomes current and a new lookahead is lexed.
+void py_lexer_to_next(py_lexer_t *lex) {
+    lex->tok_cur = lex->tok_next;
+    py_lexer_next_token_into(lex, &lex->tok_next);
+}
+
+// Return the current token; the pointer refers to storage inside lex.
+const py_token_t *py_lexer_cur(const py_lexer_t *lex) {
+    return &lex->tok_cur;
+}
+
+// Return true when the current token is of the given kind.
+bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind) {
+    return lex->tok_cur.kind == kind;
+}
+
+/*
+bool py_lexer_is_str(py_lexer_t *lex, const char *str) {
+    return py_token_is_str(&lex->tok_cur, str);
+}
+
+bool py_lexer_is_next_kind(py_lexer_t *lex, py_token_kind_t kind) {
+    return lex->tok_next.kind == kind;
+}
+
+bool py_lexer_is_next_str(py_lexer_t *lex, const char *str) {
+    return py_token_is_str(&lex->tok_next, str);
+}
+
+bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind) {
+    if (py_lexer_is_kind(lex, kind)) {
+        py_lexer_to_next(lex);
+        return true;
+    }
+    return false;
+}
+
+bool py_lexer_opt_str(py_lexer_t *lex, const char *str) {
+    if (py_lexer_is_str(lex, str)) {
+        py_lexer_to_next(lex);
+        return true;
+    }
+    return false;
+}
+*/
+
+// Print msg prefixed with the position of the current token; always returns false.
+bool py_lexer_show_error(py_lexer_t *lex, const char *msg) {
+    return py_token_show_error(&lex->tok_cur, msg);
+}
diff --git a/py/lexer.h b/py/lexer.h
new file mode 100644
index 0000000000..32ab48a084
--- /dev/null
+++ b/py/lexer.h
@@ -0,0 +1,141 @@
+/* lexer.h -- simple tokeniser for Python implementation
+ */
+
+#ifndef INCLUDED_LEXER_H
+#define INCLUDED_LEXER_H
+
+/* uses (byte) length instead of null termination
+ * tokens are the same - UTF-8 with (byte) length
+ */
+
+typedef enum _py_token_kind_t {
+    PY_TOKEN_END,                   // 0
+
+    PY_TOKEN_INVALID,
+    PY_TOKEN_LONELY_STRING_OPEN,
+
+    PY_TOKEN_NEWLINE,               // 3
+    PY_TOKEN_INDENT,                // 4
+    PY_TOKEN_DEDENT,                // 5
+
+    PY_TOKEN_NAME,                  // 6
+    PY_TOKEN_NUMBER,
+    PY_TOKEN_STRING,
+    PY_TOKEN_BYTES,
+
+    PY_TOKEN_ELLIPSES,
+
+    PY_TOKEN_KW_FALSE,              // 11
+    PY_TOKEN_KW_NONE,
+    PY_TOKEN_KW_TRUE,
+    PY_TOKEN_KW_AND,
+    PY_TOKEN_KW_AS,
+    PY_TOKEN_KW_ASSERT,
+    PY_TOKEN_KW_BREAK,
+    PY_TOKEN_KW_CLASS,
+    PY_TOKEN_KW_CONTINUE,
+    PY_TOKEN_KW_DEF,                // 20
+    PY_TOKEN_KW_DEL,
+    PY_TOKEN_KW_ELIF,
+    PY_TOKEN_KW_ELSE,
+    PY_TOKEN_KW_EXCEPT,
+    PY_TOKEN_KW_FINALLY,
+    PY_TOKEN_KW_FOR,
+    PY_TOKEN_KW_FROM,
+    PY_TOKEN_KW_GLOBAL,
+    PY_TOKEN_KW_IF,
+    PY_TOKEN_KW_IMPORT,             // 30
+    PY_TOKEN_KW_IN,
+    PY_TOKEN_KW_IS,
+    PY_TOKEN_KW_LAMBDA,
+    PY_TOKEN_KW_NONLOCAL,
+    PY_TOKEN_KW_NOT,
+    PY_TOKEN_KW_OR,
+    PY_TOKEN_KW_PASS,
+    PY_TOKEN_KW_RAISE,
+    PY_TOKEN_KW_RETURN,
+    PY_TOKEN_KW_TRY,                // 40
+    PY_TOKEN_KW_WHILE,
+    PY_TOKEN_KW_WITH,
+    PY_TOKEN_KW_YIELD,
+
+    PY_TOKEN_OP_PLUS,               // 44
+    PY_TOKEN_OP_MINUS,
+
PY_TOKEN_OP_STAR, + PY_TOKEN_OP_DBL_STAR, + PY_TOKEN_OP_SLASH, + PY_TOKEN_OP_DBL_SLASH, + PY_TOKEN_OP_PERCENT, + PY_TOKEN_OP_LESS, + PY_TOKEN_OP_DBL_LESS, + PY_TOKEN_OP_MORE, + PY_TOKEN_OP_DBL_MORE, // 54 + PY_TOKEN_OP_AMPERSAND, + PY_TOKEN_OP_PIPE, + PY_TOKEN_OP_CARET, + PY_TOKEN_OP_TILDE, + PY_TOKEN_OP_LESS_EQUAL, + PY_TOKEN_OP_MORE_EQUAL, + PY_TOKEN_OP_DBL_EQUAL, + PY_TOKEN_OP_NOT_EQUAL, + + PY_TOKEN_DEL_PAREN_OPEN, // 63 + PY_TOKEN_DEL_PAREN_CLOSE, + PY_TOKEN_DEL_BRACKET_OPEN, + PY_TOKEN_DEL_BRACKET_CLOSE, + PY_TOKEN_DEL_BRACE_OPEN, + PY_TOKEN_DEL_BRACE_CLOSE, + PY_TOKEN_DEL_COMMA, + PY_TOKEN_DEL_COLON, + PY_TOKEN_DEL_PERIOD, + PY_TOKEN_DEL_SEMICOLON, + PY_TOKEN_DEL_AT, // 73 + PY_TOKEN_DEL_EQUAL, + PY_TOKEN_DEL_PLUS_EQUAL, + PY_TOKEN_DEL_MINUS_EQUAL, + PY_TOKEN_DEL_STAR_EQUAL, + PY_TOKEN_DEL_SLASH_EQUAL, + PY_TOKEN_DEL_DBL_SLASH_EQUAL, + PY_TOKEN_DEL_PERCENT_EQUAL, + PY_TOKEN_DEL_AMPERSAND_EQUAL, + PY_TOKEN_DEL_PIPE_EQUAL, + PY_TOKEN_DEL_CARET_EQUAL, // 83 + PY_TOKEN_DEL_DBL_MORE_EQUAL, + PY_TOKEN_DEL_DBL_LESS_EQUAL, + PY_TOKEN_DEL_DBL_STAR_EQUAL, + PY_TOKEN_DEL_MINUS_MORE, +} py_token_kind_t; + +typedef struct _py_token_t { + const char *src_name; // (file) name of source + uint src_line; // actual source line + uint src_column; // actual source column + + py_token_kind_t kind; // kind of token + uint cont_line; // token belongs to this line in a continued line + const char *str; // string of token + uint len; // (byte) length of string of token +} py_token_t; + +typedef struct _py_lexer_t py_lexer_t; + +void py_token_show(const py_token_t *tok); +void py_token_show_error_prefix(const py_token_t *tok); +bool py_token_show_error(const py_token_t *tok, const char *msg); + +py_lexer_t *py_lexer_from_file(const char *filename); +py_lexer_t *py_lexer_from_str_len(const char *src_name, const char *str, uint len, bool free_str); +void py_lexer_free(py_lexer_t *lex); +void py_lexer_to_next(py_lexer_t *lex); +const py_token_t *py_lexer_cur(const py_lexer_t *lex); 
+bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind); +/* unused +bool py_lexer_is_str(py_lexer_t *lex, const char *str); +bool py_lexer_is_next_kind(py_lexer_t *lex, py_token_kind_t kind); +bool py_lexer_is_next_str(py_lexer_t *lex, const char *str); +bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind); +bool py_lexer_opt_str(py_lexer_t *lex, const char *str); +*/ +bool py_lexer_show_error(py_lexer_t *lex, const char *msg); + +#endif /* INCLUDED_LEXER_H */ diff --git a/py/lexerfile.c b/py/lexerfile.c new file mode 100644 index 0000000000..74bb5a061a --- /dev/null +++ b/py/lexerfile.c @@ -0,0 +1,23 @@ +#include +#include +#include +#include + +#include "misc.h" +#include "lexer.h" + +py_lexer_t *py_lexer_from_file(const char *filename) { + // TODO abstract away file functionality + int fd = open(filename, O_RDONLY); + if (fd < 0) { + printf("cannot open file %s\n", filename); + return NULL; + } + uint size = lseek(fd, 0, SEEK_END); + lseek(fd, 0, SEEK_SET); + char *data = m_new(char, size); + read(fd, data, size); + close(fd); + + return py_lexer_from_str_len(filename, data, size, true); +} diff --git a/py/machine.h b/py/machine.h new file mode 100644 index 0000000000..fa39c8f2d0 --- /dev/null +++ b/py/machine.h @@ -0,0 +1,4 @@ +typedef int64_t machine_int_t; // must be pointer size +typedef uint64_t machine_uint_t; // must be pointer size +typedef void *machine_ptr_t; // must be of pointer size +typedef double machine_float_t; diff --git a/py/main.c b/py/main.c new file mode 100644 index 0000000000..7b17c38a85 --- /dev/null +++ b/py/main.c @@ -0,0 +1,58 @@ +#include +#include +#include + +#include "misc.h" +#include "lexer.h" +#include "machine.h" +#include "parse.h" +#include "compile.h" +#include "runtime.h" + +int main(int argc, char **argv) { + qstr_init(); + rt_init(); + + if (argc != 2) { + printf("usage: py \n"); + return 1; + } + py_lexer_t *lex = py_lexer_from_file(argv[1]); + //const char *pysrc = "def f():\n x=x+1\n print(42)\n"; + 
/*
 * Release memory obtained from m_malloc, m_malloc0 or m_realloc.
 * A NULL pointer is accepted and ignored.
 */
void m_free(void *ptr) {
    // free(NULL) is defined to do nothing, so no explicit guard is needed
    free(ptr);
}
m_get_total_bytes_allocated() { + return total_bytes_allocated; +} diff --git a/py/misc.c b/py/misc.c new file mode 100644 index 0000000000..a5bf8d5534 --- /dev/null +++ b/py/misc.c @@ -0,0 +1,84 @@ +#include +#include + +#include "misc.h" + +// attribute flags +#define FL_PRINT (0x01) +#define FL_SPACE (0x02) +#define FL_DIGIT (0x04) +#define FL_ALPHA (0x08) +#define FL_UPPER (0x10) +#define FL_LOWER (0x20) + +// shorthand character attributes +#define AT_PR (FL_PRINT) +#define AT_SP (FL_SPACE | FL_PRINT) +#define AT_DI (FL_DIGIT | FL_PRINT) +#define AT_AL (FL_ALPHA | FL_PRINT) +#define AT_UP (FL_UPPER | FL_ALPHA | FL_PRINT) +#define AT_LO (FL_LOWER | FL_ALPHA | FL_PRINT) + +// table of attributes for ascii characters +static const uint8_t attr[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, AT_SP, AT_SP, AT_SP, 0, AT_SP, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + AT_SP, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, + AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, + AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, AT_DI, + AT_DI, AT_DI, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, + AT_PR, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, + AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, + AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, AT_UP, + AT_UP, AT_UP, AT_UP, AT_PR, AT_PR, AT_PR, AT_PR, AT_PR, + AT_PR, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, + AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, + AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, AT_LO, + AT_LO, AT_LO, AT_LO, AT_PR, AT_PR, AT_PR, AT_PR, 0 +}; + +unichar g_utf8_get_char(const char *s) { + return *s; +} + +char *g_utf8_next_char(const char *s) { + return (char*)(s + 1); +} + +bool g_unichar_isspace(unichar c) { + return c < 128 && (attr[c] & FL_SPACE) != 0; +} + +bool g_unichar_isalpha(unichar c) { + return c < 128 && (attr[c] & FL_ALPHA) != 0; +} + +bool g_unichar_isprint(unichar c) { + return c < 128 && (attr[c] & FL_PRINT) != 0; +} + +bool 
g_unichar_isdigit(unichar c) { + return c < 128 && (attr[c] & FL_DIGIT) != 0; +} + +/* +bool char_is_alpha_or_digit(unichar c) { + return c < 128 && (attr[c] & (FL_ALPHA | FL_DIGIT)) != 0; +} + +bool char_is_upper(unichar c) { + return c < 128 && (attr[c] & FL_UPPER) != 0; +} + +bool char_is_lower(unichar c) { + return c < 128 && (attr[c] & FL_LOWER) != 0; +} +*/ + +/* +char *g_strdup(const char *s) { + return strdup(s); +} +*/ diff --git a/py/misc.h b/py/misc.h new file mode 100644 index 0000000000..9ba80a5c37 --- /dev/null +++ b/py/misc.h @@ -0,0 +1,91 @@ +// a mini library of useful types and functions + +#ifndef _INCLUDED_MINILIB_H +#define _INCLUDED_MINILIB_H + +/** types *******************************************************/ + +typedef int bool; +enum { + false = 0, + true = 1 +}; + +typedef unsigned char byte; +typedef unsigned int uint; + +/** memomry allocation ******************************************/ + +#define m_new(type, num) ((type*)(m_malloc(sizeof(type) * (num)))) +#define m_new0(type, num) ((type*)(m_malloc0(sizeof(type) * (num)))) +#define m_renew(type, ptr, num) ((type*)(m_realloc((ptr), sizeof(type) * (num)))) + +void m_free(void *ptr); +void *m_malloc(int num_bytes); +void *m_malloc0(int num_bytes); +void *m_realloc(void *ptr, int num_bytes); + +int m_get_total_bytes_allocated(); + +/** unichar / UTF-8 *********************************************/ + +typedef int unichar; // TODO + +unichar g_utf8_get_char(const char *s); +char *g_utf8_next_char(const char *s); + +bool g_unichar_isspace(unichar c); +bool g_unichar_isalpha(unichar c); +bool g_unichar_isprint(unichar c); +bool g_unichar_isdigit(unichar c); + +//char *g_strdup(const char *s); + +/** blob ********************************************************/ + +/* +unsigned short decode_le16(byte *buf); +unsigned int decode_le32(byte *buf); +void encode_le16(byte *buf, unsigned short i); +void encode_le32(byte *buf, unsigned int i); +*/ + +/** string 
******************************************************/ + +/* +#define streq(s1, s2) (strcmp((s1), (s2)) == 0) +*/ + +/** variable string *********************************************/ + +/* +typedef struct _vstr_t vstr_t; + +vstr_t *vstr_new(); +void vstr_free(vstr_t *vstr); +void vstr_reset(vstr_t *vstr); +bool vstr_had_error(vstr_t *vstr); +char *vstr_str(vstr_t *vstr); +int vstr_len(vstr_t *vstr); +void vstr_hint_size(vstr_t *vstr, int size); +char *vstr_add_len(vstr_t *vstr, int len); +void vstr_add_str(vstr_t *vstr, const char *str); +void vstr_add_strn(vstr_t *vstr, const char *str, int len); +void vstr_add_byte(vstr_t *vstr, byte v); +void vstr_add_le16(vstr_t *vstr, unsigned short v); +void vstr_add_le32(vstr_t *vstr, unsigned int v); +void vstr_cut_tail(vstr_t *vstr, int len); +void vstr_printf(vstr_t *vstr, const char *fmt, ...); +*/ + +/** unique string ***********************************************/ + +typedef unsigned int qstr; + +void qstr_init(); +qstr qstr_from_str_static(const char *str); +qstr qstr_from_str_take(char *str); +qstr qstr_from_strn_copy(const char *str, int len); +const char* qstr_str(qstr qstr); + +#endif // _INCLUDED_MINILIB_H diff --git a/py/parse.c b/py/parse.c new file mode 100644 index 0000000000..94a5a5d9ca --- /dev/null +++ b/py/parse.c @@ -0,0 +1,565 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "misc.h" +#include "lexer.h" +#include "machine.h" +#include "parse.h" + +#define RULE_ACT_KIND_MASK (0xf0) +#define RULE_ACT_ARG_MASK (0x0f) +#define RULE_ACT_OR (0x10) +#define RULE_ACT_AND (0x20) +#define RULE_ACT_LIST (0x30) + +#define RULE_ARG_BLANK (0x0000) +#define RULE_ARG_KIND_MASK (0xf000) +#define RULE_ARG_ARG_MASK (0x0fff) +#define RULE_ARG_TOK (0x1000) +#define RULE_ARG_RULE (0x2000) +#define RULE_ARG_OPT_TOK (0x3000) +#define RULE_ARG_OPT_RULE (0x4000) + +// (un)comment to use rule names; for debugging +//#define USE_RULE_NAME (1) + +typedef struct _rule_t { + byte rule_id; + byte 
act; +#ifdef USE_RULE_NAME + const char *rule_name; +#endif + uint16_t arg[]; +} rule_t; + +enum { + RULE_none = 0, +#define DEF_RULE(rule, comp, kind, arg...) RULE_##rule, +#include "grammar.h" +#undef DEF_RULE + RULE_maximum_number_of, +}; + +#define or(n) (RULE_ACT_OR | n) +#define and(n) (RULE_ACT_AND | n) +#define one_or_more (RULE_ACT_LIST | 2) +#define list (RULE_ACT_LIST | 1) +#define list_with_end (RULE_ACT_LIST | 3) +#define tok(t) (RULE_ARG_TOK | PY_TOKEN_##t) +#define rule(r) (RULE_ARG_RULE | RULE_##r) +#define opt_tok(t) (RULE_ARG_OPT_TOK | PY_TOKEN_##t) +#define opt_rule(r) (RULE_ARG_OPT_RULE | RULE_##r) +#ifdef USE_RULE_NAME +#define DEF_RULE(rule, comp, kind, arg...) static rule_t rule_##rule = { RULE_##rule, kind, #rule, { arg } }; +#else +#define DEF_RULE(rule, comp, kind, arg...) static rule_t rule_##rule = { RULE_##rule, kind, { arg } }; +#endif +#include "grammar.h" +#undef or +#undef and +#undef list +#undef list_with_end +#undef tok +#undef rule +#undef opt_tok +#undef opt_rule +#undef one_or_more +#undef DEF_RULE + +static rule_t *rules[] = { + NULL, +#define DEF_RULE(rule, comp, kind, arg...) &rule_##rule, +#include "grammar.h" +#undef DEF_RULE +}; + +typedef struct _rule_stack_t { + byte rule_id; + int32_t arg_i; // what should be the size and signedness? 
+} rule_stack_t; + +typedef struct _parser_t { + uint rule_stack_alloc; + uint rule_stack_top; + rule_stack_t *rule_stack; + + uint result_stack_top; + py_parse_node_t *result_stack; +} parser_t; + +static void push_rule(parser_t *parser, rule_t *rule, int arg_i) { + if (parser->rule_stack_top >= parser->rule_stack_alloc) { + parser->rule_stack_alloc *= 2; + parser->rule_stack = m_renew(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc); + } + parser->rule_stack[parser->rule_stack_top].rule_id = rule->rule_id; + parser->rule_stack[parser->rule_stack_top].arg_i = arg_i; + parser->rule_stack_top += 1; +} + +static void push_rule_from_arg(parser_t *parser, uint arg) { + assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE); + uint rule_id = arg & RULE_ARG_ARG_MASK; + assert(rule_id < RULE_maximum_number_of); + push_rule(parser, rules[rule_id], 0); +} + +static void pop_rule(parser_t *parser, rule_t **rule, uint *arg_i) { + parser->rule_stack_top -= 1; + *rule = rules[parser->rule_stack[parser->rule_stack_top].rule_id]; + *arg_i = parser->rule_stack[parser->rule_stack_top].arg_i; +} + +py_parse_node_t py_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) { + return (py_parse_node_t)(kind | (arg << 4)); +} + +int num_parse_nodes_allocated = 0; +py_parse_node_struct_t *parse_node_new_struct(int rule_id, int num_args) { + py_parse_node_struct_t *pn = m_malloc(sizeof(py_parse_node_struct_t) + num_args * sizeof(py_parse_node_t)); + pn->source = 0; // TODO + pn->kind_num_nodes = (rule_id & 0xff) | (num_args << 8); + num_parse_nodes_allocated += 1; + return pn; +} + +void parse_node_show(py_parse_node_t pn, int indent) { + for (int i = 0; i < indent; i++) { + printf(" "); + } + if (PY_PARSE_NODE_IS_NULL(pn)) { + printf("NULL\n"); + } else if (PY_PARSE_NODE_IS_LEAF(pn)) { + int arg = PY_PARSE_NODE_LEAF_ARG(pn); + switch (PY_PARSE_NODE_LEAF_KIND(pn)) { + case PY_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); 
break; + case PY_PARSE_NODE_SMALL_INT: printf("int(%d)\n", arg); break; + case PY_PARSE_NODE_INTEGER: printf("int(%s)\n", qstr_str(arg)); break; + case PY_PARSE_NODE_DECIMAL: printf("dec(%s)\n", qstr_str(arg)); break; + case PY_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break; + case PY_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break; + case PY_PARSE_NODE_TOKEN: printf("tok(%d)\n", arg); break; + default: assert(0); + } + } else { + py_parse_node_struct_t *pns2 = (py_parse_node_struct_t*)pn; + int n = pns2->kind_num_nodes >> 8; +#ifdef USE_RULE_NAME + printf("%s(%d) (n=%d)\n", rules[PY_PARSE_NODE_STRUCT_KIND(pns2)]->rule_name, PY_PARSE_NODE_STRUCT_KIND(pns2), n); +#else + printf("rule(%u) (n=%d)\n", (uint)PY_PARSE_NODE_STRUCT_KIND(pns2), n); +#endif + for (int i = 0; i < n; i++) { + parse_node_show(pns2->nodes[i], indent + 2); + } + } +} + +/* +static void result_stack_show(parser_t *parser) { + printf("result stack, most recent first\n"); + for (int i = parser->result_stack_top - 1; i >= 0; i--) { + parse_node_show(parser->result_stack[i], 0); + } +} +*/ + +static py_parse_node_t pop_result(parser_t *parser) { + assert(parser->result_stack_top > 0); + return parser->result_stack[--parser->result_stack_top]; +} + +static py_parse_node_t peek_result(parser_t *parser, int pos) { + assert(parser->result_stack_top > pos); + return parser->result_stack[parser->result_stack_top - 1 - pos]; +} + +static void push_result_node(parser_t *parser, py_parse_node_t pn) { + parser->result_stack[parser->result_stack_top++] = pn; +} + +static void push_result_token(parser_t *parser, const py_lexer_t *lex) { + const py_token_t *tok = py_lexer_cur(lex); + py_parse_node_t pn; + if (tok->kind == PY_TOKEN_NAME) { + pn = py_parse_node_new_leaf(PY_PARSE_NODE_ID, qstr_from_strn_copy(tok->str, tok->len)); + } else if (tok->kind == PY_TOKEN_NUMBER) { + bool dec = false; + bool small_int = true; + int int_val = 0; + int len = tok->len; + const char *str = tok->str; + 
int base = 10; + int i = 0; + if (len >= 3 && str[0] == '0') { + if (str[1] == 'o' || str[1] == 'O') { + // octal + base = 8; + i = 2; + } else if (str[1] == 'x' || str[1] == 'X') { + // hexadecimal + base = 16; + i = 2; + } else if (str[1] == 'b' || str[1] == 'B') { + // binary + base = 2; + i = 2; + } + } + for (; i < len; i++) { + if (g_unichar_isdigit(str[i]) && str[i] - '0' < base) { + int_val = base * int_val + str[i] - '0'; + } else if (base == 16 && 'a' <= str[i] && str[i] <= 'f') { + int_val = base * int_val + str[i] - 'a' + 10; + } else if (base == 16 && 'F' <= str[i] && str[i] <= 'F') { + int_val = base * int_val + str[i] - 'A' + 10; + } else if (str[i] == '.' || str[i] == 'e' || str[i] == 'E') { + dec = true; + break; + } else { + small_int = false; + break; + } + } + if (dec) { + pn = py_parse_node_new_leaf(PY_PARSE_NODE_DECIMAL, qstr_from_strn_copy(str, len)); + } else if (small_int && -0x10000 <= int_val && int_val <= 0xffff) { + pn = py_parse_node_new_leaf(PY_PARSE_NODE_SMALL_INT, int_val); + } else { + pn = py_parse_node_new_leaf(PY_PARSE_NODE_INTEGER, qstr_from_strn_copy(str, len)); + } + } else if (tok->kind == PY_TOKEN_STRING) { + pn = py_parse_node_new_leaf(PY_PARSE_NODE_STRING, qstr_from_strn_copy(tok->str, tok->len)); + } else if (tok->kind == PY_TOKEN_BYTES) { + pn = py_parse_node_new_leaf(PY_PARSE_NODE_BYTES, qstr_from_strn_copy(tok->str, tok->len)); + } else { + pn = py_parse_node_new_leaf(PY_PARSE_NODE_TOKEN, tok->kind); + } + push_result_node(parser, pn); +} + +static void push_result_rule(parser_t *parser, rule_t *rule, int num_args) { + py_parse_node_struct_t *pn = parse_node_new_struct(rule->rule_id, num_args); + for (int i = num_args; i > 0; i--) { + pn->nodes[i - 1] = pop_result(parser); + } + push_result_node(parser, (py_parse_node_t)pn); +} + +py_parse_node_t py_parse(py_lexer_t *lex, int wanted_rule) { + wanted_rule = RULE_file_input; + parser_t *parser = m_new(parser_t, 1); + parser->rule_stack_alloc = 64; + 
parser->rule_stack_top = 0; + parser->rule_stack = m_new(rule_stack_t, parser->rule_stack_alloc); + + parser->result_stack = m_new(py_parse_node_t, 1000); + parser->result_stack_top = 0; + + push_rule(parser, rules[wanted_rule], 0); + + uint n, i; + bool backtrack = false; + rule_t *rule; + py_token_kind_t tok_kind; + bool emit_rule; + bool had_trailing_sep; + + for (;;) { + next_rule: + if (parser->rule_stack_top == 0) { + break; + } + + pop_rule(parser, &rule, &i); + n = rule->act & RULE_ACT_ARG_MASK; + + /* + // debugging + printf("depth=%d ", parser->rule_stack_top); + for (int j = 0; j < parser->rule_stack_top; ++j) { + printf(" "); + } + printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack); + */ + + switch (rule->act & RULE_ACT_KIND_MASK) { + case RULE_ACT_OR: + if (i > 0 && !backtrack) { + goto next_rule; + } else { + backtrack = false; + } + for (; i < n - 1; ++i) { + switch (rule->arg[i] & RULE_ARG_KIND_MASK) { + case RULE_ARG_TOK: + if (py_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) { + push_result_token(parser, lex); + py_lexer_to_next(lex); + goto next_rule; + } + break; + case RULE_ARG_RULE: + push_rule(parser, rule, i + 1); + push_rule_from_arg(parser, rule->arg[i]); + goto next_rule; + default: + assert(0); + } + } + if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) { + if (py_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) { + push_result_token(parser, lex); + py_lexer_to_next(lex); + } else { + backtrack = true; + goto next_rule; + } + } else { + push_rule_from_arg(parser, rule->arg[i]); + } + break; + + case RULE_ACT_AND: + + // failed, backtrack if we can, else syntax error + if (backtrack) { + assert(i > 0); + if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) { + // an optional rule that failed, so continue with next arg + push_result_node(parser, PY_PARSE_NODE_NULL); + backtrack = false; + } else { + // a mandatory rule that failed, so propagate backtrack + if (i > 1) { + // already eaten 
tokens so can't backtrack + goto syntax_error; + } else { + goto next_rule; + } + } + } + + // progress through the rule + for (; i < n; ++i) { + switch (rule->arg[i] & RULE_ARG_KIND_MASK) { + case RULE_ARG_TOK: + // need to match a token + tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK; + if (py_lexer_is_kind(lex, tok_kind)) { + // matched token + if (tok_kind == PY_TOKEN_NAME) { + push_result_token(parser, lex); + } + py_lexer_to_next(lex); + } else { + // failed to match token + if (i > 0) { + // already eaten tokens so can't backtrack + goto syntax_error; + } else { + // this rule failed, so backtrack + backtrack = true; + goto next_rule; + } + } + break; + case RULE_ARG_RULE: + //if (i + 1 < n) { + push_rule(parser, rule, i + 1); + //} + push_rule_from_arg(parser, rule->arg[i]); + goto next_rule; + case RULE_ARG_OPT_RULE: + push_rule(parser, rule, i + 1); + push_rule_from_arg(parser, rule->arg[i]); + goto next_rule; + default: + assert(0); + } + } + + assert(i == n); + + // matched the rule, so now build the corresponding parse_node + + // count number of arguments for the parse_node + i = 0; + emit_rule = false; + for (int x = 0; x < n; ++x) { + if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) { + tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK; + if (tok_kind >= PY_TOKEN_NAME) { + emit_rule = true; + } + if (tok_kind == PY_TOKEN_NAME) { + // only tokens which were names are pushed to stack + i += 1; + } + } else { + // rules are always pushed + i += 1; + } + } + + // always emit these rules, even if they have only 1 argument + if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) { + emit_rule = true; + } + + // never emit these rules if they have only 1 argument + // NOTE: can't put atom_paren here because we need it to distinguisg, for example, [a,b] from [(a,b)] + if (rule->rule_id == RULE_else_stmt || rule->rule_id == RULE_testlist_comp_3b || rule->rule_id == RULE_import_as_names_paren || rule->rule_id == RULE_typedargslist_colon || 
rule->rule_id == RULE_typedargslist_equal || rule->rule_id == RULE_dictorsetmaker_colon || rule->rule_id == RULE_classdef_2 || rule->rule_id == RULE_with_item_as || rule->rule_id == RULE_assert_stmt_extra || rule->rule_id == RULE_as_name || rule->rule_id == RULE_raise_stmt_from || rule->rule_id == RULE_vfpdef) { + emit_rule = false; + } + + // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data) + if (rule->rule_id == RULE_funcdef || rule->rule_id == RULE_classdef || rule->rule_id == RULE_comp_for || rule->rule_id == RULE_lambdef || rule->rule_id == RULE_lambdef_nocond) { + emit_rule = true; + push_result_node(parser, PY_PARSE_NODE_NULL); + i += 1; + } + + int num_not_nil = 0; + for (int x = 0; x < i; ++x) { + if (peek_result(parser, x) != PY_PARSE_NODE_NULL) { + num_not_nil += 1; + } + } + //printf("done and %s n=%d i=%d notnil=%d\n", rule->rule_name, n, i, num_not_nil); + if (emit_rule) { + push_result_rule(parser, rule, i); + } else if (num_not_nil == 0) { + push_result_rule(parser, rule, i); // needed for, eg, atom_paren, testlist_comp_3b + //result_stack_show(parser); + //assert(0); + } else if (num_not_nil == 1) { + // single result, leave it on stack + py_parse_node_t pn = PY_PARSE_NODE_NULL; + for (int x = 0; x < i; ++x) { + py_parse_node_t pn2 = pop_result(parser); + if (pn2 != PY_PARSE_NODE_NULL) { + pn = pn2; + } + } + push_result_node(parser, pn); + } else { + push_result_rule(parser, rule, i); + } + break; + + case RULE_ACT_LIST: + // n=2 is: item item* + // n=1 is: item (sep item)* + // n=3 is: item (sep item)* [sep] + if (backtrack) { + list_backtrack: + had_trailing_sep = false; + if (n == 2) { + if (i == 1) { + // fail on item, first time round; propagate backtrack + goto next_rule; + } else { + // fail on item, in later rounds; finish with this rule + backtrack = false; + } + } else { + if (i == 1) { + // fail on item, first time round; propagate backtrack + goto next_rule; + } else if ((i & 1) 
== 1) { + // fail on item, in later rounds; have eaten tokens so can't backtrack + if (n == 3) { + // list allows trailing separator; finish parsing list + had_trailing_sep = true; + backtrack = false; + } else { + // list doesn't allowing trailing separator; fail + goto syntax_error; + } + } else { + // fail on separator; finish parsing list + backtrack = false; + } + } + } else { + for (;;) { + uint arg = rule->arg[i & 1 & n]; + switch (arg & RULE_ARG_KIND_MASK) { + case RULE_ARG_TOK: + if (py_lexer_is_kind(lex, arg & RULE_ARG_ARG_MASK)) { + if (i & 1 & n) { + // separators which are tokens are not pushed to result stack + } else { + push_result_token(parser, lex); + } + py_lexer_to_next(lex); + // got element of list, so continue parsing list + i += 1; + } else { + // couldn't get element of list + i += 1; + backtrack = true; + goto list_backtrack; + } + break; + case RULE_ARG_RULE: + push_rule(parser, rule, i + 1); + push_rule_from_arg(parser, arg); + goto next_rule; + default: + assert(0); + } + } + } + assert(i >= 1); + + // compute number of elements in list, result in i + i -= 1; + if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) { + // don't count separators when they are tokens + i = (i + 1) / 2; + } + + if (i == 1) { + // list matched single item + if (had_trailing_sep) { + // if there was a trailing separator, make a list of a single item + push_result_rule(parser, rule, i); + } else { + // just leave single item on stack (ie don't wrap in a list) + } + } else { + //printf("done list %s %d %d\n", rule->rule_name, n, i); + push_result_rule(parser, rule, i); + } + break; + + default: + assert(0); + } + } + if (!py_lexer_is_kind(lex, PY_TOKEN_END)) { + py_lexer_show_error(lex, "unexpected token at end:"); + py_token_show(py_lexer_cur(lex)); + } + //printf("--------------\n"); + //result_stack_show(parser); + assert(parser->result_stack_top == 1); + //printf("maximum depth: %d\n", parser->rule_stack_alloc); + //printf("number of parse 
nodes allocated: %d\n", num_parse_nodes_allocated); + return parser->result_stack[0]; + +syntax_error: + py_lexer_show_error(lex, "syntax error:"); +#ifdef USE_RULE_NAME + py_lexer_show_error(lex, rule->rule_name); +#endif + py_token_show(py_lexer_cur(lex)); + return PY_PARSE_NODE_NULL; +} diff --git a/py/parse.h b/py/parse.h new file mode 100644 index 0000000000..07d553c141 --- /dev/null +++ b/py/parse.h @@ -0,0 +1,54 @@ +struct _py_lexer_t; + +// a py_parse_node_t is: +// - 0000...0000: no node +// - xxxx...0001: an identifier; bits 4 and above are the qstr +// - xxxx...0011: a small integer; bits 4 and above are the signed value, 2's complement +// - xxxx...0101: an integer; bits 4 and above are the qstr holding the value +// - xxxx...0111: a decimal; bits 4 and above are the qstr holding the value +// - xxxx...1001: a string; bits 4 and above are the qstr holding the value +// - xxxx...1011: a string with triple quotes; bits 4 and above are the qstr holding the value +// - xxxx...1101: a token; bits 4 and above are py_token_kind_t +// - xxxx...xxx0: pointer to py_parse_node_struct_t + +#define PY_PARSE_NODE_NULL (0) +#define PY_PARSE_NODE_ID (0x1) +#define PY_PARSE_NODE_SMALL_INT (0x3) +#define PY_PARSE_NODE_INTEGER (0x5) +#define PY_PARSE_NODE_DECIMAL (0x7) +#define PY_PARSE_NODE_STRING (0x9) +#define PY_PARSE_NODE_BYTES (0xb) +#define PY_PARSE_NODE_TOKEN (0xd) + +typedef machine_uint_t py_parse_node_t; // must be pointer size + +typedef struct _py_parse_node_struct_t { + uint32_t source; // file identifier, and line number + uint32_t kind_num_nodes; // parse node kind, and number of nodes + py_parse_node_t nodes[]; // nodes +} py_parse_node_struct_t; + +// macros for py_parse_node_t usage +// some of these evaluate their argument more than once + +#define PY_PARSE_NODE_IS_NULL(pn) ((pn) == PY_PARSE_NODE_NULL) +#define PY_PARSE_NODE_IS_LEAF(pn) ((pn) & 1) +#define PY_PARSE_NODE_IS_STRUCT(pn) ((pn) != PY_PARSE_NODE_NULL && ((pn) & 1) == 0) +#define 
// True if pn is a token leaf whose token kind is exactly k.
// k is parenthesised so that an expression argument (e.g. a | b) is shifted as a whole.
#define PY_PARSE_NODE_IS_TOKEN_KIND(pn, k) ((pn) == (PY_PARSE_NODE_TOKEN | ((k) << 4)))
(int i = 0; i < qstrs_len; i++) { + if (strncmp(qstrs[i], str, len) == 0 && qstrs[i][len] == '\0') { + return i; + } + } + return qstr_add(strndup(str, len)); +} + +const char *qstr_str(qstr qstr) { + return qstrs[qstr]; +} diff --git a/py/runtime.c b/py/runtime.c new file mode 100644 index 0000000000..bf2e2ee065 --- /dev/null +++ b/py/runtime.c @@ -0,0 +1,944 @@ +#include +#include +#include +#include +#include + +#include "misc.h" +#include "machine.h" +#include "runtime.h" +#include "bc.h" + +#define DEBUG_printf(args...) (void)0 +//#define DEBUG_printf(args...) printf(args) + +#define DEBUG_OP_printf(args...) (void)0 +//#define DEBUG_OP_printf(args...) printf(args) + +// enable/disable float support with this definition +#define PY_FLOAT (1) + +typedef machine_int_t py_small_int_t; + +#define IS_O(o, k) (((((py_small_int_t)(o)) & 1) == 0) && (((py_obj_base_t*)(o))->kind == (k))) +#define IS_SMALL_INT(o) (((py_small_int_t)(o)) & 1) +#define FROM_SMALL_INT(o) (((py_small_int_t)(o)) >> 1) +#define TO_SMALL_INT(o) ((py_obj_t)(((o) << 1) | 1)) + +#ifdef PY_FLOAT +typedef machine_float_t float_t; +#endif + +typedef enum { + O_CONST, + O_STR, +#ifdef PY_FLOAT + O_FLOAT, +#endif + O_FUN_0, + O_FUN_1, + O_FUN_2, + O_FUN_N, + O_FUN_BC, + O_BOUND_METH, + O_LIST, + O_SET, + O_MAP, + O_CLASS, +} py_obj_kind_t; + +typedef enum { + MAP_QSTR, + MAP_PY_OBJ, +} py_map_kind_t; + +typedef struct _py_map_elem_t { + py_obj_t key; + py_obj_t value; +} py_map_elem_t; + +typedef struct _py_map_t { + py_map_kind_t kind; + machine_uint_t alloc; + machine_uint_t used; + py_map_elem_t *table; +} py_map_t; + +typedef struct _py_obj_base_t { + py_obj_kind_t kind; + union { + const char *id; + qstr u_str; +#ifdef PY_FLOAT + float_t flt; +#endif + struct { // for O_FUN_[012N] + void *fun; + int n_args; + } u_fun; + struct { // for O_FUN_BC + byte *code; + uint len; + int n_args; + } u_fun_bc; + struct { // for O_BOUND_METH + py_obj_t meth; + py_obj_t self; + } u_bound_meth; + struct { // for 
O_LIST + int alloc; + int len; + py_obj_t *items; + } u_list; + struct { // for O_SET + int alloc; + int used; + py_obj_t *table; + } u_set; + py_map_t u_map; // for O_MAP + /* + struct { // for O_MAP + int alloc; + int used; + py_map_elem_t *table; + } u_map; + */ + struct { // for O_CLASS + py_map_t *map; + } u_class; + }; +} py_obj_base_t; + +py_obj_t py_const_none; +py_obj_t py_const_false; +py_obj_t py_const_true; + +py_map_t map_name; +py_map_t map_builtins; + +// approximatelly doubling primes; made with Mathematica command: Table[Prime[Floor[(1.7)^n]], {n, 3, 24}] +static int doubling_primes[] = {7, 19, 43, 89, 179, 347, 647, 1229, 2297, 4243, 7829, 14347, 26017, 47149, 84947, 152443, 273253, 488399, 869927, 1547173, 2745121, 4861607}; + +int get_doubling_prime_greater_or_equal_to(int x) { + for (int i = 0; i < sizeof(doubling_primes) / sizeof(int); i++) { + if (doubling_primes[i] >= x) { + return doubling_primes[i]; + } + } + // ran out of primes in the table! + // return something sensible, at least make it odd + return x | 1; +} + +void py_map_init(py_map_t *map, py_map_kind_t kind, int n) { + map->kind = kind; + map->alloc = get_doubling_prime_greater_or_equal_to(n + 1); + map->used = 0; + map->table = m_new(py_map_elem_t, map->alloc); + for (int i = 0; i < map->alloc; i++) { + map->table[i].key = NULL; + map->table[i].value = NULL; + } +} + +py_map_t *py_map_new(py_map_kind_t kind, int n) { + py_map_t *map = m_new(py_map_t, 1); + py_map_init(map, kind, n); + return map; +} + +int py_obj_hash(py_obj_t o_in) { + if (IS_SMALL_INT(o_in)) { + return FROM_SMALL_INT(o_in); + } else if (IS_O(o_in, O_STR)) { + return ((py_obj_base_t*)o_in)->u_str; + } else { + assert(0); + return 0; + } +} + +bool py_obj_equal(py_obj_t o1, py_obj_t o2) { + if (o1 == o2) { + return true; + } else if (IS_SMALL_INT(o1) && IS_SMALL_INT(o2)) { + return false; + } else if (IS_O(o1, O_STR) && IS_O(o2, O_STR)) { + return ((py_obj_base_t*)o1)->u_str == ((py_obj_base_t*)o2)->u_str; + } 
else { + assert(0); + return false; + } +} + +py_map_elem_t* py_map_lookup_helper(py_map_t *map, py_obj_t index, bool add_if_not_found) { + bool is_map_py_obj = (map->kind == MAP_PY_OBJ); + machine_uint_t hash; + if (is_map_py_obj) { + hash = py_obj_hash(index); + } else { + hash = (machine_uint_t)index; + } + uint pos = hash % map->alloc; + for (;;) { + py_map_elem_t *elem = &map->table[pos]; + if (elem->key == NULL) { + // not in table + if (add_if_not_found) { + if (map->used + 1 >= map->alloc) { + // not enough room in table, rehash it + int old_alloc = map->alloc; + py_map_elem_t *old_table = map->table; + map->alloc = get_doubling_prime_greater_or_equal_to(map->alloc + 1); + map->used = 0; + map->table = m_new(py_map_elem_t, map->alloc); + for (int i = 0; i < old_alloc; i++) { + if (old_table[i].key != NULL) { + py_map_lookup_helper(map, old_table[i].key, true)->value = old_table[i].value; + } + } + m_free(old_table); + // restart the search for the new element + pos = hash % map->alloc; + } else { + map->used += 1; + elem->key = index; + return elem; + } + } else { + return NULL; + } + } else if (elem->key == index || (is_map_py_obj && py_obj_equal(elem->key, index))) { + // found it + if (add_if_not_found) { + elem->key = index; + } + return elem; + } else { + // not yet found, keep searching in this table + pos = (pos + 1) % map->alloc; + } + } +} + +py_map_elem_t* py_qstr_map_lookup(py_map_t *map, qstr index, bool add_if_not_found) { + py_obj_t o = (py_obj_t)(machine_uint_t)index; + return py_map_lookup_helper(map, o, add_if_not_found); +} + +py_map_elem_t* py_map_lookup(py_obj_t o, py_obj_t index, bool add_if_not_found) { + assert(IS_O(o, O_MAP)); + return py_map_lookup_helper(&((py_obj_base_t *)o)->u_map, index, add_if_not_found); +} + +static bool fit_small_int(py_small_int_t o) { + return true; +} + +py_obj_t py_obj_new_const(const char *id) { + py_obj_base_t *o = m_new(py_obj_base_t, 1); + o->kind = O_CONST; + o->id = id; + return (py_obj_t)o; +} + 
+py_obj_t py_obj_new_str(qstr qstr) { + py_obj_base_t *o = m_new(py_obj_base_t, 1); + o->kind = O_STR; + o->u_str = qstr; + return (py_obj_t)o; +} + +#ifdef PY_FLOAT +py_obj_t py_obj_new_float(float_t val) { + py_obj_base_t *o = m_new(py_obj_base_t, 1); + o->kind = O_FLOAT; + o->flt = val; + return (py_obj_t)o; +} +#endif + +py_obj_t list_append(py_obj_t self_in, py_obj_t arg) { + assert(IS_O(self_in, O_LIST)); + py_obj_base_t *self = self_in; + if (self->u_list.len >= self->u_list.alloc) { + self->u_list.alloc *= 2; + self->u_list.items = m_renew(py_obj_t, self->u_list.items, self->u_list.alloc); + } + self->u_list.items[self->u_list.len++] = arg; + return arg; +} + +static qstr q_append; +static qstr q_print; +static qstr q_len; +static qstr q___build_class__; + +typedef enum { + PY_CODE_NATIVE, + PY_CODE_BYTE, +} py_code_kind_t; + +typedef struct _py_code_t { + py_code_kind_t kind; + int n_args; + union { + struct { + py_fun_t fun; + } u_native; + struct { + byte *code; + uint len; + } u_byte; + }; +} py_code_t; + +static int next_unique_code_id; +static py_code_t *unique_codes; + +py_obj_t fun_list_append; + +py_obj_t py_builtin_print(py_obj_t o) { + if (IS_O(o, O_STR)) { + // special case, print string raw + printf("%s\n", qstr_str(((py_obj_base_t*)o)->u_str)); + } else { + // print the object Python style + py_obj_print(o); + printf("\n"); + } + return py_const_none; +} + +py_obj_t py_builtin_len(py_obj_t o_in) { + py_small_int_t len = 0; + if (IS_O(o_in, O_LIST)) { + py_obj_base_t *o = o_in; + len = o->u_list.len; + } else if (IS_O(o_in, O_MAP)) { + py_obj_base_t *o = o_in; + len = o->u_map.used; + } else { + assert(0); + } + return TO_SMALL_INT(len); +} + +py_obj_t py_builtin___build_class__(py_obj_t o1, py_obj_t o2) { + py_obj_base_t *o = m_new(py_obj_base_t, 1); + o->kind = O_CLASS; + o->u_class.map = py_map_new(MAP_QSTR, 0); + return o; +} + +FILE *fp_native = NULL; + +void rt_init() { + q_append = qstr_from_str_static("append"); + q_print = 
qstr_from_str_static("print"); + q_len = qstr_from_str_static("len"); + q___build_class__ = qstr_from_str_static("__build_class__"); + + py_const_none = py_obj_new_const("None"); + py_const_false = py_obj_new_const("False"); + py_const_true = py_obj_new_const("True"); + + py_map_init(&map_name, MAP_QSTR, 0); + + py_map_init(&map_builtins, MAP_QSTR, 3); + py_qstr_map_lookup(&map_builtins, q_print, true)->value = rt_make_function_1(py_builtin_print); + py_qstr_map_lookup(&map_builtins, q_len, true)->value = rt_make_function_1(py_builtin_len); + py_qstr_map_lookup(&map_builtins, q___build_class__, true)->value = rt_make_function_2(py_builtin___build_class__); + + next_unique_code_id = 1; + unique_codes = NULL; + + fun_list_append = rt_make_function_2(list_append); + + fp_native = fopen("out-native", "wb"); +} + +void rt_deinit() { + if (fp_native != NULL) { + fclose(fp_native); + } +} + +int rt_get_new_unique_code_id() { + return next_unique_code_id++; +} + +void rt_assign_native_code(int unique_code_id, py_fun_t fun, uint len, int n_args) { + if (unique_codes == NULL) { + unique_codes = m_new(py_code_t, next_unique_code_id); + } + assert(unique_code_id < next_unique_code_id); + unique_codes[unique_code_id].kind = PY_CODE_NATIVE; + unique_codes[unique_code_id].n_args = n_args; + unique_codes[unique_code_id].u_native.fun = fun; + + DEBUG_printf("assign native code: id=%d fun=%p len=%u n_args=%d\n", unique_code_id, fun, len, n_args); + byte *fun_data = (byte*)(((machine_uint_t)fun) & (~1)); // need to clear lower bit in case it's thumb code + for (int i = 0; i < 128 && i < len; i++) { + if (i > 0 && i % 16 == 0) { + DEBUG_printf("\n"); + } + DEBUG_printf(" %02x", fun_data[i]); + } + DEBUG_printf("\n"); + + if (fp_native != NULL) { + fwrite(fun_data, len, 1, fp_native); + } +} + +void rt_assign_byte_code(int unique_code_id, byte *code, uint len, int n_args) { + if (unique_codes == NULL) { + unique_codes = m_new(py_code_t, next_unique_code_id); + } + assert(unique_code_id 
< next_unique_code_id); + unique_codes[unique_code_id].kind = PY_CODE_BYTE; + unique_codes[unique_code_id].n_args = n_args; + unique_codes[unique_code_id].u_byte.code = code; + unique_codes[unique_code_id].u_byte.len = len; + + DEBUG_printf("assign byte code: id=%d code=%p len=%u n_args=%d\n", unique_code_id, code, len, n_args); +} + +const char *py_obj_get_type_str(py_obj_t o_in) { + if (IS_SMALL_INT(o_in)) { + return "int"; + } else { + py_obj_base_t *o = o_in; + switch (o->kind) { + case O_CONST: + if (o == py_const_none) { + return "NoneType"; + } else { + return "bool"; + } + case O_STR: + return "str"; +#ifdef PY_FLOAT + case O_FLOAT: + return "float"; +#endif + case O_LIST: + return "list"; + case O_SET: + return "set"; + case O_MAP: + return "dict"; + default: + assert(0); + return "UnknownType"; + } + } +} + +void py_obj_print(py_obj_t o_in) { + if (IS_SMALL_INT(o_in)) { + printf("%d", (int)FROM_SMALL_INT(o_in)); + } else { + py_obj_base_t *o = o_in; + switch (o->kind) { + case O_CONST: + printf("%s", o->id); + break; + case O_STR: + // TODO need to escape chars etc + printf("'%s'", qstr_str(o->u_str)); + break; +#ifdef PY_FLOAT + case O_FLOAT: + printf("%f", o->flt); + break; +#endif + case O_LIST: + printf("["); + for (int i = 0; i < o->u_list.len; i++) { + if (i > 0) { + printf(", "); + } + py_obj_print(o->u_list.items[i]); + } + printf("]"); + break; + case O_SET: + { + bool first = true; + printf("{"); + for (int i = 0; i < o->u_set.alloc; i++) { + if (o->u_set.table[i] != NULL) { + if (!first) { + printf(", "); + } + first = false; + py_obj_print(o->u_set.table[i]); + } + } + printf("}"); + break; + } + case O_MAP: + { + bool first = true; + printf("{"); + for (int i = 0; i < o->u_map.alloc; i++) { + if (o->u_map.table[i].key != NULL) { + if (!first) { + printf(", "); + } + first = false; + py_obj_print(o->u_map.table[i].key); + printf(": "); + py_obj_print(o->u_map.table[i].value); + } + } + printf("}"); + break; + } + default: + assert(0); + } + } 
+} + +int rt_is_true(py_obj_t arg) { + DEBUG_OP_printf("is true %p\n", arg); + if (IS_SMALL_INT(arg)) { + if (FROM_SMALL_INT(arg) == 0) { + return 0; + } else { + return 1; + } + } else if (arg == py_const_none) { + return 0; + } else if (arg == py_const_false) { + return 0; + } else if (arg == py_const_true) { + return 1; + } else { + assert(0); + return 0; + } +} + +int rt_get_int(py_obj_t arg) { + if (IS_SMALL_INT(arg)) { + return FROM_SMALL_INT(arg); + } else { + assert(0); + return 0; + } +} + +py_obj_t rt_load_const_str(qstr qstr) { + DEBUG_OP_printf("load '%s'\n", qstr_str(qstr)); + return py_obj_new_str(qstr); +} + +py_obj_t rt_load_name(qstr qstr) { + // logic: search locals, globals, builtins + DEBUG_OP_printf("load %s\n", qstr_str(qstr)); + py_map_elem_t *elem = py_qstr_map_lookup(&map_name, qstr, false); + if (elem == NULL) { + elem = py_qstr_map_lookup(&map_builtins, qstr, false); + if (elem == NULL) { + printf("name doesn't exist: %s\n", qstr_str(qstr)); + assert(0); + } + } + return elem->value; +} + +py_obj_t rt_load_global(qstr qstr) { + return rt_load_name(qstr); // TODO +} + +py_obj_t rt_load_build_class() { + DEBUG_OP_printf("load_build_class\n"); + py_map_elem_t *elem = py_qstr_map_lookup(&map_builtins, q___build_class__, false); + if (elem == NULL) { + printf("name doesn't exist: __build_class__\n"); + assert(0); + } + return elem->value; +} + +void rt_store_name(qstr qstr, py_obj_t obj) { + DEBUG_OP_printf("store %s <- %p\n", qstr_str(qstr), obj); + py_qstr_map_lookup(&map_name, qstr, true)->value = obj; +} + +py_obj_t rt_unary_op(int op, py_obj_t arg) { + assert(0); + return py_const_none; +} + +py_obj_t rt_binary_op(int op, py_obj_t lhs, py_obj_t rhs) { + DEBUG_OP_printf("binary %d %p %p\n", op, lhs, rhs); + if (op == RT_BINARY_OP_SUBSCR) { + if (IS_O(lhs, O_LIST) && IS_SMALL_INT(rhs)) { + return ((py_obj_base_t*)lhs)->u_list.items[FROM_SMALL_INT(rhs)]; + } else { + assert(0); + } + } else if (IS_SMALL_INT(lhs) && IS_SMALL_INT(rhs)) { + 
py_small_int_t val; + switch (op) { + case RT_BINARY_OP_ADD: + case RT_BINARY_OP_INPLACE_ADD: val = FROM_SMALL_INT(lhs) + FROM_SMALL_INT(rhs); break; + case RT_BINARY_OP_SUBTRACT: val = FROM_SMALL_INT(lhs) - FROM_SMALL_INT(rhs); break; + case RT_BINARY_OP_MULTIPLY: val = FROM_SMALL_INT(lhs) * FROM_SMALL_INT(rhs); break; + case RT_BINARY_OP_FLOOR_DIVIDE: val = FROM_SMALL_INT(lhs) / FROM_SMALL_INT(rhs); break; +#ifdef PY_FLOAT + case RT_BINARY_OP_TRUE_DIVIDE: return py_obj_new_float((float_t)FROM_SMALL_INT(lhs) / (float_t)FROM_SMALL_INT(rhs)); +#endif + default: printf("%d\n", op); assert(0); val = 0; + } + if (fit_small_int(val)) { + return TO_SMALL_INT(val); + } + } else if (IS_O(lhs, O_STR) && IS_O(rhs, O_STR)) { + const char *lhs_str = qstr_str(((py_obj_base_t*)lhs)->u_str); + const char *rhs_str = qstr_str(((py_obj_base_t*)rhs)->u_str); + char *val; + switch (op) { + case RT_BINARY_OP_ADD: + case RT_BINARY_OP_INPLACE_ADD: val = m_new(char, strlen(lhs_str) + strlen(rhs_str) + 1); strcpy(val, lhs_str); strcat(val, rhs_str); break; + default: printf("%d\n", op); assert(0); val = NULL; + } + return py_obj_new_str(qstr_from_str_take(val)); + } + assert(0); + return py_const_none; +} + +py_obj_t rt_compare_op(int op, py_obj_t lhs, py_obj_t rhs) { + DEBUG_OP_printf("compare %d %p %p\n", op, lhs, rhs); + if (IS_SMALL_INT(lhs) && IS_SMALL_INT(rhs)) { + int cmp; + switch (op) { + case RT_COMPARE_OP_LESS: cmp = FROM_SMALL_INT(lhs) < FROM_SMALL_INT(rhs); break; + case RT_COMPARE_OP_MORE: cmp = FROM_SMALL_INT(lhs) > FROM_SMALL_INT(rhs); break; + default: assert(0); cmp = 0; + } + if (cmp) { + return py_const_true; + } else { + return py_const_false; + } + } + assert(0); + return py_const_none; +} + +py_obj_t rt_make_function_from_id(int unique_code_id) { + if (unique_code_id >= next_unique_code_id) { + // illegal code id + return py_const_none; + } + py_code_t *c = &unique_codes[unique_code_id]; + py_obj_base_t *o = m_new(py_obj_base_t, 1); + switch (c->kind) { + case 
PY_CODE_NATIVE: + switch (c->n_args) { + case 0: o->kind = O_FUN_0; break; + case 1: o->kind = O_FUN_1; break; + case 2: o->kind = O_FUN_2; break; + default: assert(0); + } + o->u_fun.fun = c->u_native.fun; + break; + case PY_CODE_BYTE: + o->kind = O_FUN_BC; + o->u_fun_bc.code = c->u_byte.code; + o->u_fun_bc.len = c->u_byte.len; + o->u_fun_bc.n_args = c->n_args; + break; + default: + assert(0); + } + return o; +} + +py_obj_t rt_make_function_0(py_fun_0_t fun) { + py_obj_base_t *o = m_new(py_obj_base_t, 1); + o->kind = O_FUN_0; + o->u_fun.fun = fun; + return o; +} + +py_obj_t rt_make_function_1(py_fun_1_t fun) { + py_obj_base_t *o = m_new(py_obj_base_t, 1); + o->kind = O_FUN_1; + o->u_fun.fun = fun; + return o; +} + +py_obj_t rt_make_function_2(py_fun_2_t fun) { + py_obj_base_t *o = m_new(py_obj_base_t, 1); + o->kind = O_FUN_2; + o->u_fun.fun = fun; + return o; +} + +py_obj_t rt_make_function(int n_args, py_fun_t code) { + // assumes code is a pointer to a py_fun_t (i think this is safe...) 
+ py_obj_base_t *o = m_new(py_obj_base_t, 1); + o->kind = O_FUN_N; + o->u_fun.fun = code; + o->u_fun.n_args = n_args; + return o; +} + +py_obj_t rt_call_function_0(py_obj_t fun) { + if (IS_O(fun, O_FUN_0)) { + py_obj_base_t *o = fun; + DEBUG_OP_printf("calling native %p...\n", o->u_fun.fun); + return ((py_fun_0_t)o->u_fun.fun)(); + } else if (IS_O(fun, O_FUN_BC)) { + py_obj_base_t *o = fun; + assert(o->u_fun_bc.n_args == 0); + DEBUG_OP_printf("calling byte code %p...\n", o->u_fun_bc.code); + return py_execute_byte_code(o->u_fun_bc.code, o->u_fun_bc.len, NULL, 0); + } else { + printf("fun0:%p\n", fun); + assert(0); + return py_const_none; + } +} + +py_obj_t rt_call_function_1(py_obj_t fun, py_obj_t arg) { + if (IS_O(fun, O_FUN_1)) { + py_obj_base_t *o = fun; + DEBUG_OP_printf("calling native %p...\n", o->u_fun.fun); + return ((py_fun_1_t)o->u_fun.fun)(arg); + } else if (IS_O(fun, O_FUN_BC)) { + py_obj_base_t *o = fun; + assert(o->u_fun_bc.n_args == 1); + DEBUG_OP_printf("calling byte code %p...\n", o->u_fun_bc.code); + return py_execute_byte_code(o->u_fun_bc.code, o->u_fun_bc.len, &arg, 1); + } else if (IS_O(fun, O_BOUND_METH)) { + py_obj_base_t *o = fun; + return rt_call_function_2(o->u_bound_meth.meth, o->u_bound_meth.self, arg); + } else { + printf("fun1:%p\n", fun); + assert(0); + return py_const_none; + } +} + +py_obj_t rt_call_function_2(py_obj_t fun, py_obj_t arg1, py_obj_t arg2) { + if (IS_O(fun, O_FUN_2)) { + py_obj_base_t *o = fun; + DEBUG_OP_printf("calling native %p...\n", o->u_fun.fun); + return ((py_fun_2_t)o->u_fun.fun)(arg1, arg2); + } else if (IS_O(fun, O_FUN_BC)) { + py_obj_base_t *o = fun; + assert(o->u_fun_bc.n_args == 2); + DEBUG_OP_printf("calling byte code %p...\n", o->u_fun_bc.code); + py_obj_t args[2]; + args[0] = arg1; + args[1] = arg2; + return py_execute_byte_code(o->u_fun_bc.code, o->u_fun_bc.len, &args[0], 2); + } else { + assert(0); + return py_const_none; + } +} + +py_obj_t rt_call_method_1(py_obj_t fun, py_obj_t self) { + 
DEBUG_OP_printf("call method %p %p\n", fun, self); + if (self == NULL) { + return rt_call_function_0(fun); + } else { + return rt_call_function_1(fun, self); + } +} + +py_obj_t rt_call_method_2(py_obj_t fun, py_obj_t self, py_obj_t arg) { + DEBUG_OP_printf("call method %p %p %p\n", fun, self, arg); + if (self == NULL) { + return rt_call_function_1(fun, arg); + } else { + return rt_call_function_2(fun, self, arg); + } +} + +// items are in reverse order +py_obj_t rt_build_list(int n_args, py_obj_t *items) { + py_obj_base_t *o = m_new(py_obj_base_t, 1); + o->kind = O_LIST; + o->u_list.alloc = n_args; + if (o->u_list.alloc < 4) { + o->u_list.alloc = 4; + } + o->u_list.len = n_args; + o->u_list.items = m_new(py_obj_t, o->u_list.alloc); + for (int i = 0; i < n_args; i++) { + o->u_list.items[i] = items[n_args - i - 1]; + } + return o; +} + +py_obj_t py_set_lookup(py_obj_t o_in, py_obj_t index, bool add_if_not_found) { + assert(IS_O(o_in, O_SET)); + py_obj_base_t *o = o_in; + int hash = py_obj_hash(index); + int pos = hash % o->u_set.alloc; + for (;;) { + py_obj_t elem = o->u_set.table[pos]; + if (elem == NULL) { + // not in table + if (add_if_not_found) { + if (o->u_set.used + 1 >= o->u_set.alloc) { + // not enough room in table, rehash it + int old_alloc = o->u_set.alloc; + py_obj_t *old_table = o->u_set.table; + o->u_set.alloc = get_doubling_prime_greater_or_equal_to(o->u_set.alloc + 1); + o->u_set.used = 0; + o->u_set.table = m_new(py_obj_t, o->u_set.alloc); + for (int i = 0; i < old_alloc; i++) { + if (old_table[i] != NULL) { + py_set_lookup(o, old_table[i], true); + } + } + m_free(old_table); + // restart the search for the new element + pos = hash % o->u_set.alloc; + } else { + o->u_set.used += 1; + o->u_set.table[pos] = index; + return index; + } + } else { + return NULL; + } + } else if (py_obj_equal(elem, index)) { + // found it + return elem; + } else { + // not yet found, keep searching in this table + pos = (pos + 1) % o->u_set.alloc; + } + } +} + +py_obj_t 
rt_build_set(int n_args, py_obj_t *items) { + py_obj_base_t *o = m_new(py_obj_base_t, 1); + o->kind = O_SET; + o->u_set.alloc = get_doubling_prime_greater_or_equal_to(n_args + 1); + o->u_set.used = 0; + o->u_set.table = m_new(py_obj_t, o->u_set.alloc); + for (int i = 0; i < o->u_set.alloc; i++) { + o->u_set.table[i] = NULL; + } + for (int i = 0; i < n_args; i++) { + py_set_lookup(o, items[i], true); + } + return o; +} + +py_obj_t rt_build_map(int n_args) { + py_obj_base_t *o = m_new(py_obj_base_t, 1); + o->kind = O_MAP; + py_map_init(&o->u_map, MAP_PY_OBJ, n_args); + return o; +} + +py_obj_t rt_store_map(py_obj_t map, py_obj_t key, py_obj_t value) { + assert(IS_O(map, O_MAP)); // should always be + py_map_lookup(map, key, true)->value = value; + return map; +} + +void rt_store_subscr(py_obj_t base, py_obj_t index, py_obj_t value) { + if (IS_O(base, O_LIST) && IS_SMALL_INT(index)) { + // list store + py_obj_base_t *o = base; + int idx = FROM_SMALL_INT(index); + if (idx < 0) { + idx += o->u_list.len; + } + if (0 <= idx && idx < o->u_list.len) { + o->u_list.items[idx] = value; + } else { + assert(0); + } + } else if (IS_O(base, O_MAP)) { + // map store + py_map_lookup(base, index, true)->value = value; + } else { + assert(0); + } +} + +py_obj_t build_bound_method(py_obj_t self, py_obj_t meth) { + py_obj_base_t *o = m_new(py_obj_base_t, 1); + o->kind = O_BOUND_METH; + o->u_bound_meth.meth = meth; + o->u_bound_meth.self = self; + return o; +} + +py_obj_t rt_load_attr(py_obj_t base, qstr attr) { + DEBUG_OP_printf("load %s\n", qstr_str(attr)); + if (IS_O(base, O_LIST) && attr == q_append) { + return build_bound_method(base, fun_list_append); + } else if (IS_O(base, O_CLASS)) { + py_obj_base_t *o = base; + py_map_elem_t *elem = py_qstr_map_lookup(o->u_class.map, attr, false); + if (elem == NULL) { + printf("Nope! 
%s\n", qstr_str(attr)); + assert(0); + } + return elem->value; + } else { + printf("AttributeError: '%s' object has no attribute '%s'\n", py_obj_get_type_str(base), qstr_str(attr)); + assert(0); + return py_const_none; + } +} + +void rt_load_method(py_obj_t base, qstr attr, py_obj_t *dest) { + DEBUG_OP_printf("load method %s\n", qstr_str(attr)); + if (IS_O(base, O_LIST) && attr == q_append) { + dest[1] = fun_list_append; + dest[0] = base; + } else { + dest[1] = rt_load_attr(base, attr); + dest[0] = NULL; + } +} + +void *rt_fun_table[RT_F_NUMBER_OF] = { + rt_load_const_str, + rt_load_name, + rt_load_global, + rt_load_attr, + rt_load_method, + rt_store_name, + rt_store_subscr, + rt_is_true, + rt_unary_op, + rt_build_list, + rt_build_map, + rt_store_map, + rt_build_set, + rt_make_function_from_id, + rt_call_function_0, + rt_call_function_1, + rt_call_function_2, + rt_call_method_1, + rt_call_method_2, + rt_binary_op, + rt_compare_op, +}; + +/* +void rt_f_vector(rt_fun_kind_t fun_kind) { + (rt_f_table[fun_kind])(); +} +*/ diff --git a/py/runtime.h b/py/runtime.h new file mode 100644 index 0000000000..4c842b235e --- /dev/null +++ b/py/runtime.h @@ -0,0 +1,121 @@ +typedef enum { + RT_UNARY_OP_NOT, + RT_UNARY_OP_POSITIVE, + RT_UNARY_OP_NEGATIVE, + RT_UNARY_OP_INVERT, +} rt_unary_op_t; + +typedef enum { + RT_BINARY_OP_SUBSCR, + RT_BINARY_OP_OR, + RT_BINARY_OP_XOR, + RT_BINARY_OP_AND, + RT_BINARY_OP_LSHIFT, + RT_BINARY_OP_RSHIFT, + RT_BINARY_OP_ADD, + RT_BINARY_OP_SUBTRACT, + RT_BINARY_OP_MULTIPLY, + RT_BINARY_OP_FLOOR_DIVIDE, + RT_BINARY_OP_TRUE_DIVIDE, + RT_BINARY_OP_MODULO, + RT_BINARY_OP_POWER, + RT_BINARY_OP_INPLACE_OR, + RT_BINARY_OP_INPLACE_XOR, + RT_BINARY_OP_INPLACE_AND, + RT_BINARY_OP_INPLACE_LSHIFT, + RT_BINARY_OP_INPLACE_RSHIFT, + RT_BINARY_OP_INPLACE_ADD, + RT_BINARY_OP_INPLACE_SUBTRACT, + RT_BINARY_OP_INPLACE_MULTIPLY, + RT_BINARY_OP_INPLACE_FLOOR_DIVIDE, + RT_BINARY_OP_INPLACE_TRUE_DIVIDE, + RT_BINARY_OP_INPLACE_MODULO, + RT_BINARY_OP_INPLACE_POWER, +} 
rt_binary_op_t; + +typedef enum { + RT_COMPARE_OP_LESS, + RT_COMPARE_OP_MORE, + RT_COMPARE_OP_EQUAL, + RT_COMPARE_OP_LESS_EQUAL, + RT_COMPARE_OP_MORE_EQUAL, + RT_COMPARE_OP_NOT_EQUAL, + RT_COMPARE_OP_IN, + RT_COMPARE_OP_NOT_IN, + RT_COMPARE_OP_IS, + RT_COMPARE_OP_IS_NOT, + RT_COMPARE_OP_EXCEPTION_MATCH, +} rt_compare_op_t; + +typedef enum { + RT_F_LOAD_CONST_STR = 0, + RT_F_LOAD_NAME, + RT_F_LOAD_GLOBAL, + RT_F_LOAD_ATTR, + RT_F_LOAD_METHOD, + RT_F_STORE_NAME, + RT_F_STORE_SUBSCR, + RT_F_IS_TRUE, + RT_F_UNARY_OP, + RT_F_BUILD_LIST, + RT_F_BUILD_MAP, + RT_F_STORE_MAP, + RT_F_BUILD_SET, + RT_F_MAKE_FUNCTION_FROM_ID, + RT_F_CALL_FUNCTION_0, + RT_F_CALL_FUNCTION_1, + RT_F_CALL_FUNCTION_2, + RT_F_CALL_METHOD_1, + RT_F_CALL_METHOD_2, + RT_F_BINARY_OP, + RT_F_COMPARE_OP, + RT_F_NUMBER_OF, +} rt_fun_kind_t; + +extern void *rt_fun_table[RT_F_NUMBER_OF]; + +typedef machine_ptr_t py_obj_t; // must be of pointer size +typedef py_obj_t (*py_fun_0_t)(); +typedef py_obj_t (*py_fun_1_t)(py_obj_t); +typedef py_obj_t (*py_fun_2_t)(py_obj_t, py_obj_t); +typedef py_obj_t (*py_fun_t)(); + +extern py_obj_t py_const_none; +extern py_obj_t py_const_false; +extern py_obj_t py_const_true; + +void rt_init(); +void rt_deinit(); +int rt_get_new_unique_code_id(); +void rt_assign_native_code(int unique_code_id, py_fun_t f, uint len, int n_args); +void rt_assign_byte_code(int unique_code_id, byte *code, uint len, int n_args); +py_fun_t rt_get_code(qstr id); +void py_obj_print(py_obj_t o); +int rt_is_true(py_obj_t arg); +int rt_get_int(py_obj_t arg); +py_obj_t rt_load_const_str(qstr qstr); +//py_obj_t rt_load_const_code(qstr qstr); +py_obj_t rt_load_name(qstr qstr); +py_obj_t rt_load_global(qstr qstr); +py_obj_t rt_load_build_class(); +void rt_store_name(qstr qstr, py_obj_t obj); +py_obj_t rt_unary_op(int op, py_obj_t arg); +py_obj_t rt_binary_op(int op, py_obj_t lhs, py_obj_t rhs); +py_obj_t rt_compare_op(int op, py_obj_t lhs, py_obj_t rhs); +py_obj_t rt_make_function_from_id(int unique_code_id); 
+py_obj_t rt_make_function_0(py_fun_0_t f); +py_obj_t rt_make_function_1(py_fun_1_t f); +py_obj_t rt_make_function_2(py_fun_2_t f); +py_obj_t rt_make_function(int n_args, py_fun_t code); +py_obj_t rt_call_function_0(py_obj_t fun); +py_obj_t rt_call_function_1(py_obj_t fun, py_obj_t arg); +py_obj_t rt_call_function_2(py_obj_t fun, py_obj_t arg1, py_obj_t arg2); +py_obj_t rt_call_method_1(py_obj_t fun, py_obj_t self); +py_obj_t rt_call_method_2(py_obj_t fun, py_obj_t self, py_obj_t arg); +py_obj_t rt_build_list(int n_args, py_obj_t *items); +py_obj_t rt_build_map(int n_args); +py_obj_t rt_store_map(py_obj_t map, py_obj_t key, py_obj_t value); +py_obj_t rt_build_set(int n_args, py_obj_t *items); +void rt_store_subscr(py_obj_t base, py_obj_t index, py_obj_t val); +py_obj_t rt_load_attr(py_obj_t base, qstr attr); +void rt_load_method(py_obj_t base, qstr attr, py_obj_t *dest); diff --git a/py/scope.c b/py/scope.c new file mode 100644 index 0000000000..a715b2b506 --- /dev/null +++ b/py/scope.c @@ -0,0 +1,218 @@ +#include +#include +#include +#include + +#include "misc.h" +#include "machine.h" +#include "parse.h" +#include "scope.h" + +scope_t *scope_new(scope_kind_t kind, py_parse_node_t pn) { + scope_t *scope = m_new(scope_t, 1); + scope->kind = kind; + scope->parent = NULL; + scope->next = NULL; + scope->pn = pn; + switch (kind) { + case SCOPE_MODULE: + scope->simple_name = 0; + break; + case SCOPE_FUNCTION: + case SCOPE_CLASS: + assert(PY_PARSE_NODE_IS_STRUCT(pn)); + scope->simple_name = PY_PARSE_NODE_LEAF_ARG(((py_parse_node_struct_t*)pn)->nodes[0]); + break; + case SCOPE_LAMBDA: + scope->simple_name = qstr_from_str_static(""); + break; + case SCOPE_LIST_COMP: + scope->simple_name = qstr_from_str_static(""); + break; + case SCOPE_DICT_COMP: + scope->simple_name = qstr_from_str_static(""); + break; + case SCOPE_SET_COMP: + scope->simple_name = qstr_from_str_static(""); + break; + case SCOPE_GEN_EXPR: + scope->simple_name = qstr_from_str_static(""); + break; + default: 
+ assert(0); + } + scope->id_info_alloc = 8; + scope->id_info_len = 0; + scope->id_info = m_new(id_info_t, scope->id_info_alloc); + + scope->flags = 0; + scope->num_params = 0; + /* not needed + scope->num_default_params = 0; + scope->num_dict_params = 0; + */ + scope->num_locals = 0; + scope->unique_code_id = 0; + + return scope; +} + +id_info_t *scope_find_or_add_id(scope_t *scope, qstr qstr, bool *added) { + for (int i = 0; i < scope->id_info_len; i++) { + if (scope->id_info[i].qstr == qstr) { + *added = false; + return &scope->id_info[i]; + } + } + + // make sure we have enough memory + if (scope->id_info_len >= scope->id_info_alloc) { + scope->id_info_alloc *= 2; + scope->id_info = m_renew(id_info_t, scope->id_info, scope->id_info_alloc); + } + + id_info_t *id_info; + + { + /* + // just pick next slot in array + id_info = &scope->id_info[scope->id_info_len++]; + */ + } + + { + // sort insert into id_info array, so we are equivalent to CPython (no other reason to do it) + scope->id_info_len += 1; + for (int i = scope->id_info_len - 1;; i--) { + if (i == 0 || strcmp(qstr_str(scope->id_info[i - 1].qstr), qstr_str(qstr)) < 0) { + id_info = &scope->id_info[i]; + break; + } else { + scope->id_info[i] = scope->id_info[i - 1]; + } + } + } + + id_info->param = false; + id_info->kind = 0; + id_info->qstr = qstr; + *added = true; + return id_info; +} + +id_info_t *scope_find(scope_t *scope, qstr qstr) { + for (int i = 0; i < scope->id_info_len; i++) { + if (scope->id_info[i].qstr == qstr) { + return &scope->id_info[i]; + } + } + return NULL; +} + +id_info_t *scope_find_global(scope_t *scope, qstr qstr) { + while (scope->parent != NULL) { + scope = scope->parent; + } + for (int i = 0; i < scope->id_info_len; i++) { + if (scope->id_info[i].qstr == qstr) { + return &scope->id_info[i]; + } + } + return NULL; +} + +id_info_t *scope_find_local_in_parent(scope_t *scope, qstr qstr) { + if (scope->parent == NULL) { + return NULL; + } + for (scope_t *s = scope->parent; s->parent 
!= NULL; s = s->parent) { + for (int i = 0; i < s->id_info_len; i++) { + if (s->id_info[i].qstr == qstr) { + return &s->id_info[i]; + } + } + } + return NULL; +} + +void scope_close_over_in_parents(scope_t *scope, qstr qstr) { + assert(scope->parent != NULL); // we should have at least 1 parent + for (scope_t *s = scope->parent; s->parent != NULL; s = s->parent) { + id_info_t *id = NULL; + for (int i = 0; i < s->id_info_len; i++) { + if (s->id_info[i].qstr == qstr) { + id = &s->id_info[i]; + break; + } + } + if (id == NULL) { + // variable not declared in this scope, so declare it as free and keep searching parents + bool added; + id = scope_find_or_add_id(s, qstr, &added); + assert(added); + id->kind = ID_INFO_KIND_FREE; + } else { + // variable is declared in this scope, so finish + switch (id->kind) { + case ID_INFO_KIND_LOCAL: id->kind = ID_INFO_KIND_CELL; break; // variable local to this scope, close it over + case ID_INFO_KIND_FREE: break; // variable already closed over in a parent scope + case ID_INFO_KIND_CELL: break; // variable already closed over in this scope + default: assert(0); // TODO + } + return; + } + } + assert(0); // we should have found the variable in one of the parents +} + +void scope_print_info(scope_t *s) { + if (s->kind == SCOPE_MODULE) { + printf("code \n"); + } else if (s->kind == SCOPE_LAMBDA) { + printf("code \n"); + } else if (s->kind == SCOPE_LIST_COMP) { + printf("code \n"); + } else if (s->kind == SCOPE_DICT_COMP) { + printf("code \n"); + } else if (s->kind == SCOPE_SET_COMP) { + printf("code \n"); + } else if (s->kind == SCOPE_GEN_EXPR) { + printf("code \n"); + } else { + printf("code %s\n", qstr_str(s->simple_name)); + } + /* + printf("var global:"); + for (int i = 0; i < s->id_info_len; i++) { + if (s->id_info[i].kind == ID_INFO_KIND_GLOBAL_EXPLICIT) { + printf(" %s", qstr_str(s->id_info[i].qstr)); + } + } + printf("\n"); + printf("var name:"); + for (int i = 0; i < s->id_info_len; i++) { + if (s->id_info[i].kind == 
ID_INFO_KIND_GLOBAL_IMPLICIT) { + printf(" %s", qstr_str(s->id_info[i].qstr)); + } + } + printf("\n"); + printf("var local:"); + for (int i = 0; i < s->id_info_len; i++) { + if (s->id_info[i].kind == ID_INFO_KIND_LOCAL) { + printf(" %s", qstr_str(s->id_info[i].qstr)); + } + } + printf("\n"); + printf("var free:"); + for (int i = 0; i < s->id_info_len; i++) { + if (s->id_info[i].kind == ID_INFO_KIND_FREE) { + printf(" %s", qstr_str(s->id_info[i].qstr)); + } + } + printf("\n"); + */ + printf(" flags %04x\n", s->flags); + printf(" argcount %d\n", s->num_params); + printf(" nlocals %d\n", s->num_locals); + printf(" stacksize %d\n", s->stack_size); +} diff --git a/py/scope.h b/py/scope.h new file mode 100644 index 0000000000..2d620fb4fb --- /dev/null +++ b/py/scope.h @@ -0,0 +1,58 @@ +enum { + ID_INFO_KIND_GLOBAL_IMPLICIT, + ID_INFO_KIND_GLOBAL_EXPLICIT, + ID_INFO_KIND_LOCAL, // in a function f, written and only referenced by f + ID_INFO_KIND_CELL, // in a function f, read/written by children of f + ID_INFO_KIND_FREE, // in a function f, belongs to the parent of f +}; + +typedef struct _id_info_t { + bool param; + int kind; + qstr qstr; + int local_num; // when it's an ID_INFO_KIND_LOCAL this is the unique number of the local +} id_info_t; + +// taken from python source, Include/code.h +#define SCOPE_FLAG_OPTIMISED 0x0001 +#define SCOPE_FLAG_NEWLOCALS 0x0002 +#define SCOPE_FLAG_VARARGS 0x0004 +#define SCOPE_FLAG_VARKEYWORDS 0x0008 +#define SCOPE_FLAG_NESTED 0x0010 +#define SCOPE_FLAG_GENERATOR 0x0020 +/* The SCOPE_FLAG_NOFREE flag is set if there are no free or cell variables. + This information is redundant, but it allows a single flag test + to determine whether there is any extra work to be done when the + call frame is setup. 
+*/ +#define SCOPE_FLAG_NOFREE 0x0040 + +// scope is a "block" in Python parlance +typedef enum { SCOPE_MODULE, SCOPE_FUNCTION, SCOPE_LAMBDA, SCOPE_LIST_COMP, SCOPE_DICT_COMP, SCOPE_SET_COMP, SCOPE_GEN_EXPR, SCOPE_CLASS } scope_kind_t; +typedef struct _scope_t { + scope_kind_t kind; + struct _scope_t *parent; + struct _scope_t *next; + py_parse_node_t pn; + qstr simple_name; + int id_info_alloc; + int id_info_len; + id_info_t *id_info; + int flags; + int num_params; + /* not needed + int num_default_params; + int num_dict_params; + */ + int num_locals; + int stack_size; + int unique_code_id; +} scope_t; + +scope_t *scope_new(scope_kind_t kind, py_parse_node_t pn); +id_info_t *scope_find_or_add_id(scope_t *scope, qstr qstr, bool *added); +id_info_t *scope_find(scope_t *scope, qstr qstr); +id_info_t *scope_find_global(scope_t *scope, qstr qstr); +id_info_t *scope_find_local_in_parent(scope_t *scope, qstr qstr); +void scope_close_over_in_parents(scope_t *scope, qstr qstr); +void scope_print_info(scope_t *s);