From c49d5207e9437755be364639632be31c001955a8 Mon Sep 17 00:00:00 2001 From: Damien George Date: Mon, 16 May 2022 19:20:52 +1000 Subject: [PATCH] py/persistentcode: Remove unicode feature flag from .mpy file. Prior to this commit, even with unicode disabled .py and .mpy files could contain unicode characters, eg by entering them directly in a string as utf-8 encoded. The only thing the compiler disallowed (with unicode disabled) was using \uxxxx and \Uxxxxxxxx notation to specify a character within a string with value >= 0x100; that would give a SyntaxError. With this change mpy-cross will now accept \u and \U notation to insert a character with value >= 0x100 into a string (because the -mno-unicode option is now gone, there's no way to forbid this). The runtime will happily work with strings with such characters, just like it already works with strings with characters that were utf-8 encoded directly. This change simplifies things because there are no longer any feature flags in .mpy files, and any bytecode .mpy will now run on any target. Signed-off-by: Damien George --- mpy-cross/main.c | 6 ---- py/lexer.c | 36 ++++++++++------------ py/mpconfig.h | 7 ----- py/mpstate.h | 1 - py/persistentcode.h | 12 +++----- tests/micropython/import_mpy_native_gc.py | 8 ++--- tests/micropython/import_mpy_native_x64.py | 6 ++-- tools/mpy_ld.py | 15 +++++---- 8 files changed, 35 insertions(+), 56 deletions(-) diff --git a/mpy-cross/main.c b/mpy-cross/main.c index a2daf7b22f..6199abd365 100644 --- a/mpy-cross/main.c +++ b/mpy-cross/main.c @@ -108,7 +108,6 @@ STATIC int usage(char **argv) { "\n" "Target specific options:\n" "-msmall-int-bits=number : set the maximum bits used to encode a small-int\n" - "-mno-unicode : don't support unicode in compiled strings\n" "-march= : set architecture for native emitter; x86, x64, armv6, armv7m, armv7em, armv7emsp, armv7emdp, xtensa, xtensawin\n" "\n" "Implementation specific options:\n", argv[0] @@ -203,7 +202,6 @@ MP_NOINLINE int main_(int argc, char **argv) { // set default compiler configuration mp_dynamic_compiler.small_int_bits = 31; - mp_dynamic_compiler.py_builtins_str_unicode = 1; #if defined(__i386__) mp_dynamic_compiler.native_arch = MP_NATIVE_ARCH_X86; mp_dynamic_compiler.nlr_buf_num_regs = MICROPY_NLR_NUM_REGS_X86; @@ -261,10 +259,6 @@ MP_NOINLINE int main_(int argc, char **argv) { return usage(argv); } // TODO check that small_int_bits is within range of host's capabilities - } else if (strcmp(argv[a], "-mno-unicode") == 0) { - mp_dynamic_compiler.py_builtins_str_unicode = 0; - } else if (strcmp(argv[a], "-municode") == 0) { - mp_dynamic_compiler.py_builtins_str_unicode = 1; } else if (strncmp(argv[a], "-march=", sizeof("-march=") - 1) == 0) { const char *arch = argv[a] + sizeof("-march=") - 1; if (strcmp(arch, "x86") == 0) { diff --git a/py/lexer.c b/py/lexer.c index ac406bd469..39e9662f63 100644 --- a/py/lexer.c +++ b/py/lexer.c @@ -473,25 +473,23 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring) } } if (c != MP_LEXER_EOF) { - if (MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC) { - if (c < 0x110000 && lex->tok_kind == MP_TOKEN_STRING) { - vstr_add_char(&lex->vstr, c); - } else if (c < 0x100 && lex->tok_kind == MP_TOKEN_BYTES) { - vstr_add_byte(&lex->vstr, c); - } else { - // unicode character out of range - // this raises a generic SyntaxError; could provide more info - lex->tok_kind = MP_TOKEN_INVALID; - } - } else { - // without unicode everything is just added as an 8-bit byte - if (c < 0x100) { - vstr_add_byte(&lex->vstr, c); - } else { - // 8-bit character out of range - // this raises a generic SyntaxError; could provide more info - lex->tok_kind = MP_TOKEN_INVALID; - } + #if MICROPY_PY_BUILTINS_STR_UNICODE + if (c < 0x110000 && lex->tok_kind == MP_TOKEN_STRING) { + // Valid unicode character in a str object. + vstr_add_char(&lex->vstr, c); + } else if (c < 0x100 && lex->tok_kind == MP_TOKEN_BYTES) { + // Valid byte in a bytes object. + vstr_add_byte(&lex->vstr, c); + } + #else + if (c < 0x100) { + // Without unicode everything is just added as an 8-bit byte. + vstr_add_byte(&lex->vstr, c); + } + #endif + else { + // Character out of range; this raises a generic SyntaxError. + lex->tok_kind = MP_TOKEN_INVALID; } } } else { diff --git a/py/mpconfig.h b/py/mpconfig.h index 8814d1f09c..f9894b497c 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -429,13 +429,6 @@ #define MICROPY_DYNAMIC_COMPILER (0) #endif -// Configure dynamic compiler macros -#if MICROPY_DYNAMIC_COMPILER -#define MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC (mp_dynamic_compiler.py_builtins_str_unicode) -#else -#define MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC MICROPY_PY_BUILTINS_STR_UNICODE -#endif - // Whether to enable constant folding; eg 1+2 rewritten as 3 #ifndef MICROPY_COMP_CONST_FOLDING #define MICROPY_COMP_CONST_FOLDING (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_CORE_FEATURES) diff --git a/py/mpstate.h b/py/mpstate.h index bc1aaf1e09..98aa9a8498 100644 --- a/py/mpstate.h +++ b/py/mpstate.h @@ -55,7 +55,6 @@ enum { #if MICROPY_DYNAMIC_COMPILER typedef struct mp_dynamic_compiler_t { uint8_t small_int_bits; // must be <= host small_int_bits - bool py_builtins_str_unicode; uint8_t native_arch; uint8_t nlr_buf_num_regs; } mp_dynamic_compiler_t; diff --git a/py/persistentcode.h b/py/persistentcode.h index 37263f66b3..55428e73a6 100644 --- a/py/persistentcode.h +++ b/py/persistentcode.h @@ -42,15 +42,11 @@ #define MPY_FEATURE_DECODE_ARCH(feat) ((feat) >> 2) // The feature flag bits encode the compile-time config options that affect -// the generate bytecode. Note: position 0 is now unused -// (formerly MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE). -#define MPY_FEATURE_FLAGS ( \ - ((MICROPY_PY_BUILTINS_STR_UNICODE) << 1) \ - ) +// the generate bytecode. Note: no longer used. +// (formerly MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE and MICROPY_PY_BUILTINS_STR_UNICODE). +#define MPY_FEATURE_FLAGS (0) // This is a version of the flags that can be configured at runtime. -#define MPY_FEATURE_FLAGS_DYNAMIC ( \ - ((MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC) << 1) \ - ) +#define MPY_FEATURE_FLAGS_DYNAMIC (0) // Define the host architecture #if MICROPY_EMIT_X86 diff --git a/tests/micropython/import_mpy_native_gc.py b/tests/micropython/import_mpy_native_gc.py index 44f5942697..a2012fc1cf 100644 --- a/tests/micropython/import_mpy_native_gc.py +++ b/tests/micropython/import_mpy_native_gc.py @@ -49,14 +49,14 @@ class UserFS: # by the required value of sys.implementation._mpy. features0_file_contents = { # -march=x64 - 0xA06: b'M\x06\n\x1f\x01\x004build/features0.native.mpy\x00\x8aB\xe9/\x00\x00\x00SH\x8b\x1d\x83\x00\x00\x00\xbe\x02\x00\x00\x00\xffS\x18\xbf\x01\x00\x00\x00H\x85\xc0u\x0cH\x8bC \xbe\x02\x00\x00\x00[\xff\xe0H\x0f\xaf\xf8H\xff\xc8\xeb\xe6ATUSH\x8b\x1dQ\x00\x00\x00H\x8bG\x08L\x8bc(H\x8bx\x08A\xff\xd4H\x8d5+\x00\x00\x00H\x89\xc5H\x8b\x059\x00\x00\x00\x0f\xb78\xffShH\x89\xefA\xff\xd4H\x8b\x03[]A\\\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x85\x00\x12factorial\x00\x10\r$\x01&\x9f \x01"\xff', + 0x806: b'M\x06\b\x1f\x01\x004build/features0.native.mpy\x00\x8aB\xe9/\x00\x00\x00SH\x8b\x1d\x83\x00\x00\x00\xbe\x02\x00\x00\x00\xffS\x18\xbf\x01\x00\x00\x00H\x85\xc0u\x0cH\x8bC \xbe\x02\x00\x00\x00[\xff\xe0H\x0f\xaf\xf8H\xff\xc8\xeb\xe6ATUSH\x8b\x1dQ\x00\x00\x00H\x8bG\x08L\x8bc(H\x8bx\x08A\xff\xd4H\x8d5+\x00\x00\x00H\x89\xc5H\x8b\x059\x00\x00\x00\x0f\xb78\xffShH\x89\xefA\xff\xd4H\x8b\x03[]A\\\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x85\x00\x12factorial\x00\x10\r$\x01&\x9f \x01"\xff', # -march=armv7m - 0x1606: b"M\x06\x16\x1f\x01\x004build/features0.native.mpy\x00\x88B\x1a\xe0\x00\x00\x13\xb5\nK\nJ{D\x9cX\x02!\xe3h\x98G\x03F\x01 3\xb9\x02!#i\x01\x93\x02\xb0\xbd\xe8\x10@\x18GXC\x01;\xf4\xe7\x00\xbfn\x00\x00\x00\x00\x00\x00\x00\xf8\xb5\nN\nK~D\xf4XChgiXh\xb8G\x05F\x07K\x08I\xf2XyD\x10\x88ck\x98G(F\xb8G h\xf8\xbd\x00\xbf:\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x1e\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x84\x00\x00\x00\x00\x00\x00\x00\x01\x84\x10\x12factorial\x00\x10\r>\x01@\x9f:\x01<\xff", + 0x1406: b"M\x06\x14\x1f\x01\x004build/features0.native.mpy\x00\x88B\x1a\xe0\x00\x00\x13\xb5\nK\nJ{D\x9cX\x02!\xe3h\x98G\x03F\x01 3\xb9\x02!#i\x01\x93\x02\xb0\xbd\xe8\x10@\x18GXC\x01;\xf4\xe7\x00\xbfn\x00\x00\x00\x00\x00\x00\x00\xf8\xb5\nN\nK~D\xf4XChgiXh\xb8G\x05F\x07K\x08I\xf2XyD\x10\x88ck\x98G(F\xb8G h\xf8\xbd\x00\xbf:\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x1e\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x84\x00\x00\x00\x00\x00\x00\x00\x01\x84\x10\x12factorial\x00\x10\r>\x01@\x9f:\x01<\xff", } # Populate other armv7m-derived archs based on armv7m. -for arch in (0x1A06, 0x1E06, 0x2206): - features0_file_contents[arch] = features0_file_contents[0x1606] +for arch in (0x1806, 0x1C06, 0x2006): + features0_file_contents[arch] = features0_file_contents[0x1406] if sys.implementation._mpy not in features0_file_contents: print("SKIP") diff --git a/tests/micropython/import_mpy_native_x64.py b/tests/micropython/import_mpy_native_x64.py index 3a25df1866..453d81f256 100644 --- a/tests/micropython/import_mpy_native_x64.py +++ b/tests/micropython/import_mpy_native_x64.py @@ -52,11 +52,11 @@ class UserFS: # fmt: off user_files = { # bad architecture - '/mod0.mpy': b'M\x06\xfe\x00\x10', + '/mod0.mpy': b'M\x06\xfc\x00\x10', # test loading of viper and asm '/mod1.mpy': ( - b'M\x06\x0a\x1f' # header + b'M\x06\x08\x1f' # header b'\x02' # n_qstr b'\x00' # n_obj @@ -85,7 +85,7 @@ user_files = { # test loading viper with additional scope flags and relocation '/mod2.mpy': ( - b'M\x06\x0a\x1f' # header + b'M\x06\x08\x1f' # header b'\x02' # n_qstr b'\x00' # n_obj diff --git a/tools/mpy_ld.py b/tools/mpy_ld.py index a0817500e6..6189491518 100755 --- a/tools/mpy_ld.py +++ b/tools/mpy_ld.py @@ -48,7 +48,6 @@ MP_CODE_NATIVE_VIPER = 4 MP_SCOPE_FLAG_VIPERRELOC = 0x10 MP_SCOPE_FLAG_VIPERRODATA = 0x20 MP_SCOPE_FLAG_VIPERBSS = 0x40 -MICROPY_PY_BUILTINS_STR_UNICODE = 2 MP_SMALL_INT_BITS = 31 # ELF constants @@ -116,7 +115,7 @@ class ArchData: ARCH_DATA = { "x86": ArchData( "EM_386", - MP_NATIVE_ARCH_X86 << 2 | MICROPY_PY_BUILTINS_STR_UNICODE, + MP_NATIVE_ARCH_X86 << 2, 2, 4, (R_386_PC32, R_386_GOT32, R_386_GOT32X), @@ -124,7 +123,7 @@ ARCH_DATA = { ), "x64": ArchData( "EM_X86_64", - MP_NATIVE_ARCH_X64 << 2 | MICROPY_PY_BUILTINS_STR_UNICODE, + MP_NATIVE_ARCH_X64 << 2, 2, 8, (R_X86_64_GOTPCREL, R_X86_64_REX_GOTPCRELX), @@ -132,7 +131,7 @@ ARCH_DATA = { ), "armv7m": ArchData( "EM_ARM", - MP_NATIVE_ARCH_ARMV7M << 2 | MICROPY_PY_BUILTINS_STR_UNICODE, + MP_NATIVE_ARCH_ARMV7M << 2, 2, 4, (R_ARM_GOT_BREL,), @@ -140,7 +139,7 @@ ARCH_DATA = { ), "armv7emsp": ArchData( "EM_ARM", - MP_NATIVE_ARCH_ARMV7EMSP << 2 | MICROPY_PY_BUILTINS_STR_UNICODE, + MP_NATIVE_ARCH_ARMV7EMSP << 2, 2, 4, (R_ARM_GOT_BREL,), @@ -148,7 +147,7 @@ ARCH_DATA = { ), "armv7emdp": ArchData( "EM_ARM", - MP_NATIVE_ARCH_ARMV7EMDP << 2 | MICROPY_PY_BUILTINS_STR_UNICODE, + MP_NATIVE_ARCH_ARMV7EMDP << 2, 2, 4, (R_ARM_GOT_BREL,), @@ -156,7 +155,7 @@ ARCH_DATA = { ), "xtensa": ArchData( "EM_XTENSA", - MP_NATIVE_ARCH_XTENSA << 2 | MICROPY_PY_BUILTINS_STR_UNICODE, + MP_NATIVE_ARCH_XTENSA << 2, 2, 4, (R_XTENSA_32, R_XTENSA_PLT), @@ -164,7 +163,7 @@ ARCH_DATA = { ), "xtensawin": ArchData( "EM_XTENSA", - MP_NATIVE_ARCH_XTENSAWIN << 2 | MICROPY_PY_BUILTINS_STR_UNICODE, + MP_NATIVE_ARCH_XTENSAWIN << 2, 4, 4, (R_XTENSA_32, R_XTENSA_PLT),