From 2a6ba47110be88ff1e1f5abd1bd76c353447884c Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Wed, 22 Jan 2020 13:34:19 +0100 Subject: [PATCH] py/obj: Add static safety checks to mp_obj_is_type(). Commit d96cfd13e3a464862c introduced a regression by breaking existing users of mp_obj_is_type(.., &mp_obj_bool). This function (and associated helpers like mp_obj_is_int()) have some specific nuances, and mistakes like this one can happen again. This commit adds mp_obj_is_exact_type() which behaves like the the old mp_obj_is_type(). The new mp_obj_is_type() has the same prototype but it attempts to statically assert that it's not called with types which should be checked using mp_obj_is_type(). If called with any of these types: int, str, bool, NoneType - it will cause a compilation error. Additional checked types (e.g function types) can be added in the future. Existing users of mp_obj_is_type() with the now "invalid" types, were translated to use mp_obj_is_exact_type(). The use of MP_STATIC_ASSERT() is not bulletproof - usually GCC (and other compilers) can't statically check conditions that are only known during link-time (like variables' addresses comparison). However, in this case, GCC is able to statically detect these conditions, probably because it's the exact same object - `&mp_type_int == &mp_type_int` is detected. Misuses of this function with runtime-chosen types (e.g: `mp_obj_type_t *x = ...; mp_obj_is_type(..., x);` won't be detected. MSC is unable to detect this, so we use MP_STATIC_ASSERT_NOT_MSC(). Compiling with this commit and without the fix for d96cfd13e3a464862c shows that it detects the problem. Signed-off-by: Yonatan Goldschmidt --- py/binary.c | 4 ++-- py/map.c | 2 +- py/obj.c | 8 ++++---- py/obj.h | 22 ++++++++++++++++++---- py/objexcept.c | 2 +- py/objfun.c | 2 +- py/objint.c | 2 +- py/objint_longlong.c | 8 ++++---- py/objint_mpz.c | 10 +++++----- py/objstr.c | 2 +- 10 files changed, 38 insertions(+), 24 deletions(-) diff --git a/py/binary.c b/py/binary.c index 05e658c952..f59e89ca48 100644 --- a/py/binary.c +++ b/py/binary.c @@ -334,7 +334,7 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p #endif default: #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE - if (mp_obj_is_type(val_in, &mp_type_int)) { + if (mp_obj_is_exact_type(val_in, &mp_type_int)) { mp_obj_int_to_bytes_impl(val_in, struct_type == '>', size, p); return; } @@ -371,7 +371,7 @@ void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_ break; default: #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE - if (mp_obj_is_type(val_in, &mp_type_int)) { + if (mp_obj_is_exact_type(val_in, &mp_type_int)) { size_t size = mp_binary_get_size('@', typecode, NULL); mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG, size, (uint8_t *)p + index * size); diff --git a/py/map.c b/py/map.c index b194250cb4..7a6f2233e6 100644 --- a/py/map.c +++ b/py/map.c @@ -174,7 +174,7 @@ mp_map_elem_t *MICROPY_WRAP_MP_MAP_LOOKUP(mp_map_lookup)(mp_map_t * map, mp_obj_ if (compare_only_ptrs) { if (mp_obj_is_qstr(index)) { // Index is a qstr, so can just do ptr comparison. - } else if (mp_obj_is_type(index, &mp_type_str)) { + } else if (mp_obj_is_exact_type(index, &mp_type_str)) { // Index is a non-interned string. // We can either intern the string, or force a full equality comparison. // We chose the latter, since interning costs time and potentially RAM, diff --git a/py/obj.c b/py/obj.c index 024de3c96c..51f6d85def 100644 --- a/py/obj.c +++ b/py/obj.c @@ -304,7 +304,7 @@ mp_int_t mp_obj_get_int(mp_const_obj_t arg) { return 1; } else if (mp_obj_is_small_int(arg)) { return MP_OBJ_SMALL_INT_VALUE(arg); - } else if (mp_obj_is_type(arg, &mp_type_int)) { + } else if (mp_obj_is_exact_type(arg, &mp_type_int)) { return mp_obj_int_get_checked(arg); } else { mp_obj_t res = mp_unary_op(MP_UNARY_OP_INT, (mp_obj_t)arg); @@ -330,7 +330,7 @@ bool mp_obj_get_int_maybe(mp_const_obj_t arg, mp_int_t *value) { *value = 1; } else if (mp_obj_is_small_int(arg)) { *value = MP_OBJ_SMALL_INT_VALUE(arg); - } else if (mp_obj_is_type(arg, &mp_type_int)) { + } else if (mp_obj_is_exact_type(arg, &mp_type_int)) { *value = mp_obj_int_get_checked(arg); } else { return false; @@ -349,7 +349,7 @@ bool mp_obj_get_float_maybe(mp_obj_t arg, mp_float_t *value) { } else if (mp_obj_is_small_int(arg)) { val = (mp_float_t)MP_OBJ_SMALL_INT_VALUE(arg); #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE - } else if (mp_obj_is_type(arg, &mp_type_int)) { + } else if (mp_obj_is_exact_type(arg, &mp_type_int)) { val = mp_obj_int_as_float_impl(arg); #endif } else if (mp_obj_is_float(arg)) { @@ -389,7 +389,7 @@ bool mp_obj_get_complex_maybe(mp_obj_t arg, mp_float_t *real, mp_float_t *imag) *real = (mp_float_t)MP_OBJ_SMALL_INT_VALUE(arg); *imag = 0; #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE - } else if (mp_obj_is_type(arg, &mp_type_int)) { + } else if (mp_obj_is_exact_type(arg, &mp_type_int)) { *real = mp_obj_int_as_float_impl(arg); *imag = 0; #endif diff --git a/py/obj.h b/py/obj.h index 84ca94213c..ea3d2db706 100644 --- a/py/obj.h +++ b/py/obj.h @@ -743,15 +743,29 @@ void *mp_obj_malloc_helper(size_t num_bytes, const mp_obj_type_t *type); // check for more specific object types. // Note: these are kept as macros because inline functions sometimes use much // more code space than the equivalent macros, depending on the compiler. -#define mp_obj_is_type(o, t) (mp_obj_is_obj(o) && (((mp_obj_base_t *)MP_OBJ_TO_PTR(o))->type == (t))) // this does not work for checking int, str or fun; use below macros for that +// don't use mp_obj_is_exact_type directly; use mp_obj_is_type which provides additional safety checks. +// use the former only if you need to bypass these checks (because you've already checked everything else) +#define mp_obj_is_exact_type(o, t) (mp_obj_is_obj(o) && (((mp_obj_base_t *)MP_OBJ_TO_PTR(o))->type == (t))) + +// Type checks are split to a separate, constant result macro. This is so it doesn't hinder the compilers's +// optimizations (other tricks like using ({ expr; exper; }) or (exp, expr, expr) in mp_obj_is_type() result +// in missed optimizations) +#define mp_type_assert_not_bool_int_str_nonetype(t) ( \ + MP_STATIC_ASSERT_NOT_MSC((t) != &mp_type_bool), \ + MP_STATIC_ASSERT_NOT_MSC((t) != &mp_type_int), \ + MP_STATIC_ASSERT_NOT_MSC((t) != &mp_type_str), \ + MP_STATIC_ASSERT_NOT_MSC((t) != &mp_type_NoneType), \ + 1) + +#define mp_obj_is_type(o, t) (mp_type_assert_not_bool_int_str_nonetype(t) && mp_obj_is_exact_type(o, t)) #if MICROPY_OBJ_IMMEDIATE_OBJS // bool's are immediates, not real objects, so test for the 2 possible values. #define mp_obj_is_bool(o) ((o) == mp_const_false || (o) == mp_const_true) #else -#define mp_obj_is_bool(o) mp_obj_is_type(o, &mp_type_bool) +#define mp_obj_is_bool(o) mp_obj_is_exact_type(o, &mp_type_bool) #endif -#define mp_obj_is_int(o) (mp_obj_is_small_int(o) || mp_obj_is_type(o, &mp_type_int)) -#define mp_obj_is_str(o) (mp_obj_is_qstr(o) || mp_obj_is_type(o, &mp_type_str)) +#define mp_obj_is_int(o) (mp_obj_is_small_int(o) || mp_obj_is_exact_type(o, &mp_type_int)) +#define mp_obj_is_str(o) (mp_obj_is_qstr(o) || mp_obj_is_exact_type(o, &mp_type_str)) #define mp_obj_is_str_or_bytes(o) (mp_obj_is_qstr(o) || (mp_obj_is_obj(o) && ((mp_obj_base_t *)MP_OBJ_TO_PTR(o))->type->binary_op == mp_obj_str_binary_op)) #define mp_obj_is_dict_or_ordereddict(o) (mp_obj_is_obj(o) && ((mp_obj_base_t *)MP_OBJ_TO_PTR(o))->type->make_new == mp_obj_dict_make_new) #define mp_obj_is_fun(o) (mp_obj_is_obj(o) && (((mp_obj_base_t *)MP_OBJ_TO_PTR(o))->type->name == MP_QSTR_function)) diff --git a/py/objexcept.c b/py/objexcept.c index dca287bb6e..028b73fd8b 100644 --- a/py/objexcept.c +++ b/py/objexcept.c @@ -122,7 +122,7 @@ STATIC mp_obj_exception_t *get_native_exception(mp_obj_t self_in) { STATIC void decompress_error_text_maybe(mp_obj_exception_t *o) { #if MICROPY_ROM_TEXT_COMPRESSION - if (o->args->len == 1 && mp_obj_is_type(o->args->items[0], &mp_type_str)) { + if (o->args->len == 1 && mp_obj_is_exact_type(o->args->items[0], &mp_type_str)) { mp_obj_str_t *o_str = MP_OBJ_TO_PTR(o->args->items[0]); if (MP_IS_COMPRESSED_ROM_STRING(o_str->data)) { byte *buf = m_new_maybe(byte, MP_MAX_UNCOMPRESSED_TEXT_LEN + 1); diff --git a/py/objfun.c b/py/objfun.c index 37c1eaa549..5fa9d71dda 100644 --- a/py/objfun.c +++ b/py/objfun.c @@ -468,7 +468,7 @@ STATIC mp_uint_t convert_obj_for_inline_asm(mp_obj_t obj) { return 0; } else if (obj == mp_const_true) { return 1; - } else if (mp_obj_is_type(obj, &mp_type_int)) { + } else if (mp_obj_is_exact_type(obj, &mp_type_int)) { return mp_obj_int_get_truncated(obj); } else if (mp_obj_is_str(obj)) { // pointer to the string (it's probably constant though!) diff --git a/py/objint.c b/py/objint.c index 5ff5e7de4b..740e7ea948 100644 --- a/py/objint.c +++ b/py/objint.c @@ -232,7 +232,7 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co // A small int; get the integer value to format. num = MP_OBJ_SMALL_INT_VALUE(self_in); } else { - assert(mp_obj_is_type(self_in, &mp_type_int)); + assert(mp_obj_is_exact_type(self_in, &mp_type_int)); // Not a small int. #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG const mp_obj_int_t *self = self_in; diff --git a/py/objint_longlong.c b/py/objint_longlong.c index 7fcb5462f8..1c07588367 100644 --- a/py/objint_longlong.c +++ b/py/objint_longlong.c @@ -58,7 +58,7 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf } void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) { - assert(mp_obj_is_type(self_in, &mp_type_int)); + assert(mp_obj_is_exact_type(self_in, &mp_type_int)); mp_obj_int_t *self = self_in; long long val = self->val; if (big_endian) { @@ -131,13 +131,13 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i if (mp_obj_is_small_int(lhs_in)) { lhs_val = MP_OBJ_SMALL_INT_VALUE(lhs_in); } else { - assert(mp_obj_is_type(lhs_in, &mp_type_int)); + assert(mp_obj_is_exact_type(lhs_in, &mp_type_int)); lhs_val = ((mp_obj_int_t *)lhs_in)->val; } if (mp_obj_is_small_int(rhs_in)) { rhs_val = MP_OBJ_SMALL_INT_VALUE(rhs_in); - } else if (mp_obj_is_type(rhs_in, &mp_type_int)) { + } else if (mp_obj_is_exact_type(rhs_in, &mp_type_int)) { rhs_val = ((mp_obj_int_t *)rhs_in)->val; } else { // delegate to generic function to check for extra cases @@ -284,7 +284,7 @@ mp_int_t mp_obj_int_get_checked(mp_const_obj_t self_in) { #if MICROPY_PY_BUILTINS_FLOAT mp_float_t mp_obj_int_as_float_impl(mp_obj_t self_in) { - assert(mp_obj_is_type(self_in, &mp_type_int)); + assert(mp_obj_is_exact_type(self_in, &mp_type_int)); mp_obj_int_t *self = self_in; return self->val; } diff --git a/py/objint_mpz.c b/py/objint_mpz.c index cbc4cb75a7..2811bcf2aa 100644 --- a/py/objint_mpz.c +++ b/py/objint_mpz.c @@ -91,7 +91,7 @@ mp_obj_int_t *mp_obj_int_new_mpz(void) { // This particular routine should only be called for the mpz representation of the int. char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size, mp_const_obj_t self_in, int base, const char *prefix, char base_char, char comma) { - assert(mp_obj_is_type(self_in, &mp_type_int)); + assert(mp_obj_is_exact_type(self_in, &mp_type_int)); const mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in); size_t needed_size = mp_int_format_size(mpz_max_num_bits(&self->mpz), base, prefix, comma); @@ -113,7 +113,7 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf } void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) { - assert(mp_obj_is_type(self_in, &mp_type_int)); + assert(mp_obj_is_exact_type(self_in, &mp_type_int)); mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in); mpz_as_bytes(&self->mpz, big_endian, len, buf); } @@ -181,7 +181,7 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i mpz_init_fixed_from_int(&z_int, z_int_dig, MPZ_NUM_DIG_FOR_INT, MP_OBJ_SMALL_INT_VALUE(lhs_in)); zlhs = &z_int; } else { - assert(mp_obj_is_type(lhs_in, &mp_type_int)); + assert(mp_obj_is_exact_type(lhs_in, &mp_type_int)); zlhs = &((mp_obj_int_t *)MP_OBJ_TO_PTR(lhs_in))->mpz; } @@ -189,7 +189,7 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i if (mp_obj_is_small_int(rhs_in)) { mpz_init_fixed_from_int(&z_int, z_int_dig, MPZ_NUM_DIG_FOR_INT, MP_OBJ_SMALL_INT_VALUE(rhs_in)); zrhs = &z_int; - } else if (mp_obj_is_type(rhs_in, &mp_type_int)) { + } else if (mp_obj_is_exact_type(rhs_in, &mp_type_int)) { zrhs = &((mp_obj_int_t *)MP_OBJ_TO_PTR(rhs_in))->mpz; #if MICROPY_PY_BUILTINS_FLOAT } else if (mp_obj_is_float(rhs_in)) { @@ -447,7 +447,7 @@ mp_uint_t mp_obj_int_get_uint_checked(mp_const_obj_t self_in) { #if MICROPY_PY_BUILTINS_FLOAT mp_float_t mp_obj_int_as_float_impl(mp_obj_t self_in) { - assert(mp_obj_is_type(self_in, &mp_type_int)); + assert(mp_obj_is_exact_type(self_in, &mp_type_int)); mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in); return mpz_as_float(&self->mpz); } diff --git a/py/objstr.c b/py/objstr.c index 6e5a316d78..f15a2d9744 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -2144,7 +2144,7 @@ STATIC NORETURN void bad_implicit_conversion(mp_obj_t self_in) { qstr mp_obj_str_get_qstr(mp_obj_t self_in) { if (mp_obj_is_qstr(self_in)) { return MP_OBJ_QSTR_VALUE(self_in); - } else if (mp_obj_is_type(self_in, &mp_type_str)) { + } else if (mp_obj_is_exact_type(self_in, &mp_type_str)) { mp_obj_str_t *self = MP_OBJ_TO_PTR(self_in); return qstr_from_strn((char *)self->data, self->len); } else {