py/obj: Add static safety checks to mp_obj_is_type().

Commit d96cfd13e3 introduced a regression by breaking existing users of mp_obj_is_type(.., &mp_obj_bool). This function (and associated helpers like mp_obj_is_int()) have some specific nuances, and mistakes like this one can happen again. This commit adds mp_obj_is_exact_type() which behaves like the the old mp_obj_is_type(). The new mp_obj_is_type() has the same prototype but it attempts to statically assert that it's not called with types which should be checked using mp_obj_is_type(). If called with any of these types: int, str, bool, NoneType - it will cause a compilation error. Additional checked types (e.g function types) can be added in the future. Existing users of mp_obj_is_type() with the now "invalid" types, were translated to use mp_obj_is_exact_type(). The use of MP_STATIC_ASSERT() is not bulletproof - usually GCC (and other compilers) can't statically check conditions that are only known during link-time (like variables' addresses comparison). However, in this case, GCC is able to statically detect these conditions, probably because it's the exact same object - `&mp_type_int == &mp_type_int` is detected. Misuses of this function with runtime-chosen types (e.g: `mp_obj_type_t *x = ...; mp_obj_is_type(..., x);` won't be detected. MSC is unable to detect this, so we use MP_STATIC_ASSERT_NOT_MSC(). Compiling with this commit and without the fix for d96cfd13e3 shows that it detects the problem. Signed-off-by: Yonatan Goldschmidt <yon.goldschmidt@gmail.com>
2020-01-22 13:34:19 +01:00 · 2020-01-22 13:34:19 +01:00 · 2a6ba47110
commit 2a6ba47110
--- a/py/binary.c
+++ b/py/binary.c
@ -334,7 +334,7 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p
        #endif
        default:
            #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
-            if (mp_obj_is_type(val_in, &mp_type_int)) {
+            if (mp_obj_is_exact_type(val_in, &mp_type_int)) {
                mp_obj_int_to_bytes_impl(val_in, struct_type == '>', size, p);
                return;
            }
@ -371,7 +371,7 @@ void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_
            break;
        default:
            #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
-            if (mp_obj_is_type(val_in, &mp_type_int)) {
+            if (mp_obj_is_exact_type(val_in, &mp_type_int)) {
                size_t size = mp_binary_get_size('@', typecode, NULL);
                mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG,
                    size, (uint8_t *)p + index * size);
--- a/py/map.c
+++ b/py/map.c
@ -174,7 +174,7 @@ mp_map_elem_t *MICROPY_WRAP_MP_MAP_LOOKUP(mp_map_lookup)(mp_map_t * map, mp_obj_
    if (compare_only_ptrs) {
        if (mp_obj_is_qstr(index)) {
            // Index is a qstr, so can just do ptr comparison.
-        } else if (mp_obj_is_type(index, &mp_type_str)) {
+        } else if (mp_obj_is_exact_type(index, &mp_type_str)) {
            // Index is a non-interned string.
            // We can either intern the string, or force a full equality comparison.
            // We chose the latter, since interning costs time and potentially RAM,
--- a/py/obj.c
+++ b/py/obj.c
@ -304,7 +304,7 @@ mp_int_t mp_obj_get_int(mp_const_obj_t arg) {
        return 1;
    } else if (mp_obj_is_small_int(arg)) {
        return MP_OBJ_SMALL_INT_VALUE(arg);
-    } else if (mp_obj_is_type(arg, &mp_type_int)) {
+    } else if (mp_obj_is_exact_type(arg, &mp_type_int)) {
        return mp_obj_int_get_checked(arg);
    } else {
        mp_obj_t res = mp_unary_op(MP_UNARY_OP_INT, (mp_obj_t)arg);
@ -330,7 +330,7 @@ bool mp_obj_get_int_maybe(mp_const_obj_t arg, mp_int_t *value) {
        *value = 1;
    } else if (mp_obj_is_small_int(arg)) {
        *value = MP_OBJ_SMALL_INT_VALUE(arg);
-    } else if (mp_obj_is_type(arg, &mp_type_int)) {
+    } else if (mp_obj_is_exact_type(arg, &mp_type_int)) {
        *value = mp_obj_int_get_checked(arg);
    } else {
        return false;
@ -349,7 +349,7 @@ bool mp_obj_get_float_maybe(mp_obj_t arg, mp_float_t *value) {
    } else if (mp_obj_is_small_int(arg)) {
        val = (mp_float_t)MP_OBJ_SMALL_INT_VALUE(arg);
    #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
-    } else if (mp_obj_is_type(arg, &mp_type_int)) {
+    } else if (mp_obj_is_exact_type(arg, &mp_type_int)) {
        val = mp_obj_int_as_float_impl(arg);
    #endif
    } else if (mp_obj_is_float(arg)) {
@ -389,7 +389,7 @@ bool mp_obj_get_complex_maybe(mp_obj_t arg, mp_float_t *real, mp_float_t *imag)
        *real = (mp_float_t)MP_OBJ_SMALL_INT_VALUE(arg);
        *imag = 0;
    #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
-    } else if (mp_obj_is_type(arg, &mp_type_int)) {
+    } else if (mp_obj_is_exact_type(arg, &mp_type_int)) {
        *real = mp_obj_int_as_float_impl(arg);
        *imag = 0;
    #endif
--- a/py/obj.h
+++ b/py/obj.h
@ -743,15 +743,29 @@ void *mp_obj_malloc_helper(size_t num_bytes, const mp_obj_type_t *type);
 // check for more specific object types.
 // Note: these are kept as macros because inline functions sometimes use much
 // more code space than the equivalent macros, depending on the compiler.
-#define mp_obj_is_type(o, t) (mp_obj_is_obj(o) && (((mp_obj_base_t *)MP_OBJ_TO_PTR(o))->type == (t))) // this does not work for checking int, str or fun; use below macros for that
+// don't use mp_obj_is_exact_type directly; use mp_obj_is_type which provides additional safety checks.
+// use the former only if you need to bypass these checks (because you've already checked everything else)
+#define mp_obj_is_exact_type(o, t) (mp_obj_is_obj(o) && (((mp_obj_base_t *)MP_OBJ_TO_PTR(o))->type == (t)))
+
+// Type checks are split to a separate, constant result macro. This is so it doesn't hinder the compilers's
+// optimizations (other tricks like using ({ expr; exper; }) or (exp, expr, expr) in mp_obj_is_type() result
+// in missed optimizations)
+#define mp_type_assert_not_bool_int_str_nonetype(t) (         \
+     MP_STATIC_ASSERT_NOT_MSC((t) != &mp_type_bool),          \
+     MP_STATIC_ASSERT_NOT_MSC((t) != &mp_type_int),           \
+     MP_STATIC_ASSERT_NOT_MSC((t) != &mp_type_str),           \
+     MP_STATIC_ASSERT_NOT_MSC((t) != &mp_type_NoneType),      \
+     1)
+
+#define mp_obj_is_type(o, t) (mp_type_assert_not_bool_int_str_nonetype(t) && mp_obj_is_exact_type(o, t))
 #if MICROPY_OBJ_IMMEDIATE_OBJS
 // bool's are immediates, not real objects, so test for the 2 possible values.
 #define mp_obj_is_bool(o) ((o) == mp_const_false || (o) == mp_const_true)
 #else
-#define mp_obj_is_bool(o) mp_obj_is_type(o, &mp_type_bool)
+#define mp_obj_is_bool(o) mp_obj_is_exact_type(o, &mp_type_bool)
 #endif
-#define mp_obj_is_int(o) (mp_obj_is_small_int(o) || mp_obj_is_type(o, &mp_type_int))
-#define mp_obj_is_str(o) (mp_obj_is_qstr(o) || mp_obj_is_type(o, &mp_type_str))
+#define mp_obj_is_int(o) (mp_obj_is_small_int(o) || mp_obj_is_exact_type(o, &mp_type_int))
+#define mp_obj_is_str(o) (mp_obj_is_qstr(o) || mp_obj_is_exact_type(o, &mp_type_str))
 #define mp_obj_is_str_or_bytes(o) (mp_obj_is_qstr(o) || (mp_obj_is_obj(o) && ((mp_obj_base_t *)MP_OBJ_TO_PTR(o))->type->binary_op == mp_obj_str_binary_op))
 #define mp_obj_is_dict_or_ordereddict(o) (mp_obj_is_obj(o) && ((mp_obj_base_t *)MP_OBJ_TO_PTR(o))->type->make_new == mp_obj_dict_make_new)
 #define mp_obj_is_fun(o) (mp_obj_is_obj(o) && (((mp_obj_base_t *)MP_OBJ_TO_PTR(o))->type->name == MP_QSTR_function))
--- a/py/objexcept.c
+++ b/py/objexcept.c
@ -122,7 +122,7 @@ STATIC mp_obj_exception_t *get_native_exception(mp_obj_t self_in) {

 STATIC void decompress_error_text_maybe(mp_obj_exception_t *o) {
    #if MICROPY_ROM_TEXT_COMPRESSION
-    if (o->args->len == 1 && mp_obj_is_type(o->args->items[0], &mp_type_str)) {
+    if (o->args->len == 1 && mp_obj_is_exact_type(o->args->items[0], &mp_type_str)) {
        mp_obj_str_t *o_str = MP_OBJ_TO_PTR(o->args->items[0]);
        if (MP_IS_COMPRESSED_ROM_STRING(o_str->data)) {
            byte *buf = m_new_maybe(byte, MP_MAX_UNCOMPRESSED_TEXT_LEN + 1);
--- a/py/objfun.c
+++ b/py/objfun.c
@ -468,7 +468,7 @@ STATIC mp_uint_t convert_obj_for_inline_asm(mp_obj_t obj) {
        return 0;
    } else if (obj == mp_const_true) {
        return 1;
-    } else if (mp_obj_is_type(obj, &mp_type_int)) {
+    } else if (mp_obj_is_exact_type(obj, &mp_type_int)) {
        return mp_obj_int_get_truncated(obj);
    } else if (mp_obj_is_str(obj)) {
        // pointer to the string (it's probably constant though!)
--- a/py/objint.c
+++ b/py/objint.c
@ -232,7 +232,7 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co
        // A small int; get the integer value to format.
        num = MP_OBJ_SMALL_INT_VALUE(self_in);
    } else {
-        assert(mp_obj_is_type(self_in, &mp_type_int));
+        assert(mp_obj_is_exact_type(self_in, &mp_type_int));
        // Not a small int.
        #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG
        const mp_obj_int_t *self = self_in;
--- a/py/objint_longlong.c
+++ b/py/objint_longlong.c
@ -58,7 +58,7 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
 }

 void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
-    assert(mp_obj_is_type(self_in, &mp_type_int));
+    assert(mp_obj_is_exact_type(self_in, &mp_type_int));
    mp_obj_int_t *self = self_in;
    long long val = self->val;
    if (big_endian) {
@ -131,13 +131,13 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i
    if (mp_obj_is_small_int(lhs_in)) {
        lhs_val = MP_OBJ_SMALL_INT_VALUE(lhs_in);
    } else {
-        assert(mp_obj_is_type(lhs_in, &mp_type_int));
+        assert(mp_obj_is_exact_type(lhs_in, &mp_type_int));
        lhs_val = ((mp_obj_int_t *)lhs_in)->val;
    }

    if (mp_obj_is_small_int(rhs_in)) {
        rhs_val = MP_OBJ_SMALL_INT_VALUE(rhs_in);
-    } else if (mp_obj_is_type(rhs_in, &mp_type_int)) {
+    } else if (mp_obj_is_exact_type(rhs_in, &mp_type_int)) {
        rhs_val = ((mp_obj_int_t *)rhs_in)->val;
    } else {
        // delegate to generic function to check for extra cases
@ -284,7 +284,7 @@ mp_int_t mp_obj_int_get_checked(mp_const_obj_t self_in) {

 #if MICROPY_PY_BUILTINS_FLOAT
 mp_float_t mp_obj_int_as_float_impl(mp_obj_t self_in) {
-    assert(mp_obj_is_type(self_in, &mp_type_int));
+    assert(mp_obj_is_exact_type(self_in, &mp_type_int));
    mp_obj_int_t *self = self_in;
    return self->val;
 }
--- a/py/objint_mpz.c
+++ b/py/objint_mpz.c
@ -91,7 +91,7 @@ mp_obj_int_t *mp_obj_int_new_mpz(void) {
 // This particular routine should only be called for the mpz representation of the int.
 char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size, mp_const_obj_t self_in,
    int base, const char *prefix, char base_char, char comma) {
-    assert(mp_obj_is_type(self_in, &mp_type_int));
+    assert(mp_obj_is_exact_type(self_in, &mp_type_int));
    const mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in);

    size_t needed_size = mp_int_format_size(mpz_max_num_bits(&self->mpz), base, prefix, comma);
@ -113,7 +113,7 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
 }

 void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
-    assert(mp_obj_is_type(self_in, &mp_type_int));
+    assert(mp_obj_is_exact_type(self_in, &mp_type_int));
    mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in);
    mpz_as_bytes(&self->mpz, big_endian, len, buf);
 }
@ -181,7 +181,7 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i
        mpz_init_fixed_from_int(&z_int, z_int_dig, MPZ_NUM_DIG_FOR_INT, MP_OBJ_SMALL_INT_VALUE(lhs_in));
        zlhs = &z_int;
    } else {
-        assert(mp_obj_is_type(lhs_in, &mp_type_int));
+        assert(mp_obj_is_exact_type(lhs_in, &mp_type_int));
        zlhs = &((mp_obj_int_t *)MP_OBJ_TO_PTR(lhs_in))->mpz;
    }

@ -189,7 +189,7 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i
    if (mp_obj_is_small_int(rhs_in)) {
        mpz_init_fixed_from_int(&z_int, z_int_dig, MPZ_NUM_DIG_FOR_INT, MP_OBJ_SMALL_INT_VALUE(rhs_in));
        zrhs = &z_int;
-    } else if (mp_obj_is_type(rhs_in, &mp_type_int)) {
+    } else if (mp_obj_is_exact_type(rhs_in, &mp_type_int)) {
        zrhs = &((mp_obj_int_t *)MP_OBJ_TO_PTR(rhs_in))->mpz;
    #if MICROPY_PY_BUILTINS_FLOAT
    } else if (mp_obj_is_float(rhs_in)) {
@ -447,7 +447,7 @@ mp_uint_t mp_obj_int_get_uint_checked(mp_const_obj_t self_in) {

 #if MICROPY_PY_BUILTINS_FLOAT
 mp_float_t mp_obj_int_as_float_impl(mp_obj_t self_in) {
-    assert(mp_obj_is_type(self_in, &mp_type_int));
+    assert(mp_obj_is_exact_type(self_in, &mp_type_int));
    mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in);
    return mpz_as_float(&self->mpz);
 }
--- a/py/objstr.c
+++ b/py/objstr.c
@ -2144,7 +2144,7 @@ STATIC NORETURN void bad_implicit_conversion(mp_obj_t self_in) {
 qstr mp_obj_str_get_qstr(mp_obj_t self_in) {
    if (mp_obj_is_qstr(self_in)) {
        return MP_OBJ_QSTR_VALUE(self_in);
-    } else if (mp_obj_is_type(self_in, &mp_type_str)) {
+    } else if (mp_obj_is_exact_type(self_in, &mp_type_str)) {
        mp_obj_str_t *self = MP_OBJ_TO_PTR(self_in);
        return qstr_from_strn((char *)self->data, self->len);
    } else {