diff --git a/docs/library/builtins.rst b/docs/library/builtins.rst index 7a0229c2aa..342832b819 100644 --- a/docs/library/builtins.rst +++ b/docs/library/builtins.rst @@ -77,7 +77,7 @@ Functions and types In MicroPython, `byteorder` parameter must be positional (this is compatible with CPython). - .. method:: to_bytes(size, byteorder) + .. method:: to_bytes(size, byteorder, / signed=False) In MicroPython, `byteorder` parameter must be positional (this is compatible with CPython). diff --git a/ports/unix/modffi.c b/ports/unix/modffi.c index b7d03e84dd..f343a32767 100644 --- a/ports/unix/modffi.c +++ b/ports/unix/modffi.c @@ -442,7 +442,7 @@ static unsigned long long ffi_get_int_value(mp_obj_t o) { return MP_OBJ_SMALL_INT_VALUE(o); } else { unsigned long long res; - mp_obj_int_to_bytes_impl(o, MP_ENDIANNESS_BIG, sizeof(res), (byte *)&res); + mp_obj_int_to_bytes_impl(o, MP_ENDIANNESS_BIG, true, sizeof(res), (byte *)&res); return res; } } diff --git a/py/binary.c b/py/binary.c index 7c01cfa1c8..cb5305d6d6 100644 --- a/py/binary.c +++ b/py/binary.c @@ -444,7 +444,7 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p default: #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE if (mp_obj_is_exact_type(val_in, &mp_type_int)) { - mp_obj_int_to_bytes_impl(val_in, struct_type == '>', size, p); + mp_obj_int_to_bytes_impl(val_in, struct_type == '>', true, size, p); return; } #endif @@ -482,7 +482,7 @@ void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_ #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE if (mp_obj_is_exact_type(val_in, &mp_type_int)) { size_t size = mp_binary_get_size('@', typecode, NULL); - mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG, + mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG, true, size, (uint8_t *)p + index * size); return; } diff --git a/py/misc.h b/py/misc.h index eea3e8b0fe..84e7cb770a 100644 --- a/py/misc.h +++ b/py/misc.h @@ -334,4 +334,45 @@ typedef const char *mp_rom_error_text_t; // For now, forward directly to MP_COMPRESSED_ROM_TEXT. #define MP_ERROR_TEXT(x) (mp_rom_error_text_t)MP_COMPRESSED_ROM_TEXT(x) +// Macro and inline function to measure the length (in bytes) needed to hold an +// unambiguous representation of a small (C representable) integer. +// +// Implemented inline as there's a lot the compiler can optimise based on the size of INT. +#define MP_INT_REPR_LEN(INT, IS_SIGNED) mp_int_repr_len_helper(&(INT), sizeof(INT), IS_SIGNED) + +static inline int mp_int_repr_len_helper(void *pint, int size, bool is_signed) { + int i; + byte *b = (byte *)pint; + byte ext_byte = 0x00; + int result = size; + int msb_idx, step; + + #if MP_ENDIANNESS_LITTLE + msb_idx = size - 1; + step = -1; + #else + msb_idx = 0; + step = 1; + #endif + + if (is_signed && (b[msb_idx] & 0x80)) { + ext_byte = 0xFF; // Negative number + } + + // Count down the number of most significant bytes that don't contain + // any significant values (i.e. equal to the extension byte). + for (i = msb_idx; i >= 0 && i <= size - 1; i += step) { + if (b[i] != ext_byte) { + if (is_signed && (b[i] & 0x80) != (ext_byte & 0x80)) { + // Add one additional byte to hold the sign bit + result++; + } + break; + } + result--; + } + + return result; +} + #endif // MICROPY_INCLUDED_PY_MISC_H diff --git a/py/mpz.c b/py/mpz.c index 502d4e1c13..750664ad9a 100644 --- a/py/mpz.c +++ b/py/mpz.c @@ -1589,7 +1589,7 @@ bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) { return true; } -void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) { +bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf) { byte *b = buf; if (big_endian) { b += len; @@ -1598,6 +1598,8 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) { int bits = 0; mpz_dbl_dig_t d = 0; mpz_dbl_dig_t carry = 1; + size_t olen = len; // bytes in output buffer + bool ok = true; for (size_t zlen = z->len; zlen > 0; --zlen) { bits += DIG_SIZE; d = (d << DIG_SIZE) | *zdig++; @@ -1607,28 +1609,32 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) { val = (~val & 0xff) + carry; carry = val >> 8; } + + if (!olen) { + // Buffer is full, only OK if all remaining bytes are zeroes + ok = ok && ((byte)val == 0); + continue; + } + if (big_endian) { *--b = val; - if (b == buf) { - return; - } } else { *b++ = val; - if (b == buf + len) { - return; - } } + olen--; } } - // fill remainder of buf with zero/sign extension of the integer - if (big_endian) { - len = b - buf; + if (as_signed && olen == 0 && len > 0) { + // If output exhausted then ensure there was enough space for the sign bit + byte most_sig = big_endian ? buf[0] : buf[len - 1]; + ok = ok && (bool)(most_sig & 0x80) == (bool)z->neg; } else { - len = buf + len - b; - buf = b; + // fill remainder of buf with zero/sign extension of the integer + memset(big_endian ? buf : b, z->neg ? 0xff : 0x00, olen); } - memset(buf, z->neg ? 0xff : 0x00, len); + + return ok; } #if MICROPY_PY_BUILTINS_FLOAT diff --git a/py/mpz.h b/py/mpz.h index d27f572404..6f1ac930b0 100644 --- a/py/mpz.h +++ b/py/mpz.h @@ -93,9 +93,9 @@ typedef int8_t mpz_dbl_dig_signed_t; typedef struct _mpz_t { // Zero has neg=0, len=0. Negative zero is not allowed. size_t neg : 1; - size_t fixed_dig : 1; - size_t alloc : (8 * sizeof(size_t) - 2); - size_t len; + size_t fixed_dig : 1; // flag, 'dig' buffer cannot be reallocated + size_t alloc : (8 * sizeof(size_t) - 2); // number of entries allocated in 'dig' + size_t len; // number of entries used in 'dig' mpz_dig_t *dig; } mpz_t; @@ -145,7 +145,8 @@ static inline size_t mpz_max_num_bits(const mpz_t *z) { mp_int_t mpz_hash(const mpz_t *z); bool mpz_as_int_checked(const mpz_t *z, mp_int_t *value); bool mpz_as_uint_checked(const mpz_t *z, mp_uint_t *value); -void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf); +// Returns true if 'z' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise. +bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf); #if MICROPY_PY_BUILTINS_FLOAT mp_float_t mpz_as_float(const mpz_t *z); #endif diff --git a/py/objint.c b/py/objint.c index 6caa608f33..8849c4672c 100644 --- a/py/objint.c +++ b/py/objint.c @@ -420,35 +420,59 @@ static mp_obj_t int_from_bytes(size_t n_args, const mp_obj_t *args) { static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_from_bytes_fun_obj, 3, 4, int_from_bytes); static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_bytes_fun_obj)); -static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) { - // TODO: Support signed param (assumes signed=False) - (void)n_args; +static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) { + // Only supported kwarg is 'signed' + enum { ARG_signed }; + static const mp_arg_t allowed_args[] = { + { MP_QSTR_signed, MP_ARG_BOOL, {.u_bool = false} }, + }; - mp_int_t len = mp_obj_get_int(args[1]); - if (len < 0) { + // Parse positional args + mp_obj_t self = pos_args[0]; + mp_int_t dlen = mp_obj_get_int(pos_args[1]); + if (dlen < 0) { mp_raise_ValueError(NULL); } - bool big_endian = args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little); + bool big_endian = pos_args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little); + + // parse kwargs + mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)]; + mp_arg_parse_all(n_args - 3, pos_args + 3, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args); + bool as_signed = args[ARG_signed].u_bool; + + bool overflow; vstr_t vstr; - vstr_init_len(&vstr, len); + vstr_init_len(&vstr, dlen); byte *data = (byte *)vstr.buf; - memset(data, 0, len); #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE - if (!mp_obj_is_small_int(args[0])) { - mp_obj_int_to_bytes_impl(args[0], big_endian, len, data); + if (!mp_obj_is_small_int(self)) { + overflow = !mp_obj_int_to_bytes_impl(self, big_endian, as_signed, dlen, data); } else #endif { - mp_int_t val = MP_OBJ_SMALL_INT_VALUE(args[0]); - size_t l = MIN((size_t)len, sizeof(val)); - mp_binary_set_int(l, big_endian, data + (big_endian ? (len - l) : 0), val); + mp_int_t val = MP_OBJ_SMALL_INT_VALUE(self); + if (val < 0 && !as_signed) { + mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("can't convert negative int to unsigned")); + } + int slen = MP_INT_REPR_LEN(val, as_signed); + memset(data, val < 0 ? 0xFF : 0x00, dlen); + if (slen <= dlen) { + mp_binary_set_int(slen, big_endian, data + (big_endian ? (dlen - slen) : 0), val); + overflow = false; + } else { + overflow = true; + } + } + + if (overflow) { + mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("int too big to convert")); } return mp_obj_new_bytes_from_vstr(&vstr); } -static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_to_bytes_obj, 3, 4, int_to_bytes); +static MP_DEFINE_CONST_FUN_OBJ_KW(int_to_bytes_obj, 3, int_to_bytes); static const mp_rom_map_elem_t int_locals_dict_table[] = { { MP_ROM_QSTR(MP_QSTR_from_bytes), MP_ROM_PTR(&int_from_bytes_obj) }, diff --git a/py/objint.h b/py/objint.h index 5eed87705d..b85b9d13a4 100644 --- a/py/objint.h +++ b/py/objint.h @@ -55,7 +55,8 @@ char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size, int base, const char *prefix, char base_char, char comma); mp_int_t mp_obj_int_hash(mp_obj_t self_in); mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf); -void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf); +// Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise. +bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, bool as_signed, size_t len, byte *buf); int mp_obj_int_sign(mp_obj_t self_in); mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in); mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in); diff --git a/py/objint_longlong.c b/py/objint_longlong.c index ee499e0265..ce7673bfad 100644 --- a/py/objint_longlong.c +++ b/py/objint_longlong.c @@ -57,10 +57,12 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf return mp_obj_new_int_from_ll(value); } -void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) { +bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, bool as_signed, size_t len, byte *buf) { assert(mp_obj_is_exact_type(self_in, &mp_type_int)); mp_obj_int_t *self = self_in; long long val = self->val; + size_t slen = MP_INT_REPR_LEN(val, as_signed); + bool ok = slen <= len; if (big_endian) { byte *b = buf + len; while (b > buf) { @@ -73,6 +75,7 @@ void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byt val >>= 8; } } + return ok; } int mp_obj_int_sign(mp_obj_t self_in) { diff --git a/py/objint_mpz.c b/py/objint_mpz.c index 600316a42a..93eeaee90a 100644 --- a/py/objint_mpz.c +++ b/py/objint_mpz.c @@ -112,10 +112,13 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf return MP_OBJ_FROM_PTR(o); } -void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) { +bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, bool as_signed, size_t len, byte *buf) { assert(mp_obj_is_exact_type(self_in, &mp_type_int)); mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in); - mpz_as_bytes(&self->mpz, big_endian, len, buf); + if (self->mpz.neg && !as_signed) { + mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("can't convert negative int to unsigned")); + } + return mpz_as_bytes(&self->mpz, big_endian, as_signed, len, buf); } int mp_obj_int_sign(mp_obj_t self_in) { diff --git a/tests/basics/int_bytes.py b/tests/basics/int_bytes.py index d1837ea75c..de44f5afab 100644 --- a/tests/basics/int_bytes.py +++ b/tests/basics/int_bytes.py @@ -1,3 +1,5 @@ +import sys + print((10).to_bytes(1, "little")) print((111111).to_bytes(4, "little")) print((100).to_bytes(10, "little")) @@ -20,3 +22,63 @@ try: (1).to_bytes(-1, "little") except ValueError: print("ValueError") + +# zero byte destination should also raise an error +try: + (1).to_bytes(0, "little") +except OverflowError: + print("OverflowError") + +# except for converting 0 to a zero-length byte array +print((0).to_bytes(0, "big")) + +# byte length can fit the integer directly +print((0xFF).to_bytes(1, "little")) +print((0xFF).to_bytes(1, "big")) +print((0xEFF).to_bytes(2, "little")) +print((0xEFF).to_bytes(2, "big")) +print((0xCDEFF).to_bytes(3, "little")) +print((0xCDEFF).to_bytes(3, "big")) + +# OverFlowError if not big enough + +try: + (0x123).to_bytes(1, "big") +except OverflowError: + print("OverflowError") + +try: + (0x12345).to_bytes(2, "big") +except OverflowError: + print("OverflowError") + +try: + (0x1234567).to_bytes(3, "big") +except OverflowError: + print("OverflowError") + + +# negative representations + +print((-1).to_bytes(1, "little", signed=True)) +print((-1).to_bytes(3, "little", signed=True)) +print((-1).to_bytes(1, "big", signed=True)) +print((-1).to_bytes(3, "big", signed=True)) +print((-128).to_bytes(1, "big", signed=True)) +print((-32768).to_bytes(2, "big", signed=True)) +print((-(1 << 23)).to_bytes(3, "big", signed=True)) + +try: + print((-129).to_bytes(1, "big", signed=True)) +except OverflowError: + print("OverflowError") + +try: + print((-32769).to_bytes(2, "big", signed=True)) +except OverflowError: + print("OverflowError") + +try: + print(((-1 << 23) - 1).to_bytes(2, "big", signed=True)) +except OverflowError: + print("OverflowError") diff --git a/tests/basics/int_bytes_int64.py b/tests/basics/int_bytes_int64.py new file mode 100644 index 0000000000..257558eb7c --- /dev/null +++ b/tests/basics/int_bytes_int64.py @@ -0,0 +1,41 @@ +import sys + +# Depending on the port, the numbers in this test may be implemented as "small" +# native 64 bit ints, arbitrary precision large ints, or large integers using 64-bit +# long longs. + +try: + x = int.from_bytes(b"\x6F\xAB\xCD\x12\x34\x56\x78\xFB", "big") +except OverflowError: + print("SKIP") # Port can't represent this size of integer at all + raise SystemExit + +print(hex(x)) +b = x.to_bytes(8, "little") +print(b) +print(x.to_bytes(8, "big")) + +# padding in output +print(x.to_bytes(20, "little")) +print(x.to_bytes(20, "big")) + +# check that extra zero bytes don't change the internal int value +print(int.from_bytes(b + bytes(10), "little") == x) + +# can't write to a zero-length bytes object +try: + x.to_bytes(0, "little") +except OverflowError: + print("OverflowError") + +# or one that it too short +try: + x.to_bytes(7, "big") +except OverflowError: + print("OverflowError") + +# negative representations + +print((-x).to_bytes(8, "little", signed=True)) +print((-x).to_bytes(20, "big", signed=True)) +print((-x).to_bytes(20, "little", signed=True)) diff --git a/tests/basics/int_bytes_intbig.py b/tests/basics/int_bytes_intbig.py index 147362bef1..449245ce90 100644 --- a/tests/basics/int_bytes_intbig.py +++ b/tests/basics/int_bytes_intbig.py @@ -1,3 +1,5 @@ +import sys + print((2**64).to_bytes(9, "little")) print((2**64).to_bytes(9, "big")) @@ -10,5 +12,40 @@ print(ib) print(il.to_bytes(20, "little")) print(ib.to_bytes(20, "big")) +# check padding comes out correctly +print(il.to_bytes(40, "little")) +print(ib.to_bytes(40, "big")) + # check that extra zero bytes don't change the internal int value print(int.from_bytes(b + bytes(10), "little") == int.from_bytes(b, "little")) + +# can't write to a zero-length bytes object +try: + ib.to_bytes(0, "little") +except OverflowError: + print("OverflowError") + +# or one that it too short +try: + ib.to_bytes(18, "big") +except OverflowError: + print("OverflowError") + +# negative representations + +print((-ib).to_bytes(20, "big", signed=True)) +print((ib * -ib).to_bytes(40, "big", signed=True)) + +# case where an additional byte is needed for sign bit +ib = (2**64) - 1 +print(ib.to_bytes(8, "little")) + +ib *= -1 + +try: + print((ib).to_bytes(8, "little", signed=True)) +except OverflowError: + print("OverflowError") + +print((ib).to_bytes(9, "little", signed=True)) +print((ib).to_bytes(9, "big", signed=True))