Porównaj commity

...

3 Commity

Autor SHA1 Wiadomość Data
Angus Gratton dc2a7ee4b2
Merge fd7bc4f12b into 49ce7a6075 2024-04-22 21:13:04 +08:00
Angus Gratton fd7bc4f12b WIP: objint: Support signed parameter for int.to_bytes().
Signed-off-by: Angus Gratton <angus@redyak.com.au>
2024-04-10 14:20:23 +10:00
Angus Gratton c97d3534dc py/objint: Fix int.to_bytes() buffer size checks.
* No longer overflows if byte size is 0 (closes #13041)
* Raises OverflowError in any case where number won't fit into byte length
  (Now matches CPython, previously MicroPython would return a truncated
  bytes object.)
* Document that micropython int.to_bytes() doesn't implement the optional
  signed kwarg, but will behave as if signed=True when the integer is
  negative (this is the current behaviour). Add tests for this also.

Requires changes for small ints, MPZ large ints, and "long long" large
ints.

Adds a new set of unit tests for ints between 32 and 64 bits to increase
coverage of "long long" large ints, which are otherwise untested.

Tested on unix port (64 bit small ints, MPZ long ints) and Zephyr STM32WB
board (32 bit small ints, long long large ints).

Untested on a port whose native format is big endian (don't have one at
hand).

This work was funded through GitHub Sponsors.

Signed-off-by: Angus Gratton <angus@redyak.com.au>
2024-04-10 13:52:29 +10:00
13 zmienionych plików z 258 dodań i 39 usunięć

Wyświetl plik

@ -77,7 +77,7 @@ Functions and types
In MicroPython, `byteorder` parameter must be positional (this is
compatible with CPython).
.. method:: to_bytes(size, byteorder)
.. method:: to_bytes(size, byteorder, / signed=False)
In MicroPython, `byteorder` parameter must be positional (this is
compatible with CPython).

Wyświetl plik

@ -442,7 +442,7 @@ static unsigned long long ffi_get_int_value(mp_obj_t o) {
return MP_OBJ_SMALL_INT_VALUE(o);
} else {
unsigned long long res;
mp_obj_int_to_bytes_impl(o, MP_ENDIANNESS_BIG, sizeof(res), (byte *)&res);
mp_obj_int_to_bytes_impl(o, MP_ENDIANNESS_BIG, true, sizeof(res), (byte *)&res);
return res;
}
}

Wyświetl plik

@ -444,7 +444,7 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p
default:
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
if (mp_obj_is_exact_type(val_in, &mp_type_int)) {
mp_obj_int_to_bytes_impl(val_in, struct_type == '>', size, p);
mp_obj_int_to_bytes_impl(val_in, struct_type == '>', true, size, p);
return;
}
#endif
@ -482,7 +482,7 @@ void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
if (mp_obj_is_exact_type(val_in, &mp_type_int)) {
size_t size = mp_binary_get_size('@', typecode, NULL);
mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG,
mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG, true,
size, (uint8_t *)p + index * size);
return;
}

Wyświetl plik

@ -334,4 +334,45 @@ typedef const char *mp_rom_error_text_t;
// For now, forward directly to MP_COMPRESSED_ROM_TEXT.
#define MP_ERROR_TEXT(x) (mp_rom_error_text_t)MP_COMPRESSED_ROM_TEXT(x)
// Macro and inline function to measure the length (in bytes) needed to hold an
// unambiguous representation of a small (C representable) integer.
//
// Implemented inline as there's a lot the compiler can optimise based on the size of INT.
#define MP_INT_REPR_LEN(INT, IS_SIGNED) mp_int_repr_len_helper(&(INT), sizeof(INT), IS_SIGNED)
static inline int mp_int_repr_len_helper(void *pint, int size, bool is_signed) {
int i;
byte *b = (byte *)pint;
byte ext_byte = 0x00;
int result = size;
int msb_idx, step;
#if MP_ENDIANNESS_LITTLE
msb_idx = size - 1;
step = -1;
#else
msb_idx = 0;
step = 1;
#endif
if (is_signed && (b[msb_idx] & 0x80)) {
ext_byte = 0xFF; // Negative number
}
// Count down the number of most significant bytes that don't contain
// any significant values (i.e. equal to the extension byte).
for (i = msb_idx; i >= 0 && i <= size - 1; i += step) {
if (b[i] != ext_byte) {
if (is_signed && (b[i] & 0x80) != (ext_byte & 0x80)) {
// Add one additional byte to hold the sign bit
result++;
}
break;
}
result--;
}
return result;
}
#endif // MICROPY_INCLUDED_PY_MISC_H

Wyświetl plik

@ -1589,7 +1589,7 @@ bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) {
return true;
}
void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf) {
byte *b = buf;
if (big_endian) {
b += len;
@ -1598,6 +1598,8 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
int bits = 0;
mpz_dbl_dig_t d = 0;
mpz_dbl_dig_t carry = 1;
size_t olen = len; // bytes in output buffer
bool ok = true;
for (size_t zlen = z->len; zlen > 0; --zlen) {
bits += DIG_SIZE;
d = (d << DIG_SIZE) | *zdig++;
@ -1607,28 +1609,32 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
val = (~val & 0xff) + carry;
carry = val >> 8;
}
if (!olen) {
// Buffer is full, only OK if all remaining bytes are zeroes
ok = ok && ((byte)val == 0);
continue;
}
if (big_endian) {
*--b = val;
if (b == buf) {
return;
}
} else {
*b++ = val;
if (b == buf + len) {
return;
}
}
olen--;
}
}
// fill remainder of buf with zero/sign extension of the integer
if (big_endian) {
len = b - buf;
if (as_signed && olen == 0 && len > 0) {
// If output exhausted then ensure there was enough space for the sign bit
byte most_sig = big_endian ? buf[0] : buf[len - 1];
ok = ok && (bool)(most_sig & 0x80) == (bool)z->neg;
} else {
len = buf + len - b;
buf = b;
// fill remainder of buf with zero/sign extension of the integer
memset(big_endian ? buf : b, z->neg ? 0xff : 0x00, olen);
}
memset(buf, z->neg ? 0xff : 0x00, len);
return ok;
}
#if MICROPY_PY_BUILTINS_FLOAT

Wyświetl plik

@ -93,9 +93,9 @@ typedef int8_t mpz_dbl_dig_signed_t;
typedef struct _mpz_t {
// Zero has neg=0, len=0. Negative zero is not allowed.
size_t neg : 1;
size_t fixed_dig : 1;
size_t alloc : (8 * sizeof(size_t) - 2);
size_t len;
size_t fixed_dig : 1; // flag, 'dig' buffer cannot be reallocated
size_t alloc : (8 * sizeof(size_t) - 2); // number of entries allocated in 'dig'
size_t len; // number of entries used in 'dig'
mpz_dig_t *dig;
} mpz_t;
@ -145,7 +145,8 @@ static inline size_t mpz_max_num_bits(const mpz_t *z) {
mp_int_t mpz_hash(const mpz_t *z);
bool mpz_as_int_checked(const mpz_t *z, mp_int_t *value);
bool mpz_as_uint_checked(const mpz_t *z, mp_uint_t *value);
void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf);
// Returns true if 'z' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf);
#if MICROPY_PY_BUILTINS_FLOAT
mp_float_t mpz_as_float(const mpz_t *z);
#endif

Wyświetl plik

@ -420,35 +420,59 @@ static mp_obj_t int_from_bytes(size_t n_args, const mp_obj_t *args) {
static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_from_bytes_fun_obj, 3, 4, int_from_bytes);
static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_bytes_fun_obj));
static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) {
// TODO: Support signed param (assumes signed=False)
(void)n_args;
static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
// Only supported kwarg is 'signed'
enum { ARG_signed };
static const mp_arg_t allowed_args[] = {
{ MP_QSTR_signed, MP_ARG_BOOL, {.u_bool = false} },
};
mp_int_t len = mp_obj_get_int(args[1]);
if (len < 0) {
// Parse positional args
mp_obj_t self = pos_args[0];
mp_int_t dlen = mp_obj_get_int(pos_args[1]);
if (dlen < 0) {
mp_raise_ValueError(NULL);
}
bool big_endian = args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little);
bool big_endian = pos_args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little);
// parse kwargs
mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
mp_arg_parse_all(n_args - 3, pos_args + 3, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
bool as_signed = args[ARG_signed].u_bool;
bool overflow;
vstr_t vstr;
vstr_init_len(&vstr, len);
vstr_init_len(&vstr, dlen);
byte *data = (byte *)vstr.buf;
memset(data, 0, len);
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
if (!mp_obj_is_small_int(args[0])) {
mp_obj_int_to_bytes_impl(args[0], big_endian, len, data);
if (!mp_obj_is_small_int(self)) {
overflow = !mp_obj_int_to_bytes_impl(self, big_endian, as_signed, dlen, data);
} else
#endif
{
mp_int_t val = MP_OBJ_SMALL_INT_VALUE(args[0]);
size_t l = MIN((size_t)len, sizeof(val));
mp_binary_set_int(l, big_endian, data + (big_endian ? (len - l) : 0), val);
mp_int_t val = MP_OBJ_SMALL_INT_VALUE(self);
if (val < 0 && !as_signed) {
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("can't convert negative int to unsigned"));
}
int slen = MP_INT_REPR_LEN(val, as_signed);
memset(data, val < 0 ? 0xFF : 0x00, dlen);
if (slen <= dlen) {
mp_binary_set_int(slen, big_endian, data + (big_endian ? (dlen - slen) : 0), val);
overflow = false;
} else {
overflow = true;
}
}
if (overflow) {
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("int too big to convert"));
}
return mp_obj_new_bytes_from_vstr(&vstr);
}
static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_to_bytes_obj, 3, 4, int_to_bytes);
static MP_DEFINE_CONST_FUN_OBJ_KW(int_to_bytes_obj, 3, int_to_bytes);
static const mp_rom_map_elem_t int_locals_dict_table[] = {
{ MP_ROM_QSTR(MP_QSTR_from_bytes), MP_ROM_PTR(&int_from_bytes_obj) },

Wyświetl plik

@ -55,7 +55,8 @@ char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size,
int base, const char *prefix, char base_char, char comma);
mp_int_t mp_obj_int_hash(mp_obj_t self_in);
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf);
void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
// Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, bool as_signed, size_t len, byte *buf);
int mp_obj_int_sign(mp_obj_t self_in);
mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in);
mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);

Wyświetl plik

@ -57,10 +57,12 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
return mp_obj_new_int_from_ll(value);
}
void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, bool as_signed, size_t len, byte *buf) {
assert(mp_obj_is_exact_type(self_in, &mp_type_int));
mp_obj_int_t *self = self_in;
long long val = self->val;
size_t slen = MP_INT_REPR_LEN(val, as_signed);
bool ok = slen <= len;
if (big_endian) {
byte *b = buf + len;
while (b > buf) {
@ -73,6 +75,7 @@ void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byt
val >>= 8;
}
}
return ok;
}
int mp_obj_int_sign(mp_obj_t self_in) {

Wyświetl plik

@ -112,10 +112,13 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
return MP_OBJ_FROM_PTR(o);
}
void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, bool as_signed, size_t len, byte *buf) {
assert(mp_obj_is_exact_type(self_in, &mp_type_int));
mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in);
mpz_as_bytes(&self->mpz, big_endian, len, buf);
if (self->mpz.neg && !as_signed) {
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("can't convert negative int to unsigned"));
}
return mpz_as_bytes(&self->mpz, big_endian, as_signed, len, buf);
}
int mp_obj_int_sign(mp_obj_t self_in) {

Wyświetl plik

@ -1,3 +1,5 @@
import sys
print((10).to_bytes(1, "little"))
print((111111).to_bytes(4, "little"))
print((100).to_bytes(10, "little"))
@ -20,3 +22,63 @@ try:
(1).to_bytes(-1, "little")
except ValueError:
print("ValueError")
# zero byte destination should also raise an error
try:
(1).to_bytes(0, "little")
except OverflowError:
print("OverflowError")
# except for converting 0 to a zero-length byte array
print((0).to_bytes(0, "big"))
# byte length can fit the integer directly
print((0xFF).to_bytes(1, "little"))
print((0xFF).to_bytes(1, "big"))
print((0xEFF).to_bytes(2, "little"))
print((0xEFF).to_bytes(2, "big"))
print((0xCDEFF).to_bytes(3, "little"))
print((0xCDEFF).to_bytes(3, "big"))
# OverFlowError if not big enough
try:
(0x123).to_bytes(1, "big")
except OverflowError:
print("OverflowError")
try:
(0x12345).to_bytes(2, "big")
except OverflowError:
print("OverflowError")
try:
(0x1234567).to_bytes(3, "big")
except OverflowError:
print("OverflowError")
# negative representations
print((-1).to_bytes(1, "little", signed=True))
print((-1).to_bytes(3, "little", signed=True))
print((-1).to_bytes(1, "big", signed=True))
print((-1).to_bytes(3, "big", signed=True))
print((-128).to_bytes(1, "big", signed=True))
print((-32768).to_bytes(2, "big", signed=True))
print((-(1 << 23)).to_bytes(3, "big", signed=True))
try:
print((-129).to_bytes(1, "big", signed=True))
except OverflowError:
print("OverflowError")
try:
print((-32769).to_bytes(2, "big", signed=True))
except OverflowError:
print("OverflowError")
try:
print(((-1 << 23) - 1).to_bytes(2, "big", signed=True))
except OverflowError:
print("OverflowError")

Wyświetl plik

@ -0,0 +1,41 @@
import sys
# Depending on the port, the numbers in this test may be implemented as "small"
# native 64 bit ints, arbitrary precision large ints, or large integers using 64-bit
# long longs.
try:
x = int.from_bytes(b"\x6F\xAB\xCD\x12\x34\x56\x78\xFB", "big")
except OverflowError:
print("SKIP") # Port can't represent this size of integer at all
raise SystemExit
print(hex(x))
b = x.to_bytes(8, "little")
print(b)
print(x.to_bytes(8, "big"))
# padding in output
print(x.to_bytes(20, "little"))
print(x.to_bytes(20, "big"))
# check that extra zero bytes don't change the internal int value
print(int.from_bytes(b + bytes(10), "little") == x)
# can't write to a zero-length bytes object
try:
x.to_bytes(0, "little")
except OverflowError:
print("OverflowError")
# or one that it too short
try:
x.to_bytes(7, "big")
except OverflowError:
print("OverflowError")
# negative representations
print((-x).to_bytes(8, "little", signed=True))
print((-x).to_bytes(20, "big", signed=True))
print((-x).to_bytes(20, "little", signed=True))

Wyświetl plik

@ -1,3 +1,5 @@
import sys
print((2**64).to_bytes(9, "little"))
print((2**64).to_bytes(9, "big"))
@ -10,5 +12,40 @@ print(ib)
print(il.to_bytes(20, "little"))
print(ib.to_bytes(20, "big"))
# check padding comes out correctly
print(il.to_bytes(40, "little"))
print(ib.to_bytes(40, "big"))
# check that extra zero bytes don't change the internal int value
print(int.from_bytes(b + bytes(10), "little") == int.from_bytes(b, "little"))
# can't write to a zero-length bytes object
try:
ib.to_bytes(0, "little")
except OverflowError:
print("OverflowError")
# or one that it too short
try:
ib.to_bytes(18, "big")
except OverflowError:
print("OverflowError")
# negative representations
print((-ib).to_bytes(20, "big", signed=True))
print((ib * -ib).to_bytes(40, "big", signed=True))
# case where an additional byte is needed for sign bit
ib = (2**64) - 1
print(ib.to_bytes(8, "little"))
ib *= -1
try:
print((ib).to_bytes(8, "little", signed=True))
except OverflowError:
print("OverflowError")
print((ib).to_bytes(9, "little", signed=True))
print((ib).to_bytes(9, "big", signed=True))