Merge fd7bc4f12b into 49ce7a6075

WIP: objint: Support signed parameter for int.to_bytes().
Signed-off-by: Angus Gratton <angus@redyak.com.au>
2024-04-22 21:13:04 +08:00 · 2024-04-10 14:20:23 +10:00 · 2024-04-10 13:52:29 +10:00
--- a/docs/library/builtins.rst
+++ b/docs/library/builtins.rst
@ -77,7 +77,7 @@ Functions and types
      In MicroPython, `byteorder` parameter must be positional (this is
      compatible with CPython).

-   .. method:: to_bytes(size, byteorder)
+   .. method:: to_bytes(size, byteorder, / signed=False)

      In MicroPython, `byteorder` parameter must be positional (this is
      compatible with CPython).
--- a/ports/unix/modffi.c
+++ b/ports/unix/modffi.c
@ -442,7 +442,7 @@ static unsigned long long ffi_get_int_value(mp_obj_t o) {
        return MP_OBJ_SMALL_INT_VALUE(o);
    } else {
        unsigned long long res;
-        mp_obj_int_to_bytes_impl(o, MP_ENDIANNESS_BIG, sizeof(res), (byte *)&res);
+        mp_obj_int_to_bytes_impl(o, MP_ENDIANNESS_BIG, true, sizeof(res), (byte *)&res);
        return res;
    }
 }
--- a/py/binary.c
+++ b/py/binary.c
@ -444,7 +444,7 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p
        default:
            #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
            if (mp_obj_is_exact_type(val_in, &mp_type_int)) {
-                mp_obj_int_to_bytes_impl(val_in, struct_type == '>', size, p);
+                mp_obj_int_to_bytes_impl(val_in, struct_type == '>', true, size, p);
                return;
            }
            #endif
@ -482,7 +482,7 @@ void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_
            #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
            if (mp_obj_is_exact_type(val_in, &mp_type_int)) {
                size_t size = mp_binary_get_size('@', typecode, NULL);
-                mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG,
+                mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG, true,
                    size, (uint8_t *)p + index * size);
                return;
            }
--- a/py/misc.h
+++ b/py/misc.h
@ -334,4 +334,45 @@ typedef const char *mp_rom_error_text_t;
 // For now, forward directly to MP_COMPRESSED_ROM_TEXT.
 #define MP_ERROR_TEXT(x) (mp_rom_error_text_t)MP_COMPRESSED_ROM_TEXT(x)

+// Macro and inline function to measure the length (in bytes) needed to hold an
+// unambiguous representation of a small (C representable) integer.
+//
+// Implemented inline as there's a lot the compiler can optimise based on the size of INT.
+#define MP_INT_REPR_LEN(INT, IS_SIGNED) mp_int_repr_len_helper(&(INT), sizeof(INT), IS_SIGNED)
+
+static inline int mp_int_repr_len_helper(void *pint, int size, bool is_signed) {
+    int i;
+    byte *b = (byte *)pint;
+    byte ext_byte = 0x00;
+    int result = size;
+    int msb_idx, step;
+
+    #if MP_ENDIANNESS_LITTLE
+    msb_idx = size - 1;
+    step = -1;
+    #else
+    msb_idx = 0;
+    step = 1;
+    #endif
+
+    if (is_signed && (b[msb_idx] & 0x80)) {
+        ext_byte = 0xFF; // Negative number
+    }
+
+    // Count down the number of most significant bytes that don't contain
+    // any significant values (i.e. equal to the extension byte).
+    for (i = msb_idx; i >= 0 && i <= size - 1; i += step) {
+        if (b[i] != ext_byte) {
+            if (is_signed && (b[i] & 0x80) != (ext_byte & 0x80)) {
+                // Add one additional byte to hold the sign bit
+                result++;
+            }
+            break;
+        }
+        result--;
+    }
+
+    return result;
+}
+
 #endif // MICROPY_INCLUDED_PY_MISC_H
--- a/py/mpz.c
+++ b/py/mpz.c
@ -1589,7 +1589,7 @@ bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) {
    return true;
 }

-void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
+bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf) {
    byte *b = buf;
    if (big_endian) {
        b += len;
@ -1598,6 +1598,8 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
    int bits = 0;
    mpz_dbl_dig_t d = 0;
    mpz_dbl_dig_t carry = 1;
+    size_t olen = len; // bytes in output buffer
+    bool ok = true;
    for (size_t zlen = z->len; zlen > 0; --zlen) {
        bits += DIG_SIZE;
        d = (d << DIG_SIZE) | *zdig++;
@ -1607,28 +1609,32 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
                val = (~val & 0xff) + carry;
                carry = val >> 8;
            }
+
+            if (!olen) {
+                // Buffer is full, only OK if all remaining bytes are zeroes
+                ok = ok && ((byte)val == 0);
+                continue;
+            }
+
            if (big_endian) {
                *--b = val;
-                if (b == buf) {
-                    return;
-                }
            } else {
                *b++ = val;
-                if (b == buf + len) {
-                    return;
-                }
            }
+            olen--;
        }
    }

-    // fill remainder of buf with zero/sign extension of the integer
-    if (big_endian) {
-        len = b - buf;
+    if (as_signed && olen == 0 && len > 0) {
+        // If output exhausted then ensure there was enough space for the sign bit
+        byte most_sig = big_endian ? buf[0] : buf[len - 1];
+        ok = ok && (bool)(most_sig & 0x80) == (bool)z->neg;
    } else {
-        len = buf + len - b;
-        buf = b;
+        // fill remainder of buf with zero/sign extension of the integer
+        memset(big_endian ? buf : b, z->neg ? 0xff : 0x00, olen);
    }
-    memset(buf, z->neg ? 0xff : 0x00, len);
+
+    return ok;
 }

 #if MICROPY_PY_BUILTINS_FLOAT
--- a/py/mpz.h
+++ b/py/mpz.h
@ -93,9 +93,9 @@ typedef int8_t mpz_dbl_dig_signed_t;
 typedef struct _mpz_t {
    // Zero has neg=0, len=0.  Negative zero is not allowed.
    size_t neg : 1;
-    size_t fixed_dig : 1;
-    size_t alloc : (8 * sizeof(size_t) - 2);
-    size_t len;
+    size_t fixed_dig : 1; // flag, 'dig' buffer cannot be reallocated
+    size_t alloc : (8 * sizeof(size_t) - 2); // number of entries allocated in 'dig'
+    size_t len; // number of entries used in 'dig'
    mpz_dig_t *dig;
 } mpz_t;

@ -145,7 +145,8 @@ static inline size_t mpz_max_num_bits(const mpz_t *z) {
 mp_int_t mpz_hash(const mpz_t *z);
 bool mpz_as_int_checked(const mpz_t *z, mp_int_t *value);
 bool mpz_as_uint_checked(const mpz_t *z, mp_uint_t *value);
-void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf);
+// Returns true if 'z' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
+bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf);
 #if MICROPY_PY_BUILTINS_FLOAT
 mp_float_t mpz_as_float(const mpz_t *z);
 #endif
--- a/py/objint.c
+++ b/py/objint.c
@ -420,35 +420,59 @@ static mp_obj_t int_from_bytes(size_t n_args, const mp_obj_t *args) {
 static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_from_bytes_fun_obj, 3, 4, int_from_bytes);
 static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_bytes_fun_obj));

-static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) {
-    // TODO: Support signed param (assumes signed=False)
-    (void)n_args;
+static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Only supported kwarg is 'signed'
+    enum { ARG_signed };
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_signed, MP_ARG_BOOL, {.u_bool = false} },
+    };

-    mp_int_t len = mp_obj_get_int(args[1]);
-    if (len < 0) {
+    // Parse positional args
+    mp_obj_t self = pos_args[0];
+    mp_int_t dlen = mp_obj_get_int(pos_args[1]);
+    if (dlen < 0) {
        mp_raise_ValueError(NULL);
    }
-    bool big_endian = args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little);
+    bool big_endian = pos_args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little);
+
+    // parse kwargs
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args - 3, pos_args + 3, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+    bool as_signed = args[ARG_signed].u_bool;
+
+    bool overflow;

    vstr_t vstr;
-    vstr_init_len(&vstr, len);
+    vstr_init_len(&vstr, dlen);
    byte *data = (byte *)vstr.buf;
-    memset(data, 0, len);

    #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
-    if (!mp_obj_is_small_int(args[0])) {
-        mp_obj_int_to_bytes_impl(args[0], big_endian, len, data);
+    if (!mp_obj_is_small_int(self)) {
+        overflow = !mp_obj_int_to_bytes_impl(self, big_endian, as_signed, dlen, data);
    } else
    #endif
    {
-        mp_int_t val = MP_OBJ_SMALL_INT_VALUE(args[0]);
-        size_t l = MIN((size_t)len, sizeof(val));
-        mp_binary_set_int(l, big_endian, data + (big_endian ? (len - l) : 0), val);
+        mp_int_t val = MP_OBJ_SMALL_INT_VALUE(self);
+        if (val < 0 && !as_signed) {
+            mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("can't convert negative int to unsigned"));
+        }
+        int slen = MP_INT_REPR_LEN(val, as_signed);
+        memset(data, val < 0 ? 0xFF : 0x00, dlen);
+        if (slen <= dlen) {
+            mp_binary_set_int(slen, big_endian, data + (big_endian ? (dlen - slen) : 0), val);
+            overflow = false;
+        } else {
+            overflow = true;
+        }
+    }
+
+    if (overflow) {
+        mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("int too big to convert"));
    }

    return mp_obj_new_bytes_from_vstr(&vstr);
 }
-static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_to_bytes_obj, 3, 4, int_to_bytes);
+static MP_DEFINE_CONST_FUN_OBJ_KW(int_to_bytes_obj, 3, int_to_bytes);

 static const mp_rom_map_elem_t int_locals_dict_table[] = {
    { MP_ROM_QSTR(MP_QSTR_from_bytes), MP_ROM_PTR(&int_from_bytes_obj) },
--- a/py/objint.h
+++ b/py/objint.h
@ -55,7 +55,8 @@ char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size,
    int base, const char *prefix, char base_char, char comma);
 mp_int_t mp_obj_int_hash(mp_obj_t self_in);
 mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf);
-void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
+// Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
+bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, bool as_signed, size_t len, byte *buf);
 int mp_obj_int_sign(mp_obj_t self_in);
 mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in);
 mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
--- a/py/objint_longlong.c
+++ b/py/objint_longlong.c
@ -57,10 +57,12 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
    return mp_obj_new_int_from_ll(value);
 }

-void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
+bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, bool as_signed, size_t len, byte *buf) {
    assert(mp_obj_is_exact_type(self_in, &mp_type_int));
    mp_obj_int_t *self = self_in;
    long long val = self->val;
+    size_t slen = MP_INT_REPR_LEN(val, as_signed);
+    bool ok = slen <= len;
    if (big_endian) {
        byte *b = buf + len;
        while (b > buf) {
@ -73,6 +75,7 @@ void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byt
            val >>= 8;
        }
    }
+    return ok;
 }

 int mp_obj_int_sign(mp_obj_t self_in) {
--- a/py/objint_mpz.c
+++ b/py/objint_mpz.c
@ -112,10 +112,13 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
    return MP_OBJ_FROM_PTR(o);
 }

-void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
+bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, bool as_signed, size_t len, byte *buf) {
    assert(mp_obj_is_exact_type(self_in, &mp_type_int));
    mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in);
-    mpz_as_bytes(&self->mpz, big_endian, len, buf);
+    if (self->mpz.neg && !as_signed) {
+        mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("can't convert negative int to unsigned"));
+    }
+    return mpz_as_bytes(&self->mpz, big_endian, as_signed, len, buf);
 }

 int mp_obj_int_sign(mp_obj_t self_in) {
--- a/tests/basics/int_bytes.py
+++ b/tests/basics/int_bytes.py
@ -1,3 +1,5 @@
+import sys
+
 print((10).to_bytes(1, "little"))
 print((111111).to_bytes(4, "little"))
 print((100).to_bytes(10, "little"))
@ -20,3 +22,63 @@ try:
    (1).to_bytes(-1, "little")
 except ValueError:
    print("ValueError")
+
+# zero byte destination should also raise an error
+try:
+    (1).to_bytes(0, "little")
+except OverflowError:
+    print("OverflowError")
+
+# except for converting 0 to a zero-length byte array
+print((0).to_bytes(0, "big"))
+
+# byte length can fit the integer directly
+print((0xFF).to_bytes(1, "little"))
+print((0xFF).to_bytes(1, "big"))
+print((0xEFF).to_bytes(2, "little"))
+print((0xEFF).to_bytes(2, "big"))
+print((0xCDEFF).to_bytes(3, "little"))
+print((0xCDEFF).to_bytes(3, "big"))
+
+# OverFlowError if not big enough
+
+try:
+    (0x123).to_bytes(1, "big")
+except OverflowError:
+    print("OverflowError")
+
+try:
+    (0x12345).to_bytes(2, "big")
+except OverflowError:
+    print("OverflowError")
+
+try:
+    (0x1234567).to_bytes(3, "big")
+except OverflowError:
+    print("OverflowError")
+
+
+# negative representations
+
+print((-1).to_bytes(1, "little", signed=True))
+print((-1).to_bytes(3, "little", signed=True))
+print((-1).to_bytes(1, "big", signed=True))
+print((-1).to_bytes(3, "big", signed=True))
+print((-128).to_bytes(1, "big", signed=True))
+print((-32768).to_bytes(2, "big", signed=True))
+print((-(1 << 23)).to_bytes(3, "big", signed=True))
+
+try:
+    print((-129).to_bytes(1, "big", signed=True))
+except OverflowError:
+    print("OverflowError")
+
+try:
+    print((-32769).to_bytes(2, "big", signed=True))
+except OverflowError:
+    print("OverflowError")
+
+try:
+    print(((-1 << 23) - 1).to_bytes(2, "big", signed=True))
+except OverflowError:
+    print("OverflowError")
--- a/tests/basics/int_bytes_int64.py
+++ b/tests/basics/int_bytes_int64.py
@ -0,0 +1,41 @@
+import sys
+
+# Depending on the port, the numbers in this test may be implemented as "small"
+# native 64 bit ints, arbitrary precision large ints, or large integers using 64-bit
+# long longs.
+
+try:
+    x = int.from_bytes(b"\x6F\xAB\xCD\x12\x34\x56\x78\xFB", "big")
+except OverflowError:
+    print("SKIP")  # Port can't represent this size of integer at all
+    raise SystemExit
+
+print(hex(x))
+b = x.to_bytes(8, "little")
+print(b)
+print(x.to_bytes(8, "big"))
+
+# padding in output
+print(x.to_bytes(20, "little"))
+print(x.to_bytes(20, "big"))
+
+# check that extra zero bytes don't change the internal int value
+print(int.from_bytes(b + bytes(10), "little") == x)
+
+# can't write to a zero-length bytes object
+try:
+    x.to_bytes(0, "little")
+except OverflowError:
+    print("OverflowError")
+
+# or one that it too short
+try:
+    x.to_bytes(7, "big")
+except OverflowError:
+    print("OverflowError")
+
+# negative representations
+
+print((-x).to_bytes(8, "little", signed=True))
+print((-x).to_bytes(20, "big", signed=True))
+print((-x).to_bytes(20, "little", signed=True))
--- a/tests/basics/int_bytes_intbig.py
+++ b/tests/basics/int_bytes_intbig.py
@ -1,3 +1,5 @@
+import sys
+
 print((2**64).to_bytes(9, "little"))
 print((2**64).to_bytes(9, "big"))

@ -10,5 +12,40 @@ print(ib)
 print(il.to_bytes(20, "little"))
 print(ib.to_bytes(20, "big"))

+# check padding comes out correctly
+print(il.to_bytes(40, "little"))
+print(ib.to_bytes(40, "big"))
+
 # check that extra zero bytes don't change the internal int value
 print(int.from_bytes(b + bytes(10), "little") == int.from_bytes(b, "little"))
+
+# can't write to a zero-length bytes object
+try:
+    ib.to_bytes(0, "little")
+except OverflowError:
+    print("OverflowError")
+
+# or one that it too short
+try:
+    ib.to_bytes(18, "big")
+except OverflowError:
+    print("OverflowError")
+
+# negative representations
+
+print((-ib).to_bytes(20, "big", signed=True))
+print((ib * -ib).to_bytes(40, "big", signed=True))
+
+# case where an additional byte is needed for sign bit
+ib = (2**64) - 1
+print(ib.to_bytes(8, "little"))
+
+ib *= -1
+
+try:
+    print((ib).to_bytes(8, "little", signed=True))
+except OverflowError:
+    print("OverflowError")
+
+print((ib).to_bytes(9, "little", signed=True))
+print((ib).to_bytes(9, "big", signed=True))