Merge c97d3534dc into 5114f2c1ea

2024-04-10 03:52:59 +00:00 · 2024-04-10 03:52:59 +00:00 · 2b01907c35
commit 2b01907c35
--- a/docs/library/builtins.rst
+++ b/docs/library/builtins.rst
@ -82,6 +82,10 @@ Functions and types
      In MicroPython, `byteorder` parameter must be positional (this is
      compatible with CPython).

+      .. note:: The optional ``signed`` kwarg from CPython is not supported.
+                MicroPython currently converts negative integers as signed,
+                and positive as unsigned. (:ref:`Details <cpydiff_types_int_to_bytes>`.)
+
 .. function:: isinstance()

 .. function:: issubclass()
--- a/py/misc.h
+++ b/py/misc.h
@ -334,4 +334,45 @@ typedef const char *mp_rom_error_text_t;
 // For now, forward directly to MP_COMPRESSED_ROM_TEXT.
 #define MP_ERROR_TEXT(x) (mp_rom_error_text_t)MP_COMPRESSED_ROM_TEXT(x)

+// Macro and inline function to measure the length (in bytes) needed to hold an
+// unambiguous representation of a small (C representable) integer.
+//
+// Implemented inline as there's a lot the compiler can optimise based on the size of INT.
+#define MP_INT_REPR_LEN(INT, IS_SIGNED) mp_int_repr_len_helper(&(INT), sizeof(INT), IS_SIGNED)
+
+static inline int mp_int_repr_len_helper(void *pint, int size, bool is_signed) {
+    int i;
+    byte *b = (byte *)pint;
+    byte ext_byte = 0x00;
+    int result = size;
+    int msb_idx, step;
+
+    #if MP_ENDIANNESS_LITTLE
+    msb_idx = size - 1;
+    step = -1;
+    #else
+    msb_idx = 0;
+    step = 1;
+    #endif
+
+    if (is_signed && (b[msb_idx] & 0x80)) {
+        ext_byte = 0xFF; // Negative number
+    }
+
+    // Count down the number of most significant bytes that don't contain
+    // any significant values (i.e. equal to the extension byte).
+    for (i = msb_idx; i >= 0 && i <= size - 1; i += step) {
+        if (b[i] != ext_byte) {
+            if (is_signed && (b[i] & 0x80) != (ext_byte & 0x80)) {
+                // Add one additional byte to hold the sign bit
+                result++;
+            }
+            break;
+        }
+        result--;
+    }
+
+    return result;
+}
+
 #endif // MICROPY_INCLUDED_PY_MISC_H
--- a/py/mpz.c
+++ b/py/mpz.c
@ -1589,7 +1589,7 @@ bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) {
    return true;
 }

-void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
+bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf) {
    byte *b = buf;
    if (big_endian) {
        b += len;
@ -1598,6 +1598,8 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
    int bits = 0;
    mpz_dbl_dig_t d = 0;
    mpz_dbl_dig_t carry = 1;
+    size_t olen = len; // bytes in output buffer
+    bool ok = true;
    for (size_t zlen = z->len; zlen > 0; --zlen) {
        bits += DIG_SIZE;
        d = (d << DIG_SIZE) | *zdig++;
@ -1607,28 +1609,32 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
                val = (~val & 0xff) + carry;
                carry = val >> 8;
            }
+
+            if (!olen) {
+                // Buffer is full, only OK if all remaining bytes are zeroes
+                ok = ok && ((byte)val == 0);
+                continue;
+            }
+
            if (big_endian) {
                *--b = val;
-                if (b == buf) {
-                    return;
-                }
            } else {
                *b++ = val;
-                if (b == buf + len) {
-                    return;
-                }
            }
+            olen--;
        }
    }

-    // fill remainder of buf with zero/sign extension of the integer
-    if (big_endian) {
-        len = b - buf;
+    if (as_signed && olen == 0 && len > 0) {
+        // If output exhausted then ensure there was enough space for the sign bit
+        byte most_sig = big_endian ? buf[0] : buf[len - 1];
+        ok = ok && (bool)(most_sig & 0x80) == (bool)z->neg;
    } else {
-        len = buf + len - b;
-        buf = b;
+        // fill remainder of buf with zero/sign extension of the integer
+        memset(big_endian ? buf : b, z->neg ? 0xff : 0x00, olen);
    }
-    memset(buf, z->neg ? 0xff : 0x00, len);
+
+    return ok;
 }

 #if MICROPY_PY_BUILTINS_FLOAT
--- a/py/mpz.h
+++ b/py/mpz.h
@ -93,9 +93,9 @@ typedef int8_t mpz_dbl_dig_signed_t;
 typedef struct _mpz_t {
    // Zero has neg=0, len=0.  Negative zero is not allowed.
    size_t neg : 1;
-    size_t fixed_dig : 1;
-    size_t alloc : (8 * sizeof(size_t) - 2);
-    size_t len;
+    size_t fixed_dig : 1; // flag, 'dig' buffer cannot be reallocated
+    size_t alloc : (8 * sizeof(size_t) - 2); // number of entries allocated in 'dig'
+    size_t len; // number of entries used in 'dig'
    mpz_dig_t *dig;
 } mpz_t;

@ -145,7 +145,8 @@ static inline size_t mpz_max_num_bits(const mpz_t *z) {
 mp_int_t mpz_hash(const mpz_t *z);
 bool mpz_as_int_checked(const mpz_t *z, mp_int_t *value);
 bool mpz_as_uint_checked(const mpz_t *z, mp_uint_t *value);
-void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf);
+// Returns true if 'z' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
+bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf);
 #if MICROPY_PY_BUILTINS_FLOAT
 mp_float_t mpz_as_float(const mpz_t *z);
 #endif
--- a/py/objint.c
+++ b/py/objint.c
@ -421,29 +421,39 @@ static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_from_bytes_fun_obj, 3, 4, int_fro
 static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_bytes_fun_obj));

 static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) {
-    // TODO: Support signed param (assumes signed=False)
+    // TODO: Support signed (currently behaves as if signed=(val < 0))
    (void)n_args;
+    bool overflow;

-    mp_int_t len = mp_obj_get_int(args[1]);
-    if (len < 0) {
+    mp_int_t dlen = mp_obj_get_int(args[1]);
+    if (dlen < 0) {
        mp_raise_ValueError(NULL);
    }
    bool big_endian = args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little);

    vstr_t vstr;
-    vstr_init_len(&vstr, len);
+    vstr_init_len(&vstr, dlen);
    byte *data = (byte *)vstr.buf;
-    memset(data, 0, len);

    #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
    if (!mp_obj_is_small_int(args[0])) {
-        mp_obj_int_to_bytes_impl(args[0], big_endian, len, data);
+        overflow = !mp_obj_int_to_bytes_impl(args[0], big_endian, dlen, data);
    } else
    #endif
    {
        mp_int_t val = MP_OBJ_SMALL_INT_VALUE(args[0]);
-        size_t l = MIN((size_t)len, sizeof(val));
-        mp_binary_set_int(l, big_endian, data + (big_endian ? (len - l) : 0), val);
+        int slen = MP_INT_REPR_LEN(val, val < 0);
+        memset(data, val < 0 ? 0xFF : 0x00, dlen);
+        if (slen <= dlen) {
+            mp_binary_set_int(slen, big_endian, data + (big_endian ? (dlen - slen) : 0), val);
+            overflow = false;
+        } else {
+            overflow = true;
+        }
+    }
+
+    if (overflow) {
+        mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("int too big to convert"));
    }

    return mp_obj_new_bytes_from_vstr(&vstr);
--- a/py/objint.h
+++ b/py/objint.h
@ -55,7 +55,8 @@ char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size,
    int base, const char *prefix, char base_char, char comma);
 mp_int_t mp_obj_int_hash(mp_obj_t self_in);
 mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf);
-void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
+// Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
+bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
 int mp_obj_int_sign(mp_obj_t self_in);
 mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in);
 mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
--- a/py/objint_longlong.c
+++ b/py/objint_longlong.c
@ -57,10 +57,12 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
    return mp_obj_new_int_from_ll(value);
 }

-void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
+bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
    assert(mp_obj_is_exact_type(self_in, &mp_type_int));
    mp_obj_int_t *self = self_in;
    long long val = self->val;
+    size_t slen = MP_INT_REPR_LEN(val, val < 0);
+    bool ok = slen <= len;
    if (big_endian) {
        byte *b = buf + len;
        while (b > buf) {
@ -73,6 +75,7 @@ void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byt
            val >>= 8;
        }
    }
+    return ok;
 }

 int mp_obj_int_sign(mp_obj_t self_in) {
--- a/py/objint_mpz.c
+++ b/py/objint_mpz.c
@ -112,10 +112,10 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
    return MP_OBJ_FROM_PTR(o);
 }

-void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
+bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
    assert(mp_obj_is_exact_type(self_in, &mp_type_int));
    mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in);
-    mpz_as_bytes(&self->mpz, big_endian, len, buf);
+    return mpz_as_bytes(&self->mpz, big_endian, self->mpz.neg, len, buf);
 }

 int mp_obj_int_sign(mp_obj_t self_in) {
--- a/tests/basics/int_bytes.py
+++ b/tests/basics/int_bytes.py
@ -1,3 +1,5 @@
+import sys
+
 print((10).to_bytes(1, "little"))
 print((111111).to_bytes(4, "little"))
 print((100).to_bytes(10, "little"))
@ -20,3 +22,74 @@ try:
    (1).to_bytes(-1, "little")
 except ValueError:
    print("ValueError")
+
+# zero byte destination should also raise an error
+try:
+    (1).to_bytes(0, "little")
+except OverflowError:
+    print("OverflowError")
+
+# except for converting 0 to a zero-length byte array
+print((0).to_bytes(0, "big"))
+
+# byte length can fit the integer directly
+print((0xFF).to_bytes(1, "little"))
+print((0xFF).to_bytes(1, "big"))
+print((0xEFF).to_bytes(2, "little"))
+print((0xEFF).to_bytes(2, "big"))
+print((0xCDEFF).to_bytes(3, "little"))
+print((0xCDEFF).to_bytes(3, "big"))
+
+# OverFlowError if not big enough
+
+try:
+    (0x123).to_bytes(1, "big")
+except OverflowError:
+    print("OverflowError")
+
+try:
+    (0x12345).to_bytes(2, "big")
+except OverflowError:
+    print("OverflowError")
+
+try:
+    (0x1234567).to_bytes(3, "big")
+except OverflowError:
+    print("OverflowError")
+
+
+# negative representations
+
+# MicroPython int.to_bytes() behaves as if signed=True for negative numbers
+if "micropython" in repr(sys.implementation):
+
+    def to_bytes_compat(i, l, e):
+        return i.to_bytes(l, e)
+else:
+    # Implement MicroPython compatible behaviour for CPython
+    def to_bytes_compat(i, l, e):
+        return i.to_bytes(l, e, signed=i < 0)
+
+
+print(to_bytes_compat(-1, 1, "little"))
+print(to_bytes_compat(-1, 3, "little"))
+print(to_bytes_compat(-1, 1, "big"))
+print(to_bytes_compat(-1, 3, "big"))
+print(to_bytes_compat(-128, 1, "big"))
+print(to_bytes_compat(-32768, 2, "big"))
+print(to_bytes_compat(-(1 << 23), 3, "big"))
+
+try:
+    print(to_bytes_compat(-129, 1, "big"))
+except OverflowError:
+    print("OverflowError")
+
+try:
+    print(to_bytes_compat(-32769, 2, "big"))
+except OverflowError:
+    print("OverflowError")
+
+try:
+    print(to_bytes_compat(-(1 << 23) - 1, 2, "big"))
+except OverflowError:
+    print("OverflowError")
--- a/tests/basics/int_bytes_int64.py
+++ b/tests/basics/int_bytes_int64.py
@ -0,0 +1,52 @@
+import sys
+
+# Depending on the port, the numbers in this test may be implemented as "small"
+# native 64 bit ints, arbitrary precision large ints, or large integers using 64-bit
+# long longs.
+
+try:
+    x = int.from_bytes(b"\x6F\xAB\xCD\x12\x34\x56\x78\xFB", "big")
+except OverflowError:
+    print("SKIP")  # Port can't represent this size of integer at all
+    raise SystemExit
+
+print(hex(x))
+b = x.to_bytes(8, "little")
+print(b)
+print(x.to_bytes(8, "big"))
+
+# padding in output
+print(x.to_bytes(20, "little"))
+print(x.to_bytes(20, "big"))
+
+# check that extra zero bytes don't change the internal int value
+print(int.from_bytes(b + bytes(10), "little") == x)
+
+# can't write to a zero-length bytes object
+try:
+    x.to_bytes(0, "little")
+except OverflowError:
+    print("OverflowError")
+
+# or one that it too short
+try:
+    x.to_bytes(7, "big")
+except OverflowError:
+    print("OverflowError")
+
+# negative representations
+
+# MicroPython int.to_bytes() behaves as if signed=True for negative numbers
+if "micropython" in repr(sys.implementation):
+
+    def to_bytes_compat(i, l, e):
+        return i.to_bytes(l, e)
+else:
+    # Implement MicroPython compatible behaviour for CPython
+    def to_bytes_compat(i, l, e):
+        return i.to_bytes(l, e, signed=i < 0)
+
+
+print(to_bytes_compat(-x, 8, "little"))
+print(to_bytes_compat(-x, 20, "big"))
+print(to_bytes_compat(-x, 20, "little"))
--- a/tests/basics/int_bytes_intbig.py
+++ b/tests/basics/int_bytes_intbig.py
@ -1,3 +1,5 @@
+import sys
+
 print((2**64).to_bytes(9, "little"))
 print((2**64).to_bytes(9, "big"))

@ -10,5 +12,51 @@ print(ib)
 print(il.to_bytes(20, "little"))
 print(ib.to_bytes(20, "big"))

+# check padding comes out correctly
+print(il.to_bytes(40, "little"))
+print(ib.to_bytes(40, "big"))
+
 # check that extra zero bytes don't change the internal int value
 print(int.from_bytes(b + bytes(10), "little") == int.from_bytes(b, "little"))
+
+# can't write to a zero-length bytes object
+try:
+    ib.to_bytes(0, "little")
+except OverflowError:
+    print("OverflowError")
+
+# or one that it too short
+try:
+    ib.to_bytes(18, "big")
+except OverflowError:
+    print("OverflowError")
+
+# negative representations
+
+# MicroPython int.to_bytes() behaves as if signed=True for negative numbers
+if "micropython" in repr(sys.implementation):
+
+    def to_bytes_compat(i, l, e):
+        return i.to_bytes(l, e)
+else:
+    # Implement MicroPython compatible behaviour for CPython
+    def to_bytes_compat(i, l, e):
+        return i.to_bytes(l, e, signed=i < 0)
+
+
+print(to_bytes_compat(-ib, 20, "big"))
+print(to_bytes_compat(ib * -ib, 40, "big"))
+
+# case where an additional byte is needed for sign bit
+ib = (2**64) - 1
+print(ib.to_bytes(8, "little"))
+
+ib *= -1
+
+try:
+    print(to_bytes_compat(ib, 8, "little"))
+except OverflowError:
+    print("OverflowError")
+
+print(to_bytes_compat(ib, 9, "little"))
+print(to_bytes_compat(ib, 9, "big"))
--- a/tests/cpydiff/types_int_to_bytes.py
+++ b/tests/cpydiff/types_int_to_bytes.py
@ -0,0 +1,16 @@
+"""
+categories: Types,int
+description: ``to_bytes`` method doesn't implement signed parameter.
+cause: The ``signed`` keyword-only parameter is not implemented for ``int.to_bytes()``.
+
+When the integer is negative, MicroPython behaves the same as CPython ``int.to_bytes(..., signed=True)``
+
+When the integer is non-negative, MicroPython behaves the same as CPython ``int.to_bytes(..., signed=False)``.
+
+(The difference is subtle, but in CPython a positive integer converted with ``signed=True`` may require one byte more in the output length, in order to fit the 0 sign bit.)
+
+workaround: Take care when calling ``to_bytes()`` on an integer value which may be negative.
+"""
+
+x = -1
+print(x.to_bytes(1, "big"))