WIP: objint: Support signed parameter for int.to_bytes().

Signed-off-by: Angus Gratton <angus@redyak.com.au>
2024-04-10 13:41:58 +10:00 · 2024-04-10 13:41:58 +10:00 · fd7bc4f12b
commit fd7bc4f12b
--- a/docs/library/builtins.rst
+++ b/docs/library/builtins.rst
@ -77,15 +77,11 @@ Functions and types
      In MicroPython, `byteorder` parameter must be positional (this is
      compatible with CPython).

-   .. method:: to_bytes(size, byteorder)
+   .. method:: to_bytes(size, byteorder, /, signed=False)

      In MicroPython, `byteorder` parameter must be positional (this is
      compatible with CPython).

-      .. note:: The optional ``signed`` kwarg from CPython is not supported.
-                MicroPython currently converts negative integers as signed,
-                and positive as unsigned. (:ref:`Details <cpydiff_types_int_to_bytes>`.)
-
 .. function:: isinstance()

 .. function:: issubclass()
--- a/ports/unix/modffi.c
+++ b/ports/unix/modffi.c
@ -442,7 +442,7 @@ static unsigned long long ffi_get_int_value(mp_obj_t o) {
        return MP_OBJ_SMALL_INT_VALUE(o);
    } else {
        unsigned long long res;
-        mp_obj_int_to_bytes_impl(o, MP_ENDIANNESS_BIG, sizeof(res), (byte *)&res);
+        mp_obj_int_to_bytes_impl(o, MP_ENDIANNESS_BIG, true, sizeof(res), (byte *)&res);
        return res;
    }
 }
--- a/py/binary.c
+++ b/py/binary.c
@ -444,7 +444,7 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p
        default:
            #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
            if (mp_obj_is_exact_type(val_in, &mp_type_int)) {
-                mp_obj_int_to_bytes_impl(val_in, struct_type == '>', size, p);
+                mp_obj_int_to_bytes_impl(val_in, struct_type == '>', true, size, p);
                return;
            }
            #endif
@ -482,7 +482,7 @@ void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_
            #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
            if (mp_obj_is_exact_type(val_in, &mp_type_int)) {
                size_t size = mp_binary_get_size('@', typecode, NULL);
-                mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG,
+                mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG, true,
                    size, (uint8_t *)p + index * size);
                return;
            }
--- a/py/objint.c
+++ b/py/objint.c
@ -420,29 +420,43 @@ static mp_obj_t int_from_bytes(size_t n_args, const mp_obj_t *args) {
 static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_from_bytes_fun_obj, 3, 4, int_from_bytes);
 static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_bytes_fun_obj));

-static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) {
-    // TODO: Support signed (currently behaves as if signed=(val < 0))
-    (void)n_args;
-    bool overflow;
+static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Only supported kwarg is 'signed'
+    enum { ARG_signed };
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_signed, MP_ARG_BOOL, {.u_bool = false} },
+    };

-    mp_int_t dlen = mp_obj_get_int(args[1]);
+    // Parse positional args
+    mp_obj_t self = pos_args[0];
+    mp_int_t dlen = mp_obj_get_int(pos_args[1]);
    if (dlen < 0) {
        mp_raise_ValueError(NULL);
    }
-    bool big_endian = args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little);
+    bool big_endian = pos_args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little);
+
+    // parse kwargs
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args - 3, pos_args + 3, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+    bool as_signed = args[ARG_signed].u_bool;
+
+    bool overflow;

    vstr_t vstr;
    vstr_init_len(&vstr, dlen);
    byte *data = (byte *)vstr.buf;

    #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
-    if (!mp_obj_is_small_int(args[0])) {
-        overflow = !mp_obj_int_to_bytes_impl(args[0], big_endian, dlen, data);
+    if (!mp_obj_is_small_int(self)) {
+        overflow = !mp_obj_int_to_bytes_impl(self, big_endian, as_signed, dlen, data);
    } else
    #endif
    {
-        mp_int_t val = MP_OBJ_SMALL_INT_VALUE(args[0]);
-        int slen = MP_INT_REPR_LEN(val, val < 0);
+        mp_int_t val = MP_OBJ_SMALL_INT_VALUE(self);
+        if (val < 0 && !as_signed) {
+            mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("can't convert negative int to unsigned"));
+        }
+        int slen = MP_INT_REPR_LEN(val, as_signed);
        memset(data, val < 0 ? 0xFF : 0x00, dlen);
        if (slen <= dlen) {
            mp_binary_set_int(slen, big_endian, data + (big_endian ? (dlen - slen) : 0), val);
@ -458,7 +472,7 @@ static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) {

    return mp_obj_new_bytes_from_vstr(&vstr);
 }
-static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_to_bytes_obj, 3, 4, int_to_bytes);
+static MP_DEFINE_CONST_FUN_OBJ_KW(int_to_bytes_obj, 3, int_to_bytes);

 static const mp_rom_map_elem_t int_locals_dict_table[] = {
    { MP_ROM_QSTR(MP_QSTR_from_bytes), MP_ROM_PTR(&int_from_bytes_obj) },
--- a/py/objint.h
+++ b/py/objint.h
@ -56,7 +56,7 @@ char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size,
 mp_int_t mp_obj_int_hash(mp_obj_t self_in);
 mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf);
 // Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
-bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
+bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, bool as_signed, size_t len, byte *buf);
 int mp_obj_int_sign(mp_obj_t self_in);
 mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in);
 mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
--- a/py/objint_longlong.c
+++ b/py/objint_longlong.c
@ -57,11 +57,11 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
    return mp_obj_new_int_from_ll(value);
 }

-bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
+bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, bool as_signed, size_t len, byte *buf) {
    assert(mp_obj_is_exact_type(self_in, &mp_type_int));
    mp_obj_int_t *self = self_in;
    long long val = self->val;
-    size_t slen = MP_INT_REPR_LEN(val, val < 0);
+    size_t slen = MP_INT_REPR_LEN(val, as_signed);
    bool ok = slen <= len;
    if (big_endian) {
        byte *b = buf + len;
--- a/py/objint_mpz.c
+++ b/py/objint_mpz.c
@ -112,10 +112,13 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
    return MP_OBJ_FROM_PTR(o);
 }

-bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
+bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, bool as_signed, size_t len, byte *buf) {
    assert(mp_obj_is_exact_type(self_in, &mp_type_int));
    mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in);
-    return mpz_as_bytes(&self->mpz, big_endian, self->mpz.neg, len, buf);
+    if (self->mpz.neg && !as_signed) {
+        mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("can't convert negative int to unsigned"));
+    }
+    return mpz_as_bytes(&self->mpz, big_endian, as_signed, len, buf);
 }

 int mp_obj_int_sign(mp_obj_t self_in) {
--- a/tests/basics/int_bytes.py
+++ b/tests/basics/int_bytes.py
@ -60,36 +60,25 @@ except OverflowError:

 # negative representations

-# MicroPython int.to_bytes() behaves as if signed=True for negative numbers
-if "micropython" in repr(sys.implementation):
-
-    def to_bytes_compat(i, l, e):
-        return i.to_bytes(l, e)
-else:
-    # Implement MicroPython compatible behaviour for CPython
-    def to_bytes_compat(i, l, e):
-        return i.to_bytes(l, e, signed=i < 0)
-
-
-print(to_bytes_compat(-1, 1, "little"))
-print(to_bytes_compat(-1, 3, "little"))
-print(to_bytes_compat(-1, 1, "big"))
-print(to_bytes_compat(-1, 3, "big"))
-print(to_bytes_compat(-128, 1, "big"))
-print(to_bytes_compat(-32768, 2, "big"))
-print(to_bytes_compat(-(1 << 23), 3, "big"))
+print((-1).to_bytes(1, "little", signed=True))
+print((-1).to_bytes(3, "little", signed=True))
+print((-1).to_bytes(1, "big", signed=True))
+print((-1).to_bytes(3, "big", signed=True))
+print((-128).to_bytes(1, "big", signed=True))
+print((-32768).to_bytes(2, "big", signed=True))
+print((-(1 << 23)).to_bytes(3, "big", signed=True))

 try:
-    print(to_bytes_compat(-129, 1, "big"))
+    print((-129).to_bytes(1, "big", signed=True))
 except OverflowError:
    print("OverflowError")

 try:
-    print(to_bytes_compat(-32769, 2, "big"))
+    print((-32769).to_bytes(2, "big", signed=True))
 except OverflowError:
    print("OverflowError")

 try:
-    print(to_bytes_compat(-(1 << 23) - 1, 2, "big"))
+    print(((-1 << 23) - 1).to_bytes(2, "big", signed=True))
 except OverflowError:
    print("OverflowError")
--- a/tests/basics/int_bytes_int64.py
+++ b/tests/basics/int_bytes_int64.py
@ -36,17 +36,6 @@ except OverflowError:

 # negative representations

-# MicroPython int.to_bytes() behaves as if signed=True for negative numbers
-if "micropython" in repr(sys.implementation):
-
-    def to_bytes_compat(i, l, e):
-        return i.to_bytes(l, e)
-else:
-    # Implement MicroPython compatible behaviour for CPython
-    def to_bytes_compat(i, l, e):
-        return i.to_bytes(l, e, signed=i < 0)
-
-
-print(to_bytes_compat(-x, 8, "little"))
-print(to_bytes_compat(-x, 20, "big"))
-print(to_bytes_compat(-x, 20, "little"))
+print((-x).to_bytes(8, "little", signed=True))
+print((-x).to_bytes(20, "big", signed=True))
+print((-x).to_bytes(20, "little", signed=True))
--- a/tests/basics/int_bytes_intbig.py
+++ b/tests/basics/int_bytes_intbig.py
@ -33,19 +33,8 @@ except OverflowError:

 # negative representations

-# MicroPython int.to_bytes() behaves as if signed=True for negative numbers
-if "micropython" in repr(sys.implementation):
-
-    def to_bytes_compat(i, l, e):
-        return i.to_bytes(l, e)
-else:
-    # Implement MicroPython compatible behaviour for CPython
-    def to_bytes_compat(i, l, e):
-        return i.to_bytes(l, e, signed=i < 0)
-
-
-print(to_bytes_compat(-ib, 20, "big"))
-print(to_bytes_compat(ib * -ib, 40, "big"))
+print((-ib).to_bytes(20, "big", signed=True))
+print((ib * -ib).to_bytes(40, "big", signed=True))

 # case where an additional byte is needed for sign bit
 ib = (2**64) - 1
@ -54,9 +43,9 @@ print(ib.to_bytes(8, "little"))
 ib *= -1

 try:
-    print(to_bytes_compat(ib, 8, "little"))
+    print((ib).to_bytes(8, "little", signed=True))
 except OverflowError:
    print("OverflowError")

-print(to_bytes_compat(ib, 9, "little"))
-print(to_bytes_compat(ib, 9, "big"))
+print((ib).to_bytes(9, "little", signed=True))
+print((ib).to_bytes(9, "big", signed=True))
--- a/tests/cpydiff/types_int_to_bytes.py
+++ b/tests/cpydiff/types_int_to_bytes.py
@ -1,16 +0,0 @@
-"""
-categories: Types,int
-description: ``to_bytes`` method doesn't implement signed parameter.
-cause: The ``signed`` keyword-only parameter is not implemented for ``int.to_bytes()``.
-
-When the integer is negative, MicroPython behaves the same as CPython ``int.to_bytes(..., signed=True)``
-
-When the integer is non-negative, MicroPython behaves the same as CPython ``int.to_bytes(..., signed=False)``.
-
-(The difference is subtle, but in CPython a positive integer converted with ``signed=True`` may require one byte more in the output length, in order to fit the 0 sign bit.)
-
-workaround: Take care when calling ``to_bytes()`` on an integer value which may be negative.
-"""
-
-x = -1
-print(x.to_bytes(1, "big"))