From 18f813785a2aadbcad9cea3f128c81111703c91f Mon Sep 17 00:00:00 2001 From: Glenn Moloney Date: Fri, 21 Jul 2023 14:49:07 +1000 Subject: [PATCH] py/objstr: Skip whitespace in bytes.fromhex(). Skip whitespace characters between pairs of hex digits. This makes `bytes.fromhex()` compatible with CPython. Includes a simple test in `tests/basics/builtin_str_hex.py`. Signed-off-by: Glenn Moloney --- py/objstr.c | 28 ++++++++++++---------------- tests/basics/builtin_str_hex.py | 2 ++ tests/basics/builtin_str_hex.py.exp | 2 ++ 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/py/objstr.c b/py/objstr.c index b966a70169..150ff33dd3 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -2012,27 +2012,23 @@ mp_obj_t mp_obj_bytes_fromhex(mp_obj_t type_in, mp_obj_t data) { mp_buffer_info_t bufinfo; mp_get_buffer_raise(data, &bufinfo, MP_BUFFER_READ); - if ((bufinfo.len & 1) != 0) { - mp_raise_ValueError(MP_ERROR_TEXT("odd-length string")); - } vstr_t vstr; vstr_init_len(&vstr, bufinfo.len / 2); byte *in = bufinfo.buf, *out = (byte *)vstr.buf; - byte hex_byte = 0; - for (mp_uint_t i = bufinfo.len; i--;) { - byte hex_ch = *in++; - if (unichar_isxdigit(hex_ch)) { - hex_byte += unichar_xdigit_value(hex_ch); - } else { - mp_raise_ValueError(MP_ERROR_TEXT("non-hex digit found")); - } - if (i & 1) { - hex_byte <<= 4; - } else { - *out++ = hex_byte; - hex_byte = 0; + byte *in_end = in + bufinfo.len; + mp_uint_t hex_ch, x1, x2; + while (in < in_end) { + hex_ch = *in++; + if (!unichar_isspace(hex_ch)) { + x1 = unichar_xdigit_value(hex_ch); + x2 = (in < in_end) ? 
unichar_xdigit_value(*in++) : 0xff; + if ((x1 | x2) & ~0xf) { // if (x1 > 0xf || x2 > 0xf) {} + mp_raise_ValueError(MP_ERROR_TEXT("non-hex or odd number of digits")); + } + *out++ = (byte)((x1 << 4) | x2); } } + vstr.len = out - (byte *)vstr.buf; // Length may be shorter due to whitespace chars in input return mp_obj_new_str_type_from_vstr(MP_OBJ_TO_PTR(type_in), &vstr); } diff --git a/tests/basics/builtin_str_hex.py b/tests/basics/builtin_str_hex.py index 7390c8eaee..5134f9af2c 100644 --- a/tests/basics/builtin_str_hex.py +++ b/tests/basics/builtin_str_hex.py @@ -20,5 +20,7 @@ for x in ( "08090a0b0c0d0e0f", "7f80ff", "313233344142434461626364", + "ab cd ef", + "ab\tcd\n ef ", ): print(bytes.fromhex(x)) diff --git a/tests/basics/builtin_str_hex.py.exp b/tests/basics/builtin_str_hex.py.exp index 990dd85707..7a43ce60fe 100644 --- a/tests/basics/builtin_str_hex.py.exp +++ b/tests/basics/builtin_str_hex.py.exp @@ -26,3 +26,5 @@ b'\x00\x01\x02\x03\x04\x05\x06\x07' b'\x08\t\n\x0b\x0c\r\x0e\x0f' b'\x7f\x80\xff' b'1234ABCDabcd' +b'\xab\xcd\xef' +b'\xab\xcd\xef'