From 9d6f474ea49fd89b7a1a90b830e6014ef70a89b7 Mon Sep 17 00:00:00 2001
From: Jim Mussared
Date: Sun, 25 Sep 2022 22:15:45 +1000
Subject: [PATCH] py/objstr: Don't treat bytes as unicode in str.count.

`b'\xaa \xaa'.count(b'\xaa')` now (correctly) returns 2 instead of 1.

Fixes issue #9404.

This work was funded through GitHub Sponsors.

Signed-off-by: Jim Mussared
---
 py/objstr.c                 | 4 +++-
 tests/basics/bytes_count.py | 7 +++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/py/objstr.c b/py/objstr.c
index 62d7bfb4cc..55e737fffc 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -1768,6 +1768,8 @@ STATIC mp_obj_t str_count(size_t n_args, const mp_obj_t *args) {
         return MP_OBJ_NEW_SMALL_INT(utf8_charlen(start, end - start) + 1);
     }
 
+    bool is_str = self_type == &mp_type_str;
+
     // count the occurrences
     mp_int_t num_occurrences = 0;
     for (const byte *haystack_ptr = start; haystack_ptr + needle_len <= end;) {
@@ -1775,7 +1777,7 @@ STATIC mp_obj_t str_count(size_t n_args, const mp_obj_t *args) {
             num_occurrences++;
             haystack_ptr += needle_len;
         } else {
-            haystack_ptr = utf8_next_char(haystack_ptr);
+            haystack_ptr = is_str ? utf8_next_char(haystack_ptr) : haystack_ptr + 1;
         }
     }
 
diff --git a/tests/basics/bytes_count.py b/tests/basics/bytes_count.py
index 5fa0730f5c..e71f09db00 100644
--- a/tests/basics/bytes_count.py
+++ b/tests/basics/bytes_count.py
@@ -48,6 +48,13 @@ print(b"aaaa".count(b'a', 1, 5))
 print(b"aaaa".count(b'a', -1, 5))
 print(b"abbabba".count(b"abba"))
 
+print(b'\xaa \xaa'.count(b'\xaa'))
+print(b'\xaa \xaa \xaa \xaa'.count(b'\xaa'))
+print(b'\xaa \xaa \xaa \xaa'.count(b'\xaa', 1))
+print(b'\xaa \xaa \xaa \xaa'.count(b'\xaa', 2))
+print(b'\xaa \xaa \xaa \xaa'.count(b'\xaa', 1, 3))
+print(b'\xaa \xaa \xaa \xaa'.count(b'\xaa', 2, 3))
+
 def t():
     return True