From 3e5a718f8457f2a328ab775413b5a8a133b114e4 Mon Sep 17 00:00:00 2001 From: Jared Hancock Date: Tue, 26 Mar 2024 21:41:28 -0500 Subject: [PATCH] re: Add support for `finditer` method. --- docs/library/re.rst | 11 ++++-- extmod/modre.c | 67 +++++++++++++++++++++++++++++++++++++ py/mpconfig.h | 4 +++ tests/extmod/re_finditer.py | 25 ++++++++++++++ 4 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 tests/extmod/re_finditer.py diff --git a/docs/library/re.rst b/docs/library/re.rst index b8aeefd90c..2d639c2e64 100644 --- a/docs/library/re.rst +++ b/docs/library/re.rst @@ -140,6 +140,12 @@ Functions Note: availability of this function depends on :term:`MicroPython port`. +.. function:: finditer(regex_str, string) + + Return an iterator yielding ``Match`` objects over all non-overlapping + matches for the RE *regex_str* in *string*. The string is scanned + left-to-right, and matches are returned in the order found. + .. data:: DEBUG Flag value, display debug information about compiled expression. @@ -156,10 +162,11 @@ Compiled regular expression. Instances of this class are created using .. method:: regex.match(string, [pos, [endpos]]) regex.search(string, [pos, [endpos]]) + regex.finditer(string, [pos, [endpos]]) regex.sub(replace, string, count=0, flags=0, /) - Similar to the module-level functions :meth:`match`, :meth:`search` - and :meth:`sub`. + Similar to the module-level functions :meth:`match`, :meth:`search`, + :meth:`finditer`, and :meth:`sub`. Using methods is (much) more efficient if the same regex is applied to multiple strings. 
diff --git a/extmod/modre.c b/extmod/modre.c
index 3a203644a8..d2304a534b 100644
--- a/extmod/modre.c
+++ b/extmod/modre.c
@@ -422,11 +422,101 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_sub_obj, 3, 5, re_sub_helper);
 
 #endif
 
+#if MICROPY_PY_RE_FINDITER
+
+// Iterator state for finditer(). `base` and `iternext` lead the struct because
+// the object is allocated with mp_type_polymorph_iter as its type.
+typedef struct _mp_re_finditer_it_t {
+    mp_obj_base_t base;
+    mp_fun_1_t iternext;
+    mp_obj_t pattern; // compiled regex; MP_OBJ_NULL once the iterator is exhausted
+    mp_obj_t str; // subject being scanned
+    mp_obj_t start; // pos handed to re_exec for the next search
+    mp_obj_t end; // endpos handed to re_exec, or mp_const_none when not given
+} mp_re_finditer_it_t;
+
+// Return the next Match object, or MP_OBJ_STOP_ITERATION once re_exec reports no
+// further match.
+static mp_obj_t mp_re_finditer_it_iternext(mp_obj_t self_in) {
+    mp_re_finditer_it_t *self = MP_OBJ_TO_PTR(self_in);
+
+    if (self->pattern == MP_OBJ_NULL) {
+        // A zero-width match already used up the end of the search range.
+        return MP_OBJ_STOP_ITERATION;
+    }
+
+    mp_obj_t args[4] = {
+        self->pattern,
+        self->str,
+        self->start,
+        self->end
+    };
+    // Forward endpos only when the caller supplied one.
+    int n_args = (self->end == mp_const_none) ? 3 : 4;
+
+    mp_obj_t obj_match = re_exec(false, n_args, args);
+    if (obj_match == mp_const_none) {
+        return MP_OBJ_STOP_ITERATION;
+    }
+
+    mp_obj_match_t *match = MP_OBJ_TO_PTR(obj_match);
+    size_t len;
+    const char *begin = mp_obj_str_get_data(self->str, &len);
+    // Resume where this match ended. NOTE(review): this is a byte offset; confirm
+    // re_exec interprets pos the same way for non-ASCII str subjects.
+    mp_int_t next = match->caps[1] - begin;
+    if (match->caps[1] == match->caps[0]) {
+        // A zero-width match consumes nothing, so searching again from its end
+        // would yield the same match forever. Step over one position, and stop
+        // for good once that leaves the subject or the endpos bound. Unlike
+        // CPython, a non-empty match starting at the same position is not tried.
+        next += 1;
+        if (next > (mp_int_t)len || (self->end != mp_const_none && next > mp_obj_get_int(self->end))) {
+            self->pattern = MP_OBJ_NULL;
+        }
+    }
+    self->start = MP_OBJ_NEW_SMALL_INT(next);
+    return obj_match;
+}
+
+// finditer(regex_str, string) and regex.finditer(string, [pos, [endpos]]).
+// Only sets up the iterator; searching happens in the iternext handler. pos and
+// endpos are taken from args only when args[0] is a compiled regex object.
+static mp_obj_t re_finditer(size_t n_args, const mp_obj_t *args) {
+    mp_re_finditer_it_t *iter = mp_obj_malloc(mp_re_finditer_it_t, &mp_type_polymorph_iter);
+    iter->iternext = mp_re_finditer_it_iternext;
+    iter->str = args[1];
+    iter->start = MP_OBJ_NEW_SMALL_INT(0);
+    iter->end = mp_const_none;
+
+    if (mp_obj_is_type(args[0], (mp_obj_type_t *)&re_type)) {
+        iter->pattern = args[0];
+        if (n_args > 2) {
+            iter->start = args[2];
+            if (n_args > 3) {
+                iter->end = args[3];
+            }
+        }
+    } else {
+        // Module-level form: compile once here, not on every iteration.
+        iter->pattern = mod_re_compile(1, args);
+    }
+
+    return MP_OBJ_FROM_PTR(iter);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_finditer_obj, 2, 4, re_finditer);
+
+#endif // MICROPY_PY_RE_FINDITER
+
 #if !MICROPY_ENABLE_DYNRUNTIME
 static const mp_rom_map_elem_t re_locals_dict_table[] = {
     { MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&re_match_obj) },
     { MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&re_search_obj) },
     { MP_ROM_QSTR(MP_QSTR_split), MP_ROM_PTR(&re_split_obj) },
+    #if MICROPY_PY_RE_FINDITER
+    { MP_ROM_QSTR(MP_QSTR_finditer), MP_ROM_PTR(&re_finditer_obj) },
+    #endif
     #if MICROPY_PY_RE_SUB
     { MP_ROM_QSTR(MP_QSTR_sub), MP_ROM_PTR(&re_sub_obj) },
     #endif
@@ -477,6 +567,9 @@ static const mp_rom_map_elem_t mp_module_re_globals_table[] = {
     { MP_ROM_QSTR(MP_QSTR_compile), MP_ROM_PTR(&mod_re_compile_obj) },
     { MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&re_match_obj) },
     { MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&re_search_obj) },
+    #if MICROPY_PY_RE_FINDITER
+    { MP_ROM_QSTR(MP_QSTR_finditer), MP_ROM_PTR(&re_finditer_obj) },
+    #endif
     #if MICROPY_PY_RE_SUB
     { MP_ROM_QSTR(MP_QSTR_sub), MP_ROM_PTR(&re_sub_obj) },
     #endif
diff --git a/py/mpconfig.h b/py/mpconfig.h
index d9cff930d1..4cd7d2f0ab 100644
--- a/py/mpconfig.h
+++ b/py/mpconfig.h
@@ -1652,6 +1652,10 @@ typedef double mp_float_t;
 #define MICROPY_PY_RE_MATCH_SPAN_START_END (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EVERYTHING)
 #endif
 
+#ifndef MICROPY_PY_RE_FINDITER
+#define MICROPY_PY_RE_FINDITER (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EXTRA_FEATURES)
+#endif
+
 #ifndef MICROPY_PY_RE_SUB
 #define MICROPY_PY_RE_SUB (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EXTRA_FEATURES)
 #endif
diff --git a/tests/extmod/re_finditer.py b/tests/extmod/re_finditer.py
new file mode 100644
index 0000000000..6043a37e3f
--- /dev/null
+++ b/tests/extmod/re_finditer.py
@@ -0,0 +1,32 @@
+# finditer is only built when MICROPY_PY_RE_FINDITER is enabled; skip otherwise.
+try:
+    import re
+    from re import finditer
+except ImportError:
+    print("SKIP")
+    raise SystemExit
+
+ms = re.finditer(r"f[a-z]*", "which foot or hand fell fastest")
+print([x.group(0) for x in ms])
+
+p = re.compile(r"f[a-z]*")
+ms = p.finditer("which foot or hand fell fastest")
+print([x.group(0) for x in ms])
+
+ms = p.finditer("which foot or hand fell fastest", 10)
+print([x.group(0) for x in ms])
+
+ms = p.finditer("which foot or hand fell fastest", 10, 21)
+print([x.group(0) for x in ms])
+
+ms = re.finditer(r"\s+", "which foot or hand fell fastest")
+print([x.group(0) for x in ms])
+
+ms = re.finditer(r"zz", "which foot or hand fell fastest")
+print([x.group(0) for x in ms])
+
+# A pattern that can match the empty string must not stall the iterator.
+# Only the leading matches are compared so the result does not depend on
+# whether an empty match at the very end of the string is reported.
+ms = re.finditer(r"x*", "axxb")
+print([x.group(0) for x in ms][:3])