re: Add support for `finditer` method.

pull/14179/head
Jared Hancock 2024-03-26 21:41:28 -05:00
rodzic b190872d02
commit 3e5a718f84
4 zmienionych plików z 105 dodań i 2 usunięć

Wyświetl plik

@ -140,6 +140,12 @@ Functions
Note: availability of this function depends on :term:`MicroPython port`. Note: availability of this function depends on :term:`MicroPython port`.
.. function:: finditer(regex_str, string)
Return an iterator yielding ``Match`` objects over all non-overlapping
matches for the RE *regex_str* in *string*. The string is scanned
left-to-right, and matches are returned in the order found.
Note: availability of this function depends on :term:`MicroPython port`.
.. data:: DEBUG .. data:: DEBUG
Flag value, display debug information about compiled expression. Flag value, display debug information about compiled expression.
@ -156,10 +162,11 @@ Compiled regular expression. Instances of this class are created using
.. method:: regex.match(string, [pos, [endpos]]) .. method:: regex.match(string, [pos, [endpos]])
regex.search(string, [pos, [endpos]]) regex.search(string, [pos, [endpos]])
regex.finditer(string, [pos, [endpos]])
regex.sub(replace, string, count=0, flags=0, /) regex.sub(replace, string, count=0, flags=0, /)
Similar to the module-level functions :meth:`match`, :meth:`search` Similar to the module-level functions :meth:`match`, :meth:`search`,
and :meth:`sub`. :meth:`finditer`, and :meth:`sub`.
Using methods is (much) more efficient if the same regex is applied to Using methods is (much) more efficient if the same regex is applied to
multiple strings. multiple strings.

Wyświetl plik

@ -422,11 +422,75 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_sub_obj, 3, 5, re_sub_helper);
#endif #endif
#if MICROPY_PY_RE_FINDITER
// Iterator state for re.finditer() / regex.finditer().
// Instances are allocated with mp_type_polymorph_iter as their type (see
// re_finditer), so `base` followed by `iternext` presumably has to stay at the
// front of the struct in this order -- NOTE(review): confirm against the
// polymorph-iter object layout before reordering fields.
typedef struct _mp_re_finditer_it_t {
mp_obj_base_t base;
// Callback that produces the next item; set to mp_re_finditer_it_iternext.
mp_fun_1_t iternext;
// Compiled regex object used for every search.
mp_obj_t pattern;
// Subject string being scanned.
mp_obj_t str;
// Position passed to the next search: initially 0 (or the caller's pos),
// then updated after each match to the offset where that match ended.
mp_obj_t start;
// Caller-supplied endpos, or mp_const_none when no end limit was given.
mp_obj_t end;
} mp_re_finditer_it_t;
// Produce the next match for a finditer() iterator.
//
// Runs an unanchored search of self->str starting at self->start (and bounded
// by self->end when one was supplied), then moves self->start to the end of
// the match so the following call continues from there.
//
// Returns the match object, or MP_OBJ_STOP_ITERATION once no further match
// exists.
static mp_obj_t mp_re_finditer_it_iternext(mp_obj_t self_in) {
    mp_re_finditer_it_t *self = MP_OBJ_TO_PTR(self_in);

    // pattern is cleared below once an empty match at the very end of the
    // subject has been returned; nothing can follow it.
    if (self->pattern == MP_OBJ_NULL) {
        return MP_OBJ_STOP_ITERATION;
    }

    mp_obj_t args[4] = {
        self->pattern,
        self->str,
        self->start,
        self->end
    };
    // Only forward endpos when the caller actually provided one.
    int n_args = (self->end == mp_const_none) ? 3 : 4;
    mp_obj_t obj_match = re_exec(false, n_args, args);
    if (obj_match == mp_const_none) {
        return MP_OBJ_STOP_ITERATION;
    }

    mp_obj_match_t *match = MP_OBJ_TO_PTR(obj_match);
    size_t len;
    const char *begin = mp_obj_str_get_data(self->str, &len);

    // caps[0]/caps[1] delimit the whole match; resume where it ended.
    // NOTE(review): this is a byte offset into the subject. If re_exec treats
    // pos as a character index the two differ for non-ASCII str subjects --
    // confirm against re_exec.
    size_t next = (size_t)(match->caps[1] - begin);

    if (match->caps[1] == match->caps[0]) {
        // Zero-width match: resuming at the same offset would return this
        // same empty match forever, so step past it. At the end of the
        // subject there is nowhere left to step to, so finish instead.
        if (next >= len) {
            self->pattern = MP_OBJ_NULL;
        } else {
            ++next;
        }
    }

    self->start = MP_OBJ_NEW_SMALL_INT(next);
    return obj_match;
}
// Build the iterator object behind re.finditer(pattern, string) and
// regex.finditer(string[, pos[, endpos]]).
//
// args[0] is either a compiled regex object or a pattern to compile; args[1]
// is the subject. pos/endpos (args[2], args[3]) are honoured only when args[0]
// is already a compiled regex; in the module-level form any extra arguments
// are ignored and the scan covers the whole subject.
static mp_obj_t re_finditer(size_t n_args, const mp_obj_t *args) {
    mp_re_finditer_it_t *it = mp_obj_malloc(mp_re_finditer_it_t, &mp_type_polymorph_iter);
    const bool is_compiled = mp_obj_is_type(args[0], (mp_obj_type_t *)&re_type);

    it->iternext = mp_re_finditer_it_iternext;
    it->str = args[1];
    it->start = (is_compiled && n_args > 2) ? args[2] : MP_OBJ_NEW_SMALL_INT(0);
    it->end = (is_compiled && n_args > 3) ? args[3] : mp_const_none;
    // Compile on the fly when called with a pattern rather than a regex object.
    it->pattern = is_compiled ? args[0] : mod_re_compile(1, args);

    return MP_OBJ_FROM_PTR(it);
}
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_finditer_obj, 2, 4, re_finditer);
#endif // MICROPY_PY_RE_FINDITER
#if !MICROPY_ENABLE_DYNRUNTIME #if !MICROPY_ENABLE_DYNRUNTIME
static const mp_rom_map_elem_t re_locals_dict_table[] = { static const mp_rom_map_elem_t re_locals_dict_table[] = {
{ MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&re_match_obj) }, { MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&re_match_obj) },
{ MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&re_search_obj) }, { MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&re_search_obj) },
{ MP_ROM_QSTR(MP_QSTR_split), MP_ROM_PTR(&re_split_obj) }, { MP_ROM_QSTR(MP_QSTR_split), MP_ROM_PTR(&re_split_obj) },
#if MICROPY_PY_RE_FINDITER
{ MP_ROM_QSTR(MP_QSTR_finditer), MP_ROM_PTR(&re_finditer_obj) },
#endif
#if MICROPY_PY_RE_SUB #if MICROPY_PY_RE_SUB
{ MP_ROM_QSTR(MP_QSTR_sub), MP_ROM_PTR(&re_sub_obj) }, { MP_ROM_QSTR(MP_QSTR_sub), MP_ROM_PTR(&re_sub_obj) },
#endif #endif
@ -477,6 +541,9 @@ static const mp_rom_map_elem_t mp_module_re_globals_table[] = {
{ MP_ROM_QSTR(MP_QSTR_compile), MP_ROM_PTR(&mod_re_compile_obj) }, { MP_ROM_QSTR(MP_QSTR_compile), MP_ROM_PTR(&mod_re_compile_obj) },
{ MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&re_match_obj) }, { MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&re_match_obj) },
{ MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&re_search_obj) }, { MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&re_search_obj) },
#if MICROPY_PY_RE_FINDITER
{ MP_ROM_QSTR(MP_QSTR_finditer), MP_ROM_PTR(&re_finditer_obj) },
#endif
#if MICROPY_PY_RE_SUB #if MICROPY_PY_RE_SUB
{ MP_ROM_QSTR(MP_QSTR_sub), MP_ROM_PTR(&re_sub_obj) }, { MP_ROM_QSTR(MP_QSTR_sub), MP_ROM_PTR(&re_sub_obj) },
#endif #endif

Wyświetl plik

@ -1652,6 +1652,10 @@ typedef double mp_float_t;
#define MICROPY_PY_RE_MATCH_SPAN_START_END (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EVERYTHING) #define MICROPY_PY_RE_MATCH_SPAN_START_END (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EVERYTHING)
#endif #endif
// Whether to provide "finditer" in the "re" module, both as a module-level
// function and as a method of compiled regex objects
#ifndef MICROPY_PY_RE_FINDITER
#define MICROPY_PY_RE_FINDITER (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EXTRA_FEATURES)
#endif
#ifndef MICROPY_PY_RE_SUB #ifndef MICROPY_PY_RE_SUB
#define MICROPY_PY_RE_SUB (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EXTRA_FEATURES) #define MICROPY_PY_RE_SUB (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EXTRA_FEATURES)
#endif #endif

Wyświetl plik

@ -0,0 +1,25 @@
# Exercise finditer as a module-level function and as a compiled-pattern method.
try:
    import re
    from re import finditer  # probe only: ImportError here means finditer is unavailable
except ImportError:
    print("SKIP")
    raise SystemExit

SUBJECT = 'which foot or hand fell fastest'


def show(matches):
    # Print the full text (group 0) of every match the iterator yields.
    print([m.group(0) for m in matches])


# Module-level form: pattern given as a string.
show(re.finditer(r'f[a-z]*', SUBJECT))

# Method form on a compiled pattern: whole string, then pos, then pos + endpos.
word = re.compile(r'f[a-z]*')
show(word.finditer(SUBJECT))
show(word.finditer(SUBJECT, 10))
show(word.finditer(SUBJECT, 10, 21))

# Whitespace runs, and a pattern that never matches (expects an empty list).
show(re.finditer(r'\s+', SUBJECT))
show(re.finditer(r'zz', SUBJECT))