From 1c6de11f772afae9b4155f8a654cadd05125a2de Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Fri, 3 Jan 2014 02:41:17 +0200 Subject: [PATCH 1/9] Add basic implementation of slice object. So far, only start and stop integer indexes are supported. Step is not supported, as well as objects of arbitrary types. --- py/obj.h | 5 ++++ py/objslice.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++ stm/Makefile | 1 + unix-cpy/Makefile | 1 + unix/Makefile | 1 + 5 files changed, 67 insertions(+) create mode 100644 py/objslice.c diff --git a/py/obj.h b/py/obj.h index 7b4b0656f2..16c7c36dd1 100644 --- a/py/obj.h +++ b/py/obj.h @@ -144,6 +144,7 @@ mp_obj_t mp_obj_new_list(uint n, mp_obj_t *items); mp_obj_t mp_obj_new_list_reverse(uint n, mp_obj_t *items); mp_obj_t mp_obj_new_dict(int n_args); mp_obj_t mp_obj_new_set(int n_args, mp_obj_t *items); +mp_obj_t mp_obj_new_slice(mp_obj_t start, mp_obj_t stop, mp_obj_t step); mp_obj_t mp_obj_new_bound_meth(mp_obj_t self, mp_obj_t meth); mp_obj_t mp_obj_new_class(struct _mp_map_t *class_locals); mp_obj_t mp_obj_new_instance(mp_obj_t clas); @@ -214,6 +215,10 @@ mp_obj_t mp_obj_dict_store(mp_obj_t self_in, mp_obj_t key, mp_obj_t value); // set void mp_obj_set_store(mp_obj_t self_in, mp_obj_t item); +// slice +extern const mp_obj_type_t slice_type; +void mp_obj_slice_get(mp_obj_t self_in, machine_int_t *start, machine_int_t *stop, machine_int_t *step); + // functions typedef struct _mp_obj_fun_native_t { // need this so we can define const objects (to go in ROM) mp_obj_base_t base; diff --git a/py/objslice.c b/py/objslice.c new file mode 100644 index 0000000000..619899b232 --- /dev/null +++ b/py/objslice.c @@ -0,0 +1,59 @@ +#include +#include +#include +#include + +#include "nlr.h" +#include "misc.h" +#include "mpconfig.h" +#include "obj.h" +#include "runtime0.h" + +#if MICROPY_ENABLE_SLICE + +// TODO: This implements only variant of slice with 2 integer args only. 
+// CPython supports 3rd arg (step), plus args can be arbitrary Python objects. +typedef struct _mp_obj_slice_t { + mp_obj_base_t base; + machine_int_t start; + machine_int_t stop; +} mp_obj_slice_t; + +void slice_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t o_in) { + mp_obj_slice_t *o = o_in; + print(env, "slice(" INT_FMT ", " INT_FMT ")", o->start, o->stop); +} + +const mp_obj_type_t slice_type = { + { &mp_const_type }, + "slice", + slice_print, + NULL, // call_n + NULL, // unary_op + NULL, // binary_op + NULL, // getiter + NULL, // iternext + { { NULL, NULL }, }, // method list +}; + +// TODO: Make sure to handle "empty" values, which are signified by None in CPython +mp_obj_t mp_obj_new_slice(mp_obj_t ostart, mp_obj_t ostop, mp_obj_t ostep) { + assert(ostep == NULL); + machine_int_t start = mp_obj_get_int(ostart); + machine_int_t stop = mp_obj_get_int(ostop); + mp_obj_slice_t *o = m_new(mp_obj_slice_t, 1); + o->base.type = &slice_type; + o->start = start; + o->stop = stop; + return (mp_obj_t)o; +} + +void mp_obj_slice_get(mp_obj_t self_in, machine_int_t *start, machine_int_t *stop, machine_int_t *step) { + assert(MP_OBJ_IS_TYPE(self_in, &slice_type)); + mp_obj_slice_t *self = self_in; + *start = self->start; + *stop = self->stop; + *step = 1; +} + +#endif diff --git a/stm/Makefile b/stm/Makefile index d6c77e2bd7..e84e21eae2 100644 --- a/stm/Makefile +++ b/stm/Makefile @@ -78,6 +78,7 @@ PY_O = \ objnone.o \ objrange.o \ objset.o \ + objslice.o \ objstr.o \ objtuple.o \ objtype.o \ diff --git a/unix-cpy/Makefile b/unix-cpy/Makefile index 48c3179460..7fee3438fb 100644 --- a/unix-cpy/Makefile +++ b/unix-cpy/Makefile @@ -43,6 +43,7 @@ PY_O = \ objnone.o \ objrange.o \ objset.o \ + objslice.o \ objstr.o \ objtuple.o \ objtype.o \ diff --git a/unix/Makefile b/unix/Makefile index fd5b6b43e0..38d6ba8e15 100644 --- a/unix/Makefile +++ b/unix/Makefile @@ -50,6 +50,7 @@ PY_O = \ objnone.o \ objrange.o \ objset.o \ + objslice.o \ objstr.o \ 
objtuple.o \ objtype.o \ From ded0a1efa5744235fbb4cd07c96598018a3661eb Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Fri, 3 Jan 2014 02:48:56 +0200 Subject: [PATCH 2/9] Implement BUILD_SLICE opcode (2-arg version). --- py/vm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/py/vm.c b/py/vm.c index c549e2b490..35dcbea523 100644 --- a/py/vm.c +++ b/py/vm.c @@ -410,6 +410,18 @@ bool mp_execute_byte_code_2(const byte **ip_in_out, mp_obj_t *fastn, mp_obj_t ** sp++; break; + case MP_BC_BUILD_SLICE: + DECODE_UINT; + if (unum == 2) { + obj2 = POP(); + obj1 = TOP(); + SET_TOP(mp_obj_new_slice(obj1, obj2, NULL)); + } else { + printf("3-argument slice is not supported\n"); + assert(0); + } + break; + case MP_BC_UNPACK_SEQUENCE: DECODE_UINT; rt_unpack_sequence(sp[0], unum, sp - unum + 1); From 31ba60f8364a4009ddc3d45fee90c84b43d88d2c Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Fri, 3 Jan 2014 02:51:16 +0200 Subject: [PATCH 3/9] str: Initial implementation of string slicing. Only step=1 and non-negative indexes are supported so far. --- py/objstr.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/py/objstr.c b/py/objstr.c index 48abf4951d..46adabcec9 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -29,7 +29,21 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { case RT_BINARY_OP_SUBSCR: // string access // XXX a massive hack! 
- return mp_obj_new_int(lhs_str[mp_obj_get_int(rhs_in)]); + + // TODO: need predicate to check for int-like type (bools are such for example) + // ["no", "yes"][1 == 2] is common idiom + if (MP_OBJ_IS_SMALL_INT(rhs_in)) { + // TODO: This implements byte string access for single index so far + return mp_obj_new_int(lhs_str[mp_obj_get_int(rhs_in)]); + } else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) { + int start, stop, step; + mp_obj_slice_get(rhs_in, &start, &stop, &step); + assert(step == 1); + return mp_obj_new_str(qstr_from_strn_copy(lhs_str + start, stop - start)); + } else { + // Throw TypeError here + assert(0); + } case RT_BINARY_OP_ADD: case RT_BINARY_OP_INPLACE_ADD: From cd22627f781080fb245dd6999f2158c8099379b0 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Fri, 3 Jan 2014 03:01:12 +0200 Subject: [PATCH 4/9] Enable slice support in config. --- py/mpconfig.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/py/mpconfig.h b/py/mpconfig.h index 44095bd10b..56495d9156 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -25,3 +25,9 @@ #ifndef MICROPY_MEM_STATS #define MICROPY_MEM_STATS (1) #endif + +// Whether to support slice object and correspondingly +// slice subscript operators +#ifndef MICROPY_ENABLE_SLICE +#define MICROPY_ENABLE_SLICE (1) +#endif From 59800afae9f74571eea5bb463a28fd9bd8251150 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Fri, 3 Jan 2014 23:35:32 +0200 Subject: [PATCH 5/9] slice: Implement special handling of omitted start/stop indexes. 
--- py/objslice.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/py/objslice.c b/py/objslice.c index 619899b232..03607e4c3e 100644 --- a/py/objslice.c +++ b/py/objslice.c @@ -39,8 +39,23 @@ const mp_obj_type_t slice_type = { // TODO: Make sure to handle "empty" values, which are signified by None in CPython mp_obj_t mp_obj_new_slice(mp_obj_t ostart, mp_obj_t ostop, mp_obj_t ostep) { assert(ostep == NULL); - machine_int_t start = mp_obj_get_int(ostart); - machine_int_t stop = mp_obj_get_int(ostop); + machine_int_t start = 0, stop = 0; + if (ostart != mp_const_none) { + start = mp_obj_get_int(ostart); + } + if (ostop != mp_const_none) { + stop = mp_obj_get_int(ostop); + if (stop == 0) { + // [x:0] is a special case - in our slice object, stop = 0 means + // "end of sequence". Fortunately, [x:0] is an empty sequence for + // any x (including negative). [x:x] is also always an empty sequence, + // but x can also be 0. But note that b""[x:x] is b"" for any x (i.e. + // no IndexError, at least in Python 3.3.3). So, we just use -1's to + // signify that. -1 is a catchy "special" number in case someone ever + // tries to print an [x:0] slice. + start = stop = -1; + } + } mp_obj_slice_t *o = m_new(mp_obj_slice_t, 1); o->base.type = &slice_type; o->start = start; From decad08ef57aa3cf3960ce65e29b194cb97c6d22 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Fri, 3 Jan 2014 23:36:56 +0200 Subject: [PATCH 6/9] str: Handle non-positive slice indexes. 
--- py/objstr.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/py/objstr.c b/py/objstr.c index 46adabcec9..54dd087a45 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -39,6 +39,13 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { int start, stop, step; mp_obj_slice_get(rhs_in, &start, &stop, &step); assert(step == 1); + int len = strlen(lhs_str); + if (start < 0) { + start = len + start; + } + if (stop <= 0) { + stop = len + stop; + } return mp_obj_new_str(qstr_from_strn_copy(lhs_str + start, stop - start)); } else { // Throw TypeError here From 26534cec8510323e8d1a313991b76320821c666b Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Fri, 3 Jan 2014 23:38:16 +0200 Subject: [PATCH 7/9] Add test for byte string slicing. --- tests/basics/tests/slice-bstr1.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 tests/basics/tests/slice-bstr1.py diff --git a/tests/basics/tests/slice-bstr1.py b/tests/basics/tests/slice-bstr1.py new file mode 100644 index 0000000000..8b492b35cc --- /dev/null +++ b/tests/basics/tests/slice-bstr1.py @@ -0,0 +1,27 @@ +b"123"[0:1] + +b"123"[0:2] + +b"123"[:1] + +b"123"[1:] + +# Idiom for copying sequence +b"123"[:] + +b"123"[:-1] + +# Weird cases +b"123"[0:0] +b"123"[1:0] +b"123"[1:1] +b"123"[-1:-1] +b"123"[-3:] +b"123"[-3:3] +b"123"[0:] +b"123"[:0] +b"123"[:-3] +b"123"[:-4] +# No IndexError! +b""[1:1] +b""[-1:-1] From e606cb656165aff2424fb6ca45f09d606246d073 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Sat, 4 Jan 2014 01:34:23 +0200 Subject: [PATCH 8/9] slice: Allow building with MICROPY_ENABLE_SLICE=0. 
--- py/objstr.c | 2 ++ py/vm.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/py/objstr.c b/py/objstr.c index 54dd087a45..8e3e9d9025 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -35,6 +35,7 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { if (MP_OBJ_IS_SMALL_INT(rhs_in)) { // TODO: This implements byte string access for single index so far return mp_obj_new_int(lhs_str[mp_obj_get_int(rhs_in)]); +#if MICROPY_ENABLE_SLICE } else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) { int start, stop, step; mp_obj_slice_get(rhs_in, &start, &stop, &step); @@ -47,6 +48,7 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { stop = len + stop; } return mp_obj_new_str(qstr_from_strn_copy(lhs_str + start, stop - start)); +#endif } else { // Throw TypeError here assert(0); diff --git a/py/vm.c b/py/vm.c index 35dcbea523..382780640b 100644 --- a/py/vm.c +++ b/py/vm.c @@ -410,6 +410,7 @@ bool mp_execute_byte_code_2(const byte **ip_in_out, mp_obj_t *fastn, mp_obj_t ** sp++; break; +#if MICROPY_ENABLE_SLICE case MP_BC_BUILD_SLICE: DECODE_UINT; if (unum == 2) { @@ -421,6 +422,7 @@ bool mp_execute_byte_code_2(const byte **ip_in_out, mp_obj_t *fastn, mp_obj_t ** assert(0); } break; +#endif case MP_BC_UNPACK_SEQUENCE: DECODE_UINT; From f8b9d3c41addea79851c355f014db9f0f256cdaf Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Sat, 4 Jan 2014 01:38:26 +0200 Subject: [PATCH 9/9] str: Throw TypeError for invalid index type and clean up comments. --- py/objstr.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/py/objstr.c b/py/objstr.c index 8e3e9d9025..6a0721d45f 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -27,13 +27,11 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { const char *lhs_str = qstr_str(lhs->qstr); switch (op) { case RT_BINARY_OP_SUBSCR: - // string access - // XXX a massive hack! 
- // TODO: need predicate to check for int-like type (bools are such for example) // ["no", "yes"][1 == 2] is common idiom if (MP_OBJ_IS_SMALL_INT(rhs_in)) { // TODO: This implements byte string access for single index so far + // TODO: Handle negative indexes. return mp_obj_new_int(lhs_str[mp_obj_get_int(rhs_in)]); #if MICROPY_ENABLE_SLICE } else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) { @@ -50,8 +48,9 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { return mp_obj_new_str(qstr_from_strn_copy(lhs_str + start, stop - start)); #endif } else { - // Throw TypeError here - assert(0); + // Message doesn't match CPython, but we don't have as many bytes as they do + // to spend on verbose wording + nlr_jump(mp_obj_new_exception_msg(rt_q_TypeError, "index must be int")); } case RT_BINARY_OP_ADD: