From 5008972fefdd0d8cad214d4c0a2fb3daea0ac3c8 Mon Sep 17 00:00:00 2001
From: = <peter@hinch.me.uk>
Date: Tue, 14 Apr 2015 13:14:57 +0100
Subject: [PATCH] py/inlinethumb: Support for core floating point instructions.

Adds support for the following Thumb2 VFP instructions, via the option
MICROPY_EMIT_INLINE_THUMB_FLOAT:

vcmp
vsqrt
vneg
vcvt_f32_s32 (s32 to f32)
vcvt_s32_f32 (f32 to s32)
vmrs
vmov
vldr
vstr
vadd
vsub
vmul
vdiv
---
 py/emitinlinethumb.c               | 136 +++++++++++++++++++++++++++++
 py/mpconfig.h                      |   5 ++
 tests/inlineasm/asmfpaddsub.py     |  15 ++++
 tests/inlineasm/asmfpaddsub.py.exp |   1 +
 tests/inlineasm/asmfpcmp.py        |  14 +++
 tests/inlineasm/asmfpcmp.py.exp    |   3 +
 tests/inlineasm/asmfpldrstr.py     |  12 +++
 tests/inlineasm/asmfpldrstr.py.exp |   1 +
 tests/inlineasm/asmfpmuldiv.py     |  15 ++++
 tests/inlineasm/asmfpmuldiv.py.exp |   1 +
 tests/inlineasm/asmfpsqrt.py       |  15 ++++
 tests/inlineasm/asmfpsqrt.py.exp   |   1 +
 12 files changed, 219 insertions(+)
 create mode 100644 tests/inlineasm/asmfpaddsub.py
 create mode 100644 tests/inlineasm/asmfpaddsub.py.exp
 create mode 100644 tests/inlineasm/asmfpcmp.py
 create mode 100644 tests/inlineasm/asmfpcmp.py.exp
 create mode 100644 tests/inlineasm/asmfpldrstr.py
 create mode 100644 tests/inlineasm/asmfpldrstr.py.exp
 create mode 100644 tests/inlineasm/asmfpmuldiv.py
 create mode 100644 tests/inlineasm/asmfpmuldiv.py.exp
 create mode 100644 tests/inlineasm/asmfpsqrt.py
 create mode 100644 tests/inlineasm/asmfpsqrt.py.exp

diff --git a/py/emitinlinethumb.c b/py/emitinlinethumb.c
index 17db2d8414..391d057891 100644
--- a/py/emitinlinethumb.c
+++ b/py/emitinlinethumb.c
@@ -196,6 +196,35 @@ STATIC mp_uint_t get_arg_reg(emit_inline_asm_t *emit, const char *op, mp_parse_n
     return 0;
 }
 
+#if MICROPY_EMIT_INLINE_THUMB_FLOAT
+// Parse pn as a VFP single-precision register name "sN" (decimal N, 0..31) and return N.
+// On failure a SyntaxError naming op is reported via emit_inline_thumb_error_exc and 0 is returned.
+STATIC mp_uint_t get_arg_vfpreg(emit_inline_asm_t *emit, const char *op, mp_parse_node_t pn) {
+    const char *reg_str = get_arg_str(pn);
+    if (reg_str[0] == 's' && reg_str[1] != '\0') {
+        mp_uint_t regno = 0;
+        for (++reg_str; *reg_str; ++reg_str) {
+            mp_uint_t v = *reg_str;
+            if (!('0' <= v && v <= '9')) {
+                goto malformed;
+            }
+            regno = 10 * regno + v - '0';
+            if (regno > 31) {
+                // checked per digit so a long digit string cannot wrap back into range
+                emit_inline_thumb_error_exc(emit, mp_obj_new_exception_msg_varg(
+                    &mp_type_SyntaxError, "'%s' expects at most s%d", op, 31));
+                return 0;
+            }
+        }
+        return regno;
+    }
+malformed:
+    emit_inline_thumb_error_exc(emit, mp_obj_new_exception_msg_varg(
+        &mp_type_SyntaxError, "'%s' expects an FPU register", op));
+    return 0;
+}
+#endif
+
 STATIC mp_uint_t get_arg_reglist(emit_inline_asm_t *emit, const char *op, mp_parse_node_t pn) {
     // a register list looks like {r0, r1, r2} and is parsed as a Python set
 
@@ -352,6 +381,17 @@ STATIC const format_9_10_op_t format_9_10_op_table[] = {
 };
 #undef X
 
+#if MICROPY_EMIT_INLINE_THUMB_FLOAT
+// emitted halfwords are: 0xee00 | (op & 0xf0), 0x0a00 | ((op & 0x0f) << 4); mnemonic is 'v' + name
+typedef struct _format_vfp_op_t { byte op; char name[3]; } format_vfp_op_t; // name has no NUL terminator: compare exactly 3 chars
+STATIC const format_vfp_op_t format_vfp_op_table[] = {
+    { 0x30, "add" }, // vadd
+    { 0x34, "sub" }, // vsub
+    { 0x20, "mul" }, // vmul
+    { 0x80, "div" }, // vdiv
+};
+#endif
+
 STATIC void emit_inline_thumb_op(emit_inline_asm_t *emit, qstr op, mp_uint_t n_args, mp_parse_node_t *pn_args) {
     // TODO perhaps make two tables:
     // one_args =
@@ -366,6 +406,102 @@ STATIC void emit_inline_thumb_op(emit_inline_asm_t *emit, qstr op, mp_uint_t n_a
     mp_uint_t op_len;
     const char *op_str = (const char*)qstr_data(op, &op_len);
 
+    #if MICROPY_EMIT_INLINE_THUMB_FLOAT
+    if (op_str[0] == 'v') {
+        // floating point operations: any mnemonic starting with 'v' is either encoded here or sent to unknown_op
+        if (n_args == 2) {
+            mp_uint_t op_code = 0x0ac0, op_code_hi; // op_code: default low halfword; op_code_hi: high halfword, set per mnemonic
+            if (strcmp(op_str, "vcmp") == 0) {
+                op_code_hi = 0xeeb4; // NOTE(review): the default low halfword has bit 7 set, which looks like the VCMPE form -- confirm against the ARM ARM
+                op_vfp_twoargs:; // shared tail for every "op(sd, sm)" form; the branches below jump here
+                mp_uint_t vd = get_arg_vfpreg(emit, op_str, pn_args[0]);
+                mp_uint_t vm = get_arg_vfpreg(emit, op_str, pn_args[1]);
+                asm_thumb_op32(emit->as,
+                    op_code_hi | ((vd & 1) << 6), // vd bit 0 -> bit 6 of the high halfword
+                    op_code | ((vd & 0x1e) << 11) | ((vm & 1) << 5) | (vm & 0x1e) >> 1); // vd[4:1] -> bits 15:12, vm[0] -> bit 5, vm[4:1] -> bits 3:0
+            } else if (strcmp(op_str, "vsqrt") == 0) {
+                op_code_hi = 0xeeb1;
+                goto op_vfp_twoargs;
+            } else if (strcmp(op_str, "vneg") == 0) {
+                op_code_hi = 0xeeb1;
+                op_code = 0x0a40; // same high halfword as vsqrt; only the low halfword differs
+                goto op_vfp_twoargs;
+            } else if (strcmp(op_str, "vcvt_f32_s32") == 0) {
+                op_code_hi = 0xeeb8; // int to float
+                goto op_vfp_twoargs;
+            } else if (strcmp(op_str, "vcvt_s32_f32") == 0) {
+                op_code_hi = 0xeebd; // float to int
+                goto op_vfp_twoargs;
+            } else if (strcmp(op_str, "vmrs") == 0) {
+                mp_uint_t reg_dest;
+                const char *reg_str0 = get_arg_str(pn_args[0]);
+                if (strcmp(reg_str0, "APSR_nzcv") == 0) {
+                    reg_dest = 15; // APSR_nzcv is encoded as destination number 15
+                } else {
+                    reg_dest = get_arg_reg(emit, op_str, pn_args[0], 15);
+                }
+                const char *reg_str1 = get_arg_str(pn_args[1]);
+                if (strcmp(reg_str1, "FPSCR") == 0) {
+                    // FP status to ARM reg
+                    asm_thumb_op32(emit->as, 0xeef1, 0x0a10 | (reg_dest << 12));
+                } else {
+                    goto unknown_op; // FPSCR is the only source accepted
+                }
+            } else if (strcmp(op_str, "vmov") == 0) {
+                op_code_hi = 0xee00;
+                mp_uint_t r_arm, vm;
+                const char *reg_str = get_arg_str(pn_args[0]);
+                if (reg_str[0] == 'r') { // vmov(rN, sM) form: ARM register is the first argument
+                    r_arm = get_arg_reg(emit, op_str, pn_args[0], 15);
+                    vm = get_arg_vfpreg(emit, op_str, pn_args[1]);
+                    op_code_hi |= 0x10; // bit 4 of the high halfword marks the ARM-register-first form
+                } else { // vmov(sM, rN) form
+                    vm = get_arg_vfpreg(emit, op_str, pn_args[0]);
+                    r_arm = get_arg_reg(emit, op_str, pn_args[1], 15);
+                }
+                asm_thumb_op32(emit->as,
+                    op_code_hi | ((vm & 0x1e) >> 1), // vm[4:1] -> bits 3:0 of the high halfword
+                    0x0a10 | (r_arm << 12) | ((vm & 1) << 7)); // r_arm -> bits 15:12, vm[0] -> bit 7
+            } else if (strcmp(op_str, "vldr") == 0) {
+                op_code_hi = 0xed90;
+                op_vldr_vstr:; // shared tail for vldr and vstr, which differ only in op_code_hi
+                mp_uint_t vd = get_arg_vfpreg(emit, op_str, pn_args[0]);
+                mp_parse_node_t pn_base, pn_offset;
+                if (get_arg_addr(emit, op_str, pn_args[1], &pn_base, &pn_offset)) {
+                    mp_uint_t rlo_base = get_arg_reg(emit, op_str, pn_base, 7); // presumably limits the base to r0-r7 -- verify against get_arg_reg
+                    mp_uint_t i8;
+                    i8 = get_arg_i(emit, op_str, pn_offset, 0xff); // emitted unscaled; the tests step through a float array with offsets 0, 1, 2, so it appears to count words, not bytes
+                    asm_thumb_op32(emit->as,
+                        op_code_hi | rlo_base | ((vd & 1) << 6), // base register in the low bits, vd[0] -> bit 6
+                        0x0a00 | ((vd & 0x1e) << 11) | i8); // vd[4:1] -> bits 15:12, offset in the low 8 bits
+                }
+            } else if (strcmp(op_str, "vstr") == 0) {
+                op_code_hi = 0xed80;
+                goto op_vldr_vstr;
+            } else {
+                goto unknown_op;
+            }
+        } else if (n_args == 3) {
+            // search table for arith ops; the mnemonic must be exactly 'v' followed by the 3-letter table name
+            for (mp_uint_t i = 0; i < MP_ARRAY_SIZE(format_vfp_op_table); i++) {
+                if (strncmp(op_str + 1, format_vfp_op_table[i].name, 3) == 0 && op_str[4] == '\0') {
+                    mp_uint_t op_code_hi = 0xee00 | (format_vfp_op_table[i].op & 0xf0);
+                    mp_uint_t op_code = 0x0a00 | ((format_vfp_op_table[i].op & 0x0f) << 4);
+                    mp_uint_t vd = get_arg_vfpreg(emit, op_str, pn_args[0]);
+                    mp_uint_t vn = get_arg_vfpreg(emit, op_str, pn_args[1]);
+                    mp_uint_t vm = get_arg_vfpreg(emit, op_str, pn_args[2]);
+                    asm_thumb_op32(emit->as,
+                        op_code_hi | ((vd & 1) << 6) | (vn >> 1), // vd[0] -> bit 6, vn[4:1] -> bits 3:0
+                        op_code | (vm >> 1) | ((vm & 1) << 5) | ((vd & 0x1e) << 11) | ((vn & 1) << 7)); // vm[4:1] -> bits 3:0, vm[0] -> bit 5, vd[4:1] -> bits 15:12, vn[0] -> bit 7
+                    return;
+                }
+            }
+            goto unknown_op;
+        } else {
+            goto unknown_op;
+        }
+    } else
+    #endif
     if (n_args == 0) {
         if (strcmp(op_str, "nop") == 0) {
             asm_thumb_op16(emit->as, ASM_THUMB_OP_NOP);
diff --git a/py/mpconfig.h b/py/mpconfig.h
index c78221f3a5..a403a66633 100644
--- a/py/mpconfig.h
+++ b/py/mpconfig.h
@@ -186,6 +186,11 @@
 #define MICROPY_EMIT_INLINE_THUMB (0)
 #endif
 
+// Whether to enable float (VFP) instructions in the Thumb2 inline assembler; defaults to enabled unless already defined
+#ifndef MICROPY_EMIT_INLINE_THUMB_FLOAT
+#define MICROPY_EMIT_INLINE_THUMB_FLOAT (1)
+#endif
+
 // Whether to emit ARM native code
 #ifndef MICROPY_EMIT_ARM
 #define MICROPY_EMIT_ARM (0)
diff --git a/tests/inlineasm/asmfpaddsub.py b/tests/inlineasm/asmfpaddsub.py
new file mode 100644
index 0000000000..b5fcecb6ce
--- /dev/null
+++ b/tests/inlineasm/asmfpaddsub.py
@@ -0,0 +1,15 @@
+@micropython.asm_thumb      # r0 = r0+r1-r2, computed in floating point
+def add_sub(r0, r1, r2):
+    vmov(s0, r0)            # move each integer argument into an FPU register ...
+    vcvt_f32_s32(s0, s0)    # ... and convert it in place from int to float
+    vmov(s1, r1)
+    vcvt_f32_s32(s1, s1)
+    vmov(s2, r2)
+    vcvt_f32_s32(s2, s2)
+    vadd(s0, s0, s1)        # s0 = r0 + r1
+    vsub(s0, s0, s2)        # s0 = s0 - r2
+    vcvt_s32_f32(s31, s0)   # float back to int; s31 is the highest register number accepted
+    vmov(r0, s31)           # result goes back in r0
+
+print(add_sub(100, 20, 30)) # expected output: 90
+
diff --git a/tests/inlineasm/asmfpaddsub.py.exp b/tests/inlineasm/asmfpaddsub.py.exp
new file mode 100644
index 0000000000..d61f00d8ca
--- /dev/null
+++ b/tests/inlineasm/asmfpaddsub.py.exp
@@ -0,0 +1 @@
+90
diff --git a/tests/inlineasm/asmfpcmp.py b/tests/inlineasm/asmfpcmp.py
new file mode 100644
index 0000000000..d4fa1f2410
--- /dev/null
+++ b/tests/inlineasm/asmfpcmp.py
@@ -0,0 +1,14 @@
+@micropython.asm_thumb      # test vcmp, vmrs
+def f(r0, r1):
+    vmov(s0, r0)
+    vcvt_f32_s32(s0, s0)    # s0 = float(r0)
+    vmov(s1, r1)
+    vcvt_f32_s32(s1, s1)    # s1 = float(r1)
+    vcmp(s1, s0)            # compare the second argument against the first
+    vmrs(r0, FPSCR)         # copy the FP status register into r0
+    mov(r1, 28)
+    lsr(r0, r1)             # shift right by 28 so only the top four status bits remain (presumably N, Z, C, V)
+
+print(f(0,1))               # second argument greater: expected 2
+print(f(1,1))               # arguments equal: expected 6
+print(f(1,0))               # second argument smaller: expected 8
diff --git a/tests/inlineasm/asmfpcmp.py.exp b/tests/inlineasm/asmfpcmp.py.exp
new file mode 100644
index 0000000000..104b3580f7
--- /dev/null
+++ b/tests/inlineasm/asmfpcmp.py.exp
@@ -0,0 +1,3 @@
+2
+6
+8
diff --git a/tests/inlineasm/asmfpldrstr.py b/tests/inlineasm/asmfpldrstr.py
new file mode 100644
index 0000000000..75054a6796
--- /dev/null
+++ b/tests/inlineasm/asmfpldrstr.py
@@ -0,0 +1,12 @@
+import array
+@micropython.asm_thumb      # test vldr, vstr
+def arrayadd(r0):           # r0 is the float array passed by the caller
+    vldr(s0, [r0, 0])       # load the first element
+    vldr(s1, [r0, 1])       # load the second element (offset 1 reaches the next float)
+    vadd(s2, s0, s1)
+    vstr(s2, [r0, 2])       # store the sum over the third element
+
+z = array.array("f", [2, 4, 10])
+arrayadd(z)
+print(z[2])                 # expected output: 6.0 (2 + 4 replaces the 10)
+
diff --git a/tests/inlineasm/asmfpldrstr.py.exp b/tests/inlineasm/asmfpldrstr.py.exp
new file mode 100644
index 0000000000..e0ea36feef
--- /dev/null
+++ b/tests/inlineasm/asmfpldrstr.py.exp
@@ -0,0 +1 @@
+6.0
diff --git a/tests/inlineasm/asmfpmuldiv.py b/tests/inlineasm/asmfpmuldiv.py
new file mode 100644
index 0000000000..edf9511bcd
--- /dev/null
+++ b/tests/inlineasm/asmfpmuldiv.py
@@ -0,0 +1,15 @@
+@micropython.asm_thumb      # r0 = (int)(r0*r1/r2)
+def muldiv(r0, r1, r2):
+    vmov(s0, r0)            # move each integer argument into an FPU register ...
+    vcvt_f32_s32(s0, s0)    # ... and convert it in place from int to float
+    vmov(s1, r1)
+    vcvt_f32_s32(s1, s1)
+    vmov(s2, r2)
+    vcvt_f32_s32(s2, s2)
+    vmul(s7, s0, s1)        # s7 = r0 * r1
+    vdiv(s8, s7, s2)        # s8 = s7 / r2
+    vcvt_s32_f32(s31, s8)   # float back to int
+    vmov(r0, s31)           # result goes back in r0
+
+print(muldiv(100, 10, 50))  # expected output: 20
+
diff --git a/tests/inlineasm/asmfpmuldiv.py.exp b/tests/inlineasm/asmfpmuldiv.py.exp
new file mode 100644
index 0000000000..209e3ef4b6
--- /dev/null
+++ b/tests/inlineasm/asmfpmuldiv.py.exp
@@ -0,0 +1 @@
+20
diff --git a/tests/inlineasm/asmfpsqrt.py b/tests/inlineasm/asmfpsqrt.py
new file mode 100644
index 0000000000..f2c2d3a954
--- /dev/null
+++ b/tests/inlineasm/asmfpsqrt.py
@@ -0,0 +1,15 @@
+# test vsqrt, vneg
+@micropython.asm_thumb      # r0 = -(int)(sqrt(r0)*r1)
+def sqrt_test(r0, r1):
+    vmov(s1, r0)
+    vcvt_f32_s32(s1, s1)    # s1 = float(r0)
+    vsqrt(s1, s1)           # s1 = sqrt(s1), computed in place
+    vmov(s2, r1)
+    vcvt_f32_s32(s2, s2)    # s2 = float(r1)
+    vmul(s0, s1, s2)        # s0 = sqrt(r0) * r1
+    vneg(s7, s0)            # s7 = -s0
+    vcvt_s32_f32(s31, s7)   # float back to int
+    vmov(r0, s31)           # result goes back in r0
+
+print(sqrt_test(256, 10))   # expected output: -160
+
diff --git a/tests/inlineasm/asmfpsqrt.py.exp b/tests/inlineasm/asmfpsqrt.py.exp
new file mode 100644
index 0000000000..88a1e93bab
--- /dev/null
+++ b/tests/inlineasm/asmfpsqrt.py.exp
@@ -0,0 +1 @@
+-160