# micropython/tests/micropython/import_mpy_native_gc.py

# Test that native code loaded from a .mpy file is retained after a GC.

# Feature probe: import the modules and touch the attributes this test relies
# on.  If a module cannot be imported (ImportError) or an attribute is absent
# (AttributeError), print "SKIP" and exit instead of failing.
try:
    import gc, sys, uio, uos

    # Bare attribute accesses: evaluated only so that a missing attribute
    # raises AttributeError and takes the skip path below.
    sys.implementation.mpy
    uio.IOBase
    uos.mount
except (ImportError, AttributeError):
    print("SKIP")
    raise SystemExit


class UserFile(uio.IOBase):
    # Read-only stream that serves bytes out of an in-memory buffer.

    def __init__(self, data):
        # Wrap the buffer in a memoryview and start reading at offset 0.
        self.data = memoryview(data)
        self.pos = 0

    def readinto(self, buf):
        # Copy as many bytes as fit in buf (or as are left in the data),
        # move the read position past them and return how many were copied.
        start = self.pos
        count = min(len(buf), len(self.data) - start)
        stop = start + count
        buf[:count] = self.data[start:stop]
        self.pos = stop
        return count

    def ioctl(self, req, arg):
        # Every request is answered with 0, whatever req/arg are.
        return 0


class UserFS:
    # Tiny filesystem object backed by a mapping of path -> file contents.

    def __init__(self, files):
        self.files = files

    def mount(self, readonly, mksfs):
        # Nothing to set up; both arguments are ignored.
        return None

    def umount(self):
        # Nothing to tear down.
        return None

    def stat(self, path):
        # Paths that are not in the mapping raise OSError; every known path
        # reports the same 10-tuple.
        if path not in self.files:
            raise OSError
        # 0x8000 == 32768 (presumably the regular-file mode bit -- confirm);
        # all remaining fields are 0.
        return (0x8000, 0, 0, 0, 0, 0, 0, 0, 0, 0)

    def open(self, path, mode):
        # mode is ignored: the contents are always wrapped in a UserFile.
        contents = self.files[path]
        return UserFile(contents)


# Pre-compiled examples/natmod/features0 example for various architectures, keyed
# by the required value of sys.implementation.mpy.
#
# In each entry the 2nd and 3rd bytes of the blob equal the low and high bytes
# of its key (b"M\x05\x0a..." for 0xA05, b"M\x05\x16..." for 0x1605).
# NOTE(review): presumably these are the .mpy version and feature/arch bytes --
# confirm against the MicroPython .mpy file format documentation.
# The blobs are raw binary data and must not be reformatted or edited by hand.
features0_file_contents = {
    # -march=x64
    0xA05: b'M\x05\x0a\x1f \x84b\xe9/\x00\x00\x00SH\x8b\x1ds\x00\x00\x00\xbe\x02\x00\x00\x00\xffS\x18\xbf\x01\x00\x00\x00H\x85\xc0u\x0cH\x8bC \xbe\x02\x00\x00\x00[\xff\xe0H\x0f\xaf\xf8H\xff\xc8\xeb\xe6ATUSH\x8b\x1dA\x00\x00\x00H\x8b\x7f\x08L\x8bc(A\xff\xd4H\x8d5\x1f\x00\x00\x00H\x89\xc5H\x8b\x05-\x00\x00\x00\x0f\xb78\xffShH\x89\xefA\xff\xd4H\x8b\x03[]A\\\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x84@\x12factorial\x10\x00\x00\r \x01"\x9f\x1c\x01\x1e\xff',
    # -march=armv7m
    0x1605: b"M\x05\x16\x1f \x84\x12\x1a\xe0\x00\x00\x13\xb5\nK\nJ{D\x9cX\x02!\xe3h\x98G\x03F\x01 3\xb9\x02!#i\x01\x93\x02\xb0\xbd\xe8\x10@\x18GXC\x01;\xf4\xe7\x00\xbfj\x00\x00\x00\x00\x00\x00\x00\xf8\xb5\tN\tK~D\xf4X@hgi\xb8G\x05F\x07K\x07I\xf2XyD\x10\x88ck\x98G(F\xb8G h\xf8\xbd6\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x1c\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x01\x84\x00\x12factorial\x10\x00\x00\r<\x01>\x9f8\x01:\xff",
}

# Populate other armv7m-derived archs based on armv7m.
# The three extra keys all refer to the same armv7m bytes object; no copy is made.
for arch in (0x1A05, 0x1E05, 0x2205):
    features0_file_contents[arch] = features0_file_contents[0x1605]

# Skip the test when there is no pre-compiled blob registered for the running
# implementation's sys.implementation.mpy value.
if sys.implementation.mpy not in features0_file_contents:
    print("SKIP")
    raise SystemExit

# These are the test .mpy files.
# Only the blob matching this implementation is exposed.  The key is the path
# looked up in UserFS.files (presumably the path relative to the mount point
# -- confirm against the VFS behaviour).
user_files = {"/features0.mpy": features0_file_contents[sys.implementation.mpy]}

# Create and mount a user filesystem.
# Adding the mount point to sys.path lets "features0" be found by the import below.
uos.mount(UserFS(user_files), "/userfs")
sys.path.append("/userfs")

# Import the native function.
# NOTE(review): the collection before the import presumably gives the heap a
# clean starting state -- confirm.
gc.collect()
from features0 import factorial

# Free the module that contained the function.
# After this, the global name `factorial` is the only reference this script
# still holds to anything imported from the module.
del sys.modules["features0"]

# Run a GC cycle which should reclaim the module but not the function.
gc.collect()

# Allocate lots of fragmented memory to overwrite anything that was just freed by the GC.
# Each iteration creates an empty list that is discarded immediately.
for i in range(1000):
    []

# Run the native function, it should not have been freed or overwritten.
# The printed value is the observable output of this test.
print(factorial(10))

# Unmount and undo path addition.
# pop() removes the last sys.path entry, i.e. the "/userfs" appended above.
uos.umount("/userfs")
sys.path.pop()
py/persistentcode: Maintain root ptr list of imported native .mpy code. On ports where normal heap memory can contain executable code (eg ARM-based ports such as stm32), native code loaded from an .mpy file may be reclaimed by the GC because there's no reference to the very start of the native machine code block that is reachable from root pointers (only pointers to internal parts of the machine code block are reachable, but that doesn't help the GC find the memory). This commit fixes this issue by maintaining an explicit list of root pointers pointing to native code that is loaded from an .mpy file. This is not needed for all ports so is selectable by the new configuration option MICROPY_PERSISTENT_CODE_TRACK_RELOC_CODE. It's enabled by default if a port does not specify any special functions to allocate or commit executable memory. A test is included to test that native code loaded from an .mpy file does not get reclaimed by the GC. Fixes #6045. Signed-off-by: Damien George <damien@micropython.org> 2020-07-27 13:52:38 +00:00			`# Test that native code loaded from a .mpy file is retained after a GC.`

			`try:`
			`import gc, sys, uio, uos`

			`sys.implementation.mpy`
			`uio.IOBase`
			`uos.mount`
			`except (ImportError, AttributeError):`
			`print("SKIP")`
			`raise SystemExit`


			`class UserFile(uio.IOBase):`
			`def __init__(self, data):`
			`self.data = memoryview(data)`
			`self.pos = 0`

			`def readinto(self, buf):`
			`n = min(len(buf), len(self.data) - self.pos)`
			`buf[:n] = self.data[self.pos : self.pos + n]`
			`self.pos += n`
			`return n`

			`def ioctl(self, req, arg):`
			`return 0`


			`class UserFS:`
			`def __init__(self, files):`
			`self.files = files`

			`def mount(self, readonly, mksfs):`
			`pass`

			`def umount(self):`
			`pass`

			`def stat(self, path):`
			`if path in self.files:`
			`return (32768, 0, 0, 0, 0, 0, 0, 0, 0, 0)`
			`raise OSError`

			`def open(self, path, mode):`
			`return UserFile(self.files[path])`


			`# Pre-compiled examples/natmod/features0 example for various architectures, keyed`
			`# by the required value of sys.implementation.mpy.`
			`features0_file_contents = {`
all: Remove MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE.

This commit removes all parts of code associated with the existing MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE optimisation option, including the -mcache-lookup-bc option to mpy-cross.

This feature originally provided a significant performance boost for Unix, but wasn't able to be enabled for MCU targets (due to frozen bytecode), and added significant extra complexity to generating and distributing .mpy files.

The equivalent performance gain is now provided by the combination of MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE (which has been enabled on the unix port in the previous commit).

It's hard to provide precise performance numbers, but tests have been run on a wide variety of architectures (x86-64, ARM Cortex, Aarch64, RISC-V, xtensa) and they all generally agree on the qualitative improvements seen by the combination of MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE.

For example, on a "quiet" Linux x64 environment (i3-5010U @ 2.10GHz) the change from CACHE_MAP_LOOKUP_IN_BYTECODE, to LOAD_ATTR_FAST_PATH combined with MAP_LOOKUP_CACHE is:

diff of scores (higher is better)
N=2000 M=2000        bccache -> attrmapcache       diff     diff% (error%)
bm_chaos.py         13742.56 ->   13905.67 :    +163.11 =  +1.187% (+/-3.75%)
bm_fannkuch.py         60.13 ->      61.34 :      +1.21 =  +2.012% (+/-2.11%)
bm_fft.py          113083.20 ->  114793.68 :   +1710.48 =  +1.513% (+/-1.57%)
bm_float.py        256552.80 ->  243908.29 :  -12644.51 =  -4.929% (+/-1.90%)
bm_hexiom.py          521.93 ->     625.41 :    +103.48 = +19.826% (+/-0.40%)
bm_nqueens.py      197544.25 ->  217713.12 :  +20168.87 = +10.210% (+/-3.01%)
bm_pidigits.py       8072.98 ->    8198.75 :    +125.77 =  +1.558% (+/-3.22%)
misc_aes.py         17283.45 ->   16480.52 :    -802.93 =  -4.646% (+/-0.82%)
misc_mandel.py      99083.99 ->  128939.84 :  +29855.85 = +30.132% (+/-5.88%)
misc_pystone.py     83860.10 ->   82592.56 :   -1267.54 =  -1.511% (+/-2.27%)
misc_raytrace.py    21490.40 ->   22227.23 :    +736.83 =  +3.429% (+/-1.88%)

This shows that the new optimisations are at least as good as the existing inline-bytecode-caching, and are sometimes much better (because the new ones apply caching to a wider variety of map lookups).

The new optimisations can also benefit code generated by the native emitter, because they apply to the runtime rather than the generated code. The improvement for the native emitter when LOAD_ATTR_FAST_PATH and MAP_LOOKUP_CACHE are enabled is (same Linux environment as above):

diff of scores (higher is better)
N=2000 M=2000         native -> nat-attrmapcache   diff     diff% (error%)
bm_chaos.py         14130.62 ->   15464.68 :   +1334.06 =  +9.441% (+/-7.11%)
bm_fannkuch.py         74.96 ->      76.16 :      +1.20 =  +1.601% (+/-1.80%)
bm_fft.py          166682.99 ->  168221.86 :   +1538.87 =  +0.923% (+/-4.20%)
bm_float.py        233415.23 ->  265524.90 :  +32109.67 = +13.756% (+/-2.57%)
bm_hexiom.py          628.59 ->     734.17 :    +105.58 = +16.796% (+/-1.39%)
bm_nqueens.py      225418.44 ->  232926.45 :   +7508.01 =  +3.331% (+/-3.10%)
bm_pidigits.py       6322.00 ->    6379.52 :     +57.52 =  +0.910% (+/-5.62%)
misc_aes.py         20670.10 ->   27223.18 :   +6553.08 = +31.703% (+/-1.56%)
misc_mandel.py     138221.11 ->  152014.01 :  +13792.90 =  +9.979% (+/-2.46%)
misc_pystone.py     85032.14 ->  105681.44 :  +20649.30 = +24.284% (+/-2.25%)
misc_raytrace.py    19800.01 ->   23350.73 :   +3550.72 = +17.933% (+/-2.79%)

In summary, compared to MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE, the new MICROPY_OPT_LOAD_ATTR_FAST_PATH and MICROPY_OPT_MAP_LOOKUP_CACHE options:
- are simpler;
- take less code size;
- are faster (generally);
- work with code generated by the native emitter;
- can be used on embedded targets with a small and constant RAM overhead;
- allow the same .mpy bytecode to run on all targets.

See #7680 for further discussion. And see also #7653 for a discussion about simplifying mpy-cross options.

Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2021-09-06 02:28:06 +00:00			`# -march=x64`
			0xA05: b'M\x05\x0a\x1f \x84b\xe9/\x00\x00\x00SH\x8b\x1ds\x00\x00\x00\xbe\x02\x00\x00\x00\xffS\x18\xbf\x01\x00\x00\x00H\x85\xc0u\x0cH\x8bC \xbe\x02\x00\x00\x00[\xff\xe0H\x0f\xaf\xf8H\xff\xc8\xeb\xe6ATUSH\x8b\x1dA\x00\x00\x00H\x8b\x7f\x08L\x8bc(A\xff\xd4H\x8d5\x1f\x00\x00\x00H\x89\xc5H\x8b\x05-\x00\x00\x00\x0f\xb78\xffShH\x89\xefA\xff\xd4H\x8b\x03[]A\\\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x84@\x12factorial\x10\x00\x00\r \x01"\x9f\x1c\x01\x1e\xff',
py/persistentcode: Maintain root ptr list of imported native .mpy code. On ports where normal heap memory can contain executable code (eg ARM-based ports such as stm32), native code loaded from an .mpy file may be reclaimed by the GC because there's no reference to the very start of the native machine code block that is reachable from root pointers (only pointers to internal parts of the machine code block are reachable, but that doesn't help the GC find the memory). This commit fixes this issue by maintaining an explicit list of root pointers pointing to native code that is loaded from an .mpy file. This is not needed for all ports so is selectable by the new configuration option MICROPY_PERSISTENT_CODE_TRACK_RELOC_CODE. It's enabled by default if a port does not specify any special functions to allocate or commit executable memory. A test is included to test that native code loaded from an .mpy file does not get reclaimed by the GC. Fixes #6045. Signed-off-by: Damien George <damien@micropython.org> 2020-07-27 13:52:38 +00:00			`# -march=armv7m`
			`0x1605: b"M\x05\x16\x1f \x84\x12\x1a\xe0\x00\x00\x13\xb5\nK\nJ{D\x9cX\x02!\xe3h\x98G\x03F\x01 3\xb9\x02!#i\x01\x93\x02\xb0\xbd\xe8\x10@\x18GXC\x01;\xf4\xe7\x00\xbfj\x00\x00\x00\x00\x00\x00\x00\xf8\xb5\tN\tK~D\xf4X@hgi\xb8G\x05F\x07K\x07I\xf2XyD\x10\x88ck\x98G(F\xb8G h\xf8\xbd6\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x1c\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x01\x84\x00\x12factorial\x10\x00\x00\r<\x01>\x9f8\x01:\xff",`
			`}`

			`# Populate other armv7m-derived archs based on armv7m.`
			`for arch in (0x1A05, 0x1E05, 0x2205):`
			`features0_file_contents[arch] = features0_file_contents[0x1605]`

			`if sys.implementation.mpy not in features0_file_contents:`
			`print("SKIP")`
			`raise SystemExit`

			`# These are the test .mpy files.`
			`user_files = {"/features0.mpy": features0_file_contents[sys.implementation.mpy]}`

			`# Create and mount a user filesystem.`
			`uos.mount(UserFS(user_files), "/userfs")`
			`sys.path.append("/userfs")`

			`# Import the native function.`
			`gc.collect()`
			`from features0 import factorial`

			`# Free the module that contained the function.`
			`del sys.modules["features0"]`

			`# Run a GC cycle which should reclaim the module but not the function.`
			`gc.collect()`

			`# Allocate lots of fragmented memory to overwrite anything that was just freed by the GC.`
			`for i in range(1000):`
			`[]`

			`# Run the native function, it should not have been freed or overwritten.`
			`print(factorial(10))`

			`# Unmount and undo path addition.`
			`uos.umount("/userfs")`
			`sys.path.pop()`