Add faster non-alpha version of tile loop

pull/41/head
Luke Wren 2021-11-20 23:05:38 +00:00
rodzic 82cc7ced44
commit df00a0f3aa
3 zmienionych plików z 36 dodań i 3 usunięć

Wyświetl plik

@ -73,7 +73,7 @@
#define MAP_WIDTH 512
#define MAP_HEIGHT 256
#define N_CHARACTERS 70
#define N_CHARACTERS 75
typedef struct {
int16_t pos_x;
@ -170,7 +170,7 @@ void render(const game_state_t *gstate) {
.log_size_x = 9,
.log_size_y = 8,
.tilesize = TILESIZE_16,
.fill_loop = (tile_loop_t)tile16_16px_alpha_loop
.fill_loop = (tile_loop_t)tile16_16px_loop
};
sprite_t sp = {

Wyświetl plik

@ -44,6 +44,12 @@
1:
.endm
// Unconditionally write the two halfwords packed in \rs to the destination:
// the low halfword goes to [\rd + \dstoffs], the high halfword to the
// following two bytes. No alpha test is performed.
//   rd:      base register for the destination address (not modified)
//   rs:      register holding two packed halfwords; clobbered, left holding
//            its original upper 16 bits
//   dstoffs: byte offset from \rd for the first halfword
// The shift also updates the condition flags.
.macro do_2px_16bpp rd rs dstoffs
	strh \rs, [\rd, #\dstoffs]
	lsrs \rs, \rs, #16
	strh \rs, [\rd, #\dstoffs + 2]
.endm
// interp0 has been set up to give the next x-ward pointer into the tilemap
// with each pop. This saves us having to remember the tilemap pointer and
// tilemap x size mask in core registers.
@ -53,7 +59,9 @@
// r2: x0 (start pos in tile space)
// r3: x1 (end pos in tile space, exclusive)
decl_func tile16_16px_alpha_loop
// Instantiated with alpha=1 and alpha=0 to get both variants of the loop.
// Linker garbage collection ensures we only keep the versions we use.
.macro tile16_16px_loop_alpha_or_nonalpha alpha
push {r4-r7, lr}
mov r4, r8
mov r5, r9
@ -78,8 +86,10 @@ decl_func tile16_16px_alpha_loop
// Fall through into copy loop
1:
ldrh r5, [r4]
.if \alpha
lsrs r6, r5, #ALPHA_SHIFT_16BPP
bcc 2f
.endif
strh r5, [r0]
2:
adds r4, #2
@ -113,6 +123,7 @@ decl_func tile16_16px_alpha_loop
lsls r1, #9
add r1, r8
.if \alpha
ldmia r1!, {r3-r6}
do_2px_16bpp_alpha r0 r3 r2 0
do_2px_16bpp_alpha r0 r4 r2 4
@ -123,6 +134,18 @@ decl_func tile16_16px_alpha_loop
do_2px_16bpp_alpha r0 r4 r2 20
do_2px_16bpp_alpha r0 r5 r2 24
do_2px_16bpp_alpha r0 r6 r2 28
.else
ldmia r1!, {r3-r6}
do_2px_16bpp r0 r3 0
do_2px_16bpp r0 r4 4
do_2px_16bpp r0 r5 8
do_2px_16bpp r0 r6 12
ldmia r1!, {r3-r6}
do_2px_16bpp r0 r3 16
do_2px_16bpp r0 r4 20
do_2px_16bpp r0 r5 24
do_2px_16bpp r0 r6 28
.endif
adds r0, 32
3:
cmp r0, ip
@ -136,8 +159,10 @@ decl_func tile16_16px_alpha_loop
b 3f
1:
ldrh r5, [r4]
.if \alpha
lsrs r6, r5, #ALPHA_SHIFT_16BPP
bcc 2f
.endif
strh r5, [r0]
2:
adds r4, #2
@ -150,3 +175,10 @@ decl_func tile16_16px_alpha_loop
mov r8, r4
mov r9, r5
pop {r4-r7, pc}
.endm
// Emit both variants of the tile loop from the shared macro body.
// NOTE(review): decl_func is defined elsewhere; presumably it opens a new
// function symbol with the given name -- confirm against its definition.

// alpha=1: the `.if \alpha` sections are assembled, so each pixel is tested
// before being stored and the unrolled path uses do_2px_16bpp_alpha.
decl_func tile16_16px_alpha_loop
tile16_16px_loop_alpha_or_nonalpha 1
// alpha=0: the per-pixel test is omitted and every pixel is stored; the
// unrolled path uses do_2px_16bpp instead.
decl_func tile16_16px_loop
tile16_16px_loop_alpha_or_nonalpha 0

Wyświetl plik

@ -40,6 +40,7 @@ typedef void (*tile16_loop_t)(uint16_t *dst, const uint16_t *tileset, uint x0, u
typedef void (*tile8_loop_t)(uint8_t *dst, const uint8_t *tileset, uint x0, uint x1);
void tile16_16px_alpha_loop(uint16_t *dst, const uint16_t *tileset, uint x0, uint x1);
void tile16_16px_loop(uint16_t *dst, const uint16_t *tileset, uint x0, uint x1);
// ----------------------------------------------------------------------------
// Functions from tile.c