Change assembly code to use variable peripheral base

pull/256/head
IanSB 2021-11-21 06:03:41 +00:00
rodzic 8d65fb0443
commit 05b649fe91
6 zmienionych plików z 111 dodań i 104 usunięć

Wyświetl plik

@ -75,6 +75,9 @@
.global _get_hardware_id
.global _get_peripheral_base
.global _get_GPLEV0_r4
.global _get_gpu_data_base_r4
.global _get_gpu_command_base_r10
#ifdef HAS_MULTICORE
.global _get_core
@ -293,11 +296,27 @@ _read_hardware_id:
// _get_hardware_id
// Returns in r0 the word stored at the hardware_id label below.
// Out: r0 = contents of hardware_id. No other general register is written.
// NOTE(review): two return instructions follow each other here; r14 and lr
// name the same register, so the second one can never be reached. This looks
// like the old and new form of the same line shown together - confirm.
_get_hardware_id:
ldr r0, hardware_id
mov pc, r14
mov pc, lr
// _get_peripheral_base
// Returns in r0 the word stored at the peripheral_base label (the label is
// defined outside this view).
// Out: r0 = contents of peripheral_base. No other general register is written.
// Callers add a register offset to r0 afterwards (e.g. SMICTRL_OFFSET).
// NOTE(review): two return instructions follow each other here; r14 and lr
// name the same register, so the second one can never be reached - confirm
// whether only one of them is meant to be present.
_get_peripheral_base:
ldr r0, peripheral_base
mov pc, r14
mov pc, lr
// _get_GPLEV0_r4
// Computes the GPLEV0 register address from the run-time peripheral base and
// leaves it in r4 (not r0), so it can stand in for "ldr r4, =GPLEV0".
// Out: r4 = peripheral_base + GPIO_BASE_OFFSET + GPLEV0_OFFSET.
// Only r4 is written; the adds do not set flags. Entered with bl, so the
// caller's lr is overwritten by the call itself.
// NOTE(review): the offset is presumably added in two steps because the
// combined constant does not fit a single ARM immediate operand - confirm.
_get_GPLEV0_r4:
ldr r4, peripheral_base
add r4, r4, #GPIO_BASE_OFFSET
add r4, r4, #GPLEV0_OFFSET
mov pc, lr
// _get_gpu_data_base_r4
// Computes the address of the first GPU data register from the run-time
// peripheral base and leaves it in r4, standing in for "ldr r4, =GPU_DATA_0".
// Out: r4 = peripheral_base + GPU_COMMAND_BASE_OFFSET + GPU_DATA_0_offset.
// Only r4 is written; flags are not changed. Entered with bl, so a caller
// that still needs its own lr/r14 value must save it around the call.
_get_gpu_data_base_r4:
ldr r4, peripheral_base
add r4, r4, #(GPU_COMMAND_BASE_OFFSET + GPU_DATA_0_offset)
mov pc, lr
// _get_gpu_command_base_r10
// Computes the address of the GPU command register from the run-time
// peripheral base and leaves it in r10, standing in for "ldr r10, =GPU_COMMAND".
// Out: r10 = peripheral_base + GPU_COMMAND_BASE_OFFSET.
// Only r10 is written; flags are not changed. Entered with bl, so the
// caller's lr is overwritten by the call itself.
_get_gpu_command_base_r10:
ldr r10, peripheral_base
add r10, r10, #GPU_COMMAND_BASE_OFFSET
mov pc, lr
hardware_id:
.word 0

Wyświetl plik

@ -28,7 +28,6 @@ skip_psync_loop_no_oldL\@:
.macro SKIP_PSYNC_NO_OLD_CPLD_NTSC
SKIP_PSYNC_COMMON_NO_OLD_CPLD
ldr r4, =GPU_DATA_0
add r8, r7, r1
add r8, r8, #1 + 4
mov r8, r8, lsr #1
@ -51,7 +50,6 @@ skip_psync_loop_no_oldL6\@:
.macro SKIP_PSYNC_NO_OLD_CPLD_NTSC_3BPP
SKIP_PSYNC_COMMON_NO_OLD_CPLD
ldr r4, =GPU_DATA_0
add r8, r7, r1
add r8, r8, #1 + 2
mov r8, r8, lsr #1

Wyświetl plik

@ -8,9 +8,32 @@
#define _RPI3 3
#define _RPI4 4
#define _PERIPHERAL_BASE_RPI 0x20000000
#define _PERIPHERAL_BASE_RPI3 0x3F000000 //also RPI2
#define _PERIPHERAL_BASE_RPI4 0xFE000000
//do not leave USE_ARM_CAPTURE uncommented during a release build as all versions will be ARM
//#define USE_ARM_CAPTURE //uncomment to select ARM capture build
#define USE_CACHED_SCREEN // caches the upper half of the screen area and uses it for mode7 deinterlace
#define CACHED_SCREEN_OFFSET 0x00B00000 // offset to cached screen area
#define CACHED_SCREEN_SIZE 0x00100000 // size of cached screen area
#define USE_ALT_M7DEINTERLACE_CODE // uses re-ordered code for mode7 deinterlace
#if defined(RPI2) || defined(RPI3)
#define HAS_MULTICORE // indicates multiple cores are available
#if defined(USE_ARM_CAPTURE)
#define WARN_12BIT // warn that 9bpp & 12bpp won't work
#define HIDE_12BIT_PROFILES // 12 bit profile won't work on Pi zero2 etc
#define INHIBIT_DOUBLE_HEIGHT // inhibit line doubling as it causes memory stalls
#endif
#endif
#if defined(RPI4)
#define HAS_MULTICORE // indicates multiple cores are available
#define USE_CACHED_SCREEN // caches the upper half of the screen area and uses it for mode7 deinterlace
#define USE_ALT_M7DEINTERLACE_CODE // uses re-ordered code for mode7 deinterlace
#define MODE7_ALWAYS_ARM // always runs mode7 capture code on ARM
#endif
//#define USE_MULTICORE //can be used to add code in an extra core
// Define how the Pi Framebuffer is initialized
// - if defined, use the property interface (Channel 8)
@ -119,41 +142,16 @@
#define BIT_BOTH_BUFFERS (BIT_DRAW_BUFFER | BIT_DISP_BUFFER)
//do not leave USE_ARM_CAPTURE uncommented during a release build as all versions will be ARM
//#define USE_ARM_CAPTURE //uncomment to select ARM capture build
#define CACHED_SCREEN_OFFSET 0x00B00000 // offset to cached screen area
#define CACHED_SCREEN_SIZE 0x00100000 // size of cached screen area
#if defined(RPI2) || defined(RPI3)
#define HAS_MULTICORE // indicates multiple cores are available
#define USE_CACHED_SCREEN // caches the upper half of the screen area and uses it for mode7 deinterlace
#define USE_ALT_M7DEINTERLACE_CODE // uses re-ordered code for mode7 deinterlace
#if defined(USE_ARM_CAPTURE)
#define WARN_12BIT // warn that 9bpp & 12bpp won't work
#define HIDE_12BIT_PROFILES // 12 bit profile won't work on Pi zero2 etc
#define INHIBIT_DOUBLE_HEIGHT // inhibit line doubling as it causes memory stalls
#endif
#endif
#if defined(RPI4)
#define HAS_MULTICORE // indicates multiple cores are available
#define USE_CACHED_SCREEN // caches the upper half of the screen area and uses it for mode7 deinterlace
#define USE_ALT_M7DEINTERLACE_CODE // uses re-ordered code for mode7 deinterlace
#define MODE7_ALWAYS_ARM // always runs mode7 capture code on ARM
#endif
//#define USE_MULTICORE //can be used to add code in an extra core
#ifdef __ASSEMBLER__
#define GPU_COMMAND (PERIPHERAL_BASE + 0x000000a0)
#define GPU_DATA_0 (PERIPHERAL_BASE + 0x000000a4)
#define GPU_DATA_1 (PERIPHERAL_BASE + 0x000000a8)
#define GPU_DATA_2 (PERIPHERAL_BASE + 0x000000ac)
#define GPU_SYNC (PERIPHERAL_BASE + 0x000000b0) //gap in data block to allow fast 3 register read on ARM side
#define GPU_DATA_3 (PERIPHERAL_BASE + 0x000000b4) //using a single ldr and a two register ldmia
#define GPU_DATA_4 (PERIPHERAL_BASE + 0x000000b8) //can't use more than a single unaligned two register ldmia on the peripherals
#define GPU_DATA_5 (PERIPHERAL_BASE + 0x000000bc)
#define GPU_COMMAND_BASE_OFFSET 0x000000a0
//#define GPU_DATA_0 (PERIPHERAL_BASE + 0x000000a4)
//#define GPU_DATA_1 (PERIPHERAL_BASE + 0x000000a8)
//#define GPU_DATA_2 (PERIPHERAL_BASE + 0x000000ac)
//#define GPU_SYNC (PERIPHERAL_BASE + 0x000000b0) //gap in data block to allow fast 3 register read on ARM side
//#define GPU_DATA_3 (PERIPHERAL_BASE + 0x000000b4) //using a single ldr and a two register ldmia
//#define GPU_DATA_4 (PERIPHERAL_BASE + 0x000000b8) //can't use more than a single unaligned two register ldmia on the peripherals
//#define GPU_DATA_5 (PERIPHERAL_BASE + 0x000000bc)
#define GPU_COMMAND_offset 0x00
#define GPU_DATA_0_offset 0x04
@ -164,22 +162,24 @@
#define GPU_DATA_4_offset 0x18
#define GPU_DATA_5_offset 0x1c
#define GPFSEL0 (PERIPHERAL_BASE + 0x200000) // controls GPIOs 0..9
#define GPFSEL1 (PERIPHERAL_BASE + 0x200004) // controls GPIOs 10..19
#define GPFSEL2 (PERIPHERAL_BASE + 0x200008) // controls GPIOs 20..29
#define GPSET0 (PERIPHERAL_BASE + 0x20001C)
#define GPCLR0 (PERIPHERAL_BASE + 0x200028)
#define GPLEV0 (PERIPHERAL_BASE + 0x200034)
#define GPEDS0 (PERIPHERAL_BASE + 0x200040)
#define GPREN0 (PERIPHERAL_BASE + 0x20004C)
#define GPFEN0 (PERIPHERAL_BASE + 0x200058)
#define GPAREN0 (PERIPHERAL_BASE + 0x20007C)
#define GPAFEN0 (PERIPHERAL_BASE + 0x200088)
#define GPIO_BASE_OFFSET 0x200000
#define GPSET0_OFFSET 0x00001C
#define GPCLR0_OFFSET 0x000028
#define GPLEV0_OFFSET 0x000034
#define FIQCTRL (PERIPHERAL_BASE + 0x00B20C)
#define INTPEND2_OFFSET 0x00B208
#define SMICTRL_OFFSET 0x600000
//#define GPFSEL0 (PERIPHERAL_BASE + 0x200000) // controls GPIOs 0..9
//#define GPFSEL1 (PERIPHERAL_BASE + 0x200004) // controls GPIOs 10..19
//#define GPFSEL2 (PERIPHERAL_BASE + 0x200008) // controls GPIOs 20..29
//#define GPEDS0 (PERIPHERAL_BASE + 0x200040)
//#define GPREN0 (PERIPHERAL_BASE + 0x20004C)
//#define GPFEN0 (PERIPHERAL_BASE + 0x200058)
//#define GPAREN0 (PERIPHERAL_BASE + 0x20007C)
//#define GPAFEN0 (PERIPHERAL_BASE + 0x200088)
//#define FIQCTRL (PERIPHERAL_BASE + 0x00B20C)
#define INTPEND2 (PERIPHERAL_BASE + 0x00B208)
#define SMICTRL (PERIPHERAL_BASE + 0x600000)
// Offsets into capture_info_t structure below
#define O_FB_BASE 0

Wyświetl plik

@ -400,7 +400,7 @@ got_sample\@:
.endm
.macro SETUP_GPU_CAPTURE
ldr r10, =GPU_COMMAND
bl _get_gpu_command_base_r10
capturebusy\@:
ldr r8, [r10]
cmp r8, #0
@ -453,12 +453,14 @@ clear_regs\@:
moveq r7, r8 // only allow fine sideways scrolling in bbc / electron mode (causes timing issues in ega mode)
// Skip the configured number of psync edges (modes 0..6: edges every 250ns, mode 7: edges every 333ns)
do_skip_psync_no_old\@:
mov r8, r14
bl _get_gpu_data_base_r4
mov r14, r8
//exit with timestamp in r14
.endm
.macro SKIP_PSYNC_NO_OLD_CPLD
SKIP_PSYNC_COMMON_NO_OLD_CPLD
ldr r4, =GPU_DATA_0
add r8, r7, r1
add r8, r8, #1
mov r8, r8, lsr #1
@ -479,7 +481,7 @@ skip_psync_no_old_loop\@:
ldr r8, =param_delay
ldr r8, [r8]
add r7, r7, r8
ldr r4, =GPU_DATA_0
bl _get_gpu_data_base_r4
add r8, r7, r1
add r8, r8, #1
mov r8, r8, lsr #1
@ -1366,7 +1368,8 @@ skip_psync_loop_simple_fast_loop\@:
.macro CLEAR_VSYNC
// Clear the VSYNC interrupt
ldr r0, =SMICTRL
bl _get_peripheral_base
add r0, r0, #SMICTRL_OFFSET
bic r3, r3, #BIT_VSYNC_MARKER
mov r10, #0
str r10, [r0]
@ -1379,7 +1382,9 @@ skip_psync_loop_simple_fast_loop\@:
// tst r3, #(BIT_PROBE)
// bne novsync\@
// Poll for the VSYNC interrupt
ldr r0, =INTPEND2
bl _get_peripheral_base
ldr r14, =INTPEND2_OFFSET
add r0, r0, r14
ldr r0, [r0]
tst r0, #(1<<VSYNCINT)
beq novsync\@

Wyświetl plik

@ -206,7 +206,7 @@ rgb_to_fb:
skip_swap:
#endif
// Setup r4 as a constant
ldr r4, =GPLEV0
bl _get_GPLEV0_r4
// Setup r3 with the flags/options parameter (as per before)
mov r3, r1
@ -463,13 +463,13 @@ no_mode7_test:
bne no_vsync_test
glitch_detected:
mov r8, #VERSION_MASK
str r8, [r4, #-(GPLEV0 - GPCLR0)] //briefly switch to vsync on psync by clearing version bit
str r8, [r4, #-(GPLEV0_OFFSET - GPCLR0_OFFSET)] //briefly switch to vsync on psync by clearing version bit
ldr r9, [r4] // dummy read for delay
ldr r9, [r4] // dummy read for delay
ldr r6, [r4]
ldr r9, [r4] // dummy read for delay
ldr r7, [r4]
str r8, [r4, #-(GPLEV0 - GPSET0)] //restore version bit
str r8, [r4, #-(GPLEV0_OFFSET - GPSET0_OFFSET)] //restore version bit
eor r6, r6, r7 // eor together in case of glitches
tst r6, #PSYNC_MASK
bne glitch_detected
@ -981,11 +981,11 @@ process_line_loop:
mov r14, #0
tst r3, #BITDUP_IIGS_DETECT
movne r10, #VERSION_MASK
strne r10, [r4, #-(GPLEV0 - GPCLR0)] //briefly switch to vsync on psync by clearing version bit
strne r10, [r4, #-(GPLEV0_OFFSET - GPCLR0_OFFSET)] //briefly switch to vsync on psync by clearing version bit
ldrne r14, [r4] // dummy read for delay
ldrne r14, [r4] // dummy read for delay
ldrne r14, [r4]
strne r10, [r4, #-(GPLEV0 - GPSET0)] //restore version bit
strne r10, [r4, #-(GPLEV0_OFFSET - GPSET0_OFFSET)] //restore version bit
ldrne r10, vsync_detected
tst r14, #CSYNC_MASK
addne r10, r10, #1
@ -1148,13 +1148,13 @@ done_ntsc_auto:
orr r0, #RET_VSYNC_POLARITY_CHANGED
mov r8, #VERSION_MASK
str r8, [r4, #-(GPLEV0 - GPCLR0)] //briefly switch to vsync on psync by clearing version bit
str r8, [r4, #-(GPLEV0_OFFSET - GPCLR0_OFFSET)] //briefly switch to vsync on psync by clearing version bit
ldr r9, [r4] // dummy read for delay
ldr r9, [r4] // dummy read for delay
ldr r6, [r4]
ldr r9, [r4] // dummy read for delay
ldr r7, [r4]
str r8, [r4, #-(GPLEV0 - GPSET0)] //restore version bit
str r8, [r4, #-(GPLEV0_OFFSET - GPSET0_OFFSET)] //restore version bit
ldr r8, param_sync_type
tst r8, #SYNC_BIT_VSYNC_INVERTED
@ -1256,10 +1256,10 @@ skip_osd_update:
FLIP_BUFFER
#endif
push {r1-r5, r11}
push {r4}
mov r0, #0 //do not force genlock
bl recalculate_hdmi_clock_line_locked_update
pop {r4}
// Returns:
// r0=0 genlock disabled - LED off
// r0=1 genlock enabled (unlocked) - LED flash
@ -1272,16 +1272,16 @@ skip_osd_update:
orr r2, r2, r3 //sync gone for 3 frames?
cmp r2, #0
moveq r0, #0 //if no sync switch off genlock led
moveq r4, #MODE7_MASK
ldreq r3, =GPSET0
streq r4, [r3] //switch on mode7 led if no sync so at least one led is always lit (will be set correctly when sync reacquired)
moveq r5, #MODE7_MASK
subeq r3, r4, #(GPLEV0_OFFSET - GPSET0_OFFSET)
streq r5, [r3] //switch on mode7 led if no sync so at least one led is always lit (will be set correctly when sync reacquired)
READ_CYCLE_COUNTER r1
mov r2, #LED1_MASK
tst r0, #1 // should LED flash?
tstne r1, #(1 << 26) // flash rate ~ 8Hz
tsteq r0, #2 // should LED be on?
ldrne r1, =GPSET0 // LED on
ldreq r1, =GPCLR0 // LED off
subne r1, r4, #(GPLEV0_OFFSET - GPSET0_OFFSET)
subeq r1, r4, #(GPLEV0_OFFSET - GPCLR0_OFFSET)
str r2, [r1]
pop {r1-r5, r11}
@ -1459,7 +1459,7 @@ vsync_retry_count:
key_press_reset:
push {r4-r12, lr}
ldr r4, =GPLEV0
bl _get_GPLEV0_r4
ldr r8, [r4]
mov r0, #0
tst r8, #SW1_MASK
@ -1691,7 +1691,7 @@ abortvs:
READ_CYCLE_COUNTER r6
mov r8, #VERSION_MASK
str r8, [r4, #-(GPLEV0 - GPSET0)] //restore version bit
str r8, [r4, #-(GPLEV0_OFFSET - GPSET0_OFFSET)] //restore version bit
ldr r7, [r4] // dummy read for delay
ldr r7, [r4] // dummy read for delay
@ -1739,7 +1739,7 @@ measure_n_lines:
push {r4-r12, lr}
mov r3, #0
// Setup R4 as a constant
ldr r4, =GPLEV0
bl _get_GPLEV0_r4
// wait for vsync
bl wait_for_vsync
@ -1806,7 +1806,7 @@ measure_vsync:
mov r3, #0
// Setup R4 as a constant
ldr r4, =GPLEV0
bl _get_GPLEV0_r4
mov r12, #VSYNC_RETRY_MAX //retry count
ldr r9, sync_detected
cmp r9, #0
@ -1875,7 +1875,7 @@ test_again:
analyse_sync:
push {r4-r12, lr}
ldr r4, =GPLEV0
bl _get_GPLEV0_r4
mov r6, #0 //csync low
mov r7, #0 //csync high
READ_CYCLE_COUNTER r10
@ -2168,7 +2168,7 @@ benchloop2:
pop {r1-r12, pc}
gpio_bench:
ldr r4, =GPLEV0
bl _get_GPLEV0_r4
ldr r1, =100000
READ_CYCLE_COUNTER r6
gpio_bench_loop:
@ -2181,7 +2181,7 @@ gpio_bench_loop:
pop {r1-r12, pc}
mbox_bench_1:
ldr r4, =GPU_DATA_0
bl _get_gpu_data_base_r4
ldr r1, =100000
READ_CYCLE_COUNTER r6
mbox_bench_1_loop:
@ -2194,7 +2194,7 @@ mbox_bench_1_loop:
pop {r1-r12, pc}
mbox_bench_3:
ldr r4, =GPU_DATA_0
bl _get_gpu_data_base_r4
ldr r1, =100000
READ_CYCLE_COUNTER r6
mbox_bench_3_loop:
@ -3121,7 +3121,9 @@ wait_for_pi_fieldsync:
push {r4-r12, lr}
CLEAR_VSYNC
// Poll for the VSYNC interrupt
ldr r0, =INTPEND2
bl _get_peripheral_base
ldr r14, =INTPEND2_OFFSET
add r0, r0, r14
wait_for_pi_loop:
ldr r1, [r0]
tst r1, #(1<<VSYNCINT)
@ -3140,7 +3142,7 @@ poll_keys_loop:
// Wait ~20ms (for debouncing)
ldr r0, =20*1024*1024
bl delay_in_arm_cycles
ldr r4, =GPLEV0
bl _get_GPLEV0_r4
ldr r8, [r4]
mov r0, #0
ldr r10, =sw1_power_up

Wyświetl plik

@ -33,29 +33,12 @@
#ifdef __ASSEMBLER__
#if defined(RPI2) || defined(RPI3)
#define PERIPHERAL_BASE 0x3F000000
#else
#if defined(RPI4)
#define PERIPHERAL_BASE 0xFE000000
#else
#define PERIPHERAL_BASE 0x20000000
#endif
#endif
#define _PERIPHERAL_BASE_RPI 0x20000000
#define _PERIPHERAL_BASE_RPI3 0x3F000000 //also RPI2
#define _PERIPHERAL_BASE_RPI4 0xFE000000
#else
#if defined(RPI2) || defined(RPI3)
#define PERIPHERAL_BASE 0x3F000000UL
#else
#if defined(RPI4)
#define PERIPHERAL_BASE 0xFE000000UL
#else
#define PERIPHERAL_BASE 0x20000000UL
#endif
#endif
#include <stdint.h>
typedef volatile uint32_t rpi_reg_rw_t;