Add palette encoding to libdvi, plus example (#8)

* Add palette encoder

(cherry picked from commit c30692ee75b8a2e3aaf4d7d5b809c267f9066aed)

* Add full resolution Mandelbrot as example for palette encoding

* Switch to 2 symbols per word, this is performance neutral on the encode, but saves memory bandwidth

(cherry picked from commit 70834bfa1953a29d95e0d0f5ae16c86d2feb7242)

* Marginally faster palette encode

* Up to 256 colour palettes

(cherry picked from commit 86e0e5d7dd11020d01f167b8cad571391de56aee)

* Apply parity alternation to palette and other full res case

(cherry picked from commit e9971155ff08977275612e4d22d37f0f416ef13e)

* Use 256 colour palette, generate on both cores

* Clear up magic numbers

(cherry picked from commit 6180d210e59f25c7c4b4855920acdaa973447228)

* Use PICO_DEFAULT_LED_PIN

(cherry picked from commit 383c6eb4b6ea79b617d785e3736ea744746f57af)
pull/11/head
Mike Bell 2021-03-08 00:23:26 +00:00 zatwierdzone przez GitHub
rodzic 0d9ac42ba4
commit a607ff5afa
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
9 zmienionych plików z 713 dodań i 0 usunięć

1
software/.gitignore vendored
Wyświetl plik

@ -1 +1,2 @@
build
*.swp

Wyświetl plik

@ -7,3 +7,4 @@ add_subdirectory(moon)
add_subdirectory(sprite_bounce)
add_subdirectory(terminal)
add_subdirectory(vista)
add_subdirectory(mandel-full)

Wyświetl plik

@ -0,0 +1,26 @@
# Build rules for the mandel-full example.
#
# Replace TMDS with 10 bit UART (same baud rate):
# add_definitions(-DDVI_SERIAL_DEBUG=1)
# add_definitions(-DRUN_FROM_CRYSTAL)

add_executable(mandel-full
	main.c
	mandelbrot.c
)

# All compile-time configuration for this target lives in one place.
target_compile_definitions(mandel-full PRIVATE
	DVI_VERTICAL_REPEAT=1
	DVI_N_TMDS_BUFFERS=8
	DVI_SYMBOLS_PER_WORD=2
	PICO_STACK_SIZE=0x400
)

target_link_libraries(mandel-full
	pico_stdlib
	pico_multicore
	libdvi
)

# create map/bin/hex file etc.
pico_add_extra_outputs(mandel-full)

Wyświetl plik

@ -0,0 +1,207 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "hardware/clocks.h"
#include "hardware/dma.h"
#include "hardware/gpio.h"
#include "hardware/irq.h"
#include "hardware/pll.h"
#include "hardware/sync.h"
#include "hardware/structs/bus_ctrl.h"
#include "hardware/structs/ssi.h"
#include "hardware/vreg.h"
#include "pico/multicore.h"
#include "pico/sem.h"
#include "pico/stdlib.h"
#include "tmds_encode.h"
#include "dvi.h"
#include "dvi_serialiser.h"
#include "common_dvi_pin_configs.h"
#include "mandelbrot.h"
// TMDS bit clock 252 MHz
// DVDD 1.2V (1.1V seems ok too)
// Visible frame size in pixels.
#define FRAME_WIDTH 640
#define FRAME_HEIGHT 480
// Core voltage passed to vreg_set_voltage() at the start of main().
#define VREG_VSEL VREG_VOLTAGE_1_10
// DVI timing used for dvi0.timing; its bit_clk_khz also sets the system clock.
#define DVI_TIMING dvi_timing_640x480p_60hz
// NOTE(review): N_IMAGES and FRAMES_PER_IMAGE are not referenced in the
// visible part of this file - confirm whether they are still needed.
#define N_IMAGES 3
#define FRAMES_PER_IMAGE 300
// One palette index per pixel for half of the frame's rows. main() scans the
// rows forwards and then backwards, so the other half of the screen is a
// vertical mirror of this buffer.
uint8_t mandel[FRAME_WIDTH * (FRAME_HEIGHT / 2)];
// Number of bits in a palette index, and the resulting palette size.
#define PALETTE_BITS 8
#define PALETTE_SIZE (1 << PALETTE_BITS)
// Colour palette; tmds_setup_palette_symbols() reads the entries as RGB 565.
uint16_t palette[PALETTE_SIZE];
// TMDS symbol palette built from `palette` (6 words per palette entry).
uint32_t tmds_palette[PALETTE_SIZE * 6];
// DVI driver instance.
struct dvi_inst dvi0;
// Released by main() once setup is complete; core1_main() waits on it
// before calling dvi_start().
struct semaphore dvi_start_sem;
// Generation state for the image held in `mandel`.
FractalBuffer fractal;
// Rotation added to every palette index by init_palette(); incremented on
// each call.
static uint8_t palette_offset = 0;
// Rebuild the colour palette and the TMDS symbol palette derived from it.
//
// Entry 0 is always black. Every other index is rotated by palette_offset
// (wrapping at 256) and mapped onto one of eight 32-entry bands; within a
// band the colour is the 5-bit position multiplied by a per-band constant.
// Each call advances palette_offset by one.
void init_palette() {
    // Multiplier per band. With the bit layout that
    // tmds_setup_palette_symbols() extracts (blue 0-4, green 5-10,
    // red 11-15) these are: blue, green, red, red+green, green+blue,
    // red+blue, all three, and black.
    static const uint16_t band_scale[8] = {
        0x0001, 0x0040, 0x0800, 0x0840, 0x0041, 0x0801, 0x0841, 0x0000,
    };

    palette[0] = 0;
    for (int idx = 1; idx < PALETTE_SIZE; ++idx) {
        const uint8_t rotated = idx + palette_offset;
        const uint8_t band = rotated >> 5;
        const uint8_t step = rotated & 0x1f;
        palette[idx] = step * band_scale[band];
    }
    ++palette_offset;
    tmds_setup_palette_symbols(palette, tmds_palette, PALETTE_SIZE);
}
// Reset the image buffer and start a fresh render of the full view.
//
// The buffer is first filled with a diagonal stripe pattern so there is
// something to show before any pixel has been computed; the fractal
// description is then pointed at the buffer and (re)initialised.
void init_mandel() {
    for (int row = 0; row < (FRAME_HEIGHT / 2); ++row) {
        for (int col = 0; col < FRAME_WIDTH; ++col) {
            mandel[row * FRAME_WIDTH + col] = ((col + row) & 0x3f);
        }
    }

    fractal.buff = mandel;
    fractal.rows = FRAME_HEIGHT / 2;
    fractal.cols = FRAME_WIDTH;
    fractal.max_iter = PALETTE_SIZE;
    fractal.iter_offset = 0;

    // Only the half with negative imaginary part is generated.
    fractal.minx = -2.25f;
    fractal.maxx = 0.75f;
    fractal.miny = -1.6f;
    fractal.maxy = 0.f - (1.6f / FRAME_HEIGHT); // Half a row
    fractal.use_cycle_check = true;

    init_fractal(&fractal);
}
#define NUM_ZOOMS 64
static uint32_t zoom_count = 0;
// Advance the zoom animation by one step and restart generation.
//
// After NUM_ZOOMS steps the view is reset through init_mandel(). Otherwise
// the window is shrunk to 96% of its current size, its horizontal centre is
// moved towards -1.45 as zoom_count grows, and it stays vertically anchored
// so that maxy is half a row below zero.
void zoom_mandel() {
    if (++zoom_count == NUM_ZOOMS) {
        init_mandel();
        zoom_count = 0;
        return;
    }

    // zoom_count is a uint32_t: print it with an unsigned conversion and an
    // explicit cast so the argument type always matches the format string.
    printf("Zoom: %lu\n", (unsigned long)zoom_count);

    float zoomx = -.75f - .7f * ((float)zoom_count / (float)NUM_ZOOMS);
    float sizex = fractal.maxx - fractal.minx;
    // miny is negative and the view ends at (about) zero, so the full
    // mirrored height is twice |miny|.
    float sizey = fractal.miny * -2.f;
    float zoomr = 0.96f * 0.5f;

    fractal.minx = zoomx - zoomr * sizex;
    fractal.maxx = zoomx + zoomr * sizex;
    fractal.miny = -zoomr * sizey;
    fractal.maxy = 0.f + fractal.miny / FRAME_HEIGHT;

    init_fractal(&fractal);
}
// Core 1 handles DMA IRQs and runs TMDS encode on scanline buffers it
// receives through the mailbox FIFO
void __not_in_flash("core1_main") core1_main() {
    // Take the DVI interrupts on this core, then wait for core 0 to finish
    // its setup before starting the output.
    dvi_register_irqs_this_core(&dvi0, DMA_IRQ_0);
    sem_acquire_blocking(&dvi_start_sem);
    dvi_start(&dvi0);

    for (;;) {
        // Core 0 sends two words per job: source pixels, then destination.
        const uint32_t *pixels = (const uint32_t*)multicore_fifo_pop_blocking();
        uint32_t *symbols = (uint32_t*)multicore_fifo_pop_blocking();

        tmds_encode_palette_data(pixels, tmds_palette, symbols, FRAME_WIDTH, PALETTE_BITS);

        // Tell core 0 the scanline is encoded.
        multicore_fifo_push_blocking(0);

        // Use spare time to compute fractal pixels from the end of the
        // image, as long as enough encoded scanlines are queued.
        while (!fractal.done && queue_get_level(&dvi0.q_tmds_valid) >= 5) {
            generate_steal_one(&fractal);
        }
    }
    __builtin_unreachable();
}
// Core 0 entry point.
//
// Sets up clocks, UART, LED, palette, fractal state and DVI, launches core 1,
// then loops forever producing frames. Each frame is built from the half-height
// `mandel` buffer: rows are sent in ascending order for the top half of the
// screen and in descending order for the bottom half. Scanlines are encoded in
// pairs, one by core 1 and one here, and any spare time is spent computing
// further fractal pixels.
int __not_in_flash("main") main() {
    vreg_set_voltage(VREG_VSEL);
    sleep_ms(10);
    set_sys_clock_khz(DVI_TIMING.bit_clk_khz, true);

    setup_default_uart();

    gpio_init(PICO_DEFAULT_LED_PIN);
    gpio_set_dir(PICO_DEFAULT_LED_PIN, GPIO_OUT);

    init_palette();
    init_mandel();

    printf("Configuring DVI\n");

    dvi0.timing = &DVI_TIMING;
    dvi0.ser_cfg = DEFAULT_DVI_SERIAL_CONFIG;
    dvi_init(&dvi0, next_striped_spin_lock_num(), next_striped_spin_lock_num());

    printf("Core 1 start\n");
    sem_init(&dvi_start_sem, 0, 1);
    hw_set_bits(&bus_ctrl_hw->priority, BUSCTRL_BUS_PRIORITY_PROC1_BITS);
    multicore_launch_core1(core1_main);

    uint heartbeat = 0;
    uint32_t encode_time = 0;

    // Let core 1 start the DVI output.
    sem_release(&dvi_start_sem);

    while (1) {
        // Every 30 frames: toggle the LED and report the time this core
        // spent encoding since the last report.
        if (++heartbeat >= 30) {
            heartbeat = 0;
            gpio_xor_mask(1u << PICO_DEFAULT_LED_PIN);
            // encode_time is a uint32_t: use an unsigned conversion and an
            // explicit cast so the argument type matches the format string.
            printf("Encode total time: %luus\n", (unsigned long)encode_time);
            encode_time = 0;
        }

        // Start the next zoom step once the current image is complete.
        if (fractal.done) zoom_mandel();
        // Palette cycling, currently disabled:
        //if (heartbeat & 1) init_palette();

        // Top half of the screen: buffer rows in ascending order.
        // Core 1 encodes row y, this core encodes row y + 1.
        for (int y = 0; y < FRAME_HEIGHT / 2; y += 2) {
            uint32_t *our_tmds_buf, *their_tmds_buf;
            queue_remove_blocking_u32(&dvi0.q_tmds_free, &their_tmds_buf);
            multicore_fifo_push_blocking((uint32_t)(&mandel[y*FRAME_WIDTH]));
            multicore_fifo_push_blocking((uint32_t)their_tmds_buf);

            queue_remove_blocking_u32(&dvi0.q_tmds_free, &our_tmds_buf);
            absolute_time_t start_time = get_absolute_time();
            tmds_encode_palette_data((const uint32_t*)(&mandel[(y+1)*FRAME_WIDTH]), tmds_palette, our_tmds_buf, FRAME_WIDTH, PALETTE_BITS);
            encode_time += absolute_time_diff_us(start_time, get_absolute_time());

            // Wait for core 1 to finish its scanline.
            multicore_fifo_pop_blocking();

            // Compute fractal pixels while enough encoded lines are queued.
            while (!fractal.done && queue_get_level(&dvi0.q_tmds_valid) >= 5) generate_one_forward(&fractal);

            // Queue in display order: row y first, then row y + 1.
            queue_add_blocking_u32(&dvi0.q_tmds_valid, &their_tmds_buf);
            queue_add_blocking_u32(&dvi0.q_tmds_valid, &our_tmds_buf);
        }

        // Bottom half of the screen: the same rows in descending order.
        // Core 1 encodes row y + 1, this core encodes row y.
        for (int y = FRAME_HEIGHT / 2 - 2; y >= 0; y -= 2) {
            uint32_t *our_tmds_buf, *their_tmds_buf;
            queue_remove_blocking_u32(&dvi0.q_tmds_free, &their_tmds_buf);
            multicore_fifo_push_blocking((uint32_t)(&mandel[(y+1)*FRAME_WIDTH]));
            multicore_fifo_push_blocking((uint32_t)their_tmds_buf);

            queue_remove_blocking_u32(&dvi0.q_tmds_free, &our_tmds_buf);
            absolute_time_t start_time = get_absolute_time();
            tmds_encode_palette_data((const uint32_t*)(&mandel[y*FRAME_WIDTH]), tmds_palette, our_tmds_buf, FRAME_WIDTH, PALETTE_BITS);
            encode_time += absolute_time_diff_us(start_time, get_absolute_time());

            multicore_fifo_pop_blocking();

            while (!fractal.done && queue_get_level(&dvi0.q_tmds_valid) >= 5) generate_one_forward(&fractal);

            // Queue in display order: row y + 1 first, then row y.
            queue_add_blocking_u32(&dvi0.q_tmds_valid, &their_tmds_buf);
            queue_add_blocking_u32(&dvi0.q_tmds_valid, &our_tmds_buf);
        }
    }
    __builtin_unreachable();
}

Wyświetl plik

@ -0,0 +1,232 @@
// Copyright (C) Michael Bell 2021
#include <stdio.h>
#include <stdlib.h>
#include "pico/stdlib.h"
#include "hardware/interp.h"
#include "hardware/dma.h"
#include "mandelbrot.h"
// Cycle checking parameters
#define MAX_CYCLE_LEN 8 // Must be power of 2
#define MIN_CYCLE_CHECK_ITER 32 // Must be multiple of max cycle len
#define CYCLE_TOLERANCE (1<<18)
// Fixed point with 6 bits to the left of the point.
// Range [-32,32) with precision 2^-26
typedef int32_t fixed_pt_t;
#define ESCAPE_SQUARE (4<<26)
// Fixed-point multiply, a * b.
// Each operand is split at bit 13 into a signed high part and an unsigned
// 13-bit low part; the result is hi*hi plus the two cross terms scaled down
// by 13 bits.
static inline fixed_pt_t mul(fixed_pt_t a, fixed_pt_t b)
{
    const int32_t a_hi = a >> 13;
    const int32_t a_lo = a & 0x1fff;
    const int32_t b_hi = b >> 13;
    const int32_t b_lo = b & 0x1fff;

    // Ignore a_lo * b_lo as contribution to final result is only the carry.
    const int32_t cross = (a_hi * b_lo) + (a_lo * b_hi);
    return (a_hi * b_hi) + (cross >> 13);
}
// Fixed-point a * b * 2.
// The doubling is folded into the split of `a`: its high part is taken from
// bit 12 upwards and its low 12 bits are shifted left by one, while `b` is
// split at bit 13 exactly as in mul().
static inline fixed_pt_t mul2(fixed_pt_t a, fixed_pt_t b)
{
#if 0
    // Disabled variant that sums the partial products through interp0.
    // NOTE(review): presumably relies on the lane setup in mandel_init() -
    // confirm before re-enabling.
    int32_t ah = a >> 12;
    int32_t al = a & 0xfff;
    int32_t bh = b >> 13;
    int32_t bl = b & 0x1fff;
    interp0->accum[0] = ah * bl;
    interp0->accum[1] = al * bh;
    interp0->base[2] = ah * bh;
    return interp0->peek[2];
#else
    const int32_t a_hi = a >> 12;
    const int32_t a_lo = (a & 0xfff) << 1;
    const int32_t b_hi = b >> 13;
    const int32_t b_lo = b & 0x1fff;

    const int32_t cross = (a_hi * b_lo) + (a_lo * b_hi);
    return (a_hi * b_hi) + (cross >> 13);
#endif
}
// Fixed-point a * a.
// With a split at bit 13 into hi and lo, the two equal cross terms
// (2 * hi * lo) >> 13 collapse to (hi * lo) >> 12; lo * lo is dropped.
static inline fixed_pt_t square(fixed_pt_t a) {
    const int32_t hi = a >> 13;
    const int32_t lo = a & 0x1fff;
    const int32_t cross = (hi * lo) >> 12;
    return cross + (hi * hi);
}
fixed_pt_t make_fixed(int32_t x) {
return x << 26;
}
// Convert a float to fixed point by scaling with 2^26 and truncating.
fixed_pt_t make_fixedf(float x) {
    const float one = 67108864.f; // 2^26
    return (int32_t)(x * one);
}
// Configure both lanes of interp0 as signed shift-and-mask stages:
// lane 0 shifts right by 13 bits, lane 1 by 12 bits.
// Not currently used.
void mandel_init()
{
    interp_config lane_cfg = interp_default_config();
    interp_config_set_add_raw(&lane_cfg, false);

    // Lane 0: shift by 13, keep bits 0..18, sign-extend.
    interp_config_set_shift(&lane_cfg, 13);
    interp_config_set_mask(&lane_cfg, 0, 31 - 13);
    interp_config_set_signed(&lane_cfg, true);
    interp_set_config(interp0, 0, &lane_cfg);

    // Lane 1: same settings but shift by 12 and keep bits 0..19.
    interp_config_set_shift(&lane_cfg, 12);
    interp_config_set_mask(&lane_cfg, 0, 31 - 12);
    interp_set_config(interp0, 1, &lane_cfg);
}
// Prepare `f` for a new render using its configuration fields.
// Clears the done flag and counters, converts the float bounds to fixed
// point, derives the per-pixel step in each direction, and resets the
// forward cursor to the first pixel and the end cursor to the last pixel.
void init_fractal(FractalBuffer* f)
{
    f->done = false;
    f->min_iter = f->max_iter - 1;

    f->iminx = make_fixedf(f->minx);
    f->imaxx = make_fixedf(f->maxx);
    f->iminy = make_fixedf(f->miny);
    f->imaxy = make_fixedf(f->maxy);

    // The first and last pixel sit exactly on the bounds, so there are
    // (count - 1) steps across each axis.
    const fixed_pt_t span_x = f->imaxx - f->iminx;
    const fixed_pt_t span_y = f->imaxy - f->iminy;
    f->incx = span_x / (f->cols - 1);
    f->incy = span_y / (f->rows - 1);

    f->count_inside = 0;

    f->ipos = 0;
    f->jpos = 0;
    f->iend = f->rows - 1;
    f->jend = f->cols - 1;
}
// Compute one pixel with the plain escape-time iteration.
// Writes 0 to *buffptr when the point has not escaped by max_iter
// iterations (and counts it in count_inside); otherwise writes the escape
// iteration minus iter_offset, clamped to at least 1, and tracks the
// smallest value written in min_iter.
static inline void generate_one(FractalBuffer* f, fixed_pt_t x0, fixed_pt_t y0, uint8_t* buffptr)
{
    fixed_pt_t re = x0;
    fixed_pt_t im = y0;
    uint16_t iter = 1;

    while (iter < f->max_iter) {
        const fixed_pt_t re_sq = square(re);
        const fixed_pt_t im_sq = square(im);
        if (re_sq + im_sq > ESCAPE_SQUARE) break;

        // z = z^2 + c; the new imaginary part needs the old real part.
        const fixed_pt_t next_im = mul2(re, im) + y0;
        re = re_sq - im_sq + x0;
        im = next_im;
        ++iter;
    }

    if (iter == f->max_iter) {
        *buffptr = 0;
        f->count_inside++;
        return;
    }

    if (iter > f->iter_offset) iter -= f->iter_offset;
    else iter = 1;

    *buffptr = iter;
    if (f->min_iter > iter) f->min_iter = iter;
}
// Compute one pixel with the escape-time iteration plus cycle detection.
//
// Same result convention as generate_one(): 0 is written for points treated
// as inside the set, otherwise the escape iteration minus iter_offset
// (clamped to at least 1). In addition, once MIN_CYCLE_CHECK_ITER iterations
// have passed, the orbit is compared against a reference point that is
// refreshed every MAX_CYCLE_LEN iterations; if it returns to within
// CYCLE_TOLERANCE of the reference in both coordinates the point is treated
// as inside without running the remaining iterations.
static inline void generate_one_cycle_check(FractalBuffer* f, fixed_pt_t x0, fixed_pt_t y0, uint8_t* buffptr)
{
fixed_pt_t x = x0;
fixed_pt_t y = y0;
// Reference point minus CYCLE_TOLERANCE. The zero initial values are never
// compared against: the first iteration that reaches the check
// (k == MIN_CYCLE_CHECK_ITER) is a multiple of MAX_CYCLE_LEN and so
// records a reference first.
fixed_pt_t oldx = 0, oldy = 0;
uint16_t k = 1;
for (; k < f->max_iter; ++k) {
fixed_pt_t x_square = square(x);
fixed_pt_t y_square = square(y);
if (x_square + y_square > ESCAPE_SQUARE) break;
if (k >= MIN_CYCLE_CHECK_ITER) {
if ((k & (MAX_CYCLE_LEN - 1)) == 0) {
// Record a new reference, stored pre-offset by the tolerance so the
// window test below is a single unsigned comparison per coordinate.
oldx = x - CYCLE_TOLERANCE;
oldy = y - CYCLE_TOLERANCE;
}
else
{
// x - oldx lies in [0, 2*CYCLE_TOLERANCE) exactly when x is within
// CYCLE_TOLERANCE of the reference; the unsigned cast makes negative
// differences fail the test.
if ((uint32_t)(x - oldx) < (2*CYCLE_TOLERANCE) && (uint32_t)(y - oldy) < (2*CYCLE_TOLERANCE)) {
// Found a cycle
// Force the "did not escape" outcome handled after the loop.
k = f->max_iter;
break;
}
}
}
// z = z^2 + c; y is updated from the old x, which is replaced afterwards.
fixed_pt_t nextx = x_square - y_square + x0;
y = mul2(x,y) + y0;
x = nextx;
}
if (k == f->max_iter) {
// Iteration limit reached or cycle found: inside the set.
*buffptr = 0;
f->count_inside++;
} else {
// Escaped: store the offset iteration count, never less than 1 so that
// 0 stays reserved for inside points.
if (k > f->iter_offset) k -= f->iter_offset;
else k = 1;
*buffptr = k;
if (f->min_iter > k) f->min_iter = k;
}
}
// Generate the whole image in one call, from the first pixel up to and
// including the pixel at (iend, jend), then mark the fractal as done.
//
// (iend, jend) is the position generate_steal_one() would compute next, so
// it has not been computed yet and must be included here. With the initial
// values set by init_fractal() this is the last pixel of the buffer.
void generate_fractal(FractalBuffer* f)
{
    uint8_t* buffptr = f->buff;
    fixed_pt_t y0 = f->iminy;
    int16_t i = 0;

    // Complete rows before the end row.
    for (; i < f->iend; ++i, y0 += f->incy) {
        fixed_pt_t x0 = f->iminx;
        for (int16_t j = 0; j < f->cols; ++j, x0 += f->incx) {
            if (f->use_cycle_check) generate_one_cycle_check(f, x0, y0, buffptr++);
            else generate_one(f, x0, y0, buffptr++);
        }
    }

    // Partial end row. It is skipped entirely if iend has moved below the
    // row reached by the loop above.
    fixed_pt_t x0 = f->iminx;
    for (int16_t j = 0; j <= f->jend && i == f->iend; ++j, x0 += f->incx) {
        if (f->use_cycle_check) generate_one_cycle_check(f, x0, y0, buffptr++);
        else generate_one(f, x0, y0, buffptr++);
    }

    f->done = true;
}
// Compute the pixel at the forward cursor (ipos, jpos) and advance the
// cursor by one, wrapping to the next row at the end of a row.
// Sets done once the cursor moves past row iend. Does nothing if the
// fractal is already done.
void generate_one_forward(FractalBuffer* f)
{
    if (f->done) return;

    const int16_t row = f->ipos;
    const int16_t col = f->jpos;

    uint8_t* dest = f->buff + row * f->cols + col;
    const fixed_pt_t y0 = f->iminy + row * f->incy;
    const fixed_pt_t x0 = f->iminx + col * f->incx;

    if (f->use_cycle_check) {
        generate_one_cycle_check(f, x0, y0, dest);
    } else {
        generate_one(f, x0, y0, dest);
    }

    if (++f->jpos != f->cols) return;

    // End of row: move to the start of the next one.
    f->jpos = 0;
    if (++f->ipos > f->iend) f->done = true;
}
// Compute the pixel at the end cursor (iend, jend) and move the cursor one
// pixel backwards, wrapping to the end of the previous row.
// Sets done once iend drops below the forward cursor's row. Does nothing if
// the fractal is already done.
void generate_steal_one(FractalBuffer* f)
{
    if (f->done) return;

    uint8_t* dest = f->buff + f->iend * f->cols + f->jend;
    const fixed_pt_t y0 = f->iminy + f->iend * f->incy;
    const fixed_pt_t x0 = f->iminx + f->jend * f->incx;

    if (f->use_cycle_check) {
        generate_one_cycle_check(f, x0, y0, dest);
    } else {
        generate_one(f, x0, y0, dest);
    }

    // Step back one column; nothing more to do unless column 0 was just
    // completed.
    const bool row_finished = (f->jend-- == 0);
    if (!row_finished) return;

    f->jend = f->cols - 1;
    if (--f->iend < f->ipos) f->done = true;
}

Wyświetl plik

@ -0,0 +1,41 @@
// Init pico resources used for generation
void mandel_init();
// Fixed point with 6 bits to the left of the point.
// Range [-32,32) with precision 2^-26
typedef int32_t fixed_pt_t;
// Description and progress state of one Mandelbrot render.
// The configuration fields are filled in by the caller before init_fractal();
// the state fields are written by init_fractal() and the generate_* functions.
typedef struct {
// Configuration
// Output buffer: one byte per pixel, stored at buff[row * cols + col].
uint8_t* buff;
// Image size in pixels.
int16_t rows;
int16_t cols;
// Iteration limit; points that have not escaped by then are stored as 0.
uint16_t max_iter;
// Subtracted from the escape iteration before it is stored (result is
// clamped to at least 1).
uint16_t iter_offset;
// Bounds of the view; the first and last row/column lie on these values.
float minx, miny, maxx, maxy;
// Selects the generator with cycle detection instead of the plain one.
bool use_cycle_check;
// State
// Cleared by init_fractal(), set once generation has finished.
volatile bool done;
// Smallest non-zero value stored so far; starts at max_iter - 1.
volatile uint16_t min_iter;
// Fixed-point copies of the view bounds.
fixed_pt_t iminx, iminy, imaxx, imaxy;
// Fixed-point step between adjacent columns / rows.
fixed_pt_t incx, incy;
// Number of pixels stored as 0 (inside the set).
volatile uint32_t count_inside;
// Row / column of the next pixel for generate_one_forward().
int16_t ipos, jpos;
// Row / column of the next pixel for generate_steal_one(), which works
// backwards from the last pixel. In the mandel-full example that function
// is called from core 1.
volatile int16_t iend, jend;
} FractalBuffer;
// Make a fixed_pt_t from an int or float.
fixed_pt_t make_fixed(int32_t x);
fixed_pt_t make_fixedf(float x);
// Generate a section of the fractal into buff
// Result written to buff is 0 for inside Mandelbrot set
// Otherwise the iteration at which the point escaped, minus iter_offset (clamped to 1)
void init_fractal(FractalBuffer* fractal);
void generate_fractal(FractalBuffer* fractal);
void generate_one_forward(FractalBuffer* f);
void generate_steal_one(FractalBuffer* f);

Wyświetl plik

@ -395,6 +395,10 @@ tmds_2bpp_table:
// DC balance defined to be 0 at start of scanline:
movs r4, #0
str r4, [r2, #ACCUM1_OFFS]
#if TMDS_FULLRES_NO_DC_BALANCE
// Alternate parity between odd/even symbols if there's no balance feedback
mvns r4, r4
#endif
str r4, [r2, #ACCUM1_OFFS + INTERP1]
// Keep loop start pointer in r8 so we can get a longer backward branch
@ -529,3 +533,118 @@ decl_func_x tmds_fullres_encode_loop_16bpp_leftshift_x
tmds_fullres_encode_loop_16bpp_leftshift
decl_func_y tmds_fullres_encode_loop_16bpp_leftshift_y
tmds_fullres_encode_loop_16bpp_leftshift
// Variant of tmds_fullres_encode_loop_16bpp that reads
// 8-bit wide pixels packed 4 per word. The interpolator
// base is set to a reordered list of TMDS symbols based
// on a user colour palette.
// Body shared by tmds_palette_encode_loop_x and tmds_palette_encode_loop_y.
//
// Arguments, following the C prototype (pixbuf, symbuf, n_pix):
//   r0: source pixels, 8 bits each, four per word
//   r1: destination symbol buffer
//   r2: pixel count
// The caller has already configured both interpolators and pointed their
// BASE2 registers at the TMDS palette for the channel being encoded.
//
// Each pass of the unrolled loop consumes 64 pixels and the exit test is an
// equality compare on the output pointer, so n_pix needs to be a multiple
// of 64.
.macro tmds_palette_encode_loop
push {r4-r7, lr}
mov r4, r8
push {r4}
// End-of-output pointer: two bytes are written per pixel (two symbols are
// packed into each 32-bit word).
lsls r2, #1
add r2, r1
mov ip, r2
// From here on r2 is the base address for interpolator register accesses.
ldr r2, =(SIO_BASE + SIO_INTERP0_ACCUM0_OFFSET)
// DC balance defined to be 0 at start of scanline:
movs r4, #0
str r4, [r2, #ACCUM1_OFFS]
#if TMDS_FULLRES_NO_DC_BALANCE
// Alternate parity between odd/even symbols if there is no balance feedback:
// the second interpolator starts from all-ones instead of zero.
mvns r4, r4
#endif
str r4, [r2, #ACCUM1_OFFS + INTERP1]
// Keep loop start pointer in r8 so we can get a longer backward branch
adr r4, 1f
adds r4, #1 // set the Thumb bit so that bx r8 stays in Thumb state
mov r8, r4
b 2f
.align 2
1:
.rept 8
// Load 8 pixels. Interpolator 0 picks the pixel sitting at bit 2 upwards of
// the value written to ACCUM0, interpolator 1 the pixel one byte higher.
// r3/r5 hold bytes 0-1 of each word in that position, r4/r6 bytes 2-3.
ldmia r0!, {r3, r5}
lsrs r4, r3, #14
lsls r3, #2
lsrs r6, r5, #14
lsls r5, #2
// Pixels 0 and 1: feed both interpolators, read back the palette entry
// address from PEEK2, fetch the symbol, and (with DC balance enabled) add
// it to ACCUM1. The two symbols are then packed as low | (high << 10).
str r3, [r2, #ACCUM0_OFFS + INTERP1]
str r3, [r2, #ACCUM0_OFFS]
ldr r3, [r2, #PEEK2_OFFS]
ldr r3, [r3]
#if !TMDS_FULLRES_NO_DC_BALANCE
str r3, [r2, #ACCUM1_ADD_OFFS]
#endif
ldr r7, [r2, #PEEK2_OFFS + INTERP1]
ldr r7, [r7]
#if !TMDS_FULLRES_NO_DC_BALANCE
str r7, [r2, #ACCUM1_ADD_OFFS + INTERP1]
#endif
lsls r7, #10
orrs r3, r7
// Pixels 2 and 3 (same sequence, result in r4).
str r4, [r2, #ACCUM0_OFFS + INTERP1]
str r4, [r2, #ACCUM0_OFFS]
ldr r4, [r2, #PEEK2_OFFS]
ldr r4, [r4]
#if !TMDS_FULLRES_NO_DC_BALANCE
str r4, [r2, #ACCUM1_ADD_OFFS]
#endif
ldr r7, [r2, #PEEK2_OFFS + INTERP1]
ldr r7, [r7]
#if !TMDS_FULLRES_NO_DC_BALANCE
str r7, [r2, #ACCUM1_ADD_OFFS + INTERP1]
#endif
lsls r7, #10
orrs r4, r7
// Pixels 4 and 5 (result in r5).
str r5, [r2, #ACCUM0_OFFS + INTERP1]
str r5, [r2, #ACCUM0_OFFS]
ldr r5, [r2, #PEEK2_OFFS]
ldr r5, [r5]
#if !TMDS_FULLRES_NO_DC_BALANCE
str r5, [r2, #ACCUM1_ADD_OFFS]
#endif
ldr r7, [r2, #PEEK2_OFFS + INTERP1]
ldr r7, [r7]
#if !TMDS_FULLRES_NO_DC_BALANCE
str r7, [r2, #ACCUM1_ADD_OFFS + INTERP1]
#endif
lsls r7, #10
orrs r5, r7
// Pixels 6 and 7 (result in r6).
str r6, [r2, #ACCUM0_OFFS + INTERP1]
str r6, [r2, #ACCUM0_OFFS]
ldr r6, [r2, #PEEK2_OFFS]
ldr r6, [r6]
#if !TMDS_FULLRES_NO_DC_BALANCE
str r6, [r2, #ACCUM1_ADD_OFFS]
#endif
ldr r7, [r2, #PEEK2_OFFS + INTERP1]
ldr r7, [r7]
#if !TMDS_FULLRES_NO_DC_BALANCE
str r7, [r2, #ACCUM1_ADD_OFFS + INTERP1]
#endif
lsls r7, #10
orrs r6, r7
// Store the four packed words for these 8 pixels.
stmia r1!, {r3, r4, r5, r6}
.endr
2:
// Loop until the output pointer reaches the end pointer.
cmp r1, ip
beq 1f
bx r8
1:
// Restore r8 and the callee-saved registers, then return.
pop {r4}
mov r8, r4
pop {r4-r7, pc}
.endm
decl_func_x tmds_palette_encode_loop_x
tmds_palette_encode_loop
decl_func_y tmds_palette_encode_loop_y
tmds_palette_encode_loop

Wyświetl plik

@ -162,3 +162,85 @@ void __not_in_flash_func(tmds_encode_data_channel_fullres_16bpp)(const uint32_t
interp_restore(interp1_hw, &interp1_save);
#endif
}
// Build the TMDS symbol palettes used by tmds_encode_palette_data() from a
// 16-bit (RGB 565) colour palette.
//
// palette:      n_palette RGB 565 colours.
// tmds_palette: output, must be 6 * n_palette words long. It holds three
//               consecutive per-channel tables (blue, green, red) of
//               2 * n_palette words each. Within a table, entry i comes from
//               the first half of tmds_table_fullres_x and entry
//               i + n_palette from its second half (offset 64).
// n_palette:    number of palette entries; must be a power of 2 <= 256.
void tmds_setup_palette_symbols(const uint16_t *palette, uint32_t *tmds_palette, size_t n_palette) {
    uint32_t *const blue_table = tmds_palette;
    uint32_t *const green_table = tmds_palette + 2 * n_palette;
    uint32_t *const red_table = tmds_palette + 4 * n_palette;

    // size_t index: n_palette is a size_t, so this avoids a signed/unsigned
    // comparison in the loop condition.
    for (size_t i = 0; i < n_palette; ++i) {
        const uint16_t colour = palette[i];

        // Reduce each channel to a 6-bit table index. Blue and red have only
        // 5 bits, so they are aligned to bits 5:1 with bit 0 left clear.
        const uint16_t blue = (colour << 1) & 0x3e;
        const uint16_t green = (colour >> 5) & 0x3f;
        const uint16_t red = (colour >> 10) & 0x3e;

        blue_table[i] = tmds_table_fullres_x[blue];
        blue_table[i + n_palette] = tmds_table_fullres_x[64 + blue];
        green_table[i] = tmds_table_fullres_x[green];
        green_table[i + n_palette] = tmds_table_fullres_x[64 + green];
        red_table[i] = tmds_table_fullres_x[red];
        red_table[i + n_palette] = tmds_table_fullres_x[64 + red];
    }
}
// Encode palette data for all 3 channels.
// pixbuf is an array of n_pix 8-bit wide pixels containing palette values (32-bit word aligned)
// tmds_palette is a palette of TMDS symbols produced by tmds_setup_palette_symbols
// symbuf is 3*n_pix 32-bit words, this function writes the symbol values for each of the channels to it.
void __not_in_flash_func(tmds_encode_palette_data)(const uint32_t *pixbuf, const uint32_t *tmds_palette, uint32_t *symbuf, size_t n_pix, uint32_t palette_bits) {
uint core = get_core_num();
#if !TMDS_FULLRES_NO_INTERP_SAVE
interp_hw_save_t interp0_save, interp1_save;
interp_save(interp0_hw, &interp0_save);
interp_save(interp1_hw, &interp1_save);
#endif
interp0_hw->base[2] = (uint32_t)tmds_palette;
interp1_hw->base[2] = (uint32_t)tmds_palette;
// Lane 0 on both interpolators masks the palette bits, starting at bit 2,
// The second interpolator also shifts to read the 2nd or 4th byte of the word.
interp0_hw->ctrl[0] =
(2 << SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB) |
((palette_bits + 1) << SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB);
interp1_hw->ctrl[0] =
(8 << SIO_INTERP0_CTRL_LANE0_SHIFT_LSB) |
(2 << SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB) |
((palette_bits + 1) << SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB);
// Lane 1 shifts and masks the sign bit into the right position to add to the symbol
// table index to choose the negative disparity symbols if the sign is negative.
const uint32_t ctrl_lane_1 =
((31 - (palette_bits + 2)) << SIO_INTERP0_CTRL_LANE0_SHIFT_LSB) |
(palette_bits + 2) * ((1 << SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB) | (1 << SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB));
interp0_hw->ctrl[1] = ctrl_lane_1;
interp1_hw->ctrl[1] = ctrl_lane_1;
if (core) {
tmds_palette_encode_loop_x(pixbuf, symbuf, n_pix);
interp0_hw->base[2] = (uint32_t)(tmds_palette + (2 << palette_bits));
interp1_hw->base[2] = (uint32_t)(tmds_palette + (2 << palette_bits));
tmds_palette_encode_loop_x(pixbuf, symbuf + (n_pix >> 1), n_pix);
interp0_hw->base[2] = (uint32_t)(tmds_palette + (4 << palette_bits));
interp1_hw->base[2] = (uint32_t)(tmds_palette + (4 << palette_bits));
tmds_palette_encode_loop_x(pixbuf, symbuf + n_pix, n_pix);
} else {
tmds_palette_encode_loop_y(pixbuf, symbuf, n_pix);
interp0_hw->base[2] = (uint32_t)(tmds_palette + (2 << palette_bits));
interp1_hw->base[2] = (uint32_t)(tmds_palette + (2 << palette_bits));
tmds_palette_encode_loop_y(pixbuf, symbuf + (n_pix >> 1), n_pix);
interp0_hw->base[2] = (uint32_t)(tmds_palette + (4 << palette_bits));
interp1_hw->base[2] = (uint32_t)(tmds_palette + (4 << palette_bits));
tmds_palette_encode_loop_y(pixbuf, symbuf + n_pix, n_pix);
}
#if !TMDS_FULLRES_NO_INTERP_SAVE
interp_restore(interp0_hw, &interp0_save);
interp_restore(interp1_hw, &interp1_save);
#endif
}

Wyświetl plik

@ -8,6 +8,8 @@
void tmds_encode_data_channel_16bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint channel_msb, uint channel_lsb);
void tmds_encode_data_channel_8bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint channel_msb, uint channel_lsb);
void tmds_encode_data_channel_fullres_16bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint channel_msb, uint channel_lsb);
void tmds_setup_palette_symbols(const uint16_t *palette, uint32_t *symbuf, size_t n_palette);
void tmds_encode_palette_data(const uint32_t *pixbuf, const uint32_t *tmds_palette, uint32_t *symbuf, size_t n_pix, uint32_t palette_bits);
// Functions from tmds_encode.S
@ -28,5 +30,7 @@ void tmds_fullres_encode_loop_16bpp_x(const uint32_t *pixbuf, uint32_t *symbuf,
void tmds_fullres_encode_loop_16bpp_y(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix);
void tmds_fullres_encode_loop_16bpp_leftshift_x(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint leftshift);
void tmds_fullres_encode_loop_16bpp_leftshift_y(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint leftshift);
void tmds_palette_encode_loop_x(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix);
void tmds_palette_encode_loop_y(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix);
#endif