SDRPlusPlus/core/libcorrect/tools/find_conv_optim_poly_anneal...

351 wiersze
11 KiB
C

#include <stdbool.h>
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <stddef.h>
#include <limits.h>
#include <pthread.h>
#include <signal.h>
#if HAVE_SSE
#include "correct/util/error-sim-sse.h"
typedef correct_convolutional_sse conv_t;
static conv_t*(*conv_create)(size_t, size_t, const uint16_t *) = correct_convolutional_sse_create;
static void(*conv_destroy)(conv_t *) = correct_convolutional_sse_destroy;
static size_t(*conv_enclen)(void *, size_t) = conv_correct_sse_enclen;
static void(*conv_encode)(void *, uint8_t *, size_t, uint8_t *) = conv_correct_sse_encode;
static void(*conv_decode)(void *, uint8_t *, size_t, uint8_t *) = conv_correct_sse_decode;
#else
#include "correct/util/error-sim.h"
typedef correct_convolutional conv_t;
static conv_t*(*conv_create)(size_t, size_t, const uint16_t *) = correct_convolutional_create;
static void(*conv_destroy)(conv_t *) = correct_convolutional_destroy;
static size_t(*conv_enclen)(void *, size_t) = conv_correct_enclen;
static void(*conv_encode)(void *, uint8_t *, size_t, uint8_t *) = conv_correct_encode;
static void(*conv_decode)(void *, uint8_t *, size_t, uint8_t *) = conv_correct_decode;
#endif
typedef struct {
conv_t *conv;
correct_convolutional_polynomial_t *poly;
} conv_tester_t;
void shuffle(int *a, size_t len) {
for (size_t i = 0; i < len - 2; i++) {
size_t j = rand() % (len - i) + i;
int temp = a[i];
a[i] = a[j];
a[j] = temp;
}
}
int rand_geo(float p, int max) {
int geo = 1;
while (geo < max) {
if (rand() / (float)RAND_MAX > p) {
geo++;
} else {
break;
}
}
return geo;
}
void next_neighbor(correct_convolutional_polynomial_t *start,
correct_convolutional_polynomial_t *neighbor, size_t rate, size_t order) {
int coeffs[rate * (order - 2)];
for (int i = 0; i < rate * (order - 2); i++) {
coeffs[i] = i;
}
shuffle(coeffs, rate * (order - 2));
memcpy(neighbor, start, rate * sizeof(correct_convolutional_polynomial_t));
size_t nflips = rand_geo(0.4, rate * (order - 2));
for (int i = 0; i < nflips; i++) {
ptrdiff_t index = coeffs[i] / (order - 2);
// decide which bit to flip
// we avoid the edge bits to prevent creating a degenerate poly
neighbor[index] ^= 1 << (coeffs[i] % (order - 2) + 1);
}
}
bool accept(float cost_a, float cost_b, double temperature) {
if (cost_b < cost_a) {
return true;
}
float p = (float)(rand()) / (float)(RAND_MAX);
return exp((cost_a - cost_b) / (cost_a * temperature)) > p;
}
typedef struct {
size_t rate;
size_t order;
correct_convolutional_polynomial_t *poly;
unsigned int distance;
conv_testbench *scratch;
size_t msg_len;
double eb_n0;
double bpsk_voltage;
double bpsk_bit_energy;
} thread_args;
const size_t max_block_len = 16384;
void *find_cost_thread(void *vargs) {
thread_args *args = (thread_args *)vargs;
conv_t *conv;
uint8_t *msg = malloc(max_block_len);
conv = conv_create(args->rate, args->order, args->poly);
args->distance = 0;
conv_testbench *scratch = args->scratch;
size_t bytes_remaining = args->msg_len;
while (bytes_remaining) {
// in order to keep memory usage constant, we separate the msg into
// blocks and send each one through
// each time we do this, we have to calculate a new noise for each
// testbench
size_t block_len = (max_block_len < bytes_remaining) ? max_block_len : bytes_remaining;
bytes_remaining -= block_len;
for (unsigned int j = 0; j < block_len; j++) {
msg[j] = rand() % 256;
}
scratch = resize_conv_testbench(scratch, conv_enclen, conv, block_len);
scratch->encode = conv_encode;
scratch->encoder = conv;
scratch->decode = conv_decode;
scratch->decoder = conv;
build_white_noise(scratch->noise, scratch->enclen, args->eb_n0, args->bpsk_bit_energy);
args->distance += test_conv_noise(scratch, msg, block_len, args->bpsk_voltage);
}
conv_destroy(conv);
free(msg);
pthread_exit(NULL);
}
float find_cost(size_t rate, size_t order, correct_convolutional_polynomial_t *poly, size_t msg_len,
conv_testbench **scratches, size_t num_scratches, float *weights, double *eb_n0,
double bpsk_voltage, double bpsk_bit_energy) {
thread_args *args = malloc(num_scratches * sizeof(thread_args));
pthread_t *threads = malloc(num_scratches * sizeof(pthread_t));
for (size_t i = 0; i < num_scratches; i++) {
args[i].rate = rate;
args[i].order = order;
args[i].poly = poly;
args[i].scratch = scratches[i];
args[i].msg_len = msg_len;
args[i].eb_n0 = eb_n0[i];
args[i].bpsk_voltage = bpsk_voltage;
args[i].bpsk_bit_energy = bpsk_bit_energy;
pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
pthread_create(&threads[i], &attr, find_cost_thread, &args[i]);
}
for (size_t i = 0; i < num_scratches; i++) {
pthread_join(threads[i], NULL);
}
float cost = 0;
printf("poly:");
for (size_t i = 0; i < rate; i++) {
printf(" %06o", poly[i]);
}
printf(" error:");
for (size_t i = 0; i < num_scratches; i++) {
cost += weights[i] * args[i].distance;
printf(" %.2e@%.1fdB", (args[i].distance / (float)(msg_len * 8)), eb_n0[i]);
}
printf("\n");
free(args);
free(threads);
return cost;
}
static bool terminated = false;
void sig_handler(int sig) {
if (sig == SIGINT || sig == SIGTERM || sig == SIGHUP) {
if (!terminated) {
terminated = true;
printf("terminating after current poly\n");
}
}
}
void search_simulated_annealing(size_t rate, size_t order, size_t n_steps, conv_tester_t *start,
size_t n_bytes, conv_testbench **scratches, size_t num_scratches,
float *weights, double start_temperature, double cooling_factor,
double *eb_n0, double bpsk_voltage, double bpsk_bit_energy) {
// perform simulated annealing to find the optimal polynomial
float cost = find_cost(rate, order, start->poly, n_bytes, scratches, num_scratches, weights,
eb_n0, bpsk_voltage, bpsk_bit_energy);
correct_convolutional_polynomial_t *neighbor_poly =
malloc(rate * sizeof(correct_convolutional_polynomial_t));
correct_convolutional_polynomial_t *state =
malloc(rate * sizeof(correct_convolutional_polynomial_t));
correct_convolutional_polynomial_t *best =
malloc(rate * sizeof(correct_convolutional_polynomial_t));
float best_cost = cost;
memcpy(state, start->poly, rate * sizeof(correct_convolutional_polynomial_t));
memcpy(best, start->poly, rate * sizeof(correct_convolutional_polynomial_t));
double temperature = start_temperature;
for (size_t i = 0; i < n_steps; i++) {
next_neighbor(state, neighbor_poly, rate, order);
float neighbor_cost =
find_cost(rate, order, neighbor_poly, n_bytes, scratches, num_scratches, weights, eb_n0,
bpsk_voltage, bpsk_bit_energy);
if (accept(cost, neighbor_cost, temperature)) {
// we're moving to our neighbor's house
memcpy(state, neighbor_poly, rate * sizeof(correct_convolutional_polynomial_t));
cost = neighbor_cost;
} else {
// actually where we live now is nice
}
if (cost < best_cost) {
best_cost = cost;
memcpy(best, state, rate * sizeof(correct_convolutional_polynomial_t));
}
temperature *= cooling_factor;
if (terminated) {
break;
}
}
printf("last state:");
for (size_t i = 0; i < rate; i++) {
printf(" %06o", state[i]);
}
printf("\n");
printf("best state:");
for (size_t i = 0; i < rate; i++) {
printf(" %06o", best[i]);
}
memcpy(start->poly, best, rate * sizeof(correct_convolutional_polynomial_t));
free(state);
free(best);
free(neighbor_poly);
}
void test_sa(size_t rate, size_t order, conv_tester_t start, conv_testbench **scratches,
size_t num_scratches, float *weights, size_t n_bytes, double *eb_n0,
double bpsk_bit_energy, size_t n_iter, double bpsk_voltage) {
for (size_t i = 0; i < n_iter; i++) {
double temperature = (i == 0) ? 0.5 : 250;
double cooling_factor = (i == 0) ? 0.985 : 0.95;
size_t n_steps = (i == 0) ? 500 : 100;
search_simulated_annealing(rate, order, n_steps, &start, n_bytes, scratches, num_scratches,
weights, temperature, cooling_factor, eb_n0, bpsk_voltage,
bpsk_bit_energy);
}
}
int main(int argc, char **argv) {
srand(time(NULL));
signal(SIGINT, sig_handler);
signal(SIGTERM, sig_handler);
signal(SIGHUP, sig_handler);
size_t rate, order, n_bytes, n_iter;
sscanf(argv[1], "%zu", &rate);
sscanf(argv[2], "%zu", &order);
sscanf(argv[3], "%zu", &n_bytes);
sscanf(argv[4], "%zu", &n_iter);
double bpsk_voltage = 1.0 / sqrt(2.0);
double bpsk_sym_energy = 2 * pow(bpsk_voltage, 2.0);
double bpsk_bit_energy = bpsk_sym_energy / 1.0;
bpsk_bit_energy = bpsk_sym_energy * rate; // rate bits transmitted for every input bit
// correct_convolutional_polynomial_t maxcoeff = (1 << order) - 1;
correct_convolutional_polynomial_t startcoeff = (1 << (order - 1)) + 1;
conv_tester_t start;
start.poly = malloc(rate * sizeof(correct_convolutional_polynomial_t));
for (size_t i = 0; i < rate; i++) {
start.poly[i] = ((rand() % (1 << (order - 2))) << 1) + startcoeff;
}
start.conv = conv_create(rate, order, start.poly);
size_t num_scratches = 4;
float *weights;
conv_testbench **scratches = malloc(num_scratches * sizeof(conv_testbench *));
double *eb_n0;
for (size_t i = 0; i < num_scratches; i++) {
scratches[i] = resize_conv_testbench(NULL, conv_enclen, start.conv, max_block_len);
}
switch (order) {
case 6:
eb_n0 = (double[]){6.0, 5.5, 5.0, 4.5};
weights = (float[]){8000, 400, 20, 1};
break;
case 7:
case 8:
eb_n0 = (double[]){5.5, 5.0, 4.5, 4.0};
weights = (float[]){8000, 400, 20, 1};
break;
case 9:
case 10:
eb_n0 = (double[]){5.0, 4.5, 4.0, 3.5};
weights = (float[]){8000, 400, 20, 1};
break;
case 11:
case 12:
case 13:
eb_n0 = (double[]){4.5, 4.0, 3.5, 3.0};
weights = (float[]){8000, 400, 20, 1};
break;
default:
eb_n0 = (double[]){3.5, 3.0, 2.5, 2.0};
weights = (float[]){8000, 400, 20, 1};
}
test_sa(rate, order, start, scratches, num_scratches, weights, n_bytes, eb_n0, bpsk_bit_energy,
n_iter, bpsk_voltage);
free(start.poly);
conv_destroy(start.conv);
for (size_t i = 0; i < num_scratches; i++) {
free_scratch(scratches[i]);
}
free(scratches);
return 0;
}