kopia lustrzana https://github.com/proto17/dji_droneid
166 wiersze
6.9 KiB
C++
166 wiersze
6.9 KiB
C++
/* -*- c++ -*- */
|
|
/*
|
|
* Copyright 2022 gr-droneid author.
|
|
*
|
|
* SPDX-License-Identifier: GPL-3.0-or-later
|
|
*/
|
|
|
|
#include "normalized_xcorr_estimate_impl.h"
|
|
#include <gnuradio/io_signature.h>
|
|
|
|
#include <volk/volk.h>
|
|
#include <numeric>
|
|
#include <gnuradio/droneid/misc_utils.h>
|
|
|
|
namespace gr {
|
|
namespace droneid {
|
|
|
|
normalized_xcorr_estimate::sptr
|
|
normalized_xcorr_estimate::make(const std::vector<gr_complex> &taps) {
|
|
return gnuradio::get_initial_sptr
|
|
(new normalized_xcorr_estimate_impl(taps));
|
|
}
|
|
|
|
|
|
/*
|
|
* The private constructor
|
|
*/
|
|
normalized_xcorr_estimate_impl::normalized_xcorr_estimate_impl(const std::vector<gr_complex> &taps)
|
|
: gr::block("dot_prod",
|
|
gr::io_signature::make(1, 1, sizeof(gr_complex)),
|
|
gr::io_signature::make(1, 1, sizeof(gr_complex))),
|
|
taps_(taps), window_size_(taps.size()) {
|
|
|
|
// Remove the mean from the taps, conjugate the taps, and calculate the variance ahead of time
|
|
const auto mean =
|
|
std::accumulate(taps_.begin(), taps_.end(), gr_complex{0, 0}) / static_cast<float>(taps_.size());
|
|
|
|
for (auto & tap : taps_) {
|
|
tap = std::conj(tap) - mean;
|
|
}
|
|
|
|
taps_var_ = misc_utils::var_no_mean(&taps_[0], taps_.size());
|
|
|
|
// Create some constants to enable the use of multiplies instead of divides later
|
|
window_size_recip_ = 1.0f / static_cast<float>(window_size_);
|
|
window_size_recip_complex_ = gr_complex{window_size_recip_, 0};
|
|
}
|
|
|
|
/*
|
|
* Our virtual destructor.
|
|
*/
|
|
normalized_xcorr_estimate_impl::~normalized_xcorr_estimate_impl() {
|
|
}
|
|
|
|
int
|
|
normalized_xcorr_estimate_impl::general_work(int noutput_items,
|
|
gr_vector_int &ninput_items,
|
|
gr_vector_const_void_star &input_items,
|
|
gr_vector_void_star &output_items) {
|
|
// Get handles to the input and output arrays
|
|
const auto *in = (const gr_complex *) input_items[0];
|
|
auto *out = (gr_complex *) output_items[0];
|
|
|
|
// Always tell GNU Radio that all samples were accepted even if not this many samples were written out
|
|
consume_each(noutput_items);
|
|
|
|
// This is how the remaining samples are buffered between calls. It's important to realize that this algo
|
|
// needs <window_size> samples to be able to produce one output value. This means that there will always
|
|
// be unused samples at the end of each function call that need to be held onto until the next call. The
|
|
// hope was that set_history() took care of this, but it does not. So, the remaining samples from the last
|
|
// call are stored in <buffer_>. The <in> buffer can't hold more samples (it's not known how many samples
|
|
// wide the buffer is) so in order to use the old samples without jumping through very slow hoops, the new
|
|
// samples are appended to the old samples.
|
|
buffer_.insert(buffer_.end(), in, in + noutput_items);
|
|
|
|
// Exit early if there aren't enough samples to process.
|
|
if (buffer_.size() < window_size_) {
|
|
return 0;
|
|
}
|
|
|
|
// Figure out how many windows worth of data can be processed. It's possible that this specific call
|
|
// doesn't have enough storage in its output buffer to hold all the samples that could be processed. For
|
|
// this reason the min of the available output buffer space and number of windows that could be processed
|
|
// must be used.
|
|
const auto num_steps = std::min(static_cast<uint64_t>(noutput_items), buffer_.size() - window_size_);
|
|
|
|
// Resize the buffers as needed
|
|
if (sums_.size() < num_steps) {
|
|
sums_.resize(num_steps);
|
|
abs_squared_.resize(num_steps + window_size_);
|
|
vars_.resize(num_steps);
|
|
}
|
|
|
|
// TODO(24June2022): There are <window_size-1> extra operations happening on each call. This comes from the
|
|
// fact that some of these computations are being done on samples that are going to be
|
|
// used again on the next function call. Would be a good idea to buffer the abs squared
|
|
// and maybe the running variance average.
|
|
|
|
// What is happening below is roughly the following:
|
|
//
|
|
// for idx = 1:length(buffer_) - window_size_
|
|
// window = buffer_(idx:idx + window_size_ - 1);
|
|
// variance = sum(abs(window).^2) / window_size_;
|
|
// dot_prod = sum(window .* taps_) / window_size_;
|
|
// out(idx) = dot_prod / sqrt(variance * taps_var_);
|
|
// end
|
|
//
|
|
// But the variance is calculated as a running sum. The first variance has to be calculated the hard way,
|
|
// and then every iteration of the loop will subtract off the left-most element of the window that just
|
|
// dropped off, and adds on the new right-most element in the window.
|
|
//
|
|
// Doing this calculation of the first element outside the loop prevents needing a conditional in the
|
|
// critical section
|
|
|
|
// Calculate the first variance the hard way
|
|
volk_32fc_magnitude_squared_32f(&abs_squared_[0], &buffer_[0], num_steps + window_size_);
|
|
auto running_var = std::accumulate(abs_squared_.begin(), abs_squared_.begin() + window_size_, 0.f);
|
|
vars_[0] = running_var;
|
|
|
|
// Calculate the first dot product
|
|
volk_32fc_x2_dot_prod_32fc(&out[0], &buffer_[0], &taps_[0], window_size_);
|
|
|
|
// Calculate the running abs value sum and dot product for the remaining samples
|
|
for (uint32_t idx = 1; idx < num_steps; idx++) {
|
|
// sum(abs(window).^2)
|
|
running_var = running_var - abs_squared_[idx - 1] + abs_squared_[idx + window_size_];
|
|
vars_[idx] = running_var;
|
|
|
|
// Compute tue dot product of the current window and the filter taps
|
|
// sum(window .* taps_)
|
|
volk_32fc_x2_dot_prod_32fc(&out[idx], &buffer_[idx], &taps_[0], window_size_);
|
|
}
|
|
|
|
// Scale the dot product down
|
|
volk_32fc_s32fc_multiply_32fc(&out[0], &out[0], window_size_recip_complex_, num_steps);
|
|
|
|
// Scale the variance sums down
|
|
volk_32f_s32f_multiply_32f(&vars_[0], &vars_[0], window_size_recip_, num_steps);
|
|
|
|
// Multiply each variance by the tap variances then take the reciprocal
|
|
volk_32f_s32f_multiply_32f(&vars_[0], &vars_[0], taps_var_, num_steps);
|
|
|
|
// Calculate the inverse square root (1/sqrt(vars_[x]))
|
|
volk_32f_invsqrt_32f(&vars_[0], &vars_[0], num_steps);
|
|
|
|
// Divide by the square root above
|
|
volk_32fc_32f_multiply_32fc(&out[0], &out[0], &vars_[0], num_steps);
|
|
|
|
// Go through all outputs and replace NaN's with zeros. This isn't strictly required, but nice to have
|
|
for (uint32_t idx = 0; idx < num_steps; idx++) {
|
|
if (out[idx].real() == FP_NAN || out[idx].imag() == FP_NAN) {
|
|
out[idx] = zero_complex_;
|
|
}
|
|
}
|
|
|
|
// Remove all the samples that have been processed from the buffer. Leaving just the last <window_size_-1>
|
|
// samples for the next call
|
|
buffer_.erase(buffer_.begin(), buffer_.begin() + num_steps);
|
|
|
|
// Tell runtime system how many output items we produced.
|
|
return num_steps;
|
|
}
|
|
|
|
} /* namespace droneid */
|
|
} /* namespace gr */
|