Merged master

2017-04-12 23:41:58 +02:00 · 2017-04-12 23:41:58 +02:00 · f55c4c35c3
commit f55c4c35c3
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,9 @@
+csdr
+nmux
+ddcd
+*.o
+*.so
+tags
+dumpvect.*.vect
+grc_tests/top_block.py
+*.swp
--- a/20
+++ b/20
@ -26,8 +26,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-
-
 LIBSOURCES =  fft_fftw.c libcsdr_wrapper.c
 #SOURCES = csdr.c $(LIBSOURCES)
 cpufeature = $(if $(findstring $(1),$(shell cat /proc/cpuinfo)),$(2))
@ -41,29 +39,39 @@ PARAMS_LOOPVECT = -O3 -ffast-math -fdump-tree-vect-details -dumpbase dumpvect
 PARAMS_LIBS = -g -lm -lrt -lfftw3f -DUSE_FFTW -DLIBCSDR_GPL -DUSE_IMA_ADPCM
 PARAMS_SO = -fpic
 PARAMS_MISC = -Wno-unused-result
+#DEBUG_ON = 0 #debug is always on by now (anyway it could be compiled with `make DEBUG_ON=1`)
+#PARAMS_DEBUG = $(if $(DEBUG_ON),-g,)
 FFTW_PACKAGE = fftw-3.3.3
 SO_EXT = $(if $(findstring CYGWIN,$(shell uname -a)),dll,so)
 SO_PATH = $(if $(findstring CYGWIN,$(shell uname -a)),/bin,/usr/lib)

-all: clean-vect
+.PHONY: clean-vect clean
+all: csdr nmux
+libcsdr.so: fft_fftw.c fft_rpi.c libcsdr_wrapper.c libcsdr.c libcsdr_gpl.c fastddc.c fastddc.h  fft_fftw.h  fft_rpi.h  ima_adpcm.h  libcsdr_gpl.h  libcsdr.h  predefined.h
 	@echo NOTE: you may have to manually edit Makefile to optimize for your CPU \(especially if you compile on ARM, please edit PARAMS_NEON\).
 	@echo Auto-detected optimization parameters: $(PARAMS_SIMD)
 	@echo
+	rm -f dumpvect*.vect
 	gcc -std=gnu99 $(PARAMS_LOOPVECT) $(PARAMS_SIMD) $(LIBSOURCES) $(PARAMS_LIBS) $(PARAMS_MISC) -fpic -shared -o libcsdr.$(SO_EXT)
-
 	-./parsevect dumpvect*.vect
+csdr: csdr.c libcsdr.so
 	gcc -std=gnu99 $(PARAMS_LOOPVECT) $(PARAMS_SIMD) csdr.c $(PARAMS_LIBS) -L. -lcsdr $(PARAMS_MISC) -o csdr
+ddcd: ddcd.cpp libcsdr.so ddcd.h
+	g++ $(PARAMS_LOOPVECT) $(PARAMS_SIMD) ddcd.cpp $(PARAMS_LIBS) -L. -lcsdr -lpthread $(PARAMS_MISC) -o ddcd
+nmux: nmux.cpp libcsdr.so nmux.h tsmpool.cpp tsmpool.h
+	g++ $(PARAMS_LOOPVECT) $(PARAMS_SIMD) nmux.cpp tsmpool.cpp $(PARAMS_LIBS) -L. -lcsdr -lpthread $(PARAMS_MISC) -o nmux
 arm-cross: clean-vect
 	#note: this doesn't work since having added FFTW
 	arm-linux-gnueabihf-gcc -std=gnu99 -O3 -fshort-double -ffast-math -dumpbase dumpvect-arm -fdump-tree-vect-details -mfloat-abi=softfp -march=armv7-a -mtune=cortex-a9 -mfpu=neon -mvectorize-with-neon-quad -Wno-unused-result -Wformat=0 $(SOURCES) -lm -o ./csdr
 clean-vect:
 	rm -f dumpvect*.vect
 clean: clean-vect
-	rm -f libcsdr.$(SO_EXT) csdr
+	rm -f libcsdr.$(SO_EXT) csdr nmux
 install:
 	install -m 0755 libcsdr.$(SO_EXT) $(SO_PATH)
 	install -m 0755 csdr /usr/bin
 	install -m 0755 csdr-fm /usr/bin
+	install -m 0755 nmux /usr/bin
 	-ldconfig
 uninstall:
 	rm $(SO_PATH)/libcsdr.$(SO_EXT) /usr/bin/csdr /usr/bin/csdr-fm
@ -85,7 +93,7 @@ emcc-get-deps:
 	emmake make; \
 	emmake make install
 emcc:
-	emcc -O3 -Isdr.js/$(FFTW_PACKAGE)/api -Lsdr.js/$(FFTW_PACKAGE)/emscripten-lib -o sdr.js/sdrjs-compiled.js fft_fftw.c libcsdr_wrapper.c -DLIBCSDR_GPL -DUSE_IMA_ADPCM -DUSE_FFTW -lfftw3f -s EXPORTED_FUNCTIONS="`python sdr.js/exported_functions.py`"
+	emcc -O3 -Isdr.js/$(FFTW_PACKAGE)/api -Lsdr.js/$(FFTW_PACKAGE)/emscripten-lib -o sdr.js/sdrjs-compiled.js fft_fftw.c libcsdr_wrapper.c -s TOTAL_MEMORY=67108864 -DLIBCSDR_GPL -DUSE_IMA_ADPCM -DUSE_FFTW -lfftw3f -s EXPORTED_FUNCTIONS="`python sdr.js/exported_functions.py`"
 	cat sdr.js/sdrjs-header.js sdr.js/sdrjs-compiled.js sdr.js/sdrjs-footer.js > sdr.js/sdr.js
 emcc-beautify:
 	bash -c 'type js-beautify >/dev/null 2>&1; if [ $$? -eq 0 ]; then js-beautify sdr.js/sdr.js >sdr.js/sdr.js.beautiful; mv sdr.js/sdr.js.beautiful sdr.js/sdr.js; fi'
--- a/README.md
+++ b/README.md
@ -205,7 +205,7 @@ Internally, a sine and cosine wave is generated to perform this function, and th

 	shift_addition_cc <rate>

-Operation is the same as with `shift_math_cc`.
+Operation is the same as for `shift_math_cc`.

 Internally, this function uses trigonometric addition formulas to generate sine and cosine, which is a bit faster. (About 4 times on the machine I have tested it on.)

@ -219,6 +219,20 @@ Operation is the same as with `shift_math_cc`.
 Internally, this function uses a look-up table (LUT) to recall the values of the sine function (for the first quadrant).
 The higher the table size is, the smaller the phase error is.

+	shift_addfast_cc <rate>
+
+Operation is the same as for `shift_math_cc`.
+
+Internally, this function uses a NEON-accelerated algorithm on capable systems, so it is advised to use this one on ARM boards.
+
+	shift_unroll_cc <rate>
+
+Operation is the same as for `shift_math_cc`.
+
+This uses a modified algorithm that first stores a vector of sine and cosine values for given phase differences.
+
+The loop in this function unrolls quite well if compiled on a PC. It was the fastest one on an i7 CPU during the tests.
+
 	decimating_shift_addition_cc <rate> [decimation]

 It shifts the input signal in the frequency domain, and also decimates it, without filtering. It will be useful as a part of the FFT channelizer implementation (to be done).
@ -298,11 +312,16 @@ The output sample rate will be `interpolation / decimation × input_sample_rate`

 `transition_bw` and `window` are the parameters of the filter.

-	fractional_decimator_ff <decimation_rate> [transition_bw [window]]
+	fractional_decimator_ff <decimation_rate> [num_poly_points ( [transition_bw [window]] | --prefilter )]

 It can decimate by a floating point ratio.

-`transition_bw` and `window` are the parameters of the filter.
+It uses Lagrange interpolation, where `num_poly_points` (12 by default) input samples are taken into consideration while calculating one output sample.
+
+It can filter the signal with an anti-aliasing FIR filter before applying the Lagrange interpolation. This filter is inactive by default, but can be activated by:
+
+* passing only the `transition_bw`, or both the `transition_bw` and the `window` parameters of the filter,
+* using the `--prefilter` switch after `num_poly_points` to switch this filter on with the default parameters.

 	bandpass_fir_fft_cc <low_cut> <high_cut> <transition_bw> [window]

@ -310,6 +329,10 @@ It performs a bandpass FIR filter on complex samples, using FFT and the overlap-

 Parameters are described under `firdes_bandpass_c` and `firdes_lowpass_f`.

+	old_fractional_decimator_ff <decimation_rate> [transition_bw [window]]
+
+This is the deprecated, old version of `fractional_decimator_ff` (only uses linear interpolation, its filter cuts at 59% of the passband).
+
 	agc_ff [hang_time [reference [attack_rate [decay_rate [max_gain [attack_wait [filter_alpha]]]]]]]

 It is an automatic gain control function.
@ -501,6 +524,15 @@ To remove *sdr.js* and the compiled dependencies:

 	make emcc-clean

+## [nmux] (#nmux)
+
+The repo also contains a command line tool called `nmux`, which is a TCP stream multiplexer. It reads data from the standard input, and sends it to each client connected through TCP sockets. Available command line options are:
+* `--port (-p), --address (-a)`: TCP port and address to listen on.
+* `--bufsize (-b), --bufcnt (-n)`: Internal buffer size and count.
+* `--help (-h)`: Show help message.
+
+`nmux` was originally written for use in OpenWebRX.
+
 ## [Licensing] (#licensing)

 Most of the code of `libcsdr` is under BSD license.  
--- a/csdr.c
+++ b/csdr.c
@ -48,7 +48,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "ima_adpcm.h"
 #include <sched.h>
 #include <math.h>
+#include <strings.h>
 #include <errno.h>
+#include "fastddc.h"

 char usage[]=
 "csdr - a simple commandline tool for Software Defined Radio receiver DSP.\n\n"
@ -95,7 +97,8 @@ char usage[]=
 "    agc_ff [hang_time [reference [attack_rate [decay_rate [max_gain [attack_wait [filter_alpha]]]]]]]\n"
 "    fastagc_ff [block_size [reference]]\n"
 "    rational_resampler_ff <interpolation> <decimation> [transition_bw [window]]\n"
-"    fractional_decimator_ff <decimation_rate> [transition_bw [window]]\n"
+"    old_fractional_decimator_ff <decimation_rate> [transition_bw [window]]\n"
+"    fractional_decimator_ff <decimation_rate> [num_poly_points ( [transition_bw [window]] | --prefilter )]\n"
 "    fft_cc <fft_size> <out_of_every_n_samples> [window [--octave] [--benchmark]]\n"
 "    logpower_cf [add_db]\n"
 "    fft_benchmark <fft_size> <fft_cycles> [--benchmark]\n"
@ -133,13 +136,7 @@ int bigbufs = 0;
 //change on on 2015-08-29: we don't yield at all. fread() will do it if it blocks
 #define YIELD_EVERY_N_TIMES 3
 //#define TRY_YIELD if(++yield_counter%YIELD_EVERY_N_TIMES==0) sched_yield()
-#define TRY_YIELD
-#ifdef __CYGWIN__
- #pragma message "we go the cyg way"
- #undef TRY_YIELD
- int flush_cntr = 0;
- #define TRY_YIELD fflush(stdout); sched_yield()
-#endif
+#define TRY_YIELD fflush(stdout); sched_yield()
 //unsigned yield_counter=0;

 int badsyntax(char* why)
@ -191,6 +188,22 @@ int init_fifo(int argc, char *argv[])
 			fcntl(fd, F_SETFL, flags | O_NONBLOCK);
 			return fd;
 		}
+		else if(!strcmp(argv[2],"--fd"))  
+		{
+			//to use this:
+			//1. Create a pipe(pipedesc) in your process.
+			//2. fork() and execl() your process to run csdr, and give pipedesc[0] as parameter after --fd 
+			//  Note: when forking, the child process will get a copy of the file descriptor table! That's why this 
+			//  works at all, as file descriptor indexes are normally not transferable between processes, except for a *NIX socket way which is quite complicated... 
+			//3. From your parent process, write into pipedesc[1].
+			//This is implemented in ddcd, check there to see how to do it!
+			int fd;
+			if(sscanf(argv[3], "%d",&fd)<=0) return 0;
+			fprintf(stderr,"csdr: fd control mode on, fd=%d\n", fd);
+			int flags = fcntl(fd, F_GETFL, 0);
+			fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+			return fd;
+		}
 	}
 	return 0;
 }
@ -616,6 +629,7 @@ int main(int argc, char *argv[])
 		{
 			FEOF_CHECK;
 			if(!FREAD_C) break;
+			starting_phase=shift_math_cc((complexf*)input_buffer, (complexf*)output_buffer, the_bufsize, rate, starting_phase);
 			FWRITE_C;
 			TRY_YIELD;
 		}
@ -649,6 +663,105 @@ int main(int argc, char *argv[])
 		return 0;
 	}

+	if(!strcmp(argv[1],"shift_addfast_cc"))
+	{
+		bigbufs=1;
+
+		float starting_phase=0;
+		float rate;
+
+		int fd;
+		if(fd=init_fifo(argc,argv))
+		{
+			while(!read_fifo_ctl(fd,"%g\n",&rate)) usleep(10000);
+		}
+		else
+		{
+			if(argc<=2) return badsyntax("need required parameter (rate)"); 
+			sscanf(argv[2],"%g",&rate);
+		}
+
+		if(!sendbufsize(initialize_buffers())) return -2;
+		for(;;)
+		{
+			shift_addfast_data_t data=shift_addfast_init(rate);
+			fprintf(stderr,"shift_addfast_cc: reinitialized to %g\n",rate);
+			int remain, current_size;
+			float* ibufptr;
+			float* obufptr;
+			for(;;)
+			{
+				FEOF_CHECK;
+				if(!FREAD_C) break;
+				remain=the_bufsize;
+				ibufptr=input_buffer;
+				obufptr=output_buffer;
+				while(remain)
+				{
+					current_size=(remain>1024)?1024:remain;
+					starting_phase=shift_addfast_cc((complexf*)ibufptr, (complexf*)obufptr, current_size, &data, starting_phase);
+					ibufptr+=current_size*2;
+					obufptr+=current_size*2;
+					remain-=current_size;
+				}
+				FWRITE_C;
+				if(read_fifo_ctl(fd,"%g\n",&rate)) break;
+				TRY_YIELD;
+			}
+		}
+		return 0;
+	}
+
+
+	if(!strcmp(argv[1],"shift_unroll_cc"))
+	{
+		bigbufs=1;
+
+		float starting_phase=0;
+		float rate;
+
+		int fd;
+		if(fd=init_fifo(argc,argv))
+		{
+			while(!read_fifo_ctl(fd,"%g\n",&rate)) usleep(10000);
+		}
+		else
+		{
+			if(argc<=2) return badsyntax("need required parameter (rate)"); 
+			sscanf(argv[2],"%g",&rate);
+		}
+
+		if(!sendbufsize(initialize_buffers())) return -2;
+		for(;;)
+		{
+			shift_unroll_data_t data=shift_unroll_init(rate, 1024);
+			fprintf(stderr,"shift_unroll_cc: reinitialized to %g\n",rate);
+			int remain, current_size;
+			float* ibufptr;
+			float* obufptr;
+			for(;;)
+			{
+				FEOF_CHECK;
+				if(!FREAD_C) break;
+				remain=the_bufsize;
+				ibufptr=input_buffer;
+				obufptr=output_buffer;
+				while(remain)
+				{
+					current_size=(remain>1024)?1024:remain;
+					starting_phase=shift_unroll_cc((complexf*)ibufptr, (complexf*)obufptr, current_size, &data, starting_phase);
+					ibufptr+=current_size*2;
+					obufptr+=current_size*2;
+					remain-=current_size;
+				}
+				FWRITE_C;
+				if(read_fifo_ctl(fd,"%g\n",&rate)) break;
+				TRY_YIELD;
+			}
+		}
+		return 0;
+	}
+
 #ifdef LIBCSDR_GPL
 	if(!strcmp(argv[1],"decimating_shift_addition_cc"))
 	{
@ -861,7 +974,7 @@ int main(int argc, char *argv[])
 		{
 			FEOF_CHECK;
 			FREAD_R;
-			for(int i=0; i<the_bufsize;i++) fprintf(stderr, "%g ",input_buffer[i]);
+			for(int i=0; i<the_bufsize;i++) printf("%g ",input_buffer[i]);
 			TRY_YIELD;
 		}

@ -948,7 +1061,7 @@ int main(int argc, char *argv[])
 		padded_taps_length = taps_length+(NEON_ALIGNMENT/4)-1 - ((taps_length+(NEON_ALIGNMENT/4)-1)%(NEON_ALIGNMENT/4));
 		fprintf(stderr,"padded_taps_length = %d\n", padded_taps_length);

-		taps = (float*) (float*)malloc(padded_taps_length+NEON_ALIGNMENT);
+		taps = (float*) (float*)malloc((padded_taps_length+NEON_ALIGNMENT)*sizeof(float));
 		fprintf(stderr,"taps = %x\n", taps);
 		taps =  (float*)((((unsigned)taps)+NEON_ALIGNMENT-1) & ~(NEON_ALIGNMENT-1));
 		fprintf(stderr,"taps = %x\n", taps);
@ -1211,6 +1324,68 @@ int main(int argc, char *argv[])
 		float rate;
 		sscanf(argv[2],"%g",&rate);

+		int num_poly_points = 12;
+		if(argc>=4) sscanf(argv[3],"%d",&num_poly_points);
+		if(num_poly_points&1) return badsyntax("num_poly_points should be even");
+		if(num_poly_points<2) return badsyntax("num_poly_points should be >= 2");
+
+		int use_prefilter = 0;
+		float transition_bw=0.03;
+		window_t window = WINDOW_DEFAULT;
+		if(argc>=5)
+		{
+			if(!strcmp(argv[4], "--prefilter")) 
+			{
+				fprintf(stderr, "fractional_decimator_ff: using prefilter with default values\n");
+				use_prefilter = 1;
+			}
+			else 
+			{
+				sscanf(argv[4],"%g",&transition_bw);
+				if(argc>=6) window = firdes_get_window_from_string(argv[5]);
+			}
+		}
+		fprintf(stderr,"fractional_decimator_ff: use_prefilter = %d, num_poly_points = %d, transition_bw = %g, window = %s\n", 
+			use_prefilter, num_poly_points, transition_bw, firdes_get_string_from_window(window));
+
+		if(!initialize_buffers()) return -2;
+		sendbufsize(the_bufsize / rate);
+
+		if(rate==1) clone_(the_bufsize); //copy input to output in this special case (and stick in this function).
+
+		//Generate filter taps
+		int taps_length = 0;
+		float* taps = NULL;
+		if(use_prefilter)
+		{
+			taps_length = firdes_filter_len(transition_bw);
+			fprintf(stderr,"fractional_decimator_ff: taps_length = %d\n",taps_length);
+			taps = (float*)malloc(sizeof(float)*taps_length);
+			firdes_lowpass_f(taps, taps_length, 0.5/(rate-transition_bw), window); //0.6 const to compensate rolloff
+			//for(int=0;i<taps_length; i++) fprintf(stderr,"%g ",taps[i]);
+		}
+		else fprintf(stderr,"fractional_decimator_ff: not using taps\n");
+		fractional_decimator_ff_t d = fractional_decimator_ff_init(rate, num_poly_points, taps, taps_length); 
+		for(;;)
+		{
+			FEOF_CHECK;
+			if(d.input_processed==0) d.input_processed=the_bufsize;
+			else memcpy(input_buffer, input_buffer+d.input_processed, sizeof(float)*(the_bufsize-d.input_processed));
+			fread(input_buffer+(the_bufsize-d.input_processed), sizeof(float), d.input_processed, stdin);
+			fractional_decimator_ff(input_buffer, output_buffer, the_bufsize, &d);
+			fwrite(output_buffer, sizeof(float), d.output_size, stdout);
+			//fprintf(stderr, "os = %d, ip = %d\n", d.output_size, d.input_processed);
+			TRY_YIELD;
+		}
+	}
+
+	if(!strcmp(argv[1],"old_fractional_decimator_ff"))
+	{
+		//Process the params
+		if(argc<=2) return badsyntax("need required parameters (rate)");
+		float rate;
+		sscanf(argv[2],"%g",&rate);
+
 		float transition_bw=0.03;
 		if(argc>=4) sscanf(argv[3],"%g",&transition_bw);

@ -1219,7 +1394,7 @@ int main(int argc, char *argv[])
 		{
 			window = firdes_get_window_from_string(argv[4]);
 		}
-		else fprintf(stderr,"fractional_decimator_ff: window = %s\n",firdes_get_string_from_window(window));
+		else fprintf(stderr,"old_fractional_decimator_ff: window = %s\n",firdes_get_string_from_window(window));

 		if(!initialize_buffers()) return -2;
 		sendbufsize(the_bufsize / rate);
@ -1228,19 +1403,19 @@ int main(int argc, char *argv[])

 		//Generate filter taps
 		int taps_length = firdes_filter_len(transition_bw);
-		fprintf(stderr,"fractional_decimator_ff: taps_length = %d\n",taps_length);
+		fprintf(stderr,"old_fractional_decimator_ff: taps_length = %d\n",taps_length);
 		float* taps = (float*)malloc(sizeof(float)*taps_length);
 		firdes_lowpass_f(taps, taps_length, 0.59*0.5/(rate-transition_bw), window); //0.6 const to compensate rolloff
 		//for(int=0;i<taps_length; i++) fprintf(stderr,"%g ",taps[i]);

-		static fractional_decimator_ff_t d; //in .bss => initialized to zero
+		static old_fractional_decimator_ff_t d; //in .bss => initialized to zero
 		for(;;)
 		{
 			FEOF_CHECK;
 			if(d.input_processed==0) d.input_processed=the_bufsize;
 			else memcpy(input_buffer, input_buffer+d.input_processed, sizeof(float)*(the_bufsize-d.input_processed));
 			fread(input_buffer+(the_bufsize-d.input_processed), sizeof(float), d.input_processed, stdin);
-			d = fractional_decimator_ff(input_buffer, output_buffer, the_bufsize, rate, taps, taps_length, d);
+			d = old_fractional_decimator_ff(input_buffer, output_buffer, the_bufsize, rate, taps, taps_length, d);
 			fwrite(output_buffer, sizeof(float), d.output_size, stdout);
 			TRY_YIELD;
 		}
@ -1283,6 +1458,8 @@ int main(int argc, char *argv[])
 		FFT_PLAN_T* plan=make_fft_c2c(fft_size, windowed, output, 1, benchmark);
 		if(benchmark) fprintf(stderr," done\n");
 		if(octave) printf("setenv(\"GNUTERM\",\"X11 noraise\");y=zeros(1,%d);semilogy(y,\"ydatasource\",\"y\");\n",fft_size);
+		float *windowt;
+		windowt = precalculate_window(fft_size, window);
 		for(;;)
 		{
 			FEOF_CHECK;
@ -1301,7 +1478,8 @@ int main(int argc, char *argv[])
 				for(int i=0;i<fft_size-every_n_samples;i++) input[i]=input[i+every_n_samples];
 				fread(input+fft_size-every_n_samples, sizeof(complexf), every_n_samples, stdin);
 			}
-			apply_window_c(input,windowed,fft_size,window);
+			//apply_window_c(input,windowed,fft_size,window);
+			apply_precalculated_window_c(input,windowed,fft_size,windowt);
 			fft_execute(plan);
 			if(octave)
 			{
@ -1337,6 +1515,40 @@ int main(int argc, char *argv[])
 		}
 	}

+	if(!strcmp(argv[1],"logaveragepower_cf"))
+	{
+		bigbufs=1;
+		if(argc<=4) return badsyntax("need required parameters (add_db, table_size, avgnumber)"); 
+		float add_db=0;
+		int avgnumber=0;
+		int fft_size=0;
+		
+		sscanf(argv[2],"%g",&add_db);
+		sscanf(argv[3],"%d",&fft_size);
+		sscanf(argv[4],"%d",&avgnumber);
+		
+		float *input = malloc(sizeof(float)*2 * fft_size);
+		float *output = malloc(sizeof(float) * fft_size);
+
+		add_db -= 10.0*log10(avgnumber);
+		for(;;)
+		{
+			int i,n;
+			for(i = 0; i < fft_size; i++) {
+				output[i] = 0;
+			}
+			FEOF_CHECK;
+			for(n = 0; n < avgnumber; n++) {
+				fread (input, sizeof(float)*2, fft_size, stdin);
+				accumulate_power_cf((complexf*)input, output, fft_size);
+			}
+			log_ff(output, output, fft_size, add_db);
+			fwrite (output, sizeof(float), fft_size, stdout);
+			TRY_YIELD;
+		}
+		return 0;
+	}
+
 	if(!strcmp(argv[1],"fft_exchange_sides_ff"))
 	{
 		if(argc<=2) return badsyntax("need required parameters (fft_size)");
@ -1441,8 +1653,6 @@ int main(int argc, char *argv[])
 		float high_cut;
 		float transition_bw;
 		window_t window = WINDOW_DEFAULT;
-		char window_string[256]; //TODO: nice buffer overflow opportunity
-
 		int fd;
 		if(fd=init_fifo(argc,argv))
 		{
@ -1466,7 +1676,7 @@ int main(int argc, char *argv[])
 		if (fft_size-taps_length<200) fft_size<<=1;
 		int input_size = fft_size - taps_length + 1;
 		int overlap_length = taps_length - 1;
-		fprintf(stderr,"bandpass_fir_fft_cc: (fft_size = %d) = (taps_length = %d) + (input_size = %d) - 1\n(overlap_length = %d) = taps_length - 1\n", fft_size, taps_length, input_size, overlap_length);
+		fprintf(stderr,"bandpass_fir_fft_cc: (fft_size = %d) = (taps_length = %d) + (input_size = %d) - 1\n(overlap_length = %d) = taps_length - 1\n", fft_size, taps_length, input_size, overlap_length );
 		if (fft_size<=2) return badsyntax("FFT size error.");

 		if(!sendbufsize(getbufsize())) return -2;
@ -1874,11 +2084,162 @@ int main(int argc, char *argv[])
 		}
 	}

+	if( !strcmp(argv[1],"fastddc_fwd_cc") ) //<decimation> [transition_bw [window]]
+	{	
+		
+		int decimation;
+		if(argc<=2) return badsyntax("need required parameter (decimation)");
+		sscanf(argv[2],"%d",&decimation);
+		
+		float transition_bw = 0.05;
+		if(argc>3) sscanf(argv[3],"%g",&transition_bw);
+
+		window_t window = WINDOW_DEFAULT;
+		if(argc>4)	window=firdes_get_window_from_string(argv[4]);
+		else fprintf(stderr,"fastddc_fwd_cc: window = %s\n",firdes_get_string_from_window(window));
+
+		fastddc_t ddc; 
+		if(fastddc_init(&ddc, transition_bw, decimation, 0)) { badsyntax("error in fastddc_init()"); return 1; }
+		fastddc_print(&ddc,"fastddc_fwd_cc");
+
+		if(!initialize_buffers()) return -2;
+		sendbufsize(ddc.fft_size);
+
+		//make FFT plan
+		complexf* input = 	 (complexf*)fft_malloc(sizeof(complexf)*ddc.fft_size);
+		complexf* windowed = (complexf*)fft_malloc(sizeof(complexf)*ddc.fft_size);
+		complexf* output =   (complexf*)fft_malloc(sizeof(complexf)*ddc.fft_size);
+
+		for(int i=0;i<ddc.fft_size;i++) iof(input,i)=qof(input,i)=0; //null the input buffer
+
+		int benchmark = 1; 
+		if(benchmark) fprintf(stderr,"fastddc_fwd_cc: benchmarking FFT...");
+		FFT_PLAN_T* plan=make_fft_c2c(ddc.fft_size, windowed, output, 1, benchmark);
+		if(benchmark) fprintf(stderr," done\n");
+
+		for(;;)
+		{
+			FEOF_CHECK;
+			//overlapped FFT
+			for(int i=0;i<ddc.overlap_length;i++) input[i]=input[i+ddc.input_size];
+			fread(input+ddc.overlap_length, sizeof(complexf), ddc.input_size, stdin);
+			//apply_window_c(input,windowed,ddc.fft_size,window);
+			memcpy(windowed, input, ddc.fft_size*sizeof(complexf)); //we can switch off windows; TODO: it is likely that we shouldn't apply a window to both the FFT and the filter.
+			fft_execute(plan);
+			fwrite(output, sizeof(complexf), ddc.fft_size, stdout);
+			TRY_YIELD;
+		}
+	}
+
+	if( !strcmp(argv[1],"fastddc_inv_cc") ) //<shift_rate> <decimation> [transition_bw [window]]
+	{	
+		float shift_rate;
+		int plusarg=0;
+
+		int fd;
+		if(fd=init_fifo(argc,argv))
+		{
+			while(!read_fifo_ctl(fd,"%g\n",&shift_rate)) usleep(10000);
+			plusarg=1;
+		}
+		else
+		{
+			if(argc<=2) return badsyntax("need required parameter (rate)"); 
+			sscanf(argv[2],"%g",&shift_rate);
+		}
+
+		int decimation;
+		if(argc<=3+plusarg) return badsyntax("need required parameter (decimation)");
+		sscanf(argv[3+plusarg],"%d",&decimation);
+		//fprintf(stderr, "dec=%d %d\n", decimation);
+
+		float transition_bw = 0.05;
+		if(argc>4+plusarg) sscanf(argv[4+plusarg],"%g",&transition_bw);
+
+		window_t window = WINDOW_DEFAULT;
+		if(argc>5+plusarg)	window=firdes_get_window_from_string(argv[5+plusarg]);
+		else fprintf(stderr,"fastddc_apply_cc: window = %s\n",firdes_get_string_from_window(window));
+
+		for(;;)
+		{
+
+		fastddc_t ddc; 
+		if(fastddc_init(&ddc, transition_bw, decimation, shift_rate)) { badsyntax("error in fastddc_init()"); return 1; }
+		fastddc_print(&ddc,"fastddc_inv_cc");
+
+		if(!initialize_buffers()) return -2;
+		sendbufsize(ddc.post_input_size/ddc.post_decimation); //TODO not exactly correct
+
+		//prepare making the filter and doing FFT on it
+		complexf* taps=(complexf*)calloc(sizeof(complexf),ddc.fft_size); //initialize to zero
+		complexf* taps_fft=(complexf*)malloc(sizeof(complexf)*ddc.fft_size);
+		FFT_PLAN_T* plan_taps = make_fft_c2c(ddc.fft_size, taps, taps_fft, 1, 0); //forward, don't benchmark (we need this only once)
+
+		//make the filter
+		float filter_half_bw = 0.5/decimation;
+		fprintf(stderr, "fastddc_inv_cc: preparing a bandpass filter of [%g, %g] cutoff rates. Real transition bandwidth is: %g\n", (-shift_rate)-filter_half_bw, (-shift_rate)+filter_half_bw, 4.0/ddc.taps_length);
+		firdes_bandpass_c(taps, ddc.taps_length, (-shift_rate)-filter_half_bw, (-shift_rate)+filter_half_bw, window);
+		fft_execute(plan_taps);
+		fft_swap_sides(taps_fft,ddc.fft_size);
+
+		//make FFT plan
+		complexf* inv_input = 	 (complexf*)fft_malloc(sizeof(complexf)*ddc.fft_inv_size);
+		complexf* inv_output =   (complexf*)fft_malloc(sizeof(complexf)*ddc.fft_inv_size);
+		fprintf(stderr,"fastddc_inv_cc: benchmarking FFT...");
+		FFT_PLAN_T* plan_inverse = make_fft_c2c(ddc.fft_inv_size, inv_input, inv_output, 0, 1); //inverse, do benchmark
+		fprintf(stderr," done\n");
+		
+		//alloc. buffers
+		complexf* input = 	 (complexf*)fft_malloc(sizeof(complexf)*ddc.fft_size);
+		complexf* output =   (complexf*)fft_malloc(sizeof(complexf)*ddc.post_input_size);
+
+		decimating_shift_addition_status_t shift_stat;
+		bzero(&shift_stat, sizeof(shift_stat));
+		for(;;)
+		{
+			FEOF_CHECK;
+			fread(input, sizeof(complexf), ddc.fft_size, stdin);
+			shift_stat = fastddc_inv_cc(input, output, &ddc, plan_inverse, taps_fft, shift_stat);
+			fwrite(output, sizeof(complexf), shift_stat.output_size, stdout);
+			//fprintf(stderr, "ss os = %d\n", shift_stat.output_size);
+			TRY_YIELD;
+			if(read_fifo_ctl(fd,"%g\n",&shift_rate)) break;
+		}
+
+		}
+	}
+
+	if( !strcmp(argv[1], "_fft2octave") ) 
+	{
+		int fft_size;
+		if(argc<=2) return badsyntax("need required parameter (fft_size)");
+		sscanf(argv[2],"%d",&fft_size);
+
+		complexf* fft_input=(complexf*)malloc(sizeof(complexf)*fft_size);
+		initialize_buffers();
+		if(!sendbufsize(fft_size)) return -2;
+
+		printf("setenv(\"GNUTERM\",\"X11 noraise\");y=zeros(1,%d);semilogy(y,\"ydatasource\",\"y\");\n",fft_size);
+		for(;;)
+		{
+			FEOF_CHECK;
+			fread(fft_input, sizeof(complexf), fft_size, stdin);
+			printf("fftdata=[");
+			//we have to swap the two parts of the array to get a valid spectrum
+			for(int i=fft_size/2;i<fft_size;i++) printf("(%g)+(%g)*i ",iof(fft_input,i),qof(fft_input,i));
+			for(int i=0;i<fft_size/2;i++) printf("(%g)+(%g)*i ",iof(fft_input,i),qof(fft_input,i)); 
+			printf(
+				"];\n"
+				"y=abs(fftdata);\n"
+				"refreshdata;\n"
+			);
+		}
+	}
+
 	if(!strcmp(argv[1],"none"))
 	{
 		return 0;
 	}

-	return badsyntax("function name given in argument 1 does not exist. Possible causes:\n- You mistyped the commandline.\n- You need to update csdr to a newer version (if available).");
-
+	fprintf(stderr,"csdr: function name given in argument 1 (%s) does not exist. Possible causes:\n- You mistyped the commandline.\n- You need to update csdr to a newer version (if available).", argv[1]); return -1;
 }
--- a/ddcd.cpp
+++ b/ddcd.cpp
@ -0,0 +1,335 @@
+/*
+This software is part of libcsdr, a set of simple DSP routines for
+Software Defined Radio.
+
+Copyright (c) 2014, Andras Retzler <randras@sdr.hu>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL ANDRAS RETZLER BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "ddcd.h"
+
+
+//Listening endpoint; main() overwrites both from --port / --address.
+int host_port = 0; //stays 0 when --port is missing, which main() treats as a fatal error
+char host_address[100] = "127.0.0.1";
+int thread_cntr = 0; //NOTE(review): no other use of this counter is visible in this file
+
+//CLI parameters
+int decimation = 0; //--decimation; 0 means "not given", which main() rejects
+float transition_bw = 0.05; //--transition; main() only accepts values between 0 and 0.5
+int bufsize = 1024; //--bufsize; main() passes it to the tsmpool constructor and uses it as the read() chunk size
+int bufcnt = 1024; //--bufcnt; main() passes it to the tsmpool constructor
+char ddc_method_str[100] = "td"; //--method exactly as typed; main() accepts "td" and "fastddc"
+ddc_method_t ddc_method; //parsed form of ddc_method_str; main() does not assign it when decimation is 1
+
+//Signal handler installed by main(): reports the signal number on stderr and terminates the
+//process with status 0.
+//Only async-signal-safe calls (write(), _exit()) are used: the handler can interrupt the program
+//in the middle of a stdio or malloc call, so fprintf()/fflush()/exit() must not be called here.
+//  signo: number of the delivered signal
+void sig_handler(int signo)
+{
+	static const char msg_head[] = MSG_START "signal ";
+	static const char msg_tail[] = " caught, exiting ddcd...\n";
+
+	//format signo by hand, right to left, because printf-style functions are off limits
+	char digits[12]; //fits a 32-bit int including its sign
+	int pos = (int)sizeof(digits);
+	unsigned int value = (signo<0) ? 0u-(unsigned int)signo : (unsigned int)signo;
+	do
+	{
+		digits[--pos] = (char)('0' + value%10);
+		value /= 10;
+	} while(value);
+	if(signo<0) digits[--pos] = '-';
+
+	//a failed write cannot be reported from inside a handler, so the results are ignored on purpose
+	(void)write(STDERR_FILENO, msg_head, sizeof(msg_head)-1);
+	(void)write(STDERR_FILENO, digits+pos, sizeof(digits)-pos);
+	(void)write(STDERR_FILENO, msg_tail, sizeof(msg_tail)-1);
+	_exit(0);
+}
+
+//Helpers defined further down in this file; declared here because main() calls them first.
+void clients_close_all_finished();
+void* client_thread(void* param);
+
+//Entry point of the ddcd server.
+//Parses the command line, installs the signal handlers, opens the listening TCP socket and then
+//loops forever: every accepted connection gets a client_t and its own client_thread(), while the
+//input stream read from stdin is stored into the write buffers handed out by the tsmpool.
+//The process leaves through print_exit()/error_exit() on invalid arguments, on socket errors and
+//at end of input.
+//NOTE(review): tsmpool and set_nonblocking() are not declared in ddcd.h as shown - confirm which
+//header is supposed to provide them.
+int main(int argc, char* argv[])
+{
+	int c;
+	for(;;)
+	{
+		int option_index = 0;
+		static struct option long_options[] = {
+		   {"port",       required_argument, 0,  'p' },
+		   {"address",    required_argument, 0,  'a' },
+		   {"decimation", required_argument, 0,  'd' },
+		   {"bufsize",    required_argument, 0,  'b' },
+		   {"bufcnt",     required_argument, 0,  'n' },
+		   {"method",     required_argument, 0,  'm' },
+		   {"transition", required_argument, 0,  't' },
+		   {0,            0,                 0,  0   } //getopt_long() finds the end of the array by this all-zero entry
+		};
+		c = getopt_long(argc, argv, "p:a:d:b:n:m:t:", long_options, &option_index);
+		if(c==-1) break;
+		switch (c)
+		{
+		case 'a':
+			//truncates over-long input and always NUL-terminates
+			snprintf(host_address, sizeof(host_address), "%s", optarg);
+			break;
+		case 'p':
+			host_port=atoi(optarg);
+			break;
+		case 'd':
+			decimation=atoi(optarg);
+			break;
+		case 'b':
+			bufsize=atoi(optarg);
+			break;
+		case 'n':
+			bufcnt=atoi(optarg);
+			break;
+		case 'm':
+			snprintf(ddc_method_str, sizeof(ddc_method_str), "%s", optarg);
+			break;
+		case 't':
+			sscanf(optarg,"%g",&transition_bw);
+			break;
+		case 0:
+		case '?':
+		case ':':
+		default:
+			print_exit(MSG_START "error in getopt_long()\n");
+		}
+	}
+
+	if(!decimation) print_exit(MSG_START "missing required command line argument, --decimation.\n");
+	if(!host_port) print_exit(MSG_START "missing required command line argument, --port.\n");
+	if(decimation<0) print_exit(MSG_START "invalid value for --decimation (should be >0).\n");
+	if(decimation==1) fprintf(stderr, MSG_START "decimation = 1, just copying raw samples.\n");
+	if(transition_bw<0||transition_bw>0.5) print_exit(MSG_START "invalid value for --transition (should be between 0 and 0.5).\n");
+	//0 is rejected as well: a zero-sized read() below would be mistaken for end of input
+	if(bufsize<=0) print_exit(MSG_START "invalid value for --bufsize (should be >0)\n");
+	if(bufcnt<=0) print_exit(MSG_START "invalid value for --bufcnt (should be >0)\n");
+	if(decimation==1); //don't do anything then //!will have to take care about this later
+	else if(!strcmp(ddc_method_str,"td"))
+	{
+		ddc_method = M_TD;
+		fprintf(stderr, MSG_START "method is M_TD (default).\n");
+	}
+	else if (!strcmp(ddc_method_str,"fastddc"))
+	{
+		ddc_method = M_FASTDDC;
+		fprintf(stderr, MSG_START "method is M_FASTDDC.\n");
+	}
+	else print_exit(MSG_START "invalid parameter given to --method.\n");
+
+	//set signals (SIGKILL is not registered: it cannot be caught, sigaction() only fails with EINVAL)
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = sig_handler;
+	sigaction(SIGTERM, &sa, NULL);
+	sigaction(SIGQUIT, &sa, NULL);
+	sigaction(SIGINT, &sa, NULL);
+	sigaction(SIGHUP, &sa, NULL);
+
+	struct sockaddr_in addr_host;
+	int listen_socket;
+	std::vector<client_t*> clients;
+	clients.reserve(100);
+	listen_socket=socket(AF_INET,SOCK_STREAM,0);
+	if(listen_socket == -1)
+		error_exit(MSG_START "cannot create socket");
+
+	int sockopt = 1;
+	if( setsockopt(listen_socket, SOL_SOCKET, SO_REUSEADDR, (char *)&sockopt, sizeof(sockopt)) == -1 )
+		error_exit(MSG_START "cannot set SO_REUSEADDR");  //the best description on SO_REUSEADDR ever: http://stackoverflow.com/a/14388707/3182453
+
+	memset(&addr_host,0,sizeof(addr_host)); //zero bytes, not the character '0'
+	addr_host.sin_family = AF_INET;
+	addr_host.sin_port = htons(host_port);
+
+	if( (addr_host.sin_addr.s_addr=inet_addr(host_address)) == INADDR_NONE )
+		error_exit(MSG_START "invalid host address");
+
+	if( bind(listen_socket, (struct sockaddr*) &addr_host, sizeof(addr_host)) < 0 )
+		error_exit(MSG_START "cannot bind() address to the socket");
+
+	if( listen(listen_socket, 10) == -1 )
+		error_exit(MSG_START "cannot listen() on socket");
+
+	fprintf(stderr,MSG_START "listening on %s:%d\n", inet_ntoa(addr_host.sin_addr), host_port);
+
+	struct sockaddr_in addr_cli;
+	socklen_t addr_cli_len = sizeof(addr_cli);
+	int new_socket;
+
+	fd_set select_fds;
+	int input_fd = STDIN_FILENO; //the input stream is read from stdin
+	int highfd = 0;
+	maxfd(&highfd, listen_socket);
+	maxfd(&highfd, input_fd);
+
+	//Set stdin and listen_socket to non-blocking
+	if(set_nonblocking(input_fd) || set_nonblocking(listen_socket))
+		error_exit(MSG_START "cannot set_nonblocking()");
+
+	//Create tsmpool
+	tsmpool* pool = new tsmpool(bufsize, bufcnt);
+	if(!pool->ok) print_exit(MSG_START "tsmpool failed to initialize\n");
+
+	unsigned char* current_write_buffer = pool->get_write_buffer();
+	int index_in_current_write_buffer = 0;
+
+	for(;;)
+	{
+		//select() replaces the set with the descriptors that are ready, so it has to be rebuilt
+		//before every call; otherwise a descriptor that was idle once is never waited on again
+		FD_ZERO(&select_fds);
+		FD_SET(listen_socket, &select_fds);
+		FD_SET(input_fd, &select_fds);
+
+		//Let's wait until there is any new data to read, or any new connection!
+		select(highfd, &select_fds, NULL, NULL, NULL);
+
+		//Is there a new client connection?
+		addr_cli_len = sizeof(addr_cli); //in/out parameter of accept(), reset it for every call
+		if( (new_socket = accept(listen_socket, (struct sockaddr*)&addr_cli, &addr_cli_len)) != -1)
+		{
+			clients_close_all_finished();
+
+			//fill in the client completely before the thread that reads it is started
+			client_t* new_client = new client_t;
+			new_client->error = 0;
+			memcpy(&new_client->addr, &addr_cli, sizeof(new_client->addr));
+			new_client->socket = new_socket;
+			new_client->status = CS_CREATED;
+
+			//pthread_create() returns 0 on success and a positive error number on failure;
+			//client_thread() casts its argument straight to client_t*, so pass the pointer itself
+			if(pthread_create(&new_client->thread, NULL, client_thread, (void*)new_client)==0)
+			{
+				clients.push_back(new_client);
+				fprintf(stderr, MSG_START "pthread_create() done, clients now: %d\n", (int)clients.size());
+			}
+			else
+			{
+				fprintf(stderr, MSG_START "pthread_create() failed.\n");
+				close(new_socket);
+				delete new_client;
+			}
+		}
+
+		if(index_in_current_write_buffer >= bufsize)
+		{
+			current_write_buffer = pool->get_write_buffer();
+			index_in_current_write_buffer = 0;
+		}
+		int retval = read(input_fd, current_write_buffer + index_in_current_write_buffer, bufsize - index_in_current_write_buffer);
+		if(retval>0)
+		{
+			index_in_current_write_buffer += retval;
+		}
+		else if(retval==0)
+		{
+			//!end of input stream, close clients and exit
+			print_exit(MSG_START "end of input, exiting.\n");
+		}
+		//retval<0: nothing to read right now (input_fd is non-blocking), go back to select()
+	}
+}
+
+#if 0
+for (int i=0; i<clients.size(); i++)
+{
+	if(write(clients[i]->pipefd[1], buf, retval)==-1)
+	{
+
+		if(!clients[i]->error)
+		{
+			print_client(clients[i], "lost buffer, failed to write pipe.");
+			clients[i]->error=1;
+		}
+		//fprintf(stderr, MSG_START "errno is %d\n", errno); //usually 11
+		//int wpstatus;
+		//int wpresult = waitpid(clients[i]->pid, &wpstatus, WNOHANG);
+		//fprintf(stderr, MSG_START "pid is %d\n",clients[i]->pid);
+		//perror("somethings wrong");
+		//if(wpresult == -1) print_client(clients[i], "error while waitpid()!");
+		//else if(wpresult == 0)
+		waitpid(clients[i]->pid, NULL, WNOHANG);
+		if(!proc_exists(clients[i]->pid))
+		{
+			//Client exited!
+			print_client(clients[i], "closing client from main process.");
+			close(clients[i]->pipefd[1]);
+			close(clients[i]->socket);
+			delete clients[i];
+			clients.erase(clients.begin()+i);
+			fprintf(stderr, MSG_START "done closing client from main process.\n");
+		}
+	}
+	else  { if(clients[i]->error) print_client(clients[i], "pipe okay again."); clients[i]->error=0; }
+}
+}
+//TODO: at the end, server closes pipefd[1] for client
+#endif
+
+//Removes every client whose thread has set its status to CS_THREAD_FINISHED from the clients list.
+//NOTE(review): in this revision `clients` is declared as a local variable of main(); it has to be
+//visible at file scope for this function to compile - confirm where it is meant to live.
+//NOTE(review): the removed client_t objects are allocated with new in main(); nothing here frees
+//them or closes their sockets - confirm who owns that cleanup.
+void clients_close_all_finished()
+{
+	for(size_t i=0;i<clients.size();)
+	{
+		//std::vector::erase() takes an iterator, not an index. After an erase the following
+		//element moves into slot i, so the index only advances when nothing was removed.
+		if(clients[i]->status == CS_THREAD_FINISHED) clients.erase(clients.begin()+i);
+		else i++;
+	}
+}
+
+//Work in progress: meant to take one character of control data received from a client.
+//NOTE(review): this function is unfinished and does not compile as shown - the `if` below has no
+//body, the last `for` statement is cut off, `commands` is not declared in this file, and
+//client_thread() tests the return value although the function is declared void.
+//  c: the received character (not used by the code written so far)
+void client_parser_push(char c)
+{ //!TODO
+	command_t cmd;
+	char* commands_cstr = commands.c_str();
+	int newline_index = -1;
+
+	//remember the position of the last '\n' in the accumulated text (-1 if there is none)
+	for(int i=0;commands_cstr[i];i++) if(commands_cstr[i]=='\n') newline_index = i;
+	if(newline_index == -1)
+
+	char param_name[101];
+	char param_value[101];
+	for(int i=0;i<100;commands_csdr
+
+}
+
+//Thread body started by main() through pthread_create() for every accepted connection.
+//Marks the client as CS_THREAD_RUNNING, then loops reading control bytes from the client socket.
+//NOTE(review): unfinished - the tsmpool constructor call is incomplete, the loop has no exit
+//condition (the statements after it are unreachable) and the DSP steps are only listed as comments.
+//  param: cast directly to client_t*, so the caller must pass the client_t pointer itself
+//  returns: NULL (after pthread_exit(), so effectively never reached)
+void* client_thread (void* param) //!TODO
+{
+	client_t* me_the_client = (client_t*)param;
+	me_the_client->status = CS_THREAD_RUNNING;
+	char ctl_data_buffer;
+	int retval;
+	tsmpool* p1_temp;
+	tsmpool* p2_temp;
+	const int num_client_buffers = 20;
+	if(ddc_method == M_TD)
+	{
+		p1_temp = new tsmpool(bufsize, )
+	}
+
+	for(;;)
+	{
+		//feed the control channel to the parser one byte at a time; the inner loop ends when
+		//recv() returns 0 or when client_parser_push() yields a non-zero value
+		do
+		{
+			retval = recv(me_the_client->socket, &ctl_data_buffer, 1, 0);
+			if(client_parser_push(ctl_data_buffer)) break;
+		} while (retval);
+
+
+		//read control data from socket
+		//process control data
+		//run shift
+		//run decimation
+		//have an exit condition (??)
+		if(ddc_method == M_TD)
+		{
+
+		}
+	}
+	//clients_close_all_finished() looks for this status to drop the client from the list
+	me_the_client->status = CS_THREAD_FINISHED;
+	pthread_exit(NULL);
+	return NULL;
+}
+
+//Fatal-error helper for failed system calls: perror() prints `why` followed by the description
+//of the current errno, then the process exits with status 1.
+//  why: prefix for the error line
+void error_exit(const char* why)
+{
+	const int failure_status = 1;
+	perror(why);
+	exit(failure_status);
+}
+
+//Fatal-error helper for problems that have no errno: writes `why` to stderr unchanged (the
+//caller supplies any trailing newline) and exits with status 1.
+//  why: complete message to print
+void print_exit(const char* why)
+{
+	const int failure_status = 1;
+	fprintf(stderr, "%s", why);
+	exit(failure_status);
+}
+
+//Keeps *maxfd at "highest descriptor seen so far, plus one", which is the value select()
+//expects as its first argument.
+//  maxfd: running value, raised in place when fd requires it
+//  fd:    descriptor that is going to be watched
+void maxfd(int* maxfd, int fd)
+{
+	const int needed = fd+1;
+	if(*maxfd < needed) *maxfd = needed;
+}
--- a/ddcd.h
+++ b/ddcd.h
@ -0,0 +1,57 @@
+#pragma once
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <signal.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <vector>
+
+#define SOFTWARE_NAME "ddcd"
+#define MSG_START SOFTWARE_NAME ": "
+
+//DDC implementation selected with --method on the command line (parsed in main()).
+typedef enum ddc_method_e
+{
+	M_TD, //chosen by "td", which is also the default method string
+	M_FASTDDC //chosen by "fastddc"
+} ddc_method_t;
+
+//Life cycle of a client_t as recorded in its `status` field.
+typedef enum client_status_e
+{
+	CS_CREATED, //set by main() when the client_t is filled in after accept()
+	CS_THREAD_RUNNING, //set by client_thread() as soon as it starts
+	CS_THREAD_FINISHED //set by client_thread() before it ends; such clients are dropped by clients_close_all_finished()
+} client_status_t;
+
+
+//Per-connection state; one instance is allocated by main() for every accepted client.
+typedef struct client_s
+{
+	struct sockaddr_in addr; //peer address as filled in by accept()
+	int socket; //connected socket returned by accept()
+	int error; //set to non-zero on error (data transfer failed)
+	pthread_t thread; //handle written by pthread_create() for this client's client_thread()
+	client_status_t status; //see client_status_t
+
+} client_t;
+
+//Kind of a parsed control command (stored in command_t.type).
+//NOTE(review): presumably matches the "shift" and "bypass" commands of the older ddcd control
+//protocol; no code in this revision produces or consumes these values yet - confirm.
+typedef enum command_type_e
+{
+	CT_SHIFT,
+	CT_BYPASS
+} command_type_t;
+
+
+//One parsed control command: its kind plus a single float argument.
+//NOTE(review): the meaning of float_param per command type is not defined anywhere in this
+//revision - confirm once client_parser_push() is implemented.
+typedef struct command_s
+{
+	command_type_t type;
+	float float_param;
+} command_t;
+
+//Writes `why` to stderr as-is and exits with status 1.
+void print_exit(const char* why);
+//Calls perror(why) and exits with status 1.
+void error_exit(const char* why);
+//Raises *maxfd to fd+1 if it is not already larger than fd.
+void maxfd(int* maxfd, int fd);
--- a/ddcd_old.cpp
+++ b/ddcd_old.cpp
@ -0,0 +1,560 @@
+/*
+This software is part of libcsdr, a set of simple DSP routines for 
+Software Defined Radio.
+
+Copyright (c) 2014, Andras Retzler <randras@sdr.hu>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL ANDRAS RETZLER BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "ddcd.h"
+
+
+#define SOFTWARE_NAME "ddcd"
+#define MSG_START SOFTWARE_NAME ": "
+
+//Command line parameters; main() overwrites them while parsing the options.
+int host_port = 0; //--port; 0 means "not given", which main() rejects
+char host_address[100] = "127.0.0.1"; //--address
+int decimation = 0; //--decimation; 0 means "not given", which main() rejects
+float transition_bw = 0.05; //--transition; main() only accepts values between 0 and 0.5
+int bufsize = 1024; //--bufsize
+int bufsizeall; //size of buf in bytes, computed in main() as bufsize*sizeof(char)
+int pipe_max_size; //value read from /proc/sys/fs/pipe-max-size in main(); applied to every client pipe
+int in_client = 0; //set to 1 by client(), i.e. in the forked per-client process
+char ddc_method_str[100] = "td"; //--method exactly as typed; main() accepts "td" and "fastddc"
+ddc_method_t ddc_method; //parsed form of ddc_method_str
+pid_t main_dsp_proc; //NOTE(review): no use of this variable is visible in this file
+
+int input_fd = STDIN_FILENO; //can be stdin, or the stdout of main_subprocess
+//pid and process group of the subprocess started by the main process / by a client process;
+//the group ids are what sig_handler() and client() hand to killpg2()
+pid_t main_subprocess_pid = 0;
+pid_t main_subprocess_pgrp = 0;
+pid_t client_subprocess_pid = 0;
+pid_t client_subprocess_pgrp = 0;
+
+char* buf; //transfer buffer of bufsizeall bytes, allocated in main() and also used by client()
+
+//Adds O_NONBLOCK to the file status flags of fd, leaving the other flags as they are.
+//  fd: descriptor to modify
+//  returns: 0 on success, 1 if either fcntl() call failed
+int set_nonblocking(int fd)
+{
+	const int current_flags = fcntl(fd, F_GETFL);
+	if(current_flags == -1) return 1;
+	if(fcntl(fd, F_SETFL, current_flags|O_NONBLOCK) == -1) return 1;
+	return 0;
+}
+
+//Tells whether a process with the given pid can currently be signalled.
+//pid 0 and pid 1 are reported as existing without being probed.
+//  returns: non-zero if the process exists, 0 if kill(pid, 0) failed
+int proc_exists(pid_t pid)
+{
+	switch(pid)
+	{
+	case 0:
+	case 1:
+		return 1;
+	default:
+		break;
+	}
+	//signal 0 performs only the existence/permission check, nothing is delivered
+	const int probe = kill(pid, 0);
+	return probe != -1;
+}
+
+//Common handler for every signal registered in main().
+//  SIGPIPE: only logged, the process keeps running.
+//  SIGCHLD: ignored unless a main subprocess was started and no longer exists.
+//  everything else (and SIGCHLD for a vanished main subprocess): SIGTERM is sent to the
+//  subprocess group owned by this process (main or client), then the process exits with status 0.
+void sig_handler(int signo)
+{
+	if(signo==SIGPIPE)
+	{
+		fprintf(stderr,MSG_START "SIGPIPE received.\n");
+		return;
+	}
+	if(signo==SIGCHLD)
+	{
+		int child_status;
+		if(!main_subprocess_pid) return;
+		waitpid(main_subprocess_pid, &child_status, WNOHANG); //collect it if it has already exited
+		if(proc_exists(main_subprocess_pid)) return;
+		fprintf(stderr,MSG_START "main_subprocess_pid exited! Exiting...\n");
+	}
+	//if(pgrp!=1 && pgrp!=0) //I just want to make sure that we cannot kill init or sched
+	//	killpg(pgrp, signo);
+	if(in_client)
+	{
+		if(client_subprocess_pid) killpg2(client_subprocess_pgrp);
+	}
+	else if(main_subprocess_pid)
+	{
+		killpg2(main_subprocess_pgrp);
+	}
+	const char* where = (in_client)?"client":"main";
+	fprintf(stderr, MSG_START "signal %d caught in %s, exiting ddcd...\n", signo, where);
+	fflush(stderr);
+	exit(0);
+}
+
+//Client most recently accepted by main(); after fork() the child process serves it in client().
+client_t* this_client;
+
+//Entry point of the process-based ddcd server.
+//Parses the command line, installs the signal handlers, opens the listening TCP socket,
+//optionally starts the shared front-end subprocess (fastddc method) and then loops forever:
+//every accepted connection is served by a forked child running client(), and every chunk read
+//from the input is copied into the pipe of each client.
+//Exits through print_exit()/error_exit() on invalid arguments, on setup errors and at end of input.
+int main(int argc, char* argv[])
+{
+	int c;
+	fd_set select_fds;
+
+	for(;;)
+	{
+		int option_index = 0;
+		static struct option long_options[] = {
+		   {"port",       required_argument, 0,  'p' },
+		   {"address",    required_argument, 0,  'a' },
+		   {"decimation", required_argument, 0,  'd' },
+		   {"bufsize",    required_argument, 0,  'b' },
+		   {"method",     required_argument, 0,  'm' },
+		   {"transition", required_argument, 0,  't' },
+		   {0,            0,                 0,  0   } //getopt_long() finds the end of the array by this all-zero entry
+		};
+		c = getopt_long(argc, argv, "p:a:d:b:m:t:", long_options, &option_index);
+		if(c==-1) break;
+		switch (c)
+		{
+		case 'a':
+			//truncates over-long input and always NUL-terminates
+			snprintf(host_address, sizeof(host_address), "%s", optarg);
+			break;
+		case 'p':
+			host_port=atoi(optarg);
+			break;
+		case 'd':
+			decimation=atoi(optarg);
+			break;
+		case 'b':
+			bufsize=atoi(optarg);
+			break;
+		case 'm':
+			snprintf(ddc_method_str, sizeof(ddc_method_str), "%s", optarg);
+			break;
+		case 't':
+			sscanf(optarg,"%g",&transition_bw);
+			break;
+		case 0:
+		case '?':
+		case ':':
+		default:
+			print_exit(MSG_START "error in getopt_long()\n");
+		}
+	}
+
+	if(!decimation) print_exit(MSG_START "missing required command line argument, --decimation.\n");
+	if(!host_port) print_exit(MSG_START "missing required command line argument, --port.\n");
+	if(decimation<0) print_exit(MSG_START "invalid value for --decimation (should be >0).\n");
+	if(decimation==1) fprintf(stderr, MSG_START "decimation = 1, just copying raw samples.\n");
+	if(transition_bw<0||transition_bw>0.5) print_exit(MSG_START "invalid value for --transition (should be between 0 and 0.5).\n");
+	//the value becomes a malloc() size and a read() length below
+	if(bufsize<=0) print_exit(MSG_START "invalid value for --bufsize (should be >0)\n");
+
+	if(decimation==1); //don't do anything then
+	else if(!strcmp(ddc_method_str,"td"))
+	{
+		ddc_method = M_TD;
+		fprintf(stderr, MSG_START "method is M_TD (default).\n");
+	}
+	else if (!strcmp(ddc_method_str,"fastddc"))
+	{
+		ddc_method = M_FASTDDC;
+		fprintf(stderr, MSG_START "method is M_FASTDDC.\n");
+	}
+	else print_exit(MSG_START "invalid parameter given to --method.\n");
+
+	//set signals (SIGKILL is not registered: it cannot be caught, sigaction() only fails with EINVAL)
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = sig_handler;
+	sigaction(SIGTERM, &sa, NULL);
+	sigaction(SIGQUIT, &sa, NULL);
+	sigaction(SIGINT, &sa, NULL);
+	sigaction(SIGHUP, &sa, NULL);
+	sigaction(SIGCHLD, &sa, NULL);
+	sigaction(SIGPIPE, &sa, NULL);
+	prctl(PR_SET_PDEATHSIG, SIGHUP); //get a signal when parent exits
+
+	struct sockaddr_in addr_host;
+	int listen_socket;
+	std::vector<client_t*> clients;
+	clients.reserve(100);
+	listen_socket=socket(AF_INET,SOCK_STREAM,0);
+	if(listen_socket == -1)
+		error_exit(MSG_START "cannot create socket");
+
+	int sockopt = 1;
+	if( setsockopt(listen_socket, SOL_SOCKET, SO_REUSEADDR, (char *)&sockopt, sizeof(sockopt)) == -1 )
+		error_exit(MSG_START "cannot set SO_REUSEADDR");  //the best description on SO_REUSEADDR ever: http://stackoverflow.com/a/14388707/3182453
+
+	memset(&addr_host,0,sizeof(addr_host)); //zero bytes, not the character '0'
+	addr_host.sin_family=AF_INET;
+	addr_host.sin_port=htons(host_port);
+
+	if( (addr_host.sin_addr.s_addr=inet_addr(host_address)) == INADDR_NONE )
+		error_exit(MSG_START "invalid host address");
+
+	if( bind(listen_socket, (struct sockaddr*) &addr_host, sizeof(addr_host)) < 0 )
+		error_exit(MSG_START "cannot bind() address to the socket");
+
+	if( listen(listen_socket, 10) == -1 )
+		error_exit(MSG_START "cannot listen() on socket");
+
+	fprintf(stderr,MSG_START "listening on %s:%d\n", inet_ntoa(addr_host.sin_addr), host_port);
+
+	struct sockaddr_in addr_cli;
+	socklen_t addr_cli_len = sizeof(addr_cli);
+	int new_socket;
+
+	bufsizeall = bufsize*sizeof(char);
+	buf = (char*)malloc(bufsizeall);
+	if(!buf) print_exit(MSG_START "cannot allocate the transfer buffer\n");
+
+	FILE* tempfile = fopen("/proc/sys/fs/pipe-max-size","r");
+	if(!tempfile)
+	{
+		perror(MSG_START "cannot read /proc/sys/fs/pipe-max-size");
+	}
+	else
+	{
+		char pipe_max_size_str[100];
+		//read one byte less than the array holds so that the terminator always fits
+		size_t tfread = fread(pipe_max_size_str, 1, sizeof(pipe_max_size_str)-1, tempfile);
+		pipe_max_size_str[tfread]='\0';
+		pipe_max_size = atoi(pipe_max_size_str);
+		fclose(tempfile);
+	}
+
+	//We'll see if it is a good idea:
+	//setpgrp();
+	//pgrp = getpgrp();
+	//It is not, because we can't catch Ctrl+C (SIGINT), as it is sent to a process group...
+
+	//Start DSP subprocess from the main process if required
+	char main_subprocess_cmd_buf[500];
+
+	int pipe_m2s_ctl[2];	//main to subprocess :: control channel
+	int pipe_s2m[2];		//subprocess to main
+
+	if(pipe(pipe_m2s_ctl)) error_exit(MSG_START "couldn't create pipe_m2s_ctl");
+	if(pipe(pipe_s2m)) error_exit(MSG_START "couldn't create pipe_s2m");
+
+	if(decimation!=1)
+	{
+		switch(ddc_method)
+		{
+		case M_TD:
+			break;
+		case M_FASTDDC:
+			snprintf(main_subprocess_cmd_buf, sizeof(main_subprocess_cmd_buf), subprocess_args_fastddc_1, decimation, transition_bw);
+			fprintf(stderr, MSG_START "starting main_subprocess_cmd: %s\n", main_subprocess_cmd_buf);
+			if(!(main_subprocess_pid = run_subprocess( main_subprocess_cmd_buf, 0, pipe_s2m, &main_subprocess_pgrp )))
+				print_exit(MSG_START "couldn't start main_subprocess_cmd!\n");
+			close(STDIN_FILENO); // redirect stdin to the stdin of the subprocess
+			break;
+		}
+	}
+
+	int highfd = 0;
+	maxfd(&highfd, listen_socket);
+	if(main_subprocess_pid) input_fd = pipe_s2m[0]; //else STDIN_FILENO
+	maxfd(&highfd, input_fd);
+
+	//Set stdin and listen_socket to non-blocking
+	if(set_nonblocking(input_fd) || set_nonblocking(listen_socket)) //don't do it before subprocess fork!
+		error_exit(MSG_START "cannot set_nonblocking()");
+
+	for(;;)
+	{
+		//select() replaces the set with the descriptors that are ready, so it has to be rebuilt
+		//before every call; otherwise a descriptor that was idle once is never waited on again
+		FD_ZERO(&select_fds);
+		FD_SET(listen_socket, &select_fds);
+		FD_SET(input_fd, &select_fds);
+
+		//Let's wait until there is any new data to read, or any new connection!
+		select(highfd, &select_fds, NULL, NULL, NULL);
+
+		//Is there a new client connection?
+		addr_cli_len = sizeof(addr_cli); //in/out parameter of accept(), reset it for every call
+		if( (new_socket = accept(listen_socket, (struct sockaddr*)&addr_cli, &addr_cli_len)) != -1)
+		{
+			this_client = new client_t;
+			this_client->error = 0;
+			memcpy(&this_client->addr, &addr_cli, sizeof(this_client->addr));
+			this_client->socket = new_socket;
+			if(pipe(this_client->pipefd) == -1)
+			{
+				perror(MSG_START "cannot open new pipe() for the client");
+				//give up on this connection without leaking its socket or its record
+				close(new_socket);
+				delete this_client;
+				this_client = NULL;
+				continue;
+			}
+			//pipe_max_size stays 0 when /proc/sys/fs/pipe-max-size could not be read
+			if(pipe_max_size>0 && fcntl(this_client->pipefd[1], F_SETPIPE_SZ, pipe_max_size) == -1)
+				perror("failed to F_SETPIPE_SZ for the client pipe");
+			this_client->pid = fork();
+			if(this_client->pid < 0)
+			{
+				//fork failed: nobody will ever serve this connection
+				perror(MSG_START "cannot fork() for the client");
+				close(this_client->pipefd[0]);
+				close(this_client->pipefd[1]);
+				close(new_socket);
+				delete this_client;
+				this_client = NULL;
+			}
+			else if(this_client->pid > 0)
+			{
+				//We're the parent
+				set_nonblocking(this_client->pipefd[1]);
+				clients.push_back(this_client);
+				fprintf(stderr, MSG_START "client pid: %d\n", this_client->pid);
+			}
+			else
+			{
+				//We're the client
+				client();
+				return 1;
+			}
+		}
+
+		int retval = read(input_fd, buf, bufsizeall);
+		if(retval==0)
+		{
+			//end of input stream: select() would report it as readable forever, so stop here.
+			//The client processes asked for SIGHUP on parent death (PR_SET_PDEATHSIG) and clean up themselves.
+			if(main_subprocess_pid) killpg2(main_subprocess_pgrp);
+			print_exit(MSG_START "end of input, exiting.\n");
+		}
+		else if(retval != -1)
+		{
+			for (size_t i=0; i<clients.size(); )
+			{
+				client_t* cl = clients[i];
+				if(write(cl->pipefd[1], buf, retval)!=-1)
+				{
+					if(cl->error) print_client(cl, "pipe okay again.");
+					cl->error=0;
+					i++;
+					continue;
+				}
+
+				if(!cl->error)
+				{
+					print_client(cl, "lost buffer, failed to write pipe.");
+					cl->error=1;
+				}
+				waitpid(cl->pid, NULL, WNOHANG); //collect the child if it has already exited
+				if(proc_exists(cl->pid))
+				{
+					i++;
+					continue;
+				}
+
+				//Client exited!
+				print_client(cl, "closing client from main process.");
+				close(cl->pipefd[1]);
+				close(cl->socket);
+				delete cl;
+				clients.erase(clients.begin()+i); //i is not advanced: the next client has moved into this slot
+				fprintf(stderr, MSG_START "done closing client from main process.\n");
+			}
+		}
+		//retval == -1: nothing to read right now (input_fd is non-blocking), go back to select()
+		//TODO: at the end, server closes pipefd[1] for client
+	}
+
+	return 0;
+}
+
+//Starts `bash -c cmd` as a child process in a process group of its own.
+//  cmd:      shell command line to execute
+//  pipe_in:  pipe whose read end becomes the child's stdin, or NULL to keep the inherited stdin
+//  pipe_out: pipe whose write end becomes the child's stdout, or NULL to keep the inherited stdout
+//  pgrp:     receives the process group id of the child
+//  returns:  pid of the child, or 0 if fork() failed
+//A private pipe synchronizes the two processes: the parent reads the group id only after the
+//child has called setpgrp(). Both ends are closed again in both processes, so no descriptors
+//leak per call and the parent cannot block forever if the child dies before signalling.
+pid_t run_subprocess(char* cmd, int* pipe_in, int* pipe_out, pid_t* pgrp)
+{
+	int syncpipe[2];
+	if(pipe(syncpipe)==-1) error_exit("failed to create pipe()");
+	pid_t pid = fork();
+
+	if(pid < 0)
+	{
+		//fork failed
+		close(syncpipe[0]);
+		close(syncpipe[1]);
+		return 0;
+	}
+	if(pid == 0)
+	{
+		//We're the subprocess
+		close(syncpipe[0]);
+		setpgrp();
+		write(syncpipe[1], " ", 1); //tell the parent that the process group exists now
+		close(syncpipe[1]); //must not be inherited by the executed command
+		if(pipe_in)
+		{
+			close(pipe_in[1]);
+			dup2(pipe_in[0], STDIN_FILENO);
+		}
+		if(pipe_out)
+		{
+			close(pipe_out[0]);
+			dup2(pipe_out[1], STDOUT_FILENO);
+		}
+		execl("/bin/bash","bash","-c",cmd, (char*)0);
+		error_exit(MSG_START "run_subprocess failed to execute command");
+	}
+	else
+	{
+		//We're the parent: with our own write end closed, read() returns 0 instead of blocking
+		//if the child is gone before it could write
+		char synctemp;
+		close(syncpipe[1]);
+		read(syncpipe[0], &synctemp, 1);
+		close(syncpipe[0]);
+		*pgrp = getpgid(pid);
+		fprintf(stderr, MSG_START "run_subprocess pgid returned = %d\n", *pgrp);
+		return pid;
+	}
+}
+
+//Logs a message on stderr, prefixed with the address and port of the given client.
+//  client: client whose peer address is printed
+//  what:   message text
+void print_client(client_t* client, const char* what)
+{
+	//sin_port is stored in network byte order; convert it so the log shows the real port number
+	fprintf(stderr,MSG_START "(client %s:%d) %s\n", inet_ntoa(client->addr.sin_addr), ntohs(client->addr.sin_port), what);
+}
+
+#define CTL_BUFSIZE 1024
+
+//Non-blocking read of newline-terminated control commands from a socket.
+//Received bytes are collected in a static buffer across calls. When the data received by this
+//call completes at least one line, the LAST complete line (without its '\n') is copied to
+//output, any incomplete tail is kept for the next call, and 1 is returned. Earlier complete
+//lines of the same batch are discarded.
+//  fd:       socket to read from
+//  output:   receives the command as a NUL-terminated string
+//  max_size: size of output in bytes; longer lines are truncated to max_size-1 characters
+//  returns:  1 if a command was stored in output, 0 otherwise
+//The static buffer is shared by all callers, so the function serves one socket per process.
+int read_socket_ctl(int fd, char* output, int max_size)
+{
+	static char buffer[CTL_BUFSIZE];
+	static int buffer_index=0;
+	if(buffer_index==CTL_BUFSIZE) buffer_index=0; //buffer full without any newline: drop it
+	int bytes_read=recv(fd,buffer+buffer_index,(CTL_BUFSIZE-buffer_index)*sizeof(char), MSG_DONTWAIT);
+	if(bytes_read<=0) return 0;
+
+	const int buffered=buffer_index+bytes_read; //bytes in the buffer, old tail included
+	int prev_newline_at=0; //start of the last complete line
+	int last_newline_at=0; //one past the last '\n', 0 if there is none
+	for(int i=0;i<buffered;i++)
+	{
+		if(buffer[i]=='\n')
+		{
+			prev_newline_at=last_newline_at;
+			last_newline_at=i+1;
+		}
+	}
+	if(!last_newline_at)
+	{
+		buffer_index=buffered;
+		return 0;
+	}
+
+	//copy to output buffer, always leaving room for the terminator
+	int oi=0;
+	for(int i=prev_newline_at;buffer[i]!='\n'&&oi<max_size-1;i++) output[oi++]=buffer[i];
+	if(max_size>0) output[oi]='\0';
+
+	//keep what follows the last newline; its length has to count the bytes that were already
+	//buffered before this call, not only the ones just received
+	buffer_index=buffered-last_newline_at;
+	memmove(buffer,buffer+last_newline_at,buffer_index);
+	return 1;
+}
+
+//Parses a control line of the form "<cmd>=<value>".
+//  input:  NUL-terminated line; it is only read
+//  cmd:    command name that must be followed directly by '='
+//  format: scanf-style format applied to the text after the '='
+//  ...:    destination pointers matching format
+//  returns: number of values converted; 0 if the line is a different command or the value
+//           could not be parsed, so the result can be used directly as a truth value
+int ctl_get_arg(char* input, const char* cmd, const char* format, ...)
+{
+	const size_t cmdlen=strlen(cmd);
+	//strncmp() stops at the terminator of a short input, so input[cmdlen] is only looked at
+	//when the first cmdlen characters really exist
+	if(strncmp(input,cmd,cmdlen)!=0 || input[cmdlen]!='=') return 0;
+
+	va_list vl;
+	va_start(vl,format);
+	int retval=vsscanf(input+cmdlen+1,format,vl);
+	va_end(vl);
+
+	//vsscanf() returns EOF (negative) for an empty value; callers must not take that for success
+	return (retval<0) ? 0 : retval;
+}
+
+//Body of the per-client process forked by main(); serves the connection in this_client.
+//Unless decimation is 1, a DSP subprocess is started between the pipe fed by the main process
+//and this process. The loop then handles control commands arriving on the client socket
+//("bypass=<0|1>", "shift=<rate>") and forwards the sample stream to the client.
+//Does not return: the process exits when sending to the client fails.
+void client()
+{
+	in_client=1;
+	print_client(this_client, "client process forked.");
+
+	char client_subprocess_cmd_buf[500];
+	int input_fd = this_client->pipefd[0];
+	//-1 marks "no such pipe": nothing is created when decimation is 1
+	int pipe_ctl[2] = {-1, -1};
+	int pipe_stdout[2] = {-1, -1};
+
+	prctl(PR_SET_PDEATHSIG, SIGHUP); //get a signal when parent exits
+
+	if(decimation!=1)
+	{
+		if(pipe(pipe_ctl)==-1) error_exit(MSG_START "cannot open new pipe() for the client subprocess");
+		if(pipe(pipe_stdout)==-1) error_exit(MSG_START "cannot open new pipe() for the client subprocess");
+		switch(ddc_method)
+		{
+		case M_TD:
+			snprintf(client_subprocess_cmd_buf, sizeof(client_subprocess_cmd_buf), subprocess_cmd_td, pipe_ctl[0], decimation, transition_bw);
+			break;
+		case M_FASTDDC:
+			snprintf(client_subprocess_cmd_buf, sizeof(client_subprocess_cmd_buf), subprocess_args_fastddc_2, pipe_ctl[0], decimation, transition_bw);
+			break;
+		}
+
+		if(!(client_subprocess_pid = run_subprocess( client_subprocess_cmd_buf, this_client->pipefd, pipe_stdout, &client_subprocess_pgrp)))
+			print_exit(MSG_START "couldn't start client_subprocess_cmd!\n");
+		fprintf(stderr, MSG_START "starting client_subprocess_cmd: %s\n", client_subprocess_cmd_buf);
+		input_fd = pipe_stdout[0]; //we don't have to set it nonblocking
+		fprintf(stderr, MSG_START "pipe_stdout[0] = %d\n", pipe_stdout[0]);
+		write(pipe_ctl[1], "0.0\n", 4); //initial shift rate
+	}
+	char recv_cmd[CTL_BUFSIZE];
+	char temps[CTL_BUFSIZE*2];
+	int tempi;
+	float tempf;
+
+	for(;;)
+	{
+		while(read_socket_ctl(this_client->socket, recv_cmd, CTL_BUFSIZE))
+		{
+			snprintf(temps, sizeof(temps), "read_socket_ctl: %s", recv_cmd);
+			print_client(this_client, temps);
+			//">0": a failed conversion must not be taken for a parsed value (tempi/tempf would be garbage)
+			if(ctl_get_arg(recv_cmd, "bypass", "%d", &tempi)>0)
+			{
+				if(tempi==1 && client_subprocess_pid)
+				{
+					input_fd=this_client->pipefd[0]; //by doing this, we don't read from pipe_stdout[0] anymore, so that csdr stops doing anything, and also doesn't read anymore from the input: we get the whole I/Q stream!
+				}
+				if(tempi==0 && client_subprocess_pid)
+				{
+					input_fd=pipe_stdout[0];
+				}
+			}
+			if(ctl_get_arg(recv_cmd, "shift", "%g", &tempf)>0)
+			{
+				//without a subprocess (decimation==1) there is no control pipe to write to
+				if(pipe_ctl[1]!=-1)
+				{
+					tempi=snprintf(temps, sizeof(temps), "%g\n", tempf);
+					write(pipe_ctl[1], temps, tempi);
+				}
+			}
+		}
+		int nread = read(input_fd,buf,bufsizeall);
+		if(nread<=0) continue;
+		if(send(this_client->socket,buf,nread,0)==-1)
+		{
+			print_client(this_client, "client process is exiting.\n");
+			if(client_subprocess_pid) killpg2(client_subprocess_pgrp);
+			exit(0);
+		}
+	}
+}
+
+//Sends SIGTERM to a process group, refusing the group ids 0 and 1.
+//killpg() with 0 would signal the caller's own group, and 1 is init's group.
+//  pgrp: id of the process group to terminate
+void killpg2(pid_t pgrp)
+{
+	//fprintf(stderr, MSG_START "killpg2: %d\n", pgrp);
+	const int is_protected = (pgrp==0) || (pgrp==1);
+	if(is_protected) return;
+	killpg(pgrp, SIGTERM);
+}
+
+//Fatal-error helper for failed system calls: perror() prints `why` followed by the description
+//of the current errno, then the process exits with status 1.
+//  why: prefix for the error line
+void error_exit(const char* why)
+{
+	const int failure_status = 1;
+	perror(why);
+	exit(failure_status);
+}
+
+//Fatal-error helper for problems that have no errno: writes `why` to stderr unchanged (the
+//caller supplies any trailing newline) and exits with status 1.
+//  why: complete message to print
+void print_exit(const char* why)
+{
+	const int failure_status = 1;
+	fprintf(stderr, "%s", why);
+	exit(failure_status);
+}
+
+//Keeps *maxfd at "highest descriptor seen so far, plus one", which is the value select()
+//expects as its first argument.
+//  maxfd: running value, raised in place when fd requires it
+//  fd:    descriptor that is going to be watched
+void maxfd(int* maxfd, int fd)
+{
+	const int needed = fd+1;
+	if(*maxfd < needed) *maxfd = needed;
+}
--- a/ddcd_old.h
+++ b/ddcd_old.h
@ -0,0 +1,62 @@
+#pragma once
+
+#include <signal.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <iostream>
+#include <vector>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <stdarg.h>
+#include <sys/stat.h>
+#include <semaphore.h>
+
+//State the main process keeps for every connected client (process-per-client design).
+typedef struct client_s
+{
+	struct sockaddr_in addr; //peer address as filled in by accept()
+	int socket; //connected socket returned by accept()
+	pid_t pid; //return value of fork(): pid of the process that runs client() for this connection
+	int pipefd[2]; //pipe from the main process ([1], written in main()) to the client process ([0], read in client())
+	int error; //non-zero while writes to pipefd[1] are failing
+	pid_t dsp_proc; //NOTE(review): not referenced by the code in ddcd_old.cpp as shown
+} client_t;
+
+
+//Body of the forked per-client process; serves this_client.
+void client();
+//Calls perror(why) and exits with status 1.
+void error_exit(const char* why);
+//Writes `why` to stderr as-is and exits with status 1.
+void print_exit(const char* why);
+//Logs `what` on stderr, prefixed with the address and port of `client`.
+void print_client(client_t* client, const char* what);
+//Non-zero if the process can be signalled; pid 0 and 1 always count as existing.
+int proc_exists(pid_t pid);
+//Runs `bash -c cmd` in its own process group with optional stdin/stdout pipes; returns the pid, or 0 if fork() failed.
+pid_t run_subprocess(char* cmd, int* pipe_in, int* pipe_out, pid_t* pgrp);
+//Raises *maxfd to fd+1 if it is not already larger than fd.
+void maxfd(int* maxfd, int fd);
+//Handler registered for all signals that main() sets up.
+void sig_handler(int signo);
+//Sends SIGTERM to the process group unless pgrp is 0 or 1.
+void killpg2(pid_t pgrp);
+//Parses "<cmd>=<value>" from input with the given scanf-style format; returns the vsscanf() result, or 0 if cmd does not match.
+int ctl_get_arg(char* input, const char* cmd, const char* format, ...);
+
+//DDC implementation selected with --method on the command line (parsed in main()).
+typedef enum ddc_method_e 
+{
+	M_TD, //chosen by "td", the default: each client runs the subprocess_cmd_td pipeline
+	M_FASTDDC //chosen by "fastddc": subprocess_args_fastddc_1 runs once in the main process, subprocess_args_fastddc_2 per client
+} ddc_method_t;
+
+//printf-style templates for the shell command lines handed to run_subprocess().
+
+//Per-client pipeline for M_TD; client() fills in: control pipe fd, decimation, transition_bw.
+//The shift command used depends on whether NEON_OPTS is defined at compile time.
+const char subprocess_cmd_td[] = "csdr "
+#ifdef NEON_OPTS
+	"shift_addfast_cc"
+#else
+	"shift_unroll_cc"
+#endif
+	" --fd %d | csdr fir_decimate_cc %d %g";
+
+//Command started once by main() for M_FASTDDC; arguments: decimation, transition_bw.
+const char subprocess_args_fastddc_1[] = "csdr fastddc_fwd_cc %d %g";
+//const char subprocess_args_fastddc_1[] = "csdr through %d %g";
+//Per-client command for M_FASTDDC; client() fills in: control pipe fd, decimation, transition_bw.
+const char subprocess_args_fastddc_2[] = "csdr fastddc_inv_cc --fd %d %d %g";
+//const char subprocess_args_fastddc_2[] = "csdr convert_u8_f %d %d %g";
--- a/fastddc.c
+++ b/fastddc.c
@ -0,0 +1,166 @@
+/*
+This software is part of libcsdr, a set of simple DSP routines for 
+Software Defined Radio.
+
+Copyright (c) 2014, Andras Retzler <randras@sdr.hu>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL ANDRAS RETZLER BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "fastddc.h"
+
+//DDC implementation based on:
+//http://www.3db-labs.com/01598092_MultibandFilterbank.pdf
+
+// Returns non-zero when `a` has no fractional part (floorf(a) == a).
+// Declared `static inline`: a plain `inline` definition in C99/gnu99 provides
+// no external definition, so any call the compiler chooses not to inline
+// (e.g. when building without optimization) would fail to link.
+static inline int is_integer(float a) { return floorf(a) == a; }
+
+/*
+ * Computes the sizes and frequency shifts used by the FFT-based DDC and
+ * stores them in *ddc.
+ *
+ * Parameters:
+ *   ddc           - parameter block to fill in (output_scrape is not written).
+ *   transition_bw - handed to firdes_filter_len() to get the minimal tap count.
+ *   decimation    - total decimation factor; must be positive. It is split
+ *                   into pre_decimation (a power of two, applied in the
+ *                   frequency domain) and post_decimation (the rest, applied
+ *                   in the time domain).
+ *   shift_rate    - requested frequency shift; it is scaled by fft_size to
+ *                   obtain a bin offset, i.e. it is relative to the input
+ *                   sample rate.
+ *
+ * Returns non-zero on error: when decimation is not positive (in which case
+ * *ddc is left untouched) or when the resulting fft_size is <= 2.
+ */
+int fastddc_init(fastddc_t* ddc, float transition_bw, int decimation, float shift_rate)
+{
+	//A zero decimation would never leave the loop below (0/2 stays 0 while pre_decimation keeps doubling),
+	//and a negative one has no meaning, so both are rejected up front.
+	if(decimation<=0) return 1;
+
+	ddc->pre_decimation = 1; //this will be done in the frequency domain
+	ddc->post_decimation = decimation; //this will be done in the time domain
+	//Move factors of two from post_decimation to pre_decimation while post_decimation is even and larger than 2.
+	while( is_integer((float)ddc->post_decimation/2) && ddc->post_decimation/2 != 1)
+	{
+		ddc->post_decimation/=2;
+		ddc->pre_decimation*=2;
+	}
+	ddc->taps_min_length = firdes_filter_len(transition_bw); //this is the minimal number of taps to achieve the given transition_bw; we are likely to have more taps than this number.
+	ddc->taps_length = next_pow2(ceil(ddc->taps_min_length/(float)ddc->pre_decimation) * ddc->pre_decimation) + 1; //the number of taps must be a multiple of the decimation factor
+	ddc->fft_size = next_pow2(ddc->taps_length * 4); //it is a good rule of thumb for performance (based on the article), but we should do benchmarks
+	while (ddc->fft_size<ddc->pre_decimation) ddc->fft_size*=2; //fft_size should be a multiple of pre_decimation.
+	ddc->overlap_length = ddc->taps_length - 1;
+	ddc->input_size = ddc->fft_size - ddc->overlap_length;
+	ddc->fft_inv_size = ddc->fft_size / ddc->pre_decimation;
+
+	//Shift operation in the frequency domain: we can shift by a multiple of v.
+	ddc->v = ddc->fft_size/ddc->overlap_length; //overlap factor | +-1 ? (or maybe ceil() this?)
+	int middlebin=ddc->fft_size / 2;
+	ddc->startbin = middlebin + middlebin * (-shift_rate) * 2;
+	//fprintf(stderr, "ddc->startbin=%g\n",(float)ddc->startbin);
+	ddc->startbin = ddc->v * round( ddc->startbin / (float)ddc->v ); //snap to the nearest multiple of v
+	//fprintf(stderr, "ddc->startbin=%g\n",(float)ddc->startbin);
+	ddc->offsetbin = ddc->startbin - middlebin;
+	//The part of the shift that the bin-granular pre_shift could not cover is left for the time domain (post_shift).
+	ddc->post_shift = (ddc->pre_decimation)*(shift_rate+((float)ddc->offsetbin/ddc->fft_size));
+	ddc->pre_shift = ddc->offsetbin/(float)ddc->fft_size;
+	ddc->dsadata = decimating_shift_addition_init(ddc->post_shift, ddc->post_decimation);
+
+	//Overlap is scrapped, not added
+	ddc->scrap=ddc->overlap_length/ddc->pre_decimation; //TODO this is problematic sometimes! overlap_length = 401 :: scrap = 200
+	ddc->post_input_size=ddc->fft_inv_size-ddc->scrap;
+
+	return ddc->fft_size<=2; //returns true on error
+}
+
+/*
+ * Writes a five-line, human-readable dump of the fields of *ddc to stderr
+ * with a single fprintf() call. `source` is printed as the prefix of the
+ * first line. Note that the text labels the dump "fastddc_print_sizes()",
+ * which differs from this function's name. The output_scrape and dsadata
+ * fields are not printed.
+ */
+void fastddc_print(fastddc_t* ddc, char* source)
+{
+	fprintf(stderr,
+		"%s: fastddc_print_sizes(): (fft_size = %d) = (taps_length = %d) + (input_size = %d) - 1\n"
+		"  overlap     ::  (overlap_length = %d) = taps_length - 1, taps_min_length = %d\n"
+		"  decimation  ::  decimation = (pre_decimation = %d) * (post_decimation = %d), fft_inv_size = %d\n"
+		"  shift       ::  startbin = %d, offsetbin = %d, v = %d, pre_shift = %g, post_shift = %g\n"
+		"  o&s         ::  post_input_size = %d, scrap = %d\n"
+		, 
+		source, ddc->fft_size, ddc->taps_length, ddc->input_size, 
+		ddc->overlap_length, ddc->taps_min_length,
+		ddc->pre_decimation, ddc->post_decimation, ddc->fft_inv_size,
+		ddc->startbin, ddc->offsetbin, ddc->v, ddc->pre_shift, ddc->post_shift, 
+		ddc->post_input_size, ddc->scrap );
+}
+
+// Exchanges, in place, the first fft_size/2 complex samples of `io` with the
+// fft_size/2 samples that follow them. When fft_size is odd, the final sample
+// is left where it is.
+void fft_swap_sides(complexf* io, int fft_size)
+{
+	const int half=fft_size/2;
+	complexf saved;
+	for(int lower=0, upper=half; lower<half; lower++, upper++)
+	{
+		//keep a copy of the lower-half sample
+		iof(&saved,0)=iof(io,lower);
+		qof(&saved,0)=qof(io,lower);
+		//upper-half sample moves down
+		iof(io,lower)=iof(io,upper);
+		qof(io,lower)=qof(io,upper);
+		//saved lower-half sample moves up
+		iof(io,upper)=iof(&saved,0);
+		qof(io,upper)=qof(&saved,0);
+	}
+}
+
+decimating_shift_addition_status_t fastddc_inv_cc(complexf* input, complexf* output, fastddc_t* ddc, FFT_PLAN_T* plan_inverse, complexf* taps_fft, decimating_shift_addition_status_t shift_stat)
+{
+	//implements DDC by using the overlap & scrap method
+	//TODO: +/-1s on overlap_size et al
+	/* input should have ddc->fft_size number of elements
+	 *
+	 * Summary of what this function does, in order:
+	 *   1. zeroes plan_inverse->input;
+	 *   2. swaps the two halves of `input` IN PLACE (the caller's buffer is modified);
+	 *   3. multiplies every input bin by the matching taps_fft bin and accumulates the
+	 *      product into plan_inverse->input at an index shifted by ddc->offsetbin and
+	 *      wrapped modulo plan_inverse->size;
+	 *   4. divides the accumulated bins by ddc->pre_decimation, swaps the halves back
+	 *      and runs the inverse FFT;
+	 *   5. divides the result by plan_inverse->size, skips its first ddc->scrap samples
+	 *      and passes ddc->post_input_size samples to decimating_shift_addition_cc(),
+	 *      which writes `output`.
+	 *
+	 * taps_fft is read at indices 0 .. ddc->fft_size-1.
+	 * plan_inverse->size is presumably equal to ddc->fft_inv_size - TODO confirm at the caller.
+	 * Returns the status returned by decimating_shift_addition_cc(); shift_stat is
+	 * the status handed in by the caller and is forwarded to that call.
+	 */
+
+	complexf* inv_input = plan_inverse->input;
+	complexf* inv_output = plan_inverse->output;
+
+	//Zero the input buffer of the inverse FFT (bins are accumulated into it with += below)
+	for(int i=0;i<plan_inverse->size;i++)
+	{
+		iof(inv_input,i)=0;
+		qof(inv_input,i)=0;
+	}
+
+	//Alias & shift & filter at once
+	fft_swap_sides(input, ddc->fft_size); //TODO this is not very optimal, but now we stick with this slow solution until we got the algorithm working (note: this reorders the caller's input buffer)
+	//fprintf(stderr, " === fastddc_inv_cc() ===\n");
+	//The problem is, we have to say that the output_index should be the _center_ of the spectrum when i is at startbin! (startbin is at the _center_ of the input to downconvert, not at its first bin!)
+	for(int i=0;i<ddc->fft_size;i++)
+	{
+		int output_index = (ddc->fft_size+i-ddc->offsetbin+(ddc->fft_inv_size/2))%plan_inverse->size; //wrapping modulo the inverse size makes several input bins land on (and add up in) the same output bin
+		int tap_index = i;
+		//fprintf(stderr, "output_index = %d , tap_index = %d, input index = %d\n", output_index, tap_index, i);
+		//cmultadd(inv_input+output_index, input+i, taps_fft+tap_index); //cmultadd(output, input1, input2):   complex output += complex input1 * complex input 2
+		// (a+b*i)*(c+d*i) = (ac-bd)+(ad+bc)*i
+		// a = iof(input,i)
+		// b = qof(input,i)
+		// c = iof(taps_fft,i)
+		// d = qof(taps_fft,i)
+		iof(inv_input,output_index) += iof(input,i) * iof(taps_fft,i) - qof(input,i) * qof(taps_fft,i);
+		qof(inv_input,output_index) += iof(input,i) * qof(taps_fft,i) + qof(input,i) * iof(taps_fft,i);
+		//iof(inv_input,output_index) += iof(input,i); //no filter
+		//qof(inv_input,output_index) += qof(input,i);		
+	}
+
+	//Normalize inv fft bins (now our output level is not higher than the input... but we may optimize this into the later loop when we normalize by size)
+	for(int i=0;i<plan_inverse->size;i++)
+	{
+		iof(inv_input,i)/=ddc->pre_decimation;
+		qof(inv_input,i)/=ddc->pre_decimation;
+	}
+
+	fft_swap_sides(inv_input,plan_inverse->size); //undo the half-swap before transforming back
+	fft_execute(plan_inverse);
+
+	//Normalize data
+	for(int i=0;i<plan_inverse->size;i++) //@fastddc_inv_cc: normalize by size
+	{
+		iof(inv_output,i)/=plan_inverse->size;
+		qof(inv_output,i)/=plan_inverse->size;
+	}
+	
+	//Overlap is scrapped, not added
+	//Shift correction (the first ddc->scrap output samples are skipped; the remaining shift and decimation happen in the time domain)
+	shift_stat=decimating_shift_addition_cc(inv_output+ddc->scrap, output, ddc->post_input_size, ddc->dsadata, ddc->post_decimation, shift_stat);
+	//shift_stat.output_size = ddc->post_input_size; //bypass shift correction
+	//memcpy(output, inv_output+ddc->scrap, sizeof(complexf)*ddc->post_input_size);
+	return shift_stat;
+}
--- a/fastddc.h
+++ b/fastddc.h
@ -0,0 +1,29 @@
+#pragma once //this header defines a struct type, so it must not be processed twice in one translation unit
+
+#include <math.h>
+#include "libcsdr.h"
+#include "libcsdr_gpl.h"
+
+//Parameter block of the FFT-based DDC. The descriptions below state how fastddc_init() computes each field.
+typedef struct fastddc_s
+{
+	int pre_decimation; //power-of-two part of the decimation, applied in the frequency domain
+	int post_decimation; //remaining part of the decimation, applied in the time domain
+	int taps_length; //next_pow2(taps_min_length rounded up to a multiple of pre_decimation) + 1
+	int taps_min_length; //firdes_filter_len(transition_bw)
+	int overlap_length; //it is taps_length - 1
+	int fft_size; //next_pow2(taps_length * 4), doubled until it is at least pre_decimation
+	int fft_inv_size; //fft_size / pre_decimation
+	int input_size; //fft_size - overlap_length
+	int post_input_size; //fft_inv_size - scrap
+	float pre_shift; //offsetbin / fft_size
+	int startbin; //for pre_shift; the bin selected by shift_rate, rounded to a multiple of v
+	int v; //step for pre_shift; fft_size / overlap_length
+	int offsetbin; //startbin - fft_size/2
+	float post_shift; //pre_decimation * (shift_rate + offsetbin/fft_size)
+	int output_scrape; //not assigned by fastddc_init(); any use is outside fastddc.c - TODO confirm whether it is still needed
+	int scrap; //overlap_length / pre_decimation; fastddc_inv_cc() skips this many inverse FFT output samples
+	shift_addition_data_t dsadata; //decimating_shift_addition_init(post_shift, post_decimation)
+} fastddc_t;
+
+//Fills in *ddc; returns non-zero on error.
+int fastddc_init(fastddc_t* ddc, float transition_bw, int decimation, float shift_rate);
+//Processes one block of ddc->fft_size frequency-domain samples (input is modified in place) and returns the updated shift status.
+decimating_shift_addition_status_t fastddc_inv_cc(complexf* input, complexf* output, fastddc_t* ddc, FFT_PLAN_T* plan_inverse, complexf* taps_fft, decimating_shift_addition_status_t shift_stat);
+//Dumps the fields of *ddc to stderr, prefixed with source.
+void fastddc_print(fastddc_t* ddc, char* source);
+//Swaps the two halves of an fft_size-element complex buffer in place.
+void fft_swap_sides(complexf* io, int fft_size);
--- a/grc_tests/test_bandpass_fir_fft.grc
+++ b/grc_tests/test_bandpass_fir_fft.grc
--- a/grc_tests/test_fastddc.grc
+++ b/grc_tests/test_fastddc.grc
@ -0,0 +1,996 @@
+<?xml version='1.0' encoding='utf-8'?>
+<?grc format='1' created='3.7.8'?>
+<flow_graph>
+  <timestamp>Sat Nov 15 20:06:19 2014</timestamp>
+  <block>
+    <key>options</key>
+    <param>
+      <key>author</key>
+      <value></value>
+    </param>
+    <param>
+      <key>window_size</key>
+      <value>1280, 1024</value>
+    </param>
+    <param>
+      <key>category</key>
+      <value>Custom</value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>description</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(10, 10)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>generate_options</key>
+      <value>wx_gui</value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>top_block</value>
+    </param>
+    <param>
+      <key>max_nouts</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>realtime_scheduling</key>
+      <value></value>
+    </param>
+    <param>
+      <key>run_options</key>
+      <value>prompt</value>
+    </param>
+    <param>
+      <key>run</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>thread_safe_setters</key>
+      <value></value>
+    </param>
+    <param>
+      <key>title</key>
+      <value></value>
+    </param>
+  </block>
+  <block>
+    <key>variable</key>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(128, 179)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>decimation</value>
+    </param>
+    <param>
+      <key>value</key>
+      <value>4</value>
+    </param>
+  </block>
+  <block>
+    <key>variable_slider</key>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>converver</key>
+      <value>float_converter</value>
+    </param>
+    <param>
+      <key>value</key>
+      <value>50</value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(16, 267)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>grid_pos</key>
+      <value></value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>freq</value>
+    </param>
+    <param>
+      <key>label</key>
+      <value></value>
+    </param>
+    <param>
+      <key>max</key>
+      <value>samp_rate/2</value>
+    </param>
+    <param>
+      <key>min</key>
+      <value>-samp_rate/2</value>
+    </param>
+    <param>
+      <key>notebook</key>
+      <value></value>
+    </param>
+    <param>
+      <key>num_steps</key>
+      <value>100</value>
+    </param>
+    <param>
+      <key>style</key>
+      <value>wx.SL_HORIZONTAL</value>
+    </param>
+  </block>
+  <block>
+    <key>variable</key>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(9, 170)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>samp_rate</value>
+    </param>
+    <param>
+      <key>value</key>
+      <value>400000</value>
+    </param>
+  </block>
+  <block>
+    <key>analog_noise_source_x</key>
+    <param>
+      <key>amp</key>
+      <value>1</value>
+    </param>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(224, 403)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>analog_noise_source_x_0</value>
+    </param>
+    <param>
+      <key>maxoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>minoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>noise_type</key>
+      <value>analog.GR_GAUSSIAN</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>complex</value>
+    </param>
+    <param>
+      <key>seed</key>
+      <value>0</value>
+    </param>
+  </block>
+  <block>
+    <key>analog_pll_freqdet_cf</key>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(112, 675)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>analog_pll_freqdet_cf_0</value>
+    </param>
+    <param>
+      <key>w</key>
+      <value>(3.141592654/200)/2</value>
+    </param>
+    <param>
+      <key>max_freq</key>
+      <value>3.141592654</value>
+    </param>
+    <param>
+      <key>maxoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>min_freq</key>
+      <value>-3.141592654</value>
+    </param>
+    <param>
+      <key>minoutbuf</key>
+      <value>0</value>
+    </param>
+  </block>
+  <block>
+    <key>analog_sig_source_x</key>
+    <param>
+      <key>amp</key>
+      <value>0.2</value>
+    </param>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>1</value>
+    </param>
+    <param>
+      <key>freq</key>
+      <value>freq</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(224, 29)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>analog_sig_source_x_0</value>
+    </param>
+    <param>
+      <key>maxoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>minoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>offset</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>complex</value>
+    </param>
+    <param>
+      <key>samp_rate</key>
+      <value>samp_rate</value>
+    </param>
+    <param>
+      <key>waveform</key>
+      <value>analog.GR_COS_WAVE</value>
+    </param>
+  </block>
+  <block>
+    <key>blocks_multiply_const_vxx</key>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>const</key>
+      <value>(samp_rate/decimation)*(1/(2*3.141592654))</value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(368, 691)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>blocks_multiply_const_vxx_0</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>float</value>
+    </param>
+    <param>
+      <key>maxoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>minoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>vlen</key>
+      <value>1</value>
+    </param>
+  </block>
+  <block>
+    <key>blocks_throttle</key>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(424, 235)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>blocks_throttle_0</value>
+    </param>
+    <param>
+      <key>ignoretag</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>maxoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>minoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>samples_per_second</key>
+      <value>samp_rate</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>complex</value>
+    </param>
+    <param>
+      <key>vlen</key>
+      <value>1</value>
+    </param>
+  </block>
+  <block>
+    <key>ha5kfu_execproc_xx</key>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>commandline</key>
+      <value>csdr fastddc_fwd_cc %d | csdr fastddc_inv_cc 0.4 %d"%(decimation,decimation)+"</value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(616, 235)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>ha5kfu_execproc_xx_1</value>
+    </param>
+    <param>
+      <key>maxoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>minoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>cc</value>
+    </param>
+  </block>
+  <block>
+    <key>notebook</key>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(160, 283)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>grid_pos</key>
+      <value></value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>nb</value>
+    </param>
+    <param>
+      <key>labels</key>
+      <value>['FFT', 'Scope']</value>
+    </param>
+    <param>
+      <key>notebook</key>
+      <value></value>
+    </param>
+    <param>
+      <key>style</key>
+      <value>wx.NB_TOP</value>
+    </param>
+  </block>
+  <block>
+    <key>wxgui_fftsink2</key>
+    <param>
+      <key>avg_alpha</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>average</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>baseband_freq</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>fft_size</key>
+      <value>1024</value>
+    </param>
+    <param>
+      <key>freqvar</key>
+      <value>None</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(952, 155)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>grid_pos</key>
+      <value></value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>wxgui_fftsink2_0</value>
+    </param>
+    <param>
+      <key>notebook</key>
+      <value>nb, 0</value>
+    </param>
+    <param>
+      <key>peak_hold</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>ref_level</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>ref_scale</key>
+      <value>2.0</value>
+    </param>
+    <param>
+      <key>fft_rate</key>
+      <value>15</value>
+    </param>
+    <param>
+      <key>samp_rate</key>
+      <value>samp_rate/decimation</value>
+    </param>
+    <param>
+      <key>title</key>
+      <value>FFT plot of csdr processed signal</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>complex</value>
+    </param>
+    <param>
+      <key>win_size</key>
+      <value></value>
+    </param>
+    <param>
+      <key>win</key>
+      <value>None</value>
+    </param>
+    <param>
+      <key>y_divs</key>
+      <value>10</value>
+    </param>
+    <param>
+      <key>y_per_div</key>
+      <value>10</value>
+    </param>
+  </block>
+  <block>
+    <key>wxgui_fftsink2</key>
+    <param>
+      <key>avg_alpha</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>average</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>baseband_freq</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>1</value>
+    </param>
+    <param>
+      <key>fft_size</key>
+      <value>1024</value>
+    </param>
+    <param>
+      <key>freqvar</key>
+      <value>None</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(616, 291)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>grid_pos</key>
+      <value></value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>wxgui_fftsink2_0_0</value>
+    </param>
+    <param>
+      <key>notebook</key>
+      <value></value>
+    </param>
+    <param>
+      <key>peak_hold</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>ref_level</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>ref_scale</key>
+      <value>2.0</value>
+    </param>
+    <param>
+      <key>fft_rate</key>
+      <value>15</value>
+    </param>
+    <param>
+      <key>samp_rate</key>
+      <value>samp_rate</value>
+    </param>
+    <param>
+      <key>title</key>
+      <value>FFT plot of original signal</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>complex</value>
+    </param>
+    <param>
+      <key>win_size</key>
+      <value></value>
+    </param>
+    <param>
+      <key>win</key>
+      <value>None</value>
+    </param>
+    <param>
+      <key>y_divs</key>
+      <value>10</value>
+    </param>
+    <param>
+      <key>y_per_div</key>
+      <value>10</value>
+    </param>
+  </block>
+  <block>
+    <key>wxgui_numbersink2</key>
+    <param>
+      <key>avg_alpha</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>average</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>decimal_places</key>
+      <value>10</value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>factor</key>
+      <value>1.0</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(576, 619)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>grid_pos</key>
+      <value></value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>wxgui_numbersink2_0</value>
+    </param>
+    <param>
+      <key>max_value</key>
+      <value>(samp_rate/decimation)/2</value>
+    </param>
+    <param>
+      <key>min_value</key>
+      <value>(-samp_rate/decimation)/2</value>
+    </param>
+    <param>
+      <key>notebook</key>
+      <value></value>
+    </param>
+    <param>
+      <key>number_rate</key>
+      <value>15</value>
+    </param>
+    <param>
+      <key>peak_hold</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>ref_level</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>samp_rate</key>
+      <value>samp_rate</value>
+    </param>
+    <param>
+      <key>show_gauge</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>title</key>
+      <value>PLL locked at</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>float</value>
+    </param>
+    <param>
+      <key>units</key>
+      <value>Hz</value>
+    </param>
+    <param>
+      <key>win_size</key>
+      <value></value>
+    </param>
+  </block>
+  <block>
+    <key>wxgui_scopesink2</key>
+    <param>
+      <key>ac_couple</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(952, 35)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>grid_pos</key>
+      <value></value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>wxgui_scopesink2_0</value>
+    </param>
+    <param>
+      <key>notebook</key>
+      <value>nb, 1</value>
+    </param>
+    <param>
+      <key>num_inputs</key>
+      <value>1</value>
+    </param>
+    <param>
+      <key>samp_rate</key>
+      <value>samp_rate/decimation</value>
+    </param>
+    <param>
+      <key>t_scale</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>title</key>
+      <value>Scope plot of csdr processed signal</value>
+    </param>
+    <param>
+      <key>trig_mode</key>
+      <value>wxgui.TRIG_MODE_AUTO</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>complex</value>
+    </param>
+    <param>
+      <key>v_offset</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>v_scale</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>win_size</key>
+      <value></value>
+    </param>
+    <param>
+      <key>xy_mode</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>y_axis_label</key>
+      <value>Counts</value>
+    </param>
+  </block>
+  <connection>
+    <source_block_id>analog_noise_source_x_0</source_block_id>
+    <sink_block_id>blocks_throttle_0</sink_block_id>
+    <source_key>0</source_key>
+    <sink_key>0</sink_key>
+  </connection>
+  <connection>
+    <source_block_id>analog_pll_freqdet_cf_0</source_block_id>
+    <sink_block_id>blocks_multiply_const_vxx_0</sink_block_id>
+    <source_key>0</source_key>
+    <sink_key>0</sink_key>
+  </connection>
+  <connection>
+    <source_block_id>analog_sig_source_x_0</source_block_id>
+    <sink_block_id>blocks_throttle_0</sink_block_id>
+    <source_key>0</source_key>
+    <sink_key>0</sink_key>
+  </connection>
+  <connection>
+    <source_block_id>blocks_multiply_const_vxx_0</source_block_id>
+    <sink_block_id>wxgui_numbersink2_0</sink_block_id>
+    <source_key>0</source_key>
+    <sink_key>0</sink_key>
+  </connection>
+  <connection>
+    <source_block_id>blocks_throttle_0</source_block_id>
+    <sink_block_id>ha5kfu_execproc_xx_1</sink_block_id>
+    <source_key>0</source_key>
+    <sink_key>0</sink_key>
+  </connection>
+  <connection>
+    <source_block_id>blocks_throttle_0</source_block_id>
+    <sink_block_id>wxgui_fftsink2_0_0</sink_block_id>
+    <source_key>0</source_key>
+    <sink_key>0</sink_key>
+  </connection>
+  <connection>
+    <source_block_id>ha5kfu_execproc_xx_1</source_block_id>
+    <sink_block_id>analog_pll_freqdet_cf_0</sink_block_id>
+    <source_key>0</source_key>
+    <sink_key>0</sink_key>
+  </connection>
+  <connection>
+    <source_block_id>ha5kfu_execproc_xx_1</source_block_id>
+    <sink_block_id>wxgui_fftsink2_0</sink_block_id>
+    <source_key>0</source_key>
+    <sink_key>0</sink_key>
+  </connection>
+  <connection>
+    <source_block_id>ha5kfu_execproc_xx_1</source_block_id>
+    <sink_block_id>wxgui_scopesink2_0</sink_block_id>
+    <source_key>0</source_key>
+    <sink_key>0</sink_key>
+  </connection>
+</flow_graph>
--- a/grc_tests/test_fractional_decimator.grc
+++ b/grc_tests/test_fractional_decimator.grc
--- a/grc_tests/test_rational_resampler.grc
+++ b/grc_tests/test_rational_resampler.grc
--- a/grc_tests/test_shift.grc
+++ b/grc_tests/test_shift.grc
--- a/grc_tests/test_shift_remote.grc
+++ b/grc_tests/test_shift_remote.grc
@ -0,0 +1,971 @@
+<?xml version='1.0' encoding='utf-8'?>
+<?grc format='1' created='3.7.8'?>
+<flow_graph>
+  <timestamp>Thu Jan 15 18:51:48 2015</timestamp>
+  <block>
+    <key>options</key>
+    <param>
+      <key>author</key>
+      <value></value>
+    </param>
+    <param>
+      <key>window_size</key>
+      <value>1280, 1024</value>
+    </param>
+    <param>
+      <key>category</key>
+      <value>Custom</value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>description</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(10, 10)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>generate_options</key>
+      <value>wx_gui</value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>top_block</value>
+    </param>
+    <param>
+      <key>max_nouts</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>realtime_scheduling</key>
+      <value></value>
+    </param>
+    <param>
+      <key>run_options</key>
+      <value>prompt</value>
+    </param>
+    <param>
+      <key>run</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>thread_safe_setters</key>
+      <value></value>
+    </param>
+    <param>
+      <key>title</key>
+      <value></value>
+    </param>
+  </block>
+  <block>
+    <key>variable_slider</key>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>converver</key>
+      <value>float_converter</value>
+    </param>
+    <param>
+      <key>value</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(24, 331)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>grid_pos</key>
+      <value></value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>gen_freq</value>
+    </param>
+    <param>
+      <key>label</key>
+      <value>Frequency:</value>
+    </param>
+    <param>
+      <key>max</key>
+      <value>samp_rate/2</value>
+    </param>
+    <param>
+      <key>min</key>
+      <value>-samp_rate/2</value>
+    </param>
+    <param>
+      <key>notebook</key>
+      <value></value>
+    </param>
+    <param>
+      <key>num_steps</key>
+      <value>100</value>
+    </param>
+    <param>
+      <key>style</key>
+      <value>wx.SL_HORIZONTAL</value>
+    </param>
+  </block>
+  <block>
+    <key>variable</key>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(8, 195)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>rate</value>
+    </param>
+    <param>
+      <key>value</key>
+      <value>-0.1</value>
+    </param>
+  </block>
+  <block>
+    <key>variable</key>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(176, 11)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>samp_rate</value>
+    </param>
+    <param>
+      <key>value</key>
+      <value>250e3</value>
+    </param>
+  </block>
+  <block>
+    <key>analog_sig_source_x</key>
+    <param>
+      <key>amp</key>
+      <value>1</value>
+    </param>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>freq</key>
+      <value>gen_freq</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(8, 75)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>analog_sig_source_x_0</value>
+    </param>
+    <param>
+      <key>maxoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>minoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>offset</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>complex</value>
+    </param>
+    <param>
+      <key>samp_rate</key>
+      <value>samp_rate</value>
+    </param>
+    <param>
+      <key>waveform</key>
+      <value>analog.GR_SIN_WAVE</value>
+    </param>
+  </block>
+  <block>
+    <key>blocks_throttle</key>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(224, 107)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>blocks_throttle_0</value>
+    </param>
+    <param>
+      <key>ignoretag</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>maxoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>minoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>samples_per_second</key>
+      <value>samp_rate</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>complex</value>
+    </param>
+    <param>
+      <key>vlen</key>
+      <value>1</value>
+    </param>
+  </block>
+  <block>
+    <key>ha5kfu_execproc_xx</key>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>commandline</key>
+      <value>"csdr shift_addition_cc %g"%rate</value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(824, 315)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>ha5kfu_execproc_xx_0_0</value>
+    </param>
+    <param>
+      <key>maxoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>minoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>cc</value>
+    </param>
+  </block>
+  <block>
+    <key>ha5kfu_execproc_xx</key>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>commandline</key>
+      <value>ncat -v raspberrypi.local 5321</value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(536, 443)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>ha5kfu_execproc_xx_0_0_0_1</value>
+    </param>
+    <param>
+      <key>maxoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>minoutbuf</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>cc</value>
+    </param>
+  </block>
+  <block>
+    <key>notebook</key>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(272, 11)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>grid_pos</key>
+      <value></value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>nb0</value>
+    </param>
+    <param>
+      <key>labels</key>
+      <value>['original', 'shift_addition_cc','shift_addfast_cc',]</value>
+    </param>
+    <param>
+      <key>notebook</key>
+      <value></value>
+    </param>
+    <param>
+      <key>style</key>
+      <value>wx.NB_TOP</value>
+    </param>
+  </block>
+  <block>
+    <key>wxgui_fftsink2</key>
+    <param>
+      <key>avg_alpha</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>average</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>baseband_freq</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>fft_size</key>
+      <value>1024</value>
+    </param>
+    <param>
+      <key>freqvar</key>
+      <value>None</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(848, 27)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>grid_pos</key>
+      <value></value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>wxgui_fftsink2_0_0</value>
+    </param>
+    <param>
+      <key>notebook</key>
+      <value>nb0,0</value>
+    </param>
+    <param>
+      <key>peak_hold</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>ref_level</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>ref_scale</key>
+      <value>2.0</value>
+    </param>
+    <param>
+      <key>fft_rate</key>
+      <value>15</value>
+    </param>
+    <param>
+      <key>samp_rate</key>
+      <value>samp_rate</value>
+    </param>
+    <param>
+      <key>title</key>
+      <value>FFT Plot</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>complex</value>
+    </param>
+    <param>
+      <key>win_size</key>
+      <value></value>
+    </param>
+    <param>
+      <key>win</key>
+      <value>None</value>
+    </param>
+    <param>
+      <key>y_divs</key>
+      <value>10</value>
+    </param>
+    <param>
+      <key>y_per_div</key>
+      <value>10</value>
+    </param>
+  </block>
+  <block>
+    <key>wxgui_fftsink2</key>
+    <param>
+      <key>avg_alpha</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>average</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>baseband_freq</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>fft_size</key>
+      <value>1024</value>
+    </param>
+    <param>
+      <key>freqvar</key>
+      <value>None</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(1112, 339)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>grid_pos</key>
+      <value></value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>wxgui_fftsink2_0_1</value>
+    </param>
+    <param>
+      <key>notebook</key>
+      <value>nb0,1</value>
+    </param>
+    <param>
+      <key>peak_hold</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>ref_level</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>ref_scale</key>
+      <value>2.0</value>
+    </param>
+    <param>
+      <key>fft_rate</key>
+      <value>15</value>
+    </param>
+    <param>
+      <key>samp_rate</key>
+      <value>samp_rate</value>
+    </param>
+    <param>
+      <key>title</key>
+      <value>FFT Plot</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>complex</value>
+    </param>
+    <param>
+      <key>win_size</key>
+      <value></value>
+    </param>
+    <param>
+      <key>win</key>
+      <value>None</value>
+    </param>
+    <param>
+      <key>y_divs</key>
+      <value>10</value>
+    </param>
+    <param>
+      <key>y_per_div</key>
+      <value>10</value>
+    </param>
+  </block>
+  <block>
+    <key>wxgui_fftsink2</key>
+    <param>
+      <key>avg_alpha</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>average</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>baseband_freq</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>fft_size</key>
+      <value>1024</value>
+    </param>
+    <param>
+      <key>freqvar</key>
+      <value>None</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(808, 387)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>grid_pos</key>
+      <value></value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>wxgui_fftsink2_0_1_1</value>
+    </param>
+    <param>
+      <key>notebook</key>
+      <value>nb0,2</value>
+    </param>
+    <param>
+      <key>peak_hold</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>ref_level</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>ref_scale</key>
+      <value>2.0</value>
+    </param>
+    <param>
+      <key>fft_rate</key>
+      <value>15</value>
+    </param>
+    <param>
+      <key>samp_rate</key>
+      <value>samp_rate</value>
+    </param>
+    <param>
+      <key>title</key>
+      <value>FFT Plot</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>complex</value>
+    </param>
+    <param>
+      <key>win_size</key>
+      <value></value>
+    </param>
+    <param>
+      <key>win</key>
+      <value>None</value>
+    </param>
+    <param>
+      <key>y_divs</key>
+      <value>10</value>
+    </param>
+    <param>
+      <key>y_per_div</key>
+      <value>10</value>
+    </param>
+  </block>
+  <block>
+    <key>wxgui_scopesink2</key>
+    <param>
+      <key>ac_couple</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(1112, 555)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>grid_pos</key>
+      <value></value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>wxgui_scopesink2_0_0</value>
+    </param>
+    <param>
+      <key>notebook</key>
+      <value>nb0,1</value>
+    </param>
+    <param>
+      <key>num_inputs</key>
+      <value>1</value>
+    </param>
+    <param>
+      <key>samp_rate</key>
+      <value>samp_rate</value>
+    </param>
+    <param>
+      <key>t_scale</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>title</key>
+      <value>Scope Plot</value>
+    </param>
+    <param>
+      <key>trig_mode</key>
+      <value>wxgui.TRIG_MODE_NORM</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>complex</value>
+    </param>
+    <param>
+      <key>v_offset</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>v_scale</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>win_size</key>
+      <value></value>
+    </param>
+    <param>
+      <key>xy_mode</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>y_axis_label</key>
+      <value>Counts</value>
+    </param>
+  </block>
+  <block>
+    <key>wxgui_scopesink2</key>
+    <param>
+      <key>ac_couple</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>alias</key>
+      <value></value>
+    </param>
+    <param>
+      <key>comment</key>
+      <value></value>
+    </param>
+    <param>
+      <key>affinity</key>
+      <value></value>
+    </param>
+    <param>
+      <key>_enabled</key>
+      <value>True</value>
+    </param>
+    <param>
+      <key>_coordinate</key>
+      <value>(808, 611)</value>
+    </param>
+    <param>
+      <key>_rotation</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>grid_pos</key>
+      <value></value>
+    </param>
+    <param>
+      <key>id</key>
+      <value>wxgui_scopesink2_0_0_1</value>
+    </param>
+    <param>
+      <key>notebook</key>
+      <value>nb0,2</value>
+    </param>
+    <param>
+      <key>num_inputs</key>
+      <value>1</value>
+    </param>
+    <param>
+      <key>samp_rate</key>
+      <value>samp_rate</value>
+    </param>
+    <param>
+      <key>t_scale</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>title</key>
+      <value>Scope Plot</value>
+    </param>
+    <param>
+      <key>trig_mode</key>
+      <value>wxgui.TRIG_MODE_NORM</value>
+    </param>
+    <param>
+      <key>type</key>
+      <value>complex</value>
+    </param>
+    <param>
+      <key>v_offset</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>v_scale</key>
+      <value>0</value>
+    </param>
+    <param>
+      <key>win_size</key>
+      <value></value>
+    </param>
+    <param>
+      <key>xy_mode</key>
+      <value>False</value>
+    </param>
+    <param>
+      <key>y_axis_label</key>
+      <value>Counts</value>
+    </param>
+  </block>
+  <connection>
+    <source_block_id>analog_sig_source_x_0</source_block_id>
+    <sink_block_id>blocks_throttle_0</sink_block_id>
+    <source_key>0</source_key>
+    <sink_key>0</sink_key>
+  </connection>
+  <connection>
+    <source_block_id>blocks_throttle_0</source_block_id>
+    <sink_block_id>ha5kfu_execproc_xx_0_0</sink_block_id>
+    <source_key>0</source_key>
+    <sink_key>0</sink_key>
+  </connection>
+  <connection>
+    <source_block_id>blocks_throttle_0</source_block_id>
+    <sink_block_id>ha5kfu_execproc_xx_0_0_0_1</sink_block_id>
+    <source_key>0</source_key>
+    <sink_key>0</sink_key>
+  </connection>
+  <connection>
+    <source_block_id>blocks_throttle_0</source_block_id>
+    <sink_block_id>wxgui_fftsink2_0_0</sink_block_id>
+    <source_key>0</source_key>
+    <sink_key>0</sink_key>
+  </connection>
+  <connection>
+    <source_block_id>ha5kfu_execproc_xx_0_0</source_block_id>
+    <sink_block_id>wxgui_fftsink2_0_1</sink_block_id>
+    <source_key>0</source_key>
+    <sink_key>0</sink_key>
+  </connection>
+  <connection>
+    <source_block_id>ha5kfu_execproc_xx_0_0</source_block_id>
+    <sink_block_id>wxgui_scopesink2_0_0</sink_block_id>
+    <source_key>0</source_key>
+    <sink_key>0</sink_key>
+  </connection>
+  <connection>
+    <source_block_id>ha5kfu_execproc_xx_0_0_0_1</source_block_id>
+    <sink_block_id>wxgui_fftsink2_0_1_1</sink_block_id>
+    <source_key>0</source_key>
+    <sink_key>0</sink_key>
+  </connection>
+  <connection>
+    <source_block_id>ha5kfu_execproc_xx_0_0_0_1</source_block_id>
+    <sink_block_id>wxgui_scopesink2_0_0_1</sink_block_id>
+    <source_key>0</source_key>
+    <sink_key>0</sink_key>
+  </connection>
+</flow_graph>
--- a/grc_tests/test_shift_remote.sh
+++ b/grc_tests/test_shift_remote.sh
@ -0,0 +1,9 @@
+#!/bin/sh
+# Run this script on a Raspberry Pi 2, while running test_shift_remote.grc on your PC.
+# It allows you to debug the NEON-accelerated version of specific DSP algorithms on the target hardware.
+TEMPSCRIPT="/tmp/test_shift_remote_exec.sh" # helper script that ncat executes; the rate -0.1 written below matches the `rate` variable in test_shift_remote.grc
+printf '#!/bin/sh\ncsdr shift_addfast_cc -0.1\n' > "$TEMPSCRIPT" # printf, not echo: whether echo expands "\n" is implementation-defined, and without expansion the helper would be a single broken line
+cat "$TEMPSCRIPT" # show the generated helper for a visual check
+chmod +x "$TEMPSCRIPT"
+ncat -vvl 5321 -e "$TEMPSCRIPT" # listen on TCP port 5321 (the port the .grc connects to) and run the helper for the incoming connection
+rm "$TEMPSCRIPT"
--- a/libcsdr.c
+++ b/libcsdr.c
@ -263,8 +263,208 @@ float shift_table_cc(complexf* input, complexf* output, int input_size, float ra
 	return phase;
 }

+
+shift_unroll_data_t shift_unroll_init(float rate, int size) //builds the sin/cos lookup tables read by shift_unroll_cc(); rate is multiplied by 2*PI to get the per-sample phase step, size is the number of table entries
+{
+	shift_unroll_data_t output;
+	output.phase_increment=2*rate*PI; //phase step per sample, in radians
+	output.size = size;
+	output.dsin=(float*)malloc(sizeof(float)*size); //NOTE(review): the malloc() results are not checked and nothing in this function frees the tables -- presumably the caller owns them
+	output.dcos=(float*)malloc(sizeof(float)*size);
+	float myphase = 0;
+	for(int i=0;i<size;i++)
+	{
+		myphase += output.phase_increment; //incremented before use, so entry i holds the phase after (i+1) steps
+		while(myphase>PI) myphase-=2*PI; //keep the accumulated phase wrapped into [-PI, PI]
+		while(myphase<-PI) myphase+=2*PI;		
+		output.dsin[i]=sin(myphase);
+		output.dcos[i]=cos(myphase);
+	}
+	return output;	
+}
+
+float shift_unroll_cc(complexf *input, complexf* output, int input_size, shift_unroll_data_t* d, float starting_phase) //rotates input sample i by starting_phase plus the phase stored in table entry i of d, writes it to output; returns the phase to pass in for the next block
+{
+	//Processes one sample per iteration and reads d->dcos[i]/d->dsin[i] for every i<input_size. NOTE(review): nothing here checks input_size against d->size -- confirm callers keep input_size<=d->size.
+	//fprintf(stderr, "shift_unroll_cc: input_size = %d\n", input_size);
+	float cos_start=cos(starting_phase);
+	float sin_start=sin(starting_phase);
+	register float cos_val, sin_val;
+	for(int i=0;i<input_size; i++) //@shift_unroll_cc
+	{
+		cos_val = cos_start * d->dcos[i] - sin_start * d->dsin[i]; //angle addition: cos(starting_phase + table phase i)
+		sin_val  = sin_start * d->dcos[i] + cos_start * d->dsin[i]; //angle addition: sin(starting_phase + table phase i)
+		iof(output,i)=cos_val*iof(input,i)-sin_val*qof(input,i); //complex multiply of the input sample by (cos_val + j*sin_val); iof/qof are presumably the I and Q parts of a sample
+		qof(output,i)=sin_val*iof(input,i)+cos_val*qof(input,i);
+	}
+	starting_phase+=input_size*d->phase_increment; //advance the phase by the whole block that was just processed
+	while(starting_phase>PI) starting_phase-=2*PI; //wrap the returned phase into [-PI, PI]
+	while(starting_phase<-PI) starting_phase+=2*PI;
+	return starting_phase;
+}
+
+shift_addfast_data_t shift_addfast_init(float rate) //precomputes the four sin/cos factors read by shift_addfast_cc(); rate is multiplied by 2*PI to get the per-sample phase step
+{
+	shift_addfast_data_t output;
+	output.phase_increment=2*rate*PI; //phase step per sample, in radians
+	for(int i=0;i<4;i++)
+	{
+		output.dsin[i]=sin(output.phase_increment*(i+1)); //entry i holds the phase after (i+1) steps, i.e. 1..4 samples ahead
+		output.dcos[i]=cos(output.phase_increment*(i+1));
+	}
+	return output;
+}
+
 #ifdef NEON_OPTS
-#pragma message "We have a faster fir_decimate_cc now."
+#pragma message "Manual NEON optimizations are ON: we have a faster shift_addfast_cc now."
+
+float shift_addfast_cc(complexf *input, complexf* output, int input_size, shift_addfast_data_t* d, float starting_phase) //NEON version: rotates the input by a phase advancing d->phase_increment per sample, 4 samples per loop iteration; returns the phase to pass in for the next block
+{
+	if(input_size<=0) return starting_phase; //the asm loop tests its end condition only after an iteration, so it must not be entered without input; input_size should be multiple of 4
+	float cos_start[4], sin_start[4];
+	float cos_vals[4], sin_vals[4]; //not referenced by the asm below, which keeps these values in RCOSV/RSINV
+	for(int i=0;i<4;i++) 
+	{
+		cos_start[i] = cos(starting_phase);
+		sin_start[i] = sin(starting_phase);
+	}
+
+	float* pdcos = d->dcos;
+	float* pdsin = d->dsin;
+	register float* pinput = (float*)input;
+	register float* pinput_end = (float*)(input+input_size);
+	register float* poutput = (float*)output;
+
+	//Register map:
+	#define RDCOS "q0" //dcos, dsin
+	#define RDSIN "q1"
+	#define RCOSST "q2" //cos_start, sin_start
+	#define RSINST "q3"
+	#define RCOSV "q4" //cos_vals, sin_vals
+	#define RSINV "q5"
+	#define ROUTI "q6" //output_i, output_q
+	#define ROUTQ "q7" 
+	#define RINPI "q8" //input_i, input_q
+	#define RINPQ "q9"
+	#define R3(x,y,z) x ", " y ", " z "\n\t"
+
+	asm volatile( //(the range of q is q0-q15)
+		"		vld1.32	{" RDCOS "}, [%[pdcos]]\n\t"
+		"		vld1.32	{" RDSIN "}, [%[pdsin]]\n\t"
+		"		vld1.32	{" RCOSST "}, [%[cos_start]]\n\t"
+		"		vld1.32	{" RSINST "}, [%[sin_start]]\n\t"
+		"1:		vld2.32 {" RINPI "-" RINPQ "}, [%[pinput]]!\n\t" //local label "1:" stays unique even if the compiler duplicates this asm; load RINPI and RINPQ directly from the memory address stored in pinput, with interleaving (so that we get the I samples in RINPI and the Q samples in RINPQ), also increment the memory address in pinput (hence the "!" mark)
+
+		//C version:
+		//cos_vals[j] = cos_start * d->dcos[j] - sin_start * d->dsin[j];
+		//sin_vals[j] = sin_start * d->dcos[j] + cos_start * d->dsin[j];
+
+		"		vmul.f32 " R3(RCOSV, RCOSST, RDCOS)  //cos_vals[i] = cos_start * d->dcos[i]
+		"		vmls.f32 " R3(RCOSV, RSINST, RDSIN)  //cos_vals[i] -= sin_start * d->dsin[i]
+		"		vmul.f32 " R3(RSINV, RSINST, RDCOS)  //sin_vals[i] = sin_start * d->dcos[i]
+		"		vmla.f32 " R3(RSINV, RCOSST, RDSIN)  //sin_vals[i] += cos_start * d->dsin[i]
+
+		//C version:
+		//iof(output,4*i+j)=cos_vals[j]*iof(input,4*i+j)-sin_vals[j]*qof(input,4*i+j);
+		//qof(output,4*i+j)=sin_vals[j]*iof(input,4*i+j)+cos_vals[j]*qof(input,4*i+j);	
+		"		vmul.f32 " R3(ROUTI, RCOSV, RINPI) //output_i =  cos_vals * input_i
+		"		vmls.f32 " R3(ROUTI, RSINV, RINPQ) //output_i -= sin_vals * input_q
+		"		vmul.f32 " R3(ROUTQ, RSINV, RINPI) //output_q =  sin_vals * input_i
+		"		vmla.f32 " R3(ROUTQ, RCOSV, RINPQ) //output_q += cos_vals * input_q
+
+		"		vst2.32 {" ROUTI "-" ROUTQ "}, [%[poutput]]!\n\t" //store the outputs in memory
+		//"		add %[poutput],%[poutput],#32\n\t"
+		"		vdup.32 " RCOSST ", d9[1]\n\t" // cos_start[0-3] = cos_vals[3] (d9 is the upper half of q4 = RCOSV)
+		"		vdup.32 " RSINST ", d11[1]\n\t" // sin_start[0-3] = sin_vals[3] (d11 is the upper half of q5 = RSINV)
+
+		"		cmp %[pinput], %[pinput_end]\n\t" //if(pinput < pinput_end), unsigned compare (bcc = branch if lower)
+		"		bcc 1b\n\t"					  //	then goto the loop start (label 1, searching backwards)
+	:
+		[pinput]"+r"(pinput), [poutput]"+r"(poutput) //output operand list -> C variables that we will change from ASM
+	:
+		[pinput_end]"r"(pinput_end), [pdcos]"r"(pdcos), [pdsin]"r"(pdsin), [sin_start]"r"(sin_start), [cos_start]"r"(cos_start) //input operand list
+	: 
+		"memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "cc" //clobber list: every q register written above, including q3 (RSINST)
+	);
+	starting_phase+=input_size*d->phase_increment;
+	while(starting_phase>PI) starting_phase-=2*PI;
+	while(starting_phase<-PI) starting_phase+=2*PI;
+	return starting_phase;
+}
+
+#else
+
+
+#if 1
+
+#define SADF_L1(j) cos_vals_ ## j = cos_start * dcos_ ## j - sin_start * dsin_ ## j; \
+	sin_vals_ ## j = sin_start * dcos_ ## j + cos_start * dsin_ ## j; //SADF_L1(j): angle addition, gives cos/sin of the current start phase advanced by table entry j
+#define SADF_L2(j) iof(output,4*i+j)=(cos_vals_ ## j)*iof(input,4*i+j)-(sin_vals_ ## j)*qof(input,4*i+j); \
+	qof(output,4*i+j)=(sin_vals_ ## j)*iof(input,4*i+j)+(cos_vals_ ## j)*qof(input,4*i+j); //SADF_L2(j): complex multiply of input sample 4*i+j by (cos_vals_j + j*sin_vals_j); relies on i, input and output being in scope
+
+float shift_addfast_cc(complexf *input, complexf* output, int input_size, shift_addfast_data_t* d, float starting_phase) //plain C version, manually unrolled by 4: rotates the input by a phase advancing d->phase_increment per sample; returns the phase to pass in for the next block
+{
+	//input_size should be multiple of 4: the loop below runs input_size/4 times, so a remainder of 1..3 samples would not be written to output
+	//fprintf(stderr, "shift_addfast_cc: input_size = %d\n", input_size);
+	float cos_start=cos(starting_phase);
+	float sin_start=sin(starting_phase);
+	float register cos_vals_0, cos_vals_1, cos_vals_2, cos_vals_3,
+		sin_vals_0, sin_vals_1, sin_vals_2, sin_vals_3, 
+		dsin_0 = d->dsin[0], dsin_1 = d->dsin[1], dsin_2 = d->dsin[2], dsin_3 = d->dsin[3],
+		dcos_0 = d->dcos[0], dcos_1 = d->dcos[1], dcos_2 = d->dcos[2], dcos_3 = d->dcos[3]; //local copies of the four table entries, named so that the SADF_* macros can paste the index into the identifier
+
+	for(int i=0;i<input_size/4; i++) //@shift_addfast_cc
+	{
+		SADF_L1(0)
+		SADF_L1(1)
+		SADF_L1(2)
+		SADF_L1(3)
+		SADF_L2(0)
+		SADF_L2(1)
+		SADF_L2(2)
+		SADF_L2(3)
+		cos_start = cos_vals_3; //the rotation of the 4th sample becomes the starting point of the next group of four
+		sin_start = sin_vals_3;
+	}
+	starting_phase+=input_size*d->phase_increment; //the returned phase is recomputed from input_size, not taken from the recursively updated cos_start/sin_start
+	while(starting_phase>PI) starting_phase-=2*PI; //wrap the returned phase into [-PI, PI]
+	while(starting_phase<-PI) starting_phase+=2*PI;
+	return starting_phase;
+}
+#else
+float shift_addfast_cc(complexf *input, complexf* output, int input_size, shift_addfast_data_t* d, float starting_phase) //array-based variant of the unrolled C version; it sits in the #else branch of "#if 1", so it is currently not compiled
+{
+	//input_size should be multiple of 4: the outer loop runs input_size/4 times, so a remainder of 1..3 samples would not be written to output
+	//fprintf(stderr, "shift_addfast_cc: input_size = %d\n", input_size);
+	float cos_start=cos(starting_phase);
+	float sin_start=sin(starting_phase);
+	float cos_vals[4], sin_vals[4];
+	for(int i=0;i<input_size/4; i++) //@shift_addfast_cc
+	{
+		for(int j=0;j<4;j++) //@shift_addfast_cc
+		{
+			cos_vals[j] = cos_start * d->dcos[j] - sin_start * d->dsin[j]; //angle addition: cos(start phase + table phase j)
+			sin_vals[j] = sin_start * d->dcos[j] + cos_start * d->dsin[j]; //angle addition: sin(start phase + table phase j)
+		}
+		for(int j=0;j<4;j++) //@shift_addfast_cc
+		{
+			iof(output,4*i+j)=cos_vals[j]*iof(input,4*i+j)-sin_vals[j]*qof(input,4*i+j); //complex multiply of the input sample by (cos_vals[j] + j*sin_vals[j])
+			qof(output,4*i+j)=sin_vals[j]*iof(input,4*i+j)+cos_vals[j]*qof(input,4*i+j);
+		}
+		cos_start = cos_vals[3]; //the rotation of the 4th sample becomes the starting point of the next group of four
+		sin_start = sin_vals[3];
+	}
+	starting_phase+=input_size*d->phase_increment;
+	while(starting_phase>PI) starting_phase-=2*PI; //wrap the returned phase into [-PI, PI]
+	while(starting_phase<-PI) starting_phase+=2*PI;
+	return starting_phase;
+}
+#endif
+
+#endif
+
+#ifdef NEON_OPTS
+#pragma message "Manual NEON optimizations are ON: we have a faster fir_decimate_cc now."

 //max help: http://community.arm.com/groups/android-community/blog/2015/03/27/arm-neon-programming-quick-reference

@ -280,11 +480,7 @@ int fir_decimate_cc(complexf *input, complexf *output, int input_size, int decim
 	for(int i=0; i<input_size; i+=decimation) //@fir_decimate_cc: outer loop
 	{
 		if(i+taps_length>input_size) break;
-		register float acci=0;
-		register float accq=0;
-
-		register int ti=0;
-		register float* pinput=(float*)&(input[i+ti]);
+		register float* pinput=(float*)&(input[i]);
 		register float* ptaps=taps;
 		register float* ptaps_end=taps+taps_length;
 		float quad_acciq [8];
@ -297,13 +493,13 @@ q4, q5: accumulator for I branch and Q branch (will be the output)
 */

 		asm volatile(
-			"		vmov.f32 q4, #0.0\n\t" //another way to null the accumulators
-			"		vmov.f32 q5, #0.0\n\t"
+			"		veor q4, q4\n\t"
+			"		veor q5, q5\n\t"
 			"for_fdccasm: vld2.32	{q0-q1}, [%[pinput]]!\n\t" //load q0 and q1 directly from the memory address stored in pinput, with interleaving (so that we get the I samples in q0 and the Q samples in q1), also increment the memory address in pinput (hence the "!" mark) //http://community.arm.com/groups/processors/blog/2010/03/17/coding-for-neon--part-1-load-and-stores
 			"		vld1.32	{q2}, [%[ptaps]]!\n\t"
 			"		vmla.f32 q4, q0, q2\n\t" //quad_acc_i += quad_input_i * quad_taps_1 //http://stackoverflow.com/questions/3240440/how-to-use-the-multiply-and-accumulate-intrinsics-in-arm-cortex-a8 //http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0489e/CIHEJBIE.html
 			"		vmla.f32 q5, q1, q2\n\t" //quad_acc_q += quad_input_q * quad_taps_1
-			"		cmp %[ptaps], %[ptaps_end]\n\t" //if(ptaps == ptaps_end)
+			"		cmp %[ptaps], %[ptaps_end]\n\t" //if(ptaps != ptaps_end)
 			"		bcc for_fdccasm\n\t"			//	then goto for_fdcasm
 			"		vst1.32 {q4}, [%[quad_acci]]\n\t" //if the loop is finished, store the two accumulators in memory
 			"		vst1.32 {q5}, [%[quad_accq]]\n\t"
@ -454,7 +650,7 @@ float inline fir_one_pass_ff(float* input, float* taps, int taps_length)
 	return acc;
 }

-fractional_decimator_ff_t fractional_decimator_ff(float* input, float* output, int input_size, float rate, float *taps, int taps_length, fractional_decimator_ff_t d)
+old_fractional_decimator_ff_t old_fractional_decimator_ff(float* input, float* output, int input_size, float rate, float *taps, int taps_length, old_fractional_decimator_ff_t d)
 {
 	if(rate<=1.0) return d; //sanity check, can't decimate <=1.0
 	//This routine can handle floating point decimation rates.
@ -487,6 +683,104 @@ fractional_decimator_ff_t fractional_decimator_ff(float* input, float* output, i
 	return d;
 }

+/*
+ * Builds the state used by fractional_decimator_ff().
+ *   rate: decimation factor (fractional_decimator_ff() asserts that it is > 1.0)
+ *   num_poly_points: number of samples the interpolating polynomial goes through;
+ *     it is rounded down to an even number
+ *   taps, taps_length: optional FIR pre-filter; pass taps=NULL to switch it off
+ * The three buffers allocated here are owned by the returned struct. No matching deinit
+ * function is visible in this part of the file.
+ * NOTE(review): the malloc() results are not checked; there is no error channel in this API.
+ */
+fractional_decimator_ff_t fractional_decimator_ff_init(float rate, int num_poly_points, float* taps, int taps_length)
+{
+	fractional_decimator_ff_t d;
+	d.num_poly_points = num_poly_points&~1; //num_poly_points needs to be even!
+	d.poly_precalc_denomiator = (float*)malloc(d.num_poly_points*sizeof(float));
+	//The interpolation nodes are the consecutive integers xifirst..xilast, for example:
+	//	4 points (x0..x3): -1,0,1,2
+	//	6 points (x0..x5): -2,-1,0,1,2,3
+	d.xifirst=-(d.num_poly_points/2)+1;
+	d.xilast=d.num_poly_points/2;
+	int id = 0; //index in poly_precalc_denomiator
+	for(int xi=d.xifirst;xi<=d.xilast;xi++)
+	{
+		//denominator of the basis polynomial that belongs to node xi: product of (xi-xj) for every xj!=xi
+		d.poly_precalc_denomiator[id]=1;
+		for(int xj=d.xifirst;xj<=d.xilast;xj++)
+		{
+			if(xi!=xj) d.poly_precalc_denomiator[id] *= (xi-xj); //poly_precalc_denomiator could be integer as well. But that would later add a necessary conversion.
+		}
+		id++;
+	}
+	d.where=-d.xifirst; //smallest position that fractional_decimator_ff() accepts (it asserts where >= -xifirst)
+	d.coeffs_buf=(float*)malloc(d.num_poly_points*sizeof(float));
+	d.filtered_buf=(float*)malloc(d.num_poly_points*sizeof(float));
+	//d.last_inputs_circbuf = (float)malloc(d.num_poly_points*sizeof(float));
+	//d.last_inputs_startsat = 0; 
+	//d.last_inputs_samplewhere = -1;
+	//for(int i=0;i<num_poly_points; i++) d.last_inputs_circbuf[i] = 0;
+	d.rate = rate;
+	d.taps = taps;
+	d.taps_length = taps_length;
+	d.input_processed = 0;
+	d.output_size = 0; //nothing has been produced yet; previously this field was returned uninitialized
+	return d;
+}
+
+#define DEBUG_ASSERT 1
+/*
+ * Fractional decimator: steps through the input in increments of d->rate samples and, at every
+ * (fractional) position d->where, evaluates the Lagrange polynomial that goes through
+ * d->num_poly_points neighbouring (optionally FIR pre-filtered) input samples.
+ *   input, input_size: input samples and their count
+ *   output: receives the decimated samples; their count is stored in d->output_size
+ *   d: state created by fractional_decimator_ff_init(); where, input_processed and output_size
+ *      are updated here
+ * After the call d->where has been re-based by d->input_processed, so the caller is presumably
+ * expected to drop that many input samples before the next call (confirm against the callers).
+ */
+void fractional_decimator_ff(float* input, float* output, int input_size, fractional_decimator_ff_t* d)
+{
+	//This routine can handle floating point decimation rates.
+	//It applies polynomial interpolation to samples that are taken into consideration from a pre-filtered input.
+	//The pre-filter can be switched off by applying taps=NULL.
+	//fprintf(stderr, "drate=%f\n", d->rate);
+	if(DEBUG_ASSERT) assert(d->rate > 1.0); 
+	if(DEBUG_ASSERT) assert(d->where >= -d->xifirst);
+	int oi=0; //output index
+	int index_high; //ceilf(d->where); assigned inside the loop condition below
+#define FD_INDEX_LOW (index_high-1)
+	//we optimize to calculate ceilf(where) only once every iteration, so we do it here:
+	for(;(index_high=ceilf(d->where))+d->num_poly_points+d->taps_length<input_size;d->where+=d->rate) //@fractional_decimator_ff
+	{
+		//d->num_poly_points above is theoretically more than we could have here, but this makes the spectrum look good
+		//NOTE(review): sxifirst and sxilast are computed but never used below; the samples are taken from FD_INDEX_LOW+wi.
+		int sxifirst = FD_INDEX_LOW + d->xifirst; 
+		int sxilast = FD_INDEX_LOW + d->xilast; 
+		//collect the num_poly_points samples the polynomial goes through (filtered if taps are given, raw otherwise)
+		if(d->taps) 
+			for(int wi=0;wi<d->num_poly_points;wi++) d->filtered_buf[wi] = fir_one_pass_ff(input+FD_INDEX_LOW+wi, d->taps, d->taps_length);
+		else
+			for(int wi=0;wi<d->num_poly_points;wi++) d->filtered_buf[wi] = *(input+FD_INDEX_LOW+wi);
+		int id=0;
+		float xwhere = d->where - FD_INDEX_LOW; //position to evaluate at, relative to FD_INDEX_LOW
+		//numerators of the basis polynomials: product of (xwhere-xj) for every node xj!=xi
+		for(int xi=d->xifirst;xi<=d->xilast;xi++)
+		{
+			d->coeffs_buf[id]=1;
+			for(int xj=d->xifirst;xj<=d->xilast;xj++)
+			{
+				if(xi!=xj) d->coeffs_buf[id] *= (xwhere-xj);
+			}
+			id++;		
+		}
+		//the interpolated value is the sum of (numerator/denominator)*sample over all the nodes
+		float acc = 0;
+		for(int i=0;i<d->num_poly_points;i++)
+		{
+			acc += (d->coeffs_buf[i]/d->poly_precalc_denomiator[i])*d->filtered_buf[i];  //(xnom/xden)*yn
+		}
+		output[oi++]=acc;
+	}
+	//index_high still holds the value from the loop condition that ended the loop
+	d->input_processed = FD_INDEX_LOW + d->xifirst;
+	d->where -= d->input_processed;
+	d->output_size = oi;
+}
+
+/*
+ * Some notes to myself on the circular buffer I wanted to implement here:
+		int last_input_samplewhere_shouldbe = (index_high-1)+xifirst;
+		int last_input_offset = last_input_samplewhere_shouldbe - d->last_input_samplewhere;
+		if(last_input_offset < num_poly_points)
+		{
+			//if we can move the last_input circular buffer, we move, and add the new samples at the end
+			d->last_inputs_startsat += last_input_offset;
+			d->last_inputs_startsat %= num_poly_points;
+			int num_copied_samples = 0;
+			for(int i=0; i<last_input_offset; i++)
+			{
+				d->last_inputs_circbuf[i]=
+			}
+			d->last_input_samplewhere = d->las
+		}
+	However, I think I should just rather do a continuous big buffer.
+*/

 void apply_fir_fft_cc(FFT_PLAN_T* plan, FFT_PLAN_T* plan_inverse, complexf* taps_fft, complexf* last_overlap, int overlap_size)
 {
@ -930,6 +1224,29 @@ void apply_window_c(complexf* input, complexf* output, int size, window_t window
 	}
 }

+/*
+ * Allocates a table of (size) floats and fills it with the samples of the selected window
+ * function, so that the window does not have to be evaluated again for every block
+ * (see apply_precalculated_window_c()).
+ * Returns the malloc()ed table, which the caller owns, or NULL if the allocation failed.
+ * NOTE(review): with size==1 the position below is 0/0, just as in apply_window_c().
+ */
+float *precalculate_window(int size, window_t window)
+{
+	float (*window_function)(float)=firdes_get_window_kernel(window);
+	float *windowt;
+	windowt = malloc(sizeof(float) * size);
+	if(!windowt) return NULL; //previously the loop below wrote through the NULL pointer
+	for(int i=0;i<size;i++) //@precalculate_window
+	{
+		float rate=(float)i/(size-1); //position inside the window, 0..1
+		windowt[i] = window_function(2.0*rate+1.0);
+	}
+	return windowt;
+}
+
+//Scales the I and the Q component of every complex input sample by the entry of windowt that
+//has the same index, and stores the result in output. All three arrays hold (size) items.
+void apply_precalculated_window_c(complexf* input, complexf* output, int size, float *windowt)
+{
+	for(int n=0;n<size;n++) //@apply_precalculated_window_c
+	{
+		iof(output,n)=windowt[n]*iof(input,n);
+		qof(output,n)=windowt[n]*qof(input,n);
+	}
+}
+
+
 void apply_window_f(float* input, float* output, int size, window_t window)
 {
 	float (*window_function)(float)=firdes_get_window_kernel(window);
@ -949,6 +1266,19 @@ void logpower_cf(complexf* input, float* output, int size, float add_db)
 	for(int i=0;i<size;i++) output[i]=10*output[i]+add_db; //@logpower_cf: pass 3
 }

+//Adds the power (I*I+Q*Q) of every complex input sample to the output item that has the same
+//index. The output is accumulated into, not overwritten, so the caller has to clear it first.
+//NOTE(review): the loop marker below still says "@logpower_cf: pass 1", while the other new
+//	functions are marked with their own name; it looks like a copy-paste leftover.
+void accumulate_power_cf(complexf* input, float* output, int size)
+{
+	for(int i=0;i<size;i++) output[i] += iof(input,i)*iof(input,i) + qof(input,i)*qof(input,i); //@logpower_cf: pass 1
+	
+}
+
+//Converts (size) power values to decibels: output[k] = 10*log10(input[k]) + add_db.
+//It is done in two separate passes over the output buffer, like passes 2 and 3 of logpower_cf().
+void log_ff(float* input, float* output, int size, float add_db)
+{
+	int k;
+	for(k=0;k<size;k++) output[k]=log10(input[k]); //@logpower_cf: pass 2
+
+	for(k=0;k<size;k++) output[k]=add_db+10*output[k]; //@logpower_cf: pass 3
+}
+
+
 /*
  _____        _                                            _
 |  __ \      | |                                          (_)
--- a/libcsdr.h
+++ b/libcsdr.h
@ -68,6 +68,8 @@ typedef struct complexf_s { float i; float q; } complexf;
 //they dropped M_PI in C99, so we define it:
 #define PI ((float)3.14159265358979323846)

+//Seconds elapsed between two struct timespec values, as a double.
+//The arguments are parenthesized so that expressions like *ptr can be passed as well.
+#define TIME_TAKEN(start,end) (((end).tv_sec-(start).tv_sec)+((end).tv_nsec-(start).tv_nsec)/1e9)
+
 //window
 typedef enum window_s
 {
@ -140,17 +142,42 @@ typedef struct rational_resampler_ff_s
 rational_resampler_ff_t rational_resampler_ff(float *input, float *output, int input_size, int interpolation, int decimation, float *taps, int taps_length, int last_taps_delay);
 void rational_resampler_get_lowpass_f(float* output, int output_size, int interpolation, int decimation, window_t window);

+float *precalculate_window(int size, window_t window);
 void apply_window_c(complexf* input, complexf* output, int size, window_t window);
+void apply_precalculated_window_c(complexf* input, complexf* output, int size, float *windowt);
 void apply_window_f(float* input, float* output, int size, window_t window);
 void logpower_cf(complexf* input, float* output, int size, float add_db);
+void accumulate_power_cf(complexf* input, float* output, int size);
+void log_ff(float* input, float* output, int size, float add_db);

+//State of the polynomial interpolating fractional decimator. It is created by
+//fractional_decimator_ff_init() and updated in place by every fractional_decimator_ff() call.
 typedef struct fractional_decimator_ff_s
+{
+	float where; //(fractional) input position of the next output sample
+	int input_processed; //set at the end of every fractional_decimator_ff() call; "where" is re-based by this amount
+	int output_size; //number of samples that the last fractional_decimator_ff() call has written to its output
+	int num_poly_points; //number of samples that the Lagrange interpolator will use
+	float* poly_precalc_denomiator; //we don't precalculate coefficients here as a Farrow structure would; we only precalculate the denominators of the interpolator expression
+	//float* last_inputs_circbuf; //circular buffer to store the last (num_poly_points) number of input samples.
+	//int last_inputs_startsat; //where the circular buffer starts now
+	//int last_inputs_samplewhere; 
+	float* coeffs_buf; //scratch: numerators of the interpolator expression, recalculated for every output sample
+	float* filtered_buf; //scratch: the (optionally pre-filtered) input samples that the polynomial goes through
+	int xifirst; //first interpolation node, -(num_poly_points/2)+1
+	int xilast; //last interpolation node, num_poly_points/2
+	float rate; //decimation factor, has to be > 1.0
+	float *taps; //taps of the optional FIR pre-filter, or NULL to switch it off
+	int taps_length;
+} fractional_decimator_ff_t;
+fractional_decimator_ff_t fractional_decimator_ff_init(float rate, int num_poly_points, float* taps, int taps_length);
+void fractional_decimator_ff(float* input, float* output, int input_size, fractional_decimator_ff_t* d);
+
+typedef struct old_fractional_decimator_ff_s
 {
 	float remain;
 	int input_processed;
 	int output_size;
-} fractional_decimator_ff_t;
-fractional_decimator_ff_t fractional_decimator_ff(float* input, float* output, int input_size, float rate, float *taps, int taps_length, fractional_decimator_ff_t d);
+} old_fractional_decimator_ff_t;
+old_fractional_decimator_ff_t old_fractional_decimator_ff(float* input, float* output, int input_size, float rate, float *taps, int taps_length, old_fractional_decimator_ff_t d);

 typedef struct shift_table_data_s
 {
@ -161,6 +188,25 @@ void shift_table_deinit(shift_table_data_t table_data);
 shift_table_data_t shift_table_init(int table_size);
 float shift_table_cc(complexf* input, complexf* output, int input_size, float rate, shift_table_data_t table_data, float starting_phase);

+//Parameters of shift_addfast_cc(), calculated once by shift_addfast_init() for a given rate.
+//NOTE(review): dsin/dcos presumably hold the sine/cosine of 1..4 times phase_increment;
+//	confirm against shift_addfast_init(), which is not visible here.
+typedef struct shift_addfast_data_s
+{
+	float dsin[4];
+	float dcos[4];
+	float phase_increment;
+} shift_addfast_data_t;
+shift_addfast_data_t shift_addfast_init(float rate);
+float shift_addfast_cc(complexf *input, complexf* output, int input_size, shift_addfast_data_t* d, float starting_phase);
+
+//Parameters of shift_unroll_cc(), created by shift_unroll_init() for a given rate and size.
+//NOTE(review): dsin/dcos are presumably tables with (size) items each, allocated by
+//	shift_unroll_init(); confirm against its definition, which is not visible here.
+typedef struct shift_unroll_data_s
+{
+	float* dsin;
+	float* dcos;
+	float phase_increment;
+	int size;
+} shift_unroll_data_t;
+float shift_unroll_cc(complexf *input, complexf* output, int input_size, shift_unroll_data_t* d, float starting_phase);
+shift_unroll_data_t shift_unroll_init(float rate, int size);

 int log2n(int x);
 int next_pow2(int x);
--- a/libcsdr_wrapper.c
+++ b/libcsdr_wrapper.c
@ -1,4 +1,5 @@
 #include "libcsdr.c"
 #include "libcsdr_gpl.c"
 #include "ima_adpcm.c"
+#include "fastddc.c"
 //this wrapper helps parsevect.py to generate better output
--- a/2
+++ b/2
@ -0,0 +1,2 @@
+#!/bin/bash
+gcc test200.c --std=gnu99 -o test200 -DUSE_FFTW -DLIBCSDR_GPL -lcsdr
--- a/nmux-todo.md
+++ b/nmux-todo.md
@ -0,0 +1,7 @@
+Remove nmux repo, it will rather be part of csdr
+Try in OpenWebRX
+Add UDP support
+Evaluate performance against ncat
+Remove debug messages
+Document README.md
+Test with a limited number of people
--- a/nmux.cpp
+++ b/nmux.cpp
@ -0,0 +1,374 @@
+/*
+This software is part of libcsdr, a set of simple DSP routines for
+Software Defined Radio.
+
+Copyright (c) 2014, Andras Retzler <randras@sdr.hu>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL ANDRAS RETZLER BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "nmux.h"
+
+char help_text[]="nmux is a TCP stream multiplexer. It reads data from the standard input, and sends it to each client connected through TCP sockets. Available command line options are:\n"
+"\t--port (-p), --address (-a): TCP port and address to listen.\n"
+"\t--bufsize (-b), --bufcnt (-n): Internal buffer size and count.\n"
+"\t--help (-h): Show this message.\n";
+
+int host_port = 0; //TCP port to listen on (--port); 0 means it was not given, which main() treats as an error
+char host_address[100] = "127.0.0.1"; //address to listen on (--address)
+int thread_cntr = 0; //NOTE(review): not used anywhere in the visible part of this file
+
+//CLI parameters
+int bufsize = 1024; //size of one tsmpool buffer in bytes (--bufsize)
+int bufcnt = 1024; //number of tsmpool buffers (--bufcnt)
+
+char** global_argv; //copies of the arguments of main()
+int global_argc;
+tsmpool* pool; //the buffer pool that main() writes the input into and the client threads read from
+
+//Client threads that have run out of data sleep on wait_condition (protected by wait_mutex);
+//main() broadcasts on it after it has taken a new write buffer from the pool.
+pthread_cond_t wait_condition;
+pthread_mutex_t wait_mutex;
+
+//Handler installed by main() for the termination signals: logs the signal number to stderr
+//and ends the process with exit status 0.
+//NOTE(review): fprintf(), fflush() and exit() are not on the POSIX list of async-signal-safe
+//	functions; write() and _exit() would be the strictly safe equivalents.
+void sig_handler(int signo)
+{
+	fprintf(stderr, MSG_START "signal %d caught, exiting...\n", signo);
+	fflush(stderr);
+	exit(0);
+}
+
+/*
+ * nmux entry point.
+ * Parses the command line, opens a listening TCP socket on host_address:host_port, then loops
+ * forever in select(): every accepted connection gets its own client_thread(), and everything
+ * that is read from the standard input is stored in the shared tsmpool, from where the client
+ * threads send it out. The process ends when the input reaches EOF, on a fatal error, or when
+ * one of the handled signals arrives.
+ */
+int main(int argc, char* argv[])
+{
+	global_argv = argv;
+	global_argc = argc;
+	int c;
+	int no_options = 1;
+	for(;;)
+	{
+		int option_index = 0;
+		static struct option long_options[] = {
+		   {"port",       required_argument, 0,  'p' },
+		   {"address",    required_argument, 0,  'a' },
+		   {"bufsize", 	  required_argument, 0,  'b' },
+		   {"bufcnt", 	  required_argument, 0,  'n' },
+		   {"help", 	  no_argument, 		 0,  'h' },
+		   {0,			  0,                 0,  0   }
+		};
+		c = getopt_long(argc, argv, "p:a:b:n:h", long_options, &option_index);
+		if(c==-1) break;
+		no_options = 0;
+		switch (c)
+		{
+		case 'a':
+			//strncpy() does not terminate the string if it gets truncated, so we do
+			strncpy(host_address,optarg,sizeof(host_address)-1);
+			host_address[sizeof(host_address)-1]=0;
+			break;
+		case 'p':
+			host_port=atoi(optarg);
+			break;
+		case 'b':
+			bufsize=atoi(optarg);
+			break;
+		case 'n':
+			bufcnt=atoi(optarg);
+			break;
+		case 'h':
+			print_exit(help_text);
+			break;
+		case 0:
+		case '?':
+		case ':':
+		default:
+			print_exit(MSG_START "error in getopt_long()\n");
+		}
+	}
+
+	if(no_options) print_exit(help_text);
+	if(!host_port) print_exit(MSG_START "missing required command line argument, --port.\n");
+	if(bufsize<=0) print_exit(MSG_START "invalid value for --bufsize (should be >0)\n");
+	if(bufcnt<=0) print_exit(MSG_START "invalid value for --bufcnt (should be >0)\n");
+
+	//set signals (SIGKILL is not in the list: it cannot be caught, sigaction() always fails for it)
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = sig_handler;
+	sigaction(SIGTERM, &sa, NULL);
+	sigaction(SIGQUIT, &sa, NULL);
+	sigaction(SIGINT, &sa, NULL);
+	sigaction(SIGHUP, &sa, NULL);
+
+	struct sockaddr_in addr_host;
+	int listen_socket;
+	std::vector<client_t*> clients;
+	clients.reserve(100);
+	listen_socket=socket(AF_INET,SOCK_STREAM,0);
+	if( listen_socket == -1 )
+		error_exit(MSG_START "cannot create the listening socket");
+
+	int sockopt = 1;
+	if( setsockopt(listen_socket, SOL_SOCKET, SO_REUSEADDR, (char *)&sockopt, sizeof(sockopt)) == -1 )
+		error_exit(MSG_START "cannot set SO_REUSEADDR");  //the best description on SO_REUSEADDR ever: http://stackoverflow.com/a/14388707/3182453
+
+	memset(&addr_host,0,sizeof(addr_host)); //zero bytes, not the '0' character (0x30) as it used to be
+	addr_host.sin_family = AF_INET;
+	addr_host.sin_port = htons(host_port);
+
+	if( (addr_host.sin_addr.s_addr=inet_addr(host_address)) == INADDR_NONE )
+		error_exit(MSG_START "invalid host address");
+
+	if( bind(listen_socket, (struct sockaddr*) &addr_host, sizeof(addr_host)) < 0 )
+		error_exit(MSG_START "cannot bind() address to the socket");
+
+	if( listen(listen_socket, 10) == -1 )
+		error_exit(MSG_START "cannot listen() on socket");
+
+	fprintf(stderr, MSG_START "listening on %s:%d\n", inet_ntoa(addr_host.sin_addr), host_port);
+
+	struct sockaddr_in addr_cli;
+	socklen_t addr_cli_len;
+	int new_socket;
+
+	int highfd = 0;
+	maxfd(&highfd, listen_socket);
+	maxfd(&highfd, STDIN_FILENO);
+
+	fd_set select_fds;
+
+	//Set stdin and listen_socket to non-blocking
+	if(set_nonblocking(STDIN_FILENO) || set_nonblocking(listen_socket))
+		error_exit(MSG_START "cannot set_nonblocking()");
+
+	//Create tsmpool
+	pool = new tsmpool(bufsize, bufcnt);
+	if(!pool->is_ok()) print_exit(MSG_START "tsmpool failed to initialize\n");
+
+	unsigned char* current_write_buffer = (unsigned char*)pool->get_write_buffer();
+	int index_in_current_write_buffer = 0;
+
+	//Create wait condition: client threads waiting for input data from the main thread will be
+	//	waiting on this condition. They will be woken up with pthread_cond_broadcast() if new
+	//	data arrives.
+	if(pthread_cond_init(&wait_condition, NULL))
+		print_exit(MSG_START "pthread_cond_init failed"); //cond_attrs is ignored by Linux
+
+	if(pthread_mutex_init(&wait_mutex, NULL))
+		print_exit(MSG_START "pthread_mutex_init failed");
+
+	for(;;)
+	{
+		FD_ZERO(&select_fds);
+		FD_SET(listen_socket, &select_fds);
+		FD_SET(STDIN_FILENO, &select_fds);
+
+		if(NMUX_DEBUG) fprintf(stderr, "mainfor: selecting...");
+		//Let's wait until there is any new data to read, or any new connection!
+		int select_ret = select(highfd, &select_fds, NULL, NULL, NULL);
+		if(NMUX_DEBUG) fprintf(stderr, "selected.\n");
+		if(select_ret == -1) error_exit("mainfor select() error");
+
+		//Is there a new client connection?
+		addr_cli_len = sizeof(addr_cli); //accept() overwrites it, so it has to be reset before every call
+		if( FD_ISSET(listen_socket, &select_fds) && ((new_socket = accept(listen_socket, (struct sockaddr*)&addr_cli, &addr_cli_len)) != -1) )
+		{
+			if(NMUX_DEBUG)
+			{
+				fprintf(stderr, "\x1b[1m\x1b[33mmainfor: clients before closing: ");
+				for(size_t i=0;i<clients.size();i++) fprintf(stderr, "%p ", (void*)clients[i]);
+				fprintf(stderr, "\x1b[0m\n");
+			}
+			if(NMUX_DEBUG) fprintf(stderr, "mainfor: accepted (socket = %d).\n", new_socket);
+			//Close all finished clients
+			for(size_t i=0;i<clients.size();)
+			{
+				client_t* finished = clients[i];
+				if(finished->status != CS_THREAD_FINISHED) { i++; continue; }
+				if(NMUX_DEBUG) fprintf(stderr, "mainfor: client removed: %d\n", (int)i);
+				//client destructor: the thread is over (or is just about to return), so we
+				//	release everything that belonged to it
+				pthread_join(finished->thread, NULL);
+				close(finished->socket);
+				pool->remove_thread(finished->tsmthread);
+				delete finished;
+				clients.erase(clients.begin()+i); //the next client moves to index i, so i is not incremented
+			}
+			if(NMUX_DEBUG)
+			{
+				fprintf(stderr, "\x1b[1m\x1b[33mmainfor: clients after closing: ");
+				for(size_t i=0;i<clients.size();i++) fprintf(stderr, "%p ", (void*)clients[i]);
+				fprintf(stderr, "\x1b[0m\n");
+			}
+
+			//We're the parent, let's create a new client and initialize it
+			client_t* new_client = new client_t;
+			new_client->error = 0;
+			memcpy(&new_client->addr, &addr_cli, sizeof(struct sockaddr_in));
+			new_client->socket = new_socket;
+			new_client->status = CS_CREATED;
+			new_client->tsmthread = pool->register_thread();
+			new_client->lpool = pool;
+			new_client->sleeping = 0;
+			if(pthread_create(&new_client->thread, NULL, client_thread, (void*)new_client)==0)
+			{
+				clients.push_back(new_client);
+				fprintf(stderr, MSG_START "pthread_create() done, clients now: %d\n", (int)clients.size());
+			}
+			else
+			{
+				fprintf(stderr, MSG_START "pthread_create() failed.\n");
+				pool->remove_thread(new_client->tsmthread);
+				close(new_socket); //nobody is going to serve this connection
+				delete new_client;
+			}
+		}
+
+		if( FD_ISSET(STDIN_FILENO, &select_fds) )
+		{
+			if(index_in_current_write_buffer >= bufsize)
+			{
+				if(NMUX_DEBUG) fprintf(stderr, "mainfor: gwbing...");
+				current_write_buffer = (unsigned char*)pool->get_write_buffer();
+				if(NMUX_DEBUG) fprintf(stderr, "gwbed.\nmainfor: cond broadcasting...");
+				pthread_mutex_lock(&wait_mutex);
+				pthread_cond_broadcast(&wait_condition);
+				pthread_mutex_unlock(&wait_mutex);
+				if(NMUX_DEBUG) fprintf(stderr, "cond broadcasted.\n");
+					//Shouldn't we do it after we put data in?
+					//	No, on get_write_buffer() actually the previous buffer is getting available
+					//	for read for threads that wait for new data (wait on global pthead mutex
+					//	wait_condition).
+				index_in_current_write_buffer = 0;
+			}
+
+			if(NMUX_DEBUG) fprintf(stderr, "mainfor: reading...\n");
+			int read_ret = read(STDIN_FILENO, current_write_buffer + index_in_current_write_buffer, bufsize - index_in_current_write_buffer);
+			if(NMUX_DEBUG) fprintf(stderr, "read %d\n", read_ret);
+			if(read_ret>0)
+			{
+				index_in_current_write_buffer += read_ret;
+			}
+			else if(read_ret==0)
+			{
+				//End of input stream, close clients and exit
+				print_exit(MSG_START "(main thread/for) end input stream, exiting.\n");
+			}
+			else if(read_ret==-1)
+			{
+				if(errno == EAGAIN) { if(NMUX_DEBUG) fprintf(stderr, "mainfor: read EAGAIN\n"); /* seems like select would block forever, so we just read again */ }
+				else error_exit(MSG_START "(main thread/for) error in read(), exiting.\n");
+			}
+		}
+	}
+}
+
+/*
+ * Body of the thread that serves one TCP client; param is the client_t that main() has set up.
+ * It takes the buffers of the shared tsmpool one after the other and sends them to the client
+ * socket, sleeping on wait_condition while there is no new buffer to read. When poll() or
+ * send() fails, it sets the status of the client to CS_THREAD_FINISHED and returns; the socket
+ * is left open, it belongs to the code that cleans up the finished clients.
+ */
+void* client_thread (void* param)
+{
+	fprintf(stderr, "client %p: started!\n", param);
+	client_t* this_client = (client_t*)param;
+	this_client->status = CS_THREAD_RUNNING;
+	tsmpool* lpool = this_client->lpool;
+	if(NMUX_DEBUG) fprintf(stderr, "client %p: socket = %d!\n", param, this_client->socket);
+
+	if(NMUX_DEBUG) fprintf(stderr, "client %p: poll init...", param);
+	struct pollfd pollfds[1];
+	pollfds[0].fd = this_client->socket;
+	pollfds[0].events = POLLOUT;
+	pollfds[0].revents = 0;
+	if(NMUX_DEBUG) fprintf(stderr, "client poll inited.\n");
+
+	//Set this_client->socket to non-blocking
+	if(set_nonblocking(this_client->socket))
+		error_exit(MSG_START "cannot set_nonblocking() on this_client->socket");
+
+	int client_buffer_index = 0; //number of bytes already sent from pool_read_buffer
+	int client_goto_source = 0; //tells in the log which of the gotos has ended the thread
+	char* pool_read_buffer = NULL;
+
+	for(;;)
+	{
+		//Wait until there is any data to send.
+		//  If I haven't sent all the data from my last buffer, don't wait.
+		//	(Wait for the server process to wake me up.)
+		if(!pool_read_buffer || (size_t)client_buffer_index >= lpool->size)
+		{
+			//We hold wait_mutex while we check for data: the main thread has to take it for
+			//	the broadcast, so the wakeup cannot slip in between our check and our wait.
+			pthread_mutex_lock(&wait_mutex);
+			for(;;)
+			{
+				if(NMUX_DEBUG) fprintf(stderr, "client %p: trying to grb\n", param);
+				pool_read_buffer = (char*)lpool->get_read_buffer(this_client->tsmthread);
+				if(pool_read_buffer) break;
+				if(NMUX_DEBUG) fprintf(stderr, "client %p: cond_waiting for more data\n", param);
+				this_client->sleeping = 1;
+				pthread_cond_wait(&wait_condition, &wait_mutex);
+				this_client->sleeping = 0;
+			}
+			pthread_mutex_unlock(&wait_mutex);
+			client_buffer_index = 0;
+		}
+
+		//Wait for the socket to be available for write.
+		if(NMUX_DEBUG) fprintf(stderr, "client %p: polling for socket write...", param);
+		int ret = poll(pollfds, 1, -1);
+		if(NMUX_DEBUG) fprintf(stderr, "client polled for socket write.\n");
+		if(ret == 0) continue;
+		else if (ret == -1) { client_goto_source = 1; goto client_thread_exit; }
+
+		//Read data from global tsmpool and write it to client socket
+		if(NMUX_DEBUG) fprintf(stderr, "client %p: sending...", param);
+		ret = send(this_client->socket, pool_read_buffer + client_buffer_index, lpool->size - client_buffer_index, MSG_NOSIGNAL);
+		if(NMUX_DEBUG) fprintf(stderr, "client sent.\n");
+		if(ret == -1)
+		{
+			switch(errno)
+			{
+				case EAGAIN: break; //the socket buffer is full, we will try again after the next poll()
+				default: client_goto_source = 2; goto client_thread_exit;
+			}
+		}
+		else client_buffer_index += ret; //a partial send just moves the index forward
+	}
+
+client_thread_exit:
+	fprintf(stderr, "client %p: CS_THREAD_FINISHED, client_goto_source = %d, errno = %d\n", param, client_goto_source, errno);
+	this_client->status = CS_THREAD_FINISHED;
+	pthread_exit(NULL);
+	return NULL;
+}
+
+
+//Adds O_NONBLOCK to the file status flags of fd.
+//Returns 0 on success and 1 if either of the two fcntl() calls has failed.
+int set_nonblocking(int fd)
+{
+	int flags = fcntl(fd, F_GETFL);
+	if(flags == -1) return 1;
+	if(fcntl(fd, F_SETFL, flags|O_NONBLOCK) == -1) return 1;
+	return 0;
+}
+
+//Fatal error after a failed system call: prints (why) together with the description of the
+//current errno through perror(), then ends the process with exit status 1.
+void error_exit(const char* why)
+{
+	perror(why); //do we need a \n at the end of (why)?
+	exit(1);
+}
+
+//Prints (why) to stderr as it is (nothing is appended to it), then ends the process with exit status 1.
+void print_exit(const char* why)
+{
+	fprintf(stderr, "%s", why);
+	exit(1);
+}
+
+//Keeps *maxfd at least one above every fd that has been passed in, which is the value that
+//select() expects as its first argument.
+void maxfd(int* maxfd, int fd)
+{
+	const int needed = fd+1;
+	if(*maxfd < needed) *maxfd = needed;
+}
--- a/nmux.h
+++ b/nmux.h
@ -0,0 +1,45 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <signal.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include "tsmpool.h"
+
+#define MSG_START "nmux: "
+#define NMUX_DEBUG 0
+
+//Life cycle of a client, stored in client_t.status.
+typedef enum client_status_e
+{
+	CS_CREATED, //set by main() before it starts the thread of the client
+	CS_THREAD_RUNNING, //set by client_thread() when it starts
+	CS_THREAD_FINISHED //set by client_thread() just before it ends; main() removes such clients from its list
+} client_status_t;
+
+
+//Everything that belongs to one connected TCP client. It is filled in by main(), then passed
+//to client_thread() as its parameter.
+typedef struct client_s
+{
+	struct sockaddr_in addr; //address of the peer, as returned by accept()
+	int socket; //the connected socket
+	int error; //set to non-zero on error (data transfer failed)
+	pthread_t thread; //the thread that runs client_thread() for this client
+    tsmthread_t* tsmthread; //the reader registration of this client in the tsmpool
+	client_status_t status;
+    //the following members are there to give access to some global variables inside the thread:
+    tsmpool* lpool; 
+    int sleeping; //set by client_thread() before it waits on wait_condition
+} client_t;
+
+void print_exit(const char* why);
+void sig_handler(int signo);
+void* client_thread (void* param);
+void error_exit(const char* why);
+void maxfd(int* maxfd, int fd);
+int set_nonblocking(int fd);
--- a/test200.c
+++ b/test200.c
@ -0,0 +1,124 @@
+/*
+This software is part of libcsdr, a set of simple DSP routines for 
+Software Defined Radio.
+
+Copyright (c) 2014-2015, Andras Retzler <randras@sdr.hu>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL ANDRAS RETZLER BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+
+#include "libcsdr.h"
+#include "libcsdr_gpl.h"
+
+#define T_BUFSIZE (1024*1024/4)
+#define T_N (200)
+#define T_TAPS (1023)
+#define T_DECFACT (200)
+
+/*
+ * Benchmark: fills a buffer with random complex samples, then measures how long it takes to
+ * run fir_decimate_cc and each of the shift_*_cc implementations T_N times over it.
+ * The results are printed to stderr. Returns 0, or 1 if the test data could not be prepared.
+ */
+int main()
+{
+	fprintf(stderr,"Getting a %d of random samples...\n", T_BUFSIZE);
+	unsigned char* buf_u8 = (unsigned char*)malloc(sizeof(unsigned char)*T_BUFSIZE*2);
+	complexf* buf_c = (complexf*)malloc(sizeof(complexf)*T_BUFSIZE);
+	complexf* outbuf_c = (complexf*)malloc(sizeof(complexf)*T_BUFSIZE);
+	float* taps_f = (float*)malloc(sizeof(float)*T_TAPS);
+	if(!buf_u8 || !buf_c || !outbuf_c || !taps_f) { fprintf(stderr,"malloc failed\n"); return 1; }
+
+	int urand_fp = open("/dev/urandom",O_RDONLY); //we only read from it
+	if(urand_fp == -1) { perror("open /dev/urandom"); return 1; }
+	//Every complex sample takes two bytes (I and Q), so we need T_BUFSIZE*2 bytes, and read()
+	//	is allowed to return less than what we ask for.
+	size_t bytes_wanted = (size_t)T_BUFSIZE*2;
+	size_t bytes_got = 0;
+	while(bytes_got < bytes_wanted)
+	{
+		ssize_t read_ret = read(urand_fp, buf_u8+bytes_got, bytes_wanted-bytes_got);
+		if(read_ret <= 0) { perror("read /dev/urandom"); return 1; }
+		bytes_got += (size_t)read_ret;
+	}
+	close(urand_fp);
+
+	for(int i=0;i<T_BUFSIZE;i++)
+	{
+		iof(buf_c,i)=buf_u8[2*i]/128.0;
+		qof(buf_c,i)=buf_u8[2*i+1]/128.0;
+	}
+
+	firdes_lowpass_f(taps_f, T_TAPS, 1.0f/T_DECFACT, WINDOW_DEFAULT);
+
+	struct timespec start_time, end_time;
+
+	fprintf(stderr,"Starting tests of processing %d samples...\n", T_BUFSIZE*T_N);
+
+	//fir_decimate_cc
+	clock_gettime(CLOCK_MONOTONIC_RAW, &start_time);
+	for(int i=0;i<T_N;i++) fir_decimate_cc(buf_c, outbuf_c, T_BUFSIZE, 10, taps_f, T_TAPS);
+	clock_gettime(CLOCK_MONOTONIC_RAW, &end_time);
+	fprintf(stderr,"fir_decimate_cc done in %g seconds.\n",TIME_TAKEN(start_time,end_time));
+
+	//shift_math_cc
+	float starting_phase = 0;
+
+	clock_gettime(CLOCK_MONOTONIC_RAW, &start_time);
+	for(int i=0;i<T_N;i++) starting_phase = shift_math_cc(buf_c, outbuf_c, T_BUFSIZE, 0.1, starting_phase);
+	clock_gettime(CLOCK_MONOTONIC_RAW, &end_time);
+	fprintf(stderr,"shift_math_cc done in %g seconds.\n",TIME_TAKEN(start_time,end_time));
+
+	//shift_table_cc
+	shift_table_data_t shift_table_data=shift_table_init(65536);
+	starting_phase = 0;
+
+	clock_gettime(CLOCK_MONOTONIC_RAW, &start_time);
+	for(int i=0;i<T_N;i++) starting_phase = shift_table_cc(buf_c, outbuf_c, T_BUFSIZE, 0.1, shift_table_data, starting_phase);
+	clock_gettime(CLOCK_MONOTONIC_RAW, &end_time);
+	fprintf(stderr,"shift_table_cc (table size = %d) done in %g seconds.\n",65536,TIME_TAKEN(start_time,end_time));
+
+	//shift_addition_cc
+	shift_addition_data_t data_addition = shift_addition_init(0.1);
+	starting_phase = 0;
+
+	clock_gettime(CLOCK_MONOTONIC_RAW, &start_time);
+	for(int i=0;i<T_N;i++) starting_phase = shift_addition_cc(buf_c, outbuf_c, T_BUFSIZE, data_addition, starting_phase);
+	clock_gettime(CLOCK_MONOTONIC_RAW, &end_time);
+	fprintf(stderr,"shift_addition_cc done in %g seconds.\n",TIME_TAKEN(start_time,end_time));
+
+	//shift_addfast_cc
+	shift_addfast_data_t data_addfast = shift_addfast_init(0.1);
+	starting_phase = 0;
+
+	clock_gettime(CLOCK_MONOTONIC_RAW, &start_time);
+	for(int i=0;i<T_N;i++) starting_phase = shift_addfast_cc(buf_c, outbuf_c, T_BUFSIZE, &data_addfast, starting_phase);
+	clock_gettime(CLOCK_MONOTONIC_RAW, &end_time);
+	fprintf(stderr,"shift_addfast_cc done in %g seconds.\n",TIME_TAKEN(start_time,end_time));
+
+	//shift_unroll_cc
+	shift_unroll_data_t data_unroll = shift_unroll_init(0.1, T_BUFSIZE);
+	starting_phase = 0;
+
+	clock_gettime(CLOCK_MONOTONIC_RAW, &start_time);
+	for(int i=0;i<T_N;i++) starting_phase = shift_unroll_cc(buf_c, outbuf_c, T_BUFSIZE, &data_unroll, starting_phase);
+	clock_gettime(CLOCK_MONOTONIC_RAW, &end_time);
+	fprintf(stderr,"shift_unroll_cc done in %g seconds.\n",TIME_TAKEN(start_time,end_time));
+
+	return 0;
+}
--- a/tsmpool.cpp
+++ b/tsmpool.cpp
@ -0,0 +1,74 @@
+#include "tsmpool.h"
+
+//Creates the pool: initializes the mutex and allocates (num) buffers of (size) bytes each.
+//The constructor does not throw: if anything fails, is_ok() will return 0 afterwards.
+tsmpool::tsmpool(size_t size, int num) :
+	size(size), 
+	num(num) //number of buffers of (size) to alloc
+{
+	this->threads_cntr = 0;
+	this->ok = 1;
+	this->lowest_read_index = -1;
+	this->write_index = 0;
+	this->my_read_index = index_before(0);
+	if (pthread_mutex_init(&this->mutex, NULL) != 0) { this->ok = 0; return; }
+	//new[] signals a failed allocation by throwing (it never returns NULL), so that is what we
+	//	turn into ok = 0 here
+	try
+	{
+		buffers.reserve(num);
+		for(int i=0; i<num; i++) buffers.push_back((void*)new char[size]);
+	}
+	catch(...) { this->ok = 0; }
+}
+
+//Returns 1 if the constructor has succeeded, 0 if it has failed.
+int tsmpool::is_ok() { return this->ok; }
+
+//Hands out the buffer at write_index to the writer and moves write_index to the next buffer.
+//It never fails: the readers are not checked, so a slow reader can get overrun.
+void* tsmpool::get_write_buffer()
+{
+	//if(write_index==index_before(lowest_read_index)) return NULL;
+	pthread_mutex_lock(&mutex);
+	const int claimed = write_index;
+	void* buffer = buffers[claimed];
+	write_index = index_next(claimed);
+	pthread_mutex_unlock(&mutex);
+	if(TSM_DEBUG) fprintf(stderr, "gwb: write_index = %d\n", write_index);
+	return buffer;
+}
+
+//Registers a new reader and returns its handle, or NULL if the pool has failed to initialize.
+//The new reader starts at the buffer just before write_index.
+tsmthread_t* tsmpool::register_thread()
+{
+	if(!this->ok) return NULL;
+	pthread_mutex_lock(&mutex);
+	tsmthread_t* reader = new tsmthread_t();
+	reader->read_index = index_before(write_index);
+	threads.push_back(reader);
+	pthread_mutex_unlock(&mutex);
+	return reader;
+}
+
+//Unregisters a reader that register_thread() has returned, and frees it.
+//Returns 0 if the reader has been found and removed, -1 if it was not registered in this pool.
+//(Previously the function ended without a return statement, which is undefined behavior.)
+int tsmpool::remove_thread(tsmthread_t* thread)
+{
+	int result = -1;
+	pthread_mutex_lock(&this->mutex);
+	for(size_t i=0;i<threads.size();i++)
+		if(threads[i] == thread)
+		{
+			delete threads[i];
+			threads.erase(threads.begin()+i);
+			result = 0;
+			break;
+		}
+	pthread_mutex_unlock(&this->mutex);
+	return result;
+}
+
+//Returns the next buffer to be read by the given reader and moves its read index forward.
+//With thread==NULL the read index of the pool itself (my_read_index) is used.
+//Returns NULL if the read index is just before write_index, i.e. there is nothing new to read.
+void* tsmpool::get_read_buffer(tsmthread_t* thread)
+{
+	pthread_mutex_lock(&mutex);
+	int& cursor = (thread==NULL) ? my_read_index : thread->read_index;
+	if(cursor==index_before(write_index))
+	{
+		if(TSM_DEBUG) fprintf(stderr, "grb: fail,"
+			"read_index %d is just before write_index\n", cursor);
+		pthread_mutex_unlock(&mutex);
+		return NULL;
+	}
+	void* buffer = buffers[cursor];
+	cursor=index_next(cursor);
+	pthread_mutex_unlock(&mutex);
+	if(TSM_DEBUG) fprintf(stderr, "grb: read_index = %d\n", cursor);
+	return buffer;
+}
--- a/tsmpool.h
+++ b/tsmpool.h
@ -0,0 +1,43 @@
+//tsmpool stands for Thread-Safe Memory Pool.
+
+//It implements a big circular buffer that one thread writes into, and multiple threads read from.
+//The reader threads have lower priority than the writer thread (they can be left behind if they don't read fast enough).
+
+#include <vector>
+#include <pthread.h>
+
+#define TSM_DEBUG 0
+#include <stdio.h>
+
+using namespace std;
+
+//Handle of one reader of a tsmpool; created by tsmpool::register_thread() and freed by
+//tsmpool::remove_thread().
+typedef struct tsmthread_s
+{
+	int read_index; //it always points to the next buffer to be read
+} tsmthread_t;
+
+//A ring of (num) buffers of (size) bytes each, with one writer and any number of registered
+//readers. The indexes are protected by an internal mutex; the contents of the buffers are not.
+//NOTE(review): no destructor is declared, so the buffers, the registered readers and the mutex
+//	are not released when a tsmpool is destroyed.
+class tsmpool
+{
+private:
+	vector<tsmthread_t*> threads; //the readers that register_thread() has handed out
+	vector<void*> buffers; //the (num) buffers of the ring
+	int threads_cntr;
+	pthread_mutex_t mutex; //protects write_index, the read indexes and the threads vector
+	int ok; //tsmpool is expected to be included in C-style programs. 
+			//	If something fails in the constructor, it will be seen here instead of a try{}catch{}
+	int write_index; //it always points to the next buffer to be written
+	int lowest_read_index; //unused
+	int my_read_index; //it is used when tsmpool is used as a single writer - single reader circular buffer
+
+public:
+	const size_t size; //size of one buffer in bytes
+	const int num; //number of buffers
+	int is_ok(); //1 if the constructor has succeeded
+	tsmpool(size_t size, int num);
+	void* get_write_buffer(); //returns the buffer at write_index and steps write_index forward
+	tsmthread_t* register_thread(); //adds a reader; returns NULL if the pool is not ok
+	int remove_thread(tsmthread_t* thread); //removes and frees a reader
+	void* get_read_buffer(tsmthread_t* thread); //next buffer for the reader (NULL: use my_read_index), or NULL if there is nothing new
+	int index_next(int index) { return (index+1==num)?0:index+1; } //index+1, wrapping around at num
+	int index_before(int index) { return (index-1<0)?num-1:index-1; } //index-1, wrapping around at 0
+};