@comment{Journal article on a fully systolic 4096-point FFT architecture. Key is the numeric export id and is kept as-is so existing citations keep resolving.}
@article{3028413,
  author   = {Babionitakis, K. and Chouliaras, V. A. and Manolopoulos, K. and Nakos, K. and Reisis, D. and Vlassopoulos, N.},
  title    = {Fully systolic {FFT} architecture for giga-sample applications},
  journal  = {Journal of Signal Processing Systems},
  year     = {2010},
  volume   = {58},
  number   = {3},
  pages    = {281--299},
  issn     = {1939-8018, 1939-8115},
  doi      = {10.1007/s11265-009-0364-1},
  keywords = {Clock frequency; CMOS processs; Input datas; K-complex; Operating frequency; Radix 2; Radix-4; Real time; Realtime processing, CMOS integrated circuits; Field programmable gate arrays (FPGA), Fast Fourier transforms},
  abstract = {We present a novel 4096 complex-point, fully systolic VLSI FFT architecture based on the combination of three consecutive radix-4 stages resulting in a 64-point FFT engine. The outcome of cascading these 64-point FFT engines is an improved architecture that efficiently processes large input data sets in real time. Using 64-point FFT engines reduces the buffering and the latency to one third of a fully unfolded radix-4 architecture, while the radix-4 schema simplifies the calculations within each engine. The proposed 4096 complex point architecture has been implemented on a FPGA achieving a post-route clock frequency of 200 MHz resulting in a sustained throughput of 4096 point/20.48 μs. It has also been implemented on a high performance 0.13 μm, 1P8M CMOS process achieving a worst-case (0.9 V, 125 C) post-route clock frequency of 604.5 MHz and a sustained throughput of 4096 point/3.89 μs while consuming 4.4 W. The architecture is extended to accomplish FFT computations of 16K, 64K and 256K complex points with 352, 256 and 188 MHz operating frequencies respectively. © 2009 Springer Science+Business Media, LLC.},
}