Commit 0096d462550a4852259c89b83e36e613d0ed73eb
Exists in
master
Merge branch 'master' of https://lxsd.femto-st.fr/gitlab/jfriedt/ifcs2018-article
Showing 11 changed files Side-by-side Diff
ifcs2018_journal.tex
... | ... | @@ -142,9 +142,9 @@ |
142 | 142 | and for any hardware platform (Altera, Xilinx...). To do this we have defined an |
143 | 143 | abstract model to represent some basic operations of DSP. |
144 | 144 | |
145 | -For the moment, we are focused on only two operations: the filtering and the shift of data. | |
145 | +For the moment, we are focused on only two operations: the filtering and the shifting of data. | |
146 | 146 | We have chosen this basic operation because the shifting and the filtering have already be studied in |
147 | -lot of works {\color{red} mettre les nouvelles référence ici} hence it will be easier | |
147 | +lot of works \cite{lim_1996, lim_1988, young_1992, smith_1998} hence it will be easier | |
148 | 148 | to check and validate our results. |
149 | 149 | |
150 | 150 | However having only two operations is insufficient to work with complex DSP but |
151 | 151 | |
152 | 152 | |
153 | 153 | |
154 | 154 | |
155 | 155 | |
156 | 156 | |
157 | 157 | |
158 | 158 | |
159 | 159 | |
160 | 160 | |
161 | 161 | |
162 | 162 | |
163 | 163 | |
164 | 164 | |
165 | 165 | |
166 | 166 | |
167 | 167 | |
168 | 168 | |
169 | 169 | |
170 | 170 | |
171 | 171 | |
172 | 172 | |
173 | 173 | |
174 | 174 | |
... | ... | @@ -283,169 +283,568 @@ |
283 | 283 | \label{fig:sum_rejection} |
284 | 284 | \end{figure} |
285 | 285 | |
286 | +Finally we can describe our abstract model with following expressions : | |
287 | +\begin{align} | |
288 | +\text{Maximize } & \sum_{i=1}^n r_i \notag \\ | |
289 | +\sum_{i=1}^n a_i & \leq \mathcal{A} & \label{eq:area} \\ | |
290 | +a_i & = C_i \times (\pi_i^C + \pi_i^-), & \forall i \in [1, n] \label{eq:areadef} \\ | |
291 | +r_i & = F(C_i, \pi_i^C), & \forall i \in [1, n] \label{eq:rejectiondef} \\ | |
292 | +\pi_i^+ & = \pi_i^- + \pi_i^C - \pi_i^S, & \forall i \in [1, n] \label{eq:bits} \\ | |
293 | +\pi_{i - 1}^+ & = \pi_i^-, & \forall i \in [2, n] \label{eq:inout} \\ | |
294 | +\pi_i^+ & \geq 1 + \sum_{k=1}^{i} \left(1 + \frac{r_j}{6}\right), & \forall i \in [1, n] \label{eq:maxshift} \\ | |
295 | +\pi_1^- &= \Pi^I \label{eq:init} | |
296 | +\end{align} | |
297 | + | |
298 | +{\color{red} Je sais que l'idée est de ne pas parler du programme linéaire mais | |
299 | +ça me semble quand même indispensable. Au pire, j'essaierai de revoir ça si on | |
300 | +est vraiment en manque de place.} | |
301 | + | |
302 | +Equation~\ref{eq:area} states that the total area taken by the filters must be | |
303 | +less than the available area. Equation~\ref{eq:areadef} gives the definition of | |
304 | +the area for a filter. More precisely, it is the area of the FIR as the Shifter | |
305 | +does not need any circuitry. We consider that the FIR needs $C_i$ registers of size | |
306 | +$\pi_i^C + \pi_i^-$~bits to store the results of the multiplications of the | |
307 | +input data and the coefficients. Equation~\ref{eq:rejectiondef} gives the | |
308 | +definition of the rejection of the filter thanks to function~$F$ that we defined | |
309 | +previously. The Shifter does not introduce negative rejection as we explain later, | |
310 | +so the rejection only comes from the FIR. Equation~\ref{eq:bits} states the | |
311 | +relation between $\pi_i^+$ and $\pi_i^-$. The multiplications in the FIR add | |
312 | +$\pi_i^C$ bits as most coefficients are close to zero, and the Shifter removes | |
313 | +$\pi_i^S$ bits. Equation~\ref{eq:inout} states that the output number of bits of | |
314 | +a filter is the same as the input number of bits of the next filter. | |
315 | +Equation~\ref{eq:maxshift} ensures that the Shifter does not introduce negative | |
316 | +rejection. Indeed, the results of the FIR can be right shifted without compromising | |
317 | +the quality of the rejection until a threshold. Each bit of the output data | |
318 | +increases the maximum rejection level of 6~dB. We add one to take the sign bit | |
319 | +into account. If equation~\ref{eq:maxshift} was not present, the Shifter could | |
320 | +shift too much and introduce some noise in the output data. Each supplementary | |
321 | +shift bit would cause 6~dB of noise. A totally equivalent equation is: | |
322 | +$\pi_i^S \leq \pi_i^- + \pi_i^C - 1 - \sum_{k=1}^{i} \left(1 + \frac{r_j}{6}\right) $. | |
323 | +Finally, equation~\ref{eq:init} gives the global input's number of bits. | |
324 | + | |
325 | +This model is non-linear and even non-quadratic, as $F$ does not have a known | |
326 | +linear or quadratic expression. We introduce $p$ FIR configurations | |
327 | + $(C_{ij}, \pi_{ij}^C), 1 \leq j \leq p$ that are constants. We define binary | |
328 | + variable $\delta_{ij}$ that has value 1 if stage~$i$ is in configuration~$j$ | |
329 | + and 0 otherwise. The new equations are as follows: | |
330 | + | |
331 | +\begin{align} | |
332 | +a_i & = \sum_{j=1}^p \delta_{ij} \times C_{ij} \times (\pi_{ij}^C + \pi_i^-), & \forall i \in [1, n] \label{eq:areadef2} \\ | |
333 | +r_i & = \sum_{j=1}^p \delta_{ij} \times F(C_{ij}, \pi_{ij}^C), & \forall i \in [1, n] \label{eq:rejectiondef2} \\ | |
334 | +\pi_i^+ & = \pi_i^- + \left(\sum_{j=1}^p \delta_{ij} \pi_{ij}^C\right) - \pi_i^S, & \forall i \in [1, n] \label{eq:bits2} \\ | |
335 | +\sum_{j=1}^p \delta_{ij} & \leq 1, & \forall i \in [1, n] \label{eq:config} | |
336 | +\end{align} | |
337 | + | |
338 | +Equations \ref{eq:areadef2}, \ref{eq:rejectiondef2} and \ref{eq:bits2} replace | |
339 | +respectively equations \ref{eq:areadef}, \ref{eq:rejectiondef} and \ref{eq:bits}. | |
340 | +Equation~\ref{eq:config} states that for each stage, a single configuration is chosen at most. | |
341 | + | |
342 | +The next section shows the results for this quadratic program but the section~\ref{sec:fixed_rej} | |
343 | +presents the results for the complementary problem. In this case we want | |
344 | +minimize the occupied area for a targeted rejection level. Hence we have replace | |
345 | +the objective function with: | |
346 | +\begin{align} | |
347 | +\text{Minimize } & \sum_{i=1}^n a_i \notag | |
348 | +\end{align} | |
349 | +We adapt our constraints of quadratic program to replace the equation \ref{eq:area} | |
350 | +by the equation \ref{eq:rejection_min} where $\mathcal{R}$ is the minimal | |
351 | +rejection required. | |
352 | + | |
353 | +\begin{align} | |
354 | +\sum_{i=1}^n r_i & \geq \mathcal{R} & \label{eq:rejection_min} | |
355 | +\end{align} | |
356 | + | |
357 | +\section{Design workflow} | |
358 | +\label{sec:workflow} | |
359 | + | |
360 | +In this section, we describe the workflow to compute all the results presented in section~\ref{sec:fixed_area}. | |
361 | +Figure~\ref{fig:workflow} shows the global workflow and the different steps involved in the computations of the results. | |
362 | + | |
363 | +\begin{figure} | |
364 | + \centering | |
365 | + \begin{tikzpicture}[node distance=0.75cm and 2cm] | |
366 | + \node[draw,minimum size=1cm] (Solver) { Filter Solver } ; | |
367 | + \node (Start) [left= 3cm of Solver] { } ; | |
368 | + \node[draw,minimum size=1cm] (TCL) [right= of Solver] { TCL Script } ; | |
369 | + \node (Input) [above= of TCL] { } ; | |
370 | + \node[draw,minimum size=1cm] (Deploy) [below= of Solver] { Deploy Script } ; | |
371 | + \node[draw,minimum size=1cm] (Bitstream) [below= of TCL] { Bitstream } ; | |
372 | + \node[draw,minimum size=1cm,rounded corners] (Board) [below right= of Deploy] { Board } ; | |
373 | + \node[draw,minimum size=1cm] (Postproc) [below= of Deploy] { Post-Processing } ; | |
374 | + \node (Results) [left= of Postproc] { } ; | |
375 | + | |
376 | + \draw[->] (Start) edge node [above] { $\mathcal{A}, n, \Pi^I$ } node [below] { $(C_{ij}, \pi_{ij}^C), F$ } (Solver) ; | |
377 | + \draw[->] (Input) edge node [left] { ADC or PRN } (TCL) ; | |
378 | + \draw[->] (Solver) edge node [below] { (1a) } (TCL) ; | |
379 | + \draw[->] (Solver) edge node [right] { (1b) } (Deploy) ; | |
380 | + \draw[->] (TCL) edge node [left] { (2) } (Bitstream) ; | |
381 | + \draw[->,dashed] (Bitstream) -- (Deploy) ; | |
382 | + \draw[->] (Deploy) to[out=-30,in=120] node [above] { (3) } (Board) ; | |
383 | + \draw[->] (Board) to[out=150,in=-60] node [below] { (4) } (Deploy) ; | |
384 | + \draw[->] (Deploy) edge node [left] { (5) } (Postproc) ; | |
385 | + \draw[->] (Postproc) -- (Results) ; | |
386 | + \end{tikzpicture} | |
387 | + \caption{Design workflow from the input parameters to the results} | |
388 | + \label{fig:workflow} | |
389 | +\end{figure} | |
390 | + | |
391 | +The filter solver is a C++ program that takes as input the maximum area | |
392 | +$\mathcal{A}$, the number of stages $n$, the size of the input signal $\Pi^I$, | |
393 | +the FIR configurations $(C_{ij}, \pi_{ij}^C)$ and the function $F$. It creates | |
394 | +the quadratic programs and uses the Gurobi solver to get the optimal results. | |
395 | +Then it produces two scripts: a TCL script ((1a) on figure~\ref{fig:workflow}) | |
396 | +and a deploy script ((1b) on figure~\ref{fig:workflow}). | |
397 | + | |
398 | +The TCL script describes the whole digital processing chain from the beginning | |
399 | +(the raw signal data) to the end (the filtered data). | |
400 | +The raw input data generated from a Pseudo Random Number (PRN) | |
401 | +generator inside the FPGA and $\Pi^I$ is fixed at 16~bits. | |
402 | +Then the script builds each stage of the chain with a generic FIR task that | |
403 | +comes from a skeleton library. The generic FIR is highly configurable | |
404 | +with the number of coefficients and the size of the coefficients. The coefficients | |
405 | +themselves are not stored in the script. | |
406 | +Whereas the signal is processed in real-time, the output signal is stored as | |
407 | +consecutive bursts of data. | |
408 | + | |
409 | +The TCL script is used by Vivado to produce the FPGA bitstream ((2) on figure~\ref{fig:workflow}). | |
410 | +We use the 2018.2 version of Xilinx Vivado and we execute the synthesized | |
411 | +bitstream on a Redpitaya board fitted with a Xilinx Zynq-7010 series | |
412 | +FPGA (xc7z010clg400-1) and two 125~MS/s ADC. | |
413 | +The board works with a Buildroot Linux image. We have developed some tools and | |
414 | +drivers to flash and communicate with the FPGA. They are used to automatize all | |
415 | +the workflow inside the board: load the filter coefficients and retrieve the | |
416 | +computed data. | |
417 | + | |
418 | +The deploy script uploads the bitstream to the board ((3) on | |
419 | +figure~\ref{fig:workflow}), flashes the FPGA, loads the different drivers, | |
420 | +configures the coefficients of the FIR filters. It then waits for the results | |
421 | +and retrieves the data to the main computer ((4) on figure~\ref{fig:workflow}). | |
422 | + | |
423 | +Finally, an Octave post-processing script computes the final results thanks to | |
424 | +the output data ((5) on figure~\ref{fig:workflow}). | |
425 | +The results are normalized so that the Power Spectrum Density (PSD) starts at zero | |
426 | +and the different configurations can be compared. | |
427 | + | |
428 | +The workflow used to compute the results in section~\ref{sec:fixed_rej}, we | |
429 | +have just adapted the quadratic program but the rest of the workflow is unchanged. | |
430 | + | |
286 | 431 | \section{Experiments with fixed area space} |
432 | +\label{sec:fixed_area} | |
433 | +This section presents the output of the filter solver {\em i.e.} the computed | |
434 | +configurations for each stage, the computed rejection and the computed silicon area. | |
435 | +This is interesting to understand the choices made by the solver to compute its solutions. | |
287 | 436 | |
437 | +The experimental setup is composed of three cases. The raw input is generated | |
438 | +by a Pseudo Random Number (PRN) generator, which fixes the input data size $\Pi^I$. | |
439 | +Then the total silicon area $\mathcal{A}$ has been fixed to either 500, 1000 or 1500 | |
440 | +arbitrary units. Hence, the three cases have been named: MAX/500, MAX/1000, MAX/1500. | |
441 | +The number of configurations $p$ is 1827, with $C_i$ ranging from 3 to 60 and $\pi^C$ | |
442 | +ranging from 2 to 22. In each case, the quadratic program has been able to give a | |
443 | +result up to five stages ($n = 5$) in the cascaded filter. | |
444 | + | |
445 | +Table~\ref{tbl:gurobi_max_500} shows the results obtained by the filter solver for MAX/500. | |
446 | +Table~\ref{tbl:gurobi_max_1000} shows the results obtained by the filter solver for MAX/1000. | |
447 | +Table~\ref{tbl:gurobi_max_1500} shows the results obtained by the filter solver for MAX/1500. | |
448 | + | |
449 | +\renewcommand{\arraystretch}{1.4} | |
450 | + | |
451 | +\begin{table} | |
452 | + \caption{Configurations $(C_i, \pi_i^C, \pi_i^S)$, rejections and areas (in arbitrary units) for MAX/500} | |
453 | + \label{tbl:gurobi_max_500} | |
454 | + \centering | |
455 | + {\scalefont{0.77} | |
456 | + \begin{tabular}{|c|ccccc|c|c|} | |
457 | + \hline | |
458 | + $n$ & $i = 1$ & $i = 2$ & $i = 3$ & $i = 4$ & $i = 5$ & Rejection & Area \\ | |
459 | + \hline | |
460 | + 1 & (21, 7, 0) & - & - & - & - & 32~dB & 483 \\ | |
461 | + 2 & (3, 3, 15) & (31, 9, 0) & - & - & - & 58~dB & 460 \\ | |
462 | + 3 & (3, 3, 15) & (27, 9, 0) & (5, 3, 0) & - & - & 66~dB & 488 \\ | |
463 | + 4 & (3, 3, 15) & (19, 7, 0) & (11, 5, 0) & (3, 3, 0) & - & 74~dB & 499 \\ | |
464 | + 5 & (3, 3, 15) & (23, 8, 0) & (3, 3, 1) & (3, 3, 0) & (3, 3, 0) & 78~dB & 489 \\ | |
465 | + \hline | |
466 | + \end{tabular} | |
467 | + } | |
468 | +\end{table} | |
469 | + | |
470 | +\begin{table} | |
471 | + \caption{Configurations $(C_i, \pi_i^C, \pi_i^S)$, rejections and areas (in arbitrary units) for MAX/1000} | |
472 | + \label{tbl:gurobi_max_1000} | |
473 | + \centering | |
474 | + {\scalefont{0.77} | |
475 | + \begin{tabular}{|c|ccccc|c|c|} | |
476 | + \hline | |
477 | + $n$ & $i = 1$ & $i = 2$ & $i = 3$ & $i = 4$ & $i = 5$ & Rejection & Area \\ | |
478 | + \hline | |
479 | + 1 & (37, 11, 0) & - & - & - & - & 56~dB & 999 \\ | |
480 | + 2 & (3, 3, 15) & (51, 14, 0) & - & - & - & 87~dB & 975 \\ | |
481 | + 3 & (3, 3, 15) & (35, 11, 0) & (19, 7, 0) & - & - & 99~dB & 1000 \\ | |
482 | + 4 & (3, 4, 16) & (27, 8, 0) & (19, 7, 1) & (11, 5, 0) & - & 103~dB & 998 \\ | |
483 | + 5 & (3, 3, 15) & (31, 9, 0) & (19, 7, 0) & (3, 3, 1) & (3, 3, 0) & 111~dB & 984 \\ | |
484 | + \hline | |
485 | + \end{tabular} | |
486 | + } | |
487 | +\end{table} | |
488 | + | |
489 | +\begin{table} | |
490 | + \caption{Configurations $(C_i, \pi_i^C, \pi_i^S)$, rejections and areas (in arbitrary units) for MAX/1500} | |
491 | + \label{tbl:gurobi_max_1500} | |
492 | + \centering | |
493 | + {\scalefont{0.77} | |
494 | + \begin{tabular}{|c|ccccc|c|c|} | |
495 | + \hline | |
496 | + $n$ & $i = 1$ & $i = 2$ & $i = 3$ & $i = 4$ & $i = 5$ & Rejection & Area \\ | |
497 | + \hline | |
498 | + 1 & (47, 15, 0) & - & - & - & - & 71~dB & 1457 \\ | |
499 | + 2 & (19, 6, 15) & (51, 14, 0) & - & - & - & 103~dB & 1489 \\ | |
500 | + 3 & (3, 3, 15) & (35, 11, 0) & (35, 11, 0) & - & - & 122~dB & 1492 \\ | |
501 | + 4 & (3, 3, 15) & (27, 8, 0) & (19, 7, 0) & (27, 9, 0) & - & 129~dB & 1498 \\ | |
502 | + 5 & (3, 3, 15) & (23, 9, 2) & (27, 9, 0) & (19, 7, 0) & (3, 3, 0) & 136~dB & 1499 \\ | |
503 | + \hline | |
504 | + \end{tabular} | |
505 | + } | |
506 | +\end{table} | |
507 | + | |
508 | +\renewcommand{\arraystretch}{1} | |
509 | + | |
510 | +From these tables, we can first state that the more stages are used to define | |
511 | +the cascaded FIR filters, the better the rejection. It was an expected result as it has | |
512 | +been previously observed that many small filters are better than | |
513 | +a single large filter \cite{lim_1988, lim_1996, young_1992}, despite such conclusion | |
514 | +being hardly used in practice due to the lack of tools for identifying individual filter | |
515 | +coefficients in the cascaded approach. | |
516 | + | |
517 | +Second, the larger the silicon area, the better the rejection. This was also an | |
518 | +expected result as more area means a filter of better quality (more coefficients | |
519 | +or more bits per coefficient). | |
520 | + | |
521 | +Then, we also observe that the first stage can have a larger shift than the other | |
522 | +stages. This is explained by the fact that the solver tries to use just enough | |
523 | +bits for the computed rejection after each stage. In the first stage, a | |
524 | +balance between a strong rejection with a low number of bits is targeted. Equation~\ref{eq:maxshift} | |
525 | +gives the relation between both values. | |
526 | + | |
527 | +Finally, we note that the solver consumes all the given silicon area. | |
528 | + | |
529 | +The following graphs present the rejection for real data on the FPGA. In all following | |
530 | +figures, the solid line represents the actual rejection of the filtered | |
531 | +data on the FPGA as measured experimentally and the dashed line are the noise level | |
532 | +given by the quadratic solver. The configurations are those computed in the previous section. | |
533 | + | |
534 | +Figure~\ref{fig:max_500_result} shows the rejection of the different configurations in the case of MAX/500. | |
535 | +Figure~\ref{fig:max_1000_result} shows the rejection of the different configurations in the case of MAX/1000. | |
536 | +Figure~\ref{fig:max_1500_result} shows the rejection of the different configurations in the case of MAX/1500. | |
537 | + | |
288 | 538 | \begin{figure} |
289 | 539 | \centering |
290 | -\includegraphics[width=\linewidth]{images/max_rejection/prn_500} | |
291 | -\caption{Experimental results for design with PRN as data input and 500 a.u. as max arbitrary space} | |
292 | -\label{fig:prn_500} | |
540 | +\includegraphics[width=\linewidth]{images/max_500} | |
541 | +\caption{Signal spectrum for MAX/500} | |
542 | +\label{fig:max_500_result} | |
293 | 543 | \end{figure} |
294 | 544 | |
295 | 545 | \begin{figure} |
296 | 546 | \centering |
297 | -\includegraphics[width=\linewidth]{images/max_rejection/prn_1000} | |
298 | -\caption{Experimental results for design with PRN as data input and 1000 a.u. as max arbitrary space} | |
299 | -\label{fig:prn_1000} | |
547 | +\includegraphics[width=\linewidth]{images/max_1000} | |
548 | +\caption{Signal spectrum for MAX/1000} | |
549 | +\label{fig:max_1000_result} | |
300 | 550 | \end{figure} |
301 | 551 | |
302 | 552 | \begin{figure} |
303 | 553 | \centering |
304 | -\includegraphics[width=\linewidth]{images/max_rejection/prn_2000} | |
305 | -\caption{Experimental results for design with PRN as data input and 2000 a.u. as max arbitrary space} | |
306 | -\label{fig:prn_2000} | |
554 | +\includegraphics[width=\linewidth]{images/max_1500} | |
555 | +\caption{Signal spectrum for MAX/1500} | |
556 | +\label{fig:max_1500_result} | |
307 | 557 | \end{figure} |
308 | 558 | |
309 | -\begin{table} | |
310 | -\centering | |
311 | -\begin{tabular}{|c|c|ccc|c|c|} | |
312 | -\hline | |
313 | -\multicolumn{2}{|c|}{\multirow{2}{*}{Stage}} & \multicolumn{3}{c|}{Stage} & \multirow{2}{*}{Rejection} & \multirow{2}{*}{Area} \\ \cline{3-5} | |
314 | -\multicolumn{2}{|c|}{} & i = 1 & i = 2 & i = 3 & & \\ \hline | |
315 | - & C & 19 & - & - & & \\ | |
316 | -n = 1 & $pi^C$ & 7 & - & - & 33 dB & 437 a.u. \\ | |
317 | - & $pi^S$ & 0 & - & - & & \\ \hline | |
318 | - & C & 11 & 19 & - & & \\ | |
319 | -n = 2 & $pi^C$ & 5 & 7 & - & 53 dB & 478 a.u. \\ | |
320 | - & $pi^S$ & 16 & 0 & - & & \\ \hline | |
321 | - & C & 9 & 15 & 11 & & \\ | |
322 | -n = 3 & $pi^C$ & 4 & 6 & 5 & 57 dB & 499 a.u. \\ | |
323 | - & $pi^S$ & 16 & 3 & 0 & & \\ \hline | |
324 | -\end{tabular} | |
325 | -\caption{Solver results for design with PRN as data input and 500 a.u. as max arbitrary space} | |
326 | -\label{tbl:prn_500} | |
327 | -\end{table} | |
559 | +In all cases, we observe that the actual rejection is close to the rejection computed by the solver. | |
328 | 560 | |
561 | +We compare the actual silicon resources given by Vivado to the | |
562 | +resources in arbitrary units. | |
563 | +The goal is to check that our arbitrary units of silicon area models well enough | |
564 | +the real resources on the FPGA. Especially we want to verify that, for a given | |
565 | +number of arbitrary units, the actual silicon resources do not depend on the | |
566 | +number of stages $n$. Most significantly, our approach aims | |
567 | +at remaining far enough from the practical logic gate implementation used by | |
568 | +various vendors to remain platform independent and be portable from one | |
569 | +architecture to another. | |
570 | + | |
571 | +Table~\ref{tbl:resources_usage} shows the resources usage in the case of MAX/500, MAX/1000 and | |
572 | +MAX/1500 \emph{i.e.} when the maximum allowed silicon area is fixed to 500, 1000 | |
573 | +and 1500 arbitrary units. We have taken care to extract solely the resources used by | |
574 | +the FIR filters and remove additional processing blocks including FIFO and PL to | |
575 | +PS communication. | |
576 | + | |
329 | 577 | \begin{table} |
330 | -\centering | |
331 | -{\scalefont{0.85} | |
332 | -\begin{tabular}{|c|c|ccccc|c|c|} | |
333 | -\hline | |
334 | -\multicolumn{2}{|c|}{\multirow{2}{*}{Stage}} & \multicolumn{5}{c|}{Stage} & \multirow{2}{*}{Rejection} & \multirow{2}{*}{Area} \\ \cline{3-7} | |
335 | -\multicolumn{2}{|c|}{} & i = 1 & i = 2 & i = 3 & i = 4 & i = 5 & & \\ \hline | |
336 | - & C & 37 & - & - & - & - & & \\ | |
337 | -n = 1 & $pi^C$ & 11 & - & - & - & - & 56 dB & 999 a.u. \\ | |
338 | - & $pi^S$ & 0 & - & - & - & - & & \\ \hline | |
339 | - & C & 11 & 39 & - & - & - & & \\ | |
340 | -n = 2 & $pi^C$ & 5 & 13 & - & - & - & 82 dB & 972 a.u. \\ | |
341 | - & $pi^S$ & 16 & 0 & - & - & - & & \\ \hline | |
342 | - & C & 9 & 31 & 19 & - & - & & \\ | |
343 | -n = 3 & $pi^C$ & 7 & 8 & 7 & - & - & 93 dB & 990 a.u. \\ | |
344 | - & $pi^S$ & 19 & 2 & 0 & - & - & & \\ \hline | |
345 | - & C & 9 & 19 & 17 & 11 & - & & \\ | |
346 | -n = 4 & $pi^C$ & 4 & 7 & 7 & 5 & - & 99 dB & 992 a.u. \\ | |
347 | - & $pi^S$ & 16 & 3 & 3 & 0 & - & & \\ \hline | |
348 | - & C & 9 & 15 & 11 & 11 & 11 & & \\ | |
349 | -n = 5 & $pi^C$ & 4 & 7 & 5 & 5 & 5 & 99 dB & 998 a.u. \\ | |
350 | - & $pi^S$ & 16 & 3 & 2 & 1 & 1 & & \\ \hline | |
351 | -\end{tabular} | |
352 | -} | |
353 | -\caption{Solver results for design with PRN as data input and 1000 a.u. as max arbitrary space} | |
354 | -\label{tbl:prn_1000} | |
578 | + \caption{Resource occupation. The last column refers to available resources on a Zynq-7010 as found on the Redpitaya.} | |
579 | + \label{tbl:resources_usage} | |
580 | + \centering | |
581 | + \begin{tabular}{|c|c|ccc|c|} | |
582 | + \hline | |
583 | + $n$ & & MAX/500 & MAX/1000 & MAX/1500 & \emph{Zynq 7010} \\ \hline\hline | |
584 | + & LUT & 249 & 453 & 627 & \emph{17600} \\ | |
585 | + 1 & BRAM & 1 & 1 & 1 & \emph{120} \\ | |
586 | + & DSP & 21 & 37 & 47 & \emph{80} \\ \hline | |
587 | + & LUT & 2374 & 5494 & 691 & \emph{17600} \\ | |
588 | + 2 & BRAM & 2 & 2 & 2 & \emph{120} \\ | |
589 | + & DSP & 0 & 0 & 70 & \emph{80} \\ \hline | |
590 | + & LUT & 2443 & 3304 & 3521 & \emph{17600} \\ | |
591 | + 3 & BRAM & 3 & 3 & 3 & \emph{120} \\ | |
592 | + & DSP & 0 & 19 & 35 & \emph{80} \\ \hline | |
593 | + & LUT & 2634 & 3753 & 2557 & \emph{17600} \\ | |
594 | + 4 & BRAM & 4 & 4 & 4 & \emph{120} \\ | |
595 | + & DPS & 0 & 19 & 46 & \emph{80} \\ \hline | |
596 | + & LUT & 2423 & 3047 & 2847 & \emph{17600} \\ | |
597 | + 5 & BRAM & 5 & 5 & 5 & \emph{120} \\ | |
598 | + & DPS & 0 & 22 & 46 & \emph{80} \\ \hline | |
599 | + \end{tabular} | |
355 | 600 | \end{table} |
356 | 601 | |
602 | +In some cases, Vivado replaces the DSPs by Look Up Tables (LUTs). We assume that, | |
603 | +when the filters coefficients are small enough, or when the input size is small | |
604 | +enough, Vivado optimized resource consumption by selecting multiplexers to | |
605 | +implement the multiplications instead of a DSP. In this case, it is quite difficult | |
606 | +to compare the whole silicon budget. | |
607 | + | |
608 | +However, a rough estimation can be made with a simple equivalence. Looking at | |
609 | +the first column (MAX/500), where the number of LUTs is quite stable for $n \geq 2$, | |
610 | +we can deduce that a DSP is roughly equivalent to 100~LUTs in terms of silicon | |
611 | +area use. With this equivalence, our 500 arbitraty units corresponds to 2500 LUTs, | |
612 | +1000 arbitrary units corresponds to 5000 LUTs and 1500 arbitrary units corresponds | |
613 | +to 7300 LUTs. The conclusion is that the orders of magnitude of our arbitrary | |
614 | +unit are quite good. The relatively small differences can probably be explained | |
615 | +by the optimizations done by Vivado based on the detailed map of available processing resources. | |
616 | + | |
617 | +We present the computation time to solve the quadratic problem. | |
618 | +For each case, the filter solver software are executed with a Intel(R) Xeon(R) CPU E5606 | |
619 | +cadenced at 2.13~GHz. The CPU has 8 cores that are used by Gurobi to solve | |
620 | +the quadratic problem. | |
621 | + | |
622 | +Table~\ref{tbl:area_time} shows the time needed to solve the quadratic | |
623 | +problem when the maximal area is fixed to 500, 1000 and 1500 arbitrary units. | |
624 | + | |
357 | 625 | \begin{table} |
626 | +\caption{Time to solve the quadratic program with Gurobi} | |
627 | +\label{tbl:area_time} | |
358 | 628 | \centering |
359 | -{\scalefont{0.85} | |
360 | -\begin{tabular}{|c|c|ccccc|c|c|} | |
361 | -\hline | |
362 | -\multicolumn{2}{|c|}{\multirow{2}{*}{Stage}} & \multicolumn{5}{c|}{Stage} & \multirow{2}{*}{Rejection} & \multirow{2}{*}{Area} \\ \cline{3-7} | |
363 | -\multicolumn{2}{|c|}{} & i = 1 & i = 2 & i = 3 & i = 4 & i = 5 & & \\ \hline | |
364 | - & C & 39 & - & - & - & - & & \\ | |
365 | -n = 1 & $pi^C$ & 13 & - & - & - & - & 61 dB & 1131 a.u. \\ | |
366 | - & $pi^S$ & 0 & - & - & - & - & & \\ \hline | |
367 | - & C & 37 & 39 & - & - & - & & \\ | |
368 | -n = 2 & $pi^C$ & 11 & 13 & - & - & - & 117 dB & 1974 a.u. \\ | |
369 | - & $pi^S$ & 17 & 0 & - & - & - & & \\ \hline | |
370 | - & C & 15 & 35 & 35 & - & - & & \\ | |
371 | -n = 3 & $pi^C$ & 9 & 11 & 11 & - & - & 138 dB & 1985 a.u. \\ | |
372 | - & $pi^S$ & 19 & 3 & 0 & - & - & & \\ \hline | |
373 | - & C & 11 & 27 & 27 & 23 & - & & \\ | |
374 | -n = 4 & $pi^C$ & 5 & 9 & 9 & 9 & - & 148 dB & 1993 a.u. \\ | |
375 | - & $pi^S$ & 16 & 3 & 2 & 0 & - & & \\ \hline | |
376 | - & C & 11 & 27 & 31 & 11 & 11 & & \\ | |
377 | -n = 5 & $pi^C$ & 5 & 9 & 8 & 5 & 5 & 153 dB & 2000 a.u. \\ | |
378 | - & $pi^S$ & 16 & 3 & 1 & 0 & 1 & & \\ \hline | |
629 | +\begin{tabular}{|c|c|c|c|}\hline | |
630 | +$n$ & Time (MAX/500) & Time (MAX/1000) & Time (MAX/1500) \\\hline\hline | |
631 | +1 & 0.1~s & 0.1~s & 0.3~s \\ | |
632 | +2 & 1.1~s & 2.2~s & 12~s \\ | |
633 | +3 & 17~s & 137~s ($\approx$ 2~min) & 275~s ($\approx$ 4~min) \\ | |
634 | +4 & 52~s & 5448~s ($\approx$ 90~min) & 5505~s ($\approx$ 17~h) \\ | |
635 | +5 & 286~s ($\approx$ 4~min) & 4119~s ($\approx$ 68~min) & 235479~s ($\approx$ 3~days) \\\hline | |
379 | 636 | \end{tabular} |
380 | -} | |
381 | -\caption{Solver results for design with PRN as data input and 2000 a.u. as max arbitrary space} | |
382 | -\label{tbl:prn_2000} | |
383 | 637 | \end{table} |
384 | 638 | |
639 | +As expected, the computation time seems to rise exponentially with the number of stages. % TODO: exponentiel ? | |
640 | +When the area is limited, the design exploration space is more limited and the solver is able to | |
641 | +find an optimal solution faster. On the contrary, in the case of MAX/1500 with | |
642 | +5~stages, we were not able to obtain a result after 40~hours of computation so we decided to stop. | |
643 | + | |
644 | +\section{Experiments with fixed rejection target} | |
645 | +\label{sec:fixed_rej} | |
646 | +This section presents the results of complementary quadratic program which we | |
647 | +minimize the area occupation for a targeted noise level. | |
648 | + | |
649 | +The experimental setup is also composed of three cases. The raw input is the same | |
650 | +as previous section, a PRN generator, which fixes the input data size $\Pi^I$. | |
651 | +Then the targeted rejection $\mathcal{R}$ has been fixed to either 40, 60 or 80~dB. | |
652 | +Hence, the three cases have been named: MIN/40, MIN/60, MIN/80. | |
653 | +The number of configurations $p$ is the same as previous section. | |
654 | + | |
655 | +Table~\ref{tbl:gurobi_min_40} shows the results obtained by the filter solver for MIN/40. | |
656 | +Table~\ref{tbl:gurobi_min_60} shows the results obtained by the filter solver for MIN/60. | |
657 | +Table~\ref{tbl:gurobi_min_80} shows the results obtained by the filter solver for MIN/80. | |
658 | + | |
659 | +\renewcommand{\arraystretch}{1.4} | |
660 | + | |
385 | 661 | \begin{table} |
386 | -\centering | |
387 | -\begin{tabular}{|c|c|c|c|c|}\hline | |
388 | -Input & Stages & Computation time & Vivado time & Redpitaya time \\\hline\hline | |
389 | - & 1 & 0.02~s & $\approx$ 20 min & $\approx$ 1 min \\ | |
390 | -PRN & 2 & 1.70~s & $\approx$ 20 min & $\approx$ 1 min \\ | |
391 | - & 3 & 19~s & $\approx$ 20 min & $\approx$ 1 min \\\hline | |
392 | -\end{tabular} | |
393 | -\caption{Time to compute and deploy the designs for PRN 500} | |
394 | -\label{tbl:time_prn_500} | |
662 | + \caption{Configurations $(C_i, \pi_i^C, \pi_i^S)$, rejections and areas (in arbitrary units) for MIN/40} | |
663 | + \label{tbl:gurobi_min_40} | |
664 | + \centering | |
665 | + {\scalefont{0.77} | |
666 | + \begin{tabular}{|c|ccccc|c|c|} | |
667 | + \hline | |
668 | + $n$ & $i = 1$ & $i = 2$ & $i = 3$ & $i = 4$ & $i = 5$ & Rejection & Area \\ | |
669 | + \hline | |
670 | + 1 & (27, 8, 0) & - & - & - & - & 41~dB & 648 \\ | |
671 | + 2 & (3, 2, 14) & (19, 7, 0) & - & - & - & 40~dB & 263 \\ | |
672 | + 3 & (3, 3, 15) & (11, 5, 0) & (3, 3, 0) & - & - & 41~dB & 192 \\ | |
673 | + 4 & (3, 3, 15) & (3, 3, 0) & (3, 3, 0) & (3, 3, 0) & - & 42~dB & 147 \\ | |
674 | + \hline | |
675 | + \end{tabular} | |
676 | + } | |
395 | 677 | \end{table} |
396 | 678 | |
397 | 679 | \begin{table} |
398 | -\centering | |
399 | -\begin{tabular}{|c|c|c|c|c|}\hline | |
400 | -Input & Stages & Computation time & Vivado time & Redpitaya time \\\hline\hline | |
401 | - & 1 & 0.07~s & $\approx$ 20 min & $\approx$ 1 min \\ | |
402 | - & 2 & 1.31~s & $\approx$ 20 min & $\approx$ 1 min \\ | |
403 | -PRN & 3 & 119~s ($\approx$ 2~min) & $\approx$ 20 min & $\approx$ 1 min \\ | |
404 | - & 4 & 270~s ($\approx$ 5~min) & $\approx$ 20 min & $\approx$ 1 min \\ | |
405 | - & 5 & 5998~s ($\approx$ 2~h) & $\approx$ 20 min & $\approx$ 1 min \\\hline | |
406 | -\end{tabular} | |
407 | -\caption{Time to compute and deploy the designs for PRN 1000} | |
408 | -\label{tbl:time_prn_1000} | |
680 | + \caption{Configurations $(C_i, \pi_i^C, \pi_i^S)$, rejections and areas (in arbitrary units) for MIN/60} | |
681 | + \label{tbl:gurobi_min_60} | |
682 | + \centering | |
683 | + {\scalefont{0.77} | |
684 | + \begin{tabular}{|c|ccccc|c|c|} | |
685 | + \hline | |
686 | + $n$ & $i = 1$ & $i = 2$ & $i = 3$ & $i = 4$ & $i = 5$ & Rejection & Area \\ | |
687 | + \hline | |
688 | + 1 & (39, 13, 0) & - & - & - & - & 60~dB & 1131 \\ | |
689 | + 2 & (3, 3, 15) & (35, 10, 0) & - & - & - & 60~dB & 547 \\ | |
690 | + 3 & (3, 3, 15) & (27, 8, 0) & (3, 3, 0) & - & - & 62~dB & 426 \\ | |
691 | + 4 & (3, 2, 14) & (11, 5, 1) & (11, 5, 0) & (3, 3, 0) & - & 60~dB & 344 \\ | |
692 | + 5 & (3, 2, 14) & (3, 3, 1) & (3, 3, 0) & (3, 3, 0) & (3, 3, 0) & 60~dB & 279 \\ | |
693 | + \hline | |
694 | + \end{tabular} | |
695 | + } | |
409 | 696 | \end{table} |
410 | 697 | |
411 | 698 | \begin{table} |
412 | -\centering | |
413 | -\begin{tabular}{|c|c|c|c|c|}\hline | |
414 | -Input & Stages & Computation time & Vivado time & Redpitaya time \\\hline\hline | |
415 | - & 1 & 0.07~s & $\approx$ 20 min & $\approx$ 1 min \\ | |
416 | - & 2 & 0.75~s & $\approx$ 20 min & $\approx$ 1 min \\ | |
417 | -PRN & 3 & 36~s & - & - \\ | |
418 | - & 4 & 14500~s ($\approx$ 4~h) & $\approx$ 20 min & $\approx$ 1 min \\ | |
419 | - & 5 & 74237~s ($\approx$ 20~h) & $\approx$ 20 min & $\approx$ 1 min \\\hline | |
420 | -\end{tabular} | |
421 | -\caption{Time to compute and deploy the designs for PRN 2000} | |
422 | -\label{tbl:time_prn_2000} | |
699 | + \caption{Configurations $(C_i, \pi_i^C, \pi_i^S)$, rejections and areas (in arbitrary units) for MIN/80} | |
700 | + \label{tbl:gurobi_min_80} | |
701 | + \centering | |
702 | + {\scalefont{0.77} | |
703 | + \begin{tabular}{|c|ccccc|c|c|} | |
704 | + \hline | |
705 | + $n$ & $i = 1$ & $i = 2$ & $i = 3$ & $i = 4$ & $i = 5$ & Rejection & Area \\ | |
706 | + \hline | |
707 | + 1 & (55, 16, 0) & - & - & - & - & 81~dB & 1760 \\ | |
708 | + 2 & (3, 3, 15) & (47, 14, 0) & - & - & - & 80~dB & 903 \\ | |
709 | + 3 & (3, 3, 15) & (23, 9, 0) & (19, 7, 0) & - & - & 80~dB & 698 \\ | |
710 | + 4 & (3, 3, 15) & (27, 9, 0) & (7, 7, 4) & (3, 3, 0) & - & 80~dB & 605 \\ | |
711 | + 5 & (3, 2, 14) & (27, 8, 0) & (3, 3, 1) & (3, 3, 0) & (3, 3, 0) & 81~dB & 534 \\ | |
712 | + \hline | |
713 | + \end{tabular} | |
714 | + } | |
423 | 715 | \end{table} |
716 | +\renewcommand{\arraystretch}{1} | |
424 | 717 | |
425 | -\section{Experiments with fixed rejection target} | |
718 | +From these tables, we can first state that all configuration reach the target rejection | |
719 | +level and more we have stages lesser is the area occupied in arbitrary unit. | |
720 | +Futhermore, the area of the monolithic filter is twice bigger than the two cascaded. | |
721 | +More generally, more there is filters lower is the occupied area. | |
426 | 722 | |
723 | +Like in previous section, the solver choose always a little filter as first | |
724 | +filter stage and the second one is often the biggest filter. this choice can be explain | |
725 | +as the previous section. The solver uses just enough bits to not degrade the input | |
726 | +signal and in second filter it can choose a better filter to improve rejection without | |
727 | +have too bits in the output data. | |
728 | + | |
729 | +For the specific case in MIN/40 for $n = 5$ the solver has determined that the optimal | |
730 | +number of filter is 4 so it not chose any configuration in last filter. Hence this | |
731 | +solution is equivalent to the result for $n = 4$. | |
732 | + | |
733 | +The following graphs present the rejection for real data on the FPGA. In all following | |
734 | +figures, the solid line represents the actual rejection of the filtered | |
735 | +data on the FPGA as measured experimentally and the dashed line are the noise level | |
736 | +given by the quadratic solver. | |
737 | + | |
738 | +Figure~\ref{fig:min_40} shows the rejection of the different configurations in the case of MIN/40. | |
739 | +Figure~\ref{fig:min_60} shows the rejection of the different configurations in the case of MIN/60. | |
740 | +Figure~\ref{fig:min_80} shows the rejection of the different configurations in the case of MIN/80. | |
741 | + | |
427 | 742 | \begin{figure} |
428 | 743 | \centering |
429 | -\includegraphics[width=\linewidth]{images/min_area/prn_50} | |
430 | -\caption{Results for design with PRN as data input and 50 dB as aimed rejection level} | |
431 | -\label{fig:prn_500} | |
744 | +\includegraphics[width=\linewidth]{images/min_40} | |
745 | +\caption{Signal spectrum for MIN/40} | |
746 | +\label{fig:min_40} | |
432 | 747 | \end{figure} |
433 | 748 | |
434 | 749 | \begin{figure} |
435 | 750 | \centering |
436 | -\includegraphics[width=\linewidth]{images/min_area/prn_100} | |
437 | -\caption{Results for design with PRN as data input and 50 dB as aimed rejection level} | |
438 | -\label{fig:prn_100} | |
751 | +\includegraphics[width=\linewidth]{images/min_60} | |
752 | +\caption{Signal spectrum for MIN/60} | |
753 | +\label{fig:min_60} | |
439 | 754 | \end{figure} |
440 | 755 | |
441 | 756 | \begin{figure} |
442 | 757 | \centering |
443 | -\includegraphics[width=\linewidth]{images/min_area/prn_150} | |
444 | -\caption{Results for design with PRN as data input and 2000 a.u. as max arbitrary space} | |
445 | -\label{fig:prn_150} | |
758 | +\includegraphics[width=\linewidth]{images/min_80} | |
759 | +\caption{Signal spectrum for MIN/80} | |
760 | +\label{fig:min_80} | |
446 | 761 | \end{figure} |
447 | 762 | |
763 | +We observe that all rejections given by the quadratic solver are close to the real | |
764 | +rejection. All curves prove that the constraint to reach the target rejection is | |
765 | +respected both monolithic filter or cascaded filters. | |
766 | + | |
767 | +Table~\ref{tbl:resources_usage} shows the resources usage in the case of MIN/40, MIN/60 and | |
768 | +MIN/80 \emph{i.e.} when the target rejection is fixed to 40, 60 and 80~dB. We | |
769 | +have taken care to extract solely the resources used by | |
770 | +the FIR filters and remove additional processing blocks including FIFO and PL to | |
771 | +PS communication. | |
772 | + | |
773 | +\begin{table} | |
774 | + \caption{Resource occupation. The last column refers to available resources on a Zynq-7010 as found on the Redpitaya.} | |
775 | + \label{tbl:resources_usage_comp} | |
776 | + \centering | |
777 | + \begin{tabular}{|c|c|ccc|c|} | |
778 | + \hline | |
779 | + $n$ & & MIN/40 & MIN/60 & MIN/80 & \emph{Zynq 7010} \\ \hline\hline | |
780 | + & LUT & 343 & 334 & 772 & \emph{17600} \\ | |
781 | + 1 & BRAM & 1 & 1 & 1 & \emph{120} \\ | |
782 | + & DSP & 27 & 39 & 55 & \emph{80} \\ \hline | |
783 | + & LUT & 1252 & 2862 & 5099 & \emph{17600} \\ | |
784 | + 2 & BRAM & 2 & 2 & 2 & \emph{120} \\ | |
785 | + & DSP & 0 & 0 & 0 & \emph{80} \\ \hline | |
786 | + & LUT & 891 & 2148 & 2023 & \emph{17600} \\ | |
787 | + 3 & BRAM & 3 & 3 & 3 & \emph{120} \\ | |
788 | + & DSP & 0 & 0 & 19 & \emph{80} \\ \hline | |
789 | + & LUT & 662 & 1729 & 2451 & \emph{17600} \\ | |
790 | + 4 & BRAM & 4 & 4 & 4 & \emph{120} \\ | |
791 | + & DPS & 0 & 0 & 7 & \emph{80} \\ \hline | |
792 | + & LUT & - & 1259 & 2602 & \emph{17600} \\ | |
793 | + 5 & BRAM & - & 5 & 5 & \emph{120} \\ | |
794 | + & DPS & - & 0 & 0 & \emph{80} \\ \hline | |
795 | + \end{tabular} | |
796 | +\end{table} | |
797 | + | |
798 | +If we keep the previous estimation of cost of one DSP in term of LUT (1 DSP $\approx$ 100 LUT) | |
799 | +the real resource consumption decrease in function of number of stage filter according | |
800 | +to the solution given by the quadratic solver. Indeed, we have always a decreasing | |
801 | +consumption even if the difference between the monolithic and the two cascaded | |
802 | +filters is lesser than expected. | |
803 | + | |
804 | +Finally, the table~\ref{tbl:area_time_comp} shows the computation time to solve | |
805 | +the quadratic program. | |
806 | + | |
807 | +\begin{table} | |
808 | +\caption{Time to solve the quadratic program with Gurobi} | |
809 | +\label{tbl:area_time_comp} | |
810 | +\centering | |
811 | +\begin{tabular}{|c|c|c|c|}\hline | |
812 | +$n$ & Time (MIN/40) & Time (MIN/60) & Time (MIN/80) \\\hline\hline | |
813 | +1 & 0.07~s & 0.02~s & 0.01~s \\ | |
814 | +2 & 7.8~s & 16~s & 14~s \\ | |
815 | +3 & 4.7~s & 14~s & 28~s \\ | |
816 | +4 & 39~s & 20~s & 193~s \\ | |
817 | +5 & 126~s & 12~s & 170~s \\\hline | |
818 | +\end{tabular} | |
819 | +\end{table} | |
820 | + | |
821 | +The time needed to solve this configuration are substantially faster than time | |
822 | +needed in the previous section. Indeed the worst time in this case is only 3~minutes | |
823 | +in balance of 3~days on previous section. We are able to solve more easily this | |
824 | +problem than the previous one. | |
825 | + | |
448 | 826 | \section{Conclusion} |
827 | + | |
828 | +In this paper, we have proposed a new approach to work with a cascade of FIR filter inside a FPGA. | |
829 | +This method aims to be hardware independent and focus an high-level of abstraction. | |
830 | +We have modeled the FIR filter operation and the data shift impact. With this model | |
831 | +we have created a quadratic program to select the optimal FIR coefficient set to reject a | |
832 | +maximum of noise. In our experiments we have chosen deliberately some common tools | |
833 | +to design the filter coefficients but we can use any other method. | |
834 | + | |
835 | +Our experimental results are very promising in providing a rational approach to selecting | |
836 | +the coefficients of each FIR filter in the context of a performance target for a chain of | |
837 | +such filters. The FPGA design that is produced automatically by our | |
838 | +workflow is able to filter an input signal as expected which validates our model and our approach. | |
839 | +We can easily change the quadratic program to adapt it to an other problem. | |
840 | + | |
841 | +A perspective is to model and add the decimators to the processing chain to have a classical | |
842 | +FIR filter and decimator. The impact of the decimator is not so trivial, especially in terms of silicon | |
843 | +area for the subsequent stages since some hardware optimization can be applied in | |
844 | +this case. | |
845 | + | |
846 | +The software used to demonstrate the concepts developed in this paper is based on the | |
847 | +CPU-FPGA co-design framework available at \url{https://github.com/oscimp/oscimpDigital}. | |
449 | 848 | |
450 | 849 | \section*{Acknowledgement} |
451 | 850 |
images/max_1000.pdf
No preview for this file type
images/max_1500.pdf
No preview for this file type
images/max_500.pdf
No preview for this file type
images/max_rejection/prn_1000.pdf
No preview for this file type
images/max_rejection/prn_2000.pdf
No preview for this file type
images/max_rejection/prn_500.pdf
No preview for this file type
images/min_40.pdf
No preview for this file type
images/min_60.pdf
No preview for this file type
images/min_80.pdf
No preview for this file type
references.bib
... | ... | @@ -10,7 +10,7 @@ |
10 | 10 | } |
11 | 11 | |
12 | 12 | @article{kodek1980design, |
13 | - title={Design of optimal finite wordlength {FIR} digital filters using integer | |
13 | + title={Design of optimal finite wordlength {FIR} digital filters using integer | |
14 | 14 | programming techniques}, |
15 | 15 | author={Kodek, Dusan}, |
16 | 16 | journal={IEEE Transactions on Acoustics, Speech, and Signal Processing}, |
... | ... | @@ -43,4 +43,56 @@ |
43 | 43 | year={2016}, |
44 | 44 | publisher={AIP Publishing} |
45 | 45 | } |
46 | + | |
47 | +@inproceedings{lim_1996, | |
48 | +author={Y.-C. Lim and R. Yang and B. Liu}, | |
49 | +booktitle={1996 IEEE International Symposium on Circuits and Systems. Circuits and Systems Connecting the World. ISCAS 96}, | |
50 | +title={The design of cascaded FIR filters}, | |
51 | +year={1996}, | |
52 | +volume={2}, | |
53 | +number={}, | |
54 | +pages={181-184 vol.2}, | |
55 | +keywords={cascade networks;digital filters;FIR filters;filtering theory;linear programming;frequency response;cascaded FIR filters;stopband response;minimum attenuation requirement;passband ripple magnitude;linear-programming technique;FIR filter design;filter optimisation;Finite impulse response filter;IIR filters;Passband;Frequency;Signal sampling;Band pass filters;Digital filters;Attenuation;Image sampling;Linear programming}, | |
56 | +doi={10.1109/ISCAS.1996.540382}, | |
57 | +ISSN={}, | |
58 | +month={May},} | |
59 | + | |
60 | +@article{lim_1988, | |
61 | +author={Y. C. {Lim} and B. {Liu}}, | |
62 | +journal={IEEE Transactions on Acoustics, Speech, and Signal Processing}, | |
63 | +title={Design of cascade form FIR filters with discrete valued coefficients}, | |
64 | +year={1988}, | |
65 | +volume={36}, | |
66 | +number={11}, | |
67 | +pages={1735-1739}, | |
68 | +keywords={cascade networks;digital filters;filtering and prediction theory;iterative equalisation strategy;cascade form FIR filters;discrete valued coefficients;peak ripple;prototype filter;roundoff noise property;Finite impulse response filter;Low pass filters;Band pass filters;Passband;Prototypes;Frequency;Digital filters;Digital arithmetic;Design optimization;Sampling methods}, | |
69 | +doi={10.1109/29.9010}, | |
70 | +ISSN={0096-3518}, | |
71 | +month={Nov},} | |
72 | + | |
73 | +@inproceedings{young_1992, | |
74 | +author={C. {Young} and D. L. {Jones}}, | |
75 | +booktitle={[Proceedings] ICASSP-92: 1992 IEEE International Conference on Acoustics, Speech, and Signal Processing}, | |
76 | +title={Improvement in finite wordlength FIR digital filter design by cascading}, | |
77 | +year={1992}, | |
78 | +volume={5}, | |
79 | +number={}, | |
80 | +pages={109-112 vol.5}, | |
81 | +keywords={approximation theory;digital filters;integer programming;series (mathematics);finite wordlength filter;quantization;FIR digital filter design;finite impulse response;digital systems;finite wordlength coefficients;cascaded subfilters;stopband suppression;Taylor series approximation;linear integer program;passband deviation;Finite impulse response filter;Digital filters;Linear programming;Passband;Quantization;Frequency response;Digital systems;Taylor series;Minimax techniques;Design optimization}, | |
82 | +doi={10.1109/ICASSP.1992.226646}, | |
83 | +ISSN={1520-6149}, | |
84 | +month={March},} | |
85 | + | |
86 | +@article{smith_1998, | |
87 | +author={L. M. {Smith}}, | |
88 | +journal={IEEE Transactions on Signal Processing}, | |
89 | +title={Decomposition of FIR digital filters for realization via the cascade connection of subfilters}, | |
90 | +year={1998}, | |
91 | +volume={46}, | |
92 | +number={6}, | |
93 | +pages={1681-1684}, | |
94 | +keywords={FIR filters;digital filters;cascade networks;Z transforms;transfer functions;frequency response;Newton-Raphson method;convergence of numerical methods;search problems;poles and zeros;FIR digital filters;subfilters cascade connection;even-order linear-phase FIR filters;filter decomposition;fourth-order subfilters;second-order subfilters;roots;z-domain filter transfer function;complex z plane;impulse response symmetry;unit circle;perimeter;complex values;real values;impulse response coefficients;root-finding algorithm;Newton-Raphson method;2D search;Cauchy-Riemann relations;convergence speed;frequency response characteristics;Finite impulse response filter;Digital filters;Polynomials;Programmable logic arrays;Transfer functions;Testing;Frequency response;Application specific integrated circuits;Nonlinear filters;Passband}, | |
95 | +doi={10.1109/78.678490}, | |
96 | +ISSN={1053-587X}, | |
97 | +month={June},} |