Commit 0096d462550a4852259c89b83e36e613d0ed73eb
Exists in
master
Merge branch 'master' of https://lxsd.femto-st.fr/gitlab/jfriedt/ifcs2018-article
Showing 11 changed files Side-by-side Diff
ifcs2018_journal.tex
| ... | ... | @@ -142,9 +142,9 @@ |
| 142 | 142 | and for any hardware platform (Altera, Xilinx...). To do this we have defined an |
| 143 | 143 | abstract model to represent some basic operations of DSP. |
| 144 | 144 | |
| 145 | -For the moment, we are focused on only two operations: the filtering and the shift of data. | |
| 145 | +For the moment, we are focused on only two operations: the filtering and the shifting of data. | |
| 146 | 146 | We have chosen these basic operations because the shifting and the filtering have already been studied in |
| 147 | -lot of works {\color{red} mettre les nouvelles référence ici} hence it will be easier | |
| 147 | +a lot of works~\cite{lim_1996, lim_1988, young_1992, smith_1998}, hence it will be easier | |
| 148 | 148 | to check and validate our results. |
| 149 | 149 | |
| 150 | 150 | However having only two operations is insufficient to work with complex DSP but |
| 151 | 151 | |
| 152 | 152 | |
| 153 | 153 | |
| 154 | 154 | |
| 155 | 155 | |
| 156 | 156 | |
| 157 | 157 | |
| 158 | 158 | |
| 159 | 159 | |
| 160 | 160 | |
| 161 | 161 | |
| 162 | 162 | |
| 163 | 163 | |
| 164 | 164 | |
| 165 | 165 | |
| 166 | 166 | |
| 167 | 167 | |
| 168 | 168 | |
| 169 | 169 | |
| 170 | 170 | |
| 171 | 171 | |
| 172 | 172 | |
| 173 | 173 | |
| 174 | 174 | |
| ... | ... | @@ -283,169 +283,568 @@ |
| 283 | 283 | \label{fig:sum_rejection} |
| 284 | 284 | \end{figure} |
| 285 | 285 | |
| 286 | +Finally, we can describe our abstract model with the following expressions: | |
| 287 | +\begin{align} | |
| 288 | +\text{Maximize } & \sum_{i=1}^n r_i \notag \\ | |
| 289 | +\sum_{i=1}^n a_i & \leq \mathcal{A} & \label{eq:area} \\ | |
| 290 | +a_i & = C_i \times (\pi_i^C + \pi_i^-), & \forall i \in [1, n] \label{eq:areadef} \\ | |
| 291 | +r_i & = F(C_i, \pi_i^C), & \forall i \in [1, n] \label{eq:rejectiondef} \\ | |
| 292 | +\pi_i^+ & = \pi_i^- + \pi_i^C - \pi_i^S, & \forall i \in [1, n] \label{eq:bits} \\ | |
| 293 | +\pi_{i - 1}^+ & = \pi_i^-, & \forall i \in [2, n] \label{eq:inout} \\ | |
| 294 | +\pi_i^+ & \geq 1 + \sum_{k=1}^{i} \left(1 + \frac{r_k}{6}\right), & \forall i \in [1, n] \label{eq:maxshift} \\ | |
| 295 | +\pi_1^- &= \Pi^I \label{eq:init} | |
| 296 | +\end{align} | |
| 297 | + | |
| 298 | +{\color{red} Je sais que l'idée est de ne pas parler du programme linéaire mais | |
| 299 | +ça me semble quand même indispensable. Au pire, j'essaierai de revoir ça si on | |
| 300 | +est vraiment en manque de place.} | |
| 301 | + | |
| 302 | +Equation~\ref{eq:area} states that the total area taken by the filters must be | |
| 303 | +less than the available area. Equation~\ref{eq:areadef} gives the definition of | |
| 304 | +the area for a filter. More precisely, it is the area of the FIR as the Shifter | |
| 305 | +does not need any circuitry. We consider that the FIR needs $C_i$ registers of size | |
| 306 | +$\pi_i^C + \pi_i^-$~bits to store the results of the multiplications of the | |
| 307 | +input data and the coefficients. Equation~\ref{eq:rejectiondef} gives the | |
| 308 | +definition of the rejection of the filter thanks to function~$F$ that we defined | |
| 309 | +previously. The Shifter does not introduce negative rejection as we explain later, | |
| 310 | +so the rejection only comes from the FIR. Equation~\ref{eq:bits} states the | |
| 311 | +relation between $\pi_i^+$ and $\pi_i^-$. The multiplications in the FIR add | |
| 312 | +$\pi_i^C$ bits as most coefficients are close to zero, and the Shifter removes | |
| 313 | +$\pi_i^S$ bits. Equation~\ref{eq:inout} states that the output number of bits of | |
| 314 | +a filter is the same as the input number of bits of the next filter. | |
| 315 | +Equation~\ref{eq:maxshift} ensures that the Shifter does not introduce negative | |
| 316 | +rejection. Indeed, the results of the FIR can be right shifted without compromising | |
| 317 | +the quality of the rejection until a threshold. Each bit of the output data | |
| 318 | +increases the maximum rejection level by 6~dB. We add one to take the sign bit | |
| 319 | +into account. If equation~\ref{eq:maxshift} was not present, the Shifter could | |
| 320 | +shift too much and introduce some noise in the output data. Each supplementary | |
| 321 | +shift bit would cause 6~dB of noise. A totally equivalent equation is: | |
| 322 | +$\pi_i^S \leq \pi_i^- + \pi_i^C - 1 - \sum_{k=1}^{i} \left(1 + \frac{r_k}{6}\right)$. | |
| 323 | +Finally, equation~\ref{eq:init} gives the global input's number of bits. | |
| 324 | + | |
| 325 | +This model is non-linear and even non-quadratic, as $F$ does not have a known | |
| 326 | +linear or quadratic expression. We introduce $p$ FIR configurations | |
| 327 | + $(C_{ij}, \pi_{ij}^C), 1 \leq j \leq p$ that are constants. We define binary | |
| 328 | + variable $\delta_{ij}$ that has value 1 if stage~$i$ is in configuration~$j$ | |
| 329 | + and 0 otherwise. The new equations are as follows: | |
| 330 | + | |
| 331 | +\begin{align} | |
| 332 | +a_i & = \sum_{j=1}^p \delta_{ij} \times C_{ij} \times (\pi_{ij}^C + \pi_i^-), & \forall i \in [1, n] \label{eq:areadef2} \\ | |
| 333 | +r_i & = \sum_{j=1}^p \delta_{ij} \times F(C_{ij}, \pi_{ij}^C), & \forall i \in [1, n] \label{eq:rejectiondef2} \\ | |
| 334 | +\pi_i^+ & = \pi_i^- + \left(\sum_{j=1}^p \delta_{ij} \pi_{ij}^C\right) - \pi_i^S, & \forall i \in [1, n] \label{eq:bits2} \\ | |
| 335 | +\sum_{j=1}^p \delta_{ij} & \leq 1, & \forall i \in [1, n] \label{eq:config} | |
| 336 | +\end{align} | |
| 337 | + | |
| 338 | +Equations \ref{eq:areadef2}, \ref{eq:rejectiondef2} and \ref{eq:bits2} replace | |
| 339 | +respectively equations \ref{eq:areadef}, \ref{eq:rejectiondef} and \ref{eq:bits}. | |
| 340 | +Equation~\ref{eq:config} states that, for each stage, at most a single configuration is chosen. | |
| 341 | + | |
| 342 | +Section~\ref{sec:fixed_area} shows the results for this quadratic program, whereas section~\ref{sec:fixed_rej} | |
| 343 | +presents the results for the complementary problem. In this case, we want to | |
| 344 | +minimize the occupied area for a targeted rejection level. Hence, we replace | |
| 345 | +the objective function with: | |
| 346 | +\begin{align} | |
| 347 | +\text{Minimize } & \sum_{i=1}^n a_i \notag | |
| 348 | +\end{align} | |
| 349 | +We adapt the constraints of the quadratic program by replacing equation~\ref{eq:area} | |
| 350 | +with equation~\ref{eq:rejection_min}, where $\mathcal{R}$ is the minimal | |
| 351 | +rejection required. | |
| 352 | + | |
| 353 | +\begin{align} | |
| 354 | +\sum_{i=1}^n r_i & \geq \mathcal{R} & \label{eq:rejection_min} | |
| 355 | +\end{align} | |
| 356 | + | |
| 357 | +\section{Design workflow} | |
| 358 | +\label{sec:workflow} | |
| 359 | + | |
| 360 | +In this section, we describe the workflow to compute all the results presented in section~\ref{sec:fixed_area}. | |
| 361 | +Figure~\ref{fig:workflow} shows the global workflow and the different steps involved in the computations of the results. | |
| 362 | + | |
| 363 | +\begin{figure} | |
| 364 | + \centering | |
| 365 | + \begin{tikzpicture}[node distance=0.75cm and 2cm] | |
| 366 | + \node[draw,minimum size=1cm] (Solver) { Filter Solver } ; | |
| 367 | + \node (Start) [left= 3cm of Solver] { } ; | |
| 368 | + \node[draw,minimum size=1cm] (TCL) [right= of Solver] { TCL Script } ; | |
| 369 | + \node (Input) [above= of TCL] { } ; | |
| 370 | + \node[draw,minimum size=1cm] (Deploy) [below= of Solver] { Deploy Script } ; | |
| 371 | + \node[draw,minimum size=1cm] (Bitstream) [below= of TCL] { Bitstream } ; | |
| 372 | + \node[draw,minimum size=1cm,rounded corners] (Board) [below right= of Deploy] { Board } ; | |
| 373 | + \node[draw,minimum size=1cm] (Postproc) [below= of Deploy] { Post-Processing } ; | |
| 374 | + \node (Results) [left= of Postproc] { } ; | |
| 375 | + | |
| 376 | + \draw[->] (Start) edge node [above] { $\mathcal{A}, n, \Pi^I$ } node [below] { $(C_{ij}, \pi_{ij}^C), F$ } (Solver) ; | |
| 377 | + \draw[->] (Input) edge node [left] { ADC or PRN } (TCL) ; | |
| 378 | + \draw[->] (Solver) edge node [below] { (1a) } (TCL) ; | |
| 379 | + \draw[->] (Solver) edge node [right] { (1b) } (Deploy) ; | |
| 380 | + \draw[->] (TCL) edge node [left] { (2) } (Bitstream) ; | |
| 381 | + \draw[->,dashed] (Bitstream) -- (Deploy) ; | |
| 382 | + \draw[->] (Deploy) to[out=-30,in=120] node [above] { (3) } (Board) ; | |
| 383 | + \draw[->] (Board) to[out=150,in=-60] node [below] { (4) } (Deploy) ; | |
| 384 | + \draw[->] (Deploy) edge node [left] { (5) } (Postproc) ; | |
| 385 | + \draw[->] (Postproc) -- (Results) ; | |
| 386 | + \end{tikzpicture} | |
| 387 | + \caption{Design workflow from the input parameters to the results} | |
| 388 | + \label{fig:workflow} | |
| 389 | +\end{figure} | |
| 390 | + | |
| 391 | +The filter solver is a C++ program that takes as input the maximum area | |
| 392 | +$\mathcal{A}$, the number of stages $n$, the size of the input signal $\Pi^I$, | |
| 393 | +the FIR configurations $(C_{ij}, \pi_{ij}^C)$ and the function $F$. It creates | |
| 394 | +the quadratic programs and uses the Gurobi solver to get the optimal results. | |
| 395 | +Then it produces two scripts: a TCL script ((1a) on figure~\ref{fig:workflow}) | |
| 396 | +and a deploy script ((1b) on figure~\ref{fig:workflow}). | |
| 397 | + | |
| 398 | +The TCL script describes the whole digital processing chain from the beginning | |
| 399 | +(the raw signal data) to the end (the filtered data). | |
| 400 | +The raw input data are generated by a Pseudo Random Number (PRN) | |
| 401 | +generator inside the FPGA, and $\Pi^I$ is fixed at 16~bits. | |
| 402 | +Then the script builds each stage of the chain with a generic FIR task that | |
| 403 | +comes from a skeleton library. The generic FIR is highly configurable | |
| 404 | +with the number of coefficients and the size of the coefficients. The coefficients | |
| 405 | +themselves are not stored in the script. | |
| 406 | +Whereas the signal is processed in real-time, the output signal is stored as | |
| 407 | +consecutive bursts of data. | |
| 408 | + | |
| 409 | +The TCL script is used by Vivado to produce the FPGA bitstream ((2) on figure~\ref{fig:workflow}). | |
| 410 | +We use the 2018.2 version of Xilinx Vivado and we execute the synthesized | |
| 411 | +bitstream on a Redpitaya board fitted with a Xilinx Zynq-7010 series | |
| 412 | +FPGA (xc7z010clg400-1) and two 125~MS/s ADCs. | |
| 413 | +The board works with a Buildroot Linux image. We have developed some tools and | |
| 414 | +drivers to flash and communicate with the FPGA. They are used to automate all | |
| 415 | +the workflow inside the board: load the filter coefficients and retrieve the | |
| 416 | +computed data. | |
| 417 | + | |
| 418 | +The deploy script uploads the bitstream to the board ((3) on | |
| 419 | +figure~\ref{fig:workflow}), flashes the FPGA, loads the different drivers, | |
| 420 | +and configures the coefficients of the FIR filters. It then waits for the results | |
| 421 | +and retrieves the data to the main computer ((4) on figure~\ref{fig:workflow}). | |
| 422 | + | |
| 423 | +Finally, an Octave post-processing script computes the final results thanks to | |
| 424 | +the output data ((5) on figure~\ref{fig:workflow}). | |
| 425 | +The results are normalized so that the Power Spectral Density (PSD) starts at zero | |
| 426 | +and the different configurations can be compared. | |
| 427 | + | |
| 428 | +To compute the results in section~\ref{sec:fixed_rej}, we | |
| 429 | +have only adapted the quadratic program; the rest of the workflow is unchanged. | |
| 430 | + | |
| 286 | 431 | \section{Experiments with fixed area space} |
| 432 | +\label{sec:fixed_area} | |
| 433 | +This section presents the output of the filter solver, \emph{i.e.}, the computed | |
| 434 | +configurations for each stage, the computed rejection and the computed silicon area. | |
| 435 | +This is interesting to understand the choices made by the solver to compute its solutions. | |
| 287 | 436 | |
| 437 | +The experimental setup is composed of three cases. The raw input is generated | |
| 438 | +by a Pseudo Random Number (PRN) generator, which fixes the input data size $\Pi^I$. | |
| 439 | +Then the total silicon area $\mathcal{A}$ has been fixed to either 500, 1000 or 1500 | |
| 440 | +arbitrary units. Hence, the three cases have been named: MAX/500, MAX/1000, MAX/1500. | |
| 441 | +The number of configurations $p$ is 1827, with $C_i$ ranging from 3 to 60 and $\pi^C$ | |
| 442 | +ranging from 2 to 22. In each case, the quadratic program has been able to give a | |
| 443 | +result up to five stages ($n = 5$) in the cascaded filter. | |
| 444 | + | |
| 445 | +Table~\ref{tbl:gurobi_max_500} shows the results obtained by the filter solver for MAX/500. | |
| 446 | +Table~\ref{tbl:gurobi_max_1000} shows the results obtained by the filter solver for MAX/1000. | |
| 447 | +Table~\ref{tbl:gurobi_max_1500} shows the results obtained by the filter solver for MAX/1500. | |
| 448 | + | |
| 449 | +\renewcommand{\arraystretch}{1.4} | |
| 450 | + | |
| 451 | +\begin{table} | |
| 452 | + \caption{Configurations $(C_i, \pi_i^C, \pi_i^S)$, rejections and areas (in arbitrary units) for MAX/500} | |
| 453 | + \label{tbl:gurobi_max_500} | |
| 454 | + \centering | |
| 455 | + {\scalefont{0.77} | |
| 456 | + \begin{tabular}{|c|ccccc|c|c|} | |
| 457 | + \hline | |
| 458 | + $n$ & $i = 1$ & $i = 2$ & $i = 3$ & $i = 4$ & $i = 5$ & Rejection & Area \\ | |
| 459 | + \hline | |
| 460 | + 1 & (21, 7, 0) & - & - & - & - & 32~dB & 483 \\ | |
| 461 | + 2 & (3, 3, 15) & (31, 9, 0) & - & - & - & 58~dB & 460 \\ | |
| 462 | + 3 & (3, 3, 15) & (27, 9, 0) & (5, 3, 0) & - & - & 66~dB & 488 \\ | |
| 463 | + 4 & (3, 3, 15) & (19, 7, 0) & (11, 5, 0) & (3, 3, 0) & - & 74~dB & 499 \\ | |
| 464 | + 5 & (3, 3, 15) & (23, 8, 0) & (3, 3, 1) & (3, 3, 0) & (3, 3, 0) & 78~dB & 489 \\ | |
| 465 | + \hline | |
| 466 | + \end{tabular} | |
| 467 | + } | |
| 468 | +\end{table} | |
| 469 | + | |
| 470 | +\begin{table} | |
| 471 | + \caption{Configurations $(C_i, \pi_i^C, \pi_i^S)$, rejections and areas (in arbitrary units) for MAX/1000} | |
| 472 | + \label{tbl:gurobi_max_1000} | |
| 473 | + \centering | |
| 474 | + {\scalefont{0.77} | |
| 475 | + \begin{tabular}{|c|ccccc|c|c|} | |
| 476 | + \hline | |
| 477 | + $n$ & $i = 1$ & $i = 2$ & $i = 3$ & $i = 4$ & $i = 5$ & Rejection & Area \\ | |
| 478 | + \hline | |
| 479 | + 1 & (37, 11, 0) & - & - & - & - & 56~dB & 999 \\ | |
| 480 | + 2 & (3, 3, 15) & (51, 14, 0) & - & - & - & 87~dB & 975 \\ | |
| 481 | + 3 & (3, 3, 15) & (35, 11, 0) & (19, 7, 0) & - & - & 99~dB & 1000 \\ | |
| 482 | + 4 & (3, 4, 16) & (27, 8, 0) & (19, 7, 1) & (11, 5, 0) & - & 103~dB & 998 \\ | |
| 483 | + 5 & (3, 3, 15) & (31, 9, 0) & (19, 7, 0) & (3, 3, 1) & (3, 3, 0) & 111~dB & 984 \\ | |
| 484 | + \hline | |
| 485 | + \end{tabular} | |
| 486 | + } | |
| 487 | +\end{table} | |
| 488 | + | |
| 489 | +\begin{table} | |
| 490 | + \caption{Configurations $(C_i, \pi_i^C, \pi_i^S)$, rejections and areas (in arbitrary units) for MAX/1500} | |
| 491 | + \label{tbl:gurobi_max_1500} | |
| 492 | + \centering | |
| 493 | + {\scalefont{0.77} | |
| 494 | + \begin{tabular}{|c|ccccc|c|c|} | |
| 495 | + \hline | |
| 496 | + $n$ & $i = 1$ & $i = 2$ & $i = 3$ & $i = 4$ & $i = 5$ & Rejection & Area \\ | |
| 497 | + \hline | |
| 498 | + 1 & (47, 15, 0) & - & - & - & - & 71~dB & 1457 \\ | |
| 499 | + 2 & (19, 6, 15) & (51, 14, 0) & - & - & - & 103~dB & 1489 \\ | |
| 500 | + 3 & (3, 3, 15) & (35, 11, 0) & (35, 11, 0) & - & - & 122~dB & 1492 \\ | |
| 501 | + 4 & (3, 3, 15) & (27, 8, 0) & (19, 7, 0) & (27, 9, 0) & - & 129~dB & 1498 \\ | |
| 502 | + 5 & (3, 3, 15) & (23, 9, 2) & (27, 9, 0) & (19, 7, 0) & (3, 3, 0) & 136~dB & 1499 \\ | |
| 503 | + \hline | |
| 504 | + \end{tabular} | |
| 505 | + } | |
| 506 | +\end{table} | |
| 507 | + | |
| 508 | +\renewcommand{\arraystretch}{1} | |
| 509 | + | |
| 510 | +From these tables, we can first state that the more stages are used to define | |
| 511 | +the cascaded FIR filters, the better the rejection. It was an expected result as it has | |
| 512 | +been previously observed that many small filters are better than | |
| 513 | +a single large filter~\cite{lim_1988, lim_1996, young_1992}, despite such a conclusion | |
| 514 | +being rarely used in practice due to the lack of tools for identifying individual filter | |
| 515 | +coefficients in the cascaded approach. | |
| 516 | + | |
| 517 | +Second, the larger the silicon area, the better the rejection. This was also an | |
| 518 | +expected result as more area means a filter of better quality (more coefficients | |
| 519 | +or more bits per coefficient). | |
| 520 | + | |
| 521 | +Then, we also observe that the first stage can have a larger shift than the other | |
| 522 | +stages. This is explained by the fact that the solver tries to use just enough | |
| 523 | +bits for the computed rejection after each stage. In the first stage, a | |
| 524 | +balance between a strong rejection and a low number of bits is targeted. Equation~\ref{eq:maxshift} | |
| 525 | +gives the relation between both values. | |
| 526 | + | |
| 527 | +Finally, we note that the solver consumes all the given silicon area. | |
| 528 | + | |
| 529 | +The following graphs present the rejection for real data on the FPGA. In all following | |
| 530 | +figures, the solid line represents the actual rejection of the filtered | |
| 531 | +data on the FPGA as measured experimentally and the dashed line is the noise level | |
| 532 | +given by the quadratic solver. The configurations are those computed in the previous section. | |
| 533 | + | |
| 534 | +Figure~\ref{fig:max_500_result} shows the rejection of the different configurations in the case of MAX/500. | |
| 535 | +Figure~\ref{fig:max_1000_result} shows the rejection of the different configurations in the case of MAX/1000. | |
| 536 | +Figure~\ref{fig:max_1500_result} shows the rejection of the different configurations in the case of MAX/1500. | |
| 537 | + | |
| 288 | 538 | \begin{figure} |
| 289 | 539 | \centering |
| 290 | -\includegraphics[width=\linewidth]{images/max_rejection/prn_500} | |
| 291 | -\caption{Experimental results for design with PRN as data input and 500 a.u. as max arbitrary space} | |
| 292 | -\label{fig:prn_500} | |
| 540 | +\includegraphics[width=\linewidth]{images/max_500} | |
| 541 | +\caption{Signal spectrum for MAX/500} | |
| 542 | +\label{fig:max_500_result} | |
| 293 | 543 | \end{figure} |
| 294 | 544 | |
| 295 | 545 | \begin{figure} |
| 296 | 546 | \centering |
| 297 | -\includegraphics[width=\linewidth]{images/max_rejection/prn_1000} | |
| 298 | -\caption{Experimental results for design with PRN as data input and 1000 a.u. as max arbitrary space} | |
| 299 | -\label{fig:prn_1000} | |
| 547 | +\includegraphics[width=\linewidth]{images/max_1000} | |
| 548 | +\caption{Signal spectrum for MAX/1000} | |
| 549 | +\label{fig:max_1000_result} | |
| 300 | 550 | \end{figure} |
| 301 | 551 | |
| 302 | 552 | \begin{figure} |
| 303 | 553 | \centering |
| 304 | -\includegraphics[width=\linewidth]{images/max_rejection/prn_2000} | |
| 305 | -\caption{Experimental results for design with PRN as data input and 2000 a.u. as max arbitrary space} | |
| 306 | -\label{fig:prn_2000} | |
| 554 | +\includegraphics[width=\linewidth]{images/max_1500} | |
| 555 | +\caption{Signal spectrum for MAX/1500} | |
| 556 | +\label{fig:max_1500_result} | |
| 307 | 557 | \end{figure} |
| 308 | 558 | |
| 309 | -\begin{table} | |
| 310 | -\centering | |
| 311 | -\begin{tabular}{|c|c|ccc|c|c|} | |
| 312 | -\hline | |
| 313 | -\multicolumn{2}{|c|}{\multirow{2}{*}{Stage}} & \multicolumn{3}{c|}{Stage} & \multirow{2}{*}{Rejection} & \multirow{2}{*}{Area} \\ \cline{3-5} | |
| 314 | -\multicolumn{2}{|c|}{} & i = 1 & i = 2 & i = 3 & & \\ \hline | |
| 315 | - & C & 19 & - & - & & \\ | |
| 316 | -n = 1 & $pi^C$ & 7 & - & - & 33 dB & 437 a.u. \\ | |
| 317 | - & $pi^S$ & 0 & - & - & & \\ \hline | |
| 318 | - & C & 11 & 19 & - & & \\ | |
| 319 | -n = 2 & $pi^C$ & 5 & 7 & - & 53 dB & 478 a.u. \\ | |
| 320 | - & $pi^S$ & 16 & 0 & - & & \\ \hline | |
| 321 | - & C & 9 & 15 & 11 & & \\ | |
| 322 | -n = 3 & $pi^C$ & 4 & 6 & 5 & 57 dB & 499 a.u. \\ | |
| 323 | - & $pi^S$ & 16 & 3 & 0 & & \\ \hline | |
| 324 | -\end{tabular} | |
| 325 | -\caption{Solver results for design with PRN as data input and 500 a.u. as max arbitrary space} | |
| 326 | -\label{tbl:prn_500} | |
| 327 | -\end{table} | |
| 559 | +In all cases, we observe that the actual rejection is close to the rejection computed by the solver. | |
| 328 | 560 | |
| 561 | +We compare the actual silicon resources given by Vivado to the | |
| 562 | +resources in arbitrary units. | |
| 563 | +The goal is to check that our arbitrary units of silicon area model well enough | |
| 564 | +the real resources on the FPGA. In particular, we want to verify that, for a given | |
| 565 | +number of arbitrary units, the actual silicon resources do not depend on the | |
| 566 | +number of stages $n$. Most significantly, our approach aims | |
| 567 | +at remaining far enough from the practical logic gate implementation used by | |
| 568 | +various vendors to remain platform independent and be portable from one | |
| 569 | +architecture to another. | |
| 570 | + | |
| 571 | +Table~\ref{tbl:resources_usage} shows the resources usage in the case of MAX/500, MAX/1000 and | |
| 572 | +MAX/1500 \emph{i.e.} when the maximum allowed silicon area is fixed to 500, 1000 | |
| 573 | +and 1500 arbitrary units. We have taken care to extract solely the resources used by | |
| 574 | +the FIR filters and remove additional processing blocks including FIFO and PL to | |
| 575 | +PS communication. | |
| 576 | + | |
| 329 | 577 | \begin{table} |
| 330 | -\centering | |
| 331 | -{\scalefont{0.85} | |
| 332 | -\begin{tabular}{|c|c|ccccc|c|c|} | |
| 333 | -\hline | |
| 334 | -\multicolumn{2}{|c|}{\multirow{2}{*}{Stage}} & \multicolumn{5}{c|}{Stage} & \multirow{2}{*}{Rejection} & \multirow{2}{*}{Area} \\ \cline{3-7} | |
| 335 | -\multicolumn{2}{|c|}{} & i = 1 & i = 2 & i = 3 & i = 4 & i = 5 & & \\ \hline | |
| 336 | - & C & 37 & - & - & - & - & & \\ | |
| 337 | -n = 1 & $pi^C$ & 11 & - & - & - & - & 56 dB & 999 a.u. \\ | |
| 338 | - & $pi^S$ & 0 & - & - & - & - & & \\ \hline | |
| 339 | - & C & 11 & 39 & - & - & - & & \\ | |
| 340 | -n = 2 & $pi^C$ & 5 & 13 & - & - & - & 82 dB & 972 a.u. \\ | |
| 341 | - & $pi^S$ & 16 & 0 & - & - & - & & \\ \hline | |
| 342 | - & C & 9 & 31 & 19 & - & - & & \\ | |
| 343 | -n = 3 & $pi^C$ & 7 & 8 & 7 & - & - & 93 dB & 990 a.u. \\ | |
| 344 | - & $pi^S$ & 19 & 2 & 0 & - & - & & \\ \hline | |
| 345 | - & C & 9 & 19 & 17 & 11 & - & & \\ | |
| 346 | -n = 4 & $pi^C$ & 4 & 7 & 7 & 5 & - & 99 dB & 992 a.u. \\ | |
| 347 | - & $pi^S$ & 16 & 3 & 3 & 0 & - & & \\ \hline | |
| 348 | - & C & 9 & 15 & 11 & 11 & 11 & & \\ | |
| 349 | -n = 5 & $pi^C$ & 4 & 7 & 5 & 5 & 5 & 99 dB & 998 a.u. \\ | |
| 350 | - & $pi^S$ & 16 & 3 & 2 & 1 & 1 & & \\ \hline | |
| 351 | -\end{tabular} | |
| 352 | -} | |
| 353 | -\caption{Solver results for design with PRN as data input and 1000 a.u. as max arbitrary space} | |
| 354 | -\label{tbl:prn_1000} | |
| 578 | + \caption{Resource occupation. The last column refers to available resources on a Zynq-7010 as found on the Redpitaya.} | |
| 579 | + \label{tbl:resources_usage} | |
| 580 | + \centering | |
| 581 | + \begin{tabular}{|c|c|ccc|c|} | |
| 582 | + \hline | |
| 583 | + $n$ & & MAX/500 & MAX/1000 & MAX/1500 & \emph{Zynq 7010} \\ \hline\hline | |
| 584 | + & LUT & 249 & 453 & 627 & \emph{17600} \\ | |
| 585 | + 1 & BRAM & 1 & 1 & 1 & \emph{120} \\ | |
| 586 | + & DSP & 21 & 37 & 47 & \emph{80} \\ \hline | |
| 587 | + & LUT & 2374 & 5494 & 691 & \emph{17600} \\ | |
| 588 | + 2 & BRAM & 2 & 2 & 2 & \emph{120} \\ | |
| 589 | + & DSP & 0 & 0 & 70 & \emph{80} \\ \hline | |
| 590 | + & LUT & 2443 & 3304 & 3521 & \emph{17600} \\ | |
| 591 | + 3 & BRAM & 3 & 3 & 3 & \emph{120} \\ | |
| 592 | + & DSP & 0 & 19 & 35 & \emph{80} \\ \hline | |
| 593 | + & LUT & 2634 & 3753 & 2557 & \emph{17600} \\ | |
| 594 | + 4 & BRAM & 4 & 4 & 4 & \emph{120} \\ | |
| 595 | + & DSP & 0 & 19 & 46 & \emph{80} \\ \hline | |
| 596 | + & LUT & 2423 & 3047 & 2847 & \emph{17600} \\ | |
| 597 | + 5 & BRAM & 5 & 5 & 5 & \emph{120} \\ | |
| 598 | + & DSP & 0 & 22 & 46 & \emph{80} \\ \hline | |
| 599 | + \end{tabular} | |
| 355 | 600 | \end{table} |
| 356 | 601 | |
| 602 | +In some cases, Vivado replaces the DSPs by Look Up Tables (LUTs). We assume that, | |
| 603 | +when the filter coefficients are small enough, or when the input size is small | |
| 604 | +enough, Vivado optimizes resource consumption by selecting multiplexers to | |
| 605 | +implement the multiplications instead of a DSP. In this case, it is quite difficult | |
| 606 | +to compare the whole silicon budget. | |
| 607 | + | |
| 608 | +However, a rough estimation can be made with a simple equivalence. Looking at | |
| 609 | +the first column (MAX/500), where the number of LUTs is quite stable for $n \geq 2$, | |
| 610 | +we can deduce that a DSP is roughly equivalent to 100~LUTs in terms of silicon | |
| 611 | +area use. With this equivalence, our 500 arbitrary units correspond to 2500 LUTs, | |
| 612 | +1000 arbitrary units correspond to 5000 LUTs and 1500 arbitrary units correspond | |
| 613 | +to 7300 LUTs. The conclusion is that the orders of magnitude of our arbitrary | |
| 614 | +unit are quite good. The relatively small differences can probably be explained | |
| 615 | +by the optimizations done by Vivado based on the detailed map of available processing resources. | |
| 616 | + | |
| 617 | +We present the computation time to solve the quadratic problem. | |
| 618 | +For each case, the filter solver software is executed on an Intel(R) Xeon(R) CPU E5606 | |
| 619 | +clocked at 2.13~GHz. The CPU has 8 cores that are used by Gurobi to solve | |
| 620 | +the quadratic problem. | |
| 621 | + | |
| 622 | +Table~\ref{tbl:area_time} shows the time needed to solve the quadratic | |
| 623 | +problem when the maximal area is fixed to 500, 1000 and 1500 arbitrary units. | |
| 624 | + | |
| 357 | 625 | \begin{table} |
| 626 | +\caption{Time to solve the quadratic program with Gurobi} | |
| 627 | +\label{tbl:area_time} | |
| 358 | 628 | \centering |
| 359 | -{\scalefont{0.85} | |
| 360 | -\begin{tabular}{|c|c|ccccc|c|c|} | |
| 361 | -\hline | |
| 362 | -\multicolumn{2}{|c|}{\multirow{2}{*}{Stage}} & \multicolumn{5}{c|}{Stage} & \multirow{2}{*}{Rejection} & \multirow{2}{*}{Area} \\ \cline{3-7} | |
| 363 | -\multicolumn{2}{|c|}{} & i = 1 & i = 2 & i = 3 & i = 4 & i = 5 & & \\ \hline | |
| 364 | - & C & 39 & - & - & - & - & & \\ | |
| 365 | -n = 1 & $pi^C$ & 13 & - & - & - & - & 61 dB & 1131 a.u. \\ | |
| 366 | - & $pi^S$ & 0 & - & - & - & - & & \\ \hline | |
| 367 | - & C & 37 & 39 & - & - & - & & \\ | |
| 368 | -n = 2 & $pi^C$ & 11 & 13 & - & - & - & 117 dB & 1974 a.u. \\ | |
| 369 | - & $pi^S$ & 17 & 0 & - & - & - & & \\ \hline | |
| 370 | - & C & 15 & 35 & 35 & - & - & & \\ | |
| 371 | -n = 3 & $pi^C$ & 9 & 11 & 11 & - & - & 138 dB & 1985 a.u. \\ | |
| 372 | - & $pi^S$ & 19 & 3 & 0 & - & - & & \\ \hline | |
| 373 | - & C & 11 & 27 & 27 & 23 & - & & \\ | |
| 374 | -n = 4 & $pi^C$ & 5 & 9 & 9 & 9 & - & 148 dB & 1993 a.u. \\ | |
| 375 | - & $pi^S$ & 16 & 3 & 2 & 0 & - & & \\ \hline | |
| 376 | - & C & 11 & 27 & 31 & 11 & 11 & & \\ | |
| 377 | -n = 5 & $pi^C$ & 5 & 9 & 8 & 5 & 5 & 153 dB & 2000 a.u. \\ | |
| 378 | - & $pi^S$ & 16 & 3 & 1 & 0 & 1 & & \\ \hline | |
| 629 | +\begin{tabular}{|c|c|c|c|}\hline | |
| 630 | +$n$ & Time (MAX/500) & Time (MAX/1000) & Time (MAX/1500) \\\hline\hline | |
| 631 | +1 & 0.1~s & 0.1~s & 0.3~s \\ | |
| 632 | +2 & 1.1~s & 2.2~s & 12~s \\ | |
| 633 | +3 & 17~s & 137~s ($\approx$ 2~min) & 275~s ($\approx$ 4~min) \\ | |
| 634 | +4 & 52~s & 5448~s ($\approx$ 90~min) & 5505~s ($\approx$ 17~h) \\ | |
| 635 | +5 & 286~s ($\approx$ 4~min) & 4119~s ($\approx$ 68~min) & 235479~s ($\approx$ 3~days) \\\hline | |
| 379 | 636 | \end{tabular} |
| 380 | -} | |
| 381 | -\caption{Solver results for design with PRN as data input and 2000 a.u. as max arbitrary space} | |
| 382 | -\label{tbl:prn_2000} | |
| 383 | 637 | \end{table} |
| 384 | 638 | |
| 639 | +As expected, the computation time seems to rise exponentially with the number of stages. % TODO: exponentiel ? | |
| 640 | +When the area is limited, the design exploration space is more limited and the solver is able to | |
| 641 | +find an optimal solution faster. On the contrary, in the case of MAX/1500 with | |
| 642 | +5~stages, we were not able to obtain a result after 40~hours of computation so we decided to stop. | |
| 643 | + | |
| 644 | +\section{Experiments with fixed rejection target} | |
| 645 | +\label{sec:fixed_rej} | |
| 646 | +This section presents the results of the complementary quadratic program, in which we | |
| 647 | +minimize the area occupation for a targeted noise level. | |
| 648 | + | |
| 649 | +The experimental setup is also composed of three cases. The raw input is the same | |
| 650 | +as in the previous section, a PRN generator, which fixes the input data size $\Pi^I$. | |
| 651 | +Then the targeted rejection $\mathcal{R}$ has been fixed to either 40, 60 or 80~dB. | |
| 652 | +Hence, the three cases have been named: MIN/40, MIN/60, MIN/80. | |
| 653 | +The number of configurations $p$ is the same as in the previous section. | |
| 654 | + | |
| 655 | +Table~\ref{tbl:gurobi_min_40} shows the results obtained by the filter solver for MIN/40. | |
| 656 | +Table~\ref{tbl:gurobi_min_60} shows the results obtained by the filter solver for MIN/60. | |
| 657 | +Table~\ref{tbl:gurobi_min_80} shows the results obtained by the filter solver for MIN/80. | |
| 658 | + | |
| 659 | +\renewcommand{\arraystretch}{1.4} | |
| 660 | + | |
| 385 | 661 | \begin{table} |
| 386 | -\centering | |
| 387 | -\begin{tabular}{|c|c|c|c|c|}\hline | |
| 388 | -Input & Stages & Computation time & Vivado time & Redpitaya time \\\hline\hline | |
| 389 | - & 1 & 0.02~s & $\approx$ 20 min & $\approx$ 1 min \\ | |
| 390 | -PRN & 2 & 1.70~s & $\approx$ 20 min & $\approx$ 1 min \\ | |
| 391 | - & 3 & 19~s & $\approx$ 20 min & $\approx$ 1 min \\\hline | |
| 392 | -\end{tabular} | |
| 393 | -\caption{Time to compute and deploy the designs for PRN 500} | |
| 394 | -\label{tbl:time_prn_500} | |
| 662 | + \caption{Configurations $(C_i, \pi_i^C, \pi_i^S)$, rejections and areas (in arbitrary units) for MIN/40} | |
| 663 | + \label{tbl:gurobi_min_40} | |
| 664 | + \centering | |
| 665 | + {\scalefont{0.77} | |
| 666 | + \begin{tabular}{|c|ccccc|c|c|} | |
| 667 | + \hline | |
| 668 | + $n$ & $i = 1$ & $i = 2$ & $i = 3$ & $i = 4$ & $i = 5$ & Rejection & Area \\ | |
| 669 | + \hline | |
| 670 | + 1 & (27, 8, 0) & - & - & - & - & 41~dB & 648 \\ | |
| 671 | + 2 & (3, 2, 14) & (19, 7, 0) & - & - & - & 40~dB & 263 \\ | |
| 672 | + 3 & (3, 3, 15) & (11, 5, 0) & (3, 3, 0) & - & - & 41~dB & 192 \\ | |
| 673 | + 4 & (3, 3, 15) & (3, 3, 0) & (3, 3, 0) & (3, 3, 0) & - & 42~dB & 147 \\ | |
| 674 | + \hline | |
| 675 | + \end{tabular} | |
| 676 | + } | |
| 395 | 677 | \end{table} |
| 396 | 678 | |
| 397 | 679 | \begin{table} |
| 398 | -\centering | |
| 399 | -\begin{tabular}{|c|c|c|c|c|}\hline | |
| 400 | -Input & Stages & Computation time & Vivado time & Redpitaya time \\\hline\hline | |
| 401 | - & 1 & 0.07~s & $\approx$ 20 min & $\approx$ 1 min \\ | |
| 402 | - & 2 & 1.31~s & $\approx$ 20 min & $\approx$ 1 min \\ | |
| 403 | -PRN & 3 & 119~s ($\approx$ 2~min) & $\approx$ 20 min & $\approx$ 1 min \\ | |
| 404 | - & 4 & 270~s ($\approx$ 5~min) & $\approx$ 20 min & $\approx$ 1 min \\ | |
| 405 | - & 5 & 5998~s ($\approx$ 2~h) & $\approx$ 20 min & $\approx$ 1 min \\\hline | |
| 406 | -\end{tabular} | |
| 407 | -\caption{Time to compute and deploy the designs for PRN 1000} | |
| 408 | -\label{tbl:time_prn_1000} | |
| 680 | + \caption{Configurations $(C_i, \pi_i^C, \pi_i^S)$, rejections and areas (in arbitrary units) for MIN/60} | |
| 681 | + \label{tbl:gurobi_min_60} | |
| 682 | + \centering | |
| 683 | + {\scalefont{0.77} | |
| 684 | + \begin{tabular}{|c|ccccc|c|c|} | |
| 685 | + \hline | |
| 686 | + $n$ & $i = 1$ & $i = 2$ & $i = 3$ & $i = 4$ & $i = 5$ & Rejection & Area \\ | |
| 687 | + \hline | |
| 688 | + 1 & (39, 13, 0) & - & - & - & - & 60~dB & 1131 \\ | |
| 689 | + 2 & (3, 3, 15) & (35, 10, 0) & - & - & - & 60~dB & 547 \\ | |
| 690 | + 3 & (3, 3, 15) & (27, 8, 0) & (3, 3, 0) & - & - & 62~dB & 426 \\ | |
| 691 | + 4 & (3, 2, 14) & (11, 5, 1) & (11, 5, 0) & (3, 3, 0) & - & 60~dB & 344 \\ | |
| 692 | + 5 & (3, 2, 14) & (3, 3, 1) & (3, 3, 0) & (3, 3, 0) & (3, 3, 0) & 60~dB & 279 \\ | |
| 693 | + \hline | |
| 694 | + \end{tabular} | |
| 695 | + } | |
| 409 | 696 | \end{table} |
| 410 | 697 | |
| 411 | 698 | \begin{table} |
| 412 | -\centering | |
| 413 | -\begin{tabular}{|c|c|c|c|c|}\hline | |
| 414 | -Input & Stages & Computation time & Vivado time & Redpitaya time \\\hline\hline | |
| 415 | - & 1 & 0.07~s & $\approx$ 20 min & $\approx$ 1 min \\ | |
| 416 | - & 2 & 0.75~s & $\approx$ 20 min & $\approx$ 1 min \\ | |
| 417 | -PRN & 3 & 36~s & - & - \\ | |
| 418 | - & 4 & 14500~s ($\approx$ 4~h) & $\approx$ 20 min & $\approx$ 1 min \\ | |
| 419 | - & 5 & 74237~s ($\approx$ 20~h) & $\approx$ 20 min & $\approx$ 1 min \\\hline | |
| 420 | -\end{tabular} | |
| 421 | -\caption{Time to compute and deploy the designs for PRN 2000} | |
| 422 | -\label{tbl:time_prn_2000} | |
| 699 | + \caption{Configurations $(C_i, \pi_i^C, \pi_i^S)$, rejections and areas (in arbitrary units) for MIN/80} | |
| 700 | + \label{tbl:gurobi_min_80} | |
| 701 | + \centering | |
| 702 | + {\scalefont{0.77} | |
| 703 | + \begin{tabular}{|c|ccccc|c|c|} | |
| 704 | + \hline | |
| 705 | + $n$ & $i = 1$ & $i = 2$ & $i = 3$ & $i = 4$ & $i = 5$ & Rejection & Area \\ | |
| 706 | + \hline | |
| 707 | + 1 & (55, 16, 0) & - & - & - & - & 81~dB & 1760 \\ | |
| 708 | + 2 & (3, 3, 15) & (47, 14, 0) & - & - & - & 80~dB & 903 \\ | |
| 709 | + 3 & (3, 3, 15) & (23, 9, 0) & (19, 7, 0) & - & - & 80~dB & 698 \\ | |
| 710 | + 4 & (3, 3, 15) & (27, 9, 0) & (7, 7, 4) & (3, 3, 0) & - & 80~dB & 605 \\ | |
| 711 | + 5 & (3, 2, 14) & (27, 8, 0) & (3, 3, 1) & (3, 3, 0) & (3, 3, 0) & 81~dB & 534 \\ | |
| 712 | + \hline | |
| 713 | + \end{tabular} | |
| 714 | + } | |
| 423 | 715 | \end{table} |
| 716 | +\renewcommand{\arraystretch}{1} | |
| 424 | 717 | |
| 425 | -\section{Experiments with fixed rejection target} | |
| 718 | +From these tables, we can first state that all configurations reach the target rejection | |
| 719 | +level, and that the more stages we have, the smaller the occupied area (in arbitrary units). | |
| 720 | +Furthermore, the area of the monolithic filter is about twice as large as that of the two cascaded filters. | |
| 721 | +More generally, the more filters there are, the lower the occupied area. | |
| 426 | 722 | |
| 723 | +As in the previous section, the solver always chooses a small filter as the first | |
| 724 | +filter stage, and the second one is often the biggest filter. This choice can be explained | |
| 725 | +as in the previous section. The solver uses just enough bits not to degrade the input | |
| 726 | +signal, and in the second filter it can choose a better filter to improve the rejection without | |
| 727 | +having too many bits in the output data. | |
| 728 | + | |
| 729 | +For the specific case of MIN/40 with $n = 5$, the solver has determined that the optimal | |
| 730 | +number of filters is 4, so it did not choose any configuration for the last filter. Hence this | |
| 731 | +solution is equivalent to the result for $n = 4$. | |
| 732 | + | |
| 733 | +The following graphs present the rejection for real data on the FPGA. In all following | |
| 734 | +figures, the solid lines represent the actual rejection of the filtered | |
| 735 | +data on the FPGA as measured experimentally and the dashed lines are the noise levels | |
| 736 | +given by the quadratic solver. | |
| 737 | + | |
| 738 | +Figure~\ref{fig:min_40} shows the rejection of the different configurations in the case of MIN/40. | |
| 739 | +Figure~\ref{fig:min_60} shows the rejection of the different configurations in the case of MIN/60. | |
| 740 | +Figure~\ref{fig:min_80} shows the rejection of the different configurations in the case of MIN/80. | |
| 741 | + | |
| 427 | 742 | \begin{figure} |
| 428 | 743 | \centering |
| 429 | -\includegraphics[width=\linewidth]{images/min_area/prn_50} | |
| 430 | -\caption{Results for design with PRN as data input and 50 dB as aimed rejection level} | |
| 431 | -\label{fig:prn_500} | |
| 744 | +\includegraphics[width=\linewidth]{images/min_40} | |
| 745 | +\caption{Signal spectrum for MIN/40} | |
| 746 | +\label{fig:min_40} | |
| 432 | 747 | \end{figure} |
| 433 | 748 | |
| 434 | 749 | \begin{figure} |
| 435 | 750 | \centering |
| 436 | -\includegraphics[width=\linewidth]{images/min_area/prn_100} | |
| 437 | -\caption{Results for design with PRN as data input and 50 dB as aimed rejection level} | |
| 438 | -\label{fig:prn_100} | |
| 751 | +\includegraphics[width=\linewidth]{images/min_60} | |
| 752 | +\caption{Signal spectrum for MIN/60} | |
| 753 | +\label{fig:min_60} | |
| 439 | 754 | \end{figure} |
| 440 | 755 | |
| 441 | 756 | \begin{figure} |
| 442 | 757 | \centering |
| 443 | -\includegraphics[width=\linewidth]{images/min_area/prn_150} | |
| 444 | -\caption{Results for design with PRN as data input and 2000 a.u. as max arbitrary space} | |
| 445 | -\label{fig:prn_150} | |
| 758 | +\includegraphics[width=\linewidth]{images/min_80} | |
| 759 | +\caption{Signal spectrum for MIN/80} | |
| 760 | +\label{fig:min_80} | |
| 446 | 761 | \end{figure} |
| 447 | 762 | |
| 763 | +We observe that all rejections given by the quadratic solver are close to the real | |
| 764 | +rejection. All curves show that the constraint of reaching the target rejection is | |
| 765 | +respected by both the monolithic filter and the cascaded filters. | |
| 766 | + | |
| 767 | +Table~\ref{tbl:resources_usage_comp} shows the resource usage in the case of MIN/40, MIN/60 and | |
| 768 | +MIN/80, \emph{i.e.}\ when the target rejection is fixed to 40, 60 and 80~dB. We | |
| 769 | +have taken care to extract solely the resources used by | |
| 770 | +the FIR filters and remove additional processing blocks including FIFO and PL to | |
| 771 | +PS communication. | |
| 772 | + | |
| 773 | +\begin{table} | |
| 774 | + \caption{Resource occupation. The last column refers to available resources on a Zynq-7010 as found on the Redpitaya.} | |
| 775 | + \label{tbl:resources_usage_comp} | |
| 776 | + \centering | |
| 777 | + \begin{tabular}{|c|c|ccc|c|} | |
| 778 | + \hline | |
| 779 | + $n$ & & MIN/40 & MIN/60 & MIN/80 & \emph{Zynq 7010} \\ \hline\hline | |
| 780 | + & LUT & 343 & 334 & 772 & \emph{17600} \\ | |
| 781 | + 1 & BRAM & 1 & 1 & 1 & \emph{120} \\ | |
| 782 | + & DSP & 27 & 39 & 55 & \emph{80} \\ \hline | |
| 783 | + & LUT & 1252 & 2862 & 5099 & \emph{17600} \\ | |
| 784 | + 2 & BRAM & 2 & 2 & 2 & \emph{120} \\ | |
| 785 | + & DSP & 0 & 0 & 0 & \emph{80} \\ \hline | |
| 786 | + & LUT & 891 & 2148 & 2023 & \emph{17600} \\ | |
| 787 | + 3 & BRAM & 3 & 3 & 3 & \emph{120} \\ | |
| 788 | + & DSP & 0 & 0 & 19 & \emph{80} \\ \hline | |
| 789 | + & LUT & 662 & 1729 & 2451 & \emph{17600} \\ | |
| 790 | + 4 & BRAM & 4 & 4 & 4 & \emph{120} \\ | |
| 791 | + & DSP & 0 & 0 & 7 & \emph{80} \\ \hline | |
| 792 | + & LUT & - & 1259 & 2602 & \emph{17600} \\ | |
| 793 | + 5 & BRAM & - & 5 & 5 & \emph{120} \\ | |
| 794 | + & DSP & - & 0 & 0 & \emph{80} \\ \hline | |
| 795 | + \end{tabular} | |
| 796 | +\end{table} | |
| 797 | + | |
| 798 | +If we keep the previous estimate of the cost of one DSP in terms of LUTs (1 DSP $\approx$ 100 LUT), | |
| 799 | +the real resource consumption decreases as a function of the number of filter stages, in agreement | |
| 800 | +with the solution given by the quadratic solver. Indeed, the consumption always | |
| 801 | +decreases, even if the difference between the monolithic and the two cascaded | |
| 802 | +filters is smaller than expected. | |
| 803 | + | |
| 804 | +Finally, Table~\ref{tbl:area_time_comp} shows the computation time needed to solve | |
| 805 | +the quadratic program. | |
| 806 | + | |
| 807 | +\begin{table} | |
| 808 | +\caption{Time to solve the quadratic program with Gurobi} | |
| 809 | +\label{tbl:area_time_comp} | |
| 810 | +\centering | |
| 811 | +\begin{tabular}{|c|c|c|c|}\hline | |
| 812 | +$n$ & Time (MIN/40) & Time (MIN/60) & Time (MIN/80) \\\hline\hline | |
| 813 | +1 & 0.07~s & 0.02~s & 0.01~s \\ | |
| 814 | +2 & 7.8~s & 16~s & 14~s \\ | |
| 815 | +3 & 4.7~s & 14~s & 28~s \\ | |
| 816 | +4 & 39~s & 20~s & 193~s \\ | |
| 817 | +5 & 126~s & 12~s & 170~s \\\hline | |
| 818 | +\end{tabular} | |
| 819 | +\end{table} | |
| 820 | + | |
| 821 | +The times needed to solve these configurations are substantially shorter than the times | |
| 822 | +needed in the previous section. Indeed the worst time in this case is only 3~minutes, | |
| 823 | +compared to 3~days in the previous section. We are able to solve this | |
| 824 | +problem more easily than the previous one. | |
| 825 | + | |
| 448 | 826 | \section{Conclusion} |
| 827 | + | |
| 828 | +In this paper, we have proposed a new approach to work with a cascade of FIR filters inside an FPGA. | |
| 829 | +This method aims to be hardware independent and focuses on a high level of abstraction. | |
| 830 | +We have modeled the FIR filter operation and the impact of the data shift. With this model | |
| 831 | +we have created a quadratic program to select the optimal FIR coefficient set to reject a | |
| 832 | +maximum of noise. In our experiments we have deliberately chosen some common tools | |
| 833 | +to design the filter coefficients, but any other method can be used. | |
| 834 | + | |
| 835 | +Our experimental results are very promising in providing a rational approach to selecting | |
| 836 | +the coefficients of each FIR filter in the context of a performance target for a chain of | |
| 837 | +such filters. The FPGA design that is produced automatically by our | |
| 838 | +workflow is able to filter an input signal as expected which validates our model and our approach. | |
| 839 | +We can easily change the quadratic program to adapt it to another problem. | |
| 840 | + | |
| 841 | +A perspective is to model and add the decimators to the processing chain to have a classical | |
| 842 | +FIR filter and decimator. The impact of the decimator is not so trivial, especially in terms of silicon | |
| 843 | +area for the subsequent stages since some hardware optimization can be applied in | |
| 844 | +this case. | |
| 845 | + | |
| 846 | +The software used to demonstrate the concepts developed in this paper is based on the | |
| 847 | +CPU-FPGA co-design framework available at \url{https://github.com/oscimp/oscimpDigital}. | |
| 449 | 848 | |
| 450 | 849 | \section*{Acknowledgement} |
| 451 | 850 |
images/max_1000.pdf
No preview for this file type
images/max_1500.pdf
No preview for this file type
images/max_500.pdf
No preview for this file type
images/max_rejection/prn_1000.pdf
No preview for this file type
images/max_rejection/prn_2000.pdf
No preview for this file type
images/max_rejection/prn_500.pdf
No preview for this file type
images/min_40.pdf
No preview for this file type
images/min_60.pdf
No preview for this file type
images/min_80.pdf
No preview for this file type
references.bib
| ... | ... | @@ -10,7 +10,7 @@ |
| 10 | 10 | } |
| 11 | 11 | |
| 12 | 12 | @article{kodek1980design, |
| 13 | - title={Design of optimal finite wordlength {FIR} digital filters using integer | |
| 13 | + title={Design of optimal finite wordlength {FIR} digital filters using integer | |
| 14 | 14 | programming techniques}, |
| 15 | 15 | author={Kodek, Dusan}, |
| 16 | 16 | journal={IEEE Transactions on Acoustics, Speech, and Signal Processing}, |
| ... | ... | @@ -43,4 +43,56 @@ |
| 43 | 43 | year={2016}, |
| 44 | 44 | publisher={AIP Publishing} |
| 45 | 45 | } |
| 46 | + | |
| 47 | +@inproceedings{lim_1996, | |
| 48 | +author={Y.-C. Lim and R. Yang and B. Liu}, | |
| 49 | +booktitle={1996 IEEE International Symposium on Circuits and Systems. Circuits and Systems Connecting the World. ISCAS 96}, | |
| 50 | +title={The design of cascaded {FIR} filters}, | |
| 51 | +year={1996}, | |
| 52 | +volume={2}, | |
| 53 | +number={}, | |
| 54 | +pages={181-184 vol.2}, | |
| 55 | +keywords={cascade networks;digital filters;FIR filters;filtering theory;linear programming;frequency response;cascaded FIR filters;stopband response;minimum attenuation requirement;passband ripple magnitude;linear-programming technique;FIR filter design;filter optimisation;Finite impulse response filter;IIR filters;Passband;Frequency;Signal sampling;Band pass filters;Digital filters;Attenuation;Image sampling;Linear programming}, | |
| 56 | +doi={10.1109/ISCAS.1996.540382}, | |
| 57 | +ISSN={}, | |
| 58 | +month={May},} | |
| 59 | + | |
| 60 | +@article{lim_1988, | |
| 61 | +author={Y. C. {Lim} and B. {Liu}}, | |
| 62 | +journal={IEEE Transactions on Acoustics, Speech, and Signal Processing}, | |
| 63 | +title={Design of cascade form {FIR} filters with discrete valued coefficients}, | |
| 64 | +year={1988}, | |
| 65 | +volume={36}, | |
| 66 | +number={11}, | |
| 67 | +pages={1735-1739}, | |
| 68 | +keywords={cascade networks;digital filters;filtering and prediction theory;iterative equalisation strategy;cascade form FIR filters;discrete valued coefficients;peak ripple;prototype filter;roundoff noise property;Finite impulse response filter;Low pass filters;Band pass filters;Passband;Prototypes;Frequency;Digital filters;Digital arithmetic;Design optimization;Sampling methods}, | |
| 69 | +doi={10.1109/29.9010}, | |
| 70 | +ISSN={0096-3518}, | |
| 71 | +month={Nov},} | |
| 72 | + | |
| 73 | +@inproceedings{young_1992, | |
| 74 | +author={C. {Young} and D. L. {Jones}}, | |
| 75 | +booktitle={[Proceedings] ICASSP-92: 1992 IEEE International Conference on Acoustics, Speech, and Signal Processing}, | |
| 76 | +title={Improvement in finite wordlength {FIR} digital filter design by cascading}, | |
| 77 | +year={1992}, | |
| 78 | +volume={5}, | |
| 79 | +number={}, | |
| 80 | +pages={109-112 vol.5}, | |
| 81 | +keywords={approximation theory;digital filters;integer programming;series (mathematics);finite wordlength filter;quantization;FIR digital filter design;finite impulse response;digital systems;finite wordlength coefficients;cascaded subfilters;stopband suppression;Taylor series approximation;linear integer program;passband deviation;Finite impulse response filter;Digital filters;Linear programming;Passband;Quantization;Frequency response;Digital systems;Taylor series;Minimax techniques;Design optimization}, | |
| 82 | +doi={10.1109/ICASSP.1992.226646}, | |
| 83 | +ISSN={1520-6149}, | |
| 84 | +month={March},} | |
| 85 | + | |
| 86 | +@article{smith_1998, | |
| 87 | +author={L. M. {Smith}}, | |
| 88 | +journal={IEEE Transactions on Signal Processing}, | |
| 89 | +title={Decomposition of {FIR} digital filters for realization via the cascade connection of subfilters}, | |
| 90 | +year={1998}, | |
| 91 | +volume={46}, | |
| 92 | +number={6}, | |
| 93 | +pages={1681-1684}, | |
| 94 | +keywords={FIR filters;digital filters;cascade networks;Z transforms;transfer functions;frequency response;Newton-Raphson method;convergence of numerical methods;search problems;poles and zeros;FIR digital filters;subfilters cascade connection;even-order linear-phase FIR filters;filter decomposition;fourth-order subfilters;second-order subfilters;roots;z-domain filter transfer function;complex z plane;impulse response symmetry;unit circle;perimeter;complex values;real values;impulse response coefficients;root-finding algorithm;Newton-Raphson method;2D search;Cauchy-Riemann relations;convergence speed;frequency response characteristics;Finite impulse response filter;Digital filters;Polynomials;Programmable logic arrays;Transfer functions;Testing;Frequency response;Application specific integrated circuits;Nonlinear filters;Passband}, | |
| 95 | +doi={10.1109/78.678490}, | |
| 96 | +ISSN={1053-587X}, | |
| 97 | +month={June},} |