Commit b0ed3be3eef886db4fcbf50b7bbe564c761b0531
1 parent
836ff72905
Exists in
master
final version
Showing 2 changed files with 11 additions and 7 deletions Inline Diff
biblio.bib
@thesis{gwen-cogen, | 1 | 1 | @thesis{gwen-cogen, | |
author = {Gwenhaël Goavec-Merou}, | 2 | 2 | author = {Gwenhaël Goavec-Merou}, | |
title = {Générateur de coprocesseur pour le traitement de données en flux (vidéo ou similaire) sur {FPGA}}, | 3 | 3 | title = {Générateur de coprocesseur pour le traitement de données en flux (vidéo ou similaire) sur {FPGA}}, | |
institution = {FEMTO-ST}, | 4 | 4 | institution = {FEMTO-ST}, | |
year = {2014} | 5 | 5 | year = {2014} | |
} | 6 | 6 | } | |
7 | 7 | |||
@article{hide, | 8 | 8 | @article{hide, | |
title={HIDE: A hardware intelligent description environment}, | 9 | 9 | title={HIDE: A hardware intelligent description environment}, | |
author={Benkrid, Khaled and Belkacemi, S and Benkrid, Abdsamad}, | 10 | 10 | author={Benkrid, Khaled and Belkacemi, S and Benkrid, Abdsamad}, | |
journal={Microprocessors and Microsystems}, | 11 | 11 | journal={Microprocessors and Microsystems}, | |
volume={30}, | 12 | 12 | volume={30}, | |
number={6}, | 13 | 13 | number={6}, | |
pages={283--300}, | 14 | 14 | pages={283--300}, | |
year={2006}, | 15 | 15 | year={2006}, | |
publisher={Elsevier} | 16 | 16 | publisher={Elsevier} | |
} | 17 | 17 | } | |
18 | 18 | |||
@inproceedings{skeleton, | 19 | 19 | @inproceedings{skeleton, | |
title={High level programming for {FPGA} based image and video processing using hardware skeletons}, | 20 | 20 | title={High level programming for {FPGA} based image and video processing using hardware skeletons}, | |
author={Benkrid, Khaled and Crookes, Danny and Smith, J and Benkrid, Abdsamad}, | 21 | 21 | author={Benkrid, Khaled and Crookes, Danny and Smith, J and Benkrid, Abdsamad}, | |
booktitle={Field-Programmable Custom Computing Machines, 2001. FCCM'01. The 9th Annual IEEE Symposium on}, | 22 | 22 | booktitle={Field-Programmable Custom Computing Machines, 2001. FCCM'01. The 9th Annual IEEE Symposium on}, | |
pages={219--226}, | 23 | 23 | pages={219--226}, | |
year={2001}, | 24 | 24 | year={2001}, | |
organization={IEEE} | 25 | 25 | organization={IEEE} | |
} | 26 | 26 | } | |
27 | 27 | |||
@article{benkrid2004application, | 28 | 28 | @article{benkrid2004application, | |
title={From application descriptions to hardware in seconds: a logic-based approach to bridging the gap}, | 29 | 29 | title={From application descriptions to hardware in seconds: a logic-based approach to bridging the gap}, | |
author={Benkrid, Khaled and Crookes, Danny}, | 30 | 30 | author={Benkrid, Khaled and Crookes, Danny}, | |
journal={Very Large Scale Integration (VLSI) Systems, IEEE Transactions on}, | 31 | 31 | journal={Very Large Scale Integration (VLSI) Systems, IEEE Transactions on}, | |
volume={12}, | 32 | 32 | volume={12}, | |
number={4}, | 33 | 33 | number={4}, | |
pages={420--436}, | 34 | 34 | pages={420--436}, | |
year={2004}, | 35 | 35 | year={2004}, | |
publisher={IEEE} | 36 | 36 | publisher={IEEE} | |
} | 37 | 37 | } | |
38 | 38 | |||
@phdthesis{these-dsp-fpga, | 39 | 39 | @phdthesis{these-dsp-fpga, | |
title={Design methodologies and architectures for digital signal processing on {FPGA}s}, | 40 | 40 | title={Design methodologies and architectures for digital signal processing on {FPGA}s}, | |
author={Mirzaei, Shahnam}, | 41 | 41 | author={Mirzaei, Shahnam}, | |
year={2010}, | 42 | 42 | year={2010}, | |
school={UNIVERSITY OF CALIFORNIA SANTA BARBARA} | 43 | 43 | school={UNIVERSITY OF CALIFORNIA SANTA BARBARA} | |
} | 44 | 44 | } | |
45 | 45 | |||
@book{def1-ordo, | 46 | 46 | @book{def1-ordo, | |
title={Algorithmique Parallèle-Cours Et Exercices Corrigés}, | 47 | 47 | title={Algorithmique Parallèle-Cours Et Exercices Corrigés}, | |
author={Legrand, Arnaud and Robert, Yves}, | 48 | 48 | author={Legrand, Arnaud and Robert, Yves}, | |
year={2003}, | 49 | 49 | year={2003}, | |
publisher={Dunod} | 50 | 50 | publisher={Dunod} | |
} | 51 | 51 | } | |
52 | 52 | |||
@article{these-mathias, | 53 | 53 | @article{these-mathias, | |
title={Optimisation du débit pour des applications linéaires multi-tâches sur plateformes distribuées incluant des temps de reconfiguration}, | 54 | 54 | title={Optimisation du débit pour des applications linéaires multi-tâches sur plateformes distribuées incluant des temps de reconfiguration}, | |
author={Coqblin, Mathias}, | 55 | 55 | author={Coqblin, Mathias}, | |
institution = {FEMTO-ST}, | 56 | 56 | institution = {FEMTO-ST}, | |
year={2012} | 57 | 57 | year={2012} | |
} | 58 | 58 | } | |
59 | 59 | |||
@thesis{these-alex, | 60 | 60 | @thesis{these-alex, | |
author = {Alexandru Dobrila}, | 61 | 61 | author = {Alexandru Dobrila}, | |
title = {Optimisation du débit en environnement distribué incertain}, | 62 | 62 | title = {Optimisation du débit en environnement distribué incertain}, | |
institution = {FEMTO-ST}, | 63 | 63 | institution = {FEMTO-ST}, | |
year = {2011} | 64 | 64 | year = {2011} | |
} | 65 | 65 | } | |
66 | 66 | |||
@book{def2-ordo, | 67 | 67 | @book{def2-ordo, | |
title={Handbook of scheduling: algorithms, models, and performance analysis}, | 68 | 68 | title={Handbook of scheduling: algorithms, models, and performance analysis}, | |
author={Leung, Joseph YT}, | 69 | 69 | author={Leung, Joseph YT}, | |
year={2004}, | 70 | 70 | year={2004}, | |
publisher={CRC Press} | 71 | 71 | publisher={CRC Press} | |
} | 72 | 72 | } | |
73 | 73 | |||
@inproceedings{def-ordo-en-ligne, | 74 | 74 | @inproceedings{def-ordo-en-ligne, | |
title={On the Definition of "On-Line" in Job Scheduling Problems}, | 75 | 75 | title={On the Definition of "On-Line" in Job Scheduling Problems}, | |
author={Feitelson, Dror G and Mu'alem, Ahuva W}, | 76 | 76 | author={Feitelson, Dror G and Mu'alem, Ahuva W}, | |
booktitle={SIGACT NEWS}, | 77 | 77 | booktitle={SIGACT NEWS}, | |
year={2000}, | 78 | 78 | year={2000}, | |
organization={Citeseer} | 79 | 79 | organization={Citeseer} | |
} | 80 | 80 | } | |
81 | 81 | |||
@article{shmueli2005backfilling, | 82 | 82 | @article{shmueli2005backfilling, | |
title={Backfilling with lookahead to optimize the packing of parallel jobs}, | 83 | 83 | title={Backfilling with lookahead to optimize the packing of parallel jobs}, | |
author={Shmueli, Edi and Feitelson, Dror G}, | 84 | 84 | author={Shmueli, Edi and Feitelson, Dror G}, | |
journal={Journal of Parallel and Distributed Computing}, | 85 | 85 | journal={Journal of Parallel and Distributed Computing}, | |
volume={65}, | 86 | 86 | volume={65}, | |
number={9}, | 87 | 87 | number={9}, | |
pages={1090--1107}, | 88 | 88 | pages={1090--1107}, | |
year={2005}, | 89 | 89 | year={2005}, | |
publisher={Elsevier} | 90 | 90 | publisher={Elsevier} | |
} | 91 | 91 | } | |
92 | 92 | |||
@article{graham1979optimization, | 93 | 93 | @article{graham1979optimization, | |
title={Optimization and approximation in deterministic sequencing and scheduling: a survey}, | 94 | 94 | title={Optimization and approximation in deterministic sequencing and scheduling: a survey}, | |
author={Graham, Ronald L and Lawler, Eugene L and Lenstra, Jan Karel and Kan, AHG Rinnooy}, | 95 | 95 | author={Graham, Ronald L and Lawler, Eugene L and Lenstra, Jan Karel and Kan, AHG Rinnooy}, | |
journal={Annals of discrete mathematics}, | 96 | 96 | journal={Annals of discrete mathematics}, | |
volume={5}, | 97 | 97 | volume={5}, | |
pages={287--326}, | 98 | 98 | pages={287--326}, | |
year={1979}, | 99 | 99 | year={1979}, | |
publisher={Elsevier} | 100 | 100 | publisher={Elsevier} | |
} | 101 | 101 | } | |
102 | 102 | |||
@article{salvador2012accelerating, | 103 | 103 | @article{salvador2012accelerating, | |
title={Accelerating {FPGA}-based evolution of wavelet transform filters by optimized task scheduling}, | 104 | 104 | title={Accelerating {FPGA}-based evolution of wavelet transform filters by optimized task scheduling}, | |
author={Salvador, Ruben and Vidal, Alberto and Moreno, Felix and Riesgo, Teresa and Sekanina, Lukas}, | 105 | 105 | author={Salvador, Ruben and Vidal, Alberto and Moreno, Felix and Riesgo, Teresa and Sekanina, Lukas}, | |
journal={Microprocessors and Microsystems}, | 106 | 106 | journal={Microprocessors and Microsystems}, | |
volume={36}, | 107 | 107 | volume={36}, | |
number={5}, | 108 | 108 | number={5}, | |
pages={427--438}, | 109 | 109 | pages={427--438}, | |
year={2012}, | 110 | 110 | year={2012}, | |
publisher={Elsevier} | 111 | 111 | publisher={Elsevier} | |
} | 112 | 112 | } | |
113 | 113 | |||
@article{zhuo2007scalable, | 114 | 114 | @article{zhuo2007scalable, | |
title={Scalable and modular algorithms for floating-point matrix multiplication on reconfigurable computing systems}, | 115 | 115 | title={Scalable and modular algorithms for floating-point matrix multiplication on reconfigurable computing systems}, | |
author={Zhuo, Ling and Prasanna, Viktor K}, | 116 | 116 | author={Zhuo, Ling and Prasanna, Viktor K}, | |
journal={Parallel and Distributed Systems, IEEE Transactions on}, | 117 | 117 | journal={Parallel and Distributed Systems, IEEE Transactions on}, | |
volume={18}, | 118 | 118 | volume={18}, | |
number={4}, | 119 | 119 | number={4}, | |
pages={433--448}, | 120 | 120 | pages={433--448}, | |
year={2007}, | 121 | 121 | year={2007}, | |
publisher={IEEE} | 122 | 122 | publisher={IEEE} | |
} | 123 | 123 | } | |
124 | 124 | |||
@article{olariu1993computing, | 125 | 125 | @article{olariu1993computing, | |
title={Computing the Hough transform on reconfigurable meshes}, | 126 | 126 | title={Computing the Hough transform on reconfigurable meshes}, | |
author={Olariu, Stephan and Schwing, James L and Zhang, Jingyuan}, | 127 | 127 | author={Olariu, Stephan and Schwing, James L and Zhang, Jingyuan}, | |
journal={Image and vision computing}, | 128 | 128 | journal={Image and vision computing}, | |
volume={11}, | 129 | 129 | volume={11}, | |
number={10}, | 130 | 130 | number={10}, | |
pages={623--628}, | 131 | 131 | pages={623--628}, | |
year={1993}, | 132 | 132 | year={1993}, | |
publisher={Elsevier} | 133 | 133 | publisher={Elsevier} | |
} | 134 | 134 | } | |
135 | 135 | |||
@article{pan1999improved, | 136 | 136 | @article{pan1999improved, | |
title={An improved constant-time algorithm for computing the Radon and Hough transforms on a reconfigurable mesh}, | 137 | 137 | title={An improved constant-time algorithm for computing the Radon and Hough transforms on a reconfigurable mesh}, | |
author={Pan, Yi and Li, Keqin and Hamdi, Mounir}, | 138 | 138 | author={Pan, Yi and Li, Keqin and Hamdi, Mounir}, | |
journal={Systems, Man and Cybernetics, Part A: Systems and Humans, IEEE Transactions on}, | 139 | 139 | journal={Systems, Man and Cybernetics, Part A: Systems and Humans, IEEE Transactions on}, | |
volume={29}, | 140 | 140 | volume={29}, | |
number={4}, | 141 | 141 | number={4}, | |
pages={417--421}, | 142 | 142 | pages={417--421}, | |
year={1999}, | 143 | 143 | year={1999}, | |
publisher={IEEE} | 144 | 144 | publisher={IEEE} | |
} | 145 | 145 | } | |
146 | 146 | |||
@article{kasbah2008multigrid, | 147 | 147 | @article{kasbah2008multigrid, | |
title={Multigrid solvers in reconfigurable hardware}, | 148 | 148 | title={Multigrid solvers in reconfigurable hardware}, | |
author={Kasbah, Safaa J and Damaj, Issam W and Haraty, Ramzi A}, | 149 | 149 | author={Kasbah, Safaa J and Damaj, Issam W and Haraty, Ramzi A}, | |
journal={Journal of Computational and Applied Mathematics}, | 150 | 150 | journal={Journal of Computational and Applied Mathematics}, | |
volume={213}, | 151 | 151 | volume={213}, | |
number={1}, | 152 | 152 | number={1}, | |
pages={79--94}, | 153 | 153 | pages={79--94}, | |
year={2008}, | 154 | 154 | year={2008}, | |
publisher={Elsevier} | 155 | 155 | publisher={Elsevier} | |
} | 156 | 156 | } | |
157 | 157 | |||
@inproceedings{crookes1998environment, | 158 | 158 | @inproceedings{crookes1998environment, | |
title={An environment for generating {FPGA} architectures for image algebra-based algorithms}, | 159 | 159 | title={An environment for generating {FPGA} architectures for image algebra-based algorithms}, | |
author={Crookes, Danny and Alotaibi, Khalid and Bouridane, Ahmed and Donachy, Paul and Benkrid, Abdsamad}, | 160 | 160 | author={Crookes, Danny and Alotaibi, Khalid and Bouridane, Ahmed and Donachy, Paul and Benkrid, Abdsamad}, | |
booktitle={Image Processing, 1998. ICIP 98. Proceedings. 1998 International Conference on}, | 161 | 161 | booktitle={Image Processing, 1998. ICIP 98. Proceedings. 1998 International Conference on}, | |
pages={990--994}, | 162 | 162 | pages={990--994}, | |
year={1998}, | 163 | 163 | year={1998}, | |
organization={IEEE} | 164 | 164 | organization={IEEE} | |
} | 165 | 165 | } | |
166 | 166 | |||
@article{crookes2000design, | 167 | 167 | @article{crookes2000design, | |
title={Design and implementation of a high level programming environment for {FPGA}-based image processing}, | 168 | 168 | title={Design and implementation of a high level programming environment for {FPGA}-based image processing}, | |
author={Crookes, D and Benkrid, K and Bouridane, A and Alotaibi, K and Benkrid, A}, | 169 | 169 | author={Crookes, D and Benkrid, K and Bouridane, A and Alotaibi, K and Benkrid, A}, | |
journal={IEE Proceedings-Vision, Image and Signal Processing}, | 170 | 170 | journal={IEE Proceedings-Vision, Image and Signal Processing}, | |
volume={147}, | 171 | 171 | volume={147}, | |
number={4}, | 172 | 172 | number={4}, | |
pages={377--384}, | 173 | 173 | pages={377--384}, | |
year={2000}, | 174 | 174 | year={2000}, | |
publisher={IET} | 175 | 175 | publisher={IET} | |
} | 176 | 176 | } | |
177 | 177 | |||
@article{benkrid2002towards, | 178 | 178 | @article{benkrid2002towards, | |
title={Towards a general framework for {FPGA} based image processing using hardware skeletons}, | 179 | 179 | title={Towards a general framework for {FPGA} based image processing using hardware skeletons}, | |
author={Benkrid, Khaled and Crookes, Danny and Benkrid, Abdsamad}, | 180 | 180 | author={Benkrid, Khaled and Crookes, Danny and Benkrid, Abdsamad}, |
ifcs2018_proceeding.tex
% JMF : revoir l'abstract : on y avait mis le Zynq7010 de la redpitaya en montrant | 1 | 1 | % JMF : revoir l'abstract : on y avait mis le Zynq7010 de la redpitaya en montrant | |
% comment optimiser les perfs a surface finie. Ici aussi on tombait dans le cas ou` | 2 | 2 | % comment optimiser les perfs a surface finie. Ici aussi on tombait dans le cas ou` | |
% la solution a 1 seul FIR n'etait simplement pas synthetisable => fusionner les deux | 3 | 3 | % la solution a 1 seul FIR n'etait simplement pas synthetisable => fusionner les deux | |
% contributions pour le papier TUFFC | 4 | 4 | % contributions pour le papier TUFFC | |
5 | 5 | |||
\documentclass[a4paper,conference]{IEEEtran/IEEEtran} | 6 | 6 | \documentclass[a4paper,conference]{IEEEtran/IEEEtran} | |
\usepackage{graphicx,color,hyperref} | 7 | 7 | \usepackage{graphicx,color,hyperref} | |
\usepackage{amsfonts} | 8 | 8 | \usepackage{amsfonts} | |
\usepackage{amsthm} | 9 | 9 | \usepackage{amsthm} | |
\usepackage{amssymb} | 10 | 10 | \usepackage{amssymb} | |
\usepackage{amsmath} | 11 | 11 | \usepackage{amsmath} | |
\usepackage{algorithm2e} | 12 | 12 | \usepackage{algorithm2e} | |
\usepackage{url,balance} | 13 | 13 | \usepackage{url,balance} | |
\usepackage[normalem]{ulem} | 14 | 14 | \usepackage[normalem]{ulem} | |
% correct bad hyphenation here | 15 | 15 | % correct bad hyphenation here | |
\hyphenation{op-tical net-works semi-conduc-tor} | 16 | 16 | \hyphenation{op-tical net-works semi-conduc-tor} | |
\textheight=26cm | 17 | 17 | \textheight=26cm | |
\setlength{\footskip}{30pt} | 18 | 18 | \setlength{\footskip}{30pt} | |
\pagenumbering{gobble} | 19 | 19 | \pagenumbering{gobble} | |
\begin{document} | 20 | 20 | \begin{document} | |
\title{Filter optimization for real time digital processing of radiofrequency signals: application | 21 | 21 | \title{Filter optimization for real time digital processing of radiofrequency signals: application | |
to oscillator metrology} | 22 | 22 | to oscillator metrology} | |
23 | 23 | |||
\author{\IEEEauthorblockN{A. Hugeat\IEEEauthorrefmark{1}\IEEEauthorrefmark{2}, J. Bernard\IEEEauthorrefmark{2}, | 24 | 24 | \author{\IEEEauthorblockN{A. Hugeat\IEEEauthorrefmark{1}\IEEEauthorrefmark{2}, J. Bernard\IEEEauthorrefmark{2}, | |
G. Goavec-M\'erou\IEEEauthorrefmark{1}, | 25 | 25 | G. Goavec-M\'erou\IEEEauthorrefmark{1}, | |
P.-Y. Bourgeois\IEEEauthorrefmark{1}, J.-M. Friedt\IEEEauthorrefmark{1}} | 26 | 26 | P.-Y. Bourgeois\IEEEauthorrefmark{1}, J.-M. Friedt\IEEEauthorrefmark{1}} | |
\IEEEauthorblockA{\IEEEauthorrefmark{1}FEMTO-ST, Time \& Frequency department, Besan\c con, France } | 27 | 27 | \IEEEauthorblockA{\IEEEauthorrefmark{1}FEMTO-ST, Time \& Frequency department, Besan\c con, France } | |
\IEEEauthorblockA{\IEEEauthorrefmark{2}FEMTO-ST, Computer Science department DISC, Besan\c con, France \\ | 28 | 28 | \IEEEauthorblockA{\IEEEauthorrefmark{2}FEMTO-ST, Computer Science department DISC, Besan\c con, France \\ | |
Email: \{pyb2,jmfriedt\}@femto-st.fr} | 29 | 29 | Email: \{pyb2,jmfriedt\}@femto-st.fr} | |
} | 30 | 30 | } | |
\maketitle | 31 | 31 | \maketitle | |
\thispagestyle{plain} | 32 | 32 | \thispagestyle{plain} | |
\pagestyle{plain} | 33 | 33 | \pagestyle{plain} | |
\newtheorem{definition}{Definition} | 34 | 34 | \newtheorem{definition}{Definition} | |
35 | 35 | |||
\begin{abstract} | 36 | 36 | \begin{abstract} | |
Software Defined Radio (SDR) provides stability, flexibility and reconfigurability to | 37 | 37 | Software Defined Radio (SDR) provides stability, flexibility and reconfigurability to | |
radiofrequency signal processing. Applied to oscillator characterization in the context | 38 | 38 | radiofrequency signal processing. Applied to oscillator characterization in the context | |
of ultrastable clocks, stringent filtering requirements are defined by spurious signal or | 39 | 39 | of ultrastable clocks, stringent filtering requirements are defined by spurious signal or | |
noise rejection needs. Since real time radiofrequency processing must be performed in a | 40 | 40 | noise rejection needs. Since real time radiofrequency processing must be performed in a | |
Field Programmable Gate Array to meet timing constraints, we investigate optimization strategies | 41 | 41 | Field Programmable Gate Array to meet timing constraints, we investigate optimization strategies | |
to design filters meeting rejection characteristics while limiting the hardware resources | 42 | 42 | to design filters meeting rejection characteristics while limiting the hardware resources | |
required and keeping timing constraints within the targeted measurement bandwidths. | 43 | 43 | required and keeping timing constraints within the targeted measurement bandwidths. | |
\end{abstract} | 44 | 44 | \end{abstract} | |
45 | 45 | |||
\begin{IEEEkeywords} | 46 | 46 | \begin{IEEEkeywords} | |
Software Defined Radio, Mixed-Integer Linear Programming, Finite Impulse Response filter | 47 | 47 | Software Defined Radio, Mixed-Integer Linear Programming, Finite Impulse Response filter | |
\end{IEEEkeywords} | 48 | 48 | \end{IEEEkeywords} | |
49 | 49 | |||
\section{Digital signal processing of ultrastable clock signals} | 50 | 50 | \section{Digital signal processing of ultrastable clock signals} | |
51 | 51 | |||
Analog oscillator phase noise characterization is classically performed by downconverting | 52 | 52 | Analog oscillator phase noise characterization is classically performed by downconverting | |
the radiofrequency signal using a saturated mixer to bring the radiofrequency signal to baseband, | 53 | 53 | the radiofrequency signal using a saturated mixer to bring the radiofrequency signal to baseband, | |
followed by a Fourier analysis of the beat signal to analyze phase fluctuations close to carrier. In | 54 | 54 | followed by a Fourier analysis of the beat signal to analyze phase fluctuations close to carrier. In | |
a fully digital approach, the radiofrequency signal is digitized and numerically downconverted by | 55 | 55 | a fully digital approach, the radiofrequency signal is digitized and numerically downconverted by | |
multiplying the samples with a local numerically controlled oscillator (Fig. \ref{schema}) \cite{rsi}. | 56 | 56 | multiplying the samples with a local numerically controlled oscillator (Fig. \ref{schema}) \cite{rsi}. | |
57 | 57 | |||
\begin{figure}[h!tb] | 58 | 58 | \begin{figure}[h!tb] | |
\begin{center} | 59 | 59 | \begin{center} | |
\includegraphics[width=.8\linewidth]{images/schema} | 60 | 60 | \includegraphics[width=.8\linewidth]{images/schema} | |
\end{center} | 61 | 61 | \end{center} | |
\caption{Fully digital oscillator phase noise characterization: the Device Under Test | 62 | 62 | \caption{Fully digital oscillator phase noise characterization: the Device Under Test | |
(DUT) signal is sampled by the radiofrequency grade Analog to Digital Converter (ADC) and | 63 | 63 | (DUT) signal is sampled by the radiofrequency grade Analog to Digital Converter (ADC) and | |
downconverted by mixing with a Numerically Controlled Oscillator (NCO). Unwanted signals | 64 | 64 | downconverted by mixing with a Numerically Controlled Oscillator (NCO). Unwanted signals | |
and noise aliases are rejected by a Low Pass Filter (LPF) implemented as a cascade of Finite | 65 | 65 | and noise aliases are rejected by a Low Pass Filter (LPF) implemented as a cascade of Finite | |
Impulse Response (FIR) filters. The signal is then decimated before a Fourier analysis displays | 66 | 66 | Impulse Response (FIR) filters. The signal is then decimated before a Fourier analysis displays | |
the spectral characteristics of the phase fluctuations.} | 67 | 67 | the spectral characteristics of the phase fluctuations.} | |
\label{schema} | 68 | 68 | \label{schema} | |
\end{figure} | 69 | 69 | \end{figure} | |
70 | 70 | |||
As with the analog mixer, | 71 | 71 | As with the analog mixer, | |
the non-linear behavior of the downconverter introduces noise or spurious signal aliasing as | 72 | 72 | the non-linear behavior of the downconverter introduces noise or spurious signal aliasing as | |
well as the generation of the frequency sum signal in addition to the frequency difference. | 73 | 73 | well as the generation of the frequency sum signal in addition to the frequency difference. | |
These unwanted spectral characteristics must be rejected before decimating the data stream | 74 | 74 | These unwanted spectral characteristics must be rejected before decimating the data stream | |
for the phase noise spectral characterization \cite{andrich2018high}. The characteristics introduced between the | 75 | 75 | for the phase noise spectral characterization \cite{andrich2018high}. The characteristics introduced between the | |
downconverter | 76 | 76 | downconverter | |
and the decimation processing blocks are core characteristics of an oscillator characterization | 77 | 77 | and the decimation processing blocks are core characteristics of an oscillator characterization | |
system, and must reject out-of-band signals below the targeted phase noise -- typically in the | 78 | 78 | system, and must reject out-of-band signals below the targeted phase noise -- typically in the | |
sub -170~dBc/Hz range for the ultrastable oscillators we aim at characterizing. The filter blocks will | 79 | 79 | sub -170~dBc/Hz range for the ultrastable oscillators we aim at characterizing. The filter blocks will | |
use most resources of the Field Programmable Gate Array (FPGA) used to process the radiofrequency | 80 | 80 | use most resources of the Field Programmable Gate Array (FPGA) used to process the radiofrequency | |
datastream: optimizing the performance of the filter while reducing the needed resources is | 81 | 81 | datastream: optimizing the performance of the filter while reducing the needed resources is | |
hence tackled in a systematic approach using optimization techniques. Most significantly, we | 82 | 82 | hence tackled in a systematic approach using optimization techniques. Most significantly, we | |
tackle the issue by attempting to cascade multiple Finite Impulse Response (FIR) filters with | 83 | 83 | tackle the issue by attempting to cascade multiple Finite Impulse Response (FIR) filters with | |
tunable number of coefficients and tunable number of bits representing the coefficients and the | 84 | 84 | tunable number of coefficients and tunable number of bits representing the coefficients and the | |
data being processed. | 85 | 85 | data being processed. | |
86 | 86 | |||
\section{Finite impulse response filter} | 87 | 87 | \section{Finite impulse response filter} | |
88 | 88 | |||
We select FIR filters for their unconditional stability and ease of design. A FIR filter is defined | 89 | 89 | We select FIR filters for their unconditional stability and ease of design. A FIR filter is defined | |
by a set of weights $b_k$ applied to the inputs $x_k$ through a convolution to generate the | 90 | 90 | by a set of weights $b_k$ applied to the inputs $x_k$ through a convolution to generate the | |
outputs $y_k$ | 91 | 91 | outputs $y_k$ | |
$$y_n=\sum_{k=0}^N b_k x_{n-k}$$ | 92 | 92 | $$y_n=\sum_{k=0}^N b_k x_{n-k}$$ | |
93 | 93 | |||
As opposed to an implementation on a general purpose processor in which word size is defined by the | 94 | 94 | As opposed to an implementation on a general purpose processor in which word size is defined by the | |
processor architecture, implementing such a filter on an FPGA offers more degrees of freedom since | 95 | 95 | processor architecture, implementing such a filter on an FPGA offers more degrees of freedom since | |
not only the coefficient values and number of taps must be defined, but also the number of bits | 96 | 96 | not only the coefficient values and number of taps must be defined, but also the number of bits | |
defining the coefficients and the sample size. For this reason, and because we consider pipeline | 97 | 97 | defining the coefficients and the sample size. For this reason, and because we consider pipeline | |
processing (as opposed to First-In, First-Out FIFO memory batch processing) of radiofrequency | 98 | 98 | processing (as opposed to First-In, First-Out FIFO memory batch processing) of radiofrequency | |
signals, High Level Synthesis (HLS) languages \cite{kasbah2008multigrid} are not considered but | 99 | 99 | signals, High Level Synthesis (HLS) languages \cite{kasbah2008multigrid} are not considered but | |
the problem is tackled at the Very-high-speed-integrated-circuit Hardware Description Language (VHDL) level. | 100 | 100 | the problem is tackled at the Very-high-speed-integrated-circuit Hardware Description Language (VHDL) level. | |
Since latency is not an issue in an open-loop phase noise characterization instrument, the large | 101 | 101 | Since latency is not an issue in an open-loop phase noise characterization instrument, the large | |
number of taps in the FIR, as opposed to the shorter Infinite Impulse Response (IIR) filter, | 102 | 102 | number of taps in the FIR, as opposed to the shorter Infinite Impulse Response (IIR) filter, | |
is not considered an issue as it would be in a closed-loop system. | 103 | 103 | is not considered an issue as it would be in a closed-loop system. | |
104 | 104 | |||
The coefficients are classically expressed as floating point values. However, this binary | 105 | 105 | The coefficients are classically expressed as floating point values. However, this binary | |
number representation is not efficient for fast arithmetic computation by an FPGA. Instead, | 106 | 106 | number representation is not efficient for fast arithmetic computation by an FPGA. Instead, | |
we choose to quantize these floating point values into integer values. This quantization | 107 | 107 | we choose to quantize these floating point values into integer values. This quantization | |
will result in some precision loss. | 108 | 108 | will result in some precision loss. | |
109 | 109 | |||
%As illustrated in Fig. \ref{float_vs_int}, we see that we aren't | 110 | 110 | %As illustrated in Fig. \ref{float_vs_int}, we see that we aren't | |
%need too coefficients or too sample size. If we have lot of coefficients but a small sample size, | 111 | 111 | %need too coefficients or too sample size. If we have lot of coefficients but a small sample size, | |
%the first and last are equal to zero. But if we have too sample size for few coefficients that not improve the quality. | 112 | 112 | %the first and last are equal to zero. But if we have too sample size for few coefficients that not improve the quality. | |
113 | 113 | |||
% JMF je ne comprends pas la derniere phrase ci-dessus ni la figure ci dessous | 114 | 114 | % JMF je ne comprends pas la derniere phrase ci-dessus ni la figure ci dessous | |
% AH en gros je voulais dire que prendre trop peu de bit avec trop de coeff, ça induit ta figure (bien mieux faite que moi) | 115 | 115 | % AH en gros je voulais dire que prendre trop peu de bit avec trop de coeff, ça induit ta figure (bien mieux faite que moi) | |
% et que l'inverse trop de bit sur pas assez de coeff on ne gagne rien, je vais essayer de la reformuler | 116 | 116 | % et que l'inverse trop de bit sur pas assez de coeff on ne gagne rien, je vais essayer de la reformuler | |
117 | 117 | |||
%\begin{figure}[h!tb] | 118 | 118 | %\begin{figure}[h!tb] | |
%\includegraphics[width=\linewidth]{images/float-vs-integer.pdf} | 119 | 119 | %\includegraphics[width=\linewidth]{images/float-vs-integer.pdf} | |
%\caption{Impact of the quantization resolution of the coefficients} | 120 | 120 | %\caption{Impact of the quantization resolution of the coefficients} | |
%\label{float_vs_int} | 121 | 121 | %\label{float_vs_int} | |
%\end{figure} | 122 | 122 | %\end{figure} | |
123 | 123 | |||
\begin{figure}[h!tb] | 124 | 124 | \begin{figure}[h!tb] | |
\includegraphics[width=\linewidth]{images/demo_filtre} | 125 | 125 | \includegraphics[width=\linewidth]{images/demo_filtre} | |
\caption{Impact of the quantization resolution of the coefficients: the quantization is | 126 | 126 | \caption{Impact of the quantization resolution of the coefficients: the quantization is | |
set to 6~bits -- with the horizontal black lines indicating $\pm$1 least significant bit -- setting | 127 | 127 | set to 6~bits -- with the horizontal black lines indicating $\pm$1 least significant bit -- setting | |
the 30~first and 30~last coefficients out of the initial 128~band-pass | 128 | 128 | the 30~first and 30~last coefficients out of the initial 128~band-pass | |
filter coefficients to 0 (red dots).} | 129 | 129 | filter coefficients to 0 (red dots).} | |
\label{float_vs_int} | 130 | 130 | \label{float_vs_int} | |
\end{figure} | 131 | 131 | \end{figure} | |
132 | 132 | |||
The tradeoff between quantization resolution and number of coefficients when considering | 133 | 133 | The tradeoff between quantization resolution and number of coefficients when considering | |
integer operations is not trivial. As an illustration of the issue related to the | 134 | 134 | integer operations is not trivial. As an illustration of the issue related to the | |
relation between number of filter taps and quantization, Fig. \ref{float_vs_int} exhibits | 135 | 135 | relation between number of filter taps and quantization, Fig. \ref{float_vs_int} exhibits | |
a 128-coefficient FIR bandpass filter designed using floating point numbers (blue). Upon | 136 | 136 | a 128-coefficient FIR bandpass filter designed using floating point numbers (blue). Upon | |
quantization on 6~bit integers, 60 of the 128~coefficients in the beginning and end of the | 137 | 137 | quantization on 6~bit integers, 60 of the 128~coefficients in the beginning and end of the | |
taps become null, making the large number of coefficients irrelevant and allowing us to save | 138 | 138 | taps become null, making the large number of coefficients irrelevant and allowing us to save | |
processing resources by shrinking the filter length. This tradeoff aimed at minimizing resources | 139 | 139 | processing resources by shrinking the filter length. This tradeoff aimed at minimizing resources | |
to reach a given rejection level, or maximizing out of band rejection for a given computational | 140 | 140 | to reach a given rejection level, or maximizing out of band rejection for a given computational | |
resource, will drive the investigation on cascading filters designed with varying tap resolution | 141 | 141 | resource, will drive the investigation on cascading filters designed with varying tap resolution | |
and tap length, as will be shown in the next section. Indeed, our development strategy closely | 142 | 142 | and tap length, as will be shown in the next section. Indeed, our development strategy closely | |
follows the skeleton approach \cite{crookes1998environment, crookes2000design, benkrid2002towards} | 143 | 143 | follows the skeleton approach \cite{crookes1998environment, crookes2000design, benkrid2002towards} | |
in which basic blocks are defined and characterized before being assembled \cite{hide} | 144 | 144 | in which basic blocks are defined and characterized before being assembled \cite{hide} | |
in a complete processing chain. In our case, assembling the filter blocks is a simpler block | 145 | 145 | in a complete processing chain. In our case, assembling the filter blocks is a simpler block | |
combination process since we assume a single value to be processed and a single value to be | 146 | 146 | combination process since we assume a single value to be processed and a single value to be | |
generated at each clock cycle. The FIR filters will not be considered to decimate in the | 147 | 147 | generated at each clock cycle. The FIR filters will not be considered to decimate in the | |
current implementation: the decimation is assumed to be located after the FIR cascade at the | 148 | 148 | current implementation: the decimation is assumed to be located after the FIR cascade at the | |
moment. | 149 | 149 | moment. | |
150 | 150 | |||
\section{Filter optimization} | 151 | 151 | \section{Filter optimization} | |
152 | 152 | |||
A basic approach for implementing the FIR filter is to compute the transfer function of | 153 | 153 | A basic approach for implementing the FIR filter is to compute the transfer function of | |
a monolithic filter: this single filter defines all coefficients with the same resolution | 154 | 154 | a monolithic filter: this single filter defines all coefficients with the same resolution | |
(number of bits) and processes data represented with their own resolution. Meeting the | 155 | 155 | (number of bits) and processes data represented with their own resolution. Meeting the | |
filter shape requires a large number of coefficients, limited by resources of the FPGA since | 156 | 156 | filter shape requires a large number of coefficients, limited by resources of the FPGA since | |
this filter must process data stream at the radiofrequency sampling rate after the mixer. | 157 | 157 | this filter must process data stream at the radiofrequency sampling rate after the mixer. | |
158 | 158 | |||
An optimization problem \cite{leung2004handbook} aims at improving one or many | 159 | 159 | An optimization problem \cite{leung2004handbook} aims at improving one or many | |
performance criteria within a constrained resource environment. Amongst the tools | 160 | 160 | performance criteria within a constrained resource environment. Amongst the tools | |
developed to meet this aim, Mixed-Integer Linear Programming (MILP) provides the framework to | 161 | 161 | developed to meet this aim, Mixed-Integer Linear Programming (MILP) provides the framework to | |
formally define the stated problem and search for an optimal use of available | 162 | 162 | formally define the stated problem and search for an optimal use of available | |
resources \cite{yu2007design, kodek1980design}. | 163 | 163 | resources \cite{yu2007design, kodek1980design}. | |
164 | 164 | |||
First we need to ensure that our problem is a real optimization problem. When | 165 | 165 | First we need to ensure that our problem is a real optimization problem. When | |
designing a processing function in the FPGA, we aim at meeting some requirement such as | 166 | 166 | designing a processing function in the FPGA, we aim at meeting some requirement such as | |
the throughput, the computation time or the noise rejection. However, due to limited | 167 | 167 | the throughput, the computation time or the noise rejection. However, due to limited | |
resources to design the process like BRAM (high performance RAM), DSP (Digital Signal Processor) | 168 | 168 | resources to design the process like BRAM (high performance RAM), DSP (Digital Signal Processor) | |
or LUT (Look Up Table), a tradeoff must generally be sought between performance and available | 169 | 169 | or LUT (Look Up Table), a tradeoff must generally be sought between performance and available | |
computational resources: optimizing some criteria within finite, limited | 170 | 170 | computational resources: optimizing some criteria within finite, limited | |
resources indeed matches the definition of a classical optimization problem. | 171 | 171 | resources indeed matches the definition of a classical optimization problem. | |
172 | 172 | |||
Specifically the degrees of freedom when addressing the problem of replacing the single monolithic | 173 | 173 | Specifically the degrees of freedom when addressing the problem of replacing the single monolithic | |
FIR with a cascade of optimized filters are the number of coefficients $N_i$ of each filter $i$, | 174 | 174 | FIR with a cascade of optimized filters are the number of coefficients $N_i$ of each filter $i$, | |
the number of bits $C_i$ representing the coefficients and the number of bits $D_i$ representing | 175 | 175 | the number of bits $C_i$ representing the coefficients and the number of bits $D_i$ needed to represent | |
-the data fed to the filter. Because each FIR in the chain is fed the output of the previous stage, | 176 | 176 | the data $x_k$ fed to each filter as provided by the acquisition or previous processing stage. | |
177 | Because each FIR in the chain is fed the output of the previous stage, | |||
the optimization of the complete processing chain within a constrained resource environment is not | 177 | 178 | the optimization of the complete processing chain within a constrained resource environment is not | |
trivial. The resource occupation of a FIR filter is considered as $C_i \times N_i$ which aims | 178 | 179 | trivial. The resource occupation of a FIR filter is considered as $C_i \times N_i$ which aims | |
at approximating the number of bits needed in a worst case condition to represent the output of the | 179 | 180 | at approximating the number of bits needed in a worst case condition to represent the output of the | |
FIR. Indeed, the number of bits generated by the FIR is $(C_i+D_i)\times\log_2(N_i)$ with $D_i$ | 180 | 181 | FIR. Indeed, the number of bits generated by the $i$th FIR is $(C_i+D_i)\times\log_2(N_i)$, but the | |
the number of bits needed to represent the data $x_k$ generated by the previous stage, but the | 181 | |||
$\log$ function is avoided for its incompatibility with a linear programming description, and | 182 | 182 | $\log$ function is avoided for its incompatibility with a linear programming description, and | |
the simple product is approximated as the number of gates needed to perform the calculation. Such an | 183 | 183 | the simple product is approximated as the number of gates needed to perform the calculation. Such an | |
occupied area estimate assumes that the number of gates scales as the number of bits and the number | 184 | 184 | occupied area estimate assumes that the number of gates scales as the number of bits and the number | |
of coefficients, but does not account for the detailed implementation of the hardware. Indeed, | 185 | 185 | of coefficients, but does not account for the detailed implementation of the hardware. Indeed, | |
various FPGA implementations will provide different hardware functionalities, and we shall consider | 186 | 186 | various FPGA implementations will provide different hardware functionalities, and we shall consider | |
at the end of the design a synthesis step using vendor software to assess the validity of the solution | 187 | 187 | at the end of the design a synthesis step using vendor software to assess the validity of the solution | |
found. As an example of the limitation linked to the lack of detailed hardware consideration, Block Random | 188 | 188 | found. As an example of the limitation linked to the lack of detailed hardware consideration, Block Random | |
Access Memory (BRAM) used to store filter coefficients are not shared amongst filters, and multiplications | 189 | 189 | Access Memory (BRAM) used to store filter coefficients are not shared amongst filters, and multiplications | |
are most efficiently implemented by using DSP blocks whose input word | 190 | 190 | are most efficiently implemented by using DSP blocks whose input word | |
size is finite. DSPs are a scarce resource to be saved in a practical implementation. Keeping a high | 191 | 191 | size is finite. DSPs are a scarce resource to be saved in a practical implementation. Keeping a high | |
abstraction on the resource occupation is nevertheless selected in the following discussion in order | 192 | 192 | abstraction on the resource occupation is nevertheless selected in the following discussion in order | |
to leave enough degrees of freedom in the problem to try and find original solutions: too many | 193 | 193 | to leave enough degrees of freedom in the problem to try and find original solutions: too many | |
constraints in the initial statement of the problem leave little room for finding an optimal solution. | 194 | 194 | constraints in the initial statement of the problem leave little room for finding an optimal solution. | |
195 | 195 | |||
\begin{figure}[h!tb] | 196 | 196 | \begin{figure}[h!tb] | |
\begin{center} | 197 | 197 | \begin{center} | |
\includegraphics[width=.5\linewidth]{schema2} | 198 | 198 | \includegraphics[width=.5\linewidth]{schema2} | |
\caption{Shape of the filter transmitted power $P$ as a function of frequency: | 199 | 199 | \caption{Shape of the filter transmitted power $P$ as a function of frequency: | |
the bandpass BP is considered to occupy the initial | 200 | 200 | the bandpass BP is considered to occupy the initial | |
40\% of the Nyquist frequency range, the stopband the last 40\%, allowing 20\% transition | 201 | 201 | 40\% of the Nyquist frequency range, the stopband the last 40\%, allowing 20\% transition | |
width.} | 202 | 202 | width.} | |
\label{rejection-shape} | 203 | 203 | \label{rejection-shape} | |
\end{center} | 204 | 204 | \end{center} | |
\end{figure} | 205 | 205 | \end{figure} | |
206 | 206 | |||
Following these considerations, the model is expressed as: | 207 | 207 | Following these considerations, the model is expressed as: | |
\begin{align} | 208 | 208 | \begin{align} | |
\begin{cases} | 209 | 209 | \begin{cases} | |
\mathcal{R}_i &= \mathcal{F}(N_i, C_i)\\ | 210 | 210 | \mathcal{R}_i &= \mathcal{F}(N_i, C_i)\\ | |
\mathcal{A}_i &= N_i \times C_i\\ | 211 | 211 | \mathcal{A}_i &= N_i \times C_i\\ | |
\Delta_i &= \Delta _{i-1} + \mathcal{P}_i | 212 | 212 | \Delta_i &= \Delta _{i-1} + \mathcal{P}_i | |
\end{cases} | 213 | 213 | \end{cases} | |
\label{model-FIR} | 214 | 214 | \label{model-FIR} | |
\end{align} | 215 | 215 | \end{align} | |
To explain the system \ref{model-FIR}, $\mathcal{R}_i$ represents the dependence of the stopband rejection on $N_i$ and $C_i$, $\mathcal{A}_i$ | 216 | 216 | To explain the system \ref{model-FIR}, $\mathcal{R}_i$ represents the dependence of the stopband rejection on $N_i$ and $C_i$, $\mathcal{A}_i$ | |
is a theoretical area occupation of the processing block on the FPGA as discussed earlier, and $\Delta_i$ is the total rejection for the current stage $i$. | 217 | 217 | is a theoretical area occupation of the processing block on the FPGA as discussed earlier, and $\Delta_i$ is the total rejection for the current stage $i$. | |
Since the function $\mathcal{F}$ cannot be explicitly expressed, we run simulations to determine the rejection depending | 218 | 218 | Since the function $\mathcal{F}$ cannot be explicitly expressed, we run simulations to determine the rejection depending | |
on $N_i$ and $C_i$. However, selecting the right filter requires a clear definition of the rejection criterion. Selecting an | 219 | 219 | on $N_i$ and $C_i$. However, selecting the right filter requires a clear definition of the rejection criterion. Selecting an | |
incorrect criterion will lead the linear program solver to produce a solution which might not meet the user requirements. | 220 | 220 | incorrect criterion will lead the linear program solver to produce a solution which might not meet the user requirements. | |
Hence, amongst various criteria including the mean or median value of the FIR response in the stopband as will | 221 | 221 | Hence, amongst various criteria including the mean or median value of the FIR response in the stopband as will | |
be illustrated later (section \ref{median}), we have designed | 222 | 222 | be illustrated later (section \ref{median}), we have designed | |
a criterion aimed at avoiding ripples in the passband and considering the maximum of the FIR spectral response in the stopband | 223 | 223 | a criterion aimed at avoiding ripples in the passband and considering the maximum of the FIR spectral response in the stopband | |
(Fig. \ref{rejection-shape}). The bandpass criterion is defined as the sum of the absolute values of the spectral response | 224 | 224 | (Fig. \ref{rejection-shape}). The bandpass criterion is defined as the sum of the absolute values of the spectral response | |
in the bandpass, reminiscent of a standard deviation of the spectral response: this criterion must be minimized to avoid | 225 | 225 | in the bandpass, reminiscent of a standard deviation of the spectral response: this criterion must be minimized to avoid | |
ripples in the passband. The stopband transfer function maximum must also be minimized in order to improve the filter | 226 | 226 | ripples in the passband. The stopband transfer function maximum must also be minimized in order to improve the filter | |
rejection capability. Weighing these two criteria allows designing the linear program to be solved. | 227 | 227 | rejection capability. Weighing these two criteria allows designing the linear program to be solved. | |
228 | 228 | |||
\begin{figure}[h!tb] | 229 | 229 | \begin{figure}[h!tb] | |
\includegraphics[width=\linewidth]{images/noise-rejection.pdf} | 230 | 230 | \includegraphics[width=\linewidth]{images/noise-rejection.pdf} | |
\caption{Rejection as a function of number of coefficients and number of bits} | 231 | 231 | \caption{Rejection as a function of number of coefficients and number of bits} | |
\label{noise-rejection} | 232 | 232 | \label{noise-rejection} | |
\end{figure} | 233 | 233 | \end{figure} | |
234 | 234 | |||
The objective function maximizes the noise rejection ($\max(\Delta_{i_{\max}})$) while keeping resource | 235 | 235 | The objective function maximizes the noise rejection ($\max(\Delta_{i_{\max}})$) while keeping resource | |
occupation below a user-defined threshold, or as will be discussed here, aims at minimizing the area | 236 | 236 | occupation below a user-defined threshold, or as will be discussed here, aims at minimizing the area | |
needed to reach a given rejection ($\min(S_q)$ in the forthcoming discussion, Eqs. \ref{cstr_size} | 237 | 237 | needed to reach a given rejection ($\min(S_q)$ in the forthcoming discussion, Eqs. \ref{cstr_size} | |
and \ref{cstr_rejection}). The MILP solver is allowed to choose the number of successive | 238 | 238 | and \ref{cstr_rejection}). The MILP solver is allowed to choose the number of successive | |
filters, within an upper bound. The last problem is to model the noise rejection. Since filter | 239 | 239 | filters, within an upper bound. The last problem is to model the noise rejection. Since filter | |
noise rejection capability is not modeled with linear equations, a look-up-table is generated | 240 | 240 | noise rejection capability is not modeled with linear equations, a look-up-table is generated | |
for multiple filter configurations in which the $C_i$, $D_i$ and $N_i$ parameters are varied: for each | 241 | 241 | for multiple filter configurations in which the $C_i$, $D_i$ and $N_i$ parameters are varied: for each | |
one of these conditions, the low-pass filter rejection is stored as computed by the frequency response | 242 | 242 | one of these conditions, the low-pass filter rejection is stored as computed by the frequency response | |
of the digital filter (Fig. \ref{noise-rejection}). Various rejection criteria have been investigated, | 243 | 243 | of the digital filter (Fig. \ref{noise-rejection}). Various rejection criteria have been investigated, | |
including mean value of the stopband response, median value of the stopband response, or as finally | 244 | 244 | including mean value of the stopband response, median value of the stopband response, or as finally | |
selected, maximum value in the stopband. An intuitive analysis of the chart of Fig. \ref{noise-rejection} | 245 | 245 | selected, maximum value in the stopband. An intuitive analysis of the chart of Fig. \ref{noise-rejection} | |
hints at an optimum | 246 | 246 | hints at an optimum | |
set of tap length and number of bits for representing the coefficients along the line of the pyramidal | 247 | 247 | set of tap length and number of bits for representing the coefficients along the line of the pyramidal | |
shaped rejection capability function. | 248 | 248 | shaped rejection capability function. | |
249 | 249 | |||
Linear program formalism for solving the problem is well documented: an objective function is | 250 | 250 | Linear program formalism for solving the problem is well documented: an objective function is | |
defined which is linearly dependent on the parameters to be optimized. Constraints are expressed | 251 | 251 | defined which is linearly dependent on the parameters to be optimized. Constraints are expressed | |
as linear equations and solved using one of the available solvers, in our case GLPK\cite{glpk}. | 252 | 252 | as linear equations and solved using one of the available solvers, in our case GLPK\cite{glpk}. | |
With the notations used in the description of system \ref{model-FIR}, we have defined the linear problem as: | 253 | 253 | With the notations used in the description of system \ref{model-FIR}, we have defined the linear problem as: | |
\paragraph{Variables} | 254 | 254 | \paragraph{Variables} | |
\begin{align*} | 255 | 255 | \begin{align*} | |
x_{i,j} \in \lbrace 0,1 \rbrace & \text{ $i$ is a given filter} \\ | 256 | 256 | x_{i,j} \in \lbrace 0,1 \rbrace & \text{ $i$ is a given filter} \\ | |
& \text{ $j$ is the stage} \\ | 257 | 257 | & \text{ $j$ is the stage} \\ | |
& \text{ If $x_{i,j}$ is equal to 1, the filter is selected} \\ | 258 | 258 | & \text{ If $x_{i,j}$ is equal to 1, the filter is selected} \\ | |
\end{align*} | 259 | 259 | \end{align*} | |
\paragraph{Constants} | 260 | 260 | \paragraph{Constants} | |
\begin{align*} | 261 | 261 | \begin{align*} | |
\mathcal{F} = \lbrace F_1 ... F_p \rbrace & \text{ All possible filters}\\ | 262 | 262 | \mathcal{F} = \lbrace F_1 ... F_p \rbrace & \text{ All possible filters}\\ | |
& \text{ $p$ is the number of different filters} \\ | 263 | 263 | & \text{ $p$ is the number of different filters} \\ | |
% N(i) & \text{ % Constant to let the | 264 | 264 | % N(i) & \text{ % Constant to let the | |
% number of coefficients %} \\ & \text{ | 265 | 265 | % number of coefficients %} \\ & \text{ | |
% for filter $i$}\\ | 266 | 266 | % for filter $i$}\\ | |
% C(i) & \text{ % Constant to let the | 267 | 267 | % C(i) & \text{ % Constant to let the | |
% number of bits of %}\\ & \text{ | 268 | 268 | % number of bits of %}\\ & \text{ | |
% each coefficient for filter $i$}\\ | 269 | 269 | % each coefficient for filter $i$}\\ | |
\mathcal{S}_{\max} & \text{ Total space available inside the FPGA} | 270 | 270 | \mathcal{S}_{\max} & \text{ Total space available inside the FPGA} | |
\end{align*} | 271 | 271 | \end{align*} | |
\paragraph{Constraints} | 272 | 272 | \paragraph{Constraints} | |
\begin{align} | 273 | 273 | \begin{align} | |
1 \leq i \leq p & \nonumber\\ | 274 | 274 | 1 \leq i \leq p & \nonumber\\ | |
1 \leq j \leq q & \text{ $q$ is the maximum number of filter stages} \nonumber \\ | 275 | 275 | 1 \leq j \leq q & \text{ $q$ is the maximum number of filter stages} \nonumber \\ | |
\forall j, \mathlarger{\sum_{i}} x_{i,j} \leq 1 & \text{ At most one filter per stage} \nonumber\\ | 276 | 276 | \forall j, \mathlarger{\sum_{i}} x_{i,j} \leq 1 & \text{ At most one filter per stage} \nonumber\\ | |
\mathcal{S}_0 = 0 & \text{ initial occupation} \nonumber\\ | 277 | 277 | \mathcal{S}_0 = 0 & \text{ initial occupation} \nonumber\\ | |
\forall j, \mathcal{S}_j = \mathcal{S}_{j-1} + \mathlarger{\sum_i (x_{i,j} \times \mathcal{A}_i)} \label{cstr_size} \\ | 278 | 278 | \forall j, \mathcal{S}_j = \mathcal{S}_{j-1} + \mathlarger{\sum_i (x_{i,j} \times \mathcal{A}_i)} \label{cstr_size} \\ | |
\mathcal{S}_j \leq \mathcal{S}_{\max}\nonumber \\ | 279 | 279 | \mathcal{S}_j \leq \mathcal{S}_{\max}\nonumber \\ | |
\mathcal{N}_0 = 0 & \text{ initial rejection}\nonumber\\ | 280 | 280 | \mathcal{N}_0 = 0 & \text{ initial rejection}\nonumber\\ | |
\forall j, \mathcal{N}_j = \mathcal{N}_{j-1} + \mathlarger{\sum_i (x_{i,j} \times \mathcal{R}_i)} \label{cstr_rejection} \\ | 281 | 281 | \forall j, \mathcal{N}_j = \mathcal{N}_{j-1} + \mathlarger{\sum_i (x_{i,j} \times \mathcal{R}_i)} \label{cstr_rejection} \\ | |
\mathcal{N}_q \geqslant 160 & \text{ a user-defined bound}\nonumber\\ | 282 | 282 | \mathcal{N}_q \geqslant 160 & \text{ a user-defined bound}\nonumber\\ | |
& \text{ (e.g. 160~dB here)}\nonumber\\\nonumber | 283 | 283 | & \text{ (e.g. 160~dB here)}\nonumber\\\nonumber | |
\end{align} | 284 | 284 | \end{align} | |
\paragraph{Goal} | 285 | 285 | \paragraph{Goal} | |
\begin{align*} | 286 | 286 | \begin{align*} | |
\min \mathcal{S}_q | 287 | 287 | \min \mathcal{S}_q | |
\end{align*} | 288 | 288 | \end{align*} | |
289 | 289 | |||
The constraint \ref{cstr_size} means the occupation for the current stage $j$ depends on | 290 | 290 | The constraint \ref{cstr_size} means the occupation for the current stage $j$ depends on | |
the previous occupation and the occupation of current selected filter (it is possible | 291 | 291 | the previous occupation and the occupation of current selected filter (it is possible | |
that no filter is selected for this stage). And the second one \ref{cstr_rejection} | 292 | 292 | that no filter is selected for this stage). And the second one \ref{cstr_rejection} | |
means the same thing but for the rejection: the rejection depends on the previous rejection | 293 | 293 | means the same thing but for the rejection: the rejection depends on the previous rejection | |
plus the rejection of selected filter. | 294 | 294 | plus the rejection of selected filter. | |
295 | 295 | |||
\subsection{Low bandpass ripple and maximum rejection criteria} | 296 | 296 | \subsection{Low bandpass ripple and maximum rejection criteria} | |
297 | 297 | |||
The MILP solver provides a solution to the problem by selecting a series of small FIR with | 298 | 298 | The MILP solver provides a solution to the problem by selecting a series of small FIR with | |
increasing number of bits representing data and coefficients as well as an increasing number | 299 | 299 | increasing number of bits representing data and coefficients as well as an increasing number | |
of coefficients, instead of a single monolithic filter. | 300 | 300 | of coefficients, instead of a single monolithic filter. | |
301 | 301 | |||
\begin{figure}[h!tb] | 302 | 302 | \begin{figure}[h!tb] | |
% \includegraphics[width=\linewidth]{images/compare-fir.pdf} | 303 | 303 | % \includegraphics[width=\linewidth]{images/compare-fir.pdf} | |
\includegraphics[width=\linewidth]{images/fir-mono-vs-fir-series-noise-fixe-jmf-light.pdf} | 304 | 304 | \includegraphics[width=\linewidth]{images/fir-mono-vs-fir-series-noise-fixe-jmf-light.pdf} | |
\caption{Comparison of the rejection capability between a series of FIR and a monolithic FIR | 305 | 305 | \caption{Comparison of the rejection capability between a series of FIR and a monolithic FIR | |
with a cutoff frequency set at half the Nyquist frequency.} | 306 | 306 | with a cutoff frequency set at half the Nyquist frequency.} | |
\label{compare-fir} | 307 | 307 | \label{compare-fir} | |
\end{figure} | 308 | 308 | \end{figure} | |
309 | 309 | |||
Fig. \ref{compare-fir} exhibits the | 310 | 310 | Fig. \ref{compare-fir} exhibits the | |
performance comparison between one solution and a monolithic FIR when selecting a cutoff | 311 | 311 | performance comparison between one solution and a monolithic FIR when selecting a cutoff | |
frequency of half the Nyquist frequency: a series of 5 FIR and a series of 10 FIR with the | 312 | 312 | frequency of half the Nyquist frequency: a series of 5 FIR and a series of 10 FIR with the | |
same space usage are provided as selected by the MILP solver. The FIR cascade provides better | 313 | 313 | same space usage are provided as selected by the MILP solver. The FIR cascade provides better | |
rejection than the monolithic FIR at the expense of a lower cutoff frequency which remains to | 314 | 314 | rejection than the monolithic FIR at the expense of a lower cutoff frequency which remains to | |
be tuned or compensated for. | 315 | 315 | be tuned or compensated for. | |
316 | 316 | |||
317 | 317 | |||
The resource occupation when synthesizing such FIR on a Xilinx FPGA is summarized as Tab. \ref{t1}. | 318 | 318 | The resource occupation when synthesizing such FIR on a Xilinx FPGA is summarized as Tab. \ref{t1}. | |
We have considered a set of resources representative of the hardware platform we work on, | 319 | 319 | We have considered a set of resources representative of the hardware platform we work on, | |
Avnet's Zedboard featuring a Xilinx XC7Z020-CLG484-1 Zynq System on Chip (SoC). The results reported in | 320 | 320 | Avnet's Zedboard featuring a Xilinx XC7Z020-CLG484-1 Zynq System on Chip (SoC). The results reported in | |
Tab. \ref{t1} emphasize that implementing the monolithic single FIR is impossible due to | 321 | 321 | Tab. \ref{t1} emphasize that implementing the monolithic single FIR is impossible due to | |
the insufficient hardware resources (exhausted LUT resources), while the FIR cascading 5 or 10 | 322 | 322 | the insufficient hardware resources (exhausted LUT resources), while the FIR cascading 5 or 10 | |
filters fit in the available resources. However, in all cases the DSP resources are fully | 323 | 323 | filters fit in the available resources. However, in all cases the DSP resources are fully | |
used: while the design can be synthesized using Xilinx proprietary Vivado 2016.2 software, | 324 | 324 | used: while the design can be synthesized using Xilinx proprietary Vivado 2016.2 software, | |
implementing the design fails due to the excessive resource usage preventing routing the signals | 325 | 325 | implementing the design fails due to the excessive resource usage preventing routing the signals | |
on the FPGA. Such results emphasize on the one hand the improvement prospect of the optimization | 326 | 326 | on the FPGA. Such results emphasize on the one hand the improvement prospect of the optimization | |
procedure by finding non-trivial solutions matching resource constraints, but on the other | 327 | 327 | procedure by finding non-trivial solutions matching resource constraints, but on the other | |
hand also illustrate the limitation of a model with an abstraction layer that does not account | 328 | 328 | hand also illustrate the limitation of a model with an abstraction layer that does not account | |
for the detailed architecture of the hardware. | 329 | 329 | for the detailed architecture of the hardware. | |
330 | 330 | |||
\begin{table}[h!tb] | 331 | 331 | \begin{table}[h!tb] | |
\caption{Resource occupation on a Xilinx Zynq-7000 series FPGA when synthesizing the FIR cascade | 332 | 332 | \caption{Resource occupation on a Xilinx Zynq-7000 series FPGA when synthesizing the FIR cascade | |
identified as optimal by the MILP solver within a finite resource criterion. The last line refers | 333 | 333 | identified as optimal by the MILP solver within a finite resource criterion. The last line refers | |
to available resources on a Zynq-7020 as found on the Zedboard.} | 334 | 334 | to available resources on a Zynq-7020 as found on the Zedboard.} | |
\begin{center} | 335 | 335 | \begin{center} | |
\begin{tabular}{|c|cccc|}\hline | 336 | 336 | \begin{tabular}{|c|cccc|}\hline | |
FIR & BlockRAM & LookUpTables & DSP & rejection (dB)\\\hline\hline | 337 | 337 | FIR & BlockRAM & LookUpTables & DSP & rejection (dB)\\\hline\hline | |
1 (monolithic) & 1 & 76183 & 220 & -162 \\ | 338 | 338 | 1 (monolithic) & 1 & 76183 & 220 & -162 \\ | |
5 & 5 & 18597 & 220 & -160 \\ | 339 | 339 | 5 & 5 & 18597 & 220 & -160 \\ | |
10 & 8 & 24729 & 220 & -161 \\\hline\hline | 340 | 340 | 10 & 8 & 24729 & 220 & -161 \\\hline\hline | |
\textbf{Zynq 7020} & \textbf{420} & \textbf{53200} & \textbf{220} & \\\hline | 341 | 341 | \textbf{Zynq 7020} & \textbf{420} & \textbf{53200} & \textbf{220} & \\\hline | |
%\begin{tabular}{|c|ccccc|}\hline | 342 | 342 | %\begin{tabular}{|c|ccccc|}\hline | |
%FIR & BRAM36 & BRAM18 & LUT & DSP & rejection (dB)\\\hline\hline | 343 | 343 | %FIR & BRAM36 & BRAM18 & LUT & DSP & rejection (dB)\\\hline\hline | |
%1 (monolithic) & 1 & 0 & {\color{Red}76183} & 220 & -162 \\ | 344 | 344 | %1 (monolithic) & 1 & 0 & {\color{Red}76183} & 220 & -162 \\ | |
%5 & 0 & 5 & {\color{Green}18597} & 220 & -160 \\ | 345 | 345 | %5 & 0 & 5 & {\color{Green}18597} & 220 & -160 \\ | |
%10 & 0 & 8 & {\color{Green}24729} & 220 & -161 \\\hline\hline | 346 | 346 | %10 & 0 & 8 & {\color{Green}24729} & 220 & -161 \\\hline\hline | |
%\textbf{Zynq 7020} & \textbf{140} & \textbf{280} & \textbf{53200} & \textbf{220} & \\\hline | 347 | 347 | %\textbf{Zynq 7020} & \textbf{140} & \textbf{280} & \textbf{53200} & \textbf{220} & \\\hline | |
\end{tabular} | 348 | 348 | \end{tabular} | |
\end{center} | 349 | 349 | \end{center} | |
%\vspace{-0.7cm} | 350 | 350 | %\vspace{-0.7cm} | |
\label{t1} | 351 | 351 | \label{t1} | |
\end{table} | 352 | 352 | \end{table} | |
353 | 353 | |||
\subsection{Alternate criteria}\label{median} | 354 | 354 | \subsection{Alternate criteria}\label{median} | |
355 | 355 | |||
Fig. \ref{compare-fir} provides FIR solutions matching well the targeted transfer | 356 | 356 | Fig. \ref{compare-fir} provides FIR solutions matching well the targeted transfer | |
function, namely low ripple in the bandpass defined as the first 40\% of the frequency | 357 | 357 | function, namely low ripple in the bandpass defined as the first 40\% of the frequency | |
range and maximum rejection of 160~dB in the last 40\% stopband. We illustrate now, for | 358 | 358 | range and maximum rejection of 160~dB in the last 40\% stopband. We illustrate now, for | |
demonstrating the need to properly select the optimization criterion, two cases of poor | 359 | 359 | demonstrating the need to properly select the optimization criterion, two cases of poor | |
filter shapes obtained by selecting the mean value and median value of the rejection, | 360 | 360 | filter shapes obtained by selecting the mean value and median value of the rejection, | |
with no consideration for the ripples in the bandpass. The results of the optimizations, | 361 | 361 | with no consideration for the ripples in the bandpass. The results of the optimizations, | |
in these cases, are shown in Figs. \ref{compare-mean} and \ref{compare-median}. | 362 | 362 | in these cases, are shown in Figs. \ref{compare-mean} and \ref{compare-median}. | |
363 | 363 | |||
\begin{figure}[h!tb] | 364 | 364 | \begin{figure}[h!tb] | |
\includegraphics[width=\linewidth]{images/fir-mono-vs-fir-series-noise-fixe-mean-light.pdf} | 365 | 365 | \includegraphics[width=\linewidth]{images/fir-mono-vs-fir-series-noise-fixe-mean-light.pdf} | |
\caption{Comparison of the rejection capability between a series of FIR and a monolithic FIR | 366 | 366 | \caption{Comparison of the rejection capability between a series of FIR and a monolithic FIR | |
with a cutoff frequency set at half the Nyquist frequency.} | 367 | 367 | with a cutoff frequency set at half the Nyquist frequency.} | |
\label{compare-mean} | 368 | 368 | \label{compare-mean} | |
\end{figure} | 369 | 369 | \end{figure} | |
370 | 370 | |||
In the case of the mean value criterion (Fig. \ref{compare-mean}), the solution is not | 371 | 371 | In the case of the mean value criterion (Fig. \ref{compare-mean}), the solution is not | |
acceptable since the notch at the end of the transition band compensates for some unacceptable | 372 | 372 | acceptable since the notch at the end of the transition band compensates for some unacceptable | |
rise in the rejection close to the Nyquist frequency. Applying such a filter might yield excessive | 373 | 373 | rise in the rejection close to the Nyquist frequency. Applying such a filter might yield excessive | |
high frequency spurious components to be aliased at low frequency when decimating the signal. | 374 | 374 | high frequency spurious components to be aliased at low frequency when decimating the signal. | |
Similarly, the lack of criterion on the bandpass shape induces a shape with poor flatness and | 375 | 375 | Similarly, the lack of criterion on the bandpass shape induces a shape with poor flatness and | |
a slowly decaying transfer function starting to attenuate spectral components well before the | 376 | 376 | a slowly decaying transfer function starting to attenuate spectral components well before the | |
transition band starts. Such issues are partly alleviated by replacing a mean rejection value with | 377 | 377 | transition band starts. Such issues are partly alleviated by replacing a mean rejection value with | |
a median rejection value (Fig. \ref{compare-median}) but solutions remain unacceptable for | 378 | 378 | a median rejection value (Fig. \ref{compare-median}) but solutions remain unacceptable for | |
the reasons stated previously and much poorer than those found with the maximum rejection criterion | 379 | 379 | the reasons stated previously and much poorer than those found with the maximum rejection criterion | |
selected earlier (Fig. \ref{compare-fir}). | 380 | 380 | selected earlier (Fig. \ref{compare-fir}). | |
381 | 381 | |||
\begin{figure}[h!tb] | 382 | 382 | \begin{figure}[h!tb] | |
\includegraphics[width=\linewidth]{images/fir-mono-vs-fir-series-noise-fixe-median-light.pdf} | 383 | 383 | \includegraphics[width=\linewidth]{images/fir-mono-vs-fir-series-noise-fixe-median-light.pdf} | |
\caption{Comparison of the rejection capability between a series of FIR and a monolithic FIR | 384 | 384 | \caption{Comparison of the rejection capability between a series of FIR and a monolithic FIR | |
with a cutoff frequency set at half the Nyquist frequency.} | 385 | 385 | with a cutoff frequency set at half the Nyquist frequency.} | |
\label{compare-median} | 386 | 386 | \label{compare-median} | |
\end{figure} | 387 | 387 | \end{figure} | |
388 | 388 | |||
\section{Filter coefficient selection} | 389 | 389 | \section{Filter coefficient selection} | |
390 | 390 | |||
The coefficients of a single monolithic filter are computed as the impulse response | 391 | 391 | The coefficients of a single monolithic filter are computed as the impulse response | |
of the filter transfer function, and practically approximated by a multitude of methods | 392 | 392 | of the filter transfer function, and practically approximated by a multitude of methods | |
including least square optimization (Matlab's {\tt firls} function), Hamming or Kaiser windowing | 393 | 393 | including least square optimization (Matlab's {\tt firls} function), Hamming or Kaiser windowing | |
(Matlab's {\tt fir1} function). | 394 | 394 | (Matlab's {\tt fir1} function). | |
395 | 395 | |||
\begin{figure}[h!tb] | 396 | 396 | \begin{figure}[h!tb] | |
\includegraphics[width=\linewidth]{images/fir1-vs-firls} | 397 | 397 | \includegraphics[width=\linewidth]{images/fir1-vs-firls} | |
\caption{Evolution of the rejection capability of least-square optimized filters and Hamming | 398 | 398 | \caption{Evolution of the rejection capability of least-square optimized filters and Hamming | |
FIR filters as a function of the number of coefficients, for floating point numbers and 8-bit | 399 | 399 | FIR filters as a function of the number of coefficients, for floating point numbers and 8-bit | |
encoded integers.} | 400 | 400 | encoded integers.} | |
\label{2} | 401 | 401 | \label{2} | |
\end{figure} | 402 | 402 | \end{figure} | |
403 | 403 | |||
Cascading filters opens a new optimization opportunity by | 404 | 404 | Cascading filters opens a new optimization opportunity by | |
selecting various coefficient sets depending on the number of coefficients. Fig. \ref{2} | 405 | 405 | selecting various coefficient sets depending on the number of coefficients. Fig. \ref{2} | |
illustrates that for a number of coefficients ranging from 8 to 47, {\tt fir1} provides a better | 406 | 406 | illustrates that for a number of coefficients ranging from 8 to 47, {\tt fir1} provides a better | |
rejection than {\tt firls}: since the linear solver increases the number of coefficients along | 407 | 407 | rejection than {\tt firls}: since the linear solver increases the number of coefficients along | |
the processing chain, the type of selected filter also changes depending on the number of coefficients | 408 | 408 | the processing chain, the type of selected filter also changes depending on the number of coefficients | |
and evolves along the processing chain. | 409 | 409 | and evolves along the processing chain. | |
410 | 410 | |||
\section{Conclusion} | 411 | 411 | \section{Conclusion} | |
412 | 412 | |||
We address the optimization problem of designing a low-pass filter chain in a Field Programmable Gate | 413 | 413 | We address the optimization problem of designing a low-pass filter chain in a Field Programmable Gate | |
Array for improved noise rejection within constrained resource occupation, as needed for | 414 | 414 | Array for improved noise rejection within constrained resource occupation, as needed for | |
real time processing of radiofrequency signal when characterizing spectral phase noise | 415 | 415 | real time processing of radiofrequency signal when characterizing spectral phase noise | |
characteristics of stable oscillators. The flexibility of the digital approach makes the result | 416 | 416 | characteristics of stable oscillators. The flexibility of the digital approach makes the result | |
best suited for closing the loop and using the measurement output in a feedback loop for | 417 | 417 | best suited for closing the loop and using the measurement output in a feedback loop for | |
controlling clocks, e.g. in a quartz-stabilized high performance clock whose long term behavior | 418 | 418 | controlling clocks, e.g. in a quartz-stabilized high performance clock whose long term behavior | |
is controlled by a non-piezoelectric resonator (sapphire resonator, microwave or optical | 419 | 419 | is controlled by a non-piezoelectric resonator (sapphire resonator, microwave or optical | |
atomic transition). | 420 | 420 | atomic transition). | |
421 | 421 | |||
\section*{Acknowledgement} | 422 | 422 | \section*{Acknowledgement} | |
423 | 423 | |||
This work is supported by the ANR Programme d'Investissement d'Avenir in | 424 | 424 | This work is supported by the ANR Programme d'Investissement d'Avenir in | |
progress at the Time and Frequency Departments of the FEMTO-ST Institute | 425 | 425 | progress at the Time and Frequency Departments of the FEMTO-ST Institute | |
(Oscillator IMP, First-TF and Refimeve+), and by R\'egion de Franche-Comt\'e. | 426 | 426 | (Oscillator IMP, First-TF and Refimeve+), and by R\'egion de Franche-Comt\'e. | |
The authors would like to thank E. Rubiola, F. Vernotte, G. Cabodevila for support and | 427 | 427 | The authors would like to thank E. Rubiola, F. Vernotte, and G. Cabodevila | |
fruitful discussions. | 428 | 428 | for support and fruitful discussions. | |
429 | 429 | |||
\bibliographystyle{IEEEtran} | 430 | 430 | \bibliographystyle{IEEEtran} | |
\balance | 431 | 431 | \balance | |
\bibliography{references,biblio} | 432 | 432 | \bibliography{references,biblio} | |
\end{document} | 433 | 433 | \end{document} | |
434 | 434 | |||
\section{Contexte d'ordonnancement} | 435 | 435 | \section{Contexte d'ordonnancement} | |
Dans cette partie, nous donnerons des d\'efinitions de termes rattach\'es au domaine de l'ordonnancement | 436 | 436 | Dans cette partie, nous donnerons des d\'efinitions de termes rattach\'es au domaine de l'ordonnancement | |
et nous verrons que le sujet trait\'e se rapproche beaucoup d'un problème d'ordonnancement. De ce fait | 437 | 437 | et nous verrons que le sujet trait\'e se rapproche beaucoup d'un problème d'ordonnancement. De ce fait | |
nous pourrons aller plus loin que les travaux vus pr\'ec\'edemment et nous tenterons des approches d'ordonnancement | 438 | 438 | nous pourrons aller plus loin que les travaux vus pr\'ec\'edemment et nous tenterons des approches d'ordonnancement | |
et d'optimisation. | 439 | 439 | et d'optimisation. | |
440 | 440 | |||
\subsection{D\'efinition du vocabulaire} | 441 | 441 | \subsection{D\'efinition du vocabulaire} | |
Avant tout, il faut d\'efinir ce qu'est un problème d'optimisation. Il y a deux d\'efinitions | 442 | 442 | Avant tout, il faut d\'efinir ce qu'est un problème d'optimisation. Il y a deux d\'efinitions | |
importantes à donner. La première est propos\'ee par Legrand et Robert dans leur livre \cite{def1-ordo} : | 443 | 443 | importantes à donner. La première est propos\'ee par Legrand et Robert dans leur livre \cite{def1-ordo} : | |
\begin{definition} | 444 | 444 | \begin{definition} | |
\label{def-ordo1} | 445 | 445 | \label{def-ordo1} | |
Un ordonnancement d'un système de t\^aches $G\ =\ (V,\ E,\ w)$ est une fonction $\sigma$ : | 446 | 446 | Un ordonnancement d'un système de t\^aches $G\ =\ (V,\ E,\ w)$ est une fonction $\sigma$ : | |
$V \rightarrow \mathbb{N}$ telle que $\sigma(u) + w(u) \leq \sigma(v)$ pour toute arête $(u,\ v) \in E$. | 447 | 447 | $V \rightarrow \mathbb{N}$ telle que $\sigma(u) + w(u) \leq \sigma(v)$ pour toute arête $(u,\ v) \in E$. | |
\end{definition} | 448 | 448 | \end{definition} | |
449 | 449 | |||
Dit plus simplement, l'ensemble $V$ repr\'esente les t\^aches à ex\'ecuter, l'ensemble $E$ repr\'esente les d\'ependances | 450 | 450 | Dit plus simplement, l'ensemble $V$ repr\'esente les t\^aches à ex\'ecuter, l'ensemble $E$ repr\'esente les d\'ependances | |
des t\^aches et $w$ les temps d'ex\'ecution de la t\^ache. La fonction $\sigma$ donne donc l'heure de d\'ebut de | 451 | 451 | des t\^aches et $w$ les temps d'ex\'ecution de la t\^ache. La fonction $\sigma$ donne donc l'heure de d\'ebut de | |
chacune des t\^aches. La d\'efinition dit que si une t\^ache $v$ d\'epend d'une t\^ache $u$ alors | 452 | 452 | chacune des t\^aches. La d\'efinition dit que si une t\^ache $v$ d\'epend d'une t\^ache $u$ alors | |
la date de d\'ebut de $v$ sera plus grande ou \'egale au d\'ebut de l'ex\'ecution de la t\^ache $u$ plus son | 453 | 453 | la date de d\'ebut de $v$ sera plus grande ou \'egale au d\'ebut de l'ex\'ecution de la t\^ache $u$ plus son | |
temps d'ex\'ecution. | 454 | 454 | temps d'ex\'ecution. | |
455 | 455 | |||
Une autre d\'efinition importante qui est propos\'ee par Leung et al. \cite{def2-ordo} est : | 456 | 456 | Une autre d\'efinition importante qui est propos\'ee par Leung et al. \cite{def2-ordo} est : | |
\begin{definition} | 457 | 457 | \begin{definition} | |
\label{def-ordo2} | 458 | 458 | \label{def-ordo2} | |
L'ordonnancement traite de l'allocation de ressources rares à des activit\'es avec | 459 | 459 | L'ordonnancement traite de l'allocation de ressources rares à des activit\'es avec | |
l'objectif d'optimiser un ou plusieurs critères de performance. | 460 | 460 | l'objectif d'optimiser un ou plusieurs critères de performance. | |
\end{definition} | 461 | 461 | \end{definition} | |
462 | 462 | |||
Cette d\'efinition est plus g\'en\'erique mais elle nous int\'eresse davantage que la d\'efinition \ref{def-ordo1}. | 463 | 463 | Cette d\'efinition est plus g\'en\'erique mais elle nous int\'eresse davantage que la d\'efinition \ref{def-ordo1}. | |
En effet, la partie qui nous int\'eresse dans cette première d\'efinition est le respect de la pr\'ec\'edence des t\^aches. | 464 | 464 | En effet, la partie qui nous int\'eresse dans cette première d\'efinition est le respect de la pr\'ec\'edence des t\^aches. | |
Dans les faits les dates de d\'ebut ne nous int\'eressent pas r\'eellement. | 465 | 465 | Dans les faits les dates de d\'ebut ne nous int\'eressent pas r\'eellement. | |
466 | 466 | |||
En revanche la d\'efinition \ref{def-ordo2} sera au c\oe{}ur du projet. Pour se convaincre de cela, | 467 | 467 | En revanche la d\'efinition \ref{def-ordo2} sera au c\oe{}ur du projet. Pour se convaincre de cela, | |
il nous faut d'abord d\'efinir quel est le type de problème d'ordonnancement qu'on traite et quelles | 468 | 468 | il nous faut d'abord d\'efinir quel est le type de problème d'ordonnancement qu'on traite et quelles | |
sont les m\'ethodes qu'on peut appliquer. | 469 | 469 | sont les m\'ethodes qu'on peut appliquer. | |
470 | 470 | |||
Les problèmes d'ordonnancement peuvent être class\'es en diff\'erentes cat\'egories : | 471 | 471 | Les problèmes d'ordonnancement peuvent être class\'es en diff\'erentes cat\'egories : | |
\begin{itemize} | 472 | 472 | \begin{itemize} | |
\item T\^aches ind\'ependantes : dans cette cat\'egorie de problèmes, les t\^aches sont complètement ind\'ependantes | 473 | 473 | \item T\^aches ind\'ependantes : dans cette cat\'egorie de problèmes, les t\^aches sont complètement ind\'ependantes | |
les unes des autres. Dans notre cas, ce n'est pas le plus adapt\'e. | 474 | 474 | les unes des autres. Dans notre cas, ce n'est pas le plus adapt\'e. | |
\item Graphe de t\^aches : la d\'efinition \ref{def-ordo1} d\'ecrit cette cat\'egorie. La plupart du temps, | 475 | 475 | \item Graphe de t\^aches : la d\'efinition \ref{def-ordo1} d\'ecrit cette cat\'egorie. La plupart du temps, | |
les t\^aches sont repr\'esent\'ees par un DAG. Cette cat\'egorie est très proche de notre cas puisque nous devons \'egalement ex\'ecuter | 476 | 476 | les t\^aches sont repr\'esent\'ees par un DAG. Cette cat\'egorie est très proche de notre cas puisque nous devons \'egalement ex\'ecuter | |
des t\^aches qui ont un certain nombre de d\'ependances. On pourra même dire que dans certains cas, | 477 | 477 | des t\^aches qui ont un certain nombre de d\'ependances. On pourra même dire que dans certains cas, | |
on a des anti-arbres, c'est-à-dire que nous avons une multitude de t\^aches d'entr\'ee qui convergent vers une | 478 | 478 | on a des anti-arbres, c'est-à-dire que nous avons une multitude de t\^aches d'entr\'ee qui convergent vers une | |
t\^ache de fin. | 479 | 479 | t\^ache de fin. | |
\item Workflow : cette cat\'egorie est une sous cat\'egorie des graphes de t\^aches dans le sens où | 480 | 480 | \item Workflow : cette cat\'egorie est une sous cat\'egorie des graphes de t\^aches dans le sens où | |
il s'agit d'un graphe de t\^aches r\'ep\'et\'e de nombreuses fois. C'est exactement ce type de problème | 481 | 481 | il s'agit d'un graphe de t\^aches r\'ep\'et\'e de nombreuses fois. C'est exactement ce type de problème | |
que nous traitons ici. | 482 | 482 | que nous traitons ici. | |
\end{itemize} | 483 | 483 | \end{itemize} | |
484 | 484 | |||
Bien entendu, cette liste n'est pas exhaustive et il existe de nombreuses autres classifications et sous-classifications | 485 | 485 | Bien entendu, cette liste n'est pas exhaustive et il existe de nombreuses autres classifications et sous-classifications | |
de ces problèmes. Nous n'avons parl\'e ici que des cat\'egories les plus communes. | 486 | 486 | de ces problèmes. Nous n'avons parl\'e ici que des cat\'egories les plus communes. | |
487 | 487 | |||
Un autre point à d\'efinir, est le critère d'optimisation. Il y a là encore un grand nombre de | 488 | 488 | Un autre point à d\'efinir, est le critère d'optimisation. Il y a là encore un grand nombre de | |
critères possibles. Nous allons donc parler des principaux : | 489 | 489 | critères possibles. Nous allons donc parler des principaux : | |
\begin{itemize} | 490 | 490 | \begin{itemize} | |
\item Temps de compl\'etion total (ou Makespan en anglais) : ce critère est l'un des critères d'optimisation | 491 | 491 | \item Temps de compl\'etion total (ou Makespan en anglais) : ce critère est l'un des critères d'optimisation | |
les plus courants. Il s'agit donc de minimiser la date de fin de la dernière t\^ache de l'ensemble des | 492 | 492 | les plus courants. Il s'agit donc de minimiser la date de fin de la dernière t\^ache de l'ensemble des | |
t\^aches à ex\'ecuter. L'enjeu de cette optimisation est donc de trouver l'ordonnancement optimal permettant | 493 | 493 | t\^aches à ex\'ecuter. L'enjeu de cette optimisation est donc de trouver l'ordonnancement optimal permettant | |
la fin d'ex\'ecution au plus tôt. | 494 | 494 | la fin d'ex\'ecution au plus tôt. | |
\item Somme des temps d'ex\'ecution (Flowtime en anglais) : il s'agit de faire la somme des temps d'ex\'ecution de toutes les t\^aches | 495 | 495 | \item Somme des temps d'ex\'ecution (Flowtime en anglais) : il s'agit de faire la somme des temps d'ex\'ecution de toutes les t\^aches | |
et d'optimiser ce r\'esultat. | 496 | 496 | et d'optimiser ce r\'esultat. | |
\item Le d\'ebit : ce critère quant à lui, vise à augmenter au maximum le d\'ebit de traitement des donn\'ees. | 497 | 497 | \item Le d\'ebit : ce critère quant à lui, vise à augmenter au maximum le d\'ebit de traitement des donn\'ees. | |
\end{itemize} | 498 | 498 | \end{itemize} | |
499 | 499 | |||
En plus de cela, on peut avoir besoin de plusieurs critères d'optimisation. Il s'agit dans ce cas d'une optimisation | 500 | 500 | En plus de cela, on peut avoir besoin de plusieurs critères d'optimisation. Il s'agit dans ce cas d'une optimisation | |
multi-critères. Bien entendu, cela complexifie d'autant plus le problème car la solution la plus optimale pour un | 501 | 501 | multi-critères. Bien entendu, cela complexifie d'autant plus le problème car la solution la plus optimale pour un | |
des critères peut être très mauvaise pour un autre critère. Dans ce cas, il s'agira de trouver une solution qui permet | 502 | 502 | des critères peut être très mauvaise pour un autre critère. Dans ce cas, il s'agira de trouver une solution qui permet | |
de faire le meilleur compromis entre tous les critères. | 503 | 503 | de faire le meilleur compromis entre tous les critères. | |
504 | 504 | |||
\subsection{Formalisation du problème} | 505 | 505 | \subsection{Formalisation du problème} | |
\label{formalisation} | 506 | 506 | \label{formalisation} | |
Maintenant que nous avons donn\'e le vocabulaire li\'e à l'ordonnancement, nous allons pouvoir essayer de caract\'eriser | 507 | 507 | Maintenant que nous avons donn\'e le vocabulaire li\'e à l'ordonnancement, nous allons pouvoir essayer de caract\'eriser | |
formellement notre problème. En effet, nous allons reprendre les contraintes \'enonc\'ees dans la section \ref{def-contraintes} | 508 | 508 | formellement notre problème. En effet, nous allons reprendre les contraintes \'enonc\'ees dans la section \ref{def-contraintes} | |
et nous essayerons de les formaliser le plus finement possible. | 509 | 509 | et nous essayerons de les formaliser le plus finement possible. | |
510 | 510 | |||
Comme nous l'avons dit, une t\^ache est un bloc de traitement. Chaque t\^ache $i$ dispose d'un ensemble de paramètres | 511 | 511 | Comme nous l'avons dit, une t\^ache est un bloc de traitement. Chaque t\^ache $i$ dispose d'un ensemble de paramètres | |
que nous nommerons $\mathcal{P}_{i}$. Cet ensemble $\mathcal{P}_i$ est propre à chaque t\^ache et il variera d'une | 512 | 512 | que nous nommerons $\mathcal{P}_{i}$. Cet ensemble $\mathcal{P}_i$ est propre à chaque t\^ache et il variera d'une | |
t\^ache à l'autre. Nous reviendrons plus tard sur les paramètres qui peuvent composer cet ensemble. | 513 | 513 | t\^ache à l'autre. Nous reviendrons plus tard sur les paramètres qui peuvent composer cet ensemble. | |
514 | 514 | |||
Outre cet ensemble $\mathcal{P}_i$, chaque t\^ache dispose de paramètres communs : | 515 | 515 | Outre cet ensemble $\mathcal{P}_i$, chaque t\^ache dispose de paramètres communs : | |
\begin{itemize} | 516 | 516 | \begin{itemize} | |
\item Dur\'ee de la t\^ache : Comme nous l'avons dit auparavant, dans le cadre d'un FPGA le temps est compt\'e en nombre de coups d'horloge. | 517 | 517 | \item Dur\'ee de la t\^ache : Comme nous l'avons dit auparavant, dans le cadre d'un FPGA le temps est compt\'e en nombre de coups d'horloge. | |
En outre, les blocs sont toujours sollicit\'es, certains même sont capables de lire et de renvoyer un r\'esultat à chaque coup d'horloge. | 518 | 518 | En outre, les blocs sont toujours sollicit\'es, certains même sont capables de lire et de renvoyer un r\'esultat à chaque coup d'horloge. | |
Donc la dur\'ee d'une t\^ache ne peut être le laps de temps entre l'entr\'ee d'une donn\'ee et la sortie d'une autre. Nous d\'efinirons la | 519 | 519 | Donc la dur\'ee d'une t\^ache ne peut être le laps de temps entre l'entr\'ee d'une donn\'ee et la sortie d'une autre. Nous d\'efinirons la | |
dur\'ee comme le temps de traitement d'une donn\'ee, c'est à dire la diff\'erence de temps entre la date de sortie d'une donn\'ee | 520 | 520 | dur\'ee comme le temps de traitement d'une donn\'ee, c'est à dire la diff\'erence de temps entre la date de sortie d'une donn\'ee | |
et de sa date d'entr\'ee. Nous nommerons cette dur\'ee $\delta_i$. % Je devrais la nomm\'ee w comme dans la def2 | 521 | 521 | et de sa date d'entr\'ee. Nous nommerons cette dur\'ee $\delta_i$. % Je devrais la nomm\'ee w comme dans la def2 | |
\item La pr\'ecision : La pr\'ecision d'une donn\'ee est le nombre de bits significatifs qu'elle compte. En effet, au fil des traitements | 522 | 522 | \item La pr\'ecision : La pr\'ecision d'une donn\'ee est le nombre de bits significatifs qu'elle compte. En effet, au fil des traitements | |
les pr\'ecisions peuvent varier. On nomme donc la pr\'ecision d'entr\'ee d'une t\^ache $i$ comme $\pi_i^-$ et la pr\'ecision en sortie $\pi_i^+$. | 523 | 523 | les pr\'ecisions peuvent varier. On nomme donc la pr\'ecision d'entr\'ee d'une t\^ache $i$ comme $\pi_i^-$ et la pr\'ecision en sortie $\pi_i^+$. | |
\item La fr\'equence du flux en entr\'ee (ou sortie) : Cette fr\'equence repr\'esente la fr\'equence des donn\'ees qui arrivent (resp. sortent). | 524 | 524 | \item La fr\'equence du flux en entr\'ee (ou sortie) : Cette fr\'equence repr\'esente la fr\'equence des donn\'ees qui arrivent (resp. sortent). | |
Selon les t\^aches, les fr\'equences varieront. En effet, certains blocs ralentissent le flux c'est pourquoi on distingue la fr\'equence du | 525 | 525 | Selon les t\^aches, les fr\'equences varieront. En effet, certains blocs ralentissent le flux c'est pourquoi on distingue la fr\'equence du | |
flux en entr\'ee et la fr\'equence en sortie. Nous nommerons donc la fr\'equence du flux en entr\'ee $f_i^-$ et la fr\'equence en sortie $f_i^+$. | 526 | 526 | flux en entr\'ee et la fr\'equence en sortie. Nous nommerons donc la fr\'equence du flux en entr\'ee $f_i^-$ et la fr\'equence en sortie $f_i^+$. | |
\item La quantit\'e de donn\'ees en entr\'ee (ou en sortie) : Il s'agit de la quantit\'e de donn\'ees que le bloc s'attend à traiter (resp. | 527 | 527 | \item La quantit\'e de donn\'ees en entr\'ee (ou en sortie) : Il s'agit de la quantit\'e de donn\'ees que le bloc s'attend à traiter (resp. | |
est capable de produire). Les t\^aches peuvent avoir à traiter des gros volumes de donn\'ees et n'en ressortir qu'une partie. Cette | 528 | 528 | est capable de produire). Les t\^aches peuvent avoir à traiter des gros volumes de donn\'ees et n'en ressortir qu'une partie. Cette | |
fois encore, il nous faut donc diff\'erencier l'entr\'ee et la sortie. Nous nommerons donc la quantit\'e de donn\'ees entrantes $q_i^-$ | 529 | 529 | fois encore, il nous faut donc diff\'erencier l'entr\'ee et la sortie. Nous nommerons donc la quantit\'e de donn\'ees entrantes $q_i^-$ | |
et la quantit\'e de donn\'ees sortantes $q_i^+$ pour une t\^ache $i$. | 530 | 530 | et la quantit\'e de donn\'ees sortantes $q_i^+$ pour une t\^ache $i$. | |
\item Le d\'ebit d'entr\'ee (ou de sortie) : Ce paramètre correspond au d\'ebit de donn\'ees que la t\^ache est capable de traiter ou qu'elle | 531 | 531 | \item Le d\'ebit d'entr\'ee (ou de sortie) : Ce paramètre correspond au d\'ebit de donn\'ees que la t\^ache est capable de traiter ou qu'elle | |
fournit en sortie. Il s'agit simplement de l'expression des deux pr\'ec\'edents paramètres. Nous d\'efinirons donc le d\'ebit entrant de la | 532 | 532 | fournit en sortie. Il s'agit simplement de l'expression des deux pr\'ec\'edents paramètres. Nous d\'efinirons donc le d\'ebit entrant de la | |
t\^ache $i$ comme $d_i^-\ =\ q_i^-\ *\ f_i^-$ et le d\'ebit sortant comme $d_i^+\ =\ q_i^+\ *\ f_i^+$. | 533 | 533 | t\^ache $i$ comme $d_i^-\ =\ q_i^-\ *\ f_i^-$ et le d\'ebit sortant comme $d_i^+\ =\ q_i^+\ *\ f_i^+$. | |
\item La taille de la t\^ache : La taille dans les FPGA \'etant limit\'ee, ce paramètre exprime donc la place qu'occupe la t\^ache au sein du bloc. | 534 | 534 | \item La taille de la t\^ache : La taille dans les FPGA \'etant limit\'ee, ce paramètre exprime donc la place qu'occupe la t\^ache au sein du bloc. | |
Nous nommerons $\mathcal{A}_i$ cette taille. | 535 | 535 | Nous nommerons $\mathcal{A}_i$ cette taille. | |
\item Les pr\'ed\'ecesseurs et successeurs d'une t\^ache : cela nous permet de connaître les t\^aches requises pour pouvoir traiter | 536 | 536 | \item Les pr\'ed\'ecesseurs et successeurs d'une t\^ache : cela nous permet de connaître les t\^aches requises pour pouvoir traiter | |
la t\^ache $i$ ainsi que les t\^aches qui en d\'ependent. Ces ensembles sont not\'es $\Gamma _i ^-$ et $ \Gamma _i ^+$ \\ | 537 | 537 | la t\^ache $i$ ainsi que les t\^aches qui en d\'ependent. Ces ensembles sont not\'es $\Gamma _i ^-$ et $ \Gamma _i ^+$ \\ | |
%TODO Est-ce vraiment un paramètre ? | 538 | 538 | %TODO Est-ce vraiment un paramètre ? | |
\end{itemize} | 539 | 539 | \end{itemize} | |
540 | 540 | |||
Ces diff\'erents paramètres communs sont fortement li\'es aux \'el\'ements de $\mathcal{P}_i$. Voici quelques exemples de relations | 541 | 541 | Ces diff\'erents paramètres communs sont fortement li\'es aux \'el\'ements de $\mathcal{P}_i$. Voici quelques exemples de relations | |
que nous avons identifi\'ees : | 542 | 542 | que nous avons identifi\'ees : | |
\begin{itemize} | 543 | 543 | \begin{itemize} | |
\item $ \delta _i \ = \ \mathcal{F}_{\delta}(\pi_i^-,\ \pi_i^+,\ d_i^-,\ d_i^+,\ \mathcal{P}_i) $ donne le temps d'ex\'ecution | 544 | 544 | \item $ \delta _i \ = \ \mathcal{F}_{\delta}(\pi_i^-,\ \pi_i^+,\ d_i^-,\ d_i^+,\ \mathcal{P}_i) $ donne le temps d'ex\'ecution | |
de la t\^ache en fonction de la pr\'ecision voulue, du d\'ebit et des paramètres internes. | 545 | 545 | de la t\^ache en fonction de la pr\'ecision voulue, du d\'ebit et des paramètres internes. | |
\item $ \pi _i ^+ \ = \ \mathcal{F}_{p}(\pi_i^-,\ \mathcal{P}_i) $, la fonction $\mathcal{F}_p$ donne la pr\'ecision en sortie selon la pr\'ecision de d\'epart | 546 | 546 | \item $ \pi _i ^+ \ = \ \mathcal{F}_{p}(\pi_i^-,\ \mathcal{P}_i) $, la fonction $\mathcal{F}_p$ donne la pr\'ecision en sortie selon la pr\'ecision de d\'epart | |
et les paramètres internes de la t\^ache. | 547 | 547 | et les paramètres internes de la t\^ache. | |
\item $d_i^+\ =\ \mathcal{F}_d(d_i^-, \mathcal{P}_i)$, la fonction $\mathcal{F}_d$ donne le d\'ebit sortant de la t\^ache en fonction du d\'ebit | 548 | 548 | \item $d_i^+\ =\ \mathcal{F}_d(d_i^-, \mathcal{P}_i)$, la fonction $\mathcal{F}_d$ donne le d\'ebit sortant de la t\^ache en fonction du d\'ebit | |
entrant et des variables internes de la t\^ache. | 549 | 549 | entrant et des variables internes de la t\^ache. | |
\item $\mathcal{A}_i\ =\ \mathcal{F}_A(\pi_i^-,\ \pi_i^+,\ d_i^-,\ d_i^+, \mathcal{P}_i)$ | 550 | 550 | \item $\mathcal{A}_i\ =\ \mathcal{F}_A(\pi_i^-,\ \pi_i^+,\ d_i^-,\ d_i^+, \mathcal{P}_i)$ | |
\end{itemize} | 551 | 551 | \end{itemize} | |
Pour le moment, nous ne sommes pas capables de donner une d\'efinition g\'en\'erale de ces fonctions. Mais en revanche, | 552 | 552 | Pour le moment, nous ne sommes pas capables de donner une d\'efinition g\'en\'erale de ces fonctions. Mais en revanche, | |
sur quelques exemples simples (cf. \ref{def-contraintes}), nous parvenons à donner une \'evaluation de ces fonctions. | 553 | 553 | sur quelques exemples simples (cf. \ref{def-contraintes}), nous parvenons à donner une \'evaluation de ces fonctions. | |
554 | 554 | |||
Maintenant que nous avons donn\'e toutes les notations utiles, nous allons \'enoncer des contraintes relatives à notre problème. Soit | 555 | 555 | Maintenant que nous avons donn\'e toutes les notations utiles, nous allons \'enoncer des contraintes relatives à notre problème. Soit | |
un DAG $G(V,\ E)$, on a pour toute arête $(i, j)\ \in\ E$ les in\'equations suivantes : | 556 | 556 | un DAG $G(V,\ E)$, on a pour toute arête $(i, j)\ \in\ E$ les in\'equations suivantes : | |
557 | 557 | |||
\paragraph{Contrainte de pr\'ecision :} | 558 | 558 | \paragraph{Contrainte de pr\'ecision :} | |
Cette in\'equation traduit la contrainte de pr\'ecision d'une t\^ache à l'autre : | 559 | 559 | Cette in\'equation traduit la contrainte de pr\'ecision d'une t\^ache à l'autre : | |
\begin{align*} | 560 | 560 | \begin{align*} | |
\pi _i ^+ \geq \pi _j ^- | 561 | 561 | \pi _i ^+ \geq \pi _j ^- | |
\end{align*} | 562 | 562 | \end{align*} | |
563 | 563 | |||
\paragraph{Contrainte de d\'ebit :} | 564 | 564 | \paragraph{Contrainte de d\'ebit :} | |
Cette in\'equation traduit la contrainte de d\'ebit d'une t\^ache à l'autre : | 565 | 565 | Cette in\'equation traduit la contrainte de d\'ebit d'une t\^ache à l'autre : | |
\begin{align*} | 566 | 566 | \begin{align*} | |
d _i ^+ = q _j ^- * (f_i + (1 / s_j) ) & \text{ où } s_j \text{ est une valeur positive de temporisation de la t\^ache} | 567 | 567 | d _i ^+ = q _j ^- * (f_i + (1 / s_j) ) & \text{ où } s_j \text{ est une valeur positive de temporisation de la t\^ache} | |
\end{align*} | 568 | 568 | \end{align*} | |
569 | 569 | |||
\paragraph{Contrainte de synchronisation :} | 570 | 570 | \paragraph{Contrainte de synchronisation :} | |
Il s'agit de la contrainte qui impose que si à un moment du traitement, le DAG se s\'epare en plusieurs branches parallèles | 571 | 571 | Il s'agit de la contrainte qui impose que si à un moment du traitement, le DAG se s\'epare en plusieurs branches parallèles | |
et qu'elles se rejoignent plus tard, la somme des latences sur chacune des branches soit la même. | 572 | 572 | et qu'elles se rejoignent plus tard, la somme des latences sur chacune des branches soit la même. | |
Plus formellement, s'il existe plusieurs chemins disjoints, partant de la t\^ache $s$ et allant à la t\^ache $f$ alors : | 573 | 573 | Plus formellement, s'il existe plusieurs chemins disjoints, partant de la t\^ache $s$ et allant à la t\^ache $f$ alors : | |
\begin{align*} | 574 | 574 | \begin{align*} | |
\forall \text{ chemin } \mathcal{C}1(s, .., f), | 575 | 575 | \forall \text{ chemin } \mathcal{C}1(s, .., f), | |
\forall \text{ chemin } \mathcal{C}2(s, .., f) | 576 | 576 | \forall \text{ chemin } \mathcal{C}2(s, .., f) | |
\text{ tel que } \mathcal{C}1 \neq \mathcal{C}2 | 577 | 577 | \text{ tel que } \mathcal{C}1 \neq \mathcal{C}2 | |
\Rightarrow | 578 | 578 | \Rightarrow | |
\sum _{i} ^{i \in \mathcal{C}1} \delta_i = \sum _{i} ^{i \in \mathcal{C}2} \delta_i | 579 | 579 | \sum _{i} ^{i \in \mathcal{C}1} \delta_i = \sum _{i} ^{i \in \mathcal{C}2} \delta_i | |
\end{align*} | 580 | 580 | \end{align*} | |
581 | 581 | |||
\paragraph{Contrainte de place :} | 582 | 582 | \paragraph{Contrainte de place :} | |
Cette in\'equation traduit la contrainte de place dans le FPGA. La taille max de la puce FPGA est nomm\'ee $\mathcal{A}_{FPGA}$ : | 583 | 583 | Cette in\'equation traduit la contrainte de place dans le FPGA. La taille max de la puce FPGA est nomm\'ee $\mathcal{A}_{FPGA}$ : | |
\begin{align*} | 584 | 584 | \begin{align*} | |
\sum ^{\text{t\^ache } i} \mathcal{A}_i \leq \mathcal{A}_{FPGA} | 585 | 585 | \sum ^{\text{t\^ache } i} \mathcal{A}_i \leq \mathcal{A}_{FPGA} | |
\end{align*} | 586 | 586 | \end{align*} | |
587 | 587 | |||
\subsection{Exemples de mod\'elisation} | 588 | 588 | \subsection{Exemples de mod\'elisation} | |
\label{exemples-modeles} | 589 | 589 | \label{exemples-modeles} |