diff options
author | Camil Staps | 2017-01-23 22:30:19 +0100 |
---|---|---|
committer | Camil Staps | 2017-01-23 22:30:19 +0100 |
commit | 651b2de2f2df33af8c607fc419355e92eb0743f1 (patch) | |
tree | e1b7adc6249c3ea38e84b78c7b89b75489340aa7 /presentation | |
parent | Discussion (diff) |
eindpresentatie
Diffstat (limited to 'presentation')
-rw-r--r-- | presentation/pres.tex | 163 |
1 files changed, 89 insertions, 74 deletions
diff --git a/presentation/pres.tex b/presentation/pres.tex index 70aa594..adc0ead 100644 --- a/presentation/pres.tex +++ b/presentation/pres.tex @@ -1,45 +1,46 @@ -\documentclass[british]{beamer} +% vim: spelllang=nl: +\documentclass{beamer} -\usepackage[british]{babel} -\usepackage[babel=true]{csquotes} +\usepackage[dutch]{babel} \usepackage{tikz} \usetikzlibrary{calc} \usepackage{pgfplots} -\title{Code generation for the Thumb-2 instruction set} +\title{Codegeneratie voor de Thumb-2 instructieset} \author[Camil Staps]{ Camil Staps\\[1em]\small{ - \emph{Supervisors:}\\ + \emph{Begeleiders:}\\ prof. dr. dr.h.c. ir. M.J. Plasmeijer\\ drs. J.H.G. van Groningen}} -\date{Monday 9\textsuperscript{th} January, 2017} +\date{Dinsdag 24 januari 2017} \begin{document} \maketitle -\section{Introduction} +\section{Inleiding} -\begin{frame}{ARM and Thumb} +\begin{frame}{ARM en Thumb} \begin{itemize} - \item Widely used in embedded systems - \item Three instruction sets: + \item Veel gebruikt in embedded systems + \item Drie instructiesets: \begin{itemize} \item ARM (32-bit) \item Thumb (16-bit) - \item Thumb-2 (mixture) + \item Thumb-2 (combinatie) \end{itemize} - \item Thumb-2 advantages: + \item Meeste processoren ondersteunen alledrie + \item Thumb-2 voordelen: \begin{itemize} - \item Smaller code than ARM - \item Simpler devices than ARM - \item Faster than Thumb + \item Kleinere code dan ARM + \item Simpelere systemen dan ARM + \item Sneller dan Thumb \end{itemize} - \item Interesting differences for code generation: + \item Interessante eigenschappen voor codegeneratie: \begin{itemize} - \item How to deal with instructions that do not exist in Thumb-2? - \item How to use as many 16-bit instructions as possible? + \item Wat te doen met instructies die niet bestaan in Thumb-2? + \item Hoe kunnen we het aantal 16-bit instructies maximaliseren? \end{itemize} \end{itemize} \end{frame} @@ -47,39 +48,50 @@ \begin{frame}{Clean} \begin{minipage}{.65\linewidth} \begin{itemize} - \item Purely functional, lazy programming language - \item Compilation in several steps - \item Already had an ARM code generator - \item Made one for Thumb-2 + \item Pure functionele luie programmeertaal + \item Compilatie gaat in meerdere stappen + \item Codegenerator voor ARM bestond al + \item \emph{Case study}: aanpassen voor Thumb-2 \end{itemize} \end{minipage}% - \begin{minipage}{.35\linewidth} + \hfill + \begin{minipage}{.3\linewidth} \centering \footnotesize \begin{tikzpicture}[every node/.style={rectangle,draw},every path/.style={draw},->] \node (clean) {Clean}; \node[below of=clean] (core) {Core Clean}; \node[below of=core] (abc) {ABC-code}; - \node[below of=abc] (abstr) {Abstract Von-Neumann}; + \node[below of=abc] (abstr) {Abstracte Von-Neumann}; \node[below of=abstr] (target) {Target machine}; \path (clean) -- (core) -- (abc) -- (abstr) -- (target); \end{tikzpicture} \end{minipage} \end{frame} -\section{Register allocation} +\section{Problemen oplossen} -\begin{frame}{Register allocation --- introduction} +\begin{frame}{Problemen oplossen} \begin{itemize} - \item ARM has 16 registers; Thumb only lower eight - \item Higher registers can be accessed through 32-bit instructions in Thumb-2 - \item Want to put frequently used registers in lower half + \item Program counter opslaan voor functieaanroepen + \item Informatie in de laagste twee bits + \item Kleinere offsets in load instructies \end{itemize} \end{frame} -\begin{frame}{Register allocation --- collecting data} +\section{Registerallocatie} + +\begin{frame}{Registerallocatie --- inleiding} + \begin{itemize} + \item ARM heeft 16 registers; Thumb kan alleen bij de laagste 8 + \item Hogere registers worden benaderd met 32-bit instructies in Thumb-2 + \item Zet veelgebruikte registers in de laagste helft + \end{itemize} +\end{frame} + +\begin{frame}{Registerallocatie --- data} \begin{itemize} - \item Count register usage in the Clean compiler: + \item Registergebruik in de Clean compiler: \end{itemize} \begin{center} @@ -90,7 +102,7 @@ , bar width=.5em , xmin=0 , height=0.6\textheight - , symbolic y coords={S0,A ptr.,A0,Heap ptr.,A1,B0,A2,B ptr.,B1,S1,B2,B3,B4,Heap ctr.,A3} + , symbolic y coords={Scratch 0,A-stack pointer,A-stack 0,Heap pointer,A-stack 1,B-stack 0,A-stack 2,B-stack pointer,B-stack 1,Scratch 1,B-stack 2,B-stack 3,B-stack 4,Heap counter,A-stack 3} , ytick=data , scaled x ticks=real:1 , xtick scale label code/.code={} @@ -98,38 +110,40 @@ , compat={1.3} ] \addplot coordinates { - (378618,S0) - (270274,A ptr.) - (218018,A0) - (166284,Heap ptr.) - (152821,A1) - (110390,B0) - (107481,A2) - (102640,B ptr.) - ( 64496,B1) - ( 55526,S1) - ( 41092,B2) - ( 25924,B3) - ( 15699,B4) - ( 14930,Heap ctr.) - ( 9413,A3) + (378618,Scratch 0) + (270274,A-stack pointer) + (218018,A-stack 0) + (166284,Heap pointer) + (152821,A-stack 1) + (110390,B-stack 0) + (107481,A-stack 2) + (102640,B-stack pointer) + ( 64496,B-stack 1) + ( 55526,Scratch 1) + ( 41092,B-stack 2) + ( 25924,B-stack 3) + ( 15699,B-stack 4) + ( 14930,Heap counter) + ( 9413,A-stack 3) }; - \draw ($({axis cs:85000,S0})-(0,2em)$) -- ($({axis cs:85000,A3})+(0,2em)$); + \draw ($({axis cs:85000,Scratch 0})-(0,2em)$) -- ($({axis cs:85000,A-stack 3})+(0,2em)$); \end{axis} \end{tikzpicture} \end{center} \end{frame} -\begin{frame}{Register allocation --- the foreign function interface} +\begin{frame}{Registerallocatie --- de foreign function interface} \begin{itemize} - \item ARM, Inc. describes how function calls should take place - \item Need to adhere to the specification to be able to link with C - \item Lowest four registers are not preserved upon a function call - \item Trade-off: code size vs. efficient foreign function interface + \item ARM, Inc. beschrijft hoe functieaanroepen plaatsvinden + \item We moeten hieraan voldoen om met C te kunnen linken + \item Laagste vier registers zijn functieargumenten + \item Meeste functieargumenten komen van de B-stack + \item Vier elementen van de B-stack in de laagste acht registers is te veel + \item Trade-off: programmagrootte vs. effici\"ente foreign function interface \end{itemize} \end{frame} -\begin{frame}{Register allocation --- results} +\begin{frame}{Registerallocatie --- resultaten} \begin{center} \begin{tikzpicture} \begin{axis} @@ -150,33 +164,33 @@ \addplot coordinates { (3827868,0) [81.6\%] }; \addplot coordinates { (4385964,0) [93.5\%] }; \addplot coordinates { (4692628,0) [100\%] }; - \legend{Thumb (Code size), Thumb (FFI), ARM} + \legend{Thumb (programmagrootte), Thumb (FFI), ARM} \end{axis} \end{tikzpicture} \end{center} \end{frame} -\section{Results} +\section{Resultaten} \begin{frame}{Results} \begin{itemize} - \item Benchmarks with Clean's \enquote{small examples} - \item Code size decrease: $\overline{x}=17.3\%, \sigma=6.3\text{pp.}$\\ - Without tiny programs: $\overline{x}=21.0\%, \sigma=2.6\text{pp.}$ - \item Running time increase: $\overline{x}=4.8\%, \sigma=3.1\text{pp.}$\\ - Without tiny programs: $\overline{x}=3.7\%, \sigma=2.04\text{pp.}$ - \item Subroutine calls are expensive, - so Thumb-2 performance is worse for highly recursive programs + \item Benchmarks met Clean's \emph{small examples} + \item Winst op programmagrootte: $\overline{x}=17.3\%, \sigma=6.3\text{pp.}$\\ + Zonder kleine programma's: $\overline{x}=21.0\%, \sigma=2.6\text{pp.}$ + \item Verlies op snelheid: $\overline{x}=4.8\%, \sigma=3.1\text{pp.}$\\ + Zonder kleine programma's: $\overline{x}=3.7\%, \sigma=2.04\text{pp.}$ + \item Functieaanroepen zijn duur, + dus Thumb-2 geeft slechte resultaten voor programma's als Fibonacci \end{itemize} \end{frame} -\begin{frame}{Matching programs and instruction sets?} +\begin{frame}{Programma's en instructiesets matchen?} \begin{center} \begin{tikzpicture} \begin{axis} [ width=.8\textwidth - , xlabel={Size decrease (\%)} - , ylabel={Running time increase (\%)} + , xlabel={Winst programmagrootte (\%)} + , ylabel={Verlies snelheid (\%)} , mark options={scale=2} , scatter/classes={% a={mark=x,draw=red}, @@ -202,20 +216,21 @@ \end{center} \end{frame} -\section{Further work} +\section{Vooruit denken} -\begin{frame}{Further work} +\begin{frame}{Vooruit denken} \begin{itemize} - \item Branch optimisation (up to 1.2pp. code size win) - \item Subroutine calls need extra instructions on Thumb-2; - optimise this to save up to 4pp. on code size + \item Branchoptimalisatie (tot 2.2pp. winst programmagrootte) + \item Optimaliseer functieaanroepen voor tot $\approx4$pp. winst programmagrootte + \item Thumb-2-\emph{only} instructies voor snelheidswinst \end{itemize} - Overall expected code size win: $\approx$25\% (comparable to GCC) + Uiteindelijke winst programmagrootte (schatting): $\approx$25\% \begin{itemize} - \item Copying collector needs some fine-tuning - \item Two other garbage collectors not considered yet + \item Copying collector moet nog wat bijgeschaafd worden + \item Andere garbage collectors zijn nog niet bekeken + \item Foreign function interface moet verder getest worden \end{itemize} \end{frame} |