summary | refs | log | tree | commit | diff
path: root/presentation
diff options
context:
space:
mode:
Diffstat (limited to 'presentation')
-rw-r--r-- presentation/pres.tex 163
1 files changed, 89 insertions, 74 deletions
diff --git a/presentation/pres.tex b/presentation/pres.tex
index 70aa594..adc0ead 100644
--- a/presentation/pres.tex
+++ b/presentation/pres.tex
@@ -1,45 +1,46 @@
-\documentclass[british]{beamer}
+% vim: spelllang=nl:
+\documentclass{beamer}
-\usepackage[british]{babel}
-\usepackage[babel=true]{csquotes}
+\usepackage[dutch]{babel}
\usepackage{tikz}
\usetikzlibrary{calc}
\usepackage{pgfplots}
-\title{Code generation for the Thumb-2 instruction set}
+\title{Codegeneratie voor de Thumb-2 instructieset}
\author[Camil Staps]{
Camil Staps\\[1em]\small{
- \emph{Supervisors:}\\
+ \emph{Begeleiders:}\\
prof. dr. dr.h.c. ir. M.J. Plasmeijer\\
drs. J.H.G. van Groningen}}
-\date{Monday 9\textsuperscript{th} January, 2017}
+\date{Dinsdag 24 januari 2017}
\begin{document}
\maketitle
-\section{Introduction}
+\section{Inleiding}
-\begin{frame}{ARM and Thumb}
+\begin{frame}{ARM en Thumb}
\begin{itemize}
- \item Widely used in embedded systems
- \item Three instruction sets:
+ \item Veel gebruikt in embedded systems
+ \item Drie instructiesets:
\begin{itemize}
\item ARM (32-bit)
\item Thumb (16-bit)
- \item Thumb-2 (mixture)
+ \item Thumb-2 (combinatie)
\end{itemize}
- \item Thumb-2 advantages:
+ \item Meeste processoren ondersteunen alle drie
+ \item Thumb-2 voordelen:
\begin{itemize}
- \item Smaller code than ARM
- \item Simpler devices than ARM
- \item Faster than Thumb
+ \item Kleinere code dan ARM
+ \item Simpelere systemen dan ARM
+ \item Sneller dan Thumb
\end{itemize}
- \item Interesting differences for code generation:
+ \item Interessante eigenschappen voor codegeneratie:
\begin{itemize}
- \item How to deal with instructions that do not exist in Thumb-2?
- \item How to use as many 16-bit instructions as possible?
+ \item Wat te doen met instructies die niet bestaan in Thumb-2?
+ \item Hoe kunnen we het aantal 16-bit instructies maximaliseren?
\end{itemize}
\end{itemize}
\end{frame}
@@ -47,39 +48,50 @@
\begin{frame}{Clean}
\begin{minipage}{.65\linewidth}
\begin{itemize}
- \item Purely functional, lazy programming language
- \item Compilation in several steps
- \item Already had an ARM code generator
- \item Made one for Thumb-2
+ \item Puur functionele, luie programmeertaal
+ \item Compilatie gaat in meerdere stappen
+ \item Codegenerator voor ARM bestond al
+ \item \emph{Case study}: aanpassen voor Thumb-2
\end{itemize}
\end{minipage}%
- \begin{minipage}{.35\linewidth}
+ \hfill
+ \begin{minipage}{.3\linewidth}
\centering
\footnotesize
\begin{tikzpicture}[every node/.style={rectangle,draw},every path/.style={draw},->]
\node (clean) {Clean};
\node[below of=clean] (core) {Core Clean};
\node[below of=core] (abc) {ABC-code};
- \node[below of=abc] (abstr) {Abstract Von-Neumann};
+ \node[below of=abc] (abstr) {Abstracte Von-Neumann};
\node[below of=abstr] (target) {Target machine};
\path (clean) -- (core) -- (abc) -- (abstr) -- (target);
\end{tikzpicture}
\end{minipage}
\end{frame}
-\section{Register allocation}
+\section{Problemen oplossen}
-\begin{frame}{Register allocation --- introduction}
+\begin{frame}{Problemen oplossen}
\begin{itemize}
- \item ARM has 16 registers; Thumb only lower eight
- \item Higher registers can be accessed through 32-bit instructions in Thumb-2
- \item Want to put frequently used registers in lower half
+ \item Program counter opslaan voor functieaanroepen
+ \item Informatie in de laagste twee bits
+ \item Kleinere offsets in load-instructies
\end{itemize}
\end{frame}
-\begin{frame}{Register allocation --- collecting data}
+\section{Registerallocatie}
+
+\begin{frame}{Registerallocatie --- inleiding}
+ \begin{itemize}
+ \item ARM heeft 16 registers; Thumb kan alleen bij de laagste 8
+ \item Hogere registers worden benaderd met 32-bit instructies in Thumb-2
+ \item Zet veelgebruikte registers in de laagste helft
+ \end{itemize}
+\end{frame}
+
+\begin{frame}{Registerallocatie --- data}
\begin{itemize}
- \item Count register usage in the Clean compiler:
+ \item Registergebruik in de Clean compiler:
\end{itemize}
\begin{center}
@@ -90,7 +102,7 @@
, bar width=.5em
, xmin=0
, height=0.6\textheight
- , symbolic y coords={S0,A ptr.,A0,Heap ptr.,A1,B0,A2,B ptr.,B1,S1,B2,B3,B4,Heap ctr.,A3}
+ , symbolic y coords={Scratch 0,A-stack pointer,A-stack 0,Heap pointer,A-stack 1,B-stack 0,A-stack 2,B-stack pointer,B-stack 1,Scratch 1,B-stack 2,B-stack 3,B-stack 4,Heap counter,A-stack 3}
, ytick=data
, scaled x ticks=real:1
, xtick scale label code/.code={}
@@ -98,38 +110,40 @@
, compat={1.3}
]
\addplot coordinates {
- (378618,S0)
- (270274,A ptr.)
- (218018,A0)
- (166284,Heap ptr.)
- (152821,A1)
- (110390,B0)
- (107481,A2)
- (102640,B ptr.)
- ( 64496,B1)
- ( 55526,S1)
- ( 41092,B2)
- ( 25924,B3)
- ( 15699,B4)
- ( 14930,Heap ctr.)
- ( 9413,A3)
+ (378618,Scratch 0)
+ (270274,A-stack pointer)
+ (218018,A-stack 0)
+ (166284,Heap pointer)
+ (152821,A-stack 1)
+ (110390,B-stack 0)
+ (107481,A-stack 2)
+ (102640,B-stack pointer)
+ ( 64496,B-stack 1)
+ ( 55526,Scratch 1)
+ ( 41092,B-stack 2)
+ ( 25924,B-stack 3)
+ ( 15699,B-stack 4)
+ ( 14930,Heap counter)
+ ( 9413,A-stack 3)
};
- \draw ($({axis cs:85000,S0})-(0,2em)$) -- ($({axis cs:85000,A3})+(0,2em)$);
+ \draw ($({axis cs:85000,Scratch 0})-(0,2em)$) -- ($({axis cs:85000,A-stack 3})+(0,2em)$);
\end{axis}
\end{tikzpicture}
\end{center}
\end{frame}
-\begin{frame}{Register allocation --- the foreign function interface}
+\begin{frame}{Registerallocatie --- de foreign function interface}
\begin{itemize}
- \item ARM, Inc. describes how function calls should take place
- \item Need to adhere to the specification to be able to link with C
- \item Lowest four registers are not preserved upon a function call
- \item Trade-off: code size vs. efficient foreign function interface
+ \item ARM, Inc. beschrijft hoe functieaanroepen plaatsvinden
+ \item We moeten hieraan voldoen om met C te kunnen linken
+ \item Laagste vier registers zijn functieargumenten
+ \item Meeste functieargumenten komen van de B-stack
+ \item Vier elementen van de B-stack in de laagste acht registers is te veel
+ \item Trade-off: programmagrootte vs. effici\"ente foreign function interface
\end{itemize}
\end{frame}
-\begin{frame}{Register allocation --- results}
+\begin{frame}{Registerallocatie --- resultaten}
\begin{center}
\begin{tikzpicture}
\begin{axis}
@@ -150,33 +164,33 @@
\addplot coordinates { (3827868,0) [81.6\%] };
\addplot coordinates { (4385964,0) [93.5\%] };
\addplot coordinates { (4692628,0) [100\%] };
- \legend{Thumb (Code size), Thumb (FFI), ARM}
+ \legend{Thumb (programmagrootte), Thumb (FFI), ARM}
\end{axis}
\end{tikzpicture}
\end{center}
\end{frame}
-\section{Results}
+\section{Resultaten}
\begin{frame}{Results}
\begin{itemize}
- \item Benchmarks with Clean's \enquote{small examples}
- \item Code size decrease: $\overline{x}=17.3\%, \sigma=6.3\text{pp.}$\\
- Without tiny programs: $\overline{x}=21.0\%, \sigma=2.6\text{pp.}$
- \item Running time increase: $\overline{x}=4.8\%, \sigma=3.1\text{pp.}$\\
- Without tiny programs: $\overline{x}=3.7\%, \sigma=2.04\text{pp.}$
- \item Subroutine calls are expensive,
- so Thumb-2 performance is worse for highly recursive programs
+ \item Benchmarks met Clean's \emph{small examples}
+ \item Winst op programmagrootte: $\overline{x}=17.3\%, \sigma=6.3\text{pp.}$\\
+ Zonder kleine programma's: $\overline{x}=21.0\%, \sigma=2.6\text{pp.}$
+ \item Verlies op snelheid: $\overline{x}=4.8\%, \sigma=3.1\text{pp.}$\\
+ Zonder kleine programma's: $\overline{x}=3.7\%, \sigma=2.04\text{pp.}$
+ \item Functieaanroepen zijn duur,
+ dus Thumb-2 geeft slechte resultaten voor programma's als Fibonacci
\end{itemize}
\end{frame}
-\begin{frame}{Matching programs and instruction sets?}
+\begin{frame}{Programma's en instructiesets matchen?}
\begin{center}
\begin{tikzpicture}
\begin{axis}
[ width=.8\textwidth
- , xlabel={Size decrease (\%)}
- , ylabel={Running time increase (\%)}
+ , xlabel={Winst programmagrootte (\%)}
+ , ylabel={Verlies snelheid (\%)}
, mark options={scale=2}
, scatter/classes={%
a={mark=x,draw=red},
@@ -202,20 +216,21 @@
\end{center}
\end{frame}
-\section{Further work}
+\section{Vooruit denken}
-\begin{frame}{Further work}
+\begin{frame}{Vooruit denken}
\begin{itemize}
- \item Branch optimisation (up to 1.2pp. code size win)
- \item Subroutine calls need extra instructions on Thumb-2;
- optimise this to save up to 4pp. on code size
+ \item Branchoptimalisatie (tot 2.2pp. winst programmagrootte)
+ \item Optimaliseer functieaanroepen voor tot $\approx4$pp. winst programmagrootte
+ \item Thumb-2-\emph{only} instructies voor snelheidswinst
\end{itemize}
- Overall expected code size win: $\approx$25\% (comparable to GCC)
+ Uiteindelijke winst programmagrootte (schatting): $\approx$25\%
\begin{itemize}
- \item Copying collector needs some fine-tuning
- \item Two other garbage collectors not considered yet
+ \item Copying collector moet nog wat bijgeschaafd worden
+ \item Andere garbage collectors zijn nog niet bekeken
+ \item Foreign function interface moet verder getest worden
\end{itemize}
\end{frame}