diff options
author | Camil Staps | 2016-12-29 23:42:00 +0100 |
---|---|---|
committer | Camil Staps | 2016-12-29 23:42:00 +0100 |
commit | 01fb8cd0a75651784e7d6baa37b7fc8363781bf9 (patch) | |
tree | d9ff318ad2708de49841f56d28d8042e00082de3 | |
parent | Current status; small fixes; more optimisation ideas in log (diff) |
Comments from The Vu
-rw-r--r-- | log.md | 2 | ||||
-rw-r--r-- | thesis/further.tex | 11 | ||||
-rw-r--r-- | thesis/intro.tex | 20 | ||||
-rw-r--r-- | thesis/reg-alloc.tex | 10 | ||||
-rw-r--r-- | thesis/results.tex | 2 | ||||
-rw-r--r-- | thesis/storing-pc.tex | 2 | ||||
-rw-r--r-- | thesis/thesis.bib | 23 | ||||
-rw-r--r-- | thesis/thesis.tex | 1 |
8 files changed, 41 insertions, 30 deletions
@@ -620,3 +620,5 @@ need to look at register optimisations. - Capitalisation in the reference list - More optimisations: branch optimisation - More optimisations: tail recursion +- Mention that the register counting method is simplistic (or has this been + mentioned already?) diff --git a/thesis/further.tex b/thesis/further.tex new file mode 100644 index 0000000..e499b7f --- /dev/null +++ b/thesis/further.tex @@ -0,0 +1,11 @@ +\section{Further work} +\label{sec:further} + +\begin{multicols}{2} + +\todo{ + There are some things that are still to be done and some optimisation vectors that are not yet considered. + What will be written depends on how far I get in the coming weeks with this. +} + +\end{multicols} diff --git a/thesis/intro.tex b/thesis/intro.tex index f92b21d..3d6347c 100644 --- a/thesis/intro.tex +++ b/thesis/intro.tex @@ -29,7 +29,7 @@ The main differences between ARM and Thumb-2 are the following: In ARM, every instruction has a 4-bit conditional field that allows for conditional execution. In the Thumb instruction set, all conditional instructions except branches have to be in an \enquote{IT block}. A first \ual{it} instruction gives the base condition and a then-else pattern. -The following instructions are executed conditionally. +The statements after the \ual{it} instruction are executed conditionally. 
For example: \begin{minted}{ual} @@ -59,7 +59,7 @@ For these instructions there exist 32-bit variants that can address all sixteen \begin{tikzpicture}[clean] \node (start) {\clean{Start}}; \end{tikzpicture} - \caption{Initially} + \caption{Initially.} \end{subfigure}% \begin{subfigure}[b]{.2\linewidth} \centering @@ -68,7 +68,7 @@ For these instructions there exist 32-bit variants that can address all sixteen \node[below=of fac.arg1.south] (4) {\clean{4}}; \draw (fac.arg1) -- (4.north); \end{tikzpicture} - \caption{Applying \clean{Start}} + \caption{Applying \clean{Start}.} \end{subfigure}% \begin{subfigure}[b]{.3\linewidth} \centering @@ -84,7 +84,7 @@ For these instructions there exist 32-bit variants that can address all sixteen \draw (minus.arg1) |- (4.east); \draw (minus.arg2) -- (1.west); \end{tikzpicture} - \caption{Applying \clean{fac n}} + \caption{Applying \clean{fac n}.} \end{subfigure}% \begin{subfigure}[b]{.2\linewidth} \centering @@ -97,16 +97,16 @@ For these instructions there exist 32-bit variants that can address all sixteen \draw (times.arg2) -- (fac.west); \draw (fac.arg1) -- (3.north); \end{tikzpicture} - \caption{Applying \clean{-}} + \caption{Applying \clean{-}.} \end{subfigure} - \caption{Rewriting a Clean node\label{fig:intro:rewriting}} + \caption{Rewriting a Clean node.\label{fig:intro:rewriting}} \end{figure*} \subsubsection{Interworking} \label{sec:intro:interworking} -The ARM and Thumb instruction sets are designed to \emph{interwork}: +The ARM and Thumb instruction sets are designed to interwork: different parts of a program can be assembled for different instruction sets - and it is possible to switch instruction set when the program counter is written to~\parencite[A4.1]{armv7ar}. + and it is possible to switch instruction set when an instruction writes to the program counter~\parencite[A4.1]{armv7ar}. 
The Thumb-2 code generator proposed in this thesis does not produce ARM code, though the existence of the interworking facility has effects on the techniques that can be used in it. @@ -178,7 +178,7 @@ This function uses the B-stack, for basic values, for its arguments and return v More complex types would be passed on the A-stack. The \clean{Start} rule compiles to% - \footnote{Again, the code has been shorted insignificantly for brevity.}: + \footnote{The code has been shortened insignificantly for brevity.}: \begin{minted}[tabsize=4]{abc} __fac_Start @@ -209,7 +209,7 @@ While the ABC-machine can be implemented in a straightforward manner using macro it introduces several optimisations, some of which are target-dependent. The ARM code for the factorial example is as follows% - \footnote{Some irrelevant peculiarities have been removed for brevity and clarity.}: + \footnote{Some irrelevant peculiarities have been removed for brevity.}: \begin{minted}{ual} sfac_P1: diff --git a/thesis/reg-alloc.tex b/thesis/reg-alloc.tex index 482cd8c..beb83c5 100644 --- a/thesis/reg-alloc.tex +++ b/thesis/reg-alloc.tex @@ -37,7 +37,7 @@ \draw ($({axis cs:85000,S0})-(0,2em)$) -- ($({axis cs:85000,A3})+(0,2em)$); \end{axis} \end{tikzpicture} - \subcaption{In the Clean compiler} + \subcaption{In the Clean compiler.} \end{subfigure}% \begin{subfigure}{.5\linewidth} \begin{tikzpicture} @@ -71,9 +71,9 @@ \draw ($({axis cs:155,S0})-(0,2em)$) -- ($({axis cs:155,A3})+(0,2em)$); \end{axis} \end{tikzpicture} - \subcaption{In the RTS\label{fig:reg-usage:rts}} + \subcaption{In the RTS.\label{fig:reg-usage:rts}} \end{subfigure}% - \caption{Register usage with the Thumb-2 backend\label{fig:reg-usage}} + \caption{Register usage with the Thumb-2 backend.\label{fig:reg-usage}} \end{figure} % pi@rasppi:~/clean/src/compiler$ ~/count-regs.sh @@ -182,7 +182,7 @@ In Clean programs, the ARM registers are used as follows~\parencite[\texttt{arms S0 & r12 & \highlight{r0} \\ \end{tabular} \end{subfigure}% - 
\caption{Register allocation in the ARM and Thumb backends\label{tab:reg-alloc:arm-and-thumb}} + \caption{Register allocation in the ARM and Thumb backends.\label{tab:reg-alloc:arm-and-thumb}} \end{table*} In \cref{fig:reg-usage}, we count for each register the number of instructions it occurs in, in the code generated for the Clean compiler~\parencite{cocl}. @@ -251,7 +251,7 @@ This is much more complicated, and for a rough estimate the simplistic method us \addplot coordinates { (69896,0) [100\%] }; \end{axis} \end{tikzpicture} - \caption{Code size for different backends\label{fig:reg-alloc:results}} + \caption{Code size for different backends.\label{fig:reg-alloc:results}} \end{figure*} \subsection{Optimisation} diff --git a/thesis/results.tex b/thesis/results.tex index 2f6de50..76afb27 100644 --- a/thesis/results.tex +++ b/thesis/results.tex @@ -18,7 +18,7 @@ & Diff. (\%) & 11.9 & 3.6 & 5.1 & 5.9 & --- & 3.7 & --- & --- & --- & 5.4 & 0.9 & 2.6 & 5.2 & 0.0 & --- & 8.0 & --- & --- \\ \end{tabular} } - \caption{Code size and running time comparison for ARM and Thumb-2\label{tab:results}} + \caption{Code size and running time comparison for ARM and Thumb-2.\label{tab:results}} \end{table*} % Program Code size Running time (+gc) Nr. runs diff --git a/thesis/storing-pc.tex b/thesis/storing-pc.tex index bfb8442..4bae7fb 100644 --- a/thesis/storing-pc.tex +++ b/thesis/storing-pc.tex @@ -157,7 +157,7 @@ For this reason it is better to use S0 whenever possible. \subsection{Comparison} \label{sec:storing-pc:comparison} Assuming the worst case, that all instructions in the jump block are wide, we need four more bytes in Thumb than in ARM. -As a benchmark, the Clean compiler has 41,006 jumps of this kind in 1,253,978 instructions, a rough 3.27\%. +As a benchmark, the Clean compiler has 41,006 jumps of this kind in 1,253,978 instructions, or approximately 3.27\%. 
The four extra bytes in Thumb mean a size increase of $41006\cdot4\approx160$KiB on the 5.3MiB file, an increase of 3.00\%. As for the time complexity: every jump requires an extra instruction cycle. diff --git a/thesis/thesis.bib b/thesis/thesis.bib index f43bc26..4156ff0 100644 --- a/thesis/thesis.bib +++ b/thesis/thesis.bib @@ -1,41 +1,41 @@ @manual{armv7ar, - label="ARM Ltd.", + author="{ARM Ltd.}", key="ARM Ltd. 1996", title="ARM Architecture Reference Manual. ARMv7-A and ARMv7-R edition", year=1996 } @manual{armv7m, - label="ARM Ltd.", + author="{ARM Ltd.}", key="ARM Ltd. 2006", title="ARMv7-M Architecture Reference Manual", year=2006 } @manual{armv8a, - label="ARM Ltd.", + author="{ARM Ltd.}", key="ARM Ltd. 2013", title="ARM Architecture Reference Manual. ARMv8, for ARMv8-A architecture profile (Beta)", year=2013 } @manual{armcallstd, - label="ARM Ltd.", + author="{ARM Ltd.}", key="ARM Ltd. 2015", title="Procedure Call Standard for the ARM Architecture", year=2015 } @manual{cleanlangrep, - title="Clean Language Report. 
Version 2.2", + title="Clean Language Report", + subtitle="Version 2.2", year=2011, author={Rinus Plasmeijer and Marco {\VAN{Eekelen}} and John {\VAN{Groningen}}}, organization="Department of Software Technology, University of Nijmegen" } @Online{clean, - label="Software Technology Research Group", - key="Software Technology Research Group", + author="{Software Technology Research Group}", title="The Clean Home Page", organization="Radboud University Nij\-me\-gen", url="http://clean.cs.ru.nl/", @@ -65,8 +65,7 @@ } @Online{armcg, - key="ARM CG", - label="ARM CG", + author="{Software Technology Research Group}", title="Clean's ARM code generator", subtitle="Revision 295", organization="Radboud University Nijmegen", @@ -76,8 +75,7 @@ } @Online{armrts, - key="ARM RTS", - label="ARM RTS", + author="{Software Technology Research Group}", title="Clean's ARM run-time system", subtitle="Revision 387", organization="Radboud University Nijmegen", @@ -87,8 +85,7 @@ } @Online{cocl, - key="Clean compiler", - label="Clean compiler", + author="{Software Technology Research Group}", title="The Clean compiler", subtitle="Bootstrap from intermediate ABC files, 32-bit.", organization="Radboud University Nijmegen", diff --git a/thesis/thesis.tex b/thesis/thesis.tex index 7ebb7ad..95abdee 100644 --- a/thesis/thesis.tex +++ b/thesis/thesis.tex @@ -54,6 +54,7 @@ It produces on average 20\% smaller code than the ARM code generator, which is o \input{load-offsets} \input{reg-alloc} \input{results} +\input{further} \ifdraft\else \cleardoublepage |