summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCamil Staps2016-12-29 23:42:00 +0100
committerCamil Staps2016-12-29 23:42:00 +0100
commit01fb8cd0a75651784e7d6baa37b7fc8363781bf9 (patch)
treed9ff318ad2708de49841f56d28d8042e00082de3
parentCurrent status; small fixes; more optimisation ideas in log (diff)
Comments from The Vu
-rw-r--r--log.md2
-rw-r--r--thesis/further.tex11
-rw-r--r--thesis/intro.tex20
-rw-r--r--thesis/reg-alloc.tex10
-rw-r--r--thesis/results.tex2
-rw-r--r--thesis/storing-pc.tex2
-rw-r--r--thesis/thesis.bib23
-rw-r--r--thesis/thesis.tex1
8 files changed, 41 insertions, 30 deletions
diff --git a/log.md b/log.md
index b6ac3c4..5f80202 100644
--- a/log.md
+++ b/log.md
@@ -620,3 +620,5 @@ need to look at register optimisations.
- Capitalisation in the reference list
- More optimisations: branch optimisation
- More optimisations: tail recursion
+- Mention that the register counting method is simplistic (or has this been
+ mentioned already?)
diff --git a/thesis/further.tex b/thesis/further.tex
new file mode 100644
index 0000000..e499b7f
--- /dev/null
+++ b/thesis/further.tex
@@ -0,0 +1,11 @@
+\section{Further work}
+\label{sec:further}
+
+\begin{multicols}{2}
+
+\todo{
+ There are some things that are still to be done and some optimisation vectors that have not yet been considered.
+ What will be written depends on how far I get in the coming weeks with this.
+}
+
+\end{multicols}
diff --git a/thesis/intro.tex b/thesis/intro.tex
index f92b21d..3d6347c 100644
--- a/thesis/intro.tex
+++ b/thesis/intro.tex
@@ -29,7 +29,7 @@ The main differences between ARM and Thumb-2 are the following:
In ARM, every instruction has a 4-bit conditional field that allows for conditional execution.
In the Thumb instruction set, all conditional instructions except branches have to be in an \enquote{IT block}.
A first \ual{it} instruction gives the base condition and a then-else pattern.
-The following instructions are executed conditionally.
+The statements after the \ual{it} instruction are executed conditionally.
For example:
\begin{minted}{ual}
@@ -59,7 +59,7 @@ For these instructions there exist 32-bit variants that can address all sixteen
\begin{tikzpicture}[clean]
\node (start) {\clean{Start}};
\end{tikzpicture}
- \caption{Initially}
+ \caption{Initially.}
\end{subfigure}%
\begin{subfigure}[b]{.2\linewidth}
\centering
@@ -68,7 +68,7 @@ For these instructions there exist 32-bit variants that can address all sixteen
\node[below=of fac.arg1.south] (4) {\clean{4}};
\draw (fac.arg1) -- (4.north);
\end{tikzpicture}
- \caption{Applying \clean{Start}}
+ \caption{Applying \clean{Start}.}
\end{subfigure}%
\begin{subfigure}[b]{.3\linewidth}
\centering
@@ -84,7 +84,7 @@ For these instructions there exist 32-bit variants that can address all sixteen
\draw (minus.arg1) |- (4.east);
\draw (minus.arg2) -- (1.west);
\end{tikzpicture}
- \caption{Applying \clean{fac n}}
+ \caption{Applying \clean{fac n}.}
\end{subfigure}%
\begin{subfigure}[b]{.2\linewidth}
\centering
@@ -97,16 +97,16 @@ For these instructions there exist 32-bit variants that can address all sixteen
\draw (times.arg2) -- (fac.west);
\draw (fac.arg1) -- (3.north);
\end{tikzpicture}
- \caption{Applying \clean{-}}
+ \caption{Applying \clean{-}.}
\end{subfigure}
- \caption{Rewriting a Clean node\label{fig:intro:rewriting}}
+ \caption{Rewriting a Clean node.\label{fig:intro:rewriting}}
\end{figure*}
\subsubsection{Interworking}
\label{sec:intro:interworking}
-The ARM and Thumb instruction sets are designed to \emph{interwork}:
+The ARM and Thumb instruction sets are designed to interwork:
different parts of a program can be assembled for different instruction sets
- and it is possible to switch instruction set when the program counter is written to~\parencite[A4.1]{armv7ar}.
+ and it is possible to switch instruction set when an instruction writes to the program counter~\parencite[A4.1]{armv7ar}.
The Thumb-2 code generator proposed in this thesis does not produce ARM code,
though the existence of the interworking facility has effects on the techniques that can be used in it.
@@ -178,7 +178,7 @@ This function uses the B-stack, for basic values, for its arguments and return v
More complex types would be passed on the A-stack.
The \clean{Start} rule compiles to%
- \footnote{Again, the code has been shorted insignificantly for brevity.}:
+ \footnote{The code has been shortened insignificantly for brevity.}:
\begin{minted}[tabsize=4]{abc}
__fac_Start
@@ -209,7 +209,7 @@ While the ABC-machine can be implemented in a straightforward manner using macro
it introduces several optimisations, some of which are target-dependent.
The ARM code for the factorial example is as follows%
- \footnote{Some irrelevant peculiarities have been removed for brevity and clarity.}:
+ \footnote{Some irrelevant peculiarities have been removed for brevity.}:
\begin{minted}{ual}
sfac_P1:
diff --git a/thesis/reg-alloc.tex b/thesis/reg-alloc.tex
index 482cd8c..beb83c5 100644
--- a/thesis/reg-alloc.tex
+++ b/thesis/reg-alloc.tex
@@ -37,7 +37,7 @@
\draw ($({axis cs:85000,S0})-(0,2em)$) -- ($({axis cs:85000,A3})+(0,2em)$);
\end{axis}
\end{tikzpicture}
- \subcaption{In the Clean compiler}
+ \subcaption{In the Clean compiler.}
\end{subfigure}%
\begin{subfigure}{.5\linewidth}
\begin{tikzpicture}
@@ -71,9 +71,9 @@
\draw ($({axis cs:155,S0})-(0,2em)$) -- ($({axis cs:155,A3})+(0,2em)$);
\end{axis}
\end{tikzpicture}
- \subcaption{In the RTS\label{fig:reg-usage:rts}}
+ \subcaption{In the RTS.\label{fig:reg-usage:rts}}
\end{subfigure}%
- \caption{Register usage with the Thumb-2 backend\label{fig:reg-usage}}
+ \caption{Register usage with the Thumb-2 backend.\label{fig:reg-usage}}
\end{figure}
% pi@rasppi:~/clean/src/compiler$ ~/count-regs.sh
@@ -182,7 +182,7 @@ In Clean programs, the ARM registers are used as follows~\parencite[\texttt{arms
S0 & r12 & \highlight{r0} \\
\end{tabular}
\end{subfigure}%
- \caption{Register allocation in the ARM and Thumb backends\label{tab:reg-alloc:arm-and-thumb}}
+ \caption{Register allocation in the ARM and Thumb backends.\label{tab:reg-alloc:arm-and-thumb}}
\end{table*}
In \cref{fig:reg-usage}, we count for each register the number of instructions it occurs in, in the code generated for the Clean compiler~\parencite{cocl}.
@@ -251,7 +251,7 @@ This is much more complicated, and for a rough estimate the simplistic method us
\addplot coordinates { (69896,0) [100\%] };
\end{axis}
\end{tikzpicture}
- \caption{Code size for different backends\label{fig:reg-alloc:results}}
+ \caption{Code size for different backends.\label{fig:reg-alloc:results}}
\end{figure*}
\subsection{Optimisation}
diff --git a/thesis/results.tex b/thesis/results.tex
index 2f6de50..76afb27 100644
--- a/thesis/results.tex
+++ b/thesis/results.tex
@@ -18,7 +18,7 @@
& Diff. (\%) & 11.9 & 3.6 & 5.1 & 5.9 & --- & 3.7 & --- & --- & --- & 5.4 & 0.9 & 2.6 & 5.2 & 0.0 & --- & 8.0 & --- & --- \\
\end{tabular}
}
- \caption{Code size and running time comparison for ARM and Thumb-2\label{tab:results}}
+ \caption{Code size and running time comparison for ARM and Thumb-2.\label{tab:results}}
\end{table*}
% Program Code size Running time (+gc) Nr. runs
diff --git a/thesis/storing-pc.tex b/thesis/storing-pc.tex
index bfb8442..4bae7fb 100644
--- a/thesis/storing-pc.tex
+++ b/thesis/storing-pc.tex
@@ -157,7 +157,7 @@ For this reason it is better to use S0 whenever possible.
\subsection{Comparison}
\label{sec:storing-pc:comparison}
Assuming the worst case, that all instructions in the jump block are wide, we need four more bytes in Thumb than in ARM.
-As a benchmark, the Clean compiler has 41,006 jumps of this kind in 1,253,978 instructions, a rough 3.27\%.
+As a benchmark, the Clean compiler has 41,006 jumps of this kind in 1,253,978 instructions, or approximately 3.27\%.
The four extra bytes in Thumb mean a size increase of $41006\cdot4\approx160$KiB on the 5.3MiB file, an increase of 3.00\%.
As for the time complexity: every jump requires an extra instruction cycle.
diff --git a/thesis/thesis.bib b/thesis/thesis.bib
index f43bc26..4156ff0 100644
--- a/thesis/thesis.bib
+++ b/thesis/thesis.bib
@@ -1,41 +1,41 @@
@manual{armv7ar,
- label="ARM Ltd.",
+ author="{ARM Ltd.}",
key="ARM Ltd. 1996",
title="ARM Architecture Reference Manual. ARMv7-A and ARMv7-R edition",
year=1996
}
@manual{armv7m,
- label="ARM Ltd.",
+ author="{ARM Ltd.}",
key="ARM Ltd. 2006",
title="ARMv7-M Architecture Reference Manual",
year=2006
}
@manual{armv8a,
- label="ARM Ltd.",
+ author="{ARM Ltd.}",
key="ARM Ltd. 2013",
title="ARM Architecture Reference Manual. ARMv8, for ARMv8-A architecture profile (Beta)",
year=2013
}
@manual{armcallstd,
- label="ARM Ltd.",
+ author="{ARM Ltd.}",
key="ARM Ltd. 2015",
title="Procedure Call Standard for the ARM Architecture",
year=2015
}
@manual{cleanlangrep,
- title="Clean Language Report. Version 2.2",
+ title="Clean Language Report",
+ subtitle="Version 2.2",
year=2011,
author={Rinus Plasmeijer and Marco {\VAN{Eekelen}} and John {\VAN{Groningen}}},
organization="Department of Software Technology, University of Nijmegen"
}
@Online{clean,
- label="Software Technology Research Group",
- key="Software Technology Research Group",
+ author="{Software Technology Research Group}",
title="The Clean Home Page",
organization="Radboud University Nij\-me\-gen",
url="http://clean.cs.ru.nl/",
@@ -65,8 +65,7 @@
}
@Online{armcg,
- key="ARM CG",
- label="ARM CG",
+ author="{Software Technology Research Group}",
title="Clean's ARM code generator",
subtitle="Revision 295",
organization="Radboud University Nijmegen",
@@ -76,8 +75,7 @@
}
@Online{armrts,
- key="ARM RTS",
- label="ARM RTS",
+ author="{Software Technology Research Group}",
title="Clean's ARM run-time system",
subtitle="Revision 387",
organization="Radboud University Nijmegen",
@@ -87,8 +85,7 @@
}
@Online{cocl,
- key="Clean compiler",
- label="Clean compiler",
+ author="{Software Technology Research Group}",
title="The Clean compiler",
subtitle="Bootstrap from intermediate ABC files, 32-bit.",
organization="Radboud University Nijmegen",
diff --git a/thesis/thesis.tex b/thesis/thesis.tex
index 7ebb7ad..95abdee 100644
--- a/thesis/thesis.tex
+++ b/thesis/thesis.tex
@@ -54,6 +54,7 @@ It produces on average 20\% smaller code than the ARM code generator, which is o
\input{load-offsets}
\input{reg-alloc}
\input{results}
+\input{further}
\ifdraft\else
\cleardoublepage