\section{Results}
\label{sec:results}

\begin{table*}[b]
	\centering
	\small
	\setlength{\tabcolsep}{0.2em}
	\makebox[\textwidth]{%
		\begin{tabular}{l l *{18}{c}}
				&               & Ack  & E    & Fib  & FSve & Ham  & LQns & Mat  & Perm & Psc  & Rev  & RevTw & RFib & SQns & Sve  & STw  & Tak  & Tw   & WSeq \\\hline
			\multirow{3}{*}{Size}                                                                                              
				& ARM (bytes)   & 188  & 2388 &  164 & 1036 & 1576 & 2708 & 4452 & 1756 & 2940 &  796 &   820 &  248 & 1660 & 1200 &  340 &  232 &  360 & 2628 \\
				& Thumb (bytes) & 164  & 1944 &  148 &  840 & 1200 & 2196 & 3392 & 1332 & 2452 &  636 &   632 &  248 & 1328 &  936 &  304 &  204 &  312 & 2020 \\
				& Diff. ($-\%$) & 12.8 & 18.6 &  9.8 & 18.9 & 23.9 & 18.9 & 23.8 & 24.1 & 16.6 & 20.1 &  22.9 &  0.0 & 20.0 & 22.0 & 10.6 & 12.1 & 13.3 & 23.1 \\\hline
			\multirow{3}{*}{Time}                                                                                               
				& ARM (s)       & 0.59 & 1.39 & 0.59 & 1.18 & ---  & 1.64 & ---  & ---  & ---  & 4.09 &  1.07 & 1.15 & 1.16 & 0.99 & ---  & 0.87 & ---  & ---  \\
				& Thumb (s)     & 0.66 & 1.44 & 0.62 & 1.11 & ---  & 1.70 & ---  & ---  & ---  & 4.31 &  1.08 & 1.18 & 1.22 & 0.99 & ---  & 0.94 & ---  & ---  \\
				& Diff. (\%)    & 11.9 &  3.6 &  5.1 &  5.9 & ---  &  3.7 & ---  & ---  & ---  &  5.4 &   0.9 &  2.6 &  5.2 &  0.0 & ---  &  8.0 & ---  & ---  \\
		\end{tabular}
	}
	\caption{Code size and running time comparison for ARM and Thumb-2\label{tab:results}}
\end{table*}

% Program         Code size             Running time (+gc)   Nr. runs
%                 ARM       Thumb       ARM       Thumb
% acker 3 10      188       164         0.59s     0.66s      20
% e 2000          2388      1944        1.39s     1.44s      10
% fsieve 50000    1036      840         1.18s     1.11s      10
% hamming 300     1576      1200        negligible           -
% invperm 16nrs   1756      1332        negligible           -
% lqueen 11       2708      2196        1.64s     1.70s      10
% mulmat 6        4452      3392        negligible           -
% nfib 35         164       148         0.59s     0.62s      10
% pascal 18 40    2940      2452        negligible           -
% reverse 10000   796       636         4.09s     4.31s      10
% revtwice 1 500  820       632         1.07s     1.08s      10      (-h 500m)
% rfib 35.0       248       248         1.15s     1.18s      10
% sieve 3000      1200      936         0.99s     0.99s      10
% squeen 11       1660      1328        1.16s     1.22s      10
% str_arit      SEGFAULT
% stwice          340       304         negligible           -
% tak 32 16 8     232       204         0.87s     0.94s      10
% twice           360       312         negligible           -
% war_seq         2628      2020        negligible           -

\begin{multicols}{2}

\subsection{Test suite}
\label{sec:results:testsuite}
We use a standard set of Clean programs to compare ARM and Thumb performance.
These programs are included in the \texttt{examples} directory of every Clean release~\parencite{clean}.
They are:

\begin{description}[itemsep=0pt]
	\item[Ack]
		$A(3,10)$, where $A$ is the Ackermann function.
	\item[E]
		The approximation of $e$ to 2000 digits.
	\item[Fib]
		Computing the 35\textsuperscript{th} Fibonacci number using naive recursion (a sketch of this program is given after this list).
	\item[FSve]
		The sieve of Eratosthenes (optimised), computing the first 50,000 primes.
	\item[Ham]
		The Hamming function, computing the first 300 numbers with only 2, 3 and 5 as prime factors.
	\item[LQns]
		Computing the number of ways to place 11 queens on an $11\times11$ \enquote{chess} board (not optimised).
	\item[Mat]
		Multiplying two $6\times6$ matrices.
	\item[Perm]
		Inverting a permutation of \clean{[1..16]}.
	\item[Psc]
		Pretty-printing the first 18 rows of Pascal's triangle.
	\item[Rev]
		Reversing a 10,000-element list 10,000 times.
	\item[RevTw]
		Reversing a list of 500 elements 65,536 times using the higher-order function \clean{twice} (with a heap size of 500M%
			\footnote{The large heap size is needed to prevent a switch to another garbage collector, which had not been finished at the time of writing.}).
	\item[RFib]
		Computing the 35\textsuperscript{th} Fibonacci number using naive recursion and \clean{Real}s instead of \clean{Int}s.
	\item[SQns]
		Computing the number of ways to place 11 queens on an $11\times11$ \enquote{chess} board (optimised).
	\item[Sve]
		The sieve of Eratosthenes (not optimised), computing the first 3,000 primes.
	\item[STw]
		Incrementing an integer 65,536 times using the higher-order function \clean{twice} (optimised).
	\item[Tak]
		$\tau(32,16,8)$, where $\tau$ is the Takeuchi function.
	\item[Tw]
		Incrementing an integer 65,536 times using the higher-order function \clean{twice} (not optimised).
	\item[WSeq]
		A sequential version of Warshall's shortest path algorithm on a $6\times6$ matrix.
\end{description}
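
To give an impression of what these benchmarks look like, the sketch below is a minimal Clean version of Fib, reconstructed from the description above; it is not the exact source shipped with the Clean distribution.

\begin{verbatim}
module fib

import StdEnv

// sketch of the Fib benchmark: naive doubly recursive Fibonacci
fib :: Int -> Int
fib 0 = 1
fib 1 = 1
fib n = fib (n - 1) + fib (n - 2)

// the benchmark evaluates the 35th number
Start :: Int
Start = fib 35
\end{verbatim}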

\begin{figure*}[b!]
	\centering
	\begin{tikzpicture}
		\begin{axis}
			[ width=.5\textwidth
			, xlabel={Size decrease (\%)}
			, ylabel={Running time increase (\%)}
			, mark options={scale=2}
			, scatter/classes={%
				a={mark=x,draw=red},
				b={mark=+,draw=black}}
			]
			\addplot[scatter,only marks,scatter src=explicit symbolic]
				table[meta=label] {
					x     y    label
					12.8  11.9 a
					18.6  3.6  b
					9.8   5.1  a
					18.9  5.9  b
					18.9  3.7  b
					20.1  5.4  a
					22.9  0.9  a
					0.0   2.6  a
					20.0  5.2  b
					22.0  0.0  b
					12.1  8.0  a
				};
		\end{axis}
	\end{tikzpicture}
	\caption{
		Comparison of code size decrease and running time increase.
		Red crosses indicate programs that are smaller than 1,000 bytes in ARM;
			black pluses correspond to larger programs.
		\label{fig:results:scatter}}
\end{figure*}

\subsection{Code size}
\label{sec:results:size}
We compared the code size for the code generated by the ARM and Thumb backends for all programs in the test suite.
This does not include the run-time system, but does include parts of Clean's standard library, StdEnv, that are necessary for the program.
The results are in \cref{tab:results}.
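The decrease listed there is the size difference relative to the ARM code size; for Ack, for example,
\[
	\frac{188 - 164}{188} \times 100\% \approx 12.8\%.
\]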

The Thumb backend produces on average
	17.3\% smaller code, with a standard deviation of 6.3pp.
If we ignore the programs for which the ARM code size is less than 1,000 bytes, the average decrease is
	21.0\%, with a standard deviation of 2.6pp,
	comparable to the 19.3\% found for the Clean compiler in \cref{sec:reg-alloc:results}.
Small programs with short function blocks skew the data,
	because they contain relatively many jumps, which are longer in Thumb than in ARM (see \cref{sec:storing-pc}).

\subsection{Running time}
\label{sec:results:time}
For the same programs, we compare the running time.
Some of the examples as provided in the Clean releases run too briefly to allow a meaningful comparison,
	so their parameters had to be tweaked;
	the parameters mentioned in \cref{sec:results:testsuite} are the adjusted ones.
For some examples it was not possible to increase the running time sufficiently;
	\cref{tab:results} lists \enquote{---} for these programs.

The increase in running time is relatively low compared to the decrease in code size:
	4.8\% on average, with a standard deviation of 3.1pp.
For the larger programs, the average increase is
	3.7\%, with a standard deviation of 2.0pp (though this figure is based on only five programs).
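The per-program increase is again relative to the ARM figure; for Ack, for instance,
\[
	\frac{0.66 - 0.59}{0.59} \times 100\% \approx 11.9\%,
\]
and the 4.8\% above is the mean of the eleven values that could be measured.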

\subsection{Discussion}
\label{sec:results:discussion}
If we plot the code size decrease against the running time increase,
	we get \cref{fig:results:scatter}.
A large decrease in code size often seems to coincide with a small increase in running time,
	suggesting that Thumb is more suitable for some programs than for others.
However, we do not have enough data to tell whether this effect is significant.
More research is needed to establish whether such a relationship actually exists,
	and if so, which programs are more suitable for Thumb.

The outliers in \cref{fig:results:scatter} are
	RFib $(0.0,2.6)$,
	Ack $(12.8,11.9)$,
	Fib $(9.8,5.1)$ and
	Tak $(12.1,8.0)$,
	all small programs that rely heavily on recursion.
This is explained by their relatively high number of jumps:
	in Thumb these require extra instructions, which introduces extra code size and running time (see \cref{sec:storing-pc}).

\end{multicols}

% vim: nowrap