From 126288502f64e1959e82bcab21c107f7fbee8f68 Mon Sep 17 00:00:00 2001
From: Camil Staps
Date: Tue, 15 Dec 2015 19:53:30 +0000
Subject: Finish report practical 2

---
 Practical2/report/Makefile           | 14 ++++++++++++++
 Practical2/report/algorithm.tex      | 26 +++++++++++++-------------
 Practical2/report/analysis.tex       |  4 ++--
 Practical2/report/future-work.tex    |  7 +++++++
 Practical2/report/implementation.tex |  2 +-
 Practical2/report/notation.tex       |  8 +++++---
 Practical2/report/report-print.tex   |  5 +++++
 Practical2/report/report.tex         | 10 ++++++----
 8 files changed, 53 insertions(+), 23 deletions(-)
 create mode 100644 Practical2/report/Makefile
 create mode 100644 Practical2/report/future-work.tex
 create mode 100644 Practical2/report/report-print.tex

(limited to 'Practical2')

diff --git a/Practical2/report/Makefile b/Practical2/report/Makefile
new file mode 100644
index 0000000..6f88cef
--- /dev/null
+++ b/Practical2/report/Makefile
@@ -0,0 +1,14 @@
+TEX=pdflatex
+OBJS=report.pdf report-print.pdf
+
+all:
+	latexmk -pdf -pdflatex="pdflatex -shell-escape" report.tex
+	pdflatex -shell-escape '\input{report-print}\input{report}'
+	pdflatex -shell-escape '\input{report-print}\input{report}'
+
+clean:
+	for pdf in $(OBJS); do latexmk -cd -c "$$pdf"; done
+
+FORCE:
+
+
diff --git a/Practical2/report/algorithm.tex b/Practical2/report/algorithm.tex
index 175ad4b..38a9e71 100644
--- a/Practical2/report/algorithm.tex
+++ b/Practical2/report/algorithm.tex
@@ -7,7 +7,7 @@ I will propose a solution that uses dynamic programming. First, I will describe
 \label{sec:algorithm:general}
 
 \begin{thmproof}[G_0_k]{lemma}{For any $\bar a, k$, we have $G_0^k(\bar a)=0$.}
-    There is only one way to partition $\bar a$ (using $k$ empty partitions). The profit of every member of the partitioning is zero, and therefore the overall profit of the partitioning is zero as well.
+    There is only one way to partition $\bar a$; using $k+1$ empty partitions. The profit of every member of the partitioning is zero, and therefore the overall profit of the partitioning is zero as well.
 \end{thmproof}
 
 \begin{thmproof}[G_p_0]{lemma}{For any $\bar a, p\le|\bar a|$, we have $G_p^0(\bar a)=\PR(S_0^p(\bar a))$.}
@@ -15,21 +15,21 @@ I will propose a solution that uses dynamic programming. First, I will describe
 \end{thmproof}
 
 \begin{thmproof}[G_p_k]{lemma}{$G_p^k(\bar a) = \max\left(G_p^{k-1}(\bar a), \max\limits_{i=0}^{p-1}\left(G_i^{k-1}(\bar a) + \PR(S_i^p(\bar a))\right)\right)$.}
-    Consider the $p$th element of $\bar a$ and a partitioning that gives the maximal profit in the substring until $\bar a_p$, $G_p^k(\bar a)$. We have two possibilities:
+    Consider the $p$th element of $\bar a$ ($\bar a_{p-1}$) and a partitioning that gives the maximum gain in the substring until $\bar a_p$, $G_p^k(\bar a)$. We have two possibilities:
 
     \begin{itemize}[itemsep=0pt]
-        \item $\bar a_p$ is in one partition with $\bar a_i, \bar a_{i+1}, \dots, \bar a_{p-1}$. In this case we would have $G_p^k(\bar a) = G_i^{k-1}(\bar a) + \PR(S_i^p(\bar a))$, since we can divide the rest of $\bar a$ over $k-1$ independent partitions and gain a profit of $\PR(S_i^p(\bar a))$ over that last partition.
-        \item It is in fact more worthwhile to create $k-1$ partitions rather than $k$. In this case, we have $G_p^k(\bar a)=G_p^{k-1}(\bar a)$.
+        \item $\bar a_{p-1}$ is in one substring with $\bar a_i, \bar a_{i+1}, \dots, \bar a_{p-2}$ (possibly $i=p-1$). In this case we would have $G_p^k(\bar a) = G_i^{k-1}(\bar a) + \PR(S_i^p(\bar a))$, since we can independently divide the rest of $\bar a$ over $k-1$ partitions and gain a profit of $\PR(S_i^p(\bar a))$ over that last partition.
+        \item It is in fact equally worthwhile to create $k$ partitions rather than $k+1$. In this case, we have $G_p^k(\bar a)=G_p^{k-1}(\bar a)$.
     \end{itemize}
 
     To get the actual profit, we take the maximum of the two possibilities.
 \end{thmproof}
 
-This suggests the general flow of our algorithm: we maintain an $(n+1)\times (k+1)$-table of values for $G_p^d$ with $0\le d\le k, 0\le p\le n$. Using the recurrences from \autoref{lem:G_p_0}, \ref{lem:G_0_k} and \ref{lem:G_p_k}, that only rely on `smaller values' ($G_{p'}^{d'}$ with $p'\le p$ or $d'\le d$), we generate the whole table. The solution to our problem is then $$\PM_k(\bar a) = \sum\bar a-\PP_k(\bar a) = \sum\bar a - G_{|\bar a|}^k(\bar a).$$
+This suggests the general flow of our algorithm: we maintain an $(n+1)\times (k+1)$-table of values for $G_p^d$ with $0\le d\le k, 0\le p\le n$. Using the recurrences from \autoref{lem:G_0_k}, \ref{lem:G_p_0} and \ref{lem:G_p_k}, we generate the whole table.
 
 \begin{algorithm}[h]
     \footnotesize
-    \caption{Finding the maximum profit $\PP_k(\bar a)$ of sequence $\bar a$ with $k$ partitions}
+    \caption{Finding the maximum profit $\PP_k(\bar a)$ of sequence $\bar a$ with at most $k+1$ partitions}
     \label{alg:maximum-profit}
     \begin{algorithmic}[1]
         \REQUIRE $\bar a, k$
@@ -54,13 +54,15 @@ This suggests the general flow of our algorithm: we maintain an $(n+1)\times (k+
     This follows from \autoref{lem:G_0_k} through \ref{lem:G_p_k}.
 \end{thmproof}
 
+The solution to our problem is then $$\PM_k(\bar a) = \sum\bar a-\PP_k(\bar a) = \sum\bar a - G_{|\bar a|}^k(\bar a).$$
+
 \subsection{Optimisations}
 \label{sec:algorithm:optimisations}
 
 A first observation we should make is that values in the input sequence $\bar a$ that are divisible by $5$ do not affect the maximum profit of the substring they're in.
 
-\begin{thmproof}[divide-by-5]{lemma}{For any $\bar a$, $\PP(\bar a) = \PP\left(\seq{a_i \in\bar a \mid 5\nmid a_i}\right)$.}
-    No matter in which substring, an $a_i$ which is divisible by $5$ will have no influence on the rounded sum of that substring and thus not on the profit.
+\begin{thmproof}[divide-by-5]{lemma}{For any $\bar a,k$, $\PP_k(\bar a) = \PP_k\left(\seq{a_i \in\bar a \mid 5\nmid a_i}\right)$.}
+    No matter in which substring (say $\bar a'$), an $a_i$ that is divisible by $5$ will have no influence on $\PR(\bar a')$ and thus not on $\PP_k(\bar a)$.
 \end{thmproof}
 
 On average, \autoref{lem:divide-by-5} allows us to apply \autoref{alg:maximum-profit} on a input $\frac54$ times as small as the original input. While not decreasing the Big-O complexity of the algorithm, this does give us faster running times.
@@ -81,9 +83,9 @@ Another optimisation concerns the $S$-function. In \autoref{alg:maximum-profit}
             \IF{$p=0$}
                 \STATE $S[0] = 0$
             \ELSE
-                \STATE $S[p-1] = a'_{p-1}$
+                \STATE $S[p-1] = \bar a'_{p-1}$
                 \FOR{$i=p-2$ through $0$}
-                    \STATE $S[i] = a_i + S[i+1]$
+                    \STATE $S[i] = \bar a'_i + S[i+1]$
                 \ENDFOR
             \ENDIF
 
@@ -93,7 +95,7 @@ Another optimisation concerns the $S$-function. In \autoref{alg:maximum-profit}
                 \ELSIF{$d = 0$}
                     \STATE $G[p][d] = \PR(S[0])$ \COMMENT{\autoref{lem:G_p_0}}
                 \ELSE
-                \STATE $G[p][d] = \max\left(G[p][d-1], \max\limits_{i=0}^{p-1}\left(G[i][d-1] + \PR(S[i])\right)\right)$ \COMMENT{\autoref{lem:G_p_k}}
+                    \STATE $G[p][d] = \max\left(G[p][d-1], \max\limits_{i=0}^{p-1}\left(G[i][d-1] + \PR(S[i])\right)\right)$ \COMMENT{\autoref{lem:G_p_k}} \label{alg:optimised:line:recurrence}
                 \ENDIF
             \ENDFOR
         \ENDFOR
@@ -103,5 +105,3 @@ Another optimisation concerns the $S$-function. In \autoref{alg:maximum-profit}
 
 The correctness of \autoref{alg:maximum-profit-optimised} depends on the correctness of \autoref{alg:maximum-profit} which has been proven in \autoref{thm:maximum-profit}.
 
-We can now easily find $\PM_k(\bar a)$ using the equation we saw in \autoref{sec:notation}: $$\PM_k(\bar a) = \sum\bar a - \PP_k(\bar a).$$
-
diff --git a/Practical2/report/analysis.tex b/Practical2/report/analysis.tex
index d989fe6..ac5d6a9 100644
--- a/Practical2/report/analysis.tex
+++ b/Practical2/report/analysis.tex
@@ -1,11 +1,11 @@
 \section{Complexity analysis}
 \label{sec:analysis}
 
-We iterate $p$ in $\pazocal O(|\bar a'|) = \pazocal O(|\bar a|)$. In every iteration we do two things:
+We iterate over $p$ in $\pazocal O(|\langle a_i\in\bar a\mid 5\nmid a_i\rangle|) = \pazocal O(|\bar a|)$. In every iteration we do two things:
 
 \begin{itemize}
     \item We build $S$ in $\pazocal O(p)=\pazocal O(|\bar a|)$.
-    \item We iterate $d$ in $\pazocal O(k)$. Apart from the special cases that $p=0$ or $d=0$, we then have to iterate over $i$ in $\pazocal O(p)=\pazocal O(|\bar a|)$.
+    \item We iterate over $d$ in $\pazocal O(k)$. Apart from the special cases that $p=0$ or $d=0$, we then have to iterate over $i$ in $\pazocal O(p)=\pazocal O(|\bar a|)$.
         
         The computations in this last loop take constant time: $\PR$ can be implemented in constant time as has been seen in \autoref{sec:implementation}, and computing the maximum can be done while iterating over $i$.
 \end{itemize}
diff --git a/Practical2/report/future-work.tex b/Practical2/report/future-work.tex
new file mode 100644
index 0000000..3ab8978
--- /dev/null
+++ b/Practical2/report/future-work.tex
@@ -0,0 +1,7 @@
+\section{Future work}
+\label{sec:future-work}
+
+This paper discusses an algorithm to find the minimal sum of rounded sums of a particular sequence. A different task would be to also find the partitioning into sublists that gives this minimal sum. Given the two-dimensional array with $G_p^k$ values this shouldn't be too difficult; it is obviously feasible in $\pazocal O(k\cdot|\bar a|)$ given that table. However, perhaps there are more efficient solutions to this?
+
+More generally: the algorithm discussed in this report is probably not the most efficient. In particular, I'm wondering if it is really necessary to loop over $0\le i<p$ in line \ref{alg:optimised:line:recurrence} of \autoref{alg:maximum-profit-optimised}, and if it isn't possible to start $i$ higher in some cases. This would probably mean storing some extra data in the main loop of \autoref{alg:maximum-profit-optimised}.
+
diff --git a/Practical2/report/implementation.tex b/Practical2/report/implementation.tex
index cc8a940..1a02b2d 100644
--- a/Practical2/report/implementation.tex
+++ b/Practical2/report/implementation.tex
@@ -13,7 +13,7 @@ In the \mintinline{c}{main} function we see how \autoref{lem:divide-by-5} can be
 
 \subsection{Compilation}
 \label{sec:implementation:compilation}
-Optimal results are achieved with \texttt{clang}, which is on average $25\%$ faster than \texttt{gcc}. In any case, the \texttt{-Ofast} flag should be used. It speeds up the program with a factor $4$ in the case of \texttt{clang} and a factor $3$ in the case of \texttt{gcc}.
+Optimal results are achieved with \texttt{clang}, which is on average $25\%$ faster than \texttt{gcc}\footnote{Using \texttt{-Ofast}.}. In any case, the \texttt{-Ofast} flag should be used. It speeds up the program with a factor $4$ in the case of \texttt{clang} and a factor $3$ in the case of \texttt{gcc}.
 
 A Makefile accompanies the C code. If you don't have \texttt{clang} installed, you should install it. If you don't want to install it, use \mintinline{bash}{make CC=gcc}.
 
diff --git a/Practical2/report/notation.tex b/Practical2/report/notation.tex
index cbbbc1d..0781524 100644
--- a/Practical2/report/notation.tex
+++ b/Practical2/report/notation.tex
@@ -1,9 +1,11 @@
 \section{Notation}
 \label{sec:notation}
 
-I will write $\bar a = \seq{a_i \mid i<n}$ for the sequence of numbers and $k$ for the number of substrings. I redefine $\round{x}$ to mean `the nearest multiple of $5$ to $x$'. We may define the `round profit' as $\PR(x)=x-\round{x}$.
+I will write $\bar a = \seq{a_i \mid i<n}$ for the sequence of numbers and $k+1$ for the number of substrings. I redefine $\round{x}$ to mean `the nearest multiple of $5$ to $x$'.
 
-Let $\PM_k(\bar a)$ be the minimal sum of rounded sums of $\bar a$ using $k$ partitions. I will use $\PP_k(\bar a)$ as the `maximum profit' of $\bar a$, that is, $\PP_k(\bar a) = \sum\bar a - \PM_k(\bar a)$. This leads to the term `profit' of a particular partitioning of $\bar a$ and of a particular (sub)string $\bar a'$, for which we need not introduce notation. The profit of a string $\bar a'$ is defined as $\PR(\sum\bar a)$. It is not difficult to see that the profit of a partition is equal to the some of the profits of its members.
+We may define the `rounding profit' of $x$ as $\PR(x)=x-\round{x}$. The rounding profit of a string $\bar a$ is defined as $\PR(\sum\bar a)$, and the rounding profit of a partition of $\bar a$ is the sum of the rounding profits of its elements.
 
-We define the gain $G^k_p(\bar a)$ as $\PP_k(\seq{a_i\in\bar a\mid i<p})$ (that is, the maximal profit on $\bar a$'s prefix with length $p$ using $k$ partitions). Lastly, let $S_i^j(\bar a)$ be the sum of all $p_x\in\bar a$ with $i\le x<j$.
+Let $\PM_k(\bar a)$ be the minimal sum of rounded sums of $\bar a$ using $k+1$ partitions. I will use $\PP_k(\bar a)$ as the `maximum profit' of $\bar a$ using $k+1$ partitions, that is, $\PP_k(\bar a) = \sum\bar a - \PM_k(\bar a)$.
+
+We define the gain $G^k_p(\bar a)$ as $\PP_k(\seq{a_i\in\bar a\mid i<p})$ (i.e., the maximal profit on $\bar a$'s prefix with length $p$ using $k+1$ partitions). Lastly, let $S_i^j(\bar a)$ be the sum of all $p_x\in\bar a$ with $i\le x<j$.
 
diff --git a/Practical2/report/report-print.tex b/Practical2/report/report-print.tex
new file mode 100644
index 0000000..9e7274f
--- /dev/null
+++ b/Practical2/report/report-print.tex
@@ -0,0 +1,5 @@
+\AtBeginDocument{
+    \usemintedstyle{bw}
+    \usepackage{courier}
+}
+
diff --git a/Practical2/report/report.tex b/Practical2/report/report.tex
index 2aeb621..96ec502 100644
--- a/Practical2/report/report.tex
+++ b/Practical2/report/report.tex
@@ -54,7 +54,7 @@
 
 \usepackage{minted}
 
-\title{How To Save Less Money Than Your CPU Costs} %todo working title
+\title{Maximising the Rounding Advantage of a Division of a List Into Sublists}
 \author{Camil Staps}
 
 \begin{document}
@@ -63,21 +63,23 @@
 \thispagestyle{fancy}
 
 \begin{abstract}
-    Given a list of $n$ numbers $a_0, a_1, \dots, a_{n-1}$, we consider one of its partitions in $k$ (possibly empty) pairwise disjoint substrings such that the sum of the sums of the substrings, rounded to multiples of $5$, is minimal.
+    Given a list of $n$ numbers $a_0, a_1, \dots, a_{n-1}$, we consider one of its partitions in $k+1$ (possibly empty) pairwise disjoint substrings such that the sum of the sums of the substrings, rounded to multiples of $5$, is minimal.
 
     This paper describes an algorithm to find the minimal sum for such a partition. It can easily be extended to find such a partition as well.
 \end{abstract}
 
 \section{Report organisation}
-\autoref{sec:notation} will define the notation used throughout this report. In \autoref{sec:algorithm} I will describe the algorithm and argue its correctness. First, we will discuss the basic structure in \autoref{sec:algorithm:general}. After that, I will show some optimisations in \autoref{sec:algorithm:optimisations}.
+\autoref{sec:notation} will define the notation used throughout this report. In \autoref{sec:algorithm} I will describe the algorithm and argue its correctness.
 
 When we have seen the algorithm, \autoref{sec:implementation} will go into details about its implementation in C. \autoref{sec:analysis} will contain a complexity analysis, both time- and space-wise, of the algorithm and its C implementation.
 
+We finish in \autoref{sec:future-work} with a discussion about possible future work.
+
 \input{notation}
 \input{algorithm}
 \input{implementation}
 \input{analysis}
-%\input{discussion}
+\input{future-work}
 
 \end{document}
 
-- 
cgit v1.2.3