diff options
author | zkwip | 2015-06-09 20:54:24 +0200 |
---|---|---|
committer | zkwip | 2015-06-09 20:54:24 +0200 |
commit | f9ca5facb8882001db4a45efa8f2997c067b7311 (patch) | |
tree | bf955456c6a832e7caf60925348554fe9c192ab3 /app/src | |
parent | Share implementation (diff) |
Summary shortening improvements
made the length limiter less greedy
Diffstat (limited to 'app/src')
-rw-r--r-- | app/src/main/java/org/rssin/summaries/BablukiAPI.java | 56 |
1 files changed, 45 insertions, 11 deletions
diff --git a/app/src/main/java/org/rssin/summaries/BablukiAPI.java b/app/src/main/java/org/rssin/summaries/BablukiAPI.java index 8d5a1af..417a807 100644 --- a/app/src/main/java/org/rssin/summaries/BablukiAPI.java +++ b/app/src/main/java/org/rssin/summaries/BablukiAPI.java @@ -100,12 +100,15 @@ public class BablukiAPI implements SummaryAPIInterface{ int senc = sents.size(); //reduce if neccesairy + + ArrayList<String> sents_bu = (ArrayList<String>)sents.clone(); - while((lm == LengthMode.LINES || lm == LengthMode.BOTH) && sents.size() > maxlines) + while((lm == LengthMode.LINES || lm == LengthMode.BOTH) && sents.size() > maxlines) { removeLeastSentence(sents, ranks); + } while((lm == LengthMode.CHARACTERS || lm == LengthMode.BOTH) && getTotalSize(sents, sep) > maxchars) - removeLeastSentence(sents, ranks); + removeLeastSentences(sents, ranks); //output the sentences. @@ -163,30 +166,62 @@ public class BablukiAPI implements SummaryAPIInterface{ return len; } - private void removeLeastSentence(ArrayList<String> sents, SentenceScore[] ranks) - { + private void removeLeastSentence(ArrayList<String> sents, SentenceScore[] ranks) { + + int index = findLeastSentence(sents,ranks); + sents.remove(index); + + } + private int findLeastSentence(ArrayList<String> sents, SentenceScore[] ranks) { + + //higher is better double lowest = Double.MAX_VALUE; int index = 0; - for(int i=0; i<sents.size(); i++) { - double score = getScore(formatSentence(sents.get(i)),ranks); + double score = getScore(formatSentence(sents.get(i)), ranks); if (score < lowest) { index = i; lowest = score; } } - - sents.remove(index); + return index; } + + private void removeLeastSentences(ArrayList<String> sentences, SentenceScore[] ranks) + { + while (getTotalSize(sentences, sep) > maxchars) { + + int bestid = findLeastSentence(sentences,ranks); + double bestScore = 0; + + for (int i = 0; i < sentences.size(); i++) { + ArrayList<String> sents = new ArrayList<>(sentences); + sents.remove(i); + if (getTotalSize(sents, sep) <= maxchars) { + if (getTotalScore(sents, ranks) >= bestScore) { + bestid = i; + bestScore = getTotalScore(sents, ranks); + } + } + } + sentences.remove(bestid); + } + } + + private double getTotalScore(ArrayList<String> sentences, SentenceScore[] ranks) + { + double score = 0; + for(String s: sentences) + score += getScore(formatSentence(s),ranks); + return score; + } private String[] splitIntoSentences(String content) { - //ik word gek String ends = "qwertyuiopasdfghjklzxcvbnm1234567890QWERTYUIOPASDFGHJKLZXCVBNM"; - //String ends = "a"; content = content.replaceAll("\n", " "); String[] ss = content.split("(\\.|!) +"); for(int i=0; i<ss.length; i++) { @@ -287,7 +322,6 @@ public class BablukiAPI implements SummaryAPIInterface{ } } } - return best; } |