From f9ca5facb8882001db4a45efa8f2997c067b7311 Mon Sep 17 00:00:00 2001 From: zkwip Date: Tue, 9 Jun 2015 20:54:24 +0200 Subject: Summary shortening improvements made the length limiter less greedy --- .../main/java/org/rssin/summaries/BablukiAPI.java | 56 +++++++++++++++++----- 1 file changed, 45 insertions(+), 11 deletions(-) (limited to 'app/src/main') diff --git a/app/src/main/java/org/rssin/summaries/BablukiAPI.java b/app/src/main/java/org/rssin/summaries/BablukiAPI.java index 8d5a1af..417a807 100644 --- a/app/src/main/java/org/rssin/summaries/BablukiAPI.java +++ b/app/src/main/java/org/rssin/summaries/BablukiAPI.java @@ -100,12 +100,15 @@ public class BablukiAPI implements SummaryAPIInterface{ int senc = sents.size(); //reduce if neccesairy + + ArrayList sents_bu = (ArrayList)sents.clone(); - while((lm == LengthMode.LINES || lm == LengthMode.BOTH) && sents.size() > maxlines) + while((lm == LengthMode.LINES || lm == LengthMode.BOTH) && sents.size() > maxlines) { removeLeastSentence(sents, ranks); + } while((lm == LengthMode.CHARACTERS || lm == LengthMode.BOTH) && getTotalSize(sents, sep) > maxchars) - removeLeastSentence(sents, ranks); + removeLeastSentences(sents, ranks); //output the sentences. @@ -163,30 +166,62 @@ public class BablukiAPI implements SummaryAPIInterface{ return len; } - private void removeLeastSentence(ArrayList sents, SentenceScore[] ranks) - { + private void removeLeastSentence(ArrayList sents, SentenceScore[] ranks) { + + int index = findLeastSentence(sents,ranks); + sents.remove(index); + + } + private int findLeastSentence(ArrayList sents, SentenceScore[] ranks) { + + //higher is better double lowest = Double.MAX_VALUE; int index = 0; - for(int i=0; i sentences, SentenceScore[] ranks) + { + while (getTotalSize(sentences, sep) > maxchars) { + + int bestid = findLeastSentence(sentences,ranks); + double bestScore = 0; + + for (int i = 0; i < sentences.size(); i++) { + ArrayList sents = new ArrayList<>(sentences); + sents.remove(i); + if (getTotalSize(sents, sep) <= maxchars) { + if (getTotalScore(sents, ranks) >= bestScore) { + bestid = i; + bestScore = getTotalScore(sents, ranks); + } + } + } + sentences.remove(bestid); + } + } + + private double getTotalScore(ArrayList sentences, SentenceScore[] ranks) + { + double score = 0; + for(String s: sentences) + score += getScore(formatSentence(s),ranks); + return score; + } private String[] splitIntoSentences(String content) { - //ik word gek String ends = "qwertyuiopasdfghjklzxcvbnm1234567890QWERTYUIOPASDFGHJKLZXCVBNM"; - //String ends = "a"; content = content.replaceAll("\n", " "); String[] ss = content.split("(\\.|!) +"); for(int i=0; i