diff options
author | zkwip | 2015-05-22 01:15:15 +0200 |
---|---|---|
committer | zkwip | 2015-05-22 01:15:15 +0200 |
commit | 1e05ff5824873218bd3caef0be6711892bc3d88f (patch) | |
tree | ce50740e18822e8ec80aca71b8795dd84971ede6 /app/src/main | |
parent | progress aan het algoritme, (diff) |
Babluki algoritme toegevoegd
Diffstat (limited to 'app/src/main')
-rw-r--r-- | app/src/main/java/org/rssin/summaries/BablukiAPI.java | 200 | ||||
-rw-r--r-- | app/src/main/java/org/rssin/summaries/SentenceScore.java | 21 | ||||
-rw-r--r-- | app/src/main/java/org/rssin/summaries/tester.java | 14 |
3 files changed, 230 insertions, 5 deletions
diff --git a/app/src/main/java/org/rssin/summaries/BablukiAPI.java b/app/src/main/java/org/rssin/summaries/BablukiAPI.java new file mode 100644 index 0000000..b48cfdf --- /dev/null +++ b/app/src/main/java/org/rssin/summaries/BablukiAPI.java @@ -0,0 +1,200 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package org.rssin.summaries; + +import org.rssin.rss.FeedItem; + +/** + * Java/SummaryAPI implementation of the Babluki summary tool. + * @author Joep + */ +public class BablukiAPI implements SummaryAPIInterface{ + + LengthMode lm; + int maxchars, maxlines; + + + @Override + public Summary getSummary(FeedItem f) { + + String desc = f.getDescription(); + String title = f.getTitle(); + String t = getSumText(desc,title); + Summary s = new Summary(t); + + return s; + } + + @Override + public Summary getSummaryFromText(String desc) { + + String t = getSumText(desc, ""); + + Summary s = new Summary(t); + + return s; + } + + public String getSumText(String content, String title) + { + SentenceScore[] ranks = getSentenceRanks(content); + + String[] paragraphs = splitIntoParagraphs(content); + + String o = ""; + + for(String p: paragraphs) + { + String sentence = getBestSentence(p,ranks); + if (sentence != null) + { + o += sentence.trim(); + o += ". "; + } + } + + return o; + + + } + + private String[] splitIntoSentences(String content) + { + content = content.replaceAll("\n", " "); + String[] ss = content.split("(\\.|!) +"); + for(String s: ss) + s=s.trim(); + return ss; + + } + + private String[] splitIntoParagraphs(String content) + { + return content.split("\n *\n"); + } + + private double sentenceIntersection(String sent1, String sent2) + { + String[] s1 = sent1.split(" "); + String[] s2 = sent2.split(" "); + + if (s1.length + s2.length == 0) + return 0; + + return 2.0 * (double)(intersections(s1,s2)) / (double)(s1.length + s2.length); + } + + private int intersections(Object[] s1, Object[] s2) + { + int ints = 0; + for(Object o: s1) + { + for(Object o2: s2) + { + if (o.equals(o2)) + ints++; + } + } + + return ints; + + } + + private String formatSentence(String sentence) + { + return sentence.replaceAll("[^A-Za-z]", ""); + } + + private SentenceScore[] getSentenceRanks (String content) + { + String[] sentences = splitIntoSentences(content); + + int n = sentences.length; + SentenceScore[] scores = new SentenceScore[n]; + for (int i = 0; i < n ; i++) + { + double score = 0.0; + for (int j = 0; j < n; j++) + { + if (i!=j) + score += sentenceIntersection(sentences[i],sentences[j]); + } + scores[i] = new SentenceScore(formatSentence(sentences[i]),score); + + } + + return scores; + } + + private String getBestSentence(String paragraph, SentenceScore[] slist) + { + String[] sentences = splitIntoSentences(paragraph); + + if (sentences.length == 0) + return null; + + String best = ""; + double max_value = 0; + + for(String s : sentences) + { + String strip_s = formatSentence(s); + if (!strip_s.isEmpty()) + { + if (getScore(strip_s, slist) > max_value) + { + max_value = getScore(strip_s, slist); + best = s; + } + } + } + + return best; + } + + private double getScore (String formatted, SentenceScore[] slist) + { + for(SentenceScore s: slist) + { + if (s.sentence.equals(formatted)) + return s.score; + } + return 0.0; + } + + + + /** + * set the LengthMode -_- + * + * @param l Mode to use. + */ + @Override + public void setLengthMode(LengthMode l) { + lm = l; + } + + /** + * set the maximum number of characters of the summary text. + * + * @param chars integer value of the amount. + */ + @Override + public void setMaxChars(int chars) { + maxchars = chars; + } + + /** + * set the maximum number of sentences of the summary text. + * + * @param lines integer value of the amount. + */ + @Override + public void setMaxLines(int lines) { + maxlines = lines; + } + +} diff --git a/app/src/main/java/org/rssin/summaries/SentenceScore.java b/app/src/main/java/org/rssin/summaries/SentenceScore.java new file mode 100644 index 0000000..e15e2a1 --- /dev/null +++ b/app/src/main/java/org/rssin/summaries/SentenceScore.java @@ -0,0 +1,21 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package org.rssin.summaries; + +/** + * + * @author Joep + */ +public class SentenceScore { + public final String sentence; + public final double score; + + public SentenceScore(String s, double d) + { + sentence = s; + score = d; + } +} diff --git a/app/src/main/java/org/rssin/summaries/tester.java b/app/src/main/java/org/rssin/summaries/tester.java index 07c751a..c795c78 100644 --- a/app/src/main/java/org/rssin/summaries/tester.java +++ b/app/src/main/java/org/rssin/summaries/tester.java @@ -5,7 +5,10 @@ */ package org.rssin.summaries; +import java.net.URL; +import java.util.Calendar; import java.util.Scanner; +import org.rssin.rss.FeedItem; /** * @@ -24,21 +27,22 @@ public class tester { String s = ""; do { - t += s; + t += s + "\n"; s = scanner.nextLine(); } while (!s.contains("#")); System.out.println("Tekst geaccepteerd."); - SummaryAPI sumo = new SummaryAPI(); + SummaryAPIInterface sumo = new BablukiAPI(); sumo.setLengthMode(LengthMode.LINES); sumo.setMaxLines(2); System.out.println("Sumo ingesteld."); + + Summary su = sumo.getSummaryFromText(t); + String k = su.getText(); - String k = sumo.getSumText(t); - - k = k.trim(); + //k = k.trim(); System.out.println(k); |