aboutsummaryrefslogtreecommitdiff
path: root/app/src/main
diff options
context:
space:
mode:
author zkwip 2015-05-22 01:15:15 +0200
committer zkwip 2015-05-22 01:15:15 +0200
commit 1e05ff5824873218bd3caef0be6711892bc3d88f (patch)
tree ce50740e18822e8ec80aca71b8795dd84971ede6 /app/src/main
parent progress aan het algoritme, (diff)
Babluki algoritme toegevoegd
Diffstat (limited to 'app/src/main')
-rw-r--r-- app/src/main/java/org/rssin/summaries/BablukiAPI.java 200
-rw-r--r-- app/src/main/java/org/rssin/summaries/SentenceScore.java 21
-rw-r--r-- app/src/main/java/org/rssin/summaries/tester.java 14
3 files changed, 230 insertions, 5 deletions
diff --git a/app/src/main/java/org/rssin/summaries/BablukiAPI.java b/app/src/main/java/org/rssin/summaries/BablukiAPI.java
new file mode 100644
index 0000000..b48cfdf
--- /dev/null
+++ b/app/src/main/java/org/rssin/summaries/BablukiAPI.java
@@ -0,0 +1,200 @@
+/*
+ * To change this license header, choose License Headers in Project Properties.
+ * To change this template file, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package org.rssin.summaries;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.rssin.rss.FeedItem;
+
+/**
+ * Java/SummaryAPI implementation of the Babluki summary tool.
+ *
+ * <p>The text is split into sentences, and every sentence is scored by summing
+ * its word overlap with each other sentence of the whole text. The summary is
+ * then built from the highest-scoring sentence of each paragraph, where
+ * paragraphs are separated by a blank line.
+ *
+ * <p>NOTE: the values stored by {@link #setLengthMode}, {@link #setMaxChars}
+ * and {@link #setMaxLines} are kept in fields but are not consulted when the
+ * summary text is built.
+ *
+ * @author Joep
+ */
+public class BablukiAPI implements SummaryAPIInterface{
+
+    /** Sentence boundary: a '.' or '!' followed by at least one space. */
+    private static final String SENTENCE_BOUNDARY = "(\\.|!) +";
+
+    /** Paragraph boundary: two newlines with only spaces in between. */
+    private static final String PARAGRAPH_BOUNDARY = "\n *\n";
+
+    LengthMode lm;
+    int maxchars, maxlines;
+
+    /**
+     * Summarises a feed item using its description as the text.
+     *
+     * @param f the item to summarise; its title is passed along but does not
+     *          currently influence the result.
+     * @return a Summary wrapping the generated text.
+     */
+    @Override
+    public Summary getSummary(FeedItem f) {
+        return new Summary(getSumText(f.getDescription(), f.getTitle()));
+    }
+
+    /**
+     * Summarises a plain piece of text.
+     *
+     * @param desc the text to summarise.
+     * @return a Summary wrapping the generated text.
+     */
+    @Override
+    public Summary getSummaryFromText(String desc) {
+        return new Summary(getSumText(desc, ""));
+    }
+
+    /**
+     * Builds the summary text: the best-ranked sentence of every paragraph,
+     * each followed by ". ".
+     *
+     * @param content the text to summarise; {@code null} is treated as empty.
+     * @param title   title of the text; currently not used for scoring.
+     * @return the summary, or an empty string when no sentence of the text
+     *         shares a word with another sentence.
+     */
+    public String getSumText(String content, String title)
+    {
+        if (content == null)
+        {
+            return "";
+        }
+
+        // Normalise Windows line endings so blank-line paragraph breaks are found.
+        String text = content.replace("\r\n", "\n");
+
+        SentenceScore[] ranks = getSentenceRanks(text);
+        StringBuilder summary = new StringBuilder();
+
+        for (String paragraph : splitIntoParagraphs(text))
+        {
+            String sentence = getBestSentence(paragraph, ranks);
+            // Paragraphs without a positively scored sentence contribute nothing.
+            if (sentence != null && !sentence.isEmpty())
+            {
+                summary.append(sentence).append(". ");
+            }
+        }
+
+        return summary.toString();
+    }
+
+    /**
+     * Splits text into sentences. Newlines count as spaces. Every sentence is
+     * returned trimmed and without trailing '.' or '!' characters, so the
+     * last sentence of the text is treated like all the others.
+     */
+    private String[] splitIntoSentences(String content)
+    {
+        String[] sentences = content.replace('\n', ' ').split(SENTENCE_BOUNDARY);
+        for (int i = 0; i < sentences.length; i++)
+        {
+            sentences[i] = sentences[i].replaceFirst("[.!\\s]+$", "").trim();
+        }
+        return sentences;
+    }
+
+    /** Splits text into paragraphs at blank lines. */
+    private String[] splitIntoParagraphs(String content)
+    {
+        return content.split(PARAGRAPH_BOUNDARY);
+    }
+
+    /** Returns the distinct, non-empty whitespace-separated words of a sentence. */
+    private Set<String> wordSet(String sentence)
+    {
+        Set<String> words = new HashSet<String>();
+        for (String word : sentence.split("\\s+"))
+        {
+            if (!word.isEmpty())
+            {
+                words.add(word);
+            }
+        }
+        return words;
+    }
+
+    /**
+     * Overlap of two word sets: the number of shared words divided by the
+     * average size of the two sets. Lies in [0, 1]; 0 when both are empty.
+     */
+    private double sentenceIntersection(Set<String> words1, Set<String> words2)
+    {
+        int total = words1.size() + words2.size();
+        if (total == 0)
+        {
+            return 0.0;
+        }
+
+        int shared = 0;
+        for (String word : words1)
+        {
+            if (words2.contains(word))
+            {
+                shared++;
+            }
+        }
+
+        return 2.0 * shared / total;
+    }
+
+    /** Reduces a sentence to its ASCII letters; used as the lookup key for scores. */
+    private String formatSentence(String sentence)
+    {
+        return sentence.replaceAll("[^A-Za-z]", "");
+    }
+
+    /**
+     * Scores every sentence of the text by summing its overlap with all the
+     * other sentences.
+     *
+     * @return one entry per sentence, keyed by the formatted sentence.
+     */
+    private SentenceScore[] getSentenceRanks (String content)
+    {
+        String[] sentences = splitIntoSentences(content);
+        int n = sentences.length;
+
+        // Tokenise each sentence once instead of once per compared pair.
+        List<Set<String>> words = new ArrayList<Set<String>>(n);
+        for (String sentence : sentences)
+        {
+            words.add(wordSet(sentence));
+        }
+
+        // The overlap is symmetric, so each pair is computed once and
+        // credited to both sentences.
+        double[] totals = new double[n];
+        for (int i = 0; i < n; i++)
+        {
+            for (int j = i + 1; j < n; j++)
+            {
+                double overlap = sentenceIntersection(words.get(i), words.get(j));
+                totals[i] += overlap;
+                totals[j] += overlap;
+            }
+        }
+
+        SentenceScore[] scores = new SentenceScore[n];
+        for (int i = 0; i < n; i++)
+        {
+            scores[i] = new SentenceScore(formatSentence(sentences[i]), totals[i]);
+        }
+        return scores;
+    }
+
+    /**
+     * Picks the sentence of a paragraph with the highest score.
+     *
+     * @return the first sentence with the strictly highest positive score, or
+     *         {@code null} when no sentence of the paragraph scores above zero.
+     */
+    private String getBestSentence(String paragraph, SentenceScore[] slist)
+    {
+        String best = null;
+        double maxValue = 0;
+
+        for (String sentence : splitIntoSentences(paragraph))
+        {
+            String key = formatSentence(sentence);
+            if (key.isEmpty())
+            {
+                continue;
+            }
+
+            double score = getScore(key, slist);
+            if (score > maxValue)
+            {
+                maxValue = score;
+                best = sentence;
+            }
+        }
+
+        return best;
+    }
+
+    /** Looks up the score of a formatted sentence; 0.0 when it is not in the list. */
+    private double getScore (String formatted, SentenceScore[] slist)
+    {
+        for (SentenceScore entry : slist)
+        {
+            if (entry.sentence.equals(formatted))
+            {
+                return entry.score;
+            }
+        }
+        return 0.0;
+    }
+
+    /**
+     * Sets the LengthMode.
+     *
+     * @param l Mode to use.
+     */
+    @Override
+    public void setLengthMode(LengthMode l) {
+        lm = l;
+    }
+
+    /**
+     * Sets the maximum number of characters of the summary text.
+     *
+     * @param chars integer value of the amount.
+     */
+    @Override
+    public void setMaxChars(int chars) {
+        maxchars = chars;
+    }
+
+    /**
+     * Sets the maximum number of sentences of the summary text.
+     *
+     * @param lines integer value of the amount.
+     */
+    @Override
+    public void setMaxLines(int lines) {
+        maxlines = lines;
+    }
+
+}
diff --git a/app/src/main/java/org/rssin/summaries/SentenceScore.java b/app/src/main/java/org/rssin/summaries/SentenceScore.java
new file mode 100644
index 0000000..e15e2a1
--- /dev/null
+++ b/app/src/main/java/org/rssin/summaries/SentenceScore.java
@@ -0,0 +1,21 @@
+/*
+ * To change this license header, choose License Headers in Project Properties.
+ * To change this template file, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package org.rssin.summaries;
+
+/**
+ * Immutable pairing of a sentence with the score assigned to it.
+ *
+ * @author Joep
+ */
+public class SentenceScore {
+    /** The sentence text this entry belongs to. */
+    public final String sentence;
+    /** The score assigned to {@link #sentence}. */
+    public final double score;
+
+    /**
+     * Creates a sentence/score pair.
+     *
+     * @param s the sentence text.
+     * @param d the score of that sentence.
+     */
+    public SentenceScore(String s, double d)
+    {
+        this.sentence = s;
+        this.score = d;
+    }
+}
diff --git a/app/src/main/java/org/rssin/summaries/tester.java b/app/src/main/java/org/rssin/summaries/tester.java
index 07c751a..c795c78 100644
--- a/app/src/main/java/org/rssin/summaries/tester.java
+++ b/app/src/main/java/org/rssin/summaries/tester.java
@@ -5,7 +5,10 @@
*/
package org.rssin.summaries;
+import java.net.URL;
+import java.util.Calendar;
import java.util.Scanner;
+import org.rssin.rss.FeedItem;
/**
*
@@ -24,21 +27,22 @@ public class tester {
String s = "";
do {
- t += s;
+ t += s + "\n";
s = scanner.nextLine();
} while (!s.contains("#"));
System.out.println("Tekst geaccepteerd.");
- SummaryAPI sumo = new SummaryAPI();
+ SummaryAPIInterface sumo = new BablukiAPI();
sumo.setLengthMode(LengthMode.LINES);
sumo.setMaxLines(2);
System.out.println("Sumo ingesteld.");
+
+ Summary su = sumo.getSummaryFromText(t);
+ String k = su.getText();
- String k = sumo.getSumText(t);
-
- k = k.trim();
+ //k = k.trim();
System.out.println(k);