aboutsummaryrefslogtreecommitdiff
path: root/app/src/main/java/org
diff options
context:
space:
mode:
authorzkwip2015-05-20 17:32:35 +0200
committerzkwip2015-05-20 17:32:35 +0200
commitf6ed9b9c0f10770bd8da27f677a0777f9465b999 (patch)
treee0b2a1169dc97b7e5c1bd7dee807045d8991d9e4 /app/src/main/java/org
parentstart smmry, pak waarschijnlijk iets anders anyway (diff)
summary shit
geeft wel maar een regel
Diffstat (limited to 'app/src/main/java/org')
-rw-r--r--app/src/main/java/org/rssin/summaries/SentenceItem.java53
-rw-r--r--app/src/main/java/org/rssin/summaries/Special.java60
-rw-r--r--app/src/main/java/org/rssin/summaries/Stemmer.java852
-rw-r--r--app/src/main/java/org/rssin/summaries/Stopword.java74
-rw-r--r--app/src/main/java/org/rssin/summaries/SummaryAPI.java328
-rw-r--r--app/src/main/java/org/rssin/summaries/SummaryAPIInterface.java3
-rw-r--r--app/src/main/java/org/rssin/summaries/TxtStatic.java26
-rw-r--r--app/src/main/java/org/rssin/summaries/WordItem.java78
-rw-r--r--app/src/main/java/org/rssin/summaries/specials.txt18
-rw-r--r--app/src/main/java/org/rssin/summaries/stopwords.txt345
10 files changed, 1787 insertions, 50 deletions
diff --git a/app/src/main/java/org/rssin/summaries/SentenceItem.java b/app/src/main/java/org/rssin/summaries/SentenceItem.java
new file mode 100644
index 0000000..e8e0e0b
--- /dev/null
+++ b/app/src/main/java/org/rssin/summaries/SentenceItem.java
@@ -0,0 +1,53 @@
+/*
+ * To change this license header, choose License Headers in Project Properties.
+ * To change this template file, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package org.rssin.summaries;
+
+/**
+ * A single sentence of the text being summarised.
+ *
+ * Holds the sentence as it was handed in, a second "SR" form of it (set by
+ * the caller through {@link #setSRSentence(String)}) and a weight that is
+ * accumulated through {@link #addWeight(double)}.
+ */
+public class SentenceItem
+{
+    /** Sentence text as supplied to the constructor or the raw setter. */
+    private String rsen = "";
+    /** Secondary ("SR") form of the sentence; empty until a caller sets it. */
+    private String srsen = "";
+    /** Running total of all weights added so far. */
+    private double wght;
+
+    /**
+     * Creates an item for the given sentence with a weight of zero.
+     *
+     * @param s the sentence text; must not be null
+     */
+    public SentenceItem(String s)
+    {
+        this.rsen = new String(s);
+        this.wght = 0.0;
+    }
+
+    /** @return the raw sentence text */
+    public String getRawSentence()
+    {
+        return this.rsen;
+    }
+
+    /**
+     * Replaces the raw sentence text.
+     *
+     * @param sen the new raw sentence
+     */
+    public void setRawSentecse(String sen)
+    {
+        this.rsen = sen;
+    }
+
+    /** @return the "SR" form of the sentence */
+    public String getSRSentence()
+    {
+        return this.srsen;
+    }
+
+    /**
+     * Replaces the "SR" form of the sentence.
+     *
+     * @param rsen the new "SR" sentence text
+     */
+    public void setSRSentence(String rsen)
+    {
+        this.srsen = rsen;
+    }
+
+    /** @return the weight accumulated so far */
+    public double getWeight()
+    {
+        return this.wght;
+    }
+
+    /**
+     * Adds the given amount to the accumulated weight.
+     *
+     * @param wg the amount to add
+     */
+    public void addWeight(double wg)
+    {
+        this.wght += wg;
+    }
+} \ No newline at end of file
diff --git a/app/src/main/java/org/rssin/summaries/Special.java b/app/src/main/java/org/rssin/summaries/Special.java
new file mode 100644
index 0000000..36cb0ad
--- /dev/null
+++ b/app/src/main/java/org/rssin/summaries/Special.java
@@ -0,0 +1,60 @@
+package org.rssin.summaries;
+
+import java.io.*;
+import java.util.StringTokenizer;
+
+/**
+ * Strips a fixed set of "special" characters from sentences.
+ * The character set is obtained from {@link TxtStatic#getSpecial()}.
+ */
+public class Special
+{
+
+    /** Characters that {@link #remove(String)} deletes from its input. */
+    char spch[];
+
+    /** Loads the special-character set from {@link TxtStatic}. */
+    public Special()
+    {
+        spch = new TxtStatic().getSpecial();
+    }
+
+    /**
+     * Splits the text on whitespace and returns the first character of every token.
+     *
+     * @param sen whitespace-separated text
+     * @return one character per token, in token order
+     */
+    public char[] getTokens(String sen)
+    {
+        StringTokenizer stk = new StringTokenizer(sen);
+        char[] firstChars = new char[stk.countTokens()];
+        for (int i = 0; stk.hasMoreTokens(); i++)
+        {
+            firstChars[i] = stk.nextToken().charAt(0);
+        }
+        return firstChars;
+    }
+
+    /**
+     * Returns a copy of the sentence with every special character removed.
+     * The sentence is filtered in a single pass into a StringBuilder instead of
+     * being rebuilt by string concatenation once per special character.
+     *
+     * @param sen the sentence to clean; must not be null
+     * @return the sentence without any of the special characters
+     */
+    public String remove(String sen)
+    {
+        StringBuilder kept = new StringBuilder(sen.length());
+        for (int i = 0; i < sen.length(); i++)
+        {
+            char c = sen.charAt(i);
+            if (!isSpecial(c))
+            {
+                kept.append(c);
+            }
+        }
+        return kept.toString();
+    }
+
+    /** Tells whether the character is one of the special characters. */
+    private boolean isSpecial(char c)
+    {
+        for (char special : spch)
+        {
+            if (special == c)
+            {
+                return true;
+            }
+        }
+        return false;
+    }
+
+} \ No newline at end of file
diff --git a/app/src/main/java/org/rssin/summaries/Stemmer.java b/app/src/main/java/org/rssin/summaries/Stemmer.java
new file mode 100644
index 0000000..1cad62f
--- /dev/null
+++ b/app/src/main/java/org/rssin/summaries/Stemmer.java
@@ -0,0 +1,852 @@
+package org.rssin.summaries;
+
+import java.util.*;
+
+public class Stemmer {
+
+
+ // Mode switch consulted while the tables are filled and by condition 'b' in
+ // removeEnding. It is false here, so the "!m_CompMode" branches are the ones taken.
+ private static boolean m_CompMode = false;
+ // Ending tables: m_lN maps every N-character ending to a condition code.
+ // removeEnding looks an ending up by its length and switches on the first
+ // character of the code to decide whether the ending may be removed.
+ private static HashMap m_l11 = null;
+ private static HashMap m_l10 = null;
+ private static HashMap m_l9 = null;
+ private static HashMap m_l8 = null;
+ private static HashMap m_l7 = null;
+ private static HashMap m_l6 = null;
+ private static HashMap m_l5 = null;
+ private static HashMap m_l4 = null;
+ private static HashMap m_l3 = null;
+ private static HashMap m_l2 = null;
+ private static HashMap m_l1 = null;
+
+ // Fills the ending tables once, when the class is loaded.
+ static {
+
+ // Endings of 11 characters
+ m_l11 = new HashMap();
+ m_l11.put("alistically", "B");
+ m_l11.put("arizability", "A");
+ m_l11.put("izationally", "B");
+ // Endings of 10 characters
+ m_l10 = new HashMap();
+ m_l10.put("antialness", "A");
+ m_l10.put("arisations", "A");
+ m_l10.put("arizations", "A");
+ m_l10.put("entialness", "A");
+ // Endings of 9 characters
+ m_l9 = new HashMap();
+ m_l9.put("allically", "C");
+ m_l9.put("antaneous", "A");
+ m_l9.put("antiality", "A");
+ m_l9.put("arisation", "A");
+ m_l9.put("arization", "A");
+ m_l9.put("ationally", "B");
+ m_l9.put("ativeness", "A");
+ m_l9.put("eableness", "E");
+ m_l9.put("entations", "A");
+ m_l9.put("entiality", "A");
+ m_l9.put("entialize", "A");
+ m_l9.put("entiation", "A");
+ m_l9.put("ionalness", "A");
+ m_l9.put("istically", "A");
+ m_l9.put("itousness", "A");
+ m_l9.put("izability", "A");
+ m_l9.put("izational", "A");
+ // Endings of 8 characters
+ m_l8 = new HashMap();
+ m_l8.put("ableness", "A");
+ m_l8.put("arizable", "A");
+ m_l8.put("entation", "A");
+ m_l8.put("entially", "A");
+ m_l8.put("eousness", "A");
+ m_l8.put("ibleness", "A");
+ m_l8.put("icalness", "A");
+ m_l8.put("ionalism", "A");
+ m_l8.put("ionality", "A");
+ m_l8.put("ionalize", "A");
+ m_l8.put("iousness", "A");
+ m_l8.put("izations", "A");
+ m_l8.put("lessness", "A");
+ // Endings of 7 characters
+ m_l7 = new HashMap();
+ m_l7.put("ability", "A");
+ m_l7.put("aically", "A");
+ m_l7.put("alistic", "B");
+ m_l7.put("alities", "A");
+ m_l7.put("ariness", "E");
+ m_l7.put("aristic", "A");
+ m_l7.put("arizing", "A");
+ m_l7.put("ateness", "A");
+ m_l7.put("atingly", "A");
+ m_l7.put("ational", "B");
+ m_l7.put("atively", "A");
+ m_l7.put("ativism", "A");
+ m_l7.put("elihood", "E");
+ m_l7.put("encible", "A");
+ m_l7.put("entally", "A");
+ m_l7.put("entials", "A");
+ m_l7.put("entiate", "A");
+ m_l7.put("entness", "A");
+ m_l7.put("fulness", "A");
+ m_l7.put("ibility", "A");
+ m_l7.put("icalism", "A");
+ m_l7.put("icalist", "A");
+ m_l7.put("icality", "A");
+ m_l7.put("icalize", "A");
+ m_l7.put("ication", "G");
+ m_l7.put("icianry", "A");
+ m_l7.put("ination", "A");
+ m_l7.put("ingness", "A");
+ m_l7.put("ionally", "A");
+ m_l7.put("isation", "A");
+ m_l7.put("ishness", "A");
+ m_l7.put("istical", "A");
+ m_l7.put("iteness", "A");
+ m_l7.put("iveness", "A");
+ m_l7.put("ivistic", "A");
+ m_l7.put("ivities", "A");
+ m_l7.put("ization", "F");
+ m_l7.put("izement", "A");
+ m_l7.put("oidally", "A");
+ m_l7.put("ousness", "A");
+ // Endings of 6 characters
+ m_l6 = new HashMap();
+ m_l6.put("aceous", "A");
+ m_l6.put("acious", "B");
+ m_l6.put("action", "G");
+ m_l6.put("alness", "A");
+ m_l6.put("ancial", "A");
+ m_l6.put("ancies", "A");
+ m_l6.put("ancing", "B");
+ m_l6.put("ariser", "A");
+ m_l6.put("arized", "A");
+ m_l6.put("arizer", "A");
+ m_l6.put("atable", "A");
+ m_l6.put("ations", "B");
+ m_l6.put("atives", "A");
+ m_l6.put("eature", "Z");
+ m_l6.put("efully", "A");
+ m_l6.put("encies", "A");
+ m_l6.put("encing", "A");
+ m_l6.put("ential", "A");
+ m_l6.put("enting", "C");
+ m_l6.put("entist", "A");
+ m_l6.put("eously", "A");
+ m_l6.put("ialist", "A");
+ m_l6.put("iality", "A");
+ m_l6.put("ialize", "A");
+ m_l6.put("ically", "A");
+ m_l6.put("icance", "A");
+ m_l6.put("icians", "A");
+ m_l6.put("icists", "A");
+ m_l6.put("ifully", "A");
+ m_l6.put("ionals", "A");
+ m_l6.put("ionate", "D");
+ m_l6.put("ioning", "A");
+ m_l6.put("ionist", "A");
+ m_l6.put("iously", "A");
+ m_l6.put("istics", "A");
+ m_l6.put("izable", "E");
+ m_l6.put("lessly", "A");
+ m_l6.put("nesses", "A");
+ m_l6.put("oidism", "A");
+ // Endings of 5 characters
+ m_l5 = new HashMap();
+ m_l5.put("acies", "A");
+ m_l5.put("acity", "A");
+ m_l5.put("aging", "B");
+ m_l5.put("aical", "A");
+ // "alist" is only registered when m_CompMode is off
+ if (!m_CompMode) {
+ m_l5.put("alist", "A");
+ }
+ m_l5.put("alism", "B");
+ m_l5.put("ality", "A");
+ m_l5.put("alize", "A");
+ m_l5.put("allic", "b");
+ m_l5.put("anced", "B");
+ m_l5.put("ances", "B");
+ m_l5.put("antic", "C");
+ m_l5.put("arial", "A");
+ m_l5.put("aries", "A");
+ m_l5.put("arily", "A");
+ m_l5.put("arity", "B");
+ m_l5.put("arize", "A");
+ m_l5.put("aroid", "A");
+ m_l5.put("ately", "A");
+ m_l5.put("ating", "I");
+ m_l5.put("ation", "B");
+ m_l5.put("ative", "A");
+ m_l5.put("ators", "A");
+ m_l5.put("atory", "A");
+ m_l5.put("ature", "E");
+ m_l5.put("early", "Y");
+ m_l5.put("ehood", "A");
+ m_l5.put("eless", "A");
+ // The two modes register different spellings here: "elily" vs. "elity"
+ if (!m_CompMode) {
+ m_l5.put("elily", "A");
+ } else {
+ m_l5.put("elity", "A");
+ }
+ m_l5.put("ement", "A");
+ m_l5.put("enced", "A");
+ m_l5.put("ences", "A");
+ m_l5.put("eness", "E");
+ m_l5.put("ening", "E");
+ m_l5.put("ental", "A");
+ m_l5.put("ented", "C");
+ m_l5.put("ently", "A");
+ m_l5.put("fully", "A");
+ m_l5.put("ially", "A");
+ m_l5.put("icant", "A");
+ m_l5.put("ician", "A");
+ m_l5.put("icide", "A");
+ m_l5.put("icism", "A");
+ m_l5.put("icist", "A");
+ m_l5.put("icity", "A");
+ m_l5.put("idine", "I");
+ m_l5.put("iedly", "A");
+ m_l5.put("ihood", "A");
+ m_l5.put("inate", "A");
+ m_l5.put("iness", "A");
+ m_l5.put("ingly", "B");
+ m_l5.put("inism", "J");
+ m_l5.put("inity", "c");
+ m_l5.put("ional", "A");
+ m_l5.put("ioned", "A");
+ m_l5.put("ished", "A");
+ m_l5.put("istic", "A");
+ m_l5.put("ities", "A");
+ m_l5.put("itous", "A");
+ m_l5.put("ively", "A");
+ m_l5.put("ivity", "A");
+ m_l5.put("izers", "F");
+ m_l5.put("izing", "F");
+ m_l5.put("oidal", "A");
+ m_l5.put("oides", "A");
+ m_l5.put("otide", "A");
+ m_l5.put("ously", "A");
+ // Endings of 4 characters
+ m_l4 = new HashMap();
+ m_l4.put("able", "A");
+ m_l4.put("ably", "A");
+ m_l4.put("ages", "B");
+ m_l4.put("ally", "B");
+ m_l4.put("ance", "B");
+ m_l4.put("ancy", "B");
+ m_l4.put("ants", "B");
+ m_l4.put("aric", "A");
+ m_l4.put("arly", "K");
+ m_l4.put("ated", "I");
+ m_l4.put("ates", "A");
+ m_l4.put("atic", "B");
+ m_l4.put("ator", "A");
+ m_l4.put("ealy", "Y");
+ m_l4.put("edly", "E");
+ m_l4.put("eful", "A");
+ m_l4.put("eity", "A");
+ m_l4.put("ence", "A");
+ m_l4.put("ency", "A");
+ m_l4.put("ened", "E");
+ m_l4.put("enly", "E");
+ m_l4.put("eous", "A");
+ m_l4.put("hood", "A");
+ m_l4.put("ials", "A");
+ m_l4.put("ians", "A");
+ m_l4.put("ible", "A");
+ m_l4.put("ibly", "A");
+ m_l4.put("ical", "A");
+ m_l4.put("ides", "L");
+ m_l4.put("iers", "A");
+ m_l4.put("iful", "A");
+ m_l4.put("ines", "M");
+ m_l4.put("ings", "N");
+ m_l4.put("ions", "B");
+ m_l4.put("ious", "A");
+ m_l4.put("isms", "B");
+ m_l4.put("ists", "A");
+ m_l4.put("itic", "H");
+ m_l4.put("ized", "F");
+ m_l4.put("izer", "F");
+ m_l4.put("less", "A");
+ m_l4.put("lily", "A");
+ m_l4.put("ness", "A");
+ m_l4.put("ogen", "A");
+ m_l4.put("ward", "A");
+ m_l4.put("wise", "A");
+ m_l4.put("ying", "B");
+ m_l4.put("yish", "A");
+ // Endings of 3 characters
+ m_l3 = new HashMap();
+ m_l3.put("acy", "A");
+ m_l3.put("age", "B");
+ m_l3.put("aic", "A");
+ m_l3.put("als", "b");
+ m_l3.put("ant", "B");
+ m_l3.put("ars", "O");
+ m_l3.put("ary", "F");
+ m_l3.put("ata", "A");
+ m_l3.put("ate", "A");
+ m_l3.put("eal", "Y");
+ m_l3.put("ear", "Y");
+ m_l3.put("ely", "E");
+ m_l3.put("ene", "E");
+ m_l3.put("ent", "C");
+ m_l3.put("ery", "E");
+ m_l3.put("ese", "A");
+ m_l3.put("ful", "A");
+ m_l3.put("ial", "A");
+ m_l3.put("ian", "A");
+ m_l3.put("ics", "A");
+ m_l3.put("ide", "L");
+ m_l3.put("ied", "A");
+ m_l3.put("ier", "A");
+ m_l3.put("ies", "P");
+ m_l3.put("ily", "A");
+ m_l3.put("ine", "M");
+ m_l3.put("ing", "N");
+ m_l3.put("ion", "Q");
+ m_l3.put("ish", "C");
+ m_l3.put("ism", "B");
+ m_l3.put("ist", "A");
+ m_l3.put("ite", "a");
+ m_l3.put("ity", "A");
+ m_l3.put("ium", "A");
+ m_l3.put("ive", "A");
+ m_l3.put("ize", "F");
+ m_l3.put("oid", "A");
+ m_l3.put("one", "R");
+ m_l3.put("ous", "A");
+ // Endings of 2 characters
+ m_l2 = new HashMap();
+ m_l2.put("ae", "A");
+ m_l2.put("al", "b");
+ m_l2.put("ar", "X");
+ m_l2.put("as", "B");
+ m_l2.put("ed", "E");
+ m_l2.put("en", "F");
+ m_l2.put("es", "E");
+ m_l2.put("ia", "A");
+ m_l2.put("ic", "A");
+ m_l2.put("is", "A");
+ m_l2.put("ly", "B");
+ m_l2.put("on", "S");
+ m_l2.put("or", "T");
+ m_l2.put("um", "U");
+ m_l2.put("us", "V");
+ m_l2.put("yl", "R");
+ m_l2.put("s\'", "A");
+ m_l2.put("\'s", "A");
+ // Endings of 1 character
+ m_l1 = new HashMap();
+ m_l1.put("a", "A");
+ m_l1.put("e", "A");
+ m_l1.put("i", "A");
+ m_l1.put("o", "A");
+ m_l1.put("s", "W");
+ m_l1.put("y", "B");
+ }
+
+ /**
+ * Removes the longest registered ending whose condition is satisfied.
+ *
+ * Ending lengths are tried from 11 down to 1. A length is only tried when
+ * more than one character would remain in front of the ending. If the
+ * ending is found in the table for its length, the first character of its
+ * condition code selects a check on the remaining stem; when the check
+ * passes the word is returned without the ending, otherwise shorter
+ * endings are tried.
+ *
+ * @param word the word to strip (stem() passes it already lower-cased)
+ * @return the word without the matched ending, or the word unchanged if no
+ * ending qualifies
+ * @throws IllegalArgumentException if a table entry carries a condition code
+ * that the switch does not know
+ */
+ private String removeEnding(String word) {
+
+ int length = word.length();
+ // el = length of the ending currently being tried, longest first
+ int el = 11;
+
+ while (el > 0) {
+ // at least two characters must remain in front of the ending
+ if (length - el > 1) {
+ String ending = word.substring(length - el);
+ String conditionCode = null;
+ // pick the table that holds endings of exactly el characters
+ switch (el) {
+ case 11: conditionCode = (String)m_l11.get(ending);
+ break;
+ case 10: conditionCode = (String)m_l10.get(ending);
+ break;
+ case 9: conditionCode = (String)m_l9.get(ending);
+ break;
+ case 8: conditionCode = (String)m_l8.get(ending);
+ break;
+ case 7: conditionCode = (String)m_l7.get(ending);
+ break;
+ case 6: conditionCode = (String)m_l6.get(ending);
+ break;
+ case 5: conditionCode = (String)m_l5.get(ending);
+ break;
+ case 4: conditionCode = (String)m_l4.get(ending);
+ break;
+ case 3: conditionCode = (String)m_l3.get(ending);
+ break;
+ case 2: conditionCode = (String)m_l2.get(ending);
+ break;
+ case 1: conditionCode = (String)m_l1.get(ending);
+ break;
+ default:
+ }
+ // null means the ending is not registered for this length
+ if (conditionCode != null) {
+ // In the checks below, (length - el) is the length of the remaining
+ // stem and charAt(length - el - 1) is the stem's last character.
+ switch (conditionCode.charAt(0)) {
+ // A: no restriction
+ case 'A':
+ return word.substring(0, length - el);
+ // B: stem must be longer than 2 characters
+ case 'B':
+ if (length - el > 2) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // C: stem must be longer than 3 characters
+ case 'C':
+ if (length - el > 3) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // D: stem must be longer than 4 characters
+ case 'D':
+ if (length - el > 4) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // E: stem must not end in 'e'
+ case 'E':
+ if (word.charAt(length - el - 1) != 'e') {
+ return word.substring(0, length - el);
+ }
+ break;
+ // F: stem longer than 2 characters and not ending in 'e'
+ case 'F':
+ if ((length - el > 2) &&
+ (word.charAt(length - el - 1) != 'e')) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // G: stem longer than 2 characters and ending in 'f'
+ case 'G':
+ if ((length - el > 2) &&
+ (word.charAt(length - el - 1) == 'f')) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // H: stem ends in 't' or in "ll"
+ case 'H':
+ if ((word.charAt(length - el - 1) == 't') ||
+ ((word.charAt(length - el - 1) == 'l') &&
+ (word.charAt(length - el - 2) == 'l'))) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // I: stem must not end in 'o' or 'e'
+ case 'I':
+ if ((word.charAt(length - el - 1) != 'o') &&
+ (word.charAt(length - el - 1) != 'e')) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // J: stem must not end in 'a' or 'e'
+ case 'J':
+ if ((word.charAt(length - el - 1) != 'a') &&
+ (word.charAt(length - el - 1) != 'e')) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // K: stem longer than 2 characters, ending in 'l', 'i', or 'u' + any char + 'e'
+ case 'K':
+ if ((length - el > 2) &&
+ ((word.charAt(length - el - 1) == 'l') ||
+ (word.charAt(length - el - 1) == 'i') ||
+ ((word.charAt(length - el - 1) == 'e') &&
+ (word.charAt(length - el - 3) == 'u')))) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // L: stem must not end in 'u' or 'x', nor in 's' unless that 's' follows 'o'
+ case 'L':
+ if ((word.charAt(length - el - 1) != 'u') &&
+ (word.charAt(length - el - 1) != 'x') &&
+ ((word.charAt(length - el - 1) != 's') ||
+ (word.charAt(length - el - 2) == 'o'))) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // M: stem must not end in 'a', 'c', 'e' or 'm'
+ case 'M':
+ if ((word.charAt(length - el - 1) != 'a') &&
+ (word.charAt(length - el - 1) != 'c') &&
+ (word.charAt(length - el - 1) != 'e') &&
+ (word.charAt(length - el - 1) != 'm')) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // N: stem longer than 3 characters, or exactly 3 and not starting with 's'
+ case 'N':
+ if ((length - el > 3) ||
+ ((length - el == 3) &&
+ ((word.charAt(length - el - 3) != 's')))) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // O: stem ends in 'l' or 'i'
+ case 'O':
+ if ((word.charAt(length - el - 1) == 'l') ||
+ (word.charAt(length - el - 1) == 'i')) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // P: stem must not end in 'c'
+ case 'P':
+ if (word.charAt(length - el - 1) != 'c') {
+ return word.substring(0, length - el);
+ }
+ break;
+ // Q: stem longer than 2 characters and not ending in 'l' or 'n'
+ case 'Q':
+ if ((length - el > 2) &&
+ (word.charAt(length - el - 1) != 'l') &&
+ (word.charAt(length - el - 1) != 'n')) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // R: stem ends in 'n' or 'r'
+ case 'R':
+ if ((word.charAt(length - el - 1) == 'n') ||
+ (word.charAt(length - el - 1) == 'r')) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // S: stem ends in "dr", or in 't' that does not follow another 't'
+ case 'S':
+ if (((word.charAt(length - el - 1) == 'r') &&
+ (word.charAt(length - el - 2) == 'd')) ||
+ ((word.charAt(length - el - 1) == 't') &&
+ (word.charAt(length - el - 2) != 't'))) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // T: stem ends in 's', or in 't' that does not follow 'o'
+ case 'T':
+ if ((word.charAt(length - el - 1) == 's') ||
+ ((word.charAt(length - el - 1) == 't') &&
+ (word.charAt(length - el - 2) != 'o'))) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // U: stem ends in 'l', 'm', 'n' or 'r'
+ case 'U':
+ if ((word.charAt(length - el - 1) == 'l') ||
+ (word.charAt(length - el - 1) == 'm') ||
+ (word.charAt(length - el - 1) == 'n') ||
+ (word.charAt(length - el - 1) == 'r')) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // V: stem ends in 'c'
+ case 'V':
+ if (word.charAt(length - el - 1) == 'c') {
+ return word.substring(0, length - el);
+ }
+ break;
+ // W: stem must not end in 's' or 'u'
+ case 'W':
+ if ((word.charAt(length - el - 1) != 's') &&
+ (word.charAt(length - el - 1) != 'u')) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // X: stem ends in 'l' or 'i', or (longer than 2) in 'u' + any char + 'e'
+ case 'X':
+ if ((word.charAt(length - el - 1) == 'l') ||
+ (word.charAt(length - el - 1) == 'i') ||
+ ((length - el > 2) &&
+ (word.charAt(length - el - 1) == 'e') &&
+ (word.charAt(length - el - 3) == 'u'))) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // Y: stem ends in "in"
+ case 'Y':
+ if ((word.charAt(length - el - 1) == 'n') &&
+ (word.charAt(length - el - 2) == 'i')) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // Z: stem must not end in 'f'
+ case 'Z':
+ if (word.charAt(length - el - 1) != 'f') {
+ return word.substring(0, length - el);
+ }
+ break;
+ // a: stem ends in d, f, ph, th, l, er, or, es or t
+ case 'a':
+ if ((word.charAt(length - el - 1) == 'd') ||
+ (word.charAt(length - el - 1) == 'f') ||
+ (((word.charAt(length - el - 1) == 'h') &&
+ (word.charAt(length - el - 2) == 'p'))) ||
+ (((word.charAt(length - el - 1) == 'h') &&
+ (word.charAt(length - el - 2) == 't'))) ||
+ (word.charAt(length - el - 1) == 'l') ||
+ (((word.charAt(length - el - 1) == 'r') &&
+ (word.charAt(length - el - 2) == 'e'))) ||
+ (((word.charAt(length - el - 1) == 'r') &&
+ (word.charAt(length - el - 2) == 'o'))) ||
+ (((word.charAt(length - el - 1) == 's') &&
+ (word.charAt(length - el - 2) == 'e'))) ||
+ (word.charAt(length - el - 1) == 't')) {
+ return word.substring(0, length - el);
+ }
+ break;
+ // b: stem must not end in "met" or "ryst"; the exact length limits
+ // depend on m_CompMode
+ case 'b':
+ if (m_CompMode) {
+ if (((length - el == 3 ) &&
+ (!((word.charAt(length - el - 1) == 't') &&
+ (word.charAt(length - el - 2) == 'e') &&
+ (word.charAt(length - el - 3) == 'm')))) ||
+ ((length - el > 3) &&
+ (!((word.charAt(length - el - 1) == 't') &&
+ (word.charAt(length - el - 2) == 's') &&
+ (word.charAt(length - el - 3) == 'y') &&
+ (word.charAt(length - el - 4) == 'r'))))) {
+ return word.substring(0, length - el);
+ }
+ } else {
+ if ((length - el > 2) &&
+ (!((word.charAt(length - el - 1) == 't') &&
+ (word.charAt(length - el - 2) == 'e') &&
+ (word.charAt(length - el - 3) == 'm'))) &&
+ ((length - el < 4) ||
+ (!((word.charAt(length - el - 1) == 't') &&
+ (word.charAt(length - el - 2) == 's') &&
+ (word.charAt(length - el - 3) == 'y') &&
+ (word.charAt(length - el - 4) == 'r'))))) {
+ return word.substring(0, length - el);
+ }
+ }
+ break;
+ // c: stem ends in 'l'
+ case 'c':
+ if (word.charAt(length - el - 1) == 'l') {
+ return word.substring(0, length - el);
+ }
+ break;
+ default:
+ throw new IllegalArgumentException("Fatal error.");
+ }
+ }
+ }
+ // condition failed or ending unknown: try the next shorter ending
+ el--;
+ }
+ return word;
+ }
+
+ /**
+  * Recoding rules 2 to 34, applied in this order. Each row is
+  * { ending, replacement, exceptions }: the ending is replaced unless the
+  * character directly in front of it is one of the exception characters.
+  * An empty exception string means the rule is unconditional.
+  */
+ private static final String[][] RECODE_RULES = {
+     {"iev", "ief", ""},      // Rule 2
+     {"uct", "uc", ""},       // Rule 3
+     {"umpt", "um", ""},      // Rule 4
+     {"rpt", "rb", ""},       // Rule 5
+     {"urs", "ur", ""},       // Rule 6
+     {"istr", "ister", ""},   // Rule 7
+     {"metr", "meter", ""},   // Rule 7a
+     {"olv", "olut", ""},     // Rule 8
+     {"ul", "l", "aio"},      // Rule 9
+     {"bex", "bic", ""},      // Rule 10
+     {"dex", "dic", ""},      // Rule 11
+     {"pex", "pic", ""},      // Rule 12
+     {"tex", "tic", ""},      // Rule 13
+     {"ax", "ac", ""},        // Rule 14
+     {"ex", "ec", ""},        // Rule 15
+     {"ix", "ic", ""},        // Rule 16
+     {"lux", "luc", ""},      // Rule 17
+     {"uad", "uas", ""},      // Rule 18
+     {"vad", "vas", ""},      // Rule 19
+     {"cid", "cis", ""},      // Rule 20
+     {"lid", "lis", ""},      // Rule 21
+     {"erid", "eris", ""},    // Rule 22
+     {"pand", "pans", ""},    // Rule 23
+     {"end", "ens", "s"},     // Rule 24
+     {"ond", "ons", ""},      // Rule 25
+     {"lud", "lus", ""},      // Rule 26
+     {"rud", "rus", ""},      // Rule 27
+     {"her", "hes", "pt"},    // Rule 28
+     {"mit", "mis", ""},      // Rule 29
+     // Rule 30 used to test "end" a second time, which cancelled the
+     // "except after s" exception of Rule 24 and never recoded "-ent".
+     {"ent", "ens", "m"},     // Rule 30
+     {"ert", "ers", ""},      // Rule 31
+     {"et", "es", "n"},       // Rule 32
+     {"yt", "ys", ""},        // Rule 33
+     {"yz", "ys", ""},        // Rule 34
+ };
+
+ /**
+  * Normalises the end of a stem after an ending has been removed.
+  *
+  * Rule 1 drops one letter of a trailing double b, d, g, l, m, n, p, r, s
+  * or t. Rules 2 to 34 are then applied one after another; every rule sees
+  * the result of the previous ones.
+  *
+  * @param word the stem to recode
+  * @return the recoded stem
+  */
+ private String recodeEnding(String word) {
+
+     // Rule 1: undouble a trailing consonant pair
+     int len = word.length();
+     if (len >= 2) {
+         char last = word.charAt(len - 1);
+         if (last == word.charAt(len - 2) && "bdglmnprst".indexOf(last) >= 0) {
+             word = word.substring(0, len - 1);
+         }
+     }
+
+     // Rules 2-34
+     for (String[] rule : RECODE_RULES) {
+         String ending = rule[0];
+         if (!word.endsWith(ending)) {
+             continue;
+         }
+         // position of the character directly in front of the ending
+         int before = word.length() - ending.length() - 1;
+         boolean blocked = before >= 0 && rule[2].indexOf(word.charAt(before)) >= 0;
+         if (!blocked) {
+             word = word.substring(0, word.length() - ending.length()).concat(rule[1]);
+         }
+     }
+
+     return word;
+ }
+
+ /**
+  * Returns the stem of a single word.
+  *
+  * The word is lower-cased with {@code Locale.ROOT} so that the result does
+  * not depend on the default locale (the ending tables contain plain ASCII
+  * letters). Words of one or two characters are only lower-cased; longer
+  * words additionally have their ending removed and recoded.
+  *
+  * @param word the word to stem; must not be null
+  * @return the lower-cased stem
+  */
+ public String stem(String word) {
+
+     String lower = word.toLowerCase(Locale.ROOT);
+     if (word.length() > 2) {
+         return recodeEnding(removeEnding(lower));
+     }
+     return lower;
+ }
+
+ /**
+  * Stems every word of a text and leaves everything else in place.
+  *
+  * A word starts at a letter or digit and runs until the next character
+  * that is neither a letter, a digit nor an apostrophe. Apostrophes inside
+  * a word stay part of the word; apostrophes outside a word are copied
+  * through, like all other separator characters.
+  *
+  * @param str the text to process
+  * @return the text with each word replaced by its stem
+  */
+ public String stemString(String str) {
+
+     StringBuilder out = new StringBuilder();
+     int wordStart = -1; // -1 while the scan is outside a word
+     final int n = str.length();
+
+     for (int pos = 0; pos < n; pos++) {
+         char ch = str.charAt(pos);
+         boolean inWord = wordStart != -1;
+
+         if (Character.isLetterOrDigit(ch)) {
+             if (!inWord) {
+                 wordStart = pos;
+             }
+             continue;
+         }
+         if (ch == '\'') {
+             if (!inWord) {
+                 out.append(ch);
+             }
+             continue;
+         }
+         // separator: flush the pending word, then copy the separator
+         if (inWord) {
+             out.append(stem(str.substring(wordStart, pos)));
+             wordStart = -1;
+         }
+         out.append(ch);
+     }
+
+     // flush a word that runs to the end of the text
+     if (wordStart != -1) {
+         out.append(stem(str.substring(wordStart, n)));
+     }
+     return out.toString();
+ }
+
+}
+
+
diff --git a/app/src/main/java/org/rssin/summaries/Stopword.java b/app/src/main/java/org/rssin/summaries/Stopword.java
new file mode 100644
index 0000000..af3bee6
--- /dev/null
+++ b/app/src/main/java/org/rssin/summaries/Stopword.java
@@ -0,0 +1,74 @@
+package org.rssin.summaries;
+
+import java.io.*;
+import java.util.StringTokenizer;
+
+/**
+ * Filters stopwords out of sentences.
+ * The stopword list is obtained from {@link TxtStatic#getStopWords()}.
+ */
+public class Stopword
+{
+
+    /** The stopwords, compared case-insensitively. */
+    String stwd[];
+
+    /** Loads the stopword list from {@link TxtStatic}. */
+    public Stopword ()
+    {
+        stwd = new TxtStatic().getStopWords();
+    }
+
+    /** Prints every stopword on its own line to standard output. */
+    public void display ()
+    {
+        for (String word : stwd)
+        {
+            System.out.println(word);
+        }
+    }
+
+    /**
+     * Tells whether the word is a stopword, ignoring case.
+     *
+     * @param word the word to look up
+     * @return true if the word equals one of the stopwords
+     */
+    public boolean isStopword( String word)
+    {
+        for (String candidate : stwd)
+        {
+            if (candidate.equalsIgnoreCase(word))
+            {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Splits the text on whitespace.
+     *
+     * @param sen the text to split
+     * @return the tokens in their original order
+     */
+    public String[] getTokens(String sen)
+    {
+        StringTokenizer stk = new StringTokenizer(sen);
+        String[] words = new String[stk.countTokens()];
+        for (int i = 0; stk.hasMoreTokens(); i++)
+        {
+            words[i] = stk.nextToken();
+        }
+        return words;
+    }
+
+    /**
+     * Returns the sentence without its stopwords.
+     * Every kept token is followed by a single space, so a non-empty result
+     * ends with a space.
+     *
+     * @param sen the sentence to filter
+     * @return the remaining tokens, each followed by one space
+     */
+    public String remove(String sen)
+    {
+        StringBuilder kept = new StringBuilder();
+        for (String word : getTokens(sen))
+        {
+            if (!isStopword(word))
+            {
+                kept.append(word).append(' ');
+            }
+        }
+        return kept.toString();
+    }
+
+} \ No newline at end of file
diff --git a/app/src/main/java/org/rssin/summaries/SummaryAPI.java b/app/src/main/java/org/rssin/summaries/SummaryAPI.java
index 68bc38d..83c3fa6 100644
--- a/app/src/main/java/org/rssin/summaries/SummaryAPI.java
+++ b/app/src/main/java/org/rssin/summaries/SummaryAPI.java
@@ -1,53 +1,283 @@
+/*
+ * To change this license header, choose License Headers in Project Properties.
+ * To change this template file, choose Tools | Templates
+ * and open the template in the editor.
+ */
package org.rssin.summaries;
-import java.net.URL;
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.util.Set;
+import org.rssin.rss.FeedItem;
+
+/**
+ * Summary API van AST
+ * @author Joep
+ */
public class SummaryAPI implements SummaryAPIInterface {
-
- private final String APIURL = "http://api.smmry.com/";
- private final String APIKEY = "D5DDCDBD6F";
- private final int LINES = 3;
-
- public SummaryAPI()
- {
-
- }
-
- @Override;
- public Summary getSummary(FeedItem f)
- {
- String desc = f.description;
- String sum = sendRequest(desc);
-
- }
-
- private String sendRequest(String desc)
- {
- String q = APIURL + "?SM_API_KEY=" + APIKEY + "&SM_LENGTH=" + LINES;
-
- // Create a new HttpClient and Post Header
- HttpClient httpclient = new DefaultHttpClient();
- HttpPost httppost = new HttpPost(q);
-
- try {
-
- List<NameValuePair> nameValuePairs = new ArrayList<NameValuePair>(2);
- nameValuePairs.add(new BasicNameValuePair("sm_api_input", desc));
- httppost.setEntity(new UrlEncodedFormEntity(nameValuePairs));
-
- // Execute HTTP Post Request
- HttpResponse response = httpclient.execute(httppost);
-
- HttpEntity con = response.getEntity();
- InputStream in = con.getInputStream();
- String encoding = con.getContentEncoding();
- encoding = encoding == null ? "UTF-8" : encoding;
- String body = IOUtils.toString(in, encoding);
-
- } catch (ClientProtocolException e) {
- // TODO Auto-generated catch block
- } catch (IOException e) {
- // TODO Auto-generated catch block
- }
- }
-} \ No newline at end of file
+
+ /**
+  * Summarises the description of a feed item.
+  *
+  * @param f the feed item whose {@code description} is summarised
+  * @return a Summary wrapping the text returned by getSumText
+  */
+ @Override
+ public Summary getSummary(FeedItem f) {
+     // TODO marker carried over from the original code (no details were given)
+     return new Summary(getSumText(f.description));
+ }
+
+ /**
+  * Wraps the given text in a Summary.
+  * NOTE(review): the text is passed through unchanged, it is not run through
+  * getSumText - confirm that this is intended.
+  *
+  * @param t the text to wrap
+  * @return a Summary constructed from {@code t}
+  */
+ @Override
+ public Summary getSummaryFromText(String t) {
+     return new Summary(t);
+ }
+
+ /**
+  * Produces a one-sentence extract of the given text.
+  *
+  * Steps: split the text into sentences, strip special characters and
+  * stopwords from each sentence, index the remaining words, merge similar
+  * words, drop words that do not occur often enough, weight the remaining
+  * words and let ranking pick a sentence.
+  *
+  * @param desc the text to summarise; must not be null
+  * @return the sentence chosen by ranking
+  */
+ public String getSumText(String desc)
+ {
+     Hashtable<String, WordItem> hs = new Hashtable<String, WordItem>();
+     ArrayList<SentenceItem> zinnen = getSentences(desc);
+
+     // Strip special characters first, then stopwords, from every sentence.
+     Stopword stop = new Stopword();
+     Special specl = new Special();
+     for (SentenceItem sl : zinnen)
+     {
+         sl.setSRSentence(stop.remove(specl.remove(sl.getRawSentence())));
+     }
+
+     // Index the words: a new word needs at least three characters, a word
+     // that is already known only gets another occurrence recorded.
+     for (int i = 0; i < zinnen.size(); i++)
+     {
+         String sen = zinnen.get(i).getSRSentence();
+         int wordcount = 0;
+
+         for (String tok : sen.split(" "))
+         {
+             tok = tok.trim();
+             wordcount++;
+
+             if (hs.containsKey(tok))
+                 upword(tok, i, wordcount, hs);
+             else if (tok.length() >= 3)
+                 addword(tok, i, wordcount, hs);
+         }
+     }
+
+     // Merge words that share a long enough common prefix.
+     stemming(hs);
+
+     // Keep only the significant words. delword removes and re-inserts
+     // entries, so iterate over a snapshot of the keys: enumerating a
+     // Hashtable while it is structurally modified has undefined results.
+     for (String tok : new ArrayList<String>(hs.keySet()))
+         delword(tok, hs);
+
+     // Weight the remaining words.
+     Enumeration<String> key = hs.keys();
+     while (key.hasMoreElements())
+         setWeight(key.nextElement(), hs, zinnen);
+
+     // Rank the sentences and return the chosen one.
+     return ranking(zinnen, hs);
+ }
+
+ /**
+  * Adds {@code count * log10(sentences / sentenceCount)} to the weight of a word.
+  *
+  * @param tok    key of the word in {@code hs}
+  * @param hs     word table mapping keys to WordItem values
+  * @param zinnen all sentences; only the list size is used
+  */
+ private void setWeight(Object tok, Hashtable hs, ArrayList zinnen)
+ {
+     WordItem item = (WordItem) hs.get(tok);
+     double sentenceTotal = (double) zinnen.size();
+     double termCount = item.getcount();
+     double sentenceCount = item.sentensecount();
+     item.addWeight(termCount * Math.log10(sentenceTotal / sentenceCount));
+ }
+
+ /**
+  * Scores every sentence and returns the raw text of the best one.
+  *
+  * A sentence receives the weight of every word of {@code hs} that occurs
+  * as a substring of its processed ("SR") form. The first sentence with the
+  * highest accumulated weight wins; if no sentence scores above zero the
+  * first sentence is returned.
+  *
+  * @param zinnen the sentences to rank; their weights are updated in place
+  * @param hs     word table mapping String keys to WordItem values
+  * @return the raw text of the highest-ranked sentence, or an empty string
+  *         if there are no sentences
+  */
+ public String ranking(ArrayList<SentenceItem> zinnen, Hashtable hs)
+ {
+     // Nothing to rank: the original code failed with a NullPointerException here.
+     if (zinnen.isEmpty())
+         return "";
+
+     // Pass 1: accumulate word weights per sentence.
+     for (SentenceItem sl : zinnen)
+     {
+         String sen = sl.getSRSentence();
+         Enumeration key = hs.keys();
+         while (key.hasMoreElements())
+         {
+             String str = (String) key.nextElement();
+             if (sen.indexOf(str) != -1)
+             {
+                 WordItem wl = (WordItem) hs.get(str);
+                 sl.addWeight(wl.getWeight());
+             }
+         }
+     }
+
+     // Pass 2: find the index of the heaviest sentence.
+     double max = 0.0;
+     int mi = 0;
+     for (int i = 0; i < zinnen.size(); i++)
+     {
+         double weight = zinnen.get(i).getWeight();
+         if (weight > max)
+         {
+             max = weight;
+             mi = i;
+         }
+     }
+
+     // Return the winner. The original read the text before selecting the
+     // winner and therefore always returned the last sentence.
+     return zinnen.get(mi).getRawSentence();
+ }
+
+ /**
+  * Drops a word from the table unless its count is greater than three.
+  * The entry is always removed first and only put back when it is kept.
+  *
+  * @param tok key of the word in {@code hs}
+  * @param hs  word table mapping keys to WordItem values
+  */
+ private void delword(Object tok, Hashtable hs)
+ {
+     WordItem removed = (WordItem) hs.remove(tok);
+     boolean significant = removed.getcount() > 3;
+     if (!significant)
+     {
+         return;
+     }
+     hs.put(tok, removed);
+ }
+
+
+ /**
+  * Merges the occurrences of {@code w2} into {@code w1} and removes
+  * {@code w2} from the table. Does nothing unless both words are present.
+  *
+  * NOTE(review): occurrences are replayed as incrcount(wordPos, sentencePos),
+  * while addword/upword call incrcount(sentenceIndex + 1, wordCount) - the
+  * argument order looks swapped; verify against WordItem.
+  *
+  * @param w1 the word that absorbs the occurrences
+  * @param w2 the word that is merged away
+  * @param hs word table mapping keys to WordItem values
+  */
+ private void stemword(String w1,String w2, Hashtable hs)
+ {
+     boolean bothPresent = hs.containsKey(w2) && hs.containsKey(w1);
+     if (!bothPresent)
+     {
+         return;
+     }
+
+     WordItem target = (WordItem) hs.remove(w1);
+     WordItem absorbed = (WordItem) hs.remove(w2);
+
+     ArrayList wordPositions = absorbed.getwordpos();
+     ArrayList sentencePositions = absorbed.getsentensepos();
+
+     for (int idx = 0; idx < wordPositions.size(); idx++)
+     {
+         String wordPos = (String) wordPositions.get(idx);
+         String sentencePos = (String) sentencePositions.get(idx);
+         target.incrcount(Integer.parseInt(wordPos), Integer.parseInt(sentencePos));
+     }
+
+     hs.put(w1, target);
+ }
+
+
+ /**
+  * Scores how similar the beginnings of two words are.
+  *
+  * With {@code sm} the length of the common prefix, {@code dp} the 1-based
+  * position of the first differing character (or the length of the longer
+  * word if the shorter one is a prefix of it) and {@code sz} the length of
+  * the shorter word, the score is {@code sm * (dp / sz)}.
+  *
+  * Only the first {@code sz} characters are compared. The original loop ran
+  * up to the longer length and threw StringIndexOutOfBoundsException
+  * whenever one word was a prefix of the other.
+  *
+  * @param str1 first word
+  * @param str2 second word
+  * @return the similarity score; 0.0 if either word is empty
+  */
+ private double difpos(String str1,String str2)
+ {
+     int sz = Math.min(str1.length(), str2.length());
+     int mz = Math.max(str1.length(), str2.length());
+
+     // An empty word shares no prefix; this also avoids dividing by zero.
+     if (sz == 0)
+     {
+         return 0.0;
+     }
+
+     double dp = mz;
+     double sm = 0;
+
+     for (int i = 0; i < sz; i++)
+     {
+         if (str1.charAt(i) != str2.charAt(i))
+         {
+             dp = i + 1;
+             break;
+         }
+         sm++;
+     }
+     return sm * (dp / sz);
+ }
+
+
+ /**
+  * Merges similar words of the table.
+  *
+  * All pairwise difpos scores are computed first, on a snapshot of the keys.
+  * Afterwards every ordered pair whose score is at least 3.0 is handed to
+  * stemword, which folds the second word into the first.
+  *
+  * @param hs word table with String keys; modified in place
+  */
+ private void stemming(Hashtable hs)
+ {
+     int total = hs.size();
+     Object[] words = hs.keySet().toArray();
+     double[][] score = new double[total][total];
+
+     // Phase 1: score every ordered pair of distinct words.
+     for (int row = 0; row < total; row++)
+     {
+         String left = (String) words[row];
+         for (int col = 0; col < total; col++)
+         {
+             String right = (String) words[col];
+             if (row == col)
+             {
+                 continue;
+             }
+             score[row][col] = difpos(left, right);
+         }
+     }
+
+     // Phase 2: merge the pairs that are similar enough.
+     for (int row = 0; row < total; row++)
+     {
+         String left = (String) words[row];
+         for (int col = 0; col < total; col++)
+         {
+             String right = (String) words[col];
+             boolean similar = row != col && score[row][col] >= 3.0;
+             if (similar)
+             {
+                 stemword(left, right, hs);
+             }
+         }
+     }
+ }
+
+ /**
+  * Registers a new word with its first occurrence.
+  *
+  * @param tok the word
+  * @param sp  zero-based sentence index; recorded as {@code sp + 1}
+  * @param wp  the word counter of the occurrence within its sentence
+  * @param hs  word table the new WordItem is stored in
+  */
+ private void addword(String tok,int sp,int wp, Hashtable hs)
+ {
+     WordItem created = new WordItem(tok);
+     created.incrcount(sp + 1, wp);
+     hs.put(tok, created);
+ }
+ /**
+  * Records another occurrence of a word that is already in the table.
+  * The entry is taken out, updated and stored again.
+  *
+  * @param tok the word; must be present in {@code hs}
+  * @param sp  zero-based sentence index; recorded as {@code sp + 1}
+  * @param wp  the word counter of the occurrence within its sentence
+  * @param hs  word table holding the WordItem
+  */
+ private void upword(String tok,int sp,int wp, Hashtable hs)
+ {
+     WordItem existing = (WordItem) hs.remove(tok);
+     existing.incrcount(sp + 1, wp);
+     hs.put(tok, existing);
+ }
+
+ /**
+  * Splits a text into lower-cased, trimmed sentences.
+  *
+  * A sentence ends at a period that is followed by a space, carriage return
+  * or line feed, or at the last period of the text. Periods followed by any
+  * other character stay inside the sentence. Text after the last period is
+  * ignored, and a text of at most one character yields no sentences.
+  *
+  * The last sentence is now trimmed like all the others; it used to keep
+  * the whitespace that followed the previous period.
+  *
+  * @param doc the text to split; must not be null
+  * @return the sentences in document order
+  */
+ private ArrayList<SentenceItem> getSentences(String doc)
+ {
+     ArrayList<SentenceItem> als = new ArrayList<SentenceItem>();
+
+     int lastDot = doc.lastIndexOf(".");
+     int start = 0;  // where the current sentence begins
+     int from = 0;   // where the search for the next period continues
+     int nx = 0;
+
+     while (nx < (doc.length() - 1))
+     {
+         nx = doc.indexOf(".", from);
+         if (nx == -1)
+             break;
+         from = nx + 1;
+
+         boolean isLast = (nx == lastDot);
+         boolean endsSentence = isLast;
+         if (!isLast)
+         {
+             // safe: a period before the last one always has a successor
+             char next = doc.charAt(nx + 1);
+             endsSentence = next == ' ' || next == '\r' || next == '\n';
+         }
+
+         if (endsSentence)
+         {
+             String str = doc.substring(start, nx + 1).toLowerCase().trim();
+             als.add(new SentenceItem(str));
+             start = from;
+         }
+         if (isLast)
+             break;
+     }
+
+     return als;
+ }
+}
diff --git a/app/src/main/java/org/rssin/summaries/SummaryAPIInterface.java b/app/src/main/java/org/rssin/summaries/SummaryAPIInterface.java
index 1caf922..17636de 100644
--- a/app/src/main/java/org/rssin/summaries/SummaryAPIInterface.java
+++ b/app/src/main/java/org/rssin/summaries/SummaryAPIInterface.java
@@ -1,9 +1,10 @@
package org.rssin.summaries;
-import rss.FeedItem;
+import org.rssin.rss.FeedItem;
public interface SummaryAPIInterface {
public Summary getSummary(FeedItem f);
+ public Summary getSummaryFromText(String t);
} \ No newline at end of file
diff --git a/app/src/main/java/org/rssin/summaries/TxtStatic.java b/app/src/main/java/org/rssin/summaries/TxtStatic.java
new file mode 100644
index 0000000..d686beb
--- /dev/null
+++ b/app/src/main/java/org/rssin/summaries/TxtStatic.java
@@ -0,0 +1,26 @@
+/*
+ * To change this license header, choose License Headers in Project Properties.
+ * To change this template file, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package org.rssin.summaries;
+
+/**
+ * Built-in tables for the summariser: a list of English stop words and a
+ * list of characters treated as special.
+ *
+ * The curly quotes are written as Unicode escapes so the values survive
+ * regardless of the encoding the source file is saved or compiled with.
+ *
+ * NOTE(review): "fify", "amoungst", "is'nt" and "it'hv" look like typos
+ * inherited from the original word list; they are left unchanged here.
+ *
+ * @author Joep
+ */
+public class TxtStatic {
+
+    private final String[] stopwords = {
+        "a","about","above","across","after","afterwards","again","against","all","almost",
+        "alone","along","already","also","although","always","am","among","amongst","amoungst",
+        "amount","an","and","another","any","anyhow","anyone","anything","anyway","anywhere",
+        "are","around","as","at","back","be","became","because","become","becomes",
+        "becoming","been","before","beforehand","behind","being","below","beside","besides","between",
+        "beyond","bill","both","bottom","but","by","by","call","can","common",
+        "cannot","cant","co","computer","con","could","couldnt","cry","de","describe",
+        "detail","do","does","done","down","due","during","each","eg","eight",
+        "either","eleven","else","elsewhere","empty","enough","etc","even","ever","every",
+        "everyone","everything","everywhere","except","few","fifteen","fify","fill","find","fire",
+        "first","five","for","former","formerly","forty","found","four","from","front",
+        "full","further","get","give","go","had","has","hasnt","have","he",
+        "hence","her","here","hereafter","hereby","herein","hereupon","hers","herself","him",
+        "himself","his","how","however","hundred","i","ie","if","in","inc",
+        "indeed","interest","into","is","it","its","itself","keep","last","latter",
+        "latterly","least","less","ltd","made","many","may","me","meanwhile","might",
+        "mill","mine","more","moreover","most","mostly","move","much","must","my",
+        "myself","name","namely","neither","never","nevertheless","next","nine","no","nobody",
+        "none","noone","nor","not","nothing","now","nowhere","of","off","often",
+        "on","once","one","only","onto","or","other","others","otherwise","our",
+        "ours","ourselves","out","over","own","part","per","perhaps","please","put",
+        "rather","re","same","see","seem","seemed","seeming","seems","serious","several",
+        "she","should","show","side","since","sincere","six","sixty","so","some",
+        "somehow","someone","something","sometime","sometimes","somewhere","still","such","system","take",
+        "ten","than","that","the","their","them","themselves","then","thence","there",
+        "thereafter","thereby","therefore","therein","thereupon","these","they","thick","thin","third",
+        "this","those","though","three","through","throughout","thru","thus","to","together",
+        "too","top","toward","towards","twelve","twenty","two","un","under","until",
+        "up","upon","us","usually","usual","very","via","was","we","well",
+        "were","what","whatever","when","whence","whenever","where","whereafter","whereas","whereby",
+        // "wherein" and "whereupon" are two separate entries (they were fused by a missing separator).
+        "wherein","whereupon","wherever","whether","which","while","whither","who","whoever","whole",
+        "whom","whose","why","will","with","within","without","would","yet","you",
+        "your","yours","yourself","yourselves","don't","won't","can't","didn't","it's","is'nt",
+        "aren't","wasn't","haven't","hasn't","hadn't","you've","it'hv","you'd","you're","hasn't",
+        // \u2019 is the right single quotation mark used as a typographic apostrophe.
+        "we\u2019ll","you\u2019re","we're","we've"
+    };
+
+    // \u201d and \u201c are the closing and opening curly double quotes.
+    private final char[] special = {
+        '"', ',', ';', '!', '&', '/', '$', ':', '|', '%', ')', '(', '[', ']',
+        '\u201d', '\u201c', '\'', '.', ' ', '\n'
+    };
+
+    /**
+     * Returns the stop-word table.
+     *
+     * @return this instance's own array, not a copy; callers must not modify it
+     */
+    public String[] getStopWords()
+    {
+        return stopwords;
+    }
+
+    /**
+     * Returns the special-character table.
+     *
+     * @return this instance's own array, not a copy; callers must not modify it
+     */
+    public char[] getSpecial()
+    {
+        return special;
+    }
+}
diff --git a/app/src/main/java/org/rssin/summaries/WordItem.java b/app/src/main/java/org/rssin/summaries/WordItem.java
new file mode 100644
index 0000000..fb47749
--- /dev/null
+++ b/app/src/main/java/org/rssin/summaries/WordItem.java
@@ -0,0 +1,78 @@
+package org.rssin.summaries;
+
+import java.util.ArrayList;
+
+/**
+ * Occurrence record for a single word: how often it was counted, and the
+ * sentence and word positions reported with each count. Positions are kept
+ * as decimal strings, one entry per call.
+ */
+public class WordItem
+{
+    private final String word;
+    private double cnt;   // number of incrcount calls
+    private double scnt;  // number of distinct sentence positions seen
+    private double wght;  // last value passed to addWeight
+
+    private final ArrayList<String> spl = new ArrayList<String>();
+    private final ArrayList<String> wpl = new ArrayList<String>();
+
+    /**
+     * @param wd the word this record describes
+     */
+    public WordItem (String wd)
+    {
+        word = wd;
+    }
+
+    /**
+     * Counts one occurrence and records where it was found.
+     *
+     * @param sp sentence position of the occurrence
+     * @param wp word position of the occurrence
+     */
+    public void incrcount(int sp,int wp)
+    {
+        cnt++;
+        sentensepos(sp);
+        wordpos(wp);
+    }
+
+    /** @return the number of occurrences counted so far */
+    public double getcount()
+    {
+        return cnt;
+    }
+
+    /** @return the word given at construction */
+    public String getword()
+    {
+        return word;
+    }
+
+    /**
+     * Appends a sentence position. The distinct-sentence counter only grows
+     * when the position has not been recorded before, but the position is
+     * appended to the list every time, duplicates included.
+     *
+     * @param sp sentence position to record
+     */
+    public void sentensepos(int sp)
+    {
+        final String key = String.valueOf(sp);
+        if (!spl.contains(key))
+        {
+            scnt++;
+        }
+        spl.add(key);
+    }
+
+    /**
+     * Appends a word position.
+     *
+     * @param wp word position to record
+     */
+    public void wordpos(int wp)
+    {
+        wpl.add(String.valueOf(wp));
+    }
+
+    /** @return the internal list of recorded word positions, as decimal strings */
+    public ArrayList<String> getwordpos()
+    {
+        return wpl;
+    }
+
+    /** @return the internal list of recorded sentence positions, as decimal strings */
+    public ArrayList<String> getsentensepos()
+    {
+        return spl;
+    }
+
+    /**
+     * Sets the weight; despite the name the value replaces the previous one
+     * rather than being added to it.
+     *
+     * @param wg new weight
+     */
+    public void addWeight(double wg)
+    {
+        wght=wg;
+    }
+
+    /** @return the weight last set, or 0.0 if none was set */
+    public double getWeight()
+    {
+        return wght;
+    }
+
+    /** @return the number of distinct sentence positions recorded */
+    public double sentensecount()
+    {
+        return scnt;
+    }
+
+
+} \ No newline at end of file
diff --git a/app/src/main/java/org/rssin/summaries/specials.txt b/app/src/main/java/org/rssin/summaries/specials.txt
new file mode 100644
index 0000000..046d05b
--- /dev/null
+++ b/app/src/main/java/org/rssin/summaries/specials.txt
@@ -0,0 +1,18 @@
+,
+"
+;
+!
+&
+/
+$
+:
+|
+%
+)
+(
+[
+]
+”
+“
+'
+.
diff --git a/app/src/main/java/org/rssin/summaries/stopwords.txt b/app/src/main/java/org/rssin/summaries/stopwords.txt
new file mode 100644
index 0000000..e621f2a
--- /dev/null
+++ b/app/src/main/java/org/rssin/summaries/stopwords.txt
@@ -0,0 +1,345 @@
+a
+about
+above
+across
+after
+afterwards
+again
+against
+all
+almost
+alone
+along
+already
+also
+although
+always
+am
+among
+amongst
+amoungst
+amount
+an
+and
+another
+any
+anyhow
+anyone
+anything
+anyway
+anywhere
+are
+around
+as
+at
+back
+be
+became
+because
+become
+becomes
+becoming
+been
+before
+beforehand
+behind
+being
+below
+beside
+besides
+between
+beyond
+bill
+both
+bottom
+but
+by
+by
+call
+can
+common
+cannot
+cant
+co
+computer
+con
+could
+couldnt
+cry
+de
+describe
+detail
+do
+does
+done
+down
+due
+during
+each
+eg
+eight
+either
+eleven
+else
+elsewhere
+empty
+enough
+etc
+even
+ever
+every
+everyone
+everything
+everywhere
+except
+few
+fifteen
+fify
+fill
+find
+fire
+first
+five
+for
+former
+formerly
+forty
+found
+four
+from
+front
+full
+further
+get
+give
+go
+had
+has
+hasnt
+have
+he
+hence
+her
+here
+hereafter
+hereby
+herein
+hereupon
+hers
+herself
+him
+himself
+his
+how
+however
+hundred
+i
+ie
+if
+in
+inc
+indeed
+interest
+into
+is
+it
+its
+itself
+keep
+last
+latter
+latterly
+least
+less
+ltd
+made
+many
+may
+me
+meanwhile
+might
+mill
+mine
+more
+moreover
+most
+mostly
+move
+much
+must
+my
+myself
+name
+namely
+neither
+never
+nevertheless
+next
+nine
+no
+nobody
+none
+noone
+nor
+not
+nothing
+now
+nowhere
+of
+off
+often
+on
+once
+one
+only
+onto
+or
+other
+others
+otherwise
+our
+ours
+ourselves
+out
+over
+own
+part
+per
+perhaps
+please
+put
+rather
+re
+same
+see
+seem
+seemed
+seeming
+seems
+serious
+several
+she
+should
+show
+side
+since
+sincere
+six
+sixty
+so
+some
+somehow
+someone
+something
+sometime
+sometimes
+somewhere
+still
+such
+system
+take
+ten
+than
+that
+the
+their
+them
+themselves
+then
+thence
+there
+thereafter
+thereby
+therefore
+therein
+thereupon
+these
+they
+thick
+thin
+third
+this
+those
+though
+three
+through
+throughout
+thru
+thus
+to
+together
+too
+top
+toward
+towards
+twelve
+twenty
+two
+un
+under
+until
+up
+upon
+us
+usually
+usual
+very
+via
+was
+we
+well
+were
+what
+whatever
+when
+whence
+whenever
+where
+whereafter
+whereas
+whereby
+wherein
+whereupon
+wherever
+whether
+which
+while
+whither
+who
+whoever
+whole
+whom
+whose
+why
+will
+with
+within
+without
+would
+yet
+you
+your
+yours
+yourself
+yourselves
+don't
+won't
+can't
+didn't
+it's
+is'nt
+aren't
+wasn't
+haven't
+hasn't
+hadn't
+you've
+it'hv
+you'd
+you're
+hasn't
+we’ll
+you’re
+we're
+we've
+