diff options
author | zkwip | 2015-05-21 17:23:07 +0200 |
---|---|---|
committer | zkwip | 2015-05-21 17:23:07 +0200 |
commit | f243649f58cb69e0cffb76f51075fefc76d80237 (patch) | |
tree | 55150573aa63d9332d57d9ecc95e0aba1642278c /app/src/main | |
parent | summary shit (diff) |
progress aan het algoritme,
partial multiline support etc.
Diffstat (limited to 'app/src/main')
-rw-r--r-- | app/src/main/java/org/rssin/summaries/LengthMode.java | 32 | ||||
-rw-r--r-- | app/src/main/java/org/rssin/summaries/SentenceItem.java | 84 | ||||
-rw-r--r-- | app/src/main/java/org/rssin/summaries/Special.java | 109 | ||||
-rw-r--r-- | app/src/main/java/org/rssin/summaries/Stemmer.java | 1708 | ||||
-rw-r--r-- | app/src/main/java/org/rssin/summaries/Stopword.java | 121 | ||||
-rw-r--r-- | app/src/main/java/org/rssin/summaries/Summary.java | 36 | ||||
-rw-r--r-- | app/src/main/java/org/rssin/summaries/SummaryAPI.java | 477 | ||||
-rw-r--r-- | app/src/main/java/org/rssin/summaries/SummaryAPIInterface.java | 48 | ||||
-rw-r--r-- | app/src/main/java/org/rssin/summaries/TxtStatic.java | 23 | ||||
-rw-r--r-- | app/src/main/java/org/rssin/summaries/WordItem.java | 143 | ||||
-rw-r--r-- | app/src/main/java/org/rssin/summaries/specials.txt | 18 | ||||
-rw-r--r-- | app/src/main/java/org/rssin/summaries/stopwords.txt | 345 | ||||
-rw-r--r-- | app/src/main/java/org/rssin/summaries/tester.java | 49 |
13 files changed, 1568 insertions, 1625 deletions
diff --git a/app/src/main/java/org/rssin/summaries/LengthMode.java b/app/src/main/java/org/rssin/summaries/LengthMode.java new file mode 100644 index 0000000..1ec6794 --- /dev/null +++ b/app/src/main/java/org/rssin/summaries/LengthMode.java @@ -0,0 +1,32 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package org.rssin.summaries; + +/** + * + * @author jbernards + */ +public enum LengthMode { + + /** + * Limit the summary length by the amount of characters. + */ + CHARACTERS, + /** + * Limit the summary length by the amount of sentences. + */ + LINES, + /** + * Don't limit the summary. + */ + NOLIMIT, + /** + * Limit both by the amount of character and by the amount of sentences. + */ + BOTH + + +} diff --git a/app/src/main/java/org/rssin/summaries/SentenceItem.java b/app/src/main/java/org/rssin/summaries/SentenceItem.java index e8e0e0b..1646152 100644 --- a/app/src/main/java/org/rssin/summaries/SentenceItem.java +++ b/app/src/main/java/org/rssin/summaries/SentenceItem.java @@ -5,49 +5,79 @@ */ package org.rssin.summaries; -public class SentenceItem -{ +/** + * + * @author jbernards + */ +public class SentenceItem { private String rsen = ""; private String srsen = ""; private double wght; - public SentenceItem(String s) - { + /** + * Creates a sentence item based on its string source + * + * @param s sentence in string form + */ + public SentenceItem(String s) { rsen = new String(s); - wght = 0.0; + wght = 0.0; } - - public void setRawSentecse(String sen) - { - rsen = sen; + /** + * change the raw sentence. + * + * @param sen string value of the sentence; + */ + public void setRawSentecse(String sen) { + rsen = sen; } - public void setSRSentence(String rsen) - { + /** + * no idea what this does. + * + * @param rsen + */ + public void setSRSentence(String rsen) { srsen = rsen; - } + } - public String getRawSentence() - { + /** + * gets the raw sentence. + * + * @return guess what + */ + public String getRawSentence() { return rsen; - } - public String getSRSentence() - { + } + + /** + * ? + * + * @return + */ + public String getSRSentence() { return srsen; } - public void addWeight(double wg) - { - wght=wght+wg; - } - - public double getWeight() - { - return wght; - } + /** + * increases the weight value of the entire sentence by a given amount. + * + * @param wg the amount to raise the weight. + */ + public void addWeight(double wg) { + wght += wg; + } + /** + * gets the current weight of the entire sentence. + * + * @return the weight value. + */ + public double getWeight() { + return wght; + } -}
\ No newline at end of file +} diff --git a/app/src/main/java/org/rssin/summaries/Special.java b/app/src/main/java/org/rssin/summaries/Special.java index 36cb0ad..b367ee5 100644 --- a/app/src/main/java/org/rssin/summaries/Special.java +++ b/app/src/main/java/org/rssin/summaries/Special.java @@ -3,58 +3,61 @@ package org.rssin.summaries; import java.io.*; import java.util.StringTokenizer; -public class Special -{ - - char spch[]; - - public Special() - { - /*int cnt=0,sz=0;char bt[]=null; - try { - File fp=new File("specials.txt"); - FileReader fis=new FileReader(fp); - sz=(int)fp.length(); - bt=new char[sz]; - fis.read(bt); - fis.close(); - } - catch(IOException ex) {} - spch=getTokens(new String(bt));*/ +/** + * + * @author jbernards + */ +public class Special { + + char spch[]; + + /** + * constructor. + */ + public Special() { spch = new TxtStatic().getSpecial(); - } - - - public char[] getTokens(String sen) - { - int sz=0,cnt=0;char words[]=null; - StringTokenizer stk=new StringTokenizer(sen); - sz=stk.countTokens(); - words=new char[sz]; - while ( stk.hasMoreTokens()) - { - words[cnt]=new String(stk.nextToken()).charAt(0); - cnt++; - } - return words; } - - public String remove(String sen) - { - String dsen=new String(sen); - - for (int j=0;j<spch.length; j++) - { - - char csh[]=dsen.toCharArray(); - dsen=""; - for(int i=0;i<csh.length;i++ ) - if(csh[i] != spch[j]) - dsen=dsen + csh[i]; - - } - - return dsen; - } - -}
\ No newline at end of file + + /** + * devides the sentence into different words. + * + * @param sen the sentence string + * @return the first letter of each word. + */ + public char[] getTokens(String sen) { + + String[] sp = sen.split(" "); + char[] words = new char[sp.length]; + for(int i = 0; i < sp.length; i++) + { + words[i] = sp[i].charAt(0); + } + + return words; + } + + /** + * removes special words for a give sentence + * + * @param sen sentence + * @return sentences with the words removed. + */ + public String remove(String sen) { + String dsen = sen; + + for (int j = 0; j < spch.length; j++) { + + char csh[] = dsen.toCharArray(); + dsen = ""; + for (int i = 0; i < csh.length; i++) { + if (csh[i] != spch[j]) { + dsen = dsen + csh[i]; + } + } + + } + + return dsen; + } + +} diff --git a/app/src/main/java/org/rssin/summaries/Stemmer.java b/app/src/main/java/org/rssin/summaries/Stemmer.java index 1cad62f..54a64fb 100644 --- a/app/src/main/java/org/rssin/summaries/Stemmer.java +++ b/app/src/main/java/org/rssin/summaries/Stemmer.java @@ -2,851 +2,873 @@ package org.rssin.summaries; import java.util.*; +/** + * + * @author jbernards + */ public class Stemmer { - - private static boolean m_CompMode = false; - private static HashMap m_l11 = null; - private static HashMap m_l10 = null; - private static HashMap m_l9 = null; - private static HashMap m_l8 = null; - private static HashMap m_l7 = null; - private static HashMap m_l6 = null; - private static HashMap m_l5 = null; - private static HashMap m_l4 = null; - private static HashMap m_l3 = null; - private static HashMap m_l2 = null; - private static HashMap m_l1 = null; - - static { - - m_l11 = new HashMap(); - m_l11.put("alistically", "B"); - m_l11.put("arizability", "A"); - m_l11.put("izationally", "B"); - m_l10 = new HashMap(); - m_l10.put("antialness", "A"); - m_l10.put("arisations", "A"); - m_l10.put("arizations", "A"); - m_l10.put("entialness", "A"); - m_l9 = new HashMap(); - m_l9.put("allically", "C"); - m_l9.put("antaneous", "A"); - m_l9.put("antiality", "A"); - m_l9.put("arisation", "A"); - m_l9.put("arization", "A"); - m_l9.put("ationally", "B"); - m_l9.put("ativeness", "A"); - m_l9.put("eableness", "E"); - m_l9.put("entations", "A"); - m_l9.put("entiality", "A"); - m_l9.put("entialize", "A"); - m_l9.put("entiation", "A"); - m_l9.put("ionalness", "A"); - m_l9.put("istically", "A"); - m_l9.put("itousness", "A"); - m_l9.put("izability", "A"); - m_l9.put("izational", "A"); - m_l8 = new HashMap(); - m_l8.put("ableness", "A"); - m_l8.put("arizable", "A"); - m_l8.put("entation", "A"); - m_l8.put("entially", "A"); - m_l8.put("eousness", "A"); - m_l8.put("ibleness", "A"); - m_l8.put("icalness", "A"); - m_l8.put("ionalism", "A"); - m_l8.put("ionality", "A"); - m_l8.put("ionalize", "A"); - m_l8.put("iousness", "A"); - m_l8.put("izations", "A"); - m_l8.put("lessness", "A"); - m_l7 = new HashMap(); - m_l7.put("ability", "A"); - m_l7.put("aically", "A"); - m_l7.put("alistic", "B"); - m_l7.put("alities", "A"); - m_l7.put("ariness", "E"); - m_l7.put("aristic", "A"); - m_l7.put("arizing", "A"); - m_l7.put("ateness", "A"); - m_l7.put("atingly", "A"); - m_l7.put("ational", "B"); - m_l7.put("atively", "A"); - m_l7.put("ativism", "A"); - m_l7.put("elihood", "E"); - m_l7.put("encible", "A"); - m_l7.put("entally", "A"); - m_l7.put("entials", "A"); - m_l7.put("entiate", "A"); - m_l7.put("entness", "A"); - m_l7.put("fulness", "A"); - m_l7.put("ibility", "A"); - m_l7.put("icalism", "A"); - m_l7.put("icalist", "A"); - m_l7.put("icality", "A"); - m_l7.put("icalize", "A"); - m_l7.put("ication", "G"); - m_l7.put("icianry", "A"); - m_l7.put("ination", "A"); - m_l7.put("ingness", "A"); - m_l7.put("ionally", "A"); - m_l7.put("isation", "A"); - m_l7.put("ishness", "A"); - m_l7.put("istical", "A"); - m_l7.put("iteness", "A"); - m_l7.put("iveness", "A"); - m_l7.put("ivistic", "A"); - m_l7.put("ivities", "A"); - m_l7.put("ization", "F"); - m_l7.put("izement", "A"); - m_l7.put("oidally", "A"); - m_l7.put("ousness", "A"); - m_l6 = new HashMap(); - m_l6.put("aceous", "A"); - m_l6.put("acious", "B"); - m_l6.put("action", "G"); - m_l6.put("alness", "A"); - m_l6.put("ancial", "A"); - m_l6.put("ancies", "A"); - m_l6.put("ancing", "B"); - m_l6.put("ariser", "A"); - m_l6.put("arized", "A"); - m_l6.put("arizer", "A"); - m_l6.put("atable", "A"); - m_l6.put("ations", "B"); - m_l6.put("atives", "A"); - m_l6.put("eature", "Z"); - m_l6.put("efully", "A"); - m_l6.put("encies", "A"); - m_l6.put("encing", "A"); - m_l6.put("ential", "A"); - m_l6.put("enting", "C"); - m_l6.put("entist", "A"); - m_l6.put("eously", "A"); - m_l6.put("ialist", "A"); - m_l6.put("iality", "A"); - m_l6.put("ialize", "A"); - m_l6.put("ically", "A"); - m_l6.put("icance", "A"); - m_l6.put("icians", "A"); - m_l6.put("icists", "A"); - m_l6.put("ifully", "A"); - m_l6.put("ionals", "A"); - m_l6.put("ionate", "D"); - m_l6.put("ioning", "A"); - m_l6.put("ionist", "A"); - m_l6.put("iously", "A"); - m_l6.put("istics", "A"); - m_l6.put("izable", "E"); - m_l6.put("lessly", "A"); - m_l6.put("nesses", "A"); - m_l6.put("oidism", "A"); - m_l5 = new HashMap(); - m_l5.put("acies", "A"); - m_l5.put("acity", "A"); - m_l5.put("aging", "B"); - m_l5.put("aical", "A"); - if (!m_CompMode) { - m_l5.put("alist", "A"); - } - m_l5.put("alism", "B"); - m_l5.put("ality", "A"); - m_l5.put("alize", "A"); - m_l5.put("allic", "b"); - m_l5.put("anced", "B"); - m_l5.put("ances", "B"); - m_l5.put("antic", "C"); - m_l5.put("arial", "A"); - m_l5.put("aries", "A"); - m_l5.put("arily", "A"); - m_l5.put("arity", "B"); - m_l5.put("arize", "A"); - m_l5.put("aroid", "A"); - m_l5.put("ately", "A"); - m_l5.put("ating", "I"); - m_l5.put("ation", "B"); - m_l5.put("ative", "A"); - m_l5.put("ators", "A"); - m_l5.put("atory", "A"); - m_l5.put("ature", "E"); - m_l5.put("early", "Y"); - m_l5.put("ehood", "A"); - m_l5.put("eless", "A"); - if (!m_CompMode) { - m_l5.put("elily", "A"); - } else { - m_l5.put("elity", "A"); - } - m_l5.put("ement", "A"); - m_l5.put("enced", "A"); - m_l5.put("ences", "A"); - m_l5.put("eness", "E"); - m_l5.put("ening", "E"); - m_l5.put("ental", "A"); - m_l5.put("ented", "C"); - m_l5.put("ently", "A"); - m_l5.put("fully", "A"); - m_l5.put("ially", "A"); - m_l5.put("icant", "A"); - m_l5.put("ician", "A"); - m_l5.put("icide", "A"); - m_l5.put("icism", "A"); - m_l5.put("icist", "A"); - m_l5.put("icity", "A"); - m_l5.put("idine", "I"); - m_l5.put("iedly", "A"); - m_l5.put("ihood", "A"); - m_l5.put("inate", "A"); - m_l5.put("iness", "A"); - m_l5.put("ingly", "B"); - m_l5.put("inism", "J"); - m_l5.put("inity", "c"); - m_l5.put("ional", "A"); - m_l5.put("ioned", "A"); - m_l5.put("ished", "A"); - m_l5.put("istic", "A"); - m_l5.put("ities", "A"); - m_l5.put("itous", "A"); - m_l5.put("ively", "A"); - m_l5.put("ivity", "A"); - m_l5.put("izers", "F"); - m_l5.put("izing", "F"); - m_l5.put("oidal", "A"); - m_l5.put("oides", "A"); - m_l5.put("otide", "A"); - m_l5.put("ously", "A"); - m_l4 = new HashMap(); - m_l4.put("able", "A"); - m_l4.put("ably", "A"); - m_l4.put("ages", "B"); - m_l4.put("ally", "B"); - m_l4.put("ance", "B"); - m_l4.put("ancy", "B"); - m_l4.put("ants", "B"); - m_l4.put("aric", "A"); - m_l4.put("arly", "K"); - m_l4.put("ated", "I"); - m_l4.put("ates", "A"); - m_l4.put("atic", "B"); - m_l4.put("ator", "A"); - m_l4.put("ealy", "Y"); - m_l4.put("edly", "E"); - m_l4.put("eful", "A"); - m_l4.put("eity", "A"); - m_l4.put("ence", "A"); - m_l4.put("ency", "A"); - m_l4.put("ened", "E"); - m_l4.put("enly", "E"); - m_l4.put("eous", "A"); - m_l4.put("hood", "A"); - m_l4.put("ials", "A"); - m_l4.put("ians", "A"); - m_l4.put("ible", "A"); - m_l4.put("ibly", "A"); - m_l4.put("ical", "A"); - m_l4.put("ides", "L"); - m_l4.put("iers", "A"); - m_l4.put("iful", "A"); - m_l4.put("ines", "M"); - m_l4.put("ings", "N"); - m_l4.put("ions", "B"); - m_l4.put("ious", "A"); - m_l4.put("isms", "B"); - m_l4.put("ists", "A"); - m_l4.put("itic", "H"); - m_l4.put("ized", "F"); - m_l4.put("izer", "F"); - m_l4.put("less", "A"); - m_l4.put("lily", "A"); - m_l4.put("ness", "A"); - m_l4.put("ogen", "A"); - m_l4.put("ward", "A"); - m_l4.put("wise", "A"); - m_l4.put("ying", "B"); - m_l4.put("yish", "A"); - m_l3 = new HashMap(); - m_l3.put("acy", "A"); - m_l3.put("age", "B"); - m_l3.put("aic", "A"); - m_l3.put("als", "b"); - m_l3.put("ant", "B"); - m_l3.put("ars", "O"); - m_l3.put("ary", "F"); - m_l3.put("ata", "A"); - m_l3.put("ate", "A"); - m_l3.put("eal", "Y"); - m_l3.put("ear", "Y"); - m_l3.put("ely", "E"); - m_l3.put("ene", "E"); - m_l3.put("ent", "C"); - m_l3.put("ery", "E"); - m_l3.put("ese", "A"); - m_l3.put("ful", "A"); - m_l3.put("ial", "A"); - m_l3.put("ian", "A"); - m_l3.put("ics", "A"); - m_l3.put("ide", "L"); - m_l3.put("ied", "A"); - m_l3.put("ier", "A"); - m_l3.put("ies", "P"); - m_l3.put("ily", "A"); - m_l3.put("ine", "M"); - m_l3.put("ing", "N"); - m_l3.put("ion", "Q"); - m_l3.put("ish", "C"); - m_l3.put("ism", "B"); - m_l3.put("ist", "A"); - m_l3.put("ite", "a"); - m_l3.put("ity", "A"); - m_l3.put("ium", "A"); - m_l3.put("ive", "A"); - m_l3.put("ize", "F"); - m_l3.put("oid", "A"); - m_l3.put("one", "R"); - m_l3.put("ous", "A"); - m_l2 = new HashMap(); - m_l2.put("ae", "A"); - m_l2.put("al", "b"); - m_l2.put("ar", "X"); - m_l2.put("as", "B"); - m_l2.put("ed", "E"); - m_l2.put("en", "F"); - m_l2.put("es", "E"); - m_l2.put("ia", "A"); - m_l2.put("ic", "A"); - m_l2.put("is", "A"); - m_l2.put("ly", "B"); - m_l2.put("on", "S"); - m_l2.put("or", "T"); - m_l2.put("um", "U"); - m_l2.put("us", "V"); - m_l2.put("yl", "R"); - m_l2.put("s\'", "A"); - m_l2.put("\'s", "A"); - m_l1 = new HashMap(); - m_l1.put("a", "A"); - m_l1.put("e", "A"); - m_l1.put("i", "A"); - m_l1.put("o", "A"); - m_l1.put("s", "W"); - m_l1.put("y", "B"); - } - - private String removeEnding(String word) { - - int length = word.length(); - int el = 11; - - while (el > 0) { - if (length - el > 1) { - String ending = word.substring(length - el); - String conditionCode = null; - switch (el) { - case 11: conditionCode = (String)m_l11.get(ending); - break; - case 10: conditionCode = (String)m_l10.get(ending); - break; - case 9: conditionCode = (String)m_l9.get(ending); - break; - case 8: conditionCode = (String)m_l8.get(ending); - break; - case 7: conditionCode = (String)m_l7.get(ending); - break; - case 6: conditionCode = (String)m_l6.get(ending); - break; - case 5: conditionCode = (String)m_l5.get(ending); - break; - case 4: conditionCode = (String)m_l4.get(ending); - break; - case 3: conditionCode = (String)m_l3.get(ending); - break; - case 2: conditionCode = (String)m_l2.get(ending); - break; - case 1: conditionCode = (String)m_l1.get(ending); - break; - default: - } - if (conditionCode != null) { - switch (conditionCode.charAt(0)) { - case 'A': - return word.substring(0, length - el); - case 'B': - if (length - el > 2) { - return word.substring(0, length - el); - } - break; - case 'C': - if (length - el > 3) { - return word.substring(0, length - el); - } - break; - case 'D': - if (length - el > 4) { - return word.substring(0, length - el); - } - break; - case 'E': - if (word.charAt(length - el - 1) != 'e') { - return word.substring(0, length - el); - } - break; - case 'F': - if ((length - el > 2) && - (word.charAt(length - el - 1) != 'e')) { - return word.substring(0, length - el); - } - break; - case 'G': - if ((length - el > 2) && - (word.charAt(length - el - 1) == 'f')) { - return word.substring(0, length - el); - } - break; - case 'H': - if ((word.charAt(length - el - 1) == 't') || - ((word.charAt(length - el - 1) == 'l') && - (word.charAt(length - el - 2) == 'l'))) { - return word.substring(0, length - el); - } - break; - case 'I': - if ((word.charAt(length - el - 1) != 'o') && - (word.charAt(length - el - 1) != 'e')) { - return word.substring(0, length - el); - } - break; - case 'J': - if ((word.charAt(length - el - 1) != 'a') && - (word.charAt(length - el - 1) != 'e')) { - return word.substring(0, length - el); - } - break; - case 'K': - if ((length - el > 2) && - ((word.charAt(length - el - 1) == 'l') || - (word.charAt(length - el - 1) == 'i') || - ((word.charAt(length - el - 1) == 'e') && - (word.charAt(length - el - 3) == 'u')))) { - return word.substring(0, length - el); - } - break; - case 'L': - if ((word.charAt(length - el - 1) != 'u') && - (word.charAt(length - el - 1) != 'x') && - ((word.charAt(length - el - 1) != 's') || - (word.charAt(length - el - 2) == 'o'))) { - return word.substring(0, length - el); - } - break; - case 'M': - if ((word.charAt(length - el - 1) != 'a') && - (word.charAt(length - el - 1) != 'c') && - (word.charAt(length - el - 1) != 'e') && - (word.charAt(length - el - 1) != 'm')) { - return word.substring(0, length - el); - } - break; - case 'N': - if ((length - el > 3) || - ((length - el == 3) && - ((word.charAt(length - el - 3) != 's')))) { - return word.substring(0, length - el); - } - break; - case 'O': - if ((word.charAt(length - el - 1) == 'l') || - (word.charAt(length - el - 1) == 'i')) { - return word.substring(0, length - el); - } - break; - case 'P': - if (word.charAt(length - el - 1) != 'c') { - return word.substring(0, length - el); - } - break; - case 'Q': - if ((length - el > 2) && - (word.charAt(length - el - 1) != 'l') && - (word.charAt(length - el - 1) != 'n')) { - return word.substring(0, length - el); - } - break; - case 'R': - if ((word.charAt(length - el - 1) == 'n') || - (word.charAt(length - el - 1) == 'r')) { - return word.substring(0, length - el); - } - break; - case 'S': - if (((word.charAt(length - el - 1) == 'r') && - (word.charAt(length - el - 2) == 'd')) || - ((word.charAt(length - el - 1) == 't') && - (word.charAt(length - el - 2) != 't'))) { - return word.substring(0, length - el); - } - break; - case 'T': - if ((word.charAt(length - el - 1) == 's') || - ((word.charAt(length - el - 1) == 't') && - (word.charAt(length - el - 2) != 'o'))) { - return word.substring(0, length - el); - } - break; - case 'U': - if ((word.charAt(length - el - 1) == 'l') || - (word.charAt(length - el - 1) == 'm') || - (word.charAt(length - el - 1) == 'n') || - (word.charAt(length - el - 1) == 'r')) { - return word.substring(0, length - el); - } - break; - case 'V': - if (word.charAt(length - el - 1) == 'c') { - return word.substring(0, length - el); - } - break; - case 'W': - if ((word.charAt(length - el - 1) != 's') && - (word.charAt(length - el - 1) != 'u')) { - return word.substring(0, length - el); - } - break; - case 'X': - if ((word.charAt(length - el - 1) == 'l') || - (word.charAt(length - el - 1) == 'i') || - ((length - el > 2) && - (word.charAt(length - el - 1) == 'e') && - (word.charAt(length - el - 3) == 'u'))) { - return word.substring(0, length - el); - } - break; - case 'Y': - if ((word.charAt(length - el - 1) == 'n') && - (word.charAt(length - el - 2) == 'i')) { - return word.substring(0, length - el); - } - break; - case 'Z': - if (word.charAt(length - el - 1) != 'f') { - return word.substring(0, length - el); - } - break; - case 'a': - if ((word.charAt(length - el - 1) == 'd') || - (word.charAt(length - el - 1) == 'f') || - (((word.charAt(length - el - 1) == 'h') && - (word.charAt(length - el - 2) == 'p'))) || - (((word.charAt(length - el - 1) == 'h') && - (word.charAt(length - el - 2) == 't'))) || - (word.charAt(length - el - 1) == 'l') || - (((word.charAt(length - el - 1) == 'r') && - (word.charAt(length - el - 2) == 'e'))) || - (((word.charAt(length - el - 1) == 'r') && - (word.charAt(length - el - 2) == 'o'))) || - (((word.charAt(length - el - 1) == 's') && - (word.charAt(length - el - 2) == 'e'))) || - (word.charAt(length - el - 1) == 't')) { - return word.substring(0, length - el); - } - break; - case 'b': - if (m_CompMode) { - if (((length - el == 3 ) && - (!((word.charAt(length - el - 1) == 't') && - (word.charAt(length - el - 2) == 'e') && - (word.charAt(length - el - 3) == 'm')))) || - ((length - el > 3) && - (!((word.charAt(length - el - 1) == 't') && - (word.charAt(length - el - 2) == 's') && - (word.charAt(length - el - 3) == 'y') && - (word.charAt(length - el - 4) == 'r'))))) { - return word.substring(0, length - el); - } - } else { - if ((length - el > 2) && - (!((word.charAt(length - el - 1) == 't') && - (word.charAt(length - el - 2) == 'e') && - (word.charAt(length - el - 3) == 'm'))) && - ((length - el < 4) || - (!((word.charAt(length - el - 1) == 't') && - (word.charAt(length - el - 2) == 's') && - (word.charAt(length - el - 3) == 'y') && - (word.charAt(length - el - 4) == 'r'))))) { - return word.substring(0, length - el); - } - } - break; - case 'c': - if (word.charAt(length - el - 1) == 'l') { - return word.substring(0, length - el); - } - break; - default: - throw new IllegalArgumentException("Fatal error."); - } - } - } - el--; - } - return word; - } - - private String recodeEnding(String word) { - - int lastPos = word.length() - 1; - - // Rule 1 - if (word.endsWith("bb") || - word.endsWith("dd") || - word.endsWith("gg") || - word.endsWith("ll") || - word.endsWith("mm") || - word.endsWith("nn") || - word.endsWith("pp") || - word.endsWith("rr") || - word.endsWith("ss") || - word.endsWith("tt")) { - word = word.substring(0, lastPos); - lastPos--; - } - - // Rule 2 - if (word.endsWith("iev")) { - word = word.substring(0, lastPos - 2).concat("ief"); - } - - // Rule 3 - if (word.endsWith("uct")) { - word = word.substring(0, lastPos - 2).concat("uc"); - lastPos--; - } - - // Rule 4 - if (word.endsWith("umpt")) { - word = word.substring(0, lastPos - 3).concat("um"); - lastPos -= 2; - } - - // Rule 5 - if (word.endsWith("rpt")) { - word = word.substring(0, lastPos - 2).concat("rb"); - lastPos--; - } - - // Rule 6 - if (word.endsWith("urs")) { - word = word.substring(0, lastPos - 2).concat("ur"); - lastPos--; - } - - // Rule 7 - if (word.endsWith("istr")) { - word = word.substring(0, lastPos - 3).concat("ister"); - lastPos++; - } - - // Rule 7a - if (word.endsWith("metr")) { - word = word.substring(0, lastPos - 3).concat("meter"); - lastPos++; - } - - // Rule 8 - if (word.endsWith("olv")) { - word = word.substring(0, lastPos - 2).concat("olut"); - lastPos++; - } - - // Rule 9 - if (word.endsWith("ul")) { - if ((lastPos - 2 < 0) || - ((word.charAt(lastPos - 2) != 'a') && - (word.charAt(lastPos - 2) != 'i') && - (word.charAt(lastPos - 2) != 'o'))) { - word = word.substring(0, lastPos - 1).concat("l"); - lastPos--; - } - } - - // Rule 10 - if (word.endsWith("bex")) { - word = word.substring(0, lastPos - 2).concat("bic"); - } - - // Rule 11 - if (word.endsWith("dex")) { - word = word.substring(0, lastPos - 2).concat("dic"); - } - - // Rule 12 - if (word.endsWith("pex")) { - word = word.substring(0, lastPos - 2).concat("pic"); - } - - // Rule 13 - if (word.endsWith("tex")) { - word = word.substring(0, lastPos - 2).concat("tic"); - } - - // Rule 14 - if (word.endsWith("ax")) { - word = word.substring(0, lastPos - 1).concat("ac"); - } - - // Rule 15 - if (word.endsWith("ex")) { - word = word.substring(0, lastPos - 1).concat("ec"); - } - - // Rule 16 - if (word.endsWith("ix")) { - word = word.substring(0, lastPos - 1).concat("ic"); - } - - // Rule 17 - if (word.endsWith("lux")) { - word = word.substring(0, lastPos - 2).concat("luc"); - } - - // Rule 18 - if (word.endsWith("uad")) { - word = word.substring(0, lastPos - 2).concat("uas"); + private static boolean m_CompMode = false; + private static HashMap m_l11 = null; + private static HashMap m_l10 = null; + private static HashMap m_l9 = null; + private static HashMap m_l8 = null; + private static HashMap m_l7 = null; + private static HashMap m_l6 = null; + private static HashMap m_l5 = null; + private static HashMap m_l4 = null; + private static HashMap m_l3 = null; + private static HashMap m_l2 = null; + private static HashMap m_l1 = null; + + static { + + m_l11 = new HashMap(); + m_l11.put("alistically", "B"); + m_l11.put("arizability", "A"); + m_l11.put("izationally", "B"); + m_l10 = new HashMap(); + m_l10.put("antialness", "A"); + m_l10.put("arisations", "A"); + m_l10.put("arizations", "A"); + m_l10.put("entialness", "A"); + m_l9 = new HashMap(); + m_l9.put("allically", "C"); + m_l9.put("antaneous", "A"); + m_l9.put("antiality", "A"); + m_l9.put("arisation", "A"); + m_l9.put("arization", "A"); + m_l9.put("ationally", "B"); + m_l9.put("ativeness", "A"); + m_l9.put("eableness", "E"); + m_l9.put("entations", "A"); + m_l9.put("entiality", "A"); + m_l9.put("entialize", "A"); + m_l9.put("entiation", "A"); + m_l9.put("ionalness", "A"); + m_l9.put("istically", "A"); + m_l9.put("itousness", "A"); + m_l9.put("izability", "A"); + m_l9.put("izational", "A"); + m_l8 = new HashMap(); + m_l8.put("ableness", "A"); + m_l8.put("arizable", "A"); + m_l8.put("entation", "A"); + m_l8.put("entially", "A"); + m_l8.put("eousness", "A"); + m_l8.put("ibleness", "A"); + m_l8.put("icalness", "A"); + m_l8.put("ionalism", "A"); + m_l8.put("ionality", "A"); + m_l8.put("ionalize", "A"); + m_l8.put("iousness", "A"); + m_l8.put("izations", "A"); + m_l8.put("lessness", "A"); + m_l7 = new HashMap(); + m_l7.put("ability", "A"); + m_l7.put("aically", "A"); + m_l7.put("alistic", "B"); + m_l7.put("alities", "A"); + m_l7.put("ariness", "E"); + m_l7.put("aristic", "A"); + m_l7.put("arizing", "A"); + m_l7.put("ateness", "A"); + m_l7.put("atingly", "A"); + m_l7.put("ational", "B"); + m_l7.put("atively", "A"); + m_l7.put("ativism", "A"); + m_l7.put("elihood", "E"); + m_l7.put("encible", "A"); + m_l7.put("entally", "A"); + m_l7.put("entials", "A"); + m_l7.put("entiate", "A"); + m_l7.put("entness", "A"); + m_l7.put("fulness", "A"); + m_l7.put("ibility", "A"); + m_l7.put("icalism", "A"); + m_l7.put("icalist", "A"); + m_l7.put("icality", "A"); + m_l7.put("icalize", "A"); + m_l7.put("ication", "G"); + m_l7.put("icianry", "A"); + m_l7.put("ination", "A"); + m_l7.put("ingness", "A"); + m_l7.put("ionally", "A"); + m_l7.put("isation", "A"); + m_l7.put("ishness", "A"); + m_l7.put("istical", "A"); + m_l7.put("iteness", "A"); + m_l7.put("iveness", "A"); + m_l7.put("ivistic", "A"); + m_l7.put("ivities", "A"); + m_l7.put("ization", "F"); + m_l7.put("izement", "A"); + m_l7.put("oidally", "A"); + m_l7.put("ousness", "A"); + m_l6 = new HashMap(); + m_l6.put("aceous", "A"); + m_l6.put("acious", "B"); + m_l6.put("action", "G"); + m_l6.put("alness", "A"); + m_l6.put("ancial", "A"); + m_l6.put("ancies", "A"); + m_l6.put("ancing", "B"); + m_l6.put("ariser", "A"); + m_l6.put("arized", "A"); + m_l6.put("arizer", "A"); + m_l6.put("atable", "A"); + m_l6.put("ations", "B"); + m_l6.put("atives", "A"); + m_l6.put("eature", "Z"); + m_l6.put("efully", "A"); + m_l6.put("encies", "A"); + m_l6.put("encing", "A"); + m_l6.put("ential", "A"); + m_l6.put("enting", "C"); + m_l6.put("entist", "A"); + m_l6.put("eously", "A"); + m_l6.put("ialist", "A"); + m_l6.put("iality", "A"); + m_l6.put("ialize", "A"); + m_l6.put("ically", "A"); + m_l6.put("icance", "A"); + m_l6.put("icians", "A"); + m_l6.put("icists", "A"); + m_l6.put("ifully", "A"); + m_l6.put("ionals", "A"); + m_l6.put("ionate", "D"); + m_l6.put("ioning", "A"); + m_l6.put("ionist", "A"); + m_l6.put("iously", "A"); + m_l6.put("istics", "A"); + m_l6.put("izable", "E"); + m_l6.put("lessly", "A"); + m_l6.put("nesses", "A"); + m_l6.put("oidism", "A"); + m_l5 = new HashMap(); + m_l5.put("acies", "A"); + m_l5.put("acity", "A"); + m_l5.put("aging", "B"); + m_l5.put("aical", "A"); + if (!m_CompMode) { + m_l5.put("alist", "A"); + } + m_l5.put("alism", "B"); + m_l5.put("ality", "A"); + m_l5.put("alize", "A"); + m_l5.put("allic", "b"); + m_l5.put("anced", "B"); + m_l5.put("ances", "B"); + m_l5.put("antic", "C"); + m_l5.put("arial", "A"); + m_l5.put("aries", "A"); + m_l5.put("arily", "A"); + m_l5.put("arity", "B"); + m_l5.put("arize", "A"); + m_l5.put("aroid", "A"); + m_l5.put("ately", "A"); + m_l5.put("ating", "I"); + m_l5.put("ation", "B"); + m_l5.put("ative", "A"); + m_l5.put("ators", "A"); + m_l5.put("atory", "A"); + m_l5.put("ature", "E"); + m_l5.put("early", "Y"); + m_l5.put("ehood", "A"); + m_l5.put("eless", "A"); + if (!m_CompMode) { + m_l5.put("elily", "A"); + } else { + m_l5.put("elity", "A"); + } + m_l5.put("ement", "A"); + m_l5.put("enced", "A"); + m_l5.put("ences", "A"); + m_l5.put("eness", "E"); + m_l5.put("ening", "E"); + m_l5.put("ental", "A"); + m_l5.put("ented", "C"); + m_l5.put("ently", "A"); + m_l5.put("fully", "A"); + m_l5.put("ially", "A"); + m_l5.put("icant", "A"); + m_l5.put("ician", "A"); + m_l5.put("icide", "A"); + m_l5.put("icism", "A"); + m_l5.put("icist", "A"); + m_l5.put("icity", "A"); + m_l5.put("idine", "I"); + m_l5.put("iedly", "A"); + m_l5.put("ihood", "A"); + m_l5.put("inate", "A"); + m_l5.put("iness", "A"); + m_l5.put("ingly", "B"); + m_l5.put("inism", "J"); + m_l5.put("inity", "c"); + m_l5.put("ional", "A"); + m_l5.put("ioned", "A"); + m_l5.put("ished", "A"); + m_l5.put("istic", "A"); + m_l5.put("ities", "A"); + m_l5.put("itous", "A"); + m_l5.put("ively", "A"); + m_l5.put("ivity", "A"); + m_l5.put("izers", "F"); + m_l5.put("izing", "F"); + m_l5.put("oidal", "A"); + m_l5.put("oides", "A"); + m_l5.put("otide", "A"); + m_l5.put("ously", "A"); + m_l4 = new HashMap(); + m_l4.put("able", "A"); + m_l4.put("ably", "A"); + m_l4.put("ages", "B"); + m_l4.put("ally", "B"); + m_l4.put("ance", "B"); + m_l4.put("ancy", "B"); + m_l4.put("ants", "B"); + m_l4.put("aric", "A"); + m_l4.put("arly", "K"); + m_l4.put("ated", "I"); + m_l4.put("ates", "A"); + m_l4.put("atic", "B"); + m_l4.put("ator", "A"); + m_l4.put("ealy", "Y"); + m_l4.put("edly", "E"); + m_l4.put("eful", "A"); + m_l4.put("eity", "A"); + m_l4.put("ence", "A"); + m_l4.put("ency", "A"); + m_l4.put("ened", "E"); + m_l4.put("enly", "E"); + m_l4.put("eous", "A"); + m_l4.put("hood", "A"); + m_l4.put("ials", "A"); + m_l4.put("ians", "A"); + m_l4.put("ible", "A"); + m_l4.put("ibly", "A"); + m_l4.put("ical", "A"); + m_l4.put("ides", "L"); + m_l4.put("iers", "A"); + m_l4.put("iful", "A"); + m_l4.put("ines", "M"); + m_l4.put("ings", "N"); + m_l4.put("ions", "B"); + m_l4.put("ious", "A"); + m_l4.put("isms", "B"); + m_l4.put("ists", "A"); + m_l4.put("itic", "H"); + m_l4.put("ized", "F"); + m_l4.put("izer", "F"); + m_l4.put("less", "A"); + m_l4.put("lily", "A"); + m_l4.put("ness", "A"); + m_l4.put("ogen", "A"); + m_l4.put("ward", "A"); + m_l4.put("wise", "A"); + m_l4.put("ying", "B"); + m_l4.put("yish", "A"); + m_l3 = new HashMap(); + m_l3.put("acy", "A"); + m_l3.put("age", "B"); + m_l3.put("aic", "A"); + m_l3.put("als", "b"); + m_l3.put("ant", "B"); + m_l3.put("ars", "O"); + m_l3.put("ary", "F"); + m_l3.put("ata", "A"); + m_l3.put("ate", "A"); + m_l3.put("eal", "Y"); + m_l3.put("ear", "Y"); + m_l3.put("ely", "E"); + m_l3.put("ene", "E"); + m_l3.put("ent", "C"); + m_l3.put("ery", "E"); + m_l3.put("ese", "A"); + m_l3.put("ful", "A"); + m_l3.put("ial", "A"); + m_l3.put("ian", "A"); + m_l3.put("ics", "A"); + m_l3.put("ide", "L"); + m_l3.put("ied", "A"); + m_l3.put("ier", "A"); + m_l3.put("ies", "P"); + m_l3.put("ily", "A"); + m_l3.put("ine", "M"); + m_l3.put("ing", "N"); + m_l3.put("ion", "Q"); + m_l3.put("ish", "C"); + m_l3.put("ism", "B"); + m_l3.put("ist", "A"); + m_l3.put("ite", "a"); + m_l3.put("ity", "A"); + m_l3.put("ium", "A"); + m_l3.put("ive", "A"); + m_l3.put("ize", "F"); + m_l3.put("oid", "A"); + m_l3.put("one", "R"); + m_l3.put("ous", "A"); + m_l2 = new HashMap(); + m_l2.put("ae", "A"); + m_l2.put("al", "b"); + m_l2.put("ar", "X"); + m_l2.put("as", "B"); + m_l2.put("ed", "E"); + m_l2.put("en", "F"); + m_l2.put("es", "E"); + m_l2.put("ia", "A"); + m_l2.put("ic", "A"); + m_l2.put("is", "A"); + m_l2.put("ly", "B"); + m_l2.put("on", "S"); + m_l2.put("or", "T"); + m_l2.put("um", "U"); + m_l2.put("us", "V"); + m_l2.put("yl", "R"); + m_l2.put("s\'", "A"); + m_l2.put("\'s", "A"); + m_l1 = new HashMap(); + m_l1.put("a", "A"); + m_l1.put("e", "A"); + m_l1.put("i", "A"); + m_l1.put("o", "A"); + m_l1.put("s", "W"); + m_l1.put("y", "B"); + } + + private String removeEnding(String word) { + + int length = word.length(); + int el = 11; + + while (el > 0) { + if (length - el > 1) { + String ending = word.substring(length - el); + String conditionCode = null; + switch (el) { + case 11: + conditionCode = (String) m_l11.get(ending); + break; + case 10: + conditionCode = (String) m_l10.get(ending); + break; + case 9: + conditionCode = (String) m_l9.get(ending); + break; + case 8: + conditionCode = (String) m_l8.get(ending); + break; + case 7: + conditionCode = (String) m_l7.get(ending); + break; + case 6: + conditionCode = (String) m_l6.get(ending); + break; + case 5: + conditionCode = (String) m_l5.get(ending); + break; + case 4: + conditionCode = (String) m_l4.get(ending); + break; + case 3: + conditionCode = (String) m_l3.get(ending); + break; + case 2: + conditionCode = (String) m_l2.get(ending); + break; + case 1: + conditionCode = (String) m_l1.get(ending); + break; + default: + } + if (conditionCode != null) { + switch (conditionCode.charAt(0)) { + case 'A': + return word.substring(0, length - el); + case 'B': + if (length - el > 2) { + return word.substring(0, length - el); + } + break; + case 'C': + if (length - el > 3) { + return word.substring(0, length - el); + } + break; + case 'D': + if (length - el > 4) { + return word.substring(0, length - el); + } + break; + case 'E': + if (word.charAt(length - el - 1) != 'e') { + return word.substring(0, length - el); + } + break; + case 'F': + if ((length - el > 2) + && (word.charAt(length - el - 1) != 'e')) { + return word.substring(0, length - el); + } + break; + case 'G': + if ((length - el > 2) + && (word.charAt(length - el - 1) == 'f')) { + return word.substring(0, length - el); + } + break; + case 'H': + if ((word.charAt(length - el - 1) == 't') + || ((word.charAt(length - el - 1) == 'l') + && (word.charAt(length - el - 2) == 'l'))) { + return word.substring(0, length - el); + } + break; + case 'I': + if ((word.charAt(length - el - 1) != 'o') + && (word.charAt(length - el - 1) != 'e')) { + return word.substring(0, length - el); + } + break; + case 'J': + if ((word.charAt(length - el - 1) != 'a') + && (word.charAt(length - el - 1) != 'e')) { + return word.substring(0, length - el); + } + break; + case 'K': + if ((length - el > 2) + && ((word.charAt(length - el - 1) == 'l') + || (word.charAt(length - el - 1) == 'i') + || ((word.charAt(length - el - 1) == 'e') + && (word.charAt(length - el - 3) == 'u')))) { + return word.substring(0, length - el); + } + break; + case 'L': + if ((word.charAt(length - el - 1) != 'u') + && (word.charAt(length - el - 1) != 'x') + && ((word.charAt(length - el - 1) != 's') + || (word.charAt(length - el - 2) == 'o'))) { + return word.substring(0, length - el); + } + break; + case 'M': + if ((word.charAt(length - el - 1) != 'a') + && (word.charAt(length - el - 1) != 'c') + && (word.charAt(length - el - 1) != 'e') + && (word.charAt(length - el - 1) != 'm')) { + return word.substring(0, length - el); + } + break; + case 'N': + if ((length - el > 3) + || ((length - el == 3) + && ((word.charAt(length - el - 3) != 's')))) { + return word.substring(0, length - el); + } + break; + case 'O': + if ((word.charAt(length - el - 1) == 'l') + || (word.charAt(length - el - 1) == 'i')) { + return word.substring(0, length - el); + } + break; + case 'P': + if (word.charAt(length - el - 1) != 'c') { + return word.substring(0, length - el); + } + break; + case 'Q': + if ((length - el > 2) + && (word.charAt(length - el - 1) != 'l') + && (word.charAt(length - el - 1) != 'n')) { + return word.substring(0, length - el); + } + break; + case 'R': + if ((word.charAt(length - el - 1) == 'n') + || (word.charAt(length - el - 1) == 'r')) { + return word.substring(0, length - el); + } + break; + case 'S': + if (((word.charAt(length - el - 1) == 'r') + && (word.charAt(length - el - 2) == 'd')) + || ((word.charAt(length - el - 1) == 't') + && (word.charAt(length - el - 2) != 't'))) { + return word.substring(0, length - el); + } + break; + case 'T': + if ((word.charAt(length - el - 1) == 's') + || ((word.charAt(length - el - 1) == 't') + && (word.charAt(length - el - 2) != 'o'))) { + return word.substring(0, length - el); + } + break; + case 'U': + if ((word.charAt(length - el - 1) == 'l') + || (word.charAt(length - el - 1) == 'm') + || (word.charAt(length - el - 1) == 'n') + || (word.charAt(length - el - 1) == 'r')) { + return word.substring(0, length - el); + } + break; + case 'V': + if (word.charAt(length - el - 1) == 'c') { + return word.substring(0, length - el); + } + break; + case 'W': + if ((word.charAt(length - el - 1) != 's') + && (word.charAt(length - el - 1) != 'u')) { + return word.substring(0, length - el); + } + break; + case 'X': + if ((word.charAt(length - el - 1) == 'l') + || (word.charAt(length - el - 1) == 'i') + || ((length - el > 2) + && (word.charAt(length - el - 1) == 'e') + && (word.charAt(length - el - 3) == 'u'))) { + return word.substring(0, length - el); + } + break; + case 'Y': + if ((word.charAt(length - el - 1) == 'n') + && (word.charAt(length - el - 2) == 'i')) { + return word.substring(0, length - el); + } + break; + case 'Z': + if (word.charAt(length - el - 1) != 'f') { + return word.substring(0, length - el); + } + break; + case 'a': + if ((word.charAt(length - el - 1) == 'd') + || (word.charAt(length - el - 1) == 'f') + || (((word.charAt(length - el - 1) == 'h') + && (word.charAt(length - el - 2) == 'p'))) + || (((word.charAt(length - el - 1) == 'h') + && (word.charAt(length - el - 2) == 't'))) + || (word.charAt(length - el - 1) == 'l') + || (((word.charAt(length - el - 1) == 'r') + && (word.charAt(length - el - 2) == 'e'))) + || (((word.charAt(length - el - 1) == 'r') + && (word.charAt(length - el - 2) == 'o'))) + || (((word.charAt(length - el - 1) == 's') + && (word.charAt(length - el - 2) == 'e'))) + || (word.charAt(length - el - 1) == 't')) { + return word.substring(0, length - el); + } + break; + case 'b': + if (m_CompMode) { + if (((length - el == 3) + && (!((word.charAt(length - el - 1) == 't') + && (word.charAt(length - el - 2) == 'e') + && (word.charAt(length - el - 3) == 'm')))) + || ((length - el > 3) + && (!((word.charAt(length - el - 1) == 't') + && (word.charAt(length - el - 2) == 's') + && (word.charAt(length - el - 3) == 'y') + && (word.charAt(length - el - 4) == 'r'))))) { + return word.substring(0, length - el); + } + } else { + if ((length - el > 2) + && (!((word.charAt(length - el - 1) == 't') + && (word.charAt(length - el - 2) == 'e') + && (word.charAt(length - el - 3) == 'm'))) + && ((length - el < 4) + || (!((word.charAt(length - el - 1) == 't') + && (word.charAt(length - el - 2) == 's') + && (word.charAt(length - el - 3) == 'y') + && (word.charAt(length - el - 4) == 'r'))))) { + return word.substring(0, length - el); + } + } + break; + case 'c': + if (word.charAt(length - el - 1) == 'l') { + return word.substring(0, length - el); + } + break; + default: + throw new IllegalArgumentException("Fatal error."); + } + } + } + el--; + } + return word; + } + + private String recodeEnding(String word) { + + int lastPos = word.length() - 1; + + // Rule 1 + if (word.endsWith("bb") + || word.endsWith("dd") + || word.endsWith("gg") + || word.endsWith("ll") + || word.endsWith("mm") + || word.endsWith("nn") + || word.endsWith("pp") + || word.endsWith("rr") + || word.endsWith("ss") + || word.endsWith("tt")) { + word = word.substring(0, lastPos); + lastPos--; + } + + // Rule 2 + if (word.endsWith("iev")) { + word = word.substring(0, lastPos - 2).concat("ief"); + } + + // Rule 3 + if (word.endsWith("uct")) { + word = word.substring(0, lastPos - 2).concat("uc"); + lastPos--; + } + + // Rule 4 + if (word.endsWith("umpt")) { + word = word.substring(0, lastPos - 3).concat("um"); + lastPos -= 2; + } + + // Rule 5 + if (word.endsWith("rpt")) { + word = word.substring(0, lastPos - 2).concat("rb"); + lastPos--; + } + + // Rule 6 + if (word.endsWith("urs")) { + word = word.substring(0, lastPos - 2).concat("ur"); + lastPos--; + } + + // Rule 7 + if (word.endsWith("istr")) { + word = word.substring(0, lastPos - 3).concat("ister"); + lastPos++; + } + + // Rule 7a + if (word.endsWith("metr")) { + word = word.substring(0, lastPos - 3).concat("meter"); + lastPos++; + } + + // Rule 8 + if (word.endsWith("olv")) { + word = word.substring(0, lastPos - 2).concat("olut"); + lastPos++; + } + + // Rule 9 + if (word.endsWith("ul")) { + if ((lastPos - 2 < 0) + || ((word.charAt(lastPos - 2) != 'a') + && (word.charAt(lastPos - 2) != 'i') + && (word.charAt(lastPos - 2) != 'o'))) { + word = word.substring(0, lastPos - 1).concat("l"); + lastPos--; + } + } + + // Rule 10 + if (word.endsWith("bex")) { + word = word.substring(0, lastPos - 2).concat("bic"); + } + + // Rule 11 + if (word.endsWith("dex")) { + word = word.substring(0, lastPos - 2).concat("dic"); + } + + // Rule 12 + if (word.endsWith("pex")) { + word = word.substring(0, lastPos - 2).concat("pic"); + } + + // Rule 13 + if (word.endsWith("tex")) { + word = word.substring(0, lastPos - 2).concat("tic"); + } + + // Rule 14 + if (word.endsWith("ax")) { + word = word.substring(0, lastPos - 1).concat("ac"); + } + + // Rule 15 + if (word.endsWith("ex")) { + word = word.substring(0, lastPos - 1).concat("ec"); + } + + // Rule 16 + if (word.endsWith("ix")) { + word = word.substring(0, lastPos - 1).concat("ic"); + } + + // Rule 17 + if (word.endsWith("lux")) { + word = word.substring(0, lastPos - 2).concat("luc"); + } + + // Rule 18 + if (word.endsWith("uad")) { + word = word.substring(0, lastPos - 2).concat("uas"); + } + + // Rule 19 + if (word.endsWith("vad")) { + word = word.substring(0, lastPos - 2).concat("vas"); + } + + // Rule 20 + if (word.endsWith("cid")) { + word = word.substring(0, lastPos - 2).concat("cis"); + } + + // Rule 21 + if (word.endsWith("lid")) { + word = word.substring(0, lastPos - 2).concat("lis"); + } + + // Rule 22 + if (word.endsWith("erid")) { + word = word.substring(0, lastPos - 3).concat("eris"); + } + + // Rule 23 + if (word.endsWith("pand")) { + word = word.substring(0, lastPos - 3).concat("pans"); + } + + // Rule 24 + if (word.endsWith("end")) { + if ((lastPos - 3 < 0) + || (word.charAt(lastPos - 3) != 's')) { + word = word.substring(0, lastPos - 2).concat("ens"); + } + } + + // Rule 25 + if (word.endsWith("ond")) { + word = word.substring(0, lastPos - 2).concat("ons"); + } + + // Rule 26 + if (word.endsWith("lud")) { + word = word.substring(0, lastPos - 2).concat("lus"); + } + + // Rule 27 + if (word.endsWith("rud")) { + word = word.substring(0, lastPos - 2).concat("rus"); + } + + // Rule 28 + if (word.endsWith("her")) { + if ((lastPos - 3 < 0) + || ((word.charAt(lastPos - 3) != 'p') + && (word.charAt(lastPos - 3) != 't'))) { + word = word.substring(0, lastPos - 2).concat("hes"); + } + } + + // Rule 29 + if (word.endsWith("mit")) { + word = word.substring(0, lastPos - 2).concat("mis"); + } + + // Rule 30 + if (word.endsWith("end")) { + if ((lastPos - 3 < 0) + || (word.charAt(lastPos - 3) != 'm')) { + word = word.substring(0, lastPos - 2).concat("ens"); + } + } + + // Rule 31 + if (word.endsWith("ert")) { + word = word.substring(0, lastPos - 2).concat("ers"); + } + + // Rule 32 + if (word.endsWith("et")) { + if ((lastPos - 2 < 0) + || (word.charAt(lastPos - 2) != 'n')) { + word = word.substring(0, lastPos - 1).concat("es"); + } + } + + // Rule 33 + if (word.endsWith("yt")) { + word = word.substring(0, lastPos - 1).concat("ys"); + } + + // Rule 34 + if (word.endsWith("yz")) { + word = word.substring(0, lastPos - 1).concat("ys"); + } + + return word; + } + + /** + * + * @param word + * @return + */ + public String stem(String word) { + + if (word.length() > 2) { + return recodeEnding(removeEnding(word.toLowerCase())); + } else { + return word.toLowerCase(); + } + } + + /** + * + * @param str + * @return + */ + public String stemString(String str) { + + StringBuffer result = new StringBuffer(); + int start = -1; + for (int j = 0; j < str.length(); j++) { + char c = str.charAt(j); + if (Character.isLetterOrDigit(c)) { + if (start == -1) { + start = j; + } + } else if (c == '\'') { + if (start == -1) { + result.append(c); + } + } else { + if (start != -1) { + result.append(stem(str.substring(start, j))); + start = -1; + } + result.append(c); + } + } + if (start != -1) { + result.append(stem(str.substring(start, str.length()))); + } + return result.toString(); } - // Rule 19 - if (word.endsWith("vad")) { - word = word.substring(0, lastPos - 2).concat("vas"); - } - - // Rule 20 - if (word.endsWith("cid")) { - word = word.substring(0, lastPos - 2).concat("cis"); - } - - // Rule 21 - if (word.endsWith("lid")) { - word = word.substring(0, lastPos - 2).concat("lis"); - } - - // Rule 22 - if (word.endsWith("erid")) { - word = word.substring(0, lastPos - 3).concat("eris"); - } - - // Rule 23 - if (word.endsWith("pand")) { - word = word.substring(0, lastPos - 3).concat("pans"); - } - - // Rule 24 - if (word.endsWith("end")) { - if ((lastPos - 3 < 0) || - (word.charAt(lastPos - 3) != 's')) { - word = word.substring(0, lastPos - 2).concat("ens"); - } - } - - // Rule 25 - if (word.endsWith("ond")) { - word = word.substring(0, lastPos - 2).concat("ons"); - } - - // Rule 26 - if (word.endsWith("lud")) { - word = word.substring(0, lastPos - 2).concat("lus"); - } - - // Rule 27 - if (word.endsWith("rud")) { - word = word.substring(0, lastPos - 2).concat("rus"); - } - - // Rule 28 - if (word.endsWith("her")) { - if ((lastPos - 3 < 0) || - ((word.charAt(lastPos - 3) != 'p') && - (word.charAt(lastPos - 3) != 't'))) { - word = word.substring(0, lastPos - 2).concat("hes"); - } - } - - // Rule 29 - if (word.endsWith("mit")) { - word = word.substring(0, lastPos - 2).concat("mis"); - } - - // Rule 30 - if (word.endsWith("end")) { - if ((lastPos - 3 < 0) || - (word.charAt(lastPos - 3) != 'm')) { - word = word.substring(0, lastPos - 2).concat("ens"); - } - } - - // Rule 31 - if (word.endsWith("ert")) { - word = word.substring(0, lastPos - 2).concat("ers"); - } - - // Rule 32 - if (word.endsWith("et")) { - if ((lastPos - 2 < 0) || - (word.charAt(lastPos - 2) != 'n')) { - word = word.substring(0, lastPos - 1).concat("es"); - } - } - - // Rule 33 - if (word.endsWith("yt")) { - word = word.substring(0, lastPos - 1).concat("ys"); - } - - // Rule 34 - if (word.endsWith("yz")) { - word = word.substring(0, lastPos - 1).concat("ys"); - } - - return word; - } - - public String stem(String word) { - - if (word.length() > 2) { - return recodeEnding(removeEnding(word.toLowerCase())); - } else { - return word.toLowerCase(); - } - } - - public String stemString(String str) { - - StringBuffer result = new StringBuffer(); - int start = -1; - for (int j = 0; j < str.length(); j++) { - char c = str.charAt(j); - if (Character.isLetterOrDigit(c)) { - if (start == -1) { - start = j; - } - } else if (c == '\'') { - if (start == -1) { - result.append(c); - } - } else { - if (start != -1) { - result.append(stem(str.substring(start, j))); - start = -1; - } - result.append(c); - } - } - if (start != -1) { - result.append(stem(str.substring(start, str.length()))); - } - return result.toString(); - } - } - - diff --git a/app/src/main/java/org/rssin/summaries/Stopword.java b/app/src/main/java/org/rssin/summaries/Stopword.java index af3bee6..e43d3bd 100644 --- a/app/src/main/java/org/rssin/summaries/Stopword.java +++ b/app/src/main/java/org/rssin/summaries/Stopword.java @@ -3,72 +3,69 @@ package org.rssin.summaries; import java.io.*; import java.util.StringTokenizer; -public class Stopword -{ - +/** + * + * @author jbernards + */ +public class Stopword { + String stwd[]; - - public Stopword () - { - /*int cnt=0,sz=0;char bt[]=null; - try - { - File fp = new File("stopwords.txt"); - FileReader fis = new FileReader(fp); - sz = (int)fp.length(); - bt = new char [sz]; - fis.read(bt); - fis.close(); - } - catch(IOException ex) {} - - stwd=getTokens(new String(bt)); */ - + + /** + * + */ + public Stopword() { stwd = new TxtStatic().getStopWords(); } -public void display () - { - for (int i=0;i<stwd.length;i++) - System.out.println(stwd[i]); - } - - public boolean isStopword( String word) - { - boolean flag=false; - for (int i =0;i<stwd.length;i++) { - if(stwd[i].equalsIgnoreCase(word) ) { - flag=true; - break; - } + /** + * + * @param word + * @return + */ + public boolean isStopword(String word) { + boolean flag = false; + for (int i = 0; i < stwd.length; i++) { + if (stwd[i].equalsIgnoreCase(word)) { + flag = true; + break; } - return flag; - } + } + return flag; + } - public String[] getTokens(String sen) - { - int sz=0,cnt=0;String words[]=null; - StringTokenizer stk=new StringTokenizer(sen) ; - sz=stk.countTokens(); - words=new String[sz]; - while ( stk.hasMoreTokens()) - { - words[cnt]=new String(stk.nextToken()); - cnt++; - } - return words; + /** + * + * @param sen + * @return + */ + public String[] getTokens(String sen) { + int sz = 0, cnt = 0; + String words[] = null; + StringTokenizer stk = new StringTokenizer(sen); + sz = stk.countTokens(); + words = new String[sz]; + while (stk.hasMoreTokens()) { + words[cnt] = new String(stk.nextToken()); + cnt++; + } + return words; + } + + /** + * + * @param sen + * @return + */ + public String remove(String sen) { + String dsen = ""; + String words[] = getTokens(sen); + for (int j = 0; j < words.length; j++) { + if (!isStopword(words[j])) { + dsen = dsen + words[j] + " "; + } + } + return dsen; } - - public String remove(String sen) - { - String dsen=""; - String words[]=getTokens(sen); - for (int j=0;j<words.length; j++) - { - if ( ! isStopword(words[j] ) ) - dsen = dsen +words[j] +" "; - } - return dsen; - } - -}
\ No newline at end of file + +} diff --git a/app/src/main/java/org/rssin/summaries/Summary.java b/app/src/main/java/org/rssin/summaries/Summary.java index 90ceb04..7504bba 100644 --- a/app/src/main/java/org/rssin/summaries/Summary.java +++ b/app/src/main/java/org/rssin/summaries/Summary.java @@ -1,16 +1,26 @@ package org.rssin.summaries; +/** + * + * @author jbernards + */ public class Summary { - - private String content; - - public Summary(String s) - { - content = s; - } - - public String getText() - { - return content; - } -}
\ No newline at end of file + + private String content; + + /** + * + * @param s + */ + public Summary(String s) { + content = s; + } + + /** + * + * @return + */ + public String getText() { + return content; + } +} diff --git a/app/src/main/java/org/rssin/summaries/SummaryAPI.java b/app/src/main/java/org/rssin/summaries/SummaryAPI.java index 83c3fa6..bb2349d 100644 --- a/app/src/main/java/org/rssin/summaries/SummaryAPI.java +++ b/app/src/main/java/org/rssin/summaries/SummaryAPI.java @@ -11,273 +11,352 @@ import java.util.Hashtable; import java.util.Set; import org.rssin.rss.FeedItem; - /** * Summary API van AST + * * @author Joep */ public class SummaryAPI implements SummaryAPIInterface { + LengthMode lm; + int maxchars, maxlines; + + /** + * get a Summary object from a feedItem, using the settings specified. + * + * @param f FeedItem containing the text to summarize. + * @return The summary + */ @Override public Summary getSummary(FeedItem f) { //todo - String desc = f.description; + String desc = f.getDescription(); String t = getSumText(desc); Summary s = new Summary(t); - + return s; } - + + /** + * Constructor + */ + public SummaryAPI() { + lm = LengthMode.NOLIMIT; + maxchars = Integer.MAX_VALUE; + maxlines = Integer.MAX_VALUE; + } + + /** + * Generates get a Summary object from a String, using the settings + * specified. + * + * @param desc the text to summarize + * @return The summary. + */ @Override - public Summary getSummaryFromText(String t) { + public Summary getSummaryFromText(String desc) { + String t = getSumText(desc); + Summary s = new Summary(t); - + return s; } - - public String getSumText(String desc) - { - Hashtable hs = new Hashtable(); - ArrayList zinnen = getSentences(desc); - //remove stopwords + /** + * gets the summary string from a text string, using the settings specified. + * + * @param desc string to summarize + * @return the summary text. + */ + public String getSumText(String desc) { + Hashtable hs = new Hashtable(); + ArrayList<SentenceItem> zinnen = getSentences(desc); - Stopword stop = new Stopword(); - Special specl = new Special(); - for( int i=0; i<zinnen.size(); i++) - { - SentenceItem sl = (SentenceItem)zinnen.get(i); - sl.setSRSentence(specl.remove(sl.getRawSentence())); - sl.setSRSentence(stop.remove(sl.getSRSentence())); - } + //remove stopwords + Stopword stop = new Stopword(); + Special specl = new Special(); + for (int i = 0; i < zinnen.size(); i++) { + SentenceItem sl = (SentenceItem) zinnen.get(i); + sl.setSRSentence(specl.remove(sl.getRawSentence())); + sl.setSRSentence(stop.remove(sl.getSRSentence())); + } //unique words + for (int i = 0; i < zinnen.size(); i++) { + SentenceItem sl = (SentenceItem) zinnen.get(i); + String sen = sl.getSRSentence(); - for(int i=0;i<zinnen.size(); i++) - { - SentenceItem sl = (SentenceItem)zinnen.get(i); - String sen = sl.getSRSentence(); - - int wordcount=0; + int wordcount = 0; String[] words = sen.split(" "); - + for (String tok : words) { - + tok = tok.trim(); - + wordcount++; - - if(!hs.containsKey(tok) && tok.length() >=3) - addword(tok,i,wordcount,hs); - else if(hs.containsKey(tok) ) - upword(tok,i,wordcount,hs); - } + + if (!hs.containsKey(tok) && tok.length() >= 3) { + addword(tok, i, wordcount, hs); + } else if (hs.containsKey(tok)) { + upword(tok, i, wordcount, hs); + } + } } - + //stemming - - stemming(hs); - + stemming(hs); + //significant - - Enumeration key=hs.keys(); - while (key.hasMoreElements() ) - delword(key.nextElement(), hs); - + Enumeration key = hs.keys(); + while (key.hasMoreElements()) { + delword(key.nextElement(), hs); + } + //getWeight + key = hs.keys(); + while (key.hasMoreElements()) { + setWeight(key.nextElement(), hs, zinnen); + } + + //ranking + return :P + ranking(zinnen, hs); + + return chooseSentence(zinnen); + } + + private String chooseSentence(ArrayList<SentenceItem> zinnen) { + String output = ""; + int lines = zinnen.size(); + SentenceItem[] z = zinnen.toArray(new SentenceItem[0]); + if (lm == LengthMode.LINES || lm == LengthMode.BOTH) { + lines = Math.min(zinnen.size(), maxlines); + z = new SentenceItem[lines]; + for (int i = 0; i < zinnen.size(); i++) { + boolean placed = false; + int j = 0; + while (j < lines && !placed) { + if (z[j] == null) { + z[j] = zinnen.get(i); + placed = true; + } + j++; + } + j--; + + while (!placed && j >= 0) { + if (z[j].getWeight() > zinnen.get(i).getWeight()) { + z[j] = zinnen.get(i); + placed = true; + } + j--; + } + } + } - key=hs.keys(); - while (key.hasMoreElements()) - setWeight(key.nextElement(),hs, zinnen); + if (lm == LengthMode.CHARACTERS || lm == LengthMode.BOTH) { + //todo + } - //ranking + return :P - return ranking(zinnen, hs); + for(SentenceItem zin : z) + { + output += zin.getRawSentence(); + } + + return output; } - - private void setWeight(Object tok, Hashtable hs, ArrayList zinnen) - { - double wg=0.0; - WordItem wl=(WordItem)hs.get(tok); - double scnt = (double)zinnen.size(); - double tf=wl.getcount(); - double df=wl.sentensecount(); - wg = tf*Math.log10(scnt/df); - wl.addWeight(wg); + + private void setWeight(Object tok, Hashtable hs, ArrayList zinnen) { + double wg = 0.0; + WordItem wl = (WordItem) hs.get(tok); + double scnt = (double) zinnen.size(); + double tf = wl.getcount(); + double df = wl.sentensecount(); + wg = tf * Math.log10(scnt / df); + wl.addWeight(wg); } - public String ranking(ArrayList<SentenceItem> zinnen, Hashtable hs) - { - SentenceItem sl=null; - double max=0.0; - int mi=0; - - for(int i = 0; i < zinnen.size(); i++) - { - sl = zinnen.get(i); - String sen=sl.getSRSentence(); - Enumeration key = hs.keys(); - while(key.hasMoreElements()) - { - String str=(String)key.nextElement(); - if(sen.indexOf(str) != -1 ) - { - WordItem wl=(WordItem)hs.get(str); - sl.addWeight(wl.getWeight()); - } - } - } - - for(int i = 0; i < zinnen.size(); i++) - { - sl = (SentenceItem)zinnen.get(i); - if( sl.getWeight() > max ) - { - max = sl.getWeight(); - mi=i; + /** + * + * @param zinnen + * @param hs + * @return + */ + public String ranking(ArrayList<SentenceItem> zinnen, Hashtable hs) { + SentenceItem sl = null; + double max = 0.0; + int mi = 0; + + for (int i = 0; i < zinnen.size(); i++) { + sl = zinnen.get(i); + String sen = sl.getSRSentence(); + Enumeration key = hs.keys(); + while (key.hasMoreElements()) { + String str = (String) key.nextElement(); + if (sen.indexOf(str) != -1) { + WordItem wl = (WordItem) hs.get(str); + sl.addWeight(wl.getWeight()); + } + } + } + + for (int i = 0; i < zinnen.size(); i++) { + sl = (SentenceItem) zinnen.get(i); + if (sl.getWeight() > max) { + max = sl.getWeight(); + mi = i; } } String str1 = sl.getRawSentence(); - sl = zinnen.get(mi); + sl = zinnen.get(mi); - return str1; + return str1; } - - private void delword(Object tok, Hashtable hs) - { - WordItem wl=(WordItem)hs.remove(tok); - if( wl.getcount() > 3 ) - hs.put(tok,wl); + + private void delword(Object tok, Hashtable hs) { + WordItem wl = (WordItem) hs.remove(tok); + if (wl.getcount() > 3) { + hs.put(tok, wl); + } } - - private void stemword(String w1,String w2, Hashtable hs) - { - if( !hs.containsKey(w2) || !hs.containsKey(w1) ) - { + private void stemword(String w1, String w2, Hashtable hs) { + if (!hs.containsKey(w2) || !hs.containsKey(w1)) { // System.out.print("return:"); - return; + return; } - WordItem wl1=(WordItem)hs.remove(w1); - WordItem wl2=(WordItem)hs.remove(w2); - - ArrayList wp=wl2.getwordpos(); - ArrayList sp=wl2.getsentensepos(); + WordItem wl1 = (WordItem) hs.remove(w1); + WordItem wl2 = (WordItem) hs.remove(w2); + ArrayList wp = wl2.getwordpos(); + ArrayList sp = wl2.getsentensepos(); - for(int i=0;i<wp.size();i++) - { - String wp2=(String)wp.get(i); - String sp2=(String)sp.get(i); - wl1.incrcount(Integer.parseInt(wp2),Integer.parseInt(sp2)); + for (int i = 0; i < wp.size(); i++) { + String wp2 = (String) wp.get(i); + String sp2 = (String) sp.get(i); + wl1.incrcount(Integer.parseInt(wp2), Integer.parseInt(sp2)); } - hs.put(w1,wl1); + hs.put(w1, wl1); } - - - private double difpos(String str1,String str2) - { + + private double difpos(String str1, String str2) { int sz = Math.min(str1.length(), str2.length()); int mz = Math.max(str1.length(), str2.length()); - - double dp=mz; - double sm=0; - - for (int i = 0; i < mz; i++ ) - { - if( str1.charAt(i) != str2.charAt(i) ) - { - dp=i+1; + + double dp = mz; + double sm = 0; + + for (int i = 0; i < mz; i++) { + if (str1.charAt(i) != str2.charAt(i)) { + dp = i + 1; break; + } else { + sm++; } - else - sm++; - } - return(sm*(dp/sz)); + } + return (sm * (dp / sz)); } + private void stemming(Hashtable hs) { + int sz = hs.size(); - private void stemming(Hashtable hs) - { - int sz=hs.size(); - double wdis[][] = new double[sz][sz]; - - Set s1=hs.keySet(); - - Object obj[]=s1.toArray(); - for(int i = 0; i < sz; i++) - { + Set s1 = hs.keySet(); + + Object obj[] = s1.toArray(); + + for (int i = 0; i < sz; i++) { String str1 = (String) obj[i]; - for(int j=0;j<sz;j++) - { - String str2=(String) obj[j]; - if(i!=j) - wdis[i][j]=difpos(str1,str2); + for (int j = 0; j < sz; j++) { + String str2 = (String) obj[j]; + if (i != j) { + wdis[i][j] = difpos(str1, str2); + } } - } - - for(int i=0;i<sz;i++) - { - String str1=(String)obj[i]; - for(int j=0;j<sz;j++) - { - String str2 = (String)obj[j]; - if(i != j && wdis[i][j] >= 3.0 ) - { - stemword(str1,str2,hs); - } - } - } + } + + for (int i = 0; i < sz; i++) { + String str1 = (String) obj[i]; + for (int j = 0; j < sz; j++) { + String str2 = (String) obj[j]; + if (i != j && wdis[i][j] >= 3.0) { + stemword(str1, str2, hs); + } + } + } } - - private void addword(String tok,int sp,int wp, Hashtable hs) - { - WordItem wl=new WordItem(tok); - wl.incrcount(sp+1,wp); - hs.put(tok,wl); + + private void addword(String tok, int sp, int wp, Hashtable hs) { + WordItem wl = new WordItem(tok); + wl.incrcount(sp + 1, wp); + hs.put(tok, wl); } - private void upword(String tok,int sp,int wp, Hashtable hs) - { - WordItem wl=(WordItem)hs.remove(tok); - wl.incrcount(sp+1,wp); - hs.put(tok,wl); + + private void upword(String tok, int sp, int wp, Hashtable hs) { + WordItem wl = (WordItem) hs.remove(tok); + wl.incrcount(sp + 1, wp); + hs.put(tok, wl); } - - private ArrayList<SentenceItem> getSentences(String doc) - { + + private ArrayList<SentenceItem> getSentences(String doc) { ArrayList<SentenceItem> als = new ArrayList(); - - int fs1=0; - int fs2=0; - int nx=0; - while ( nx < (doc.length()-1) ) - { - nx=doc.indexOf(".",fs2); - if(nx==-1) - break; - else if( nx==doc.lastIndexOf(".") ) - { - String str=(doc.substring(fs1,nx+1)).toLowerCase(); - als.add(new SentenceItem(str)); - fs2=nx+1; - fs1=fs2; + int fs1 = 0; + int fs2 = 0; + int nx = 0; + + while (nx < (doc.length() - 1)) { + nx = doc.indexOf(".", fs2); + if (nx == -1) { + break; + } else if (nx == doc.lastIndexOf(".")) { + String str = (doc.substring(fs1, nx + 1)).toLowerCase(); + als.add(new SentenceItem(str)); + fs2 = nx + 1; + fs1 = fs2; break; - } - else if( doc.charAt(nx+1) ==' ' || doc.charAt(nx+1) =='\r' || doc.charAt(nx+1) =='\n') - { - String str=(doc.substring(fs1,nx+1).toLowerCase()).trim(); - als.add(new SentenceItem(str)); - fs2=nx+1; - fs1=fs2; - } - else - fs2=nx+1; + } else if (doc.charAt(nx + 1) == ' ' || doc.charAt(nx + 1) == '\r' || doc.charAt(nx + 1) == '\n') { + String str = (doc.substring(fs1, nx + 1).toLowerCase()).trim(); + als.add(new SentenceItem(str)); + fs2 = nx + 1; + fs1 = fs2; + } else { + fs2 = nx + 1; + } } - return als; + return als; + } + + /** + * set the lengthmode -_- + * + * @param l Mode to use. + */ + @Override + public void setLengthMode(LengthMode l) { + lm = l; + } + + /** + * set the maximum number of characters of the summary text. + * + * @param chars integer value of the amount. + */ + @Override + public void setMaxChars(int chars) { + maxchars = chars; + } + + /** + * set the maximum number of sentences of the summary text. + * + * @param lines integer value of the amount. + */ + @Override + public void setMaxLines(int lines) { + maxlines = lines; } } diff --git a/app/src/main/java/org/rssin/summaries/SummaryAPIInterface.java b/app/src/main/java/org/rssin/summaries/SummaryAPIInterface.java index 17636de..5ae2cb9 100644 --- a/app/src/main/java/org/rssin/summaries/SummaryAPIInterface.java +++ b/app/src/main/java/org/rssin/summaries/SummaryAPIInterface.java @@ -2,9 +2,47 @@ package org.rssin.summaries; import org.rssin.rss.FeedItem; +/** + * + * @author jbernards + */ public interface SummaryAPIInterface { - - public Summary getSummary(FeedItem f); - - public Summary getSummaryFromText(String t); -}
\ No newline at end of file + + /** + * get a Summary object from a feedItem, using the settings specified. + * + * @param f FeedItem containing the text to summarize. + * @return The summary + */ + public Summary getSummary(FeedItem f); + + /** + * Generates get a Summary object from a String, using the settings + * specified. + * + * @param t the text to summarize + * @return The summary. + */ + public Summary getSummaryFromText(String t); + + /** + * set the LengthMode -_- + * + * @param l Mode to use. + */ + public void setLengthMode(LengthMode l); + + /** + * set the maximum number of characters of the summary text. + * + * @param chars integer value of the amount. + */ + public void setMaxChars(int chars); + + /** + * set the maximum number of sentences of the summary text. + * + * @param lines integer value of the amount. + */ + public void setMaxLines(int lines); +} diff --git a/app/src/main/java/org/rssin/summaries/TxtStatic.java b/app/src/main/java/org/rssin/summaries/TxtStatic.java index d686beb..e31bf8b 100644 --- a/app/src/main/java/org/rssin/summaries/TxtStatic.java +++ b/app/src/main/java/org/rssin/summaries/TxtStatic.java @@ -10,17 +10,24 @@ package org.rssin.summaries; * @author Joep */ public class TxtStatic { - - private final String[] stopwords = {"a","about","above","across","after","afterwards","again","against","all","almost","alone","along","already","also","although","always","am","among","amongst","amoungst","amount","an","and","another","any","anyhow","anyone","anything","anyway","anywhere","are","around","as","at","back","be","became","because","become","becomes","becoming","been","before","beforehand","behind","being","below","beside","besides","between","beyond","bill","both","bottom","but","by","by","call","can","common","cannot","cant","co","computer","con","could","couldnt","cry","de","describe","detail","do","does","done","down","due","during","each","eg","eight","either","eleven","else","elsewhere","empty","enough","etc","even","ever","every","everyone","everything","everywhere","except","few","fifteen","fify","fill","find","fire","first","five","for","former","formerly","forty","found","four","from","front","full","further","get","give","go","had","has","hasnt","have","he","hence","her","here","hereafter","hereby","herein","hereupon","hers","herself","him","himself","his","how","however","hundred","i","ie","if","in","inc","indeed","interest","into","is","it","its","itself","keep","last","latter","latterly","least","less","ltd","made","many","may","me","meanwhile","might","mill","mine","more","moreover","most","mostly","move","much","must","my","myself","name","namely","neither","never","nevertheless","next","nine","no","nobody","none","noone","nor","not","nothing","now","nowhere","of","off","often","on","once","one","only","onto","or","other","others","otherwise","our","ours","ourselves","out","over","own","part","per","perhaps","please","put","rather","re","same","see","seem","seemed","seeming","seems","serious","several","she","should","show","side","since","sincere","six","sixty","so","some","somehow","someone","something","sometime","sometimes","somewhere","still","such","system","take","ten","than","that","the","their","them","themselves","then","thence","there","thereafter","thereby","therefore","therein","thereupon","these","they","thick","thin","third","this","those","though","three","through","throughout","thru","thus","to","together","too","top","toward","towards","twelve","twenty","two","un","under","until","up","upon","us","usually","usual","very","via","was","we","well","were","what","whatever","when","whence","whenever","where","whereafter","whereas","whereby","whereinwhereupon","wherever","whether","which","while","whither","who","whoever","whole","whom","whose","why","will","with","within","without","would","yet","you","your","yours","yourself","yourselves","don't","won't","can't","didn't","it's","is'nt","aren't","wasn't","haven't","hasn't","hadn't","you've","it'hv","you'd","you're","hasn't","we�ll","you�re","we're","we've"}; - + + private final String[] stopwords = {"a", "about", "above", "across", "after", "afterwards", "again", "against", "all", "almost", "alone", "along", "already", "also", "although", "always", "am", "among", "amongst", "amoungst", "amount", "an", "and", "another", "any", "anyhow", "anyone", "anything", "anyway", "anywhere", "are", "around", "as", "at", "back", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "bill", "both", "bottom", "but", "by", "by", "call", "can", "common", "cannot", "cant", "co", "computer", "con", "could", "couldnt", "cry", "de", "describe", "detail", "do", "does", "done", "down", "due", "during", "each", "eg", "eight", "either", "eleven", "else", "elsewhere", "empty", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", "fifteen", "fify", "fill", "find", "fire", "first", "five", "for", "former", "formerly", "forty", "found", "four", "from", "front", "full", "further", "get", "give", "go", "had", "has", "hasnt", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "hundred", "i", "ie", "if", "in", "inc", "indeed", "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter", "latterly", "least", "less", "ltd", "made", "many", "may", "me", "meanwhile", "might", "mill", "mine", "more", "moreover", "most", "mostly", "move", "much", "must", "my", "myself", "name", "namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own", "part", "per", "perhaps", "please", "put", "rather", "re", "same", "see", "seem", "seemed", "seeming", "seems", "serious", "several", "she", "should", "show", "side", "since", "sincere", "six", "sixty", "so", "some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "system", "take", "ten", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein", "thereupon", "these", "they", "thick", "thin", "third", "this", "those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "top", "toward", "towards", "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us", "usually", "usual", "very", "via", "was", "we", "well", "were", "what", "whatever", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "whereinwhereupon", "wherever", "whether", "which", "while", "whither", "who", "whoever", "whole", "whom", "whose", "why", "will", "with", "within", "without", "would", "yet", "you", "your", "yours", "yourself", "yourselves", "don't", "won't", "can't", "didn't", "it's", "is'nt", "aren't", "wasn't", "haven't", "hasn't", "hadn't", "you've", "it'hv", "you'd", "you're", "hasn't", "we�ll", "you�re", "we're", "we've"}; + private final char[] special = {'"', ',', ';', '!', '&', '/', '$', ':', '|', '%', ')', '(', '[', ']', '�', '�', '\'', '.', ' ', '\n'}; - - public String[] getStopWords() - { + + /** + * + * @return + */ + public String[] getStopWords() { return stopwords; } - public char[] getSpecial() - { + + /** + * + * @return + */ + public char[] getSpecial() { return special; } } diff --git a/app/src/main/java/org/rssin/summaries/WordItem.java b/app/src/main/java/org/rssin/summaries/WordItem.java index fb47749..072320e 100644 --- a/app/src/main/java/org/rssin/summaries/WordItem.java +++ b/app/src/main/java/org/rssin/summaries/WordItem.java @@ -2,77 +2,116 @@ package org.rssin.summaries; import java.util.ArrayList; -public class WordItem -{ +/** + * + * @author jbernards + */ +public class WordItem { + private String word; private double cnt; private double scnt; - private double wght; + private double wght; private ArrayList spl; private ArrayList wpl; - - public WordItem (String wd) - { - word= new String(wd); - spl=new ArrayList(); - wpl=new ArrayList(); - cnt=0;scnt=0; - } - - public void incrcount(int sp,int wp) - { + + /** + * + * @param wd + */ + public WordItem(String wd) { + word = new String(wd); + spl = new ArrayList(); + wpl = new ArrayList(); + cnt = 0; + scnt = 0; + } + + /** + * + * @param sp + * @param wp + */ + public void incrcount(int sp, int wp) { cnt++; sentensepos(sp); wordpos(wp); - } + } - public double getcount() - { - return cnt; + /** + * + * @return + */ + public double getcount() { + return cnt; } - - public String getword() - { + + /** + * + * @return + */ + public String getword() { return word; } - - public void sentensepos(int sp) - { - if(! spl.contains(sp+"")) + + /** + * + * @param sp + */ + public void sentensepos(int sp) { + if (!spl.contains(sp + "")) { scnt++; - spl.add(sp+""); + } + spl.add(sp + ""); } - public void wordpos(int wp) - { - wpl.add(wp+""); + /** + * + * @param wp + */ + public void wordpos(int wp) { + wpl.add(wp + ""); } - - public ArrayList getwordpos() - { - return wpl; + + /** + * + * @return + */ + public ArrayList getwordpos() { + return wpl; + } + + /** + * + * @return + */ + public ArrayList getsentensepos() { + return spl; } - - public ArrayList getsentensepos() - { - return spl; - } - - public void addWeight(double wg) - { - wght=wg; + + /** + * + * @param wg + */ + public void addWeight(double wg) { + wght = wg; } - - public double getWeight() - { - return wght; + + /** + * + * @return + */ + public double getWeight() { + return wght; } - - public double sentensecount() - { - return scnt; + + /** + * + * @return + */ + public double sentensecount() { + return scnt; } - -}
\ No newline at end of file +} diff --git a/app/src/main/java/org/rssin/summaries/specials.txt b/app/src/main/java/org/rssin/summaries/specials.txt deleted file mode 100644 index 046d05b..0000000 --- a/app/src/main/java/org/rssin/summaries/specials.txt +++ /dev/null @@ -1,18 +0,0 @@ -, -" -; -! -& -/ -$ -: -| -% -) -( -[ -] -” -“ -' -. diff --git a/app/src/main/java/org/rssin/summaries/stopwords.txt b/app/src/main/java/org/rssin/summaries/stopwords.txt deleted file mode 100644 index e621f2a..0000000 --- a/app/src/main/java/org/rssin/summaries/stopwords.txt +++ /dev/null @@ -1,345 +0,0 @@ -a -about -above -across -after -afterwards -again -against -all -almost -alone -along -already -also -although -always -am -among -amongst -amoungst -amount -an -and -another -any -anyhow -anyone -anything -anyway -anywhere -are -around -as -at -back -be -became -because -become -becomes -becoming -been -before -beforehand -behind -being -below -beside -besides -between -beyond -bill -both -bottom -but -by -by -call -can -common -cannot -cant -co -computer -con -could -couldnt -cry -de -describe -detail -do -does -done -down -due -during -each -eg -eight -either -eleven -else -elsewhere -empty -enough -etc -even -ever -every -everyone -everything -everywhere -except -few -fifteen -fify -fill -find -fire -first -five -for -former -formerly -forty -found -four -from -front -full -further -get -give -go -had -has -hasnt -have -he -hence -her -here -hereafter -hereby -herein -hereupon -hers -herself -him -himself -his -how -however -hundred -i -ie -if -in -inc -indeed -interest -into -is -it -its -itself -keep -last -latter -latterly -least -less -ltd -made -many -may -me -meanwhile -might -mill -mine -more -moreover -most -mostly -move -much -must -my -myself -name -namely -neither -never -nevertheless -next -nine -no -nobody -none -noone -nor -not -nothing -now -nowhere -of -off -often -on -once -one -only -onto -or -other -others -otherwise -our -ours -ourselves -out -over -own -part -per -perhaps -please -put -rather -re -same -see -seem -seemed -seeming -seems -serious -several -she -should -show -side -since -sincere -six -sixty -so -some -somehow -someone -something -sometime -sometimes -somewhere -still -such -system -take -ten -than -that -the -their -them -themselves -then -thence -there -thereafter -thereby -therefore -therein -thereupon -these -they -thick -thin -third -this -those -though -three -through -throughout -thru -thus -to -together -too -top -toward -towards -twelve -twenty -two -un -under -until -up -upon -us -usually -usual -very -via -was -we -well -were -what -whatever -when -whence -whenever -where -whereafter -whereas -whereby -wherein -whereupon -wherever -whether -which -while -whither -who -whoever -whole -whom -whose -why -will -with -within -without -would -yet -you -your -yours -yourself -yourselves -don't -won't -can't -didn't -it's -is'nt -aren't -wasn't -haven't -hasn't -hadn't -you've -it'hv -you'd -you're -hasn't -we’ll -you’re -we're -we've - diff --git a/app/src/main/java/org/rssin/summaries/tester.java b/app/src/main/java/org/rssin/summaries/tester.java new file mode 100644 index 0000000..07c751a --- /dev/null +++ b/app/src/main/java/org/rssin/summaries/tester.java @@ -0,0 +1,49 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package org.rssin.summaries; + +import java.util.Scanner; + +/** + * + * @author jbernards + */ +public class tester { + + /** + * @param args the command line arguments + */ + public static void main(String[] args) { + Scanner scanner = new Scanner(System.in); + + System.out.println("Voer een tekst in"); + String t = ""; + String s = ""; + + do { + t += s; + s = scanner.nextLine(); + } while (!s.contains("#")); + + System.out.println("Tekst geaccepteerd."); + + SummaryAPI sumo = new SummaryAPI(); + sumo.setLengthMode(LengthMode.LINES); + sumo.setMaxLines(2); + + System.out.println("Sumo ingesteld."); + + String k = sumo.getSumText(t); + + k = k.trim(); + + System.out.println(k); + + System.out.println(k.length()); + System.out.println(t.length()); + } + +} |