summaryrefslogtreecommitdiff
path: root/Assignment 1/CamilStaps-assignment1-freqs.hs
diff options
context:
space:
mode:
author Camil Staps 2016-02-12 14:50:35 +0100
committer Camil Staps 2016-02-12 14:50:35 +0100
commit a9af9b24b7fbfb31110123b125fb7fc916cff24f (patch)
tree e8a49c9bedbcf43a912dfd37348da79123bf814a /Assignment 1/CamilStaps-assignment1-freqs.hs
Put everything on git
Diffstat (limited to 'Assignment 1/CamilStaps-assignment1-freqs.hs')
-rw-r--r-- Assignment 1/CamilStaps-assignment1-freqs.hs 39
1 files changed, 39 insertions, 0 deletions
diff --git a/Assignment 1/CamilStaps-assignment1-freqs.hs b/Assignment 1/CamilStaps-assignment1-freqs.hs
new file mode 100644
index 0000000..6c6cd47
--- /dev/null
+++ b/Assignment 1/CamilStaps-assignment1-freqs.hs
@@ -0,0 +1,39 @@
+import System.Environment
+import Data.String.Utils
+import qualified Data.List as List
+
+-- main: strip the spaces from the first command-line argument and print its
+-- 1-, 2- and 3-gram frequency tables, most frequent first. Any further
+-- arguments are ignored.
+main :: IO ()
+main = do
+  args <- getArgs
+  case args of
+    -- No argument: report how to call the program instead of letting
+    -- 'head' abort with an unhelpful "empty list" exception.
+    [] -> do
+      name <- getProgName
+      ioError (userError ("usage: " ++ name ++ " TEXT"))
+    (text : _) -> do
+      let input = replace " " "" text
+      -- One table per n-gram size, each sorted by descending frequency.
+      mapM_ (\n -> print $ List.reverse $ List.sort $ countfreqs input n []) [1, 2, 3]
+
+-- countfreqs s n fl: add one occurrence to fl for every n-gram of s, walking
+-- s from left to right (pass [] as fl to count from scratch)
+countfreqs :: String -> Int -> [Freq] -> [Freq]
+countfreqs s n fl = List.foldl' addGram fl grams
+  where
+    -- Suffixes of s that are non-empty and still hold at least n characters;
+    -- suffixes only get shorter, so scanning stops at the first that fails.
+    suffixes = takeWhile (\rest -> not (null rest) && length rest >= n) (List.tails s)
+    -- The n-gram starting at each such suffix, in left-to-right order.
+    grams = map (take n) suffixes
+    -- Count one more occurrence of gram in the accumulated list.
+    addGram acc gram = freqsincr acc gram []
+
+-- freqsincr fl s seen: count one more occurrence of s in fl. The entry for s
+-- (new with count 1, or the existing one with its count incremented) always
+-- ends up last; seen holds entries already known not to match and is normally
+-- passed as [].
+freqsincr :: [Freq] -> String -> [Freq] -> [Freq]
+freqsincr fl s seen =
+  case break ((== s) . get_item) fl of
+    -- s is not in fl yet: keep every entry and append a fresh one.
+    (skipped, []) ->
+      seen ++ skipped ++ [Freq { item = s, freq = 1 }]
+    -- s found: the entries after the hit come first, then the ones before
+    -- it, then the updated entry.
+    (skipped, hit : rest) ->
+      rest ++ seen ++ skipped ++ [Freq { item = s, freq = get_freq hit + 1 }]
+
+-- Frequency record: a substring together with the number of times it has
+-- been counted
+data Freq = Freq
+  { item :: String -- the substring being counted
+  , freq :: Int -- number of occurrences counted so far
+  }
+  deriving (Eq)
+
+-- get_item f: the substring that f keeps a count for
+get_item :: Freq -> String
+get_item (Freq { item = gram }) = gram
+
+-- get_freq f: the number of occurrences recorded in f
+get_freq :: Freq -> Int
+get_freq (Freq { freq = count }) = count
+
+-- Order entries by count first. Entries with equal counts are then ordered
+-- by their item, so that compare yields EQ exactly when the derived (==)
+-- holds; comparing the count alone reported distinct entries as EQ and left
+-- the relative order of ties in a sorted list dependent on input order.
+instance Ord Freq where
+  f1 `compare` f2 =
+    (get_freq f1, get_item f1) `compare` (get_freq f2, get_item f2)
+
+-- Render an entry as the item followed by its count, e.g. ab (3x)
+instance Show Freq where
+  show (Freq gram count) = concat [gram, " (", show count, "x)"]
\ No newline at end of file