-- Huffman-coding demo: build a Huffman tree for a fixed probability
-- distribution, print each symbol's codeword, and compare the expected
-- code length against the source entropy.
--
-- Run with runhaskell, by typing main after "ghci huff.hs", or by
-- compiling with ghc and then running ./huff
-- Originally by Iain Murray, October 2012.

import Data.List (insert, sort)

-- Symbol probabilities.  These happen to sum to one already; if they were
-- read from the command line they would need normalising first.
probs :: [Double]
probs = [0.3, 0.2, 0.2, 0.1, 0.1, 0.1]
--probs = [0.25, 0.25, 0.25, 0.125, 0.125]

-- A Huffman tree: a leaf carries a symbol index; an internal node joins two
-- subtrees.  The derived Ord instance (Leaf < (:^:), then structural) gives a
-- deterministic tie-break between nodes of equal probability.
data Tree = Leaf Int | Tree :^: Tree
    deriving (Eq, Ord, Show)

-- Huffman tree for 'probs': start from one leaf per symbol, sorted by
-- probability, then repeatedly merge the two least probable nodes.
huffTree :: Tree
huffTree = makeTree probsNodes
  where probsNodes = sort (zip probs (map Leaf [0..]))

-- Reduce a sorted (probability, tree) work list to a single tree by merging
-- the two cheapest entries and re-inserting the combined node at its sorted
-- position.  Data.List.insert places the new node before the first entry
-- that is >= it, which preserves the sort invariant.
--
-- A heap (e.g. from hackage) would extract the minima more efficiently, but
-- a sorted list is fine at this size.
makeTree :: [(Double, Tree)] -> Tree
makeTree []                        = error "makeTree: empty probability list"
makeTree [(_, t)]                  = t
makeTree ((p1, t1) : (p2, t2) : rest) =
    makeTree (insert (p1 + p2, t1 :^: t2) rest)

-- Codeword for each symbol, in symbol order.  Walking the tree appends '0'
-- for a left branch and '1' for a right branch; sorting on the symbol index
-- restores the input ordering before the prefixes are extracted.
codes :: [String]
codes = map snd (sort (accumCodes "" huffTree))
  where
    accumCodes :: String -> Tree -> [(Int, String)]
    accumCodes prefix (Leaf x)    = [(x, prefix)]
    accumCodes prefix (t1 :^: t2) =
        accumCodes (prefix ++ "0") t1 ++ accumCodes (prefix ++ "1") t2

-- Average codeword length in bits/symbol under 'probs'.
expectedLength :: Double
expectedLength = sum [p * fromIntegral (length c) | (p, c) <- zip probs codes]

-- Shannon entropy of 'probs' in bits/symbol (the lower bound on the
-- expected length of any prefix code).  Assumes every p is strictly
-- positive, as in the distributions above.
entropy :: Double
entropy = -sum [p * log p | p <- probs] / log 2.0

-- Print the codebook followed by the expected length and the entropy.
main :: IO ()
main = putStr (unlines codes
               ++ "Expected length: " ++ show expectedLength ++ " bits/symbol\n"
               ++ " Entropy: " ++ show entropy ++ " bits/symbol\n")