-- For fun, I decided to compute Huffman codes and compute their expected
-- length using Haskell. WARNING: notice the lack of tests...
--
-- DISCLAIMER: This is my first Haskell program of more than a few lines,
-- written after reading some of the Edinburgh Inf1-FP notes. It is almost
-- certainly terrible style!
-- Run with "runhaskell huff.hs", by typing main after loading it with "ghci huff.hs",
-- or by compiling with ghc and then running ./huff
-- I ran it with ghc 7.0.3.
-- Iain Murray, October 2012
import Data.List (sort)
-- If keen, I could grab the probabilities from the command-line.
-- http://learnyouahaskell.com/input-and-output
-- Remembering to normalize them.
-- Symbol probabilities; index i in this list corresponds to 'Leaf i'.
-- NOTE(review): assumed to be normalized (sum to 1) and strictly positive;
-- nothing in this file checks either -- confirm before changing the values.
probs = [0.3, 0.2, 0.2, 0.1, 0.1, 0.1]
--probs = [0.25, 0.25, 0.25, 0.125, 0.125]
-- A binary code tree: a 'Leaf' carries the index of a symbol in 'probs',
-- and ':^:' joins two subtrees. The derived Ord instance is load-bearing:
-- (probability, Tree) pairs are sorted below, so Tree's ordering breaks
-- ties between equal probabilities.
data Tree = Leaf Int
          | Tree :^: Tree
          deriving (Eq, Ord, Show)
-- | The Huffman tree for 'probs'. Each symbol starts as a leaf labelled
-- with its index; the list is sorted ascending by probability so that
-- 'makeTree' always finds the two cheapest nodes at the front.
huffTree :: Tree
huffTree = makeTree (sort (zip probs (map Leaf [0 ..])))
-- | Collapse a probability-sorted list of (probability, subtree) pairs
-- into a single Huffman tree: merge the two least-probable nodes, put the
-- merged node back in sorted position, and repeat until one tree remains.
--
-- Precondition: the input list is sorted ascending and non-empty; an
-- empty input is a caller error and is reported explicitly rather than
-- as a bare pattern-match failure.
makeTree :: [(Double,Tree)] -> Tree
makeTree []       = error "makeTree: empty list of nodes"
makeTree [(_, n)] = n
makeTree ((p1,n1):(p2,n2):ns) =
    makeTree (moveUp ((p1 + p2, n1 :^: n2) : ns))
  where
    -- Bubble the freshly merged head node rightwards until the list is
    -- sorted again (a single insertion into the already-sorted tail).
    moveUp (x:y:xs)
      | x > y     = y : moveUp (x : xs)
      | otherwise = x : y : xs
    moveUp xs = xs
-- There is a heap data structure on hackage if I wanted to pull out the least
-- probable nodes more efficiently.
-- | One codeword string per symbol, in symbol order: walk 'huffTree'
-- accumulating "0" for a left branch and "1" for a right branch, then
-- sort the (symbol index, codeword) pairs back into symbol order and
-- keep just the codewords.
codes = map snd (sort (walk huffTree ""))
  where
    walk (Leaf sym) path = [(sym, path)]
    walk (l :^: r)  path = walk l (path ++ "0") ++ walk r (path ++ "1")
-- | Expected codeword length in bits/symbol: each codeword's length
-- weighted by its symbol's probability.
expectedLength = sum (zipWith weight probs codes)
  where
    weight p c = p * fromIntegral (length c)
-- | Shannon entropy of 'probs' in bits/symbol (natural log rescaled by
-- log 2). Zero-probability entries are skipped: the p*log p term tends
-- to 0 as p -> 0, but evaluating it literally at p = 0 gives
-- 0 * (-Infinity) = NaN, which would poison the whole sum.
entropy = -sum [p * log p | p <- probs, p > 0] / log 2.0
-- | Print one codeword per line, then the expected code length and the
-- entropy of the source, both in bits/symbol.
main = do
  mapM_ putStrLn codes
  putStr ("Expected length: " ++ show expectedLength ++ " bits/symbol\n")
  putStr (" Entropy: " ++ show entropy ++ " bits/symbol\n")