function [codes, lengths, ave_length, entropy] = huffman(pp) %HUFFMAN binary symbol code for known distribution (for teaching, not production use!) % % [codes, lengths, ave_length, entropy] = huffman(pp); % % Inputs: % pp Ix1 vector of probabilities of symbols % % Outputs: % codes Ix1 cell array of strings giving binary codewords % lengths Ix1 lengths of each codeword % ave_length 1x1 performance of code in bits/symbol % entropy 1x1 entropy of distribution in bits/symbol % Iain Murray, October 2010, October 2011 assert(isvector(pp)); nodes = num2cell(1:length(pp)); probs = pp; while length(nodes) > 1 % find two least probable nodes: [mn, idx] = min(probs); probs(idx) = Inf; [mn2, idx2] = min(probs); % merge them: probs(idx) = mn + mn2; probs(idx2) = []; nodes{idx} = {nodes{idx}, nodes{idx2}}; nodes(idx2) = []; end % Recursive navigation of tree of nested cells to construct codes codes = cell(size(pp)); codes = huffman_helper('', nodes{1}, codes); % Measure performance of code lengths = cellfun(@length, codes); ave_length = pp(:)'*lengths(:); % Compare to ideal performance for codes based on large blocks entropy = hbits(pp); function codes = huffman_helper(cur_code, nodes, codes) if length(nodes) == 1 symbol = nodes; codes{symbol} = cur_code; return; else codes = huffman_helper([cur_code, '0'], nodes{1}, codes); codes = huffman_helper([cur_code, '1'], nodes{2}, codes); end % hbits is from my Matlab toolbox: % http://homepages.inf.ed.ac.uk/imurray2/code/#imurray-matlab % But copied here for convenience: function h = hbits(varargin) %HBITS (cross-)entropy for discrete distributions measured in bits % % h = hnats(pp) / log(2) % h = hnats(pp, qq) / log(2) % % See also, HNATS, H2BITS, H2NATS, KL_BITS, KL_NATS % Iain Murray, September 2008 h = hnats(varargin{:}) / log(2); function h = hnats(pp, qq) %HNATS (cross-)entropy for discrete distributions measured in nats % % h = hnats(pp, qq) % = -pp(:)'*log(qq(:)), but with zeros from pp removed. % % If one argument is given, encoding distribution qq is equal to the source % distribution pp. % % See also, HBITS, H2NATS, H2BITS, KL_NATS, KL_BITS % Iain Murray, September 2008 if nargin < 2 qq = pp; elseif ~isequal(size(pp), size(qq)) error('Input probability arrays must have the same size.'); end mask = (pp(:)~=0); m = @(x) x(mask); s = @(x) m(x(:)); h = -s(pp)'*reallog(s(qq));