This section contains some of the programming assignments of SEM – IGNOU MCAOL course. This code is made available in the hope that it’ll be useful.
Huffman Encoding
The idea is to generate a variable length encoding so as to efficiently send the data. Suppose that we want to send the word HELLO. Now if we use ASCII then it means we’ve to send out 40 bits i.e 8 bits per character.
If we use Huffman encoding we can send this data in lesser number of bits. The aim is to identify such a bit pattern so that we can encode each symbol (character) using lesser bits.
About the algorithm
- First calculate the frequency of each symbol in the data.
- Based on the frequency create binary tree as follows
- Take two smallest frequency items and create a new node with the sum of their frequencies.
- Add this newly created node at the back of the remaining symbol queue.
- Repeat this process until there’s a single node available.
- The remaining node is the root of the Huffman tree.
Code, C++
#include <iostream>
#include <vector>
#include <algorithm>
#include <utility>
#include <functional>
#include <queue>
#include <string>
#include <memory>
struct node {
std::pair<char, int> value;
std::unique_ptr<node> left, right;
std::string prefix;
node(char c, int freq):left(nullptr), right(nullptr), value(c, freq){
}
//For priority queue
bool operator < (const node &n1) const {
return value.second < n1.value.second;
}
bool operator > (const node &n1) const {
return value.second > n1.value.second;
}
void print() {
std::cout<<"---\nthis node is "<<value.first<<"\n";
std::cout<<"prefix for this node = "<<prefix<<"\n";
#ifdef DEBUG
std::cout<<"Left pointing to "<<left<<" and right to "<<right<<"\n---\n";
#endif
}
};
std::unique_ptr<node> make_huffman_tree(int arr[][2], int rows)
{
struct node *root = nullptr;
//Create a priority queue of nodes.
std::priority_queue<std::unique_ptr<node>, std::vector<std::unique_ptr<node>>,
std::greater<std::unique_ptr<node>>> freq;
//Initially add all alphabets in the set.
for (int i = 0; i<rows; i += 1) {
freq.push(std::move(std::unique_ptr<node>(new node(arr[i][0], arr[i][1]))));
}
do {
auto znode = std::unique_ptr<node>(new node(' ', 0));
auto hasAtLeastTwo = false;
//const_cast is used to remove the "constness" from freq.top()
//as top() returns a const reference which can't be moved.
auto first_min = std::move(const_cast<std::unique_ptr<node>&>(freq.top()));
freq.pop();
auto second_min = std::unique_ptr<node>(nullptr);
//If there are at least 2 items in this priority queue
if (freq.size() > 0)
hasAtLeastTwo = true;
if (hasAtLeastTwo) {
second_min = std::move(const_cast<std::unique_ptr<node>&>(freq.top()));
freq.pop();
}
//Assign the left and right items of the new combined node
//created.
znode->left = std::move(second_min);
znode->right = std::move(first_min);
znode->value = std::make_pair(' ',
(znode->left != nullptr?znode->left->value.second : 0) +
znode->right->value.second);
//If we added item to the left, set it's prefix to 0
//otherwise set it to 1.
if (hasAtLeastTwo)
znode->left->prefix = "0";
znode->right->prefix = "1";
freq.push(std::move(znode));
} while (freq.size() > 1);
//In case the array had no elements to begin with.
if (freq.size() == 0)
return std::move(std::unique_ptr<node>(nullptr));
//Otherwise return the only element in this queue.
return std::move(std::move(const_cast<std::unique_ptr<node>&>(freq.top()))); // The only node remaining.
}
//Beginning from root setup the prefixes of the tree from
//top to bottom. When we reach a node we set the prefix to be
//the parent node's prefix + it's own prefix. Note that this
//is pre-order traversal of the tree.
void setup_prefix_huffman_tree(std::unique_ptr<node>& root) {
if (root == nullptr)
return;
if (root->left != nullptr) {
root->left->prefix = root->prefix + root->left->prefix;
}
if (root->right != nullptr) {
root->right->prefix = root->prefix + root->right->prefix;
}
setup_prefix_huffman_tree(root->left);
setup_prefix_huffman_tree(root->right);
}
//Just as we did prefix, print the tree using pre-order traversal.
void print_huffman_tree(std::unique_ptr<node> &root) {
if (root == nullptr)
return;
if (root->value.first != ' ')
root->print();
print_huffman_tree(root->left);
print_huffman_tree(root->right);
}
int main(int argc, char *argv[])
{
int arr[][2] = { {'a', 15},
{'b', 25},
{'c', 5},
{'d', 7},
{'e', 10},
{'f', 13},
{'g', 9}
};
auto root_node = make_huffman_tree(arr, sizeof(arr)/sizeof(arr[0]));
setup_prefix_huffman_tree(root_node);
print_huffman_tree(root_node);
}
Things to improve
- Since we’re trying to make this tree work from top to bottom, the prefix for each node is create only after the tree is made.
- This requires tree traversal however it would be nice if this can be avoided.
- Use a class method, perhaps a static method which takes an input of alphabets and their frequencies and in turn returns Huffman tree root.