2016-05-11

# 二叉树应用&ndash;Huffman code

## 背景知识

### Max-Path of Tree

Given a binary tree, every node has a weight, then you need to find out the path that can make the total weight of all nodes it go through is the maximum. The weight is always positive.

### Huffman Tree & Huffman Coding

Given a sequence of weight, then build a Huffman Tree with this sequence. Or Given a sequence of string(only from &lsquo;a&rsquo; to &lsquo;z&rsquo;), then build a Huffman Tree with each char&rsquo;s frequency.

Something you need to know about Huffman Coding:
When you get the information, you need to find a best way to coding it, then you can try Huffman coding.

Because it can make the coding very short. Now, I gonna tell you the how to build a Huffman Tree and coding.
Such as {2, 4, 5, 7}, this is the sequence of weight you get, and now you are going to build a best binary tree.

step_1: find out two smallest weights in the sequence; step_2: make the smaller one be the left-node, the other the right-node, and the weight of their parent is the sum of its two child-node. step_3: put this set of tree you make at the back of the sequence. step_4: loop back to step_1 untill the tree is built up.

When you get string input, you need to remove all the space in it, and count the frequency of all letters appeared in the string, and make it to a sequence of weight (in the order of alphabet). Then build the Huffman Tree in the same way.
Such as &lsquo;abbbccdddddeeeeff&rsquo;, you should tansfer the string to the weight sequence: {1, 3, 2, 5, 4, 2}

### 中文解释

#### 基本概念

vcHt0ru94bXjy/m+rbn9tcS94bXj0PLB0DvCt762s6S2yKO6tNO4+b3hteO1vc/g06a94bXjwre+tsnPtcS31tanyv3EvzvK97XEwre+trOktsijurTTuPm1vcO/0ru94bXjtcTCt762s6S2yNauus2hozwvcD4NCjxwPjOhosnutsjOqmujrL3htePK/c6qbrXEtv6y5sr3o6y1scfSvfa1scO/uPa94bXjtcSx4LrFtrzT68/gzazJ7rbItcTC+rb+subK99bQtNMxtb1utcS94bXj0rvSu7bU06bKsaOss8bOqs3qyKu2/rLmyveho9TaveG148r9xL/P4M2stcS2/rLmyvfW0M3qyKu2/rLmyvfKx8K3vrazpLbI1+62zLXEtv6y5sr3oaM8L3A+DQo8cD40oaJXUEzX7tChtcS2/rLmyvfKx9fu08W2/rLmyvcoSHVmZm1hbiDK9ymhozwvcD4NCjxwPjWhorrVt/LC/KOoSHVmZm1hbqOpyve1xMzY1fc8L3A+DQo8cD6i2SC1sdK219PJz7XEyKjWtb75z+DNrMqxo6zN6sirtv6y5sr30ru2qMrH1+7Txbb+subK96Gjt/HU8s3qyKu2/rLmyveyu9K7tqjKx9fu08W2/rLmyvehozwvcD4NCjxwPqLaINTa1+7Txbb+subK99bQo6zIqNa11L2087XE0rbX08DruPnUvb38oaM8L3A+DQo8cD6i2yDX7tPFtv6y5sr3tcTQzsyssrvOqNK7o6y1q1dQTNfu0KGhozwvcD4NCjxwPjxpbWcgYWx0PQ=="" src="http://www.2cto.com/uploadfile/2016/0511/20160511091857715.jpg" title="\" />

#### 算法思想

(1) 以权值分别为W1,W2．．．Ｗｎ的ｎ各结点，构成n棵二叉树T1,T2,．．．Tn并组成森林F={T1,T2,．．．Tn},其中每棵二叉树 Ti仅有一个权值为 Wi的根结点；

(2) 在F中选取两棵根结点权值最小的树作为左右子树构造一棵新二叉树，并且置新二叉树根结点权值为左右子树上根结点的权值之和（根结点的权值=左右孩子权值之和，叶结点的权值= Wi）

(3) 从F中删除这两棵二叉树，同时将新二叉树加入到F中;

(4) 重复(2)、(3)直到F中只含一棵二叉树为止，这棵二叉树就是Huffman树。

## 具体代码实现

### 数组实现

``````#ifndef TREE_SOLUTION
#define TREE_SOLUTION

#include
#include
#include
#include
using namespace std;

static const int MIN_INIT = -99999999;
static const int MAX_INIT = 10000;
static const int MAX_BIT = 100;
static const int MAX_NODE = 2000;

namespace MP {
struct TreeNode {
int val;
int _id;
TreeNode* left;
TreeNode* right;
TreeNode(int value = 0, int ids = 0,
TreeNode* l = NULL, TreeNode* r = NULL) :
val(value), _id(ids), left(l), right(r) {
}
};
#define MAX_INIT 10000
class MaxPath {
private:
int max;
public:
MaxPath() : max(MAX_INIT) {
}
int findMaxPath(TreeNode *root) {
if (root != NULL) {
int lnow = getPath(root->left, root->val);
int rnow = getPath(root->right, root->val);
int now = lnow > rnow ? lnow : rnow;
return now;
} else {
return 0;
}
}
int getPath(TreeNode* root, int now) {
if (root != NULL) {
int lnow = getPath(root->left, root->val);
int rnow = getPath(root->right, root->val);
int nows = lnow > rnow ? lnow : rnow;
return now + nows;
} else {
return now;
}
}
};
}  // namespace MP
namespace HFM {
//  用数组结构体模拟二叉树
struct HuffNode {
int weight, flag;
int parent, leftChild, rightChild;
HuffNode() {
parent = weight = flag = 0;
leftChild = rightChild = -1;
}
};

struct Code {
int bit[MAX_BIT];
int position;
int weight;
Code() {
position = weight = 0;
memset(bit, 0, MAX_BIT);
}
};
class HuffmanCode {
HuffNode node[MAX_NODE];
Code code[MAX_NODE];
char nodeChar[MAX_NODE];
string original;
bool hasStr;
int LeafNode;
public:
explicit HuffmanCode(const string &str) {
int type[26] = { 0 };
LeafNode = 0;
original = "";
for (int i = 0; i < str.length(); i++) {
if (str[i] < &#39;a&#39; || str[i] > &#39;z&#39;) {
continue;
}
//  得到每一个字母的权重
type[str[i] - &#39;a&#39;]++;
//  得到原来的字符串
original += str[i];
}
for (int i = 0; i < 26; i++) {
if (type[i] > 0) {
//  有几个字母就有几个LeafNode
node[LeafNode++].weight = type[i];
nodeChar[LeafNode-1] = &#39;a&#39; + i;
}
}
hasStr = true;
}
HuffmanCode(int w[], int n) {
//  不管如何建树，最终只会有2 * n - 1个节点
//  多出来的部分权重赋值为0
for (int i = 0; i < 2 * n - 1; i++) {
if (i < n) {
node[i].weight = w[i];
}
else {
node[i].weight = 0;
}
}
LeafNode = n;
hasStr = false;
original = "";
}
void BuildTree() {
//  具体构建树的算法，见后图。
for (int i = 0; i < LeafNode - 1; i++) {
int min1, min2, pos1, pos2;
min1 = min2 = MAX_INIT;
pos1 = pos2 = 0;
// 找到最小的两个值。
// 并且这两个值都是没有作为leafnode的。
for (int j = 0; j < LeafNode + i; j++) {
//  flag用于表明这个node是否有parent。
//  min2最小， min1第二小。
if (node[j].weight < min2 && node[j].flag == 0) {
min1 = min2;
pos1 = pos2;
min2 = node[j].weight;
pos2 = j;
} else if (node[j].weight < min1 && node[j].flag == 0) {
min1 = node[j].weight;
pos1 = j;
}
}
// 构建关系
node[pos1].parent = node[pos2].parent = LeafNode + i;
node[pos1].flag = node[pos2].flag = 1;
node[LeafNode + i].weight = node[pos1].weight + node[pos2].weight;
node[LeafNode + i].leftChild = pos1;
node[LeafNode + i].rightChild = pos2;
}
}
//  产生code编码。
void GenerateCode() {
//  cideP是个作为过渡的临时指针。
Code *codeP = new Code;
int childPos, parentPos;
for (int i = 0; i < LeafNode; i++) {
//  思路是在当前节点不断地往上寻找他的夫节点。
codeP->position = 0;
codeP->weight = node[i].weight;
childPos = i;
parentPos = node[i].parent;
while (parentPos != 0) {
if (node[parentPos].leftChild == childPos)
codeP->bit[codeP->position] = 0;
else
codeP->bit[codeP->position] = 1;
//  position表示层级
codeP->position++;
childPos = parentPos;
parentPos = node[childPos].parent;
}
for (int j = codeP->position - 1; j >= 0; j--)
code[i].bit[codeP->position - j - 1] = codeP->bit[j];
code[i].position = codeP->position;
code[i].weight = codeP->weight;
}
delete codeP;
}
void DisplayCode() {
BuildTree();
GenerateCode();
int codeLength = 0;
bool isVisit[MAX_NODE];
memset(isVisit, true, MAX_NODE);
for (int i = 0; i < LeafNode; i++) {
int min = MAX_INIT, position_max = -1;
for (int j = 0; j < LeafNode; j++) {
if (isVisit[j] && node[j].weight < min) {
min = node[j].weight;
position_max = j;
}
}
isVisit[position_max] = false;
if (hasStr) {
cout << "(" << nodeChar[position_max] << ") ";
}
cout << "Weight = " << node[position_max].weight << "; Code = ";
for (int j = 0; j < code[position_max].position; j++) {
cout << code[position_max].bit[j];
}
codeLength +=
code[position_max].weight * code[position_max].position;
cout << endl;
}
cout << "Huffman&#39;s codeLength = " << codeLength << endl;
if (!hasStr) {
return;
}
cout << "Origin Text: " << original << endl;
cout << "Huffman&#39;s Code: ";
for (int i = 0; i < original.length(); i++) {
for (int j = 0; j < LeafNode; j++) {
if (nodeChar[j] == original[i]) {
for (int p = 0; p < code[j].position; p++) {
cout << code[j].bit[p];
}
}
}
}
cout << endl;
}
};
}  // namespace HFM
#endif``````

### 指针实现

``````#include
#include
#include
#include
#include
namespace MP {
struct TreeNode {
int val;
int _id;
TreeNode* left;
TreeNode* right;
TreeNode(int value = 0, int ids = 0,
TreeNode* l = NULL, TreeNode* r = NULL) : val(value), _id(ids), left(l), right(r) {
}
};
#define MAX_INIT 10000
class MaxPath {
private:
int max;
public:
MaxPath() : max(MAX_INIT) {
}
int findMaxPath(TreeNode *root) {
if (root != NULL) {
int lnow = getPath(root->left, root->val);
int rnow = getPath(root->right, root->val);
int now = lnow > rnow ? lnow : rnow;
return now;
} else {
return 0;
}
}
int getPath(TreeNode* root, int now) {
if (root != NULL) {
int lnow = getPath(root->left, root->val);
int rnow = getPath(root->right, root->val);
int nows = lnow > rnow ? lnow : rnow;
return now + nows;
} else {
return now;
}
}
};
}  // namespace MP

namespace HFM {
struct Tree_Node {
int weight;
int _id;
std::string code;
char _char;
Tree_Node* left;
Tree_Node* right;
Tree_Node(int _weight = 0, int _ids = 0,
std::string _code = "", char char_ = &#39; &#39;, Tree_Node* l
= NULL, Tree_Node* r = NULL) : weight(_weight),
_id(_ids), code(_code),  _char(char_), left(l), right(r) {
}
};
struct HFM_Node {
int weigth;
std::string code;
HFM_Node(int _weight = 1, std::string _code = "")
: weigth(_weight), code(_code) {
}
};
bool cmp(MP::TreeNode* a, MP::TreeNode* b) {
if (a->val < b->val) {
return true;
} else if (a->val == b->val) {
if (a->left == NULL) {
return true;
}
if (b->left == NULL) {
return false;
}
return true;
} else {
return false;
}
}
bool cmp_str(Tree_Node* a, Tree_Node* b) {
if (a->weight >= b->weight) {
return false;
}
return true;
}
bool cmp_first(Tree_Node a, Tree_Node b) {
if (a.weight > b.weight) {
return false;
}
if (a.weight < b.weight) {
return true;
}
return false;
}
bool cmp_answer(Tree_Node a, Tree_Node b) {
if (a.weight == b.weight) {
if (a.code.length() < b.code.length()) {
return false;
}
if (a.code.length() > b.code.length()) {
return true;
}
if (a.code.length() == b.code.length()) {
for (int i = 0; i != a.code.length(); i++) {
if (a.code[i] > b.code[i]) {
return true;
}
if (a.code[i] < b.code[i]) {
return false;
}
}
}

}
return false;
}
bool cmp_int(std::pair a,
std::pair b) {
if (a.second.length() < b.second.length()) {
return false;
}
if (a.second.length() == b.second.length()) {
for (int i = 0; i != a.second.length(); i++) {
if (a.second[i] > b.second[i]) {
return true;
}
if (a.second[i] < b.second[i]) {
return false;
}
}
}
return true;
}
std::string int2str(int n) {
std::stringstream answer;
answer << n;
return answer.str();
}
class HuffmanCode {
private:
// for int
MP::TreeNode* root;
std::vector > answer;
// for string
Tree_Node* root_str;
std::string original;
std::vector answer_str;
public:
explicit HuffmanCode(const std::string &str) {
root = NULL;
std::vector vec_treeNode;
for (int i = 0; i != str.length(); i++) {
if (str[i] == &#39; &#39;) {
continue;
}
original += str[i];
bool flag = false;
for (std::vector::iterator
iter = vec_treeNode.begin(); iter
!= vec_treeNode.end(); iter++) {
if ((*iter)->_char == str[i]) {
(*iter)->weight++;
flag = true;
break;
}
}
if (!flag) {
vec_treeNode.push_back(new
Tree_Node(1, 0, "", str[i]));
}
}
std::sort(vec_treeNode.begin(), vec_treeNode.end(), cmp_str);
while (vec_treeNode.size() != 1) {
Tree_Node* temp1 = vec_treeNode[0];
Tree_Node* temp2 = vec_treeNode[1];
Tree_Node* temp = new Tree_Node
(temp1->weight + temp2->weight, 0, "", &#39; &#39;, temp1, temp2);
temp1->_id = 1;
temp2->_id = 0;
vec_treeNode.erase(vec_treeNode.begin());
vec_treeNode.erase(vec_treeNode.begin());
vec_treeNode.push_back(temp);
sort(vec_treeNode.begin(), vec_treeNode.end(), cmp_str);
}
root_str = vec_treeNode[0];
}
HuffmanCode(int w[], int n) {
root_str = NULL;
std::vector temp;
for (int i = 0; i != n; i++) {
temp.push_back(w[i]);
}
std::sort(temp.begin(), temp.end());
std::vector vec_TreeNode;
for (int i = 0; i != n; i++) {
vec_TreeNode.push_back(new MP::TreeNode(temp[i], 0));
}
while (vec_TreeNode.size() != 1) {
MP::TreeNode* temp1 = vec_TreeNode[0];
MP::TreeNode* temp2 = vec_TreeNode[1];
MP::TreeNode* temp = new
MP::TreeNode(temp1->val + temp2->val, 0, temp1, temp2);
temp1->_id = 1;
temp2->_id = 0;
vec_TreeNode.erase(vec_TreeNode.begin());
vec_TreeNode.erase(vec_TreeNode.begin());
vec_TreeNode.push_back(temp);
sort(vec_TreeNode.begin(), vec_TreeNode.end(), cmp);
}
root = vec_TreeNode[0];
}
void visit(MP::TreeNode* root, std::string now) {
if (root->left != NULL) {
visit(root->right, now + "0");
visit(root->left, now + "1");
} else {
answer.push_back(make_pair(root->val, now));
}
}
void visit_str(Tree_Node* root, std::string now) {
if (root->left != NULL) {
visit_str(root->right, now + "0");
visit_str(root->left, now + "1");
} else {
answer_str.push_back(Tree_Node
(root->weight, root->_id, now, root->_char));
}
}
void DisplayCode() {
if (root != NULL) {
visit(root, "");
int code_length = 0;
std::sort(answer.begin(), answer.end(), cmp_int);
for (std::vector >::iterator
iter = answer.begin(); iter != answer.end(); iter++) {
std::cout << "Weight = " << iter->first << "; ";
std::cout << "Code = " << iter->second << std::endl;
code_length += iter->first * iter->second.length();
}
std::cout << "Huffman&#39;s codeLength = " <<
code_length << std::endl;
return;
}
if (root_str != NULL) {
int code_length = 0;
visit_str(root_str, "");
std::sort(answer_str.begin(), answer_str.end(), cmp_first);
std::sort(answer_str.begin(), answer_str.end(), cmp_answer);
for (int i = 0; i < answer_str.size() - 1; i++) {
for (int j = 0; i + j < answer_str.size() - 1; j++) {
if (answer_str[j].weight == answer_str[j + 1].weight) {
if (answer_str[j]._char > answer_str[j + 1]._char) {
char temp = answer_str[j]._char;
answer_str[j]._char = answer_str[j + 1]._char;
answer_str[j + 1]._char = temp;
}
}
}
}
for (std::vector::iterator iter =
answer_str.begin(); iter != answer_str.end(); iter++) {
std::cout << "(" << iter->_char << ") ";
std::cout << "Weight = " << iter->weight << "; ";
std::cout << "Code = " << iter->code << std::endl;
code_length += iter->weight * iter->code.length();
}
std::cout << "Huffman&#39;s codeLength = " <<
code_length << std::endl;
std::cout << "Origin Text: " << original << std::endl;
std::cout << "Huffman&#39;s Code: ";
for (int i = 0; i != original.length(); i++) {
for (std::vector::iterator iter =
answer_str.begin(); iter != answer_str.end(); iter++) {
if (iter->_char == original[i]) {
std::cout << iter->code;
break;
}
}
}
std::cout << std::endl;
}
}
void clear(MP::TreeNode* root) {
if (root != NULL) {
clear(root->left);
clear(root->right);
delete root;
}
}
void clear_str(Tree_Node* root) {
if (root != NULL) {
clear_str(root->left);
clear_str(root->right);
delete root;
}
}
~HuffmanCode() {
clear(root);
clear_str(root_str);
}
};
}  // namespace HFM``````

### 测试文件

``````#include
#include
#include
#include "tree_solution.h"
using namespace std;
using namespace MP;
using namespace HFM;

void test_MP() {
cout << "---------- test_MP ----------\n";
TreeNode *n[9];
n[0] = new TreeNode(1, 0);
n[1] = new TreeNode(1, 1);
n[2] = new TreeNode(3, 2);
n[0]->left = n[1];
n[0]->right = n[2];
n[3] = new TreeNode(0, 3);
n[4] = new TreeNode(1, 4);
n[1]->left = n[3];
n[1]->right = n[4];
n[5] = new TreeNode(1, 5);
n[6] = new TreeNode(1, 6);
n[2]->left = n[5];
n[2]->right = n[6];
n[7] = new TreeNode(1, 7);
n[8] = new TreeNode(0, 8);
n[4]->left = n[7];
n[4]->right = n[8];
MaxPath tree;
cout << "MaxPathValue: ";
cout << tree.findMaxPath(n[0]) << "\n\n";
for (int i = 0; i < 9; i++) delete n[i];
}
void test_HFM() {
cout << "---------- test_HFM ----------\n# TEST_1\n";
int weight[] = { 7, 4, 5, 2 };
int count = 4;
HuffmanCode HFC(weight, count);
HFC.DisplayCode();
cout << "\nTEST_2\n";
string input = "you are the apple in my eyes";
HuffmanCode HFC_2(input);
HFC_2.DisplayCode();
}

void hard_test() {
cout << "---------- test_MP ----------\n";
TreeNode *n[9];
int weight;
for (int i = 0; i < 9; i++) {
cin >> weight;
n[i] = new TreeNode(weight, i);
}
n[0]->left = n[1];
n[0]->right = n[2];
n[1]->left = n[3];
n[2]->left = n[5];
n[2]->right = n[6];
n[4]->left = n[7];
n[4]->right = n[8];
MaxPath tree;
cout << "MaxPathValue: ";
cout << tree.findMaxPath(n[0]) << "\n\n";
for (int i = 0; i < 9; i++) delete n[i];
cout << "---------- test_HFM ----------\n# TEST_1\n";
int count, weight_arr[20] = {0};
cin >> count;
for (int i = 0; i < count; i++) cin >> weight_arr[i];
HuffmanCode HFC(weight_arr, count);
HFC.DisplayCode();
cout << "\nTEST_2\n";
string input = "";
cin >> input;
HuffmanCode HFC_2(input);
HFC_2.DisplayCode();
}

int main() {
int t;
cin >> t;
if (t == 0) {
test_MP();
test_HFM();
} else {
hard_test();
}
return 0;
}``````