#include <iostream>
#include <vector>
#include <unordered_map>
#include <string>
#include <bitset>
#include <sstream>
using namespace std;
// LZW Compression Algorithm
vector<int> lzwCompress(const string& input) {
unordered_map<string, int> dictionary;
vector<int> output;
// Initialize the dictionary with single characters (ASCII)
for (int i = 0; i < 256; ++i) {
dictionary[string(1, char(i))] = i;
}
string current = "";
int code = 256; // Next available code
for (char c : input) {
current += c;
// If the current substring exists in the dictionary, extend it
if (dictionary.find(current) != dictionary.end()) {
continue;
} else {
// Output the code for the current substring (longest match)
output.push_back(dictionary[current.substr(0, current.length() - 1)]);
// Add the current substring to the dictionary
dictionary[current] = code++;
// Start a new current substring
current = string(1, c);
}
}
// Output the code for the last substring
if (!current.empty()) {
output.push_back(dictionary[current]);
}
return output;
}
// LZW Decompression Algorithm
string lzwDecompress(const vector<int>& input) {
unordered_map<int, string> dictionary;
string output = "";
int code = 256; // Next available code
string prev = string(1, char(input[0]));
output += prev;
dictionary[256] = prev;
for (size_t i = 1; i < input.size(); ++i) {
int currentCode = input[i];
string current;
// If the current code is already in the dictionary, just fetch it
if (dictionary.find(currentCode) != dictionary.end()) {
current = dictionary[currentCode];
} else {
// Handle the special case where the current code is the next code
current = prev + prev[0];
}
output += current;
// Add the new string to the dictionary
dictionary[code++] = prev + current[0];
// Set the previous string for the next iteration
prev = current;
}
return output;
}
// Function to display a hexdump of the encoded data (like bvi style)
void hexdump(const vector<int>& data) {
size_t offset = 0;
const size_t bytesPerLine = 16;
while (offset < data.size()) {
printf("%08X ", offset);
// Print hexadecimal representation
for (size_t i = 0; i < bytesPerLine; ++i) {
if (offset + i < data.size()) {
printf("%02X ", data[offset + i]);
} else {
printf(" ");
}
}
printf(" |");
// Print ASCII representation
for (size_t i = 0; i < bytesPerLine; ++i) {
if (offset + i < data.size()) {
char c = char(data[offset + i]);
printf("%c", isprint(c) ? c : '.');
} else {
printf(" ");
}
}
printf("|\n");
offset += bytesPerLine;
}
}
int main() {
// Example input data
string inputData = "this is an example of lzw compression in c++";
// LZW Compression
vector<int> compressedData = lzwCompress(inputData);
// LZW Decompression
string decompressedData = lzwDecompress(compressedData);
// Output the results
cout << "Original Data: " << inputData << endl;
cout << "Decompressed Data: " << decompressedData << endl;
// Hexdump the compressed data (encoded as integer codes)
cout << "Hexdump of the compressed data:" << endl;
hexdump(compressedData);
return 0;
}