-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathans_compressor.cpp
More file actions
86 lines (69 loc) · 2.73 KB
/
ans_compressor.cpp
File metadata and controls
86 lines (69 loc) · 2.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#include "ans_compressor.h"

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>
// -----------------------------------------------------------------------------
// Byte-oriented rANS codec.
//
// Wire format produced by compress() and consumed by decompress():
//   [0, 512)      256 little-endian uint16 frequencies, summing to TOTAL_RANGE
//   [512, 520)    little-endian uint64: number of symbols in the original data
//   [520, N - 4)  renormalisation bytes, in the order the encoder emitted them
//   [N - 4, N)    little-endian uint32: final encoder state
//
// The encoder walks the input back to front, so the decoder emits symbols front
// to back while consuming the renormalisation bytes from the END of the payload
// towards its start.
//
// NOTE(review): TOTAL_RANGE and SymbolModel are presumably declared in
// ans_compressor.h. This code assumes TOTAL_RANGE is a power of two in
// [256, 32768] and that SymbolModel::start / SymbolModel::freq can hold values
// up to TOTAL_RANGE. The range is checked at run time; confirm the field widths
// against the header.
// -----------------------------------------------------------------------------
namespace {

// Number of distinct byte values covered by the model.
constexpr std::size_t kSymbolCount = 256;

// Between symbols the coder state stays in [kRansLowerBound, kRansLowerBound << 8),
// which keeps every intermediate value below 2^32.
constexpr uint32_t kRansLowerBound = 1u << 23;

constexpr std::size_t kTableBytes = kSymbolCount * 2;   // uint16 per symbol
constexpr std::size_t kLengthBytes = 8;                 // uint64 symbol count
constexpr std::size_t kHeaderBytes = kTableBytes + kLengthBytes;
constexpr std::size_t kStateBytes = 4;                  // uint32 final state

// True when `total` can be used as the model denominator: a power of two (so it
// divides kRansLowerBound), at least 256 (so every present symbol can get a
// non-zero slot) and at most 32768 (so a frequency fits the uint16 table).
bool isSupportedRange(uint32_t total) {
    const bool powerOfTwo = total != 0 && (total & (total - 1)) == 0;
    return powerOfTwo && total >= kSymbolCount && total <= 0x8000u;
}

// Appends the `width` low-order bytes of `value` to `buf`, least significant first.
void appendLittleEndian(std::vector<char>& buf, uint64_t value, std::size_t width) {
    for (std::size_t i = 0; i < width; ++i) {
        buf.push_back(static_cast<char>((value >> (8 * i)) & 0xFF));
    }
}

// Reads a `width`-byte little-endian integer starting at buf[offset].
// The caller guarantees offset + width <= buf.size().
uint64_t readLittleEndian(const std::vector<char>& buf, std::size_t offset, std::size_t width) {
    uint64_t value = 0;
    for (std::size_t i = width; i-- > 0;) {
        value = (value << 8) | static_cast<uint8_t>(buf[offset + i]);
    }
    return value;
}

// Scales raw occurrence counts so that they sum to exactly `total`, while every
// symbol that occurs at least once keeps a frequency of at least 1.
// Preconditions: sampleCount is the (non-zero) sum of `counts`, and
// isSupportedRange(total) holds - the latter guarantees the fix-up loop ends.
std::array<uint16_t, kSymbolCount> normalizeFrequencies(
        const std::array<uint64_t, kSymbolCount>& counts,
        uint64_t sampleCount,
        uint32_t total) {
    std::array<uint32_t, kSymbolCount> scaled{};
    uint32_t assigned = 0;
    for (std::size_t i = 0; i < kSymbolCount; ++i) {
        if (counts[i] == 0) continue;
        const uint64_t share = counts[i] * total / sampleCount;
        // A symbol that occurs must stay encodable, so never round down to 0.
        scaled[i] = static_cast<uint32_t>(std::max<uint64_t>(share, 1));
        assigned += scaled[i];
    }

    // Rounding leaves the sum slightly off; absorb the difference in the most
    // frequent symbols, where the relative distortion is smallest.
    while (assigned != total) {
        uint32_t& largest = *std::max_element(scaled.begin(), scaled.end());
        if (assigned < total) {
            largest += total - assigned;
            assigned = total;
        } else {
            const uint32_t trimmed = std::min(assigned - total, largest - 1u);
            largest -= trimmed;
            assigned -= trimmed;
        }
    }

    std::array<uint16_t, kSymbolCount> freqs{};
    for (std::size_t i = 0; i < kSymbolCount; ++i) {
        freqs[i] = static_cast<uint16_t>(scaled[i]);
    }
    return freqs;
}

}  // namespace

// Entropy-codes `data` with a static, order-0 rANS model.
//
// Returns the encoded chunk in the wire format described at the top of this
// section. Empty input yields an empty chunk. If TOTAL_RANGE is not usable by
// this codec an error is written to std::cerr and an empty chunk is returned.
std::vector<char> ANSCompressor::compress(const std::vector<char>& data) {
    if (data.empty()) return {};

    const uint32_t total = static_cast<uint32_t>(TOTAL_RANGE);
    if (!isSupportedRange(total)) {
        std::cerr << "Error: TOTAL_RANGE must be a power of two between 256 and 32768." << std::endl;
        return {};
    }

    // 1. Count occurrences. 64-bit counters cannot wrap for any realistic input,
    //    unlike 16-bit ones which overflow after 65535 occurrences of a byte.
    std::array<uint64_t, kSymbolCount> counts{};
    for (const char c : data) ++counts[static_cast<uint8_t>(c)];

    // 2. Build the model: frequencies scaled to TOTAL_RANGE plus cumulative starts.
    const std::array<uint16_t, kSymbolCount> freqs =
        normalizeFrequencies(counts, data.size(), total);

    SymbolModel model[kSymbolCount];
    uint32_t cumulative = 0;
    for (std::size_t i = 0; i < kSymbolCount; ++i) {
        model[i].start = static_cast<uint16_t>(cumulative);
        model[i].freq = freqs[i];
        cumulative += freqs[i];
    }

    // 3. Header: the normalised table and the symbol count. The count is needed
    //    because normalised frequencies no longer reveal the input length.
    std::vector<char> out;
    out.reserve(kHeaderBytes + kStateBytes);
    for (const uint16_t f : freqs) appendLittleEndian(out, f, 2);
    appendLittleEndian(out, static_cast<uint64_t>(data.size()), kLengthBytes);

    // 4. Encode back to front so the decoder produces symbols front to back.
    const uint32_t renormScale = (kRansLowerBound / total) << 8;
    uint32_t state = kRansLowerBound;
    for (auto it = data.rbegin(); it != data.rend(); ++it) {
        const uint8_t symbol = static_cast<uint8_t>(*it);
        const uint32_t freq = model[symbol].freq;   // >= 1 for every symbol in data
        const uint32_t start = model[symbol].start;

        // Shed low bytes until the coding step below is guaranteed to land back
        // in [kRansLowerBound, kRansLowerBound << 8). The bound depends on freq:
        // rare symbols grow the state more and therefore need more headroom.
        const uint32_t stateLimit = renormScale * freq;
        while (state >= stateLimit) {
            out.push_back(static_cast<char>(state & 0xFF));
            state >>= 8;
        }

        // x' = (x / freq) * TOTAL + (x % freq) + start
        state = (state / freq) * total + (state % freq) + start;
    }

    // 5. Trailer: the final state, which is where decoding begins.
    appendLittleEndian(out, state, kStateBytes);
    return out;
}

// Decodes a chunk produced by compress().
//
// Returns the original bytes. An empty chunk decodes to empty output (mirroring
// compress()). For a chunk that is too short, carries an inconsistent frequency
// table, or does not decode back to the encoder's initial state, an error is
// written to std::cerr and an empty vector is returned.
//
// NOTE(review): the declared symbol count comes from the chunk itself; a
// single-symbol table lets a tiny chunk expand to a very large output. Callers
// handling untrusted chunks may want to cap the accepted size.
std::vector<char> ANSCompressor::decompress(const std::vector<char>& data) {
    if (data.empty()) return {};

    const uint32_t total = static_cast<uint32_t>(TOTAL_RANGE);
    if (!isSupportedRange(total)) {
        std::cerr << "Error: TOTAL_RANGE must be a power of two between 256 and 32768." << std::endl;
        return {};
    }

    if (data.size() < kHeaderBytes + kStateBytes) {
        std::cerr << "Error: Compressed chunk is too small for frequency table." << std::endl;
        return {};
    }

    // 1. Rebuild the model and a slot -> symbol lookup from the stored table.
    SymbolModel model[kSymbolCount];
    std::vector<uint8_t> slotToSymbol(total);
    uint32_t cumulative = 0;
    for (std::size_t i = 0; i < kSymbolCount; ++i) {
        const uint32_t freq = static_cast<uint32_t>(readLittleEndian(data, 2 * i, 2));
        if (freq > total - cumulative) {
            std::cerr << "Error: Compressed chunk has an invalid frequency table." << std::endl;
            return {};
        }
        model[i].start = static_cast<uint16_t>(cumulative);
        model[i].freq = static_cast<uint16_t>(freq);
        std::fill(slotToSymbol.begin() + cumulative,
                  slotToSymbol.begin() + cumulative + freq,
                  static_cast<uint8_t>(i));
        cumulative += freq;
    }
    if (cumulative != total) {
        // Every slot must map to a symbol, otherwise the lookup is incomplete.
        std::cerr << "Error: Compressed chunk has an invalid frequency table." << std::endl;
        return {};
    }

    // 2. Read the symbol count and the final encoder state. The state is
    //    assembled byte by byte: the trailer is not guaranteed to be aligned.
    const uint64_t symbolCount = readLittleEndian(data, kTableBytes, kLengthBytes);
    uint32_t state = static_cast<uint32_t>(
        readLittleEndian(data, data.size() - kStateBytes, kStateBytes));

    std::vector<char> out;
    const bool stateInRange = state >= kRansLowerBound && state < (kRansLowerBound << 8);
    if (!stateInRange || symbolCount > out.max_size()) {
        std::cerr << "Error: Compressed chunk is corrupted." << std::endl;
        return {};
    }

    // 3. Undo the encoding steps in reverse order. `cursor` is one past the next
    //    payload byte to consume and moves towards the header.
    std::size_t cursor = data.size() - kStateBytes;
    for (uint64_t produced = 0; produced < symbolCount; ++produced) {
        const uint32_t slot = state % total;
        const uint8_t symbol = slotToSymbol[slot];
        const uint32_t freq = model[symbol].freq;
        const uint32_t start = model[symbol].start;
        out.push_back(static_cast<char>(symbol));

        // Inverse of x' = (x / freq) * TOTAL + (x % freq) + start
        state = freq * (state / total) + slot - start;

        // Pull back the bytes the encoder shed before coding this symbol.
        while (state < kRansLowerBound) {
            if (cursor == kHeaderBytes) {
                std::cerr << "Error: Compressed chunk is truncated." << std::endl;
                return {};
            }
            --cursor;
            state = (state << 8) | static_cast<uint8_t>(data[cursor]);
        }
    }

    // A valid stream ends exactly where the encoder started: initial state
    // restored and every payload byte consumed.
    if (state != kRansLowerBound || cursor != kHeaderBytes) {
        std::cerr << "Error: Compressed chunk is corrupted." << std::endl;
        return {};
    }
    return out;
}