183 lines
6.7 KiB
Rust
183 lines
6.7 KiB
Rust
use crate::decompress::{EXCEPTIONAL_ENTRY, LITERAL_ENTRY, SECONDARY_TABLE_ENTRY};
|
|
|
|
/// Return the next code, or if the codeword is already all ones (which is the final code), return
|
|
/// the same code again.
|
|
fn next_codeword(mut codeword: u16, table_size: u16) -> u16 {
|
|
if codeword == table_size - 1 {
|
|
return codeword;
|
|
}
|
|
|
|
let adv = (u16::BITS - 1) - (codeword ^ (table_size - 1)).leading_zeros();
|
|
let bit = 1 << adv;
|
|
codeword &= bit - 1;
|
|
codeword |= bit;
|
|
codeword
|
|
}
|
|
|
|
#[allow(clippy::needless_range_loop)]
|
|
pub fn build_table(
|
|
lengths: &[u8],
|
|
entries: &[u32],
|
|
codes: &mut [u16],
|
|
primary_table: &mut [u32],
|
|
secondary_table: &mut Vec<u16>,
|
|
is_distance_table: bool,
|
|
double_literal: bool,
|
|
) -> bool {
|
|
// Count the number of symbols with each code length.
|
|
let mut histogram = [0; 16];
|
|
for &length in lengths {
|
|
histogram[length as usize] += 1;
|
|
}
|
|
|
|
// Determine the maximum code length.
|
|
let mut max_length = 15;
|
|
while max_length > 1 && histogram[max_length] == 0 {
|
|
max_length -= 1;
|
|
}
|
|
|
|
// Handle zero and one symbol huffman codes (which are only allowed for distance codes).
|
|
if is_distance_table {
|
|
if max_length == 0 {
|
|
primary_table.fill(0);
|
|
secondary_table.clear();
|
|
return true;
|
|
} else if max_length == 1 && histogram[1] == 1 {
|
|
let symbol = lengths.iter().position(|&l| l == 1).unwrap();
|
|
codes[symbol] = 0;
|
|
let entry = entries
|
|
.get(symbol)
|
|
.cloned()
|
|
.unwrap_or((symbol as u32) << 16)
|
|
| 1;
|
|
for chunk in primary_table.chunks_mut(2) {
|
|
chunk[0] = entry;
|
|
chunk[1] = 0;
|
|
}
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// Sort symbols by code length. Given the histogram, we can determine the starting offset
|
|
// for each code length.
|
|
let mut offsets = [0; 16];
|
|
let mut codespace_used = 0;
|
|
offsets[1] = histogram[0];
|
|
for i in 1..max_length {
|
|
offsets[i + 1] = offsets[i] + histogram[i];
|
|
codespace_used = (codespace_used << 1) + histogram[i];
|
|
}
|
|
codespace_used = (codespace_used << 1) + histogram[max_length];
|
|
|
|
// Check that the provided lengths form a valid Huffman tree.
|
|
if codespace_used != (1 << max_length) {
|
|
return false;
|
|
}
|
|
|
|
// Sort the symbols by code length.
|
|
let mut next_index = offsets;
|
|
let mut sorted_symbols = [0; 288];
|
|
for symbol in 0..lengths.len() {
|
|
let length = lengths[symbol];
|
|
sorted_symbols[next_index[length as usize]] = symbol;
|
|
next_index[length as usize] += 1;
|
|
}
|
|
|
|
let mut codeword = 0u16;
|
|
let mut i = histogram[0];
|
|
|
|
// Populate the primary decoding table
|
|
let primary_table_bits = primary_table.len().ilog2() as usize;
|
|
let primary_table_mask = (1 << primary_table_bits) - 1;
|
|
for length in 1..=primary_table_bits {
|
|
let current_table_end = 1 << length;
|
|
|
|
// Loop over all symbols with the current code length and set their table entries.
|
|
for _ in 0..histogram[length] {
|
|
let symbol = sorted_symbols[i];
|
|
i += 1;
|
|
|
|
primary_table[codeword as usize] = entries
|
|
.get(symbol)
|
|
.cloned()
|
|
.unwrap_or((symbol as u32) << 16)
|
|
| length as u32;
|
|
|
|
codes[symbol] = codeword;
|
|
codeword = next_codeword(codeword, current_table_end as u16);
|
|
}
|
|
|
|
if double_literal {
|
|
for len1 in 1..(length - 1) {
|
|
let len2 = length - len1;
|
|
for sym1_index in offsets[len1]..next_index[len1] {
|
|
for sym2_index in offsets[len2]..next_index[len2] {
|
|
let sym1 = sorted_symbols[sym1_index];
|
|
let sym2 = sorted_symbols[sym2_index];
|
|
if sym1 < 256 && sym2 < 256 {
|
|
let codeword1 = codes[sym1];
|
|
let codeword2 = codes[sym2];
|
|
let codeword = codeword1 | (codeword2 << len1);
|
|
let entry = (sym1 as u32) << 16
|
|
| (sym2 as u32) << 24
|
|
| LITERAL_ENTRY
|
|
| (2 << 8);
|
|
primary_table[codeword as usize] = entry | (length as u32);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// If we aren't at the maximum table size, double the size of the table.
|
|
if length < primary_table_bits {
|
|
primary_table.copy_within(0..current_table_end, current_table_end);
|
|
}
|
|
}
|
|
|
|
// Populate the secondary decoding table.
|
|
secondary_table.clear();
|
|
if max_length > primary_table_bits {
|
|
let mut subtable_start = 0;
|
|
let mut subtable_prefix = !0;
|
|
for length in (primary_table_bits + 1)..=max_length {
|
|
let subtable_size = 1 << (length - primary_table_bits);
|
|
for _ in 0..histogram[length] {
|
|
// If the codeword's prefix doesn't match the current subtable, create a new
|
|
// subtable.
|
|
if codeword & primary_table_mask != subtable_prefix {
|
|
subtable_prefix = codeword & primary_table_mask;
|
|
subtable_start = secondary_table.len();
|
|
primary_table[subtable_prefix as usize] = ((subtable_start as u32) << 16)
|
|
| EXCEPTIONAL_ENTRY
|
|
| SECONDARY_TABLE_ENTRY
|
|
| (subtable_size as u32 - 1);
|
|
secondary_table.resize(subtable_start + subtable_size, 0);
|
|
}
|
|
|
|
// Lookup the symbol.
|
|
let symbol = sorted_symbols[i];
|
|
i += 1;
|
|
|
|
// Insert the symbol into the secondary table and advance to the next codeword.
|
|
codes[symbol] = codeword;
|
|
secondary_table[subtable_start + (codeword >> primary_table_bits) as usize] =
|
|
((symbol as u16) << 4) | (length as u16);
|
|
codeword = next_codeword(codeword, 1 << length);
|
|
}
|
|
|
|
// If there are more codes with the same subtable prefix, extend the subtable.
|
|
if length < max_length && codeword & primary_table_mask == subtable_prefix {
|
|
secondary_table.extend_from_within(subtable_start..);
|
|
let subtable_size = secondary_table.len() - subtable_start;
|
|
primary_table[subtable_prefix as usize] = ((subtable_start as u32) << 16)
|
|
| EXCEPTIONAL_ENTRY
|
|
| SECONDARY_TABLE_ENTRY
|
|
| (subtable_size as u32 - 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
true
|
|
}
|