Vendor dependencies for 0.3.0 release

This commit is contained in:
2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

482
vendor/flate2/src/gz/bufread.rs vendored Normal file
View File

@@ -0,0 +1,482 @@
use std::cmp;
use std::io;
use std::io::prelude::*;
use std::mem;
use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser};
use crate::crc::CrcReader;
use crate::deflate;
use crate::Compression;
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
let min = cmp::min(into.len(), from.len() - *pos);
into[..min].copy_from_slice(&from[*pos..*pos + min]);
*pos += min;
min
}
/// A gzip streaming encoder
///
/// This structure implements a [`Read`] interface. When read from, it reads
/// uncompressed data from the underlying [`BufRead`] and provides the compressed data.
///
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// use flate2::Compression;
/// use flate2::bufread::GzEncoder;
/// use std::fs::File;
/// use std::io::BufReader;
///
/// // Opens sample file, compresses the contents and returns a Vector or error
/// // File wrapped in a BufReader implements BufRead
///
/// fn open_hello_world() -> io::Result<Vec<u8>> {
/// let f = File::open("examples/hello_world.txt")?;
/// let b = BufReader::new(f);
/// let mut gz = GzEncoder::new(b, Compression::fast());
/// let mut buffer = Vec::new();
/// gz.read_to_end(&mut buffer)?;
/// Ok(buffer)
/// }
/// ```
#[derive(Debug)]
pub struct GzEncoder<R> {
inner: deflate::bufread::DeflateEncoder<CrcReader<R>>,
header: Vec<u8>,
pos: usize,
eof: bool,
}
pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> {
let crc = CrcReader::new(r);
GzEncoder {
inner: deflate::bufread::DeflateEncoder::new(crc, lvl),
header,
pos: 0,
eof: false,
}
}
impl<R: BufRead> GzEncoder<R> {
/// Creates a new encoder which will use the given compression level.
///
/// The encoder is not configured specially for the emitted header. For
/// header configuration, see the `GzBuilder` type.
///
/// The data read from the stream `r` will be compressed and available
/// through the returned reader.
pub fn new(r: R, level: Compression) -> GzEncoder<R> {
GzBuilder::new().buf_read(r, level)
}
fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> {
if self.pos == 8 {
return Ok(0);
}
let crc = self.inner.get_ref().crc();
let calced_crc_bytes = crc.sum().to_le_bytes();
let arr = [
calced_crc_bytes[0],
calced_crc_bytes[1],
calced_crc_bytes[2],
calced_crc_bytes[3],
crc.amount() as u8,
(crc.amount() >> 8) as u8,
(crc.amount() >> 16) as u8,
(crc.amount() >> 24) as u8,
];
Ok(copy(into, &arr, &mut self.pos))
}
}
impl<R> GzEncoder<R> {
/// Acquires a reference to the underlying reader.
pub fn get_ref(&self) -> &R {
self.inner.get_ref().get_ref()
}
/// Acquires a mutable reference to the underlying reader.
///
/// Note that mutation of the reader may result in surprising results if
/// this encoder is continued to be used.
pub fn get_mut(&mut self) -> &mut R {
self.inner.get_mut().get_mut()
}
/// Returns the underlying stream, consuming this encoder
pub fn into_inner(self) -> R {
self.inner.into_inner().into_inner()
}
}
#[inline]
fn finish(buf: &[u8; 8]) -> (u32, u32) {
let crc = (buf[0] as u32)
| ((buf[1] as u32) << 8)
| ((buf[2] as u32) << 16)
| ((buf[3] as u32) << 24);
let amt = (buf[4] as u32)
| ((buf[5] as u32) << 8)
| ((buf[6] as u32) << 16)
| ((buf[7] as u32) << 24);
(crc, amt)
}
impl<R: BufRead> Read for GzEncoder<R> {
fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> {
let mut amt = 0;
if self.eof {
return self.read_footer(into);
} else if self.pos < self.header.len() {
amt += copy(into, &self.header, &mut self.pos);
if amt == into.len() {
return Ok(amt);
}
let tmp = into;
into = &mut tmp[amt..];
}
match self.inner.read(into)? {
0 => {
self.eof = true;
self.pos = 0;
self.read_footer(into)
}
n => Ok(amt + n),
}
}
}
impl<R: BufRead + Write> Write for GzEncoder<R> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.get_mut().write(buf)
}
fn flush(&mut self) -> io::Result<()> {
self.get_mut().flush()
}
}
/// A decoder for a single member of a [gzip file].
///
/// This structure implements a [`Read`] interface. When read from, it reads
/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
///
/// After reading a single member of the gzip data this reader will return
/// Ok(0) even if there are more bytes available in the underlying reader.
/// If you need the following bytes, call `into_inner()` after Ok(0) to
/// recover the underlying reader.
///
/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
/// or read more
/// [in the introduction](../index.html#about-multi-member-gzip-files).
///
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::GzDecoder;
///
/// # fn main() {
/// # let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// # e.write_all(b"Hello World").unwrap();
/// # let bytes = e.finish().unwrap();
/// # println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
/// let mut gz = GzDecoder::new(&bytes[..]);
/// let mut s = String::new();
/// gz.read_to_string(&mut s)?;
/// Ok(s)
/// }
/// ```
#[derive(Debug)]
pub struct GzDecoder<R> {
state: GzState,
reader: CrcReader<deflate::bufread::DeflateDecoder<R>>,
multi: bool,
}
#[derive(Debug)]
enum GzState {
Header(GzHeaderParser),
Body(GzHeader),
Finished(GzHeader, usize, [u8; 8]),
Err(io::Error),
End(Option<GzHeader>),
}
impl<R: BufRead> GzDecoder<R> {
/// Creates a new decoder from the given reader, immediately parsing the
/// gzip header.
pub fn new(mut r: R) -> GzDecoder<R> {
let mut header_parser = GzHeaderParser::new();
let state = match header_parser.parse(&mut r) {
Ok(_) => GzState::Body(GzHeader::from(header_parser)),
Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => {
GzState::Header(header_parser)
}
Err(err) => GzState::Err(err),
};
GzDecoder {
state,
reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)),
multi: false,
}
}
fn multi(mut self, flag: bool) -> GzDecoder<R> {
self.multi = flag;
self
}
}
impl<R> GzDecoder<R> {
/// Returns the header associated with this stream, if it was valid
pub fn header(&self) -> Option<&GzHeader> {
match &self.state {
GzState::Body(header) | GzState::Finished(header, _, _) => Some(header),
GzState::End(header) => header.as_ref(),
_ => None,
}
}
/// Acquires a reference to the underlying reader.
pub fn get_ref(&self) -> &R {
self.reader.get_ref().get_ref()
}
/// Acquires a mutable reference to the underlying stream.
///
/// Note that mutation of the stream may result in surprising results if
/// this decoder is continued to be used.
pub fn get_mut(&mut self) -> &mut R {
self.reader.get_mut().get_mut()
}
/// Consumes this decoder, returning the underlying reader.
pub fn into_inner(self) -> R {
self.reader.into_inner().into_inner()
}
}
impl<R: BufRead> Read for GzDecoder<R> {
fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
loop {
match &mut self.state {
GzState::Header(parser) => {
parser.parse(self.reader.get_mut().get_mut())?;
self.state = GzState::Body(GzHeader::from(mem::take(parser)));
}
GzState::Body(header) => {
if into.is_empty() {
return Ok(0);
}
match self.reader.read(into)? {
0 => {
self.state = GzState::Finished(mem::take(header), 0, [0; 8]);
}
n => {
return Ok(n);
}
}
}
GzState::Finished(header, pos, buf) => {
if *pos < buf.len() {
*pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?;
} else {
let (crc, amt) = finish(buf);
if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() {
self.state = GzState::End(Some(mem::take(header)));
return Err(corrupt());
} else if self.multi {
let is_eof = self
.reader
.get_mut()
.get_mut()
.fill_buf()
.map(|buf| buf.is_empty())?;
if is_eof {
self.state = GzState::End(Some(mem::take(header)));
} else {
self.reader.reset();
self.reader.get_mut().reset_data();
self.state = GzState::Header(GzHeaderParser::new())
}
} else {
self.state = GzState::End(Some(mem::take(header)));
}
}
}
GzState::Err(err) => {
let result = Err(mem::replace(err, io::ErrorKind::Other.into()));
self.state = GzState::End(None);
return result;
}
GzState::End(_) => return Ok(0),
}
}
}
}
impl<R: BufRead + Write> Write for GzDecoder<R> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.get_mut().write(buf)
}
fn flush(&mut self) -> io::Result<()> {
self.get_mut().flush()
}
}
/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members.
///
/// This structure implements a [`Read`] interface. When read from, it reads
/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
///
/// A gzip file consists of a series of *members* concatenated one after another.
/// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the
/// underlying reader does. For a file, this reads to the end of the file.
///
/// To handle members separately, see [GzDecoder] or read more
/// [in the introduction](../index.html#about-multi-member-gzip-files).
///
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::bufread::MultiGzDecoder;
///
/// # fn main() {
/// # let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// # e.write_all(b"Hello World").unwrap();
/// # let bytes = e.finish().unwrap();
/// # println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements BufRead
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
/// let mut gz = MultiGzDecoder::new(&bytes[..]);
/// let mut s = String::new();
/// gz.read_to_string(&mut s)?;
/// Ok(s)
/// }
/// ```
#[derive(Debug)]
pub struct MultiGzDecoder<R>(GzDecoder<R>);
impl<R: BufRead> MultiGzDecoder<R> {
/// Creates a new decoder from the given reader, immediately parsing the
/// (first) gzip header. If the gzip stream contains multiple members all will
/// be decoded.
pub fn new(r: R) -> MultiGzDecoder<R> {
MultiGzDecoder(GzDecoder::new(r).multi(true))
}
}
impl<R> MultiGzDecoder<R> {
/// Returns the current header associated with this stream, if it's valid
pub fn header(&self) -> Option<&GzHeader> {
self.0.header()
}
/// Acquires a reference to the underlying reader.
pub fn get_ref(&self) -> &R {
self.0.get_ref()
}
/// Acquires a mutable reference to the underlying stream.
///
/// Note that mutation of the stream may result in surprising results if
/// this decoder is continued to be used.
pub fn get_mut(&mut self) -> &mut R {
self.0.get_mut()
}
/// Consumes this decoder, returning the underlying reader.
pub fn into_inner(self) -> R {
self.0.into_inner()
}
}
impl<R: BufRead> Read for MultiGzDecoder<R> {
fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
self.0.read(into)
}
}
#[cfg(test)]
mod test {
use crate::bufread::GzDecoder;
use crate::gz::write;
use crate::Compression;
use std::io::{Read, Write};
// GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any
// additional data to be consumed by the caller.
#[test]
fn decode_extra_data() {
let expected = "Hello World";
let compressed = {
let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
e.write(expected.as_ref()).unwrap();
let mut b = e.finish().unwrap();
b.push(b'x');
b
};
let mut output = Vec::new();
let mut decoder = GzDecoder::new(compressed.as_slice());
let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
assert_eq!(decoded_bytes, output.len());
let actual = std::str::from_utf8(&output).expect("String parsing error");
assert_eq!(
actual, expected,
"after decompression we obtain the original input"
);
output.clear();
assert_eq!(
decoder.read(&mut output).unwrap(),
0,
"subsequent read of decoder returns 0, but inner reader can return additional data"
);
let mut reader = decoder.into_inner();
assert_eq!(
reader.read_to_end(&mut output).unwrap(),
1,
"extra data is accessible in underlying buf-read"
);
assert_eq!(output, b"x");
}
}

624
vendor/flate2/src/gz/mod.rs vendored Normal file
View File

@@ -0,0 +1,624 @@
use std::ffi::CString;
use std::io::{BufRead, Error, ErrorKind, Read, Result, Write};
use std::time;
use crate::bufreader::BufReader;
use crate::{Compression, Crc};
pub static FHCRC: u8 = 1 << 1;
pub static FEXTRA: u8 = 1 << 2;
pub static FNAME: u8 = 1 << 3;
pub static FCOMMENT: u8 = 1 << 4;
pub static FRESERVED: u8 = 1 << 5 | 1 << 6 | 1 << 7;
pub mod bufread;
pub mod read;
pub mod write;
// The maximum length of the header filename and comment fields. More than
// enough for these fields in reasonable use, but prevents possible attacks.
const MAX_HEADER_BUF: usize = 65535;
/// A structure representing the header of a gzip stream.
///
/// The header can contain metadata about the file that was compressed, if
/// present.
#[derive(PartialEq, Clone, Debug, Default)]
pub struct GzHeader {
extra: Option<Vec<u8>>,
filename: Option<Vec<u8>>,
comment: Option<Vec<u8>>,
operating_system: u8,
mtime: u32,
}
impl GzHeader {
/// Returns the `filename` field of this gzip stream's header, if present.
pub fn filename(&self) -> Option<&[u8]> {
self.filename.as_ref().map(|s| &s[..])
}
/// Returns the `extra` field of this gzip stream's header, if present.
pub fn extra(&self) -> Option<&[u8]> {
self.extra.as_ref().map(|s| &s[..])
}
/// Returns the `comment` field of this gzip stream's header, if present.
pub fn comment(&self) -> Option<&[u8]> {
self.comment.as_ref().map(|s| &s[..])
}
/// Returns the `operating_system` field of this gzip stream's header.
///
/// There are predefined values for various operating systems.
/// 255 means that the value is unknown.
pub fn operating_system(&self) -> u8 {
self.operating_system
}
/// This gives the most recent modification time of the original file being compressed.
///
/// The time is in Unix format, i.e., seconds since 00:00:00 GMT, Jan. 1, 1970.
/// (Note that this may cause problems for MS-DOS and other systems that use local
/// rather than Universal time.) If the compressed data did not come from a file,
/// `mtime` is set to the time at which compression started.
/// `mtime` = 0 means no time stamp is available.
///
/// The usage of `mtime` is discouraged because of Year 2038 problem.
pub fn mtime(&self) -> u32 {
self.mtime
}
/// Returns the most recent modification time represented by a date-time type.
/// Returns `None` if the value of the underlying counter is 0,
/// indicating no time stamp is available.
///
///
/// The time is measured as seconds since 00:00:00 GMT, Jan. 1 1970.
/// See [`mtime`](#method.mtime) for more detail.
pub fn mtime_as_datetime(&self) -> Option<time::SystemTime> {
if self.mtime == 0 {
None
} else {
let duration = time::Duration::new(u64::from(self.mtime), 0);
let datetime = time::UNIX_EPOCH + duration;
Some(datetime)
}
}
}
#[derive(Debug, Default)]
pub enum GzHeaderState {
Start(u8, [u8; 10]),
Xlen(Option<Box<Crc>>, u8, [u8; 2]),
Extra(Option<Box<Crc>>, u16),
Filename(Option<Box<Crc>>),
Comment(Option<Box<Crc>>),
Crc(Option<Box<Crc>>, u8, [u8; 2]),
#[default]
Complete,
}
#[derive(Debug, Default)]
pub struct GzHeaderParser {
state: GzHeaderState,
flags: u8,
header: GzHeader,
}
impl GzHeaderParser {
fn new() -> Self {
GzHeaderParser {
state: GzHeaderState::Start(0, [0; 10]),
flags: 0,
header: GzHeader::default(),
}
}
fn parse<R: BufRead>(&mut self, r: &mut R) -> Result<()> {
loop {
match &mut self.state {
GzHeaderState::Start(count, buffer) => {
while (*count as usize) < buffer.len() {
*count += read_into(r, &mut buffer[*count as usize..])? as u8;
}
// Gzip identification bytes
if buffer[0] != 0x1f || buffer[1] != 0x8b {
return Err(bad_header());
}
// Gzip compression method (8 = deflate)
if buffer[2] != 8 {
return Err(bad_header());
}
self.flags = buffer[3];
// RFC1952: "must give an error indication if any reserved bit is non-zero"
if self.flags & FRESERVED != 0 {
return Err(bad_header());
}
self.header.mtime = (buffer[4] as u32)
| ((buffer[5] as u32) << 8)
| ((buffer[6] as u32) << 16)
| ((buffer[7] as u32) << 24);
let _xfl = buffer[8];
self.header.operating_system = buffer[9];
let crc = if self.flags & FHCRC != 0 {
let mut crc = Box::new(Crc::new());
crc.update(buffer);
Some(crc)
} else {
None
};
self.state = GzHeaderState::Xlen(crc, 0, [0; 2]);
}
GzHeaderState::Xlen(crc, count, buffer) => {
if self.flags & FEXTRA != 0 {
while (*count as usize) < buffer.len() {
*count += read_into(r, &mut buffer[*count as usize..])? as u8;
}
if let Some(crc) = crc {
crc.update(buffer);
}
let xlen = parse_le_u16(buffer);
self.header.extra = Some(vec![0; xlen as usize]);
self.state = GzHeaderState::Extra(crc.take(), 0);
} else {
self.state = GzHeaderState::Filename(crc.take());
}
}
GzHeaderState::Extra(crc, count) => {
debug_assert!(self.header.extra.is_some());
let extra = self.header.extra.as_mut().unwrap();
while (*count as usize) < extra.len() {
*count += read_into(r, &mut extra[*count as usize..])? as u16;
}
if let Some(crc) = crc {
crc.update(extra);
}
self.state = GzHeaderState::Filename(crc.take());
}
GzHeaderState::Filename(crc) => {
if self.flags & FNAME != 0 {
let filename = self.header.filename.get_or_insert_with(Vec::new);
read_to_nul(r, filename)?;
if let Some(crc) = crc {
crc.update(filename);
crc.update(b"\0");
}
}
self.state = GzHeaderState::Comment(crc.take());
}
GzHeaderState::Comment(crc) => {
if self.flags & FCOMMENT != 0 {
let comment = self.header.comment.get_or_insert_with(Vec::new);
read_to_nul(r, comment)?;
if let Some(crc) = crc {
crc.update(comment);
crc.update(b"\0");
}
}
self.state = GzHeaderState::Crc(crc.take(), 0, [0; 2]);
}
GzHeaderState::Crc(crc, count, buffer) => {
if let Some(crc) = crc {
debug_assert!(self.flags & FHCRC != 0);
while (*count as usize) < buffer.len() {
*count += read_into(r, &mut buffer[*count as usize..])? as u8;
}
let stored_crc = parse_le_u16(buffer);
let calced_crc = crc.sum() as u16;
if stored_crc != calced_crc {
return Err(corrupt());
}
}
self.state = GzHeaderState::Complete;
}
GzHeaderState::Complete => {
return Ok(());
}
}
}
}
fn header(&self) -> Option<&GzHeader> {
match self.state {
GzHeaderState::Complete => Some(&self.header),
_ => None,
}
}
}
impl From<GzHeaderParser> for GzHeader {
fn from(parser: GzHeaderParser) -> Self {
debug_assert!(matches!(parser.state, GzHeaderState::Complete));
parser.header
}
}
// Attempt to fill the `buffer` from `r`. Return the number of bytes read.
// Return an error if EOF is read before the buffer is full. This differs
// from `read` in that Ok(0) means that more data may be available.
fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> {
debug_assert!(!buffer.is_empty());
match r.read(buffer) {
Ok(0) => Err(ErrorKind::UnexpectedEof.into()),
Ok(n) => Ok(n),
Err(ref e) if e.kind() == ErrorKind::Interrupted => Ok(0),
Err(e) => Err(e),
}
}
// Read `r` up to the first nul byte, pushing non-nul bytes to `buffer`.
fn read_to_nul<R: BufRead>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> {
let mut bytes = r.bytes();
loop {
match bytes.next().transpose()? {
Some(0) => return Ok(()),
Some(_) if buffer.len() == MAX_HEADER_BUF => {
return Err(Error::new(
ErrorKind::InvalidInput,
"gzip header field too long",
));
}
Some(byte) => {
buffer.push(byte);
}
None => {
return Err(ErrorKind::UnexpectedEof.into());
}
}
}
}
fn parse_le_u16(buffer: &[u8; 2]) -> u16 {
u16::from_le_bytes(*buffer)
}
fn bad_header() -> Error {
Error::new(ErrorKind::InvalidInput, "invalid gzip header")
}
fn corrupt() -> Error {
Error::new(
ErrorKind::InvalidInput,
"corrupt gzip stream does not have a matching checksum",
)
}
/// A builder structure to create a new gzip Encoder.
///
/// This structure controls header configuration options such as the filename.
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// # use std::io;
/// use std::fs::File;
/// use flate2::GzBuilder;
/// use flate2::Compression;
///
/// // GzBuilder opens a file and writes a sample string using GzBuilder pattern
///
/// # fn sample_builder() -> Result<(), io::Error> {
/// let f = File::create("examples/hello_world.gz")?;
/// let mut gz = GzBuilder::new()
/// .filename("hello_world.txt")
/// .comment("test file, please delete")
/// .write(f, Compression::default());
/// gz.write_all(b"hello world")?;
/// gz.finish()?;
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Default)]
pub struct GzBuilder {
extra: Option<Vec<u8>>,
filename: Option<CString>,
comment: Option<CString>,
operating_system: Option<u8>,
mtime: u32,
}
impl GzBuilder {
/// Create a new blank builder with no header by default.
pub fn new() -> GzBuilder {
Self::default()
}
/// Configure the `mtime` field in the gzip header.
pub fn mtime(mut self, mtime: u32) -> GzBuilder {
self.mtime = mtime;
self
}
/// Configure the `operating_system` field in the gzip header.
pub fn operating_system(mut self, os: u8) -> GzBuilder {
self.operating_system = Some(os);
self
}
/// Configure the `extra` field in the gzip header.
pub fn extra<T: Into<Vec<u8>>>(mut self, extra: T) -> GzBuilder {
self.extra = Some(extra.into());
self
}
/// Configure the `filename` field in the gzip header.
///
/// # Panics
///
/// Panics if the `filename` slice contains a zero.
pub fn filename<T: Into<Vec<u8>>>(mut self, filename: T) -> GzBuilder {
self.filename = Some(CString::new(filename.into()).unwrap());
self
}
/// Configure the `comment` field in the gzip header.
///
/// # Panics
///
/// Panics if the `comment` slice contains a zero.
pub fn comment<T: Into<Vec<u8>>>(mut self, comment: T) -> GzBuilder {
self.comment = Some(CString::new(comment.into()).unwrap());
self
}
/// Consume this builder, creating a writer encoder in the process.
///
/// The data written to the returned encoder will be compressed and then
/// written out to the supplied parameter `w`.
pub fn write<W: Write>(self, w: W, lvl: Compression) -> write::GzEncoder<W> {
write::gz_encoder(self.into_header(lvl), w, lvl)
}
/// Consume this builder, creating a reader encoder in the process.
///
/// Data read from the returned encoder will be the compressed version of
/// the data read from the given reader.
pub fn read<R: Read>(self, r: R, lvl: Compression) -> read::GzEncoder<R> {
read::gz_encoder(self.buf_read(BufReader::new(r), lvl))
}
/// Consume this builder, creating a reader encoder in the process.
///
/// Data read from the returned encoder will be the compressed version of
/// the data read from the given reader.
pub fn buf_read<R>(self, r: R, lvl: Compression) -> bufread::GzEncoder<R>
where
R: BufRead,
{
bufread::gz_encoder(self.into_header(lvl), r, lvl)
}
fn into_header(self, lvl: Compression) -> Vec<u8> {
let GzBuilder {
extra,
filename,
comment,
operating_system,
mtime,
} = self;
let mut flg = 0;
let mut header = vec![0u8; 10];
if let Some(v) = extra {
flg |= FEXTRA;
header.extend((v.len() as u16).to_le_bytes());
header.extend(v);
}
if let Some(filename) = filename {
flg |= FNAME;
header.extend(filename.as_bytes_with_nul().iter().copied());
}
if let Some(comment) = comment {
flg |= FCOMMENT;
header.extend(comment.as_bytes_with_nul().iter().copied());
}
header[0] = 0x1f;
header[1] = 0x8b;
header[2] = 8;
header[3] = flg;
header[4] = mtime as u8;
header[5] = (mtime >> 8) as u8;
header[6] = (mtime >> 16) as u8;
header[7] = (mtime >> 24) as u8;
header[8] = if lvl.0 >= Compression::best().0 {
2
} else if lvl.0 <= Compression::fast().0 {
4
} else {
0
};
// Typically this byte indicates what OS the gz stream was created on,
// but in an effort to have cross-platform reproducible streams just
// default this value to 255. I'm not sure that if we "correctly" set
// this it'd do anything anyway...
header[9] = operating_system.unwrap_or(255);
header
}
}
#[cfg(test)]
mod tests {
use std::io::prelude::*;
use super::{read, write, GzBuilder, GzHeaderParser};
use crate::{Compression, GzHeader};
use rand::{rng, Rng};
#[test]
fn roundtrip() {
let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
e.write_all(b"foo bar baz").unwrap();
let inner = e.finish().unwrap();
let mut d = read::GzDecoder::new(&inner[..]);
let mut s = String::new();
d.read_to_string(&mut s).unwrap();
assert_eq!(s, "foo bar baz");
}
#[test]
fn roundtrip_zero() {
let e = write::GzEncoder::new(Vec::new(), Compression::default());
let inner = e.finish().unwrap();
let mut d = read::GzDecoder::new(&inner[..]);
let mut s = String::new();
d.read_to_string(&mut s).unwrap();
assert_eq!(s, "");
}
#[test]
fn roundtrip_big() {
let mut real = Vec::new();
let mut w = write::GzEncoder::new(Vec::new(), Compression::default());
let v = crate::random_bytes().take(1024).collect::<Vec<_>>();
for _ in 0..200 {
let to_write = &v[..rng().random_range(0..v.len())];
real.extend(to_write.iter().copied());
w.write_all(to_write).unwrap();
}
let result = w.finish().unwrap();
let mut r = read::GzDecoder::new(&result[..]);
let mut v = Vec::new();
r.read_to_end(&mut v).unwrap();
assert_eq!(v, real);
}
#[test]
fn roundtrip_big2() {
let v = crate::random_bytes().take(1024 * 1024).collect::<Vec<_>>();
let mut r = read::GzDecoder::new(read::GzEncoder::new(&v[..], Compression::default()));
let mut res = Vec::new();
r.read_to_end(&mut res).unwrap();
assert_eq!(res, v);
}
// A Rust implementation of CRC that closely matches the C code in RFC1952.
// Only use this to create CRCs for tests.
struct Rfc1952Crc {
/* Table of CRCs of all 8-bit messages. */
crc_table: [u32; 256],
}
impl Rfc1952Crc {
fn new() -> Self {
let mut crc = Rfc1952Crc {
crc_table: [0; 256],
};
/* Make the table for a fast CRC. */
for n in 0usize..256 {
let mut c = n as u32;
for _k in 0..8 {
if c & 1 != 0 {
c = 0xedb88320 ^ (c >> 1);
} else {
c = c >> 1;
}
}
crc.crc_table[n] = c;
}
crc
}
/*
Update a running crc with the bytes buf and return
the updated crc. The crc should be initialized to zero. Pre- and
post-conditioning (one's complement) is performed within this
function so it shouldn't be done by the caller.
*/
fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 {
let mut c = crc ^ 0xffffffff;
for b in buf {
c = self.crc_table[(c as u8 ^ *b) as usize] ^ (c >> 8);
}
c ^ 0xffffffff
}
/* Return the CRC of the bytes buf. */
fn crc(&self, buf: &[u8]) -> u32 {
self.update_crc(0, buf)
}
}
#[test]
fn roundtrip_header() {
let mut header = GzBuilder::new()
.mtime(1234)
.operating_system(57)
.filename("filename")
.comment("comment")
.into_header(Compression::fast());
// Add a CRC to the header
header[3] = header[3] ^ super::FHCRC;
let rfc1952_crc = Rfc1952Crc::new();
let crc32 = rfc1952_crc.crc(&header);
let crc16 = crc32 as u16;
header.extend(&crc16.to_le_bytes());
let mut parser = GzHeaderParser::new();
parser.parse(&mut header.as_slice()).unwrap();
let actual = parser.header().unwrap();
assert_eq!(
actual,
&GzHeader {
extra: None,
filename: Some("filename".as_bytes().to_vec()),
comment: Some("comment".as_bytes().to_vec()),
operating_system: 57,
mtime: 1234
}
)
}
#[test]
fn fields() {
let r = vec![0, 2, 4, 6];
let e = GzBuilder::new()
.filename("foo.rs")
.comment("bar")
.extra(vec![0, 1, 2, 3])
.read(&r[..], Compression::default());
let mut d = read::GzDecoder::new(e);
assert_eq!(d.header().unwrap().filename(), Some(&b"foo.rs"[..]));
assert_eq!(d.header().unwrap().comment(), Some(&b"bar"[..]));
assert_eq!(d.header().unwrap().extra(), Some(&b"\x00\x01\x02\x03"[..]));
let mut res = Vec::new();
d.read_to_end(&mut res).unwrap();
assert_eq!(res, vec![0, 2, 4, 6]);
}
#[test]
fn keep_reading_after_end() {
let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
e.write_all(b"foo bar baz").unwrap();
let inner = e.finish().unwrap();
let mut d = read::GzDecoder::new(&inner[..]);
let mut s = String::new();
d.read_to_string(&mut s).unwrap();
assert_eq!(s, "foo bar baz");
d.read_to_string(&mut s).unwrap();
assert_eq!(s, "foo bar baz");
}
#[test]
fn qc_reader() {
::quickcheck::quickcheck(test as fn(_) -> _);
fn test(v: Vec<u8>) -> bool {
let r = read::GzEncoder::new(&v[..], Compression::default());
let mut r = read::GzDecoder::new(r);
let mut v2 = Vec::new();
r.read_to_end(&mut v2).unwrap();
v == v2
}
}
#[test]
fn flush_after_write() {
let mut f = write::GzEncoder::new(Vec::new(), Compression::default());
write!(f, "Hello world").unwrap();
f.flush().unwrap();
}
}

378
vendor/flate2/src/gz/read.rs vendored Normal file
View File

@@ -0,0 +1,378 @@
use std::io;
use std::io::prelude::*;
use super::bufread;
use super::{GzBuilder, GzHeader};
use crate::bufreader::BufReader;
use crate::Compression;
/// A gzip streaming encoder
///
/// This structure implements a [`Read`] interface. When read from, it reads
/// uncompressed data from the underlying [`Read`] and provides the compressed data.
///
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// use flate2::Compression;
/// use flate2::read::GzEncoder;
///
/// // Return a vector containing the GZ compressed version of hello world
///
/// fn gzencode_hello_world() -> io::Result<Vec<u8>> {
/// let mut ret_vec = Vec::new();
/// let bytestring = b"hello world";
/// let mut gz = GzEncoder::new(&bytestring[..], Compression::fast());
/// gz.read_to_end(&mut ret_vec)?;
/// Ok(ret_vec)
/// }
/// ```
#[derive(Debug)]
pub struct GzEncoder<R> {
inner: bufread::GzEncoder<BufReader<R>>,
}
pub fn gz_encoder<R: Read>(inner: bufread::GzEncoder<BufReader<R>>) -> GzEncoder<R> {
GzEncoder { inner }
}
impl<R: Read> GzEncoder<R> {
/// Creates a new encoder which will use the given compression level.
///
/// The encoder is not configured specially for the emitted header. For
/// header configuration, see the `GzBuilder` type.
///
/// The data read from the stream `r` will be compressed and available
/// through the returned reader.
pub fn new(r: R, level: Compression) -> GzEncoder<R> {
GzBuilder::new().read(r, level)
}
}
impl<R> GzEncoder<R> {
/// Acquires a reference to the underlying reader.
pub fn get_ref(&self) -> &R {
self.inner.get_ref().get_ref()
}
/// Acquires a mutable reference to the underlying reader.
///
/// Note that mutation of the reader may result in surprising results if
/// this encoder is continued to be used.
pub fn get_mut(&mut self) -> &mut R {
self.inner.get_mut().get_mut()
}
/// Returns the underlying stream, consuming this encoder
pub fn into_inner(self) -> R {
self.inner.into_inner().into_inner()
}
}
impl<R: Read> Read for GzEncoder<R> {
fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
self.inner.read(into)
}
}
impl<R: Read + Write> Write for GzEncoder<R> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.get_mut().write(buf)
}
fn flush(&mut self) -> io::Result<()> {
self.get_mut().flush()
}
}
/// A decoder for a single member of a [gzip file].
///
/// This structure implements a [`Read`] interface. When read from, it reads
/// compressed data from the underlying [`Read`] and provides the uncompressed data.
///
/// After reading a single member of the gzip data this reader will return
/// Ok(0) even if there are more bytes available in the underlying reader.
/// `GzDecoder` may have read additional bytes past the end of the gzip data.
/// If you need the following bytes, wrap the `Reader` in a `std::io::BufReader`
/// and use `bufread::GzDecoder` instead.
///
/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
/// or read more
/// [in the introduction](../index.html#about-multi-member-gzip-files).
///
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::read::GzDecoder;
///
/// # fn main() {
/// # let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// # e.write_all(b"Hello World").unwrap();
/// # let bytes = e.finish().unwrap();
/// # println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements Read
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
/// let mut gz = GzDecoder::new(&bytes[..]);
/// let mut s = String::new();
/// gz.read_to_string(&mut s)?;
/// Ok(s)
/// }
/// ```
#[derive(Debug)]
pub struct GzDecoder<R> {
inner: bufread::GzDecoder<BufReader<R>>,
}
impl<R: Read> GzDecoder<R> {
/// Creates a new decoder from the given reader, immediately parsing the
/// gzip header.
pub fn new(r: R) -> GzDecoder<R> {
GzDecoder {
inner: bufread::GzDecoder::new(BufReader::new(r)),
}
}
}
impl<R> GzDecoder<R> {
/// Returns the header associated with this stream, if it was valid.
pub fn header(&self) -> Option<&GzHeader> {
self.inner.header()
}
/// Acquires a reference to the underlying reader.
///
/// Note that the decoder may have read past the end of the gzip data.
/// To prevent this use [`bufread::GzDecoder`] instead.
pub fn get_ref(&self) -> &R {
self.inner.get_ref().get_ref()
}
/// Acquires a mutable reference to the underlying stream.
///
/// Note that mutation of the stream may result in surprising results if
/// this decoder continues to be used.
///
/// Note that the decoder may have read past the end of the gzip data.
/// To prevent this use [`bufread::GzDecoder`] instead.
pub fn get_mut(&mut self) -> &mut R {
self.inner.get_mut().get_mut()
}
/// Consumes this decoder, returning the underlying reader.
///
/// Note that the decoder may have read past the end of the gzip data.
/// Subsequent reads will skip those bytes. To prevent this use
/// [`bufread::GzDecoder`] instead.
pub fn into_inner(self) -> R {
self.inner.into_inner().into_inner()
}
}
impl<R: Read> Read for GzDecoder<R> {
fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
self.inner.read(into)
}
}
impl<R: Read + Write> Write for GzDecoder<R> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.get_mut().write(buf)
}
fn flush(&mut self) -> io::Result<()> {
self.get_mut().flush()
}
}
/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members.
///
/// This structure implements a [`Read`] interface. When read from, it reads
/// compressed data from the underlying [`Read`] and provides the uncompressed
/// data.
///
/// A gzip file consists of a series of *members* concatenated one after another.
/// MultiGzDecoder decodes all members of a file and returns Ok(0) once the
/// underlying reader does.
///
/// To handle members separately, see [GzDecoder] or read more
/// [in the introduction](../index.html#about-multi-member-gzip-files).
///
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
/// # use flate2::write::GzEncoder;
/// use flate2::read::MultiGzDecoder;
///
/// # fn main() {
/// # let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// # e.write_all(b"Hello World").unwrap();
/// # let bytes = e.finish().unwrap();
/// # println!("{}", decode_reader(bytes).unwrap());
/// # }
/// #
/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
/// // Here &[u8] implements Read
///
/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
/// let mut gz = MultiGzDecoder::new(&bytes[..]);
/// let mut s = String::new();
/// gz.read_to_string(&mut s)?;
/// Ok(s)
/// }
/// ```
#[derive(Debug)]
pub struct MultiGzDecoder<R> {
inner: bufread::MultiGzDecoder<BufReader<R>>,
}
impl<R: Read> MultiGzDecoder<R> {
/// Creates a new decoder from the given reader, immediately parsing the
/// (first) gzip header. If the gzip stream contains multiple members all will
/// be decoded.
pub fn new(r: R) -> MultiGzDecoder<R> {
MultiGzDecoder {
inner: bufread::MultiGzDecoder::new(BufReader::new(r)),
}
}
}
impl<R> MultiGzDecoder<R> {
/// Returns the current header associated with this stream, if it's valid.
pub fn header(&self) -> Option<&GzHeader> {
self.inner.header()
}
/// Acquires a reference to the underlying reader.
pub fn get_ref(&self) -> &R {
self.inner.get_ref().get_ref()
}
/// Acquires a mutable reference to the underlying stream.
///
/// Note that mutation of the stream may result in surprising results if
/// this decoder is continued to be used.
pub fn get_mut(&mut self) -> &mut R {
self.inner.get_mut().get_mut()
}
/// Consumes this decoder, returning the underlying reader.
pub fn into_inner(self) -> R {
self.inner.into_inner().into_inner()
}
}
impl<R: Read> Read for MultiGzDecoder<R> {
fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
self.inner.read(into)
}
}
impl<R: Read + Write> Write for MultiGzDecoder<R> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.get_mut().write(buf)
}
fn flush(&mut self) -> io::Result<()> {
self.get_mut().flush()
}
}
#[cfg(test)]
mod tests {
use std::io::{Cursor, ErrorKind, Read, Result, Write};
use super::GzDecoder;
//a cursor turning EOF into blocking errors
#[derive(Debug)]
pub struct BlockingCursor {
pub cursor: Cursor<Vec<u8>>,
}
impl BlockingCursor {
pub fn new() -> BlockingCursor {
BlockingCursor {
cursor: Cursor::new(Vec::new()),
}
}
pub fn set_position(&mut self, pos: u64) {
return self.cursor.set_position(pos);
}
}
impl Write for BlockingCursor {
fn write(&mut self, buf: &[u8]) -> Result<usize> {
return self.cursor.write(buf);
}
fn flush(&mut self) -> Result<()> {
return self.cursor.flush();
}
}
impl Read for BlockingCursor {
fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
//use the cursor, except it turns eof into blocking error
let r = self.cursor.read(buf);
match r {
Err(ref err) => {
if err.kind() == ErrorKind::UnexpectedEof {
return Err(ErrorKind::WouldBlock.into());
}
}
Ok(0) => {
//regular EOF turned into blocking error
return Err(ErrorKind::WouldBlock.into());
}
Ok(_n) => {}
}
return r;
}
}
#[test]
fn blocked_partial_header_read() {
// this is a reader which receives data afterwards
let mut r = BlockingCursor::new();
let data = vec![1, 2, 3];
match r.write_all(&data) {
Ok(()) => {}
_ => {
panic!("Unexpected result for write_all");
}
}
r.set_position(0);
// this is unused except for the buffering
let mut decoder = GzDecoder::new(r);
let mut out = Vec::with_capacity(7);
match decoder.read(&mut out) {
Err(e) => {
assert_eq!(e.kind(), ErrorKind::WouldBlock);
}
_ => {
panic!("Unexpected result for decoder.read");
}
}
}
}

641
vendor/flate2/src/gz/write.rs vendored Normal file
View File

@@ -0,0 +1,641 @@
use std::cmp;
use std::io;
use std::io::prelude::*;
use super::{corrupt, GzBuilder, GzHeader, GzHeaderParser};
use crate::crc::{Crc, CrcWriter};
use crate::zio;
use crate::{Compress, Compression, Decompress, Status};
/// A gzip streaming encoder
///
/// This structure exposes a [`Write`] interface that will emit compressed data
/// to the underlying writer `W`.
///
/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use flate2::Compression;
/// use flate2::write::GzEncoder;
///
/// // Vec<u8> implements Write to print the compressed bytes of sample string
/// # fn main() {
///
/// let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// e.write_all(b"Hello World").unwrap();
/// println!("{:?}", e.finish().unwrap());
/// # }
/// ```
#[derive(Debug)]
pub struct GzEncoder<W: Write> {
inner: zio::Writer<W, Compress>,
crc: Crc,
crc_bytes_written: usize,
header: Vec<u8>,
}
pub fn gz_encoder<W: Write>(header: Vec<u8>, w: W, lvl: Compression) -> GzEncoder<W> {
GzEncoder {
inner: zio::Writer::new(w, Compress::new(lvl, false)),
crc: Crc::new(),
header,
crc_bytes_written: 0,
}
}
impl<W: Write> GzEncoder<W> {
/// Creates a new encoder which will use the given compression level.
///
/// The encoder is not configured specially for the emitted header. For
/// header configuration, see the `GzBuilder` type.
///
/// The data written to the returned encoder will be compressed and then
/// written to the stream `w`.
pub fn new(w: W, level: Compression) -> GzEncoder<W> {
GzBuilder::new().write(w, level)
}
/// Acquires a reference to the underlying writer.
pub fn get_ref(&self) -> &W {
self.inner.get_ref()
}
/// Acquires a mutable reference to the underlying writer.
///
/// Note that mutation of the writer may result in surprising results if
/// this encoder is continued to be used.
pub fn get_mut(&mut self) -> &mut W {
self.inner.get_mut()
}
/// Attempt to finish this output stream, writing out final chunks of data.
///
/// Note that this function can only be used once data has finished being
/// written to the output stream. After this function is called then further
/// calls to `write` may result in a panic.
///
/// # Panics
///
/// Attempts to write data to this stream may result in a panic after this
/// function is called.
///
/// # Errors
///
/// This function will perform I/O to complete this stream, and any I/O
/// errors which occur will be returned from this function.
pub fn try_finish(&mut self) -> io::Result<()> {
self.write_header()?;
self.inner.finish()?;
while self.crc_bytes_written < 8 {
let (sum, amt) = (self.crc.sum(), self.crc.amount());
let buf = [
sum as u8,
(sum >> 8) as u8,
(sum >> 16) as u8,
(sum >> 24) as u8,
amt as u8,
(amt >> 8) as u8,
(amt >> 16) as u8,
(amt >> 24) as u8,
];
let inner = self.inner.get_mut();
let n = inner.write(&buf[self.crc_bytes_written..])?;
self.crc_bytes_written += n;
}
Ok(())
}
/// Finish encoding this stream, returning the underlying writer once the
/// encoding is done.
///
/// Note that this function may not be suitable to call in a situation where
/// the underlying stream is an asynchronous I/O stream. To finish a stream
/// the `try_finish` (or `shutdown`) method should be used instead. To
/// re-acquire ownership of a stream it is safe to call this method after
/// `try_finish` or `shutdown` has returned `Ok`.
///
/// # Errors
///
/// This function will perform I/O to complete this stream, and any I/O
/// errors which occur will be returned from this function.
pub fn finish(mut self) -> io::Result<W> {
self.try_finish()?;
Ok(self.inner.take_inner())
}
fn write_header(&mut self) -> io::Result<()> {
while !self.header.is_empty() {
let n = self.inner.get_mut().write(&self.header)?;
self.header.drain(..n);
}
Ok(())
}
}
impl<W: Write> Write for GzEncoder<W> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
assert_eq!(self.crc_bytes_written, 0);
self.write_header()?;
let n = self.inner.write(buf)?;
self.crc.update(&buf[..n]);
Ok(n)
}
fn flush(&mut self) -> io::Result<()> {
assert_eq!(self.crc_bytes_written, 0);
self.write_header()?;
self.inner.flush()
}
}
impl<R: Read + Write> Read for GzEncoder<R> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
self.get_mut().read(buf)
}
}
impl<W: Write> Drop for GzEncoder<W> {
fn drop(&mut self) {
if self.inner.is_present() {
let _ = self.try_finish();
}
}
}
/// A decoder for a single member of a [gzip file].
///
/// This structure exposes a [`Write`] interface, receiving compressed data and
/// writing uncompressed data to the underlying writer.
///
/// After decoding a single member of the gzip data this writer will return the number of bytes up to
/// to the end of the gzip member and subsequent writes will return Ok(0) allowing the caller to
/// handle any data following the gzip member.
///
/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
/// or read more
/// [in the introduction](../index.html#about-multi-member-gzip-files).
///
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// use std::io;
/// use flate2::Compression;
/// use flate2::write::{GzEncoder, GzDecoder};
///
/// # fn main() {
/// # let mut e = GzEncoder::new(Vec::new(), Compression::default());
/// # e.write(b"Hello World").unwrap();
/// # let bytes = e.finish().unwrap();
/// # assert_eq!("Hello World", decode_writer(bytes).unwrap());
/// # }
/// // Uncompresses a gzip encoded vector of bytes and returns a string or error
/// // Here Vec<u8> implements Write
/// fn decode_writer(bytes: Vec<u8>) -> io::Result<String> {
/// let mut writer = Vec::new();
/// let mut decoder = GzDecoder::new(writer);
/// decoder.write_all(&bytes[..])?;
/// writer = decoder.finish()?;
/// let return_string = String::from_utf8(writer).expect("String parsing error");
/// Ok(return_string)
/// }
/// ```
#[derive(Debug)]
pub struct GzDecoder<W: Write> {
inner: zio::Writer<CrcWriter<W>, Decompress>,
crc_bytes: Vec<u8>,
header_parser: GzHeaderParser,
}
const CRC_BYTES_LEN: usize = 8;
impl<W: Write> GzDecoder<W> {
/// Creates a new decoder which will write uncompressed data to the stream.
///
/// When this encoder is dropped or unwrapped the final pieces of data will
/// be flushed.
pub fn new(w: W) -> GzDecoder<W> {
GzDecoder {
inner: zio::Writer::new(CrcWriter::new(w), Decompress::new(false)),
crc_bytes: Vec::with_capacity(CRC_BYTES_LEN),
header_parser: GzHeaderParser::new(),
}
}
/// Returns the header associated with this stream.
pub fn header(&self) -> Option<&GzHeader> {
self.header_parser.header()
}
/// Acquires a reference to the underlying writer.
pub fn get_ref(&self) -> &W {
self.inner.get_ref().get_ref()
}
/// Acquires a mutable reference to the underlying writer.
///
/// Note that mutating the output/input state of the stream may corrupt this
/// object, so care must be taken when using this method.
pub fn get_mut(&mut self) -> &mut W {
self.inner.get_mut().get_mut()
}
/// Attempt to finish this output stream, writing out final chunks of data.
///
/// Note that this function can only be used once data has finished being
/// written to the output stream. After this function is called then further
/// calls to `write` may result in a panic.
///
/// # Panics
///
/// Attempts to write data to this stream may result in a panic after this
/// function is called.
///
/// # Errors
///
/// This function will perform I/O to finish the stream, returning any
/// errors which happen.
pub fn try_finish(&mut self) -> io::Result<()> {
self.finish_and_check_crc()?;
Ok(())
}
/// Consumes this decoder, flushing the output stream.
///
/// This will flush the underlying data stream and then return the contained
/// writer if the flush succeeded.
///
/// Note that this function may not be suitable to call in a situation where
/// the underlying stream is an asynchronous I/O stream. To finish a stream
/// the `try_finish` (or `shutdown`) method should be used instead. To
/// re-acquire ownership of a stream it is safe to call this method after
/// `try_finish` or `shutdown` has returned `Ok`.
///
/// # Errors
///
/// This function will perform I/O to complete this stream, and any I/O
/// errors which occur will be returned from this function.
pub fn finish(mut self) -> io::Result<W> {
self.finish_and_check_crc()?;
Ok(self.inner.take_inner().into_inner())
}
fn finish_and_check_crc(&mut self) -> io::Result<()> {
self.inner.finish()?;
if self.crc_bytes.len() != 8 {
return Err(corrupt());
}
let crc = (self.crc_bytes[0] as u32)
| ((self.crc_bytes[1] as u32) << 8)
| ((self.crc_bytes[2] as u32) << 16)
| ((self.crc_bytes[3] as u32) << 24);
let amt = (self.crc_bytes[4] as u32)
| ((self.crc_bytes[5] as u32) << 8)
| ((self.crc_bytes[6] as u32) << 16)
| ((self.crc_bytes[7] as u32) << 24);
if crc != self.inner.get_ref().crc().sum() {
return Err(corrupt());
}
if amt != self.inner.get_ref().crc().amount() {
return Err(corrupt());
}
Ok(())
}
}
impl<W: Write> Write for GzDecoder<W> {
fn write(&mut self, mut buf: &[u8]) -> io::Result<usize> {
let buflen = buf.len();
if self.header().is_none() {
match self.header_parser.parse(&mut buf) {
Err(err) => {
if err.kind() == io::ErrorKind::UnexpectedEof {
// all data read but header still not complete
Ok(buflen)
} else {
Err(err)
}
}
Ok(_) => {
debug_assert!(self.header().is_some());
// buf now contains the unread part of the original buf
let n = buflen - buf.len();
Ok(n)
}
}
} else {
let (n, status) = self.inner.write_with_status(buf)?;
if status == Status::StreamEnd && n < buf.len() && self.crc_bytes.len() < 8 {
let remaining = buf.len() - n;
let crc_bytes = cmp::min(remaining, CRC_BYTES_LEN - self.crc_bytes.len());
self.crc_bytes.extend(&buf[n..n + crc_bytes]);
return Ok(n + crc_bytes);
}
Ok(n)
}
}
fn flush(&mut self) -> io::Result<()> {
self.inner.flush()
}
}
impl<W: Read + Write> Read for GzDecoder<W> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
self.inner.get_mut().get_mut().read(buf)
}
}
/// A gzip streaming decoder that decodes a [gzip file] with multiple members.
///
/// This structure exposes a [`Write`] interface that will consume compressed data and
/// write uncompressed data to the underlying writer.
///
/// A gzip file consists of a series of *members* concatenated one after another.
/// `MultiGzDecoder` decodes all members of a file and writes them to the
/// underlying writer one after another.
///
/// To handle members separately, see [GzDecoder] or read more
/// [in the introduction](../index.html#about-multi-member-gzip-files).
///
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
#[derive(Debug)]
pub struct MultiGzDecoder<W: Write> {
inner: GzDecoder<W>,
}
impl<W: Write> MultiGzDecoder<W> {
/// Creates a new decoder which will write uncompressed data to the stream.
/// If the gzip stream contains multiple members all will be decoded.
pub fn new(w: W) -> MultiGzDecoder<W> {
MultiGzDecoder {
inner: GzDecoder::new(w),
}
}
/// Returns the header associated with the current member.
pub fn header(&self) -> Option<&GzHeader> {
self.inner.header()
}
/// Acquires a reference to the underlying writer.
pub fn get_ref(&self) -> &W {
self.inner.get_ref()
}
/// Acquires a mutable reference to the underlying writer.
///
/// Note that mutating the output/input state of the stream may corrupt this
/// object, so care must be taken when using this method.
pub fn get_mut(&mut self) -> &mut W {
self.inner.get_mut()
}
/// Attempt to finish this output stream, writing out final chunks of data.
///
/// Note that this function can only be used once data has finished being
/// written to the output stream. After this function is called then further
/// calls to `write` may result in a panic.
///
/// # Panics
///
/// Attempts to write data to this stream may result in a panic after this
/// function is called.
///
/// # Errors
///
/// This function will perform I/O to finish the stream, returning any
/// errors which happen.
pub fn try_finish(&mut self) -> io::Result<()> {
self.inner.try_finish()
}
/// Consumes this decoder, flushing the output stream.
///
/// This will flush the underlying data stream and then return the contained
/// writer if the flush succeeded.
///
/// Note that this function may not be suitable to call in a situation where
/// the underlying stream is an asynchronous I/O stream. To finish a stream
/// the `try_finish` (or `shutdown`) method should be used instead. To
/// re-acquire ownership of a stream it is safe to call this method after
/// `try_finish` or `shutdown` has returned `Ok`.
///
/// # Errors
///
/// This function will perform I/O to complete this stream, and any I/O
/// errors which occur will be returned from this function.
pub fn finish(self) -> io::Result<W> {
self.inner.finish()
}
}
impl<W: Write> Write for MultiGzDecoder<W> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
if buf.is_empty() {
Ok(0)
} else {
match self.inner.write(buf) {
Ok(0) => {
// When the GzDecoder indicates that it has finished
// create a new GzDecoder to handle additional data.
self.inner.try_finish()?;
let w = self.inner.inner.take_inner().into_inner();
self.inner = GzDecoder::new(w);
self.inner.write(buf)
}
res => res,
}
}
}
fn flush(&mut self) -> io::Result<()> {
self.inner.flush()
}
}
#[cfg(test)]
mod tests {
use super::*;
const STR: &str = "Hello World Hello World Hello World Hello World Hello World \
Hello World Hello World Hello World Hello World Hello World \
Hello World Hello World Hello World Hello World Hello World \
Hello World Hello World Hello World Hello World Hello World \
Hello World Hello World Hello World Hello World Hello World";
#[test]
fn decode_writer_one_chunk() {
let mut e = GzEncoder::new(Vec::new(), Compression::default());
e.write(STR.as_ref()).unwrap();
let bytes = e.finish().unwrap();
let mut writer = Vec::new();
let mut decoder = GzDecoder::new(writer);
let n = decoder.write(&bytes[..]).unwrap();
decoder.write(&bytes[n..]).unwrap();
decoder.try_finish().unwrap();
writer = decoder.finish().unwrap();
let return_string = String::from_utf8(writer).expect("String parsing error");
assert_eq!(return_string, STR);
}
#[test]
fn decode_writer_partial_header() {
let mut e = GzEncoder::new(Vec::new(), Compression::default());
e.write(STR.as_ref()).unwrap();
let bytes = e.finish().unwrap();
let mut writer = Vec::new();
let mut decoder = GzDecoder::new(writer);
assert_eq!(decoder.write(&bytes[..5]).unwrap(), 5);
let n = decoder.write(&bytes[5..]).unwrap();
if n < bytes.len() - 5 {
decoder.write(&bytes[n + 5..]).unwrap();
}
writer = decoder.finish().unwrap();
let return_string = String::from_utf8(writer).expect("String parsing error");
assert_eq!(return_string, STR);
}
#[test]
fn decode_writer_partial_header_filename() {
let filename = "test.txt";
let mut e = GzBuilder::new()
.filename(filename)
.read(STR.as_bytes(), Compression::default());
let mut bytes = Vec::new();
e.read_to_end(&mut bytes).unwrap();
let mut writer = Vec::new();
let mut decoder = GzDecoder::new(writer);
assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12);
let n = decoder.write(&bytes[12..]).unwrap();
if n < bytes.len() - 12 {
decoder.write(&bytes[n + 12..]).unwrap();
}
assert_eq!(
decoder.header().unwrap().filename().unwrap(),
filename.as_bytes()
);
writer = decoder.finish().unwrap();
let return_string = String::from_utf8(writer).expect("String parsing error");
assert_eq!(return_string, STR);
}
#[test]
fn decode_writer_partial_header_comment() {
let comment = "test comment";
let mut e = GzBuilder::new()
.comment(comment)
.read(STR.as_bytes(), Compression::default());
let mut bytes = Vec::new();
e.read_to_end(&mut bytes).unwrap();
let mut writer = Vec::new();
let mut decoder = GzDecoder::new(writer);
assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12);
let n = decoder.write(&bytes[12..]).unwrap();
if n < bytes.len() - 12 {
decoder.write(&bytes[n + 12..]).unwrap();
}
assert_eq!(
decoder.header().unwrap().comment().unwrap(),
comment.as_bytes()
);
writer = decoder.finish().unwrap();
let return_string = String::from_utf8(writer).expect("String parsing error");
assert_eq!(return_string, STR);
}
#[test]
fn decode_writer_exact_header() {
let mut e = GzEncoder::new(Vec::new(), Compression::default());
e.write(STR.as_ref()).unwrap();
let bytes = e.finish().unwrap();
let mut writer = Vec::new();
let mut decoder = GzDecoder::new(writer);
assert_eq!(decoder.write(&bytes[..10]).unwrap(), 10);
decoder.write(&bytes[10..]).unwrap();
writer = decoder.finish().unwrap();
let return_string = String::from_utf8(writer).expect("String parsing error");
assert_eq!(return_string, STR);
}
#[test]
fn decode_writer_partial_crc() {
let mut e = GzEncoder::new(Vec::new(), Compression::default());
e.write(STR.as_ref()).unwrap();
let bytes = e.finish().unwrap();
let mut writer = Vec::new();
let mut decoder = GzDecoder::new(writer);
let l = bytes.len() - 5;
let n = decoder.write(&bytes[..l]).unwrap();
decoder.write(&bytes[n..]).unwrap();
writer = decoder.finish().unwrap();
let return_string = String::from_utf8(writer).expect("String parsing error");
assert_eq!(return_string, STR);
}
// Two or more gzip files concatenated form a multi-member gzip file. MultiGzDecoder will
// concatenate the decoded contents of all members.
#[test]
fn decode_multi_writer() {
let mut e = GzEncoder::new(Vec::new(), Compression::default());
e.write(STR.as_ref()).unwrap();
let bytes = e.finish().unwrap().repeat(2);
let mut writer = Vec::new();
let mut decoder = MultiGzDecoder::new(writer);
let mut count = 0;
while count < bytes.len() {
let n = decoder.write(&bytes[count..]).unwrap();
assert!(n != 0);
count += n;
}
writer = decoder.finish().unwrap();
let return_string = String::from_utf8(writer).expect("String parsing error");
let expected = STR.repeat(2);
assert_eq!(return_string, expected);
}
// GzDecoder consumes one gzip member and then returns 0 for subsequent writes, allowing any
// additional data to be consumed by the caller.
#[test]
fn decode_extra_data() {
let compressed = {
let mut e = GzEncoder::new(Vec::new(), Compression::default());
e.write(STR.as_ref()).unwrap();
let mut b = e.finish().unwrap();
b.push(b'x');
b
};
let mut writer = Vec::new();
let mut decoder = GzDecoder::new(writer);
let mut consumed_bytes = 0;
loop {
let n = decoder.write(&compressed[consumed_bytes..]).unwrap();
if n == 0 {
break;
}
consumed_bytes += n;
}
writer = decoder.finish().unwrap();
let actual = String::from_utf8(writer).expect("String parsing error");
assert_eq!(actual, STR);
assert_eq!(&compressed[consumed_bytes..], b"x");
}
}