Skip to content

Commit

Permalink
feat(commands): More dump options (#1339)
Browse files Browse the repository at this point in the history
Adds output as targz and zip.
Also adds the options `--archive` to choose the output format and
`--file` to directly specify a file to dump into. When a file is
specified, the output format is automatically chosen from the file
extension, if given.

---------

Signed-off-by: simonsan <[email protected]>
Co-authored-by: simonsan <[email protected]>
  • Loading branch information
aawsome and simonsan authored Dec 2, 2024
1 parent f14a8bb commit 8b18e62
Show file tree
Hide file tree
Showing 6 changed files with 296 additions and 19 deletions.
65 changes: 65 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ dateparser = "0.2.1"
derive_more = { version = "1", features = ["debug"] }
dialoguer = "0.11.0"
directories = "5"
flate2 = "1.0.34"
fuse_mt = { version = "0.6", optional = true }
futures = { version = "0.3.31", optional = true }
gethostname = "0.5"
Expand All @@ -120,6 +121,7 @@ open = "5.3.1"
self_update = { version = "=0.39.0", default-features = false, optional = true, features = ["rustls", "archive-tar", "compression-flate2"] } # FIXME: Downgraded to 0.39.0 due to https://github.com/jaemk/self_update/issues/136
tar = "0.4.43"
toml = "0.8"
zip = { version = "2.2.0", default-features = false, features = ["deflate", "chrono"] }

# filtering
jaq-core = { version = "2", optional = true }
Expand Down
1 change: 1 addition & 0 deletions deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ allow = [
"CC0-1.0",
"Zlib",
"Unicode-3.0",
"BSL-1.0",
]
# The confidence threshold for detecting a license from license text.
# The higher the value, the more closely the license text must be to the
Expand Down
213 changes: 204 additions & 9 deletions src/commands/dump.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
//! `dump` subcommand
use std::io::{Read, Write};
use std::{
fs::File,
io::{copy, Cursor, Read, Seek, SeekFrom, Write},
path::PathBuf,
};

use crate::{repository::CliIndexedRepo, status_err, Application, RUSTIC_APP};

use abscissa_core::{Command, Runnable, Shutdown};
use anyhow::Result;
use derive_more::FromStr;
use flate2::{write::GzEncoder, Compression};
use log::warn;
use rustic_core::{
repofile::{Node, NodeType},
vfs::OpenFile,
LsOptions,
};
use tar::{Builder, EntryType, Header};
use zip::{write::SimpleFileOptions, ZipWriter};

/// `dump` subcommand
#[derive(clap::Parser, Command, Debug)]
Expand All @@ -21,9 +28,38 @@ pub(crate) struct DumpCmd {
#[clap(value_name = "SNAPSHOT[:PATH]")]
snap: String,

/// Listing options
#[clap(flatten)]
ls_opts: LsOptions,
/// set archive format to use. Possible values: auto, content, tar, targz, zip. For "auto", the format is determined by file extension (if given) or "tar" for dirs.
#[clap(long, value_name = "FORMAT", default_value = "auto")]
archive: ArchiveKind,

/// dump output to the given file. Use this instead of redirecting stdout to a file.
#[clap(long)]
file: Option<PathBuf>,

/// Glob pattern to exclude/include (can be specified multiple times)
#[clap(long, help_heading = "Exclude options")]
glob: Vec<String>,

/// Same as --glob pattern but ignores the casing of filenames
#[clap(long, value_name = "GLOB", help_heading = "Exclude options")]
iglob: Vec<String>,

/// Read glob patterns to exclude/include from this file (can be specified multiple times)
#[clap(long, value_name = "FILE", help_heading = "Exclude options")]
glob_file: Vec<String>,

/// Same as --glob-file but ignores the casing of filenames in patterns
#[clap(long, value_name = "FILE", help_heading = "Exclude options")]
iglob_file: Vec<String>,
}

/// Output format used by the `dump` command, selected with `--archive`.
///
/// The value is presumably parsed from the command line through the derived
/// `FromStr` implementation (the option defaults to "auto").
#[derive(Debug, Clone, Copy, PartialEq, Eq, FromStr)]
enum ArchiveKind {
/// Resolve the format at run time: a `--file` extension of `tar` selects
/// `Tar`, `tgz`/`gz` selects `TarGz`, `zip` selects `Zip`; otherwise
/// directories fall back to `Tar` and everything else to `Content`.
Auto,
/// Write the raw content of the listed entries (see `dump_content`).
Content,
/// Write a tar archive (see `dump_tar`).
Tar,
/// Write a gzip-compressed tar archive (see `dump_tar_gz`).
TarGz,
/// Write a zip archive (see `dump_zip` / `write_zip_to_file`).
Zip,
}

impl Runnable for DumpCmd {
Expand All @@ -46,17 +82,77 @@ impl DumpCmd {
let node =
repo.node_from_snapshot_path(&self.snap, |sn| config.snapshot_filter.matches(sn))?;

let mut stdout = std::io::stdout();
if node.is_file() {
repo.dump(&node, &mut stdout)?;
let stdout = std::io::stdout();

let ls_opts = LsOptions::default()
.glob(self.glob.clone())
.glob_file(self.glob_file.clone())
.iglob(self.iglob.clone())
.iglob_file(self.iglob_file.clone())
.recursive(true);

let ext = self
.file
.as_ref()
.and_then(|f| f.extension().map(|s| s.to_string_lossy().to_string()));

let archive = match self.archive {
ArchiveKind::Auto => match ext.as_deref() {
Some("tar") => ArchiveKind::Tar,
Some("tgz") | Some("gz") => ArchiveKind::TarGz,
Some("zip") => ArchiveKind::Zip,
_ if node.is_dir() => ArchiveKind::Tar,
_ => ArchiveKind::Content,
},
a => a,
};

let mut w: Box<dyn Write> = if let Some(file) = &self.file {
let mut file = File::create(file)?;
if archive == ArchiveKind::Zip {
// when writing zip to a file, we use the optimized writer
return write_zip_to_file(&repo, &node, &mut file, &ls_opts);
}
Box::new(file)
} else {
dump_tar(&repo, &node, &mut stdout, &self.ls_opts)?;
}
Box::new(stdout)
};

match archive {
ArchiveKind::Content => dump_content(&repo, &node, &mut w, &ls_opts)?,
ArchiveKind::Tar => dump_tar(&repo, &node, &mut w, &ls_opts)?,
ArchiveKind::TarGz => dump_tar_gz(&repo, &node, &mut w, &ls_opts)?,
ArchiveKind::Zip => dump_zip(&repo, &node, &mut w, &ls_opts)?,
_ => {}
};

Ok(())
}
}

/// Writes the raw content of every entry listed for `node` to `w`, one after
/// another.
///
/// The entries come from `repo.ls(node, ls_opts)`.
///
/// # Errors
///
/// Returns the first error raised while listing an entry or while dumping it;
/// nothing further is written after that point.
fn dump_content(
    repo: &CliIndexedRepo,
    node: &Node,
    w: &mut impl Write,
    ls_opts: &LsOptions,
) -> Result<()> {
    repo.ls(node, ls_opts)?.try_for_each(|entry| -> Result<()> {
        // The path is irrelevant here: only the bytes of the entry are emitted.
        let (_path, entry_node) = entry?;
        repo.dump(&entry_node, w)?;
        Ok(())
    })
}

/// Writes the entries listed for `node` as a gzip-compressed tar archive to `w`.
///
/// The tar stream is produced by `dump_tar` and piped through a `GzEncoder`
/// using the default compression level.
///
/// # Errors
///
/// Returns an error if producing the tar stream fails or if the gzip stream
/// cannot be finalized.
fn dump_tar_gz(
    repo: &CliIndexedRepo,
    node: &Node,
    w: &mut impl Write,
    ls_opts: &LsOptions,
) -> Result<()> {
    let mut encoder = GzEncoder::new(w, Compression::default());
    dump_tar(repo, node, &mut encoder, ls_opts)?;
    // Finish explicitly: merely dropping the encoder would also try to write
    // the gzip trailer, but any I/O error doing so would be silently discarded
    // and a truncated archive would be reported as success.
    let _ = encoder.finish()?;
    Ok(())
}

fn dump_tar(
repo: &CliIndexedRepo,
node: &Node,
Expand Down Expand Up @@ -135,6 +231,105 @@ fn dump_tar(
Ok(())
}

/// Writes the entries listed for `node` as a zip archive to the plain writer `w`.
///
/// `write_zip_contents` needs a `Write + Seek` target, which `w` is not, so the
/// output is routed through a `SeekWriter` that buffers data in memory until it
/// is flushed to `w`.
///
/// # Errors
///
/// Returns an error if writing the archive contents, finishing the archive or
/// the final flush fails.
fn dump_zip(
    repo: &CliIndexedRepo,
    node: &Node,
    w: &mut impl Write,
    ls_opts: &LsOptions,
) -> Result<()> {
    let buffered = SeekWriter {
        write: w,
        cursor: Cursor::new(Vec::new()),
        written: 0,
    };

    let mut archive = ZipWriter::new(buffered);
    // Presumably makes the zip writer flush after each finished file, which
    // lets the `SeekWriter` hand its buffer over to `w` — confirm against the
    // `zip` crate documentation.
    archive.set_flush_on_finish_file(true);
    write_zip_contents(repo, node, &mut archive, ls_opts)?;

    // Push out whatever is still buffered once the archive is complete.
    archive.finish()?.flush()?;
    Ok(())
}

/// Writes the entries listed for `node` as a zip archive directly into `file`.
///
/// Because `file` is seekable, the archive is written straight to it without
/// the intermediate buffering `dump_zip` needs.
///
/// # Errors
///
/// Returns an error if writing the archive contents or finishing the archive
/// fails.
fn write_zip_to_file(
    repo: &CliIndexedRepo,
    node: &Node,
    file: &mut (impl Write + Seek),
    ls_opts: &LsOptions,
) -> Result<()> {
    let mut archive = ZipWriter::new(file);
    write_zip_contents(repo, node, &mut archive, ls_opts)?;
    // Only success or failure matters; the writer handed back is not needed.
    archive.finish().map(drop)?;
    Ok(())
}

/// Adds every entry listed for `node` to the zip archive `zip`.
///
/// Each entry gets its unix permissions and modification time when the node
/// metadata provides them. File entries are added with their content; all
/// other entries are added as directories.
///
/// # Errors
///
/// Returns the first error raised while listing entries, adding an entry to
/// the archive or dumping the content of a file.
fn write_zip_contents(
    repo: &CliIndexedRepo,
    node: &Node,
    zip: &mut ZipWriter<impl Write + Seek>,
    ls_opts: &LsOptions,
) -> Result<()> {
    for entry in repo.ls(node, ls_opts)? {
        let (entry_path, entry_node) = entry?;

        let with_mode = match entry_node.meta.mode {
            // TODO: this is some go-mapped mode, but lower bits are the standard unix mode bits -> is this ok?
            Some(mode) => SimpleFileOptions::default().unix_permissions(mode),
            None => SimpleFileOptions::default(),
        };
        let options = match entry_node.meta.mtime {
            // A timestamp the zip format cannot represent falls back to the
            // default zip date instead of failing the dump.
            Some(mtime) => {
                with_mode.last_modified_time(mtime.naive_local().try_into().unwrap_or_default())
            }
            None => with_mode,
        };

        if !entry_node.is_file() {
            // NOTE(review): everything that is not a file (presumably also
            // symlinks and special files) is stored as a directory — confirm
            // this is intended.
            zip.add_directory_from_path(entry_path, options)?;
            continue;
        }

        zip.start_file_from_path(entry_path, options)?;
        repo.dump(&entry_node, zip)?;
    }
    Ok(())
}

/// Adapter that offers `Write + Seek` on top of a writer that can only `Write`.
///
/// Everything written is collected in an in-memory buffer. `flush` hands the
/// buffer over to the wrapped writer and empties it. Seeking is therefore only
/// possible within the data written since the last flush; data that was
/// already handed over cannot be revisited.
///
/// All positions exchanged through the `Seek` interface are absolute, i.e.
/// counted from the very first byte written through this adapter.
struct SeekWriter<W> {
    /// The wrapped writer that receives the data on `flush`.
    write: W,
    /// Data written since the last flush, together with the current position in it.
    cursor: Cursor<Vec<u8>>,
    /// Number of bytes already handed over to `write`.
    written: u64,
}

impl<W> Read for SeekWriter<W> {
    /// Reads from the not-yet-flushed buffer, starting at the current position.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        self.cursor.read(buf)
    }
}

impl<W: Write> Write for SeekWriter<W> {
    /// Stores `buf` in the in-memory buffer at the current position.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        self.cursor.write(buf)
    }

    /// Hands the whole buffer over to the wrapped writer, empties the buffer
    /// and flushes the wrapped writer.
    fn flush(&mut self) -> std::io::Result<()> {
        // Always copy the complete buffer, regardless of the current position.
        _ = self.cursor.seek(SeekFrom::Start(0))?;
        let n = copy(&mut self.cursor, &mut self.write)?;
        _ = self.cursor.seek(SeekFrom::Start(0))?;
        self.cursor.get_mut().clear();
        // Give back memory after a large entry, but keep a reasonable capacity
        // around for the next one.
        self.cursor.get_mut().shrink_to(1_000_000);
        self.written += n;
        // A flush must reach the final destination, not just the wrapped writer.
        self.write.flush()
    }
}

impl<W> Seek for SeekWriter<W> {
    /// Moves the position within the not-yet-flushed data and returns the new
    /// absolute position.
    ///
    /// # Errors
    ///
    /// Returns `InvalidInput` if the target lies within data that was already
    /// flushed, or before the start of the buffer.
    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
        let buffer_pos = match pos {
            SeekFrom::Start(n) => {
                // Translate the absolute position into an offset in the buffer;
                // flushed data is gone, so reject targets before it explicitly
                // instead of underflowing.
                let offset = n.checked_sub(self.written).ok_or_else(|| {
                    std::io::Error::new(
                        std::io::ErrorKind::InvalidInput,
                        "cannot seek into data that was already flushed",
                    )
                })?;
                self.cursor.seek(SeekFrom::Start(offset))?
            }
            // The buffer holds the tail of the stream, so offsets relative to
            // the current position or to the end apply to it unchanged.
            relative => self.cursor.seek(relative)?,
        };
        Ok(self.written + buffer_pos)
    }

    /// Returns the absolute position: flushed bytes plus the position in the buffer.
    fn stream_position(&mut self) -> std::io::Result<u64> {
        Ok(self.written + self.cursor.stream_position()?)
    }
}

struct OpenFileReader<'a> {
repo: &'a CliIndexedRepo,
open_file: OpenFile,
Expand Down
Loading

0 comments on commit 8b18e62

Please sign in to comment.