Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(commands): More dump options #1339

Merged
merged 9 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ dateparser = "0.2.1"
derive_more = { version = "1", features = ["debug"] }
dialoguer = "0.11.0"
directories = "5"
flate2 = "1.0.34"
fuse_mt = { version = "0.6", optional = true }
futures = { version = "0.3.31", optional = true }
gethostname = "0.5"
Expand All @@ -120,6 +121,7 @@ open = "5.3.1"
self_update = { version = "=0.39.0", default-features = false, optional = true, features = ["rustls", "archive-tar", "compression-flate2"] } # FIXME: Downgraded to 0.39.0 due to https://github.com/jaemk/self_update/issues/136
tar = "0.4.43"
toml = "0.8"
zip = { version = "2.2.0", default-features = false, features = ["deflate", "chrono"] }

# filtering
jaq-core = { version = "2", optional = true }
Expand Down
1 change: 1 addition & 0 deletions deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ allow = [
"CC0-1.0",
"Zlib",
"Unicode-3.0",
"BSL-1.0",
]
# The confidence threshold for detecting a license from license text.
# The higher the value, the more closely the license text must be to the
Expand Down
213 changes: 204 additions & 9 deletions src/commands/dump.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
//! `dump` subcommand

use std::io::{Read, Write};
use std::{
fs::File,
io::{copy, Cursor, Read, Seek, SeekFrom, Write},
path::PathBuf,
};

use crate::{repository::CliIndexedRepo, status_err, Application, RUSTIC_APP};

use abscissa_core::{Command, Runnable, Shutdown};
use anyhow::Result;
use derive_more::FromStr;
use flate2::{write::GzEncoder, Compression};
use log::warn;
use rustic_core::{
repofile::{Node, NodeType},
vfs::OpenFile,
LsOptions,
};
use tar::{Builder, EntryType, Header};
use zip::{write::SimpleFileOptions, ZipWriter};

/// `dump` subcommand
#[derive(clap::Parser, Command, Debug)]
Expand All @@ -21,9 +28,38 @@ pub(crate) struct DumpCmd {
#[clap(value_name = "SNAPSHOT[:PATH]")]
snap: String,

/// Listing options
#[clap(flatten)]
ls_opts: LsOptions,
/// set archive format to use. Possible values: auto, content, tar, targz, zip. For "auto" format is determined by file extension (if given) or "tar" for dirs.
#[clap(long, value_name = "FORMAT", default_value = "auto")]
archive: ArchiveKind,

/// dump output to the given file. Use this instead of redirecting stdout to a file.
#[clap(long)]
file: Option<PathBuf>,

/// Glob pattern to exclude/include (can be specified multiple times)
#[clap(long, help_heading = "Exclude options")]
glob: Vec<String>,

/// Same as --glob pattern but ignores the casing of filenames
#[clap(long, value_name = "GLOB", help_heading = "Exclude options")]
iglob: Vec<String>,

/// Read glob patterns to exclude/include from this file (can be specified multiple times)
#[clap(long, value_name = "FILE", help_heading = "Exclude options")]
glob_file: Vec<String>,

/// Same as --glob-file but ignores the casing of filenames in patterns
#[clap(long, value_name = "FILE", help_heading = "Exclude options")]
iglob_file: Vec<String>,
}

/// Output format selected through the `--archive` option of the `dump` command.
///
/// The value is parsed from the command line via the derived `FromStr` implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, FromStr)]
enum ArchiveKind {
/// Pick the format when dumping: the extension of `--file` decides (`tar`, `tgz`/`gz`, `zip`);
/// without a matching extension directories are dumped as `Tar`, everything else as `Content`.
Auto,
/// Write the raw content of every listed node, without any archive framing.
Content,
/// Write a tar archive.
Tar,
/// Write a gzip-compressed tar archive.
TarGz,
/// Write a zip archive.
Zip,
}

impl Runnable for DumpCmd {
Expand All @@ -46,17 +82,77 @@ impl DumpCmd {
let node =
repo.node_from_snapshot_path(&self.snap, |sn| config.snapshot_filter.matches(sn))?;

let mut stdout = std::io::stdout();
if node.is_file() {
repo.dump(&node, &mut stdout)?;
let stdout = std::io::stdout();

let ls_opts = LsOptions::default()
.glob(self.glob.clone())
.glob_file(self.glob_file.clone())
.iglob(self.iglob.clone())
.iglob_file(self.iglob_file.clone())
.recursive(true);

let ext = self
.file
.as_ref()
.and_then(|f| f.extension().map(|s| s.to_string_lossy().to_string()));

let archive = match self.archive {
ArchiveKind::Auto => match ext.as_deref() {
Some("tar") => ArchiveKind::Tar,
Some("tgz") | Some("gz") => ArchiveKind::TarGz,
Some("zip") => ArchiveKind::Zip,
_ if node.is_dir() => ArchiveKind::Tar,
_ => ArchiveKind::Content,
},
a => a,
};

let mut w: Box<dyn Write> = if let Some(file) = &self.file {
let mut file = File::create(file)?;
if archive == ArchiveKind::Zip {
// when writing zip to a file, we use the optimized writer
return write_zip_to_file(&repo, &node, &mut file, &ls_opts);
}
Box::new(file)
} else {
dump_tar(&repo, &node, &mut stdout, &self.ls_opts)?;
}
Box::new(stdout)
};

match archive {
ArchiveKind::Content => dump_content(&repo, &node, &mut w, &ls_opts)?,
ArchiveKind::Tar => dump_tar(&repo, &node, &mut w, &ls_opts)?,
ArchiveKind::TarGz => dump_tar_gz(&repo, &node, &mut w, &ls_opts)?,
ArchiveKind::Zip => dump_zip(&repo, &node, &mut w, &ls_opts)?,
_ => {}
};

Ok(())
}
}

/// Writes the plain content of everything listed below `node` to `w`.
///
/// The entries are enumerated with `repo.ls` (honouring `ls_opts`) and each one is dumped
/// to `w` in listing order; the paths of the entries are not written.
///
/// # Errors
///
/// Returns the first error raised while listing the tree or while dumping an entry.
fn dump_content(
    repo: &CliIndexedRepo,
    node: &Node,
    w: &mut impl Write,
    ls_opts: &LsOptions,
) -> Result<()> {
    repo.ls(node, ls_opts)?.try_for_each(|entry| -> Result<()> {
        // Only the node is needed here; its path is irrelevant for raw content output.
        let (_path, child) = entry?;
        repo.dump(&child, w)?;
        Ok(())
    })
}

/// Writes `node` as a gzip-compressed tar archive to `w`.
///
/// The tar stream produced by `dump_tar` is piped through a [`GzEncoder`] using the default
/// compression level.
///
/// # Errors
///
/// Returns an error if creating the tar stream fails or if the gzip stream cannot be
/// completed (e.g. the trailer cannot be written to `w`).
fn dump_tar_gz(
    repo: &CliIndexedRepo,
    node: &Node,
    w: &mut impl Write,
    ls_opts: &LsOptions,
) -> Result<()> {
    let mut encoder = GzEncoder::new(w, Compression::default());
    dump_tar(repo, node, &mut encoder, ls_opts)?;
    // Finish explicitly: when the encoder is merely dropped, errors while writing the
    // remaining compressed data and the gzip trailer are silently discarded, which could
    // leave a truncated archive behind while reporting success.
    _ = encoder.finish()?;
    Ok(())
}

fn dump_tar(
repo: &CliIndexedRepo,
node: &Node,
Expand Down Expand Up @@ -135,6 +231,105 @@ fn dump_tar(
Ok(())
}

/// Writes `node` as a zip archive to a sink which only supports [`Write`].
///
/// `ZipWriter` needs a seekable target, so `w` is wrapped into a `SeekWriter`, which buffers
/// the data in memory until it is flushed. The zip writer is told to flush after every
/// finished file, so the buffer is handed on to `w` file by file.
///
/// # Errors
///
/// Returns an error if listing or dumping an entry, writing the zip structures or the final
/// flush fails.
fn dump_zip(
    repo: &CliIndexedRepo,
    node: &Node,
    w: &mut impl Write,
    ls_opts: &LsOptions,
) -> Result<()> {
    let buffered = SeekWriter {
        write: w,
        cursor: Cursor::new(Vec::new()),
        written: 0,
    };

    let mut archive = ZipWriter::new(buffered);
    archive.set_flush_on_finish_file(true);
    write_zip_contents(repo, node, &mut archive, ls_opts)?;

    // Finishing writes the trailing zip records into the buffer; flush them out as well.
    archive.finish()?.flush()?;
    Ok(())
}

/// Writes `node` as a zip archive directly into a seekable target.
///
/// As `file` supports [`Seek`], the zip writer can operate on it without any intermediate
/// buffering.
///
/// # Errors
///
/// Returns an error if listing or dumping an entry or writing the zip structures fails.
fn write_zip_to_file(
    repo: &CliIndexedRepo,
    node: &Node,
    file: &mut (impl Write + Seek),
    ls_opts: &LsOptions,
) -> Result<()> {
    let mut archive = ZipWriter::new(file);
    write_zip_contents(repo, node, &mut archive, ls_opts)?;
    // The writer handed back by `finish` is not needed any more.
    _ = archive.finish()?;
    Ok(())
}

/// Adds everything listed below `node` to the given zip writer.
///
/// Each listed entry gets its unix mode and modification time attached if they are present
/// in the node's metadata. Files are stored with their content; every other node type is
/// added as a directory entry.
///
/// # Errors
///
/// Returns an error if listing or dumping an entry or writing to the zip writer fails.
fn write_zip_contents(
    repo: &CliIndexedRepo,
    node: &Node,
    zip: &mut ZipWriter<impl Write + Seek>,
    ls_opts: &LsOptions,
) -> Result<()> {
    for entry in repo.ls(node, ls_opts)? {
        let (path, entry_node) = entry?;

        let options = SimpleFileOptions::default();
        let options = match entry_node.meta.mode {
            // TODO: this is some go-mapped mode, but lower bits are the standard unix mode bits -> is this ok?
            Some(mode) => options.unix_permissions(mode),
            None => options,
        };
        let options = match entry_node.meta.mtime {
            // A time which cannot be converted into the zip format falls back to the default.
            Some(mtime) => {
                options.last_modified_time(mtime.naive_local().try_into().unwrap_or_default())
            }
            None => options,
        };

        if !entry_node.is_file() {
            zip.add_directory_from_path(path, options)?;
            continue;
        }

        zip.start_file_from_path(path, options)?;
        repo.dump(&entry_node, zip)?;
    }
    Ok(())
}

/// Adapter which provides limited [`Seek`] support on top of a plain [`Write`] sink.
///
/// All data is collected in an in-memory buffer until [`Write::flush`] is called; only then
/// is it handed on to the wrapped writer. Seeking is possible within the data which has not
/// been flushed yet. All positions taken and returned by the [`Seek`] implementation are
/// absolute, i.e. they count from the very first byte written through this adapter.
struct SeekWriter<W> {
    /// The wrapped sink which receives the buffered data on flush.
    write: W,
    /// Buffer holding everything written since the last flush.
    cursor: Cursor<Vec<u8>>,
    /// Number of bytes already handed on to `write`; the absolute offset of the buffer start.
    written: u64,
}

impl<W> Read for SeekWriter<W> {
    /// Reads from the not-yet-flushed buffer, starting at the current buffer position.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        self.cursor.read(buf)
    }
}

impl<W: Write> Write for SeekWriter<W> {
    /// Writes into the in-memory buffer at the current position.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        self.cursor.write(buf)
    }

    /// Hands the complete buffer on to the wrapped writer, empties it and flushes the
    /// wrapped writer.
    ///
    /// After a flush, the flushed data can no longer be reached by seeking.
    fn flush(&mut self) -> std::io::Result<()> {
        // Copy the whole buffer, independent of the position a previous seek has left.
        _ = self.cursor.seek(SeekFrom::Start(0))?;
        let n = copy(&mut self.cursor, &mut self.write)?;
        _ = self.cursor.seek(SeekFrom::Start(0))?;
        self.cursor.get_mut().clear();
        // Give back memory after a large entry, but keep up to 1 MB for reuse.
        self.cursor.get_mut().shrink_to(1_000_000);
        self.written += n;
        // Flushing must reach the final destination, not only the wrapped writer's buffers.
        self.write.flush()
    }
}

impl<W> Seek for SeekWriter<W> {
    /// Seeks within the not-yet-flushed data and returns the new absolute position.
    ///
    /// # Errors
    ///
    /// Returns an [`std::io::ErrorKind::InvalidInput`] error if an absolute position within
    /// already flushed data is requested, as this data is no longer available.
    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
        let buffer_pos = match pos {
            SeekFrom::Start(n) => {
                // Translate the absolute offset into an offset within the buffer.
                let offset = n.checked_sub(self.written).ok_or_else(|| {
                    std::io::Error::new(
                        std::io::ErrorKind::InvalidInput,
                        "cannot seek into data which has already been flushed",
                    )
                })?;
                SeekFrom::Start(offset)
            }
            // End- and current-relative seeks are identical for the buffer and the stream.
            pos => pos,
        };
        Ok(self.written + self.cursor.seek(buffer_pos)?)
    }

    /// Returns the absolute position, counting the data which has already been flushed.
    fn stream_position(&mut self) -> std::io::Result<u64> {
        Ok(self.written + self.cursor.stream_position()?)
    }
}

struct OpenFileReader<'a> {
repo: &'a CliIndexedRepo,
open_file: OpenFile,
Expand Down
Loading
Loading