From 1723abcd79ea4b65a1f5d6103b8724aa9a454b9c Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Thu, 17 Aug 2023 18:32:47 +0300 Subject: [PATCH] ls/ls-url: introduce --size I really need it only for studio right now, but this is really nice to have in daily life as well. E.g. ``` $ dvc ls-url . --size 108 Metadata-and-Results.dvc 97 AudioMP3.dvc 97 AudioWAV.dvc 12181 README.md 54 .gitignore 168 .gitattributes 99 VideoFlash.dvc 312 LICENSE.txt 139 .dvcignore 96 AudioMP3 96 Asset 192 .dvc 64 Metadata-and-Results 64 AudioWAV 192 Scripts 64 VideoFlash 384 .git 96 .vscode ``` ``` $ dvc list . --size 139 .dvcignore 168 .gitattributes 54 .gitignore 96 .vscode 96 Asset 96 AudioMP3 97 AudioMP3.dvc 64 AudioWAV 97 AudioWAV.dvc 312 LICENSE.txt 64 Metadata-and-Results 108 Metadata-and-Results.dvc 12181 README.md 192 Scripts 64 VideoFlash 99 VideoFlash.dvc ``` --- dvc/commands/ls/__init__.py | 28 ++++++++++++-- dvc/commands/ls_url.py | 11 ++++-- dvc/repo/ls.py | 1 + dvc/repo/ls_url.py | 1 + tests/func/test_ls.py | 76 +++++++++++++++++++++++++++---------- 5 files changed, 90 insertions(+), 27 deletions(-) diff --git a/dvc/commands/ls/__init__.py b/dvc/commands/ls/__init__.py index c044e3571a..bee863ff7b 100644 --- a/dvc/commands/ls/__init__.py +++ b/dvc/commands/ls/__init__.py @@ -1,6 +1,8 @@ import argparse import logging +from tqdm import tqdm + from dvc.cli import completion from dvc.cli.command import CmdBaseNoRepo from dvc.cli.utils import DictAction, append_doc_link @@ -11,7 +13,16 @@ logger = logging.getLogger(__name__) -def _prettify(entries, with_color=False): +def _format_entry(entry, fmt): + size = entry.get("size") + if size is None: + size = "" + else: + size = tqdm.format_sizeof(size, divisor=1024) + return size, fmt(entry) + + +def show_entries(entries, with_color=False, with_size=False): if with_color: ls_colors = LsColors() fmt = ls_colors.format @@ -20,7 +31,12 @@ def _prettify(entries, with_color=False): def fmt(entry): return entry["path"] - return 
[fmt(entry) for entry in entries] + if with_size: + ui.table([_format_entry(entry, fmt) for entry in entries]) + return + + # NOTE: this is faster than ui.table for very large number of entries + ui.write("\n".join(fmt(entry) for entry in entries)) class CmdList(CmdBaseNoRepo): @@ -41,8 +57,7 @@ def run(self): if self.args.json: ui.write_json(entries) elif entries: - entries = _prettify(entries, with_color=True) - ui.write("\n".join(entries)) + show_entries(entries, with_color=True, with_size=self.args.size) return 0 except DvcException: logger.exception("failed to list '%s'", self.args.url) @@ -106,6 +121,11 @@ def add_parser(subparsers, parent_parser): "specified by '--remote') in the target repository." ), ) + list_parser.add_argument( + "--size", + action="store_true", + help="Show sizes.", + ) list_parser.add_argument( "path", nargs="?", diff --git a/dvc/commands/ls_url.py b/dvc/commands/ls_url.py index 6b5af9a57f..80034f5491 100644 --- a/dvc/commands/ls_url.py +++ b/dvc/commands/ls_url.py @@ -3,9 +3,8 @@ from dvc.cli.command import CmdBaseNoRepo from dvc.cli.utils import append_doc_link -from dvc.ui import ui -from .ls import _prettify +from .ls import show_entries logger = logging.getLogger(__name__) @@ -16,8 +15,7 @@ def run(self): entries = Repo.ls_url(self.args.url, recursive=self.args.recursive) if entries: - entries = _prettify(entries, with_color=True) - ui.write("\n".join(entries)) + show_entries(entries, with_color=True, with_size=self.args.size) return 0 @@ -40,4 +38,9 @@ def add_parser(subparsers, parent_parser): action="store_true", help="Recursively list files.", ) + lsurl_parser.add_argument( + "--size", + action="store_true", + help="Show sizes.", + ) lsurl_parser.set_defaults(func=CmdListUrl) diff --git a/dvc/repo/ls.py b/dvc/repo/ls.py index 5e6005ea38..e4d00a5546 100644 --- a/dvc/repo/ls.py +++ b/dvc/repo/ls.py @@ -109,6 +109,7 @@ def _ls( "isout": dvc_info.get("isout", False), "isdir": info["type"] == "directory", "isexec": 
info.get("isexec", False), + "size": info.get("size"), } return ret diff --git a/dvc/repo/ls_url.py b/dvc/repo/ls_url.py index 7caf53a67b..496daa128b 100644 --- a/dvc/repo/ls_url.py +++ b/dvc/repo/ls_url.py @@ -20,6 +20,7 @@ def ls_url(url, *, config=None, recursive=False): ls_info = { "path": fs.path.relpath(info["name"], fs_path), "isdir": info["type"] == "directory", + "size": info.get("size"), } ret.append(ls_info) diff --git a/tests/func/test_ls.py b/tests/func/test_ls.py index c74e435842..bdba92d42b 100644 --- a/tests/func/test_ls.py +++ b/tests/func/test_ls.py @@ -460,7 +460,7 @@ def test_ls_shows_pipeline_tracked_outs(tmp_dir, dvc, scm, run_copy): match_files(files, ((("bar",), True),)) -def test_ls_granular(erepo_dir): +def test_ls_granular(erepo_dir, M): with erepo_dir.chdir(): erepo_dir.dvc_gen( { @@ -475,15 +475,21 @@ def test_ls_granular(erepo_dir): entries = Repo.ls(os.fspath(erepo_dir), os.path.join("dir", "subdir")) assert entries == [ - {"isout": True, "isdir": False, "isexec": False, "path": "bar"}, - {"isout": True, "isdir": False, "isexec": False, "path": "foo"}, + {"isout": True, "isdir": False, "isexec": False, "path": "bar", "size": 3}, + {"isout": True, "isdir": False, "isexec": False, "path": "foo", "size": 3}, ] entries = Repo.ls(os.fspath(erepo_dir), "dir") assert entries == [ - {"isout": True, "isdir": False, "isexec": False, "path": "1"}, - {"isout": True, "isdir": False, "isexec": False, "path": "2"}, - {"isout": True, "isdir": True, "isexec": False, "path": "subdir"}, + {"isout": True, "isdir": False, "isexec": False, "path": "1", "size": 1}, + {"isout": True, "isdir": False, "isexec": False, "path": "2", "size": 1}, + { + "isout": True, + "isdir": True, + "isexec": False, + "path": "subdir", + "size": M.instance_of(int), + }, ] @@ -508,14 +514,14 @@ def _ls(path): return Repo.ls(os.fspath(erepo_dir), path) assert _ls(os.path.join("dir", "1")) == [ - {"isout": isout, "isdir": False, "isexec": False, "path": "1"} + {"isout": isout, 
"isdir": False, "isexec": False, "path": "1", "size": 1} ] assert _ls(os.path.join("dir", "subdir", "foo")) == [ - {"isout": isout, "isdir": False, "isexec": False, "path": "foo"} + {"isout": isout, "isdir": False, "isexec": False, "path": "foo", "size": 3} ] assert _ls(os.path.join("dir", "subdir")) == [ - {"isdir": False, "isexec": 0, "isout": isout, "path": "bar"}, - {"isdir": False, "isexec": 0, "isout": isout, "path": "foo"}, + {"isdir": False, "isexec": 0, "isout": isout, "path": "bar", "size": 3}, + {"isdir": False, "isexec": 0, "isout": isout, "path": "foo", "size": 3}, ] @@ -559,7 +565,7 @@ def _list_files(repo, path=None): assert _list_files(subrepo, "dvc_dir") == {"lorem"} -def test_broken_symlink(tmp_dir, dvc): +def test_broken_symlink(tmp_dir, dvc, M): from dvc.fs import system tmp_dir.gen("file", "content") @@ -575,17 +581,19 @@ def test_broken_symlink(tmp_dir, dvc): "isdir": False, "isexec": False, "path": ".dvcignore", + "size": M.instance_of(int), }, { "isout": False, "isdir": False, "isexec": False, "path": "link", + "size": 0, }, ] -def test_ls_broken_dir(tmp_dir, dvc): +def test_ls_broken_dir(tmp_dir, dvc, M): from dvc_data.index import DataIndexDirError tmp_dir.dvc_gen( @@ -606,13 +614,43 @@ def test_ls_broken_dir(tmp_dir, dvc): entries = Repo.ls(os.fspath(tmp_dir)) assert entries == [ - {"isdir": False, "isexec": False, "isout": False, "path": ".dvcignore"}, - {"isdir": True, "isexec": False, "isout": True, "path": "broken"}, - {"isdir": False, "isexec": False, "isout": False, "path": "broken.dvc"}, - {"isdir": True, "isexec": False, "isout": True, "path": "dir"}, - {"isdir": False, "isexec": False, "isout": False, "path": "dir.dvc"}, - {"isdir": False, "isexec": False, "isout": True, "path": "foo"}, - {"isdir": False, "isexec": False, "isout": False, "path": "foo.dvc"}, + { + "isdir": False, + "isexec": False, + "isout": False, + "path": ".dvcignore", + "size": M.instance_of(int), + }, + {"isdir": True, "isexec": False, "isout": True, "path": 
"broken", "size": 3}, + { + "isdir": False, + "isexec": False, + "isout": False, + "path": "broken.dvc", + "size": M.instance_of(int), + }, + { + "isdir": True, + "isexec": False, + "isout": True, + "path": "dir", + "size": M.instance_of(int), + }, + { + "isdir": False, + "isexec": False, + "isout": False, + "path": "dir.dvc", + "size": M.instance_of(int), + }, + {"isdir": False, "isexec": False, "isout": True, "path": "foo", "size": 3}, + { + "isdir": False, + "isexec": False, + "isout": False, + "path": "foo.dvc", + "size": M.instance_of(int), + }, ] with pytest.raises(DataIndexDirError):