Skip to content

Commit

Permalink
ls/ls-url: introduce --size
Browse files Browse the repository at this point in the history
I really need this only for Studio right now, but it is also really nice to have in
daily life.

E.g.

```
$ dvc ls-url . --size
108    Metadata-and-Results.dvc
97     AudioMP3.dvc
97     AudioWAV.dvc
12181  README.md
54     .gitignore
168    .gitattributes
99     VideoFlash.dvc
312    LICENSE.txt
139    .dvcignore
96     AudioMP3
96     Asset
192    .dvc
64     Metadata-and-Results
64     AudioWAV
192    Scripts
64     VideoFlash
384    .git
96     .vscode
```

```
$ dvc list . --size
139    .dvcignore
168    .gitattributes
54     .gitignore
96     .vscode
96     Asset
96     AudioMP3
97     AudioMP3.dvc
64     AudioWAV
97     AudioWAV.dvc
312    LICENSE.txt
64     Metadata-and-Results
108    Metadata-and-Results.dvc
12181  README.md
192    Scripts
64     VideoFlash
99     VideoFlash.dvc
```
  • Loading branch information
efiop committed Aug 17, 2023
1 parent 3081c60 commit 1723abc
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 27 deletions.
28 changes: 24 additions & 4 deletions dvc/commands/ls/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import argparse
import logging

from tqdm import tqdm

from dvc.cli import completion
from dvc.cli.command import CmdBaseNoRepo
from dvc.cli.utils import DictAction, append_doc_link
Expand All @@ -11,7 +13,16 @@
logger = logging.getLogger(__name__)


def _prettify(entries, with_color=False):
def _format_entry(entry, fmt):
size = entry.get("size")
if size is None:
size = ""
else:
size = tqdm.format_sizeof(size, divisor=1024)
return size, fmt(entry)


def show_entries(entries, with_color=False, with_size=False):
if with_color:
ls_colors = LsColors()
fmt = ls_colors.format
Expand All @@ -20,7 +31,12 @@ def _prettify(entries, with_color=False):
def fmt(entry):
return entry["path"]

return [fmt(entry) for entry in entries]
if with_size:
ui.table([_format_entry(entry, fmt) for entry in entries])
return

# NOTE: this is faster than ui.table for a very large number of entries
ui.write("\n".join(fmt(entry) for entry in entries))


class CmdList(CmdBaseNoRepo):
Expand All @@ -41,8 +57,7 @@ def run(self):
if self.args.json:
ui.write_json(entries)
elif entries:
entries = _prettify(entries, with_color=True)
ui.write("\n".join(entries))
show_entries(entries, with_color=True, with_size=self.args.size)
return 0
except DvcException:
logger.exception("failed to list '%s'", self.args.url)
Expand Down Expand Up @@ -106,6 +121,11 @@ def add_parser(subparsers, parent_parser):
"specified by '--remote') in the target repository."
),
)
list_parser.add_argument(
"--size",
action="store_true",
help="Show sizes.",
)
list_parser.add_argument(
"path",
nargs="?",
Expand Down
11 changes: 7 additions & 4 deletions dvc/commands/ls_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@

from dvc.cli.command import CmdBaseNoRepo
from dvc.cli.utils import append_doc_link
from dvc.ui import ui

from .ls import _prettify
from .ls import show_entries

logger = logging.getLogger(__name__)

Expand All @@ -16,8 +15,7 @@ def run(self):

entries = Repo.ls_url(self.args.url, recursive=self.args.recursive)
if entries:
entries = _prettify(entries, with_color=True)
ui.write("\n".join(entries))
show_entries(entries, with_color=True, with_size=self.args.size)
return 0


Expand All @@ -40,4 +38,9 @@ def add_parser(subparsers, parent_parser):
action="store_true",
help="Recursively list files.",
)
lsurl_parser.add_argument(
"--size",
action="store_true",
help="Show sizes.",
)
lsurl_parser.set_defaults(func=CmdListUrl)
1 change: 1 addition & 0 deletions dvc/repo/ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ def _ls(
"isout": dvc_info.get("isout", False),
"isdir": info["type"] == "directory",
"isexec": info.get("isexec", False),
"size": info.get("size"),
}

return ret
1 change: 1 addition & 0 deletions dvc/repo/ls_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def ls_url(url, *, config=None, recursive=False):
ls_info = {
"path": fs.path.relpath(info["name"], fs_path),
"isdir": info["type"] == "directory",
"size": info.get("size"),
}
ret.append(ls_info)

Expand Down
76 changes: 57 additions & 19 deletions tests/func/test_ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ def test_ls_shows_pipeline_tracked_outs(tmp_dir, dvc, scm, run_copy):
match_files(files, ((("bar",), True),))


def test_ls_granular(erepo_dir):
def test_ls_granular(erepo_dir, M):
with erepo_dir.chdir():
erepo_dir.dvc_gen(
{
Expand All @@ -475,15 +475,21 @@ def test_ls_granular(erepo_dir):

entries = Repo.ls(os.fspath(erepo_dir), os.path.join("dir", "subdir"))
assert entries == [
{"isout": True, "isdir": False, "isexec": False, "path": "bar"},
{"isout": True, "isdir": False, "isexec": False, "path": "foo"},
{"isout": True, "isdir": False, "isexec": False, "path": "bar", "size": 3},
{"isout": True, "isdir": False, "isexec": False, "path": "foo", "size": 3},
]

entries = Repo.ls(os.fspath(erepo_dir), "dir")
assert entries == [
{"isout": True, "isdir": False, "isexec": False, "path": "1"},
{"isout": True, "isdir": False, "isexec": False, "path": "2"},
{"isout": True, "isdir": True, "isexec": False, "path": "subdir"},
{"isout": True, "isdir": False, "isexec": False, "path": "1", "size": 1},
{"isout": True, "isdir": False, "isexec": False, "path": "2", "size": 1},
{
"isout": True,
"isdir": True,
"isexec": False,
"path": "subdir",
"size": M.instance_of(int),
},
]


Expand All @@ -508,14 +514,14 @@ def _ls(path):
return Repo.ls(os.fspath(erepo_dir), path)

assert _ls(os.path.join("dir", "1")) == [
{"isout": isout, "isdir": False, "isexec": False, "path": "1"}
{"isout": isout, "isdir": False, "isexec": False, "path": "1", "size": 1}
]
assert _ls(os.path.join("dir", "subdir", "foo")) == [
{"isout": isout, "isdir": False, "isexec": False, "path": "foo"}
{"isout": isout, "isdir": False, "isexec": False, "path": "foo", "size": 3}
]
assert _ls(os.path.join("dir", "subdir")) == [
{"isdir": False, "isexec": 0, "isout": isout, "path": "bar"},
{"isdir": False, "isexec": 0, "isout": isout, "path": "foo"},
{"isdir": False, "isexec": 0, "isout": isout, "path": "bar", "size": 3},
{"isdir": False, "isexec": 0, "isout": isout, "path": "foo", "size": 3},
]


Expand Down Expand Up @@ -559,7 +565,7 @@ def _list_files(repo, path=None):
assert _list_files(subrepo, "dvc_dir") == {"lorem"}


def test_broken_symlink(tmp_dir, dvc):
def test_broken_symlink(tmp_dir, dvc, M):
from dvc.fs import system

tmp_dir.gen("file", "content")
Expand All @@ -575,17 +581,19 @@ def test_broken_symlink(tmp_dir, dvc):
"isdir": False,
"isexec": False,
"path": ".dvcignore",
"size": M.instance_of(int),
},
{
"isout": False,
"isdir": False,
"isexec": False,
"path": "link",
"size": 0,
},
]


def test_ls_broken_dir(tmp_dir, dvc):
def test_ls_broken_dir(tmp_dir, dvc, M):
from dvc_data.index import DataIndexDirError

tmp_dir.dvc_gen(
Expand All @@ -606,13 +614,43 @@ def test_ls_broken_dir(tmp_dir, dvc):

entries = Repo.ls(os.fspath(tmp_dir))
assert entries == [
{"isdir": False, "isexec": False, "isout": False, "path": ".dvcignore"},
{"isdir": True, "isexec": False, "isout": True, "path": "broken"},
{"isdir": False, "isexec": False, "isout": False, "path": "broken.dvc"},
{"isdir": True, "isexec": False, "isout": True, "path": "dir"},
{"isdir": False, "isexec": False, "isout": False, "path": "dir.dvc"},
{"isdir": False, "isexec": False, "isout": True, "path": "foo"},
{"isdir": False, "isexec": False, "isout": False, "path": "foo.dvc"},
{
"isdir": False,
"isexec": False,
"isout": False,
"path": ".dvcignore",
"size": M.instance_of(int),
},
{"isdir": True, "isexec": False, "isout": True, "path": "broken", "size": 3},
{
"isdir": False,
"isexec": False,
"isout": False,
"path": "broken.dvc",
"size": M.instance_of(int),
},
{
"isdir": True,
"isexec": False,
"isout": True,
"path": "dir",
"size": M.instance_of(int),
},
{
"isdir": False,
"isexec": False,
"isout": False,
"path": "dir.dvc",
"size": M.instance_of(int),
},
{"isdir": False, "isexec": False, "isout": True, "path": "foo", "size": 3},
{
"isdir": False,
"isexec": False,
"isout": False,
"path": "foo.dvc",
"size": M.instance_of(int),
},
]

with pytest.raises(DataIndexDirError):
Expand Down

0 comments on commit 1723abc

Please sign in to comment.