Skip to content

Commit

Permalink
Refactor code after code review
Browse files Browse the repository at this point in the history
This attempts to address issues raised during the review of [pull
request
129](#129).
Changes involve renaming the `--wrap` argument and associated internal
variable, and factoring out the line wrapping code into a separate function.
  • Loading branch information
mhucka authored and acdha committed Oct 15, 2024
1 parent 81b01c0 commit a485c1e
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 35 deletions.
73 changes: 40 additions & 33 deletions bagit.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def find_locale_dir():

def make_bag(
bag_dir, bag_info=None, processes=1, checksums=None, checksum=None,
encoding="utf-8", line_length=0
encoding="utf-8", tag_wrap_column=0
):
"""
Convert a given directory into a bag. You can pass in arbitrary
Expand Down Expand Up @@ -261,7 +261,7 @@ def make_bag(
)

bag_info["Payload-Oxum"] = "%s.%s" % (total_bytes, total_files)
_make_tag_file("bag-info.txt", bag_info, line_length)
_make_tag_file("bag-info.txt", bag_info, tag_wrap_column)

for c in checksums:
_make_tagmanifest_file(c, bag_dir, encoding="utf-8")
Expand Down Expand Up @@ -455,7 +455,7 @@ def payload_entries(self):
if key.startswith("data" + os.sep)
)

def save(self, processes=1, manifests=False, line_length=0):
def save(self, processes=1, manifests=False, tag_wrap_column=0):
"""
save will persist any changes that have been made to the bag
metadata (self.info).
Expand All @@ -470,8 +470,8 @@ def save(self, processes=1, manifests=False, line_length=0):
recalculating checksums use the processes parameter.
If you want long tag values to be wrapped by breaking long strings at
whitespace characters, set line_length to a value greater than 0. An
integer value greater than 0 causes line-wrapping to be performed on
whitespace characters, set tag_wrap_column to a value greater than 0.
An integer value greater than 0 causes line-wrapping to be performed on
a best-effort basis to limit line lengths to the given value.
"""
# Error checking
Expand Down Expand Up @@ -524,7 +524,7 @@ def save(self, processes=1, manifests=False, line_length=0):
LOGGER.info(_("Updating Payload-Oxum in %s"), self.tag_file_name)
self.info["Payload-Oxum"] = "%s.%s" % (total_bytes, total_files)

_make_tag_file(self.tag_file_name, self.info, line_length)
_make_tag_file(self.tag_file_name, self.info, tag_wrap_column)

# Update tag-manifest for changes to manifest & bag-info files
for alg in self.algorithms:
Expand Down Expand Up @@ -1224,40 +1224,47 @@ def _parse_tags(tag_file):
yield (tag_name, tag_value.strip())


def _make_tag_file(bag_info_path, bag_info, line_length):
def _make_tag_file(bag_info_path, bag_info, tag_wrap_column):
headers = sorted(bag_info.keys())
with open_text_file(bag_info_path, "w") as f:
for h in headers:
values = bag_info[h]
if not isinstance(values, list):
values = [values]
for txt in values:
txt = force_unicode(txt)
if line_length > 1:
# Account for colon & space written after the property name.
prop_width = len(h) + 2
first_break = prop_width + len(txt.split(None, 1)[0])
if line_length <= first_break:
# Start value on the next line.
txt = '\n'.join(textwrap.wrap(txt, width=line_length,
break_long_words=False,
break_on_hyphens=False,
initial_indent='\n ',
subsequent_indent=' '))
else:
# Account for tag name by temporarily adding a leading
# space before calling wrap(), then removing the space.
txt = '\n'.join(textwrap.wrap(txt, width=line_length,
break_long_words=False,
break_on_hyphens=False,
initial_indent=' '*prop_width,
subsequent_indent=' '))
txt = txt[prop_width:]
else:
txt = re.sub(r"\n|\r|(\r\n)", "", txt)
txt = _format_tag_value(h, txt, tag_wrap_column)
f.write("%s: %s\n" % (h, txt))


def _format_tag_value(header, txt, tag_wrap_column):
txt = force_unicode(txt)
if tag_wrap_column > 1:
# Account for colon & space written after the property name.
prop_width = len(header) + 2
# If the wrap column falls in the middle of the first word of the
# value, start on the next line. This avoids a very long first line.
first_break = prop_width + len(txt.split(None, 1)[0])
if tag_wrap_column <= first_break:
# Start value on the next line.
txt = '\n'.join(textwrap.wrap(txt, initial_indent='\n ',
width=tag_wrap_column,
break_long_words=False,
break_on_hyphens=False,
subsequent_indent=' '))
else:
# Account for tag name by temporarily adding a leading space
# before calling wrap(), then removing the space later below.
txt = '\n'.join(textwrap.wrap(txt, initial_indent=' '*prop_width,
width=tag_wrap_column,
break_long_words=False,
break_on_hyphens=False,
subsequent_indent=' '))
txt = txt[prop_width:]
else:
txt = re.sub(r"\n|\r|(\r\n)", "", txt)
return txt


def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding="utf-8"):
LOGGER.info(
_("Using %(process_count)d processes to generate manifests: %(algorithms)s"),
Expand Down Expand Up @@ -1513,9 +1520,9 @@ def _make_parser():
),
)
parser.add_argument(
"--wrap",
"--wrap-tags",
type=int,
dest="line_length",
dest="tag_wrap_column",
default=0,
help=_(
"Limit line lengths in the tag file (bag-info.txt) by"
Expand Down Expand Up @@ -1632,7 +1639,7 @@ def main():
bag_info=args.bag_info,
processes=args.processes,
checksums=args.checksums,
line_length=args.line_length,
tag_wrap_column=args.tag_wrap_column,
)
except Exception as exc:
LOGGER.error(
Expand Down
4 changes: 2 additions & 2 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def test_validate_wrapped_tag_lines(self):
' digital content. A bag consists of a directory containing'
' the payload files and other accompanying metadata files'
' known as "tag" files.')}
bag = bagit.make_bag(self.tmpdir, bag_info=info, line_length=79)
bag = bagit.make_bag(self.tmpdir, bag_info=info, tag_wrap_column=79)
self.assertEqual(self.validate(bag, fast=True), True)
os.remove(j(self.tmpdir, "data", "loc", "2478433644_2839c5e8b8_o_d.jpg"))
self.assertRaises(bagit.BagValidationError, self.validate, bag, fast=True)
Expand All @@ -135,7 +135,7 @@ def test_validate_wrapped_tag_lines_short(self):
' digital content. A bag consists of a directory containing'
' the payload files and other accompanying metadata files'
' known as "tag" files.')}
bag = bagit.make_bag(self.tmpdir, bag_info=info, line_length=18)
bag = bagit.make_bag(self.tmpdir, bag_info=info, tag_wrap_column=18)
self.assertEqual(self.validate(bag, fast=True), True)
os.remove(j(self.tmpdir, "data", "loc", "2478433644_2839c5e8b8_o_d.jpg"))
self.assertRaises(bagit.BagValidationError, self.validate, bag, fast=True)
Expand Down

0 comments on commit a485c1e

Please sign in to comment.