Skip to content

Commit

Permalink
Create command to convert universal sheets to JSON
Browse files Browse the repository at this point in the history
  • Loading branch information
istride committed Oct 10, 2024
1 parent bd547d7 commit 86f5c2c
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 20 deletions.
6 changes: 4 additions & 2 deletions src/rpft/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,10 @@ def uni_to_sheets(args):


def sheets_to_uni(args):
    """
    Convert universal sheets to JSON and write the result to a file.

    Args:
        args: parsed command-line arguments. `args.input` is handed to
            `converters.sheets_to_uni`; `args.output` is the path of the
            JSON file to write.
    """
    data = converters.sheets_to_uni(args.input)

    with open(args.output, "w", encoding="utf-8") as f:
        # The file is written as UTF-8, so keep non-ASCII text readable
        # rather than escaping every character as a \uXXXX sequence.
        json.dump(data, f, indent=2, ensure_ascii=False)


def create_parser():
Expand Down
17 changes: 14 additions & 3 deletions src/rpft/converters.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import os
import re
import shutil
from pathlib import Path

Expand Down Expand Up @@ -62,11 +63,11 @@ def uni_to_sheets(infile) -> bytes:
]
)

return book.export("xlsx")
return book.export("ods")


def sheets_to_uni(infile, fmt=None) -> dict:
    """
    Convert a workbook of universal sheets into a nested structure.

    Args:
        infile: location of the workbook to read.
        fmt: sheet format handed to `create_sheet_reader`. Optional, so that
            both the one-argument and the two-argument call forms work; when
            left as None, `create_sheet_reader` works the format out from
            `infile`.

    Returns:
        The result of `parse_tables` for the workbook.
    """
    return parse_tables(create_sheet_reader(fmt, infile))


def get_content_index_parser(input_files, sheet_format, data_models, tags):
Expand Down Expand Up @@ -114,6 +115,8 @@ def flows_to_sheets(


def create_sheet_reader(sheet_format, input_file):
sheet_format = sheet_format if sheet_format else detect_format(input_file)

if sheet_format == "csv":
sheet_reader = CSVSheetReader(input_file)
elif sheet_format == "xlsx":
Expand All @@ -128,6 +131,14 @@ def create_sheet_reader(sheet_format, input_file):
return sheet_reader


def detect_format(fp):
    """
    Guess the sheet format of an input from its name alone.

    A 44-character string consisting only of letters, digits, underscores
    and hyphens is reported as "google_sheets"; a name whose suffix is
    ".xlsx" (in any letter case) is reported as "xlsx".

    Args:
        fp: input location, as a string or a path-like object.

    Returns:
        "google_sheets", "xlsx", or None when the format is not recognised.
    """
    # re.fullmatch() rejects path-like objects, so normalise to text first.
    name = os.fspath(fp)

    if re.fullmatch(r"[a-z0-9_-]{44}", name, re.IGNORECASE):
        return "google_sheets"

    if Path(name).suffix.lower() == ".xlsx":
        return "xlsx"

    return None


def sheets_to_csv(path, sheet_ids):
prepare_dir(path)

Expand Down
26 changes: 18 additions & 8 deletions src/rpft/parsers/universal.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,16 @@ def stringify(value) -> str:

@stringify.register
def _(value: dict) -> str:
    """
    Render a dict as the text of a single cell.

    Each item becomes "<key><KEY_VALUE_SEP> <value>", with keys and values
    themselves passed through `stringify`; items are joined with
    SEQ_ITEM_SEP surrounded by single spaces.
    """
    pairs = (
        f"{stringify(k)}{KEY_VALUE_SEP} {stringify(v)}" for k, v in value.items()
    )
    s = f" {SEQ_ITEM_SEP} ".join(pairs)

    # A lone pair contains no item separator after joining. Append one so
    # the cell still carries both separators, which is what convert_cell
    # requires before it treats a cell as an object.
    if len(value) == 1:
        s += f" {SEQ_ITEM_SEP}"

    return s


@stringify.register
def _(value: list) -> str:
Expand All @@ -203,10 +209,12 @@ def parse_tables(reader: AbstractSheetReader) -> dict:
"""
Parse a workbook into a nested structure
"""
return [
parse_table(title, sheet.table.headers, sheet.table[:])
for title, sheet in reader.sheets.items()
]
obj = benedict()

for title, sheet in reader.sheets.items():
obj.merge(parse_table(title, sheet.table.headers, sheet.table[:]))

return obj


def parse_table(
Expand Down Expand Up @@ -266,9 +274,11 @@ def create_obj(pairs):
obj = benedict()

for kp, v in pairs:
# print("KP:", kp)
# print("V:", v)
obj[kp] = v

return dict(obj)
return obj


def convert_cell(s: str, recursive=True) -> Any:
Expand All @@ -290,7 +300,7 @@ def convert_cell(s: str, recursive=True) -> Any:
if clean in ("true", "false"):
return clean == "true"

if recursive and KEY_VALUE_SEP in s:
if recursive and KEY_VALUE_SEP in s and SEQ_ITEM_SEP in s:
try:
props = [p.split(KEY_VALUE_SEP, 1) for p in s.split(SEQ_ITEM_SEP) if p]

Expand Down
24 changes: 17 additions & 7 deletions tests/test_universal.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,13 @@ def test_objects_use_single_cell_layout_by_default(self):

self.assertEqual(table[1], ["prop1: val1 | prop2: val2"])

def test_object_with_single_property_within_cell_has_trailing_separator(self):
    # A dict holding exactly one property is rendered into one cell and
    # must end with the item separator.
    rows = tabulate([{"obj": {"k": "v"}}])

    self.assertEqual(rows[1], ["k: v |"])

def test_objects_use_wide_layout_if_indicated_by_metadata(self):
meta = {"headers": ["obj1.k1", "obj1.k2", "seq1.1.k1", "seq1.2.k2"]}
data = [
Expand Down Expand Up @@ -461,7 +468,7 @@ def test_save_as_dict(self):

class TestConvertWorkbookToUniversal(TestCase):

def test_workbook_converts_to_list_of_objects(self):
def test_workbook_converts_to_object(self):
workbook = DatasetSheetReader(
[
Dataset(("t1a1", "t1b1"), headers=("T1A", "T1B"), title="table1"),
Expand All @@ -471,9 +478,12 @@ def test_workbook_converts_to_list_of_objects(self):

nested = parse_tables(workbook)

self.assertIsInstance(nested, list)
self.assertEqual(len(nested), 2)
self.assertTrue(all(type(o) is dict for o in nested))
self.assertIsInstance(nested, dict)
self.assertEqual(list(nested.keys()), ["_idems", "table1", "table2"])
self.assertEqual(
list(nested["_idems"]["tabulate"].keys()),
["table1", "table2"],
)


class TestConvertTableToNested(TestCase):
Expand Down Expand Up @@ -560,6 +570,8 @@ def test_output_clean_string_if_no_conversion_possible(self):
self.assertEqual(convert_cell("one"), "one")
self.assertEqual(convert_cell(" one "), "one")
self.assertEqual(convert_cell(""), "")
self.assertEqual(convert_cell("http://example.com/"), "http://example.com/")
self.assertEqual(convert_cell("k1: v1"), "k1: v1")

def test_raises_error_if_not_string_input(self):
self.assertRaises(TypeError, convert_cell, None)
Expand All @@ -578,8 +590,6 @@ def test_convert_cell_string_to_list(self):
self.assertEqual(convert_cell("k1 | v1 : k2 | v2"), ["k1", "v1 : k2", "v2"])

def test_convert_cell_string_to_dict(self):
    # Every input here contains both the key-value separator and the item
    # separator. A bare "k: v" cell is no longer converted to a dict, so the
    # former assertions for that form do not belong in this test.
    self.assertEqual(convert_cell("k1: v1 |"), {"k1": "v1"})
    self.assertEqual(convert_cell("k1: k2: v2 |"), {"k1": "k2: v2"})
    self.assertEqual(convert_cell("k1: 1 | k2: true"), {"k1": 1, "k2": True})

0 comments on commit 86f5c2c

Please sign in to comment.