Skip to content

Commit

Permalink
feat: add universal loader part
Browse files Browse the repository at this point in the history
  • Loading branch information
phil65 committed Nov 10, 2024
1 parent 3d93cc3 commit c73b2a9
Show file tree
Hide file tree
Showing 2 changed files with 375 additions and 0 deletions.
164 changes: 164 additions & 0 deletions src/yamling/load_universal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import configparser
import importlib.util
import json
import logging
import os
from pathlib import Path
import tomllib
from typing import Any, Literal, get_args

from yaml import YAMLError

from yamling.yaml import load_yaml


# Module-level logger; load() and load_file() report failures through it.
logger = logging.getLogger(__name__)

# Formats that load() has a parsing branch for.
SupportedFormats = Literal["yaml", "toml", "json", "ini"]
# Accepted by load_file(): a concrete format, or "auto" to pick one from the
# file extension.
FormatType = SupportedFormats | Literal["auto"]

# True when the optional orjson package is importable; load() consults this
# flag when parsing JSON.
has_orjson = importlib.util.find_spec("orjson") is not None


class ParsingError(Exception):
"""Common exception for all parsing errors in yamling."""

def __init__(self, message: str, original_error: Exception | None = None) -> None:
super().__init__(message)
self.original_error = original_error


def load(text: str, mode: SupportedFormats, **kwargs: Any) -> Any:
"""Load data from a string in the specified format.
Args:
text: String containing data in the specified format
mode: Format of the input data ("yaml", "toml", "json", or "ini")
**kwargs: Additional keyword arguments passed to the underlying load functions
Returns:
Parsed data structure
Raises:
ValueError: If the format is not supported
ParsingError: If the text cannot be parsed in the specified format
"""
match mode:
case "yaml":
try:
return load_yaml(text, **kwargs)
except YAMLError as e:
logger.exception("Failed to load YAML data")
msg = f"Failed to parse YAML data: {e}"
raise ParsingError(msg, e) from e

case "toml":
try:
return tomllib.loads(text, **kwargs)
except tomllib.TOMLDecodeError as e:
logger.exception("Failed to load TOML data")
msg = f"Failed to parse TOML data: {e}"
raise ParsingError(msg, e) from e

case "json":
if has_orjson:
import orjson

try:
valid_kwargs = {
k: v for k, v in kwargs.items() if k in {"default", "option"}
}
return orjson.loads(text, **valid_kwargs)
except orjson.JSONDecodeError as e:
logger.exception("Failed to load JSON data with orjson")
msg = f"Failed to parse JSON data: {e}"
raise ParsingError(msg, e) from e
else:
try:
return json.loads(text, **kwargs)
except json.JSONDecodeError as e:
logger.exception("Failed to load JSON data with json")
msg = f"Failed to parse JSON data: {e}"
raise ParsingError(msg, e) from e

case "ini":
try:
parser = configparser.ConfigParser(**kwargs)
parser.read_string(text)
return {
section: dict(parser.items(section)) for section in parser.sections()
}
except (
configparser.Error,
configparser.ParsingError,
configparser.MissingSectionHeaderError,
) as e:
logger.exception("Failed to load INI data")
msg = f"Failed to parse INI data: {e}"
raise ParsingError(msg, e) from e

case _:
msg = f"Unsupported format: {mode}"
raise ValueError(msg)


def load_file(path: str | os.PathLike[str], mode: FormatType = "auto") -> Any:
    """Load data from a file, automatically detecting the format from extension if needed.

    The format is validated before the file is touched, so an unknown
    extension or an unsupported ``mode`` raises ``ValueError`` even when the
    file does not exist.

    Args:
        path: Path to the file to load
        mode: Format of the file ("yaml", "toml", "json", "ini" or "auto")

    Returns:
        Parsed data structure

    Raises:
        ValueError: If the format cannot be determined or is not supported
        OSError: If the file cannot be read
        FileNotFoundError: If the file does not exist
        PermissionError: If file permissions prevent reading
        ParsingError: If the text cannot be parsed in the specified format
    """
    path_obj = Path(path)

    # Determine format from extension if auto mode
    if mode == "auto":
        format_mapping: dict[str, SupportedFormats] = {
            ".yaml": "yaml",
            ".yml": "yaml",
            ".toml": "toml",
            ".tml": "toml",
            ".json": "json",
            ".jsonc": "json",
            ".ini": "ini",
            ".cfg": "ini",
            ".conf": "ini",
            ".config": "ini",
            ".properties": "ini",
            ".cnf": "ini",
            ".env": "ini",
        }
        detected_mode = format_mapping.get(path_obj.suffix.lower())
        if detected_mode is None:
            msg = f"Could not determine format from file extension: {path}"
            raise ValueError(msg)
        mode = detected_mode

    # At this point, mode can't be "auto"
    if mode not in get_args(SupportedFormats):
        msg = f"Unsupported format: {mode}"
        raise ValueError(msg)

    try:
        # Decode explicitly as UTF-8 instead of the platform's locale
        # encoding, so the same file parses identically on every system.
        text = path_obj.read_text(encoding="utf-8")
    # FileNotFoundError and PermissionError are OSError subclasses.
    except (OSError, UnicodeDecodeError):
        logger.exception("Failed to read file %r", path)
        raise

    try:
        return load(text, mode)
    except Exception:
        # Log with the file path for context, then let the error propagate
        # unchanged to the caller.
        logger.exception("Failed to parse file %r as %s", path, mode)
        raise
211 changes: 211 additions & 0 deletions tests/test_load_unversal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
# test_load_universal.py
from pathlib import Path

import pytest

from yamling.load_universal import ParsingError, load, load_file


# Test Constants
VALID_YAML = """
key: value
nested:
inner: data
list:
- item1
- item2
"""

VALID_TOML = """
key = "value"
nested = { inner = "data" }
list = ["item1", "item2"]
"""


VALID_JSON = """
{
"key": "value",
"nested": {
"inner": "data"
},
"list": ["item1", "item2"]
}
"""

VALID_INI = """
[section1]
key = value
key2 = value2
[section2]
other = data
"""

INVALID_YAML = "key: [invalid:"
INVALID_TOML = "key = invalid["
INVALID_JSON = "{invalid json"
INVALID_INI = "invalid ini"

EXPECTED_DATA = {"key": "value", "nested": {"inner": "data"}, "list": ["item1", "item2"]}

TEMP_DIR = "test_files"


# Fixtures
@pytest.fixture
def setup_temp_files(tmp_path: Path) -> Path:
    """Populate a scratch directory with valid and invalid sample files.

    Returns the directory that contains the files.
    """
    test_dir = tmp_path / TEMP_DIR
    test_dir.mkdir()

    # File name -> content; insertion order is the order the files are written.
    samples = {
        # Valid files
        "test.yaml": VALID_YAML,
        "test.yml": VALID_YAML,
        "test.toml": VALID_TOML,
        "test.json": VALID_JSON,
        "test.ini": VALID_INI,
        # Invalid files
        "invalid.yaml": INVALID_YAML,
        "invalid.toml": INVALID_TOML,
        "invalid.json": INVALID_JSON,
        "invalid.ini": INVALID_INI,
    }
    for filename, content in samples.items():
        (test_dir / filename).write_text(content)

    return test_dir


# Test load() function
def test_load_valid_yaml():
    """Valid YAML text parses to the expected structure."""
    assert load(VALID_YAML, "yaml") == EXPECTED_DATA


def test_load_valid_toml():
    """Valid TOML text parses to the expected structure."""
    assert load(VALID_TOML, "toml") == EXPECTED_DATA


def test_load_valid_json():
    """Valid JSON text parses to the expected structure."""
    assert load(VALID_JSON, "json") == EXPECTED_DATA


def test_load_valid_ini():
    """Valid INI text parses to a dict keyed by section name."""
    sections = load(VALID_INI, "ini")
    assert isinstance(sections, dict)
    for name in ("section1", "section2"):
        assert name in sections
    assert sections["section1"]["key"] == "value"


def test_load_invalid_yaml():
    """Malformed YAML is reported as ParsingError."""
    expect_parsing_error = pytest.raises(ParsingError)
    with expect_parsing_error:
        load(INVALID_YAML, "yaml")


def test_load_invalid_toml():
    """Malformed TOML is reported as ParsingError."""
    expect_parsing_error = pytest.raises(ParsingError)
    with expect_parsing_error:
        load(INVALID_TOML, "toml")


def test_load_invalid_json():
    """Malformed JSON is reported as ParsingError."""
    expect_parsing_error = pytest.raises(ParsingError)
    with expect_parsing_error:
        load(INVALID_JSON, "json")


def test_load_invalid_ini():
    """Malformed INI is reported as ParsingError."""
    expect_parsing_error = pytest.raises(ParsingError)
    with expect_parsing_error:
        load(INVALID_INI, "ini")


def test_load_unsupported_format():
    """An unknown format name is rejected with ValueError."""
    expect_value_error = pytest.raises(ValueError, match="Unsupported format")
    with expect_value_error:
        load(VALID_YAML, "unsupported")  # type: ignore


# Test load_file() function
def test_load_file_yaml(setup_temp_files: Path):
    """A .yaml file is detected and parsed as YAML."""
    yaml_file = setup_temp_files / "test.yaml"
    assert load_file(yaml_file) == EXPECTED_DATA


def test_load_file_yml(setup_temp_files: Path):
    """A .yml file is detected and parsed as YAML."""
    yml_file = setup_temp_files / "test.yml"
    assert load_file(yml_file) == EXPECTED_DATA


def test_load_file_toml(setup_temp_files: Path):
    """A .toml file is detected and parsed as TOML."""
    toml_file = setup_temp_files / "test.toml"
    assert load_file(toml_file) == EXPECTED_DATA


def test_load_file_json(setup_temp_files: Path):
    """A .json file is detected and parsed as JSON."""
    json_file = setup_temp_files / "test.json"
    assert load_file(json_file) == EXPECTED_DATA


def test_load_file_ini(setup_temp_files: Path):
    """A .ini file is detected and parsed into a dict of sections."""
    sections = load_file(setup_temp_files / "test.ini")
    assert isinstance(sections, dict)
    for name in ("section1", "section2"):
        assert name in sections


def test_load_file_explicit_format(setup_temp_files: Path):
    """Passing the format explicitly yields the same result as auto-detection."""
    yaml_file = setup_temp_files / "test.yaml"
    assert load_file(yaml_file, mode="yaml") == EXPECTED_DATA


def test_load_file_nonexistent():
    """A missing file raises FileNotFoundError."""
    expect_missing = pytest.raises(FileNotFoundError)
    with expect_missing:
        load_file("nonexistent.yaml")


def test_load_file_invalid_extension():
    """An unrecognised file extension is rejected with ValueError."""
    expect_value_error = pytest.raises(ValueError, match="Could not determine format")
    with expect_value_error:
        load_file("test.invalid")


def test_load_file_invalid_explicit_format():
    """An unknown explicit mode is rejected with ValueError."""
    expect_value_error = pytest.raises(ValueError, match="Unsupported format")
    with expect_value_error:
        load_file("test.yaml", mode="invalid")  # type: ignore


def test_load_file_permission_error(setup_temp_files: Path):
    """An unreadable file makes load_file raise PermissionError.

    The test is skipped where removing the permission bits does not actually
    block reading (for example when running as root or on Windows).
    """
    import os

    test_file = setup_temp_files / "test.yaml"
    test_file.chmod(0o000)  # Remove all permissions
    try:
        if os.access(test_file, os.R_OK):
            pytest.skip("file permissions are not enforced in this environment")
        with pytest.raises(PermissionError):
            load_file(test_file)
    finally:
        # Always restore permissions, even when the assertion fails, so the
        # temporary directory can be cleaned up.
        test_file.chmod(0o666)


def test_load_file_invalid_content(setup_temp_files: Path):
    """A file with malformed content raises ParsingError."""
    broken_file = setup_temp_files / "invalid.yaml"
    expect_parsing_error = pytest.raises(ParsingError)
    with expect_parsing_error:
        load_file(broken_file)


# Test edge cases
def test_load_empty_string():
    """Empty input yields None for YAML and an empty dict for TOML."""
    yaml_result = load("", "yaml")
    assert yaml_result is None  # YAML treats empty string as None
    toml_result = load("", "toml")
    assert toml_result == {}


def test_load_whitespace_only():
    """Whitespace-only input yields None for YAML and an empty dict for TOML."""
    yaml_result = load(" \n ", "yaml")
    assert yaml_result is None  # YAML treats whitespace as None
    toml_result = load(" \n ", "toml")
    assert toml_result == {}


def test_load_null_characters():
    """YAML text containing a NUL character is reported as ParsingError."""
    expect_parsing_error = pytest.raises(ParsingError)
    with expect_parsing_error:
        load("key: value\0", "yaml")


# Test with various path types
def test_load_file_with_different_path_types(setup_temp_files: Path):
    """load_file accepts both plain strings and Path objects."""
    yaml_file = setup_temp_files / "test.yaml"

    # String path
    assert load_file(str(yaml_file)) == EXPECTED_DATA

    # Path object
    assert load_file(Path(yaml_file)) == EXPECTED_DATA

0 comments on commit c73b2a9

Please sign in to comment.