feat: add universal loader part

phil65 · Nov 10, 2024 · c73b2a9 · c73b2a9
1 parent 3d93cc3
commit c73b2a9
Show file tree

Hide file tree

Showing 2 changed files with 375 additions and 0 deletions.
diff --git a/src/yamling/load_universal.py b/src/yamling/load_universal.py
@@ -0,0 +1,164 @@
+import configparser
+import importlib.util
+import json
+import logging
+import os
+from pathlib import Path
+import tomllib
+from typing import Any, Literal, get_args
+
+from yaml import YAMLError
+
+from yamling.yaml import load_yaml
+
+
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+
+# Concrete formats that load() knows how to parse.
+SupportedFormats = Literal["yaml", "toml", "json", "ini"]
+# Formats accepted by load_file(): any supported format, or "auto" to pick one
+# from the file extension.
+FormatType = SupportedFormats | Literal["auto"]
+
+# Check if orjson is available (find_spec only looks the module up; it does not
+# import it). load() uses this flag to choose its JSON backend.
+has_orjson = importlib.util.find_spec("orjson") is not None
+
+
+class ParsingError(Exception):
+    """Raised when input text cannot be parsed in the requested format.
+
+    Attributes:
+        original_error: The underlying parser exception, or None if none was given.
+    """
+
+    def __init__(self, message: str, original_error: Exception | None = None) -> None:
+        """Store the message and, optionally, the exception that caused it."""
+        self.original_error = original_error
+        super().__init__(message)
+
+
+def load(text: str, mode: SupportedFormats, **kwargs: Any) -> Any:
+    """Load data from a string in the specified format.
+
+    Args:
+        text: String containing data in the specified format
+        mode: Format of the input data ("yaml", "toml", "json", or "ini")
+        **kwargs: Additional keyword arguments passed to the underlying load
+            functions (``load_yaml``, ``tomllib.loads``, ``json.loads`` or the
+            ``configparser.ConfigParser`` constructor). For "json", passing any
+            keyword argument selects the stdlib ``json`` backend, because
+            ``orjson.loads`` accepts no keyword arguments.
+
+    Returns:
+        Parsed data structure. For "ini" this is a dict mapping each section
+        name to a dict of that section's options.
+
+    Raises:
+        ValueError: If the format is not supported
+        ParsingError: If the text cannot be parsed in the specified format
+    """
+    match mode:
+        case "yaml":
+            try:
+                return load_yaml(text, **kwargs)
+            except YAMLError as e:
+                logger.exception("Failed to load YAML data")
+                msg = f"Failed to parse YAML data: {e}"
+                raise ParsingError(msg, e) from e
+
+        case "toml":
+            try:
+                return tomllib.loads(text, **kwargs)
+            except tomllib.TOMLDecodeError as e:
+                logger.exception("Failed to load TOML data")
+                msg = f"Failed to parse TOML data: {e}"
+                raise ParsingError(msg, e) from e
+
+        case "json":
+            # orjson.loads() takes only the document itself; forwarding kwargs
+            # to it raises TypeError. Use the fast path only when the caller
+            # supplied no options, and let stdlib json honour them otherwise.
+            if has_orjson and not kwargs:
+                import orjson
+
+                try:
+                    return orjson.loads(text)
+                except orjson.JSONDecodeError as e:
+                    logger.exception("Failed to load JSON data with orjson")
+                    msg = f"Failed to parse JSON data: {e}"
+                    raise ParsingError(msg, e) from e
+
+            try:
+                return json.loads(text, **kwargs)
+            except json.JSONDecodeError as e:
+                logger.exception("Failed to load JSON data with json")
+                msg = f"Failed to parse JSON data: {e}"
+                raise ParsingError(msg, e) from e
+
+        case "ini":
+            try:
+                parser = configparser.ConfigParser(**kwargs)
+                parser.read_string(text)
+                return {
+                    section: dict(parser.items(section)) for section in parser.sections()
+                }
+            # configparser.Error is the base class of every configparser
+            # exception, including ParsingError and MissingSectionHeaderError.
+            except configparser.Error as e:
+                logger.exception("Failed to load INI data")
+                msg = f"Failed to parse INI data: {e}"
+                raise ParsingError(msg, e) from e
+
+        case _:
+            msg = f"Unsupported format: {mode}"
+            raise ValueError(msg)
+
+
+def load_file(path: str | os.PathLike[str], mode: FormatType = "auto") -> Any:
+    """Load data from a file, automatically detecting the format from extension if needed.
+
+    The file is always decoded as UTF-8, independent of the platform's locale.
+
+    Args:
+        path: Path to the file to load
+        mode: Format of the file ("yaml", "toml", "json", "ini" or "auto")
+
+    Returns:
+        Parsed data structure
+
+    Raises:
+        ValueError: If the format cannot be determined or is not supported
+        OSError: If the file cannot be read
+        FileNotFoundError: If the file does not exist
+        PermissionError: If file permissions prevent reading
+        ParsingError: If the text cannot be parsed in the specified format
+    """
+    path_obj = Path(path)
+
+    # Determine format from extension if auto mode
+    if mode == "auto":
+        ext = path_obj.suffix.lower()
+        format_mapping: dict[str, SupportedFormats] = {
+            ".yaml": "yaml",
+            ".yml": "yaml",
+            ".toml": "toml",
+            ".tml": "toml",
+            ".json": "json",
+            ".jsonc": "json",
+            ".ini": "ini",
+            ".cfg": "ini",
+            ".conf": "ini",
+            ".config": "ini",
+            ".properties": "ini",
+            ".cnf": "ini",
+            ".env": "ini",
+        }
+        detected_mode = format_mapping.get(ext)
+        if detected_mode is None:
+            msg = f"Could not determine format from file extension: {path}"
+            raise ValueError(msg)
+        mode = detected_mode
+
+    # At this point, mode can't be "auto"
+    if mode not in get_args(SupportedFormats):
+        msg = f"Unsupported format: {mode}"
+        raise ValueError(msg)
+
+    try:
+        # Explicit encoding: without it the decoding depends on the locale.
+        text = path_obj.read_text(encoding="utf-8")
+        return load(text, mode)
+    except OSError:
+        # Covers FileNotFoundError and PermissionError (both OSError subclasses).
+        logger.exception("Failed to read file %r", path)
+        raise
+    except Exception:
+        # Log with the file path for context, then let the original error propagate.
+        logger.exception("Failed to parse file %r as %s", path, mode)
+        raise
diff --git a/tests/test_load_unversal.py b/tests/test_load_unversal.py
@@ -0,0 +1,211 @@
+# test_load_universal.py
+from pathlib import Path
+
+import pytest
+
+from yamling.load_universal import ParsingError, load, load_file
+
+
+# Test Constants
+# The three VALID_YAML / VALID_TOML / VALID_JSON documents below encode the same
+# data; the tests compare each parsed result against EXPECTED_DATA.
+VALID_YAML = """
+key: value
+nested:
+  inner: data
+list:
+  - item1
+  - item2
+"""
+
+VALID_TOML = """
+key = "value"
+nested = { inner = "data" }
+list = ["item1", "item2"]
+"""
+
+
+VALID_JSON = """
+{
+    "key": "value",
+    "nested": {
+        "inner": "data"
+    },
+    "list": ["item1", "item2"]
+}
+"""
+
+# INI sample with two sections; the tests only check section/option presence,
+# not equality with EXPECTED_DATA.
+VALID_INI = """
+[section1]
+key = value
+key2 = value2
+
+[section2]
+other = data
+"""
+
+# Malformed inputs, one per format; the tests expect ParsingError for each.
+INVALID_YAML = "key: [invalid:"
+INVALID_TOML = "key = invalid["
+INVALID_JSON = "{invalid json"
+INVALID_INI = "invalid ini"
+
+# Parsed form expected from VALID_YAML, VALID_TOML and VALID_JSON.
+EXPECTED_DATA = {"key": "value", "nested": {"inner": "data"}, "list": ["item1", "item2"]}
+
+# Name of the sub-directory the setup_temp_files fixture creates under tmp_path.
+TEMP_DIR = "test_files"
+
+
+# Fixtures
+@pytest.fixture
+def setup_temp_files(tmp_path: Path) -> Path:
+    """Populate a fresh directory with valid and invalid sample files.
+
+    Returns:
+        The directory (under ``tmp_path``) that contains the files.
+    """
+    test_dir = tmp_path / TEMP_DIR
+    test_dir.mkdir()
+
+    file_contents = {
+        # Valid files
+        "test.yaml": VALID_YAML,
+        "test.yml": VALID_YAML,
+        "test.toml": VALID_TOML,
+        "test.json": VALID_JSON,
+        "test.ini": VALID_INI,
+        # Invalid files
+        "invalid.yaml": INVALID_YAML,
+        "invalid.toml": INVALID_TOML,
+        "invalid.json": INVALID_JSON,
+        "invalid.ini": INVALID_INI,
+    }
+    for filename, content in file_contents.items():
+        (test_dir / filename).write_text(content)
+
+    return test_dir
+
+
+# Test load() function
+def test_load_valid_yaml():
+    """Valid YAML text parses to EXPECTED_DATA."""
+    assert load(text=VALID_YAML, mode="yaml") == EXPECTED_DATA
+
+
+def test_load_valid_toml():
+    """Valid TOML text parses to EXPECTED_DATA."""
+    assert load(text=VALID_TOML, mode="toml") == EXPECTED_DATA
+
+
+def test_load_valid_json():
+    """Valid JSON text parses to EXPECTED_DATA."""
+    assert load(text=VALID_JSON, mode="json") == EXPECTED_DATA
+
+
+def test_load_valid_ini():
+    """Valid INI text parses to a dict keyed by section name."""
+    parsed = load(VALID_INI, "ini")
+    assert isinstance(parsed, dict)
+    for section in ("section1", "section2"):
+        assert section in parsed
+    assert parsed["section1"]["key"] == "value"
+
+
+def test_load_invalid_yaml():
+    """Malformed YAML surfaces as ParsingError."""
+    with pytest.raises(ParsingError):
+        load(text=INVALID_YAML, mode="yaml")
+
+
+def test_load_invalid_toml():
+    """Malformed TOML surfaces as ParsingError."""
+    with pytest.raises(ParsingError):
+        load(text=INVALID_TOML, mode="toml")
+
+
+def test_load_invalid_json():
+    """Malformed JSON surfaces as ParsingError."""
+    with pytest.raises(ParsingError):
+        load(text=INVALID_JSON, mode="json")
+
+
+def test_load_invalid_ini():
+    """Malformed INI surfaces as ParsingError."""
+    with pytest.raises(ParsingError):
+        load(text=INVALID_INI, mode="ini")
+
+
+def test_load_unsupported_format():
+    """An unknown mode is rejected with ValueError."""
+    unknown_mode = "unsupported"
+    with pytest.raises(ValueError, match="Unsupported format"):
+        load(VALID_YAML, unknown_mode)  # type: ignore
+
+
+# Test load_file() function
+def test_load_file_yaml(setup_temp_files: Path):
+    """A .yaml file is auto-detected and parsed."""
+    yaml_path = setup_temp_files / "test.yaml"
+    assert load_file(yaml_path) == EXPECTED_DATA
+
+
+def test_load_file_yml(setup_temp_files: Path):
+    """A .yml file is auto-detected and parsed."""
+    yml_path = setup_temp_files / "test.yml"
+    assert load_file(yml_path) == EXPECTED_DATA
+
+
+def test_load_file_toml(setup_temp_files: Path):
+    """A .toml file is auto-detected and parsed."""
+    toml_path = setup_temp_files / "test.toml"
+    assert load_file(toml_path) == EXPECTED_DATA
+
+
+def test_load_file_json(setup_temp_files: Path):
+    """A .json file is auto-detected and parsed."""
+    json_path = setup_temp_files / "test.json"
+    assert load_file(json_path) == EXPECTED_DATA
+
+
+def test_load_file_ini(setup_temp_files: Path):
+    """A .ini file is auto-detected and parsed into a dict of sections."""
+    parsed = load_file(setup_temp_files / "test.ini")
+    assert isinstance(parsed, dict)
+    for section in ("section1", "section2"):
+        assert section in parsed
+
+
+def test_load_file_explicit_format(setup_temp_files: Path):
+    """An explicit mode is used instead of extension detection."""
+    yaml_path = setup_temp_files / "test.yaml"
+    assert load_file(yaml_path, mode="yaml") == EXPECTED_DATA
+
+
+def test_load_file_nonexistent():
+    """A missing file raises FileNotFoundError."""
+    missing_path = "nonexistent.yaml"
+    with pytest.raises(FileNotFoundError):
+        load_file(missing_path)
+
+
+def test_load_file_invalid_extension():
+    """An unknown extension in auto mode raises ValueError."""
+    unknown_ext_path = "test.invalid"
+    with pytest.raises(ValueError, match="Could not determine format"):
+        load_file(unknown_ext_path)
+
+
+def test_load_file_invalid_explicit_format():
+    """An unknown explicit mode raises ValueError."""
+    bad_mode = "invalid"
+    with pytest.raises(ValueError, match="Unsupported format"):
+        load_file("test.yaml", mode=bad_mode)  # type: ignore
+
+
+def test_load_file_permission_error(setup_temp_files: Path):
+    """An unreadable file makes load_file() raise PermissionError.
+
+    The test is skipped where chmod cannot revoke read access, and the file's
+    permissions are always restored afterwards.
+    """
+    import os
+
+    test_file = setup_temp_files / "test.yaml"
+    test_file.chmod(0o000)  # Remove all permissions
+    try:
+        # chmod(0) does not block reads everywhere (e.g. as root or on Windows);
+        # asserting PermissionError there would fail for the wrong reason.
+        if os.access(test_file, os.R_OK):
+            pytest.skip("cannot make the file unreadable for this user/platform")
+        with pytest.raises(PermissionError):
+            load_file(test_file)
+    finally:
+        test_file.chmod(0o666)  # Restore permissions
+
+
+def test_load_file_invalid_content(setup_temp_files: Path):
+    """A file with malformed YAML raises ParsingError."""
+    broken_yaml = setup_temp_files / "invalid.yaml"
+    with pytest.raises(ParsingError):
+        load_file(broken_yaml)
+
+
+# Test edge cases
+def test_load_empty_string():
+    """Empty input yields None for YAML and an empty dict for TOML."""
+    empty = ""
+    # YAML treats empty string as None
+    assert load(empty, "yaml") is None
+    assert load(empty, "toml") == {}
+
+
+def test_load_whitespace_only():
+    """Whitespace-only input yields None for YAML and an empty dict for TOML."""
+    blank = "   \n   "
+    # YAML treats whitespace as None
+    assert load(blank, "yaml") is None
+    assert load(blank, "toml") == {}
+
+
+def test_load_null_characters():
+    """YAML containing a NUL character raises ParsingError."""
+    text_with_nul = "key: value\0"
+    with pytest.raises(ParsingError):
+        load(text_with_nul, "yaml")
+
+
+# Test with various path types
+def test_load_file_with_different_path_types(setup_temp_files: Path):
+    """load_file() accepts both str and Path arguments."""
+    yaml_file = setup_temp_files / "test.yaml"
+
+    # Test with string path
+    assert load_file(str(yaml_file)) == EXPECTED_DATA
+
+    # Test with Path object
+    assert load_file(Path(yaml_file)) == EXPECTED_DATA