From eed18ee461164c3a1ec5548fef8c872f3000d890 Mon Sep 17 00:00:00 2001 From: Zach Kurtz Date: Wed, 27 Nov 2024 22:18:21 -0500 Subject: [PATCH] expand docs --- .github/workflows/ci.yml | 32 ++++++++++++++++++-------------- README.md | 5 ++++- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 87be790..6063dc0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,26 +10,30 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.10', '3.12'] + python-version: + - '3.10' + - '3.13' + steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} + - name: Clone repo + uses: actions/checkout@v4 - - name: Set the python version for UV + - name: Set the python version run: echo "UV_PYTHON=${{ matrix.python-version }}" >> $GITHUB_ENV - - name: Set up uv - run: pip install uv + - name: Setup uv + uses: astral-sh/setup-uv@v3 + with: + version: "0.5.4" + + - name: Linting check + run: uv run ruff check - - name: Check code quality with Ruff - run: | - uv run ruff check - uv run ruff format --check + - name: Formatting check + run: uv run ruff format --check - - name: Check type hints with pyright + - name: Type checking run: uv run pyright - - name: Run unit tests with pytest + - name: Unit tests run: uv run pytest diff --git a/README.md b/README.md index 2a820a2..20966a5 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,13 @@ Packio allows you to use a single file to store and retrieve multiple python obj When a class contains multiple of these data types, or even multiple instances of the same data type, saving and loading the data associated with a class tends to become unwieldy, requiring the user to either keep track multiple file paths or to fall back to using pickle, which introduces other problems (see below). 
The goal of packio is to make it as easy as possible to write `save` and `load` methods for such a class while allowing you to keep using all of your favorite object-type-specific serializers (i.e. `to_parquet` for pandas, `json` for dictionaries, `pathlib.Path.write_text` for strings, etc). +## Why a single file and not a directory? + +In a word, *encapsulation*. Copy/move operations with a file are simpler than with a directory, especially when it comes to moving data across platforms such as to/from the cloud. A file is also more tamper-resistant - it's typically harder to accidentally modify the contents of a file than it is for someone to add or remove files or subdirectories in a directory. ## Why not pickle? -The most common approach for serialization of such complex python objects is to use `pickle`. There are many reasons do dislike pickle. As summarized by Gemini, "Python's pickle module, while convenient, has drawbacks. It poses security risks due to potential code execution vulnerabilities when handling untrusted data. Compatibility issues arise because it's Python-specific and version-dependent. Maintaining pickle can be challenging due to refactoring difficulties and complex debugging." See also [Ben Frederickson](https://www.benfrederickson.com/dont-pickle-your-data/). +Although `pickle` may be the most common approach for serialization of complex Python objects, there are strong reasons to dislike pickle. As summarized by Gemini, "Python's pickle module, while convenient, has drawbacks. It poses security risks due to potential code execution vulnerabilities when handling untrusted data. Compatibility issues arise because it's Python-specific and version-dependent. Maintaining pickle can be challenging due to refactoring difficulties and complex debugging." See also [Ben Frederickson](https://www.benfrederickson.com/dont-pickle-your-data/). ## Example