From 17e00fe15d53a74a707b068ec0ef7902c3672809 Mon Sep 17 00:00:00 2001 From: rziemianek Date: Thu, 20 Jun 2024 11:12:48 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=9D=20Updated=20docstring?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 1 + src/viadot/sources/sharepoint.py | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1df5c2a92..051d4d07e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Added `na_values` parameter to `Sharepoint` class to parse `N/A` values coming from the Excel file columns. - Added `get_last_segment_from_url` function to sharepoint file. - Added `validate` function to `viadot/utils.py` - Fixed `Databricks.create_table_from_pandas()` failing to overwrite a table in some cases even with `replace="True"` diff --git a/src/viadot/sources/sharepoint.py b/src/viadot/sources/sharepoint.py index d238f8ab4..b583d0293 100644 --- a/src/viadot/sources/sharepoint.py +++ b/src/viadot/sources/sharepoint.py @@ -168,7 +168,14 @@ def to_df( tests (Dict[str], optional): A dictionary with optional list of tests to verify the output dataframe. If defined, triggers the `validate` function from utils. Defaults to None. - na_values (list[str] | None): NA values for excel file. Defaults to None. + na_values (list[str] | None): Additional strings to recognize as NA/NaN. + If a list is passed, the specified NA values will be recognized in each column. + If None, the "DEFAULT_NA_VALUES" list is used: + [" ", "#N/A", "#N/A N/A", "#NA", "-1.#IND", + "-1.#QNAN", "-NaN", "-nan", "1.#IND", "1.#QNAN", + "", "N/A", "NA", "NULL", "NaN", + "None", "n/a", "nan", "null"]. + Defaults to None. 
kwargs (dict[str, Any], optional): Keyword arguments to pass to pd.ExcelFile.parse(). Note that `nrows` is not supported.