Skip to content

Commit

Permalink
90-Bug-fix-file-encoding-box-integration (#96)
Browse files (browse the repository at this point in the history)
Allow setting File Encoding in Box implementation
  • Loading branch information
louis-paulvlx authored Nov 8, 2024
1 parent 90e6462 commit b37a302
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions src/koheesio/integrations/box.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,11 @@ class BoxReaderBase(Box, Reader, ABC):
default_factory=dict,
description="[Optional] Set of extra parameters that should be passed to the Spark reader.",
)


file_encoding: Optional[str] = Field(
default="utf-8",
description="[Optional] Set file encoding format. By default is utf-8."
)

class BoxCsvFileReader(BoxReaderBase):
"""
Expand Down Expand Up @@ -412,7 +416,7 @@ def execute(self) -> BoxReaderBase.Output:
for f in self.file:
self.log.debug(f"Reading contents of file with the ID '{f}' into Spark DataFrame")
file = self.client.file(file_id=f)
- data = file.content().decode("utf-8")
+ data = file.content().decode(self.file_encoding)

data_buffer = StringIO(data)
temp_df_pandas = pd.read_csv(data_buffer, header=0, dtype=str if not self.schema_ else None, **self.params) # type: ignore
Expand Down

0 comments on commit b37a302

Please sign in to comment.