diff --git a/pipelines/migration/tasks.py b/pipelines/migration/tasks.py index 329f90a2..78778966 100644 --- a/pipelines/migration/tasks.py +++ b/pipelines/migration/tasks.py @@ -1557,12 +1557,8 @@ def transform_raw_to_nested_structure( content_columns = [c for c in data.columns if c not in primary_key] data["content"] = data.apply( - lambda row: json.dumps( - { - key: value if not pd.isna(value) else None - for key, value in row[content_columns].to_dict().items() - }, - ensure_ascii=( + lambda row: row[[c for c in content_columns]].to_json( + force_ascii=( constants.CONTROLE_FINANCEIRO_DATASET_ID.value not in raw_filepath ), ), diff --git a/pipelines/utils/pretreatment.py b/pipelines/utils/pretreatment.py index 19f6f1be..0e9edb35 100644 --- a/pipelines/utils/pretreatment.py +++ b/pipelines/utils/pretreatment.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- """Functions to pretreat data""" -import json import pandas as pd from prefeitura_rio.pipelines_utils.logging import log @@ -20,12 +19,7 @@ def transform_to_nested_structure(data: pd.DataFrame, primary_keys: list) -> pd. """ content_columns = [c for c in data.columns if c not in primary_keys] data["content"] = data.apply( - lambda row: json.dumps( - { - key: value if not pd.isna(value) else None - for key, value in row[content_columns].to_dict().items() - } - ), + lambda row: row[[c for c in content_columns]].to_json(), axis=1, ) return data[primary_keys + ["content"]]