diff --git a/data_bridges_knots/__init__.py b/data_bridges_knots/__init__.py index 9d0deb0..7ea5fa6 100644 --- a/data_bridges_knots/__init__.py +++ b/data_bridges_knots/__init__.py @@ -5,6 +5,6 @@ """ from .client import DataBridgesShapes -from .labels import get_column_labels, get_value_labels, map_value_labels +from .helpers import get_column_labels, get_value_labels, map_value_labels -__all__ = ['DataBridgesShapes', 'labels'] +__all__ = ['DataBridgesShapes', 'helpers'] diff --git a/data_bridges_knots/client.py b/data_bridges_knots/client.py index a444eec..5bdc4d1 100644 --- a/data_bridges_knots/client.py +++ b/data_bridges_knots/client.py @@ -116,7 +116,10 @@ def get_household_survey(self, survey_id, access_type, page_size=600): df = pd.DataFrame(responses) - df.apply(lambda x: pd.to_numeric(x, errors='coerce', downcast='integer').fillna(9999).astype(np.int64 if x.dtype == 'int64' else x.dtype)) + df = df.apply(lambda x: pd.to_numeric(x, errors='ignore', downcast='integer').fillna(9999).astype(int if x.dtype == 'int' or x.dtype == 'float' else x.dtype)) + + # df = df.apply(lambda x: pd.to_numeric(x, errors='ignore', downcast='integer').fillna(9999).astype(int if x.dtype == 'float' else x.dtype)) + df = df.replace({9999: None}) return df diff --git a/data_bridges_knots/labels.py b/data_bridges_knots/helpers.py similarity index 80% rename from data_bridges_knots/labels.py rename to data_bridges_knots/helpers.py index b6c21f0..e0b94ec 100644 --- a/data_bridges_knots/labels.py +++ b/data_bridges_knots/helpers.py @@ -11,9 +11,9 @@ def get_value_labels(df): name = row["name"] choice = row["choices"] if name in categories_dict: - categories_dict[name].update({int(choice["name"]): choice["label"]}) + categories_dict[name].update({(choice["name"]): choice["label"]}) else: - categories_dict[name] = {int(choice["name"]): choice["label"]} + categories_dict[name] = {(choice["name"]): choice["label"]} return categories_dict def get_column_labels(df): @@ -42,9 +42,9 @@ def 
map_value_labels(survey_data, questionnaire):
         name = row["name"]
         choice = row["choices"]
         if name in categories_dict:
-            categories_dict[name].update({int(choice["name"]): choice["label"]})
+            categories_dict[name].update({(choice["name"]): choice["label"]})
         else:
-            categories_dict[name] = {int(choice["name"]): choice["label"]}
+            categories_dict[name] = {(choice["name"]): choice["label"]}
 
     # Map the categories to survey_data
     survey_data_value_labels = survey_data.copy()
@@ -55,3 +55,21 @@ def map_value_labels(survey_data, questionnaire):
 
     return survey_data_value_labels
 
+def as_numeric(df, cols=None):
+    """Cast the selected columns of ``df`` to ``int`` and return ``df``.
+
+    Args:
+        df: DataFrame to convert; it is modified in place.
+        cols: Column labels to cast. ``None`` (the default) or an empty
+            sequence leaves ``df`` untouched.
+
+    Returns:
+        The same ``df`` object, with each listed column replaced by the
+        result of ``astype(int)``.
+    """
+    for col in cols or ():
+        # ``astype`` returns a new Series, so it must be assigned back.
+        df[col] = df[col].astype(int)
+    return df
+
+