Skip to content

Commit

Permalink
fix(xlsform): do not filter duplicates from choices sheet
Browse files Browse the repository at this point in the history
  • Loading branch information
spwoodcock committed Sep 24, 2024
1 parent ac8c71e commit dfd60a7
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 18 deletions.
16 changes: 7 additions & 9 deletions osm_fieldwork/update_xlsform.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,26 +34,24 @@ def merge_dataframes(mandatory_df: pd.DataFrame, user_question_df: pd.DataFrame,
# mandatory_df, user_question_df, digitisation_df, fields=["label", "hint", "required_message"]
# )

# Find common fields between user_question_df and mandatory_df or digitisation_df
duplicate_fields = set(user_question_df[NAME_COLUMN]).intersection(
set(mandatory_df[NAME_COLUMN]).union(set(digitisation_df[NAME_COLUMN]))
)

# Is choices sheet, return ordered merged choices
# If processing the choices sheet, retain all duplicates
if "list_name" in user_question_df.columns:
user_question_df_filtered = user_question_df[~user_question_df[NAME_COLUMN].isin(duplicate_fields)]

return pd.concat(
[
mandatory_df,
user_question_df_filtered,
user_question_df,
digitisation_df,
],
ignore_index=True,
)

# Else we are processing the survey sheet, continue

# Find common fields between user_question_df and mandatory_df or digitisation_df
duplicate_fields = set(user_question_df[NAME_COLUMN]).intersection(
set(mandatory_df[NAME_COLUMN]).union(set(digitisation_df[NAME_COLUMN]))
)

# NOTE: filter out 'end group' rows from the duplicate check, as they have an empty NAME_COLUMN
end_group_rows = user_question_df[user_question_df["type"].isin(["end group", "end_group"])]
user_question_df_filtered = user_question_df[
Expand Down
10 changes: 1 addition & 9 deletions tests/test_update_xlsform.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ async def test_merge_mandatory_fields():
merged_xlsform.write(updated_form.getvalue())

check_survey_sheet(workbook)
check_choices_sheet(workbook)
# NOTE: the choices sheet can have duplicates in the 'name' field without issue
check_entities_sheet(workbook)
check_form_title(workbook)

Expand Down Expand Up @@ -113,14 +113,6 @@ def check_survey_sheet(workbook: Workbook) -> None:
check_for_duplicates(survey_sheet, name_col_index)


def check_choices_sheet(workbook: Workbook) -> None:
    """Verify that the 'name' column of the 'choices' sheet holds no duplicates.

    Looks up the 'choices' sheet in the workbook, locates its 'name' column,
    and delegates the actual duplicate check to ``check_for_duplicates``.
    """
    sheet = get_sheet(workbook, "choices")
    check_for_duplicates(sheet, get_column_index(sheet, "name"))


def check_entities_sheet(workbook: Workbook) -> None:
"""Check the 'entities' sheet values."""
entities_sheet = get_sheet(workbook, "entities")
Expand Down

0 comments on commit dfd60a7

Please sign in to comment.