diff --git a/docs/api/update_xlsform.md b/docs/api/update_xlsform.md
new file mode 100644
index 00000000..1c6be481
--- /dev/null
+++ b/docs/api/update_xlsform.md
@@ -0,0 +1,6 @@
+# update_xlsform.py
+
+::: osm_fieldwork.update_xlsform.append_mandatory_fields
+    options:
+      show_source: false
+      heading_level: 3
diff --git a/mkdocs.yml b/mkdocs.yml
index 87027284..2337e5b0 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -91,6 +91,7 @@ nav:
- ODKInstance: api/ODKInstance.md
- ODKForm: api/ODKForm.md
- filter_data: api/filter_data.md
+ - update_xlsform: api/update_xlsform.md
- Deep Tech Dives:
- External Data: about/externaldata.md
- Data conflation: about/conflation.md
diff --git a/osm_fieldwork/update_form.py b/osm_fieldwork/update_form.py
deleted file mode 100644
index c6a226a0..00000000
--- a/osm_fieldwork/update_form.py
+++ /dev/null
@@ -1,123 +0,0 @@
-from datetime import datetime
-from io import BytesIO
-
-import pandas as pd
-from python_calamine.pandas import pandas_monkeypatch
-
-from osm_fieldwork.xlsforms import xlsforms_path
-
-# Monkeypatch pandas to add calamine driver
-pandas_monkeypatch()
-
-
-def merge_sheets(mandatory_df, custom_df, digitisation_df, is_survey_sheet=False):
- # Remove rows with None in 'name' column
- if "name" in mandatory_df.columns:
- mandatory_df = mandatory_df.dropna(subset=["name"])
- if "name" in custom_df.columns:
- custom_df = custom_df.dropna(subset=["name"])
- if "name" in digitisation_df.columns:
- digitisation_df = digitisation_df.dropna(subset=["name"])
-
- # Identify common fields between custom_df and mandatory_df or digitisation_df
- common_fields = (
- set(custom_df["name"])
- .intersection(set(mandatory_df["name"]))
- .union(set(custom_df["name"]).intersection(set(digitisation_df["name"])))
- )
-
- # Keep common fields from custom_df in their original order
- custom_common_df = custom_df[custom_df["name"].isin(common_fields)]
- custom_non_common_df = custom_df[~custom_df["name"].isin(common_fields)]
-
- # Filter out the common fields from the mandatory_df and digitisation_df
- mandatory_df_filtered = mandatory_df[~mandatory_df["name"].isin(common_fields)]
- digitisation_df_filtered = digitisation_df[~digitisation_df["name"].isin(common_fields)]
-
- if not is_survey_sheet:
- return pd.concat(
- [
- custom_common_df,
- mandatory_df_filtered,
- custom_non_common_df,
- digitisation_df_filtered,
- ],
- ignore_index=True,
- )
- survey_group_row = pd.DataFrame(
- {
- "type": ["begin group"],
- "name": ["survey_questions"],
- "label": ["Survey Form"],
- "relevant": [
- "(${new_feature} = 'yes') or (${building_exists} = 'yes')"
- ], # Add the relevant condition to display this group only if "Yes" is selected
- }
- )
- survey_end_group_row = pd.DataFrame({"type": ["end group"], "name": ["end_survey_questions"], "label": ["End Survey Form"]})
- digitisation_group = pd.DataFrame(
- {
- "type": ["begin group"],
- "name": ["verification"],
- "label": ["Verification Form"],
- "relevant": ["(${new_feature} = 'yes') or (${building_exists} = 'yes')"],
- }
- )
- digitisation_end_group = pd.DataFrame({"type": ["end group"], "name": ["end_verification"], "label": ["End Verification Form"]})
-
- # Concatenate: mandatory fields at the top, custom common fields, remaining custom fields, and finally append form fields
- return pd.concat(
- [
- custom_common_df,
- mandatory_df_filtered,
- survey_group_row,
- custom_non_common_df,
- survey_end_group_row,
- digitisation_group,
- digitisation_df_filtered,
- digitisation_end_group,
- ],
- ignore_index=True,
- )
-
-
-def update_xls_form(custom_form: BytesIO) -> BytesIO:
- custom_sheets = pd.read_excel(custom_form, sheet_name=None, engine="calamine")
- default_form_path = f"{xlsforms_path}/fmtm/mandatory_fields.xls"
- digitisation_form_path = f"{xlsforms_path}/fmtm/digitisation_fields.xls"
- digitisation_sheets = pd.read_excel(digitisation_form_path, sheet_name=None, engine="calamine")
- mandatory_sheets = pd.read_excel(default_form_path, sheet_name=None, engine="calamine")
-
- # Process and merge the 'survey' sheet if present in all forms
- if "survey" in mandatory_sheets and "survey" in digitisation_sheets and "survey" in custom_sheets:
- custom_sheets["survey"] = merge_sheets(
- mandatory_sheets["survey"], custom_sheets["survey"], digitisation_sheets["survey"], True
- )
-
- # Process and merge the 'choices' sheet if present in all forms
- if "choices" in mandatory_sheets and "choices" in digitisation_sheets and "choices" in custom_sheets:
- custom_sheets["choices"] = merge_sheets(
- mandatory_sheets["choices"], custom_sheets["choices"], digitisation_sheets["choices"]
- )
-
- # Append or overwrite the existing entities sheet
- if "entities" in mandatory_sheets:
- custom_sheets["entities"] = mandatory_sheets["entities"]
-
- if "settings" in mandatory_sheets:
- custom_sheets["settings"] = mandatory_sheets["settings"]
- current_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
- # Set the 'version' column to the current timestamp (if 'version' column exists in 'settings')
- if "version" in custom_sheets["settings"].columns:
- # set column type to str
- custom_sheets["settings"]["version"] = custom_sheets["settings"]["version"].astype(str)
- custom_sheets["settings"].loc[:, "version"] = current_timestamp
-
- output = BytesIO()
- with pd.ExcelWriter(output, engine="openpyxl") as writer:
- for sheet_name, df in custom_sheets.items():
- df.to_excel(writer, sheet_name=sheet_name, index=False)
-
- output.seek(0)
- return output
diff --git a/osm_fieldwork/update_xlsform.py b/osm_fieldwork/update_xlsform.py
new file mode 100644
index 00000000..31ff22df
--- /dev/null
+++ b/osm_fieldwork/update_xlsform.py
@@ -0,0 +1,213 @@
+"""Update an existing XLSForm with additional fields useful for field mapping."""
+
+from datetime import datetime
+from io import BytesIO
+from uuid import uuid4
+
+import pandas as pd
+from python_calamine.pandas import pandas_monkeypatch
+
+from osm_fieldwork.xlsforms import xlsforms_path
+
+# Monkeypatch pandas to add calamine driver
+# (the spreadsheet reads in this module pass engine="calamine")
+pandas_monkeypatch()
+
+# Constants
+# Value in the 'name' column identifying the survey row after which extra
+# select_one_from_file questions are inserted (a row name, despite the suffix).
+FEATURE_COLUMN = "feature"
+# Column holding the field name; used to drop empty rows and detect duplicates.
+NAME_COLUMN = "name"
+# Name of the group wrapping the custom form's own (non-duplicate) survey questions.
+SURVEY_GROUP_NAME = "survey_questions"
+# Name of the group wrapping the digitisation questions.
+DIGITISATION_GROUP_NAME = "verification"
+
+
+def filter_df_empty_rows(df, column=NAME_COLUMN):
+    """Drop rows with a missing value in ``column``, keeping group marker rows.
+
+    Args:
+        df: the sheet (DataFrame) to filter.
+        column: the column that must be populated for a row to be kept.
+
+    Returns:
+        ``df`` unchanged when ``column`` is absent, otherwise the filtered rows.
+        When a 'type' column exists, begin/end group rows are kept even if
+        their ``column`` value is missing.
+    """
+    if column not in df.columns:
+        return df
+
+    rows_to_keep = df[column].notna()
+    if "type" in df.columns:
+        # Group markers may legitimately have no name, so never drop them
+        group_markers = ["begin group", "end group", "begin_group", "end_group"]
+        rows_to_keep = rows_to_keep | df["type"].isin(group_markers)
+    return df[rows_to_keep]
+
+
+def merge_dataframes(mandatory_df, custom_df, digitisation_df, is_survey_sheet=False):
+    """Combine the mandatory, custom and digitisation sheets into one dataframe.
+
+    Rows whose name appears both in the custom sheet and in the mandatory or
+    digitisation sheet are taken from the custom sheet only.
+
+    Args:
+        mandatory_df: sheet from the mandatory fields form.
+        custom_df: sheet from the uploaded custom form.
+        digitisation_df: sheet from the digitisation fields form.
+        is_survey_sheet: when True, wrap the remaining custom rows and the
+            digitisation rows in their own begin/end groups.
+
+    Returns:
+        The concatenated dataframe with a fresh 0..n-1 index.
+    """
+    # Remove rows with no value in the 'name' column
+    mandatory_rows, custom_rows, digitisation_rows = (
+        filter_df_empty_rows(sheet) for sheet in (mandatory_df, custom_df, digitisation_df)
+    )
+
+    # Names defined by the custom form that also exist in a built-in form
+    builtin_names = set(mandatory_rows[NAME_COLUMN]) | set(digitisation_rows[NAME_COLUMN])
+    shared_names = set(custom_rows[NAME_COLUMN]) & builtin_names
+
+    # Custom rows win over built-in rows of the same name
+    custom_is_shared = custom_rows[NAME_COLUMN].isin(shared_names)
+    custom_shared = custom_rows[custom_is_shared]
+    custom_only = custom_rows[~custom_is_shared]
+    mandatory_remaining = mandatory_rows[~mandatory_rows[NAME_COLUMN].isin(shared_names)]
+    digitisation_remaining = digitisation_rows[~digitisation_rows[NAME_COLUMN].isin(shared_names)]
+
+    if is_survey_sheet:
+        survey_group = create_group(SURVEY_GROUP_NAME)
+        digitisation_group = create_group(DIGITISATION_GROUP_NAME)
+        digitisation_group["start"]["relevant"] = ["(${new_feature} = 'yes') or (${building_exists} = 'yes')"]
+        ordered_parts = [
+            custom_shared,
+            mandatory_remaining,
+            survey_group["start"],
+            custom_only,
+            survey_group["end"],
+            digitisation_group["start"],
+            digitisation_remaining,
+            digitisation_group["end"],
+        ]
+    else:
+        ordered_parts = [custom_shared, mandatory_remaining, custom_only, digitisation_remaining]
+
+    return pd.concat(ordered_parts, ignore_index=True)
+
+
+def create_group(name: str) -> dict[str, pd.DataFrame]:
+    """Build the 'begin group' / 'end group' rows for an XLSForm group.
+
+    Args:
+        name: the group name; the end row is named 'end of <name>'.
+
+    Returns:
+        A dict with single-row dataframes under the keys 'start' and 'end'.
+    """
+    marker_rows = {
+        "start": ("begin group", name),
+        "end": ("end group", f"end of {name}"),
+    }
+    return {
+        position: pd.DataFrame({"type": [row_type], "name": [row_name]})
+        for position, (row_type, row_name) in marker_rows.items()
+    }
+
+
+def append_select_one_from_file_row(df: pd.DataFrame, entity_name: str) -> pd.DataFrame:
+    """Insert a select_one_from_file question that references an Entity list.
+
+    The new row is placed directly after the first row whose name equals
+    FEATURE_COLUMN.
+
+    Args:
+        df: the survey sheet.
+        entity_name: the Entity list name. A single trailing 's' is stripped
+            to form the field name and CSV file name (e.g. 'roads' -> 'road').
+
+    Returns:
+        A new dataframe containing the extra row, with the index reset.
+
+    Raises:
+        ValueError: if no row named FEATURE_COLUMN exists in ``df``.
+    """
+    is_feature_row = (df[NAME_COLUMN] == FEATURE_COLUMN).to_numpy()
+    if not is_feature_row.any():
+        raise ValueError(f"Row with '{NAME_COLUMN}' == '{FEATURE_COLUMN}' not found in survey sheet.")
+
+    # Work with the row *position*, not its index label: the split below uses
+    # iloc, and labels only coincide with positions on a freshly reset index.
+    # argmax on a boolean array gives the position of the first True.
+    row_index_to_split_on = int(is_feature_row.argmax()) + 1
+
+    # Plural to singular: strip one trailing 's'
+    entity_name = entity_name.removesuffix("s")
+
+    additional_row = pd.DataFrame(
+        {
+            "type": [f"select_one_from_file {entity_name}.csv"],
+            "name": [entity_name],
+            "label::English(en)": [entity_name],
+            "appearance": ["map"],
+            "choice_filter": ["selected(${task_filter}, '') or task_id=${task_filter}"],
+            "trigger": ["${task_filter}"],
+            "label::Swahili(sw)": [entity_name],
+            "label::French(fr)": [entity_name],
+            "label::Spanish(es)": [entity_name],
+        }
+    )
+
+    # Insert the new row into the DataFrame
+    top_df = df.iloc[:row_index_to_split_on]
+    bottom_df = df.iloc[row_index_to_split_on:]
+    return pd.concat([top_df, additional_row, bottom_df], ignore_index=True)
+
+
+def append_task_ids_to_choices_sheet(df: pd.DataFrame, task_count: int) -> pd.DataFrame:
+    """Append one 'task_filter' choice per task id (1..task_count) to the choices sheet.
+
+    Args:
+        df: the choices sheet.
+        task_count: number of tasks; ids run from 1 to task_count inclusive.
+
+    Returns:
+        A new dataframe with the task id rows appended and the index reset.
+        Each id is used as both the choice name and every language label.
+    """
+    id_values = list(range(1, task_count + 1))
+    label_columns = (
+        "label::English(en)",
+        "label::Swahili(sw)",
+        "label::French(fr)",
+        "label::Spanish(es)",
+    )
+
+    task_rows = pd.DataFrame(
+        {
+            "list_name": ["task_filter"] * task_count,
+            "name": id_values,
+            **{label_column: id_values for label_column in label_columns},
+        }
+    )
+    return pd.concat([df, task_rows], ignore_index=True)
+
+
+async def append_mandatory_fields(
+    custom_form: BytesIO,
+    form_category: str,
+    additional_entities: list[str] = None,
+    task_count: int = None,
+    existing_id: str = None,
+) -> BytesIO:
+    """Append mandatory fields to the XLSForm for use in FMTM.
+
+    Args:
+        custom_form(BytesIO): the XLSForm data uploaded, wrapped in BytesIO.
+        form_category(str): the form category name (in form_title and description).
+            A single trailing 's' is stripped for the 'form_category' calculation;
+            a name without a trailing 's' is used unchanged.
+        additional_entities(list[str]): add extra select_one_from_file fields to
+            reference an additional Entity list (set of geometries).
+            The values should be plural, so that 's' will be stripped in the
+            field name.
+        task_count(int): number of tasks, used to generate task_id entries in choices
+            sheet. These are used to filter Entities by task id in ODK Collect.
+        existing_id(str): an existing UUID to use for the form_id, else random uuid4.
+
+    Returns:
+        BytesIO: the updated XLSForm, wrapped in BytesIO.
+
+    Raises:
+        ValueError: if additional_entities is given and the survey sheet has no
+            row named 'feature' to insert the new questions after.
+    """
+    custom_sheets = pd.read_excel(custom_form, sheet_name=None, engine="calamine")
+    mandatory_sheets = pd.read_excel(f"{xlsforms_path}/fmtm/mandatory_fields.xls", sheet_name=None, engine="calamine")
+    digitisation_sheets = pd.read_excel(f"{xlsforms_path}/fmtm/digitisation_fields.xls", sheet_name=None, engine="calamine")
+
+    # Merge 'survey' and 'choices' sheets
+    if "survey" in custom_sheets:
+        custom_sheets["survey"] = merge_dataframes(
+            mandatory_sheets.get("survey"), custom_sheets.get("survey"), digitisation_sheets.get("survey"), True
+        )
+        survey_df = custom_sheets["survey"]
+        # Hardcode the form_category value for the start instructions.
+        # Plural to singular; always bind the name so that a category without
+        # a trailing 's' (e.g. 'health') no longer hits an unbound variable.
+        form_category_singular = form_category.removesuffix("s")
+        is_form_category_row = survey_df["name"] == "form_category"
+        if is_form_category_row.any():
+            survey_df.loc[is_form_category_row, "calculation"] = f"once('{form_category_singular}')"
+
+    if "choices" in custom_sheets:
+        custom_sheets["choices"] = merge_dataframes(
+            mandatory_sheets.get("choices"), custom_sheets.get("choices"), digitisation_sheets.get("choices")
+        )
+
+    # Append or overwrite 'entities' and 'settings' sheets
+    custom_sheets.update({key: mandatory_sheets[key] for key in ["entities", "settings"] if key in mandatory_sheets})
+
+    # Stamp the settings sheet: version is the current timestamp, form_id is
+    # the supplied id or a fresh UUID, form_title is the category name
+    if "settings" in custom_sheets:
+        settings_df = custom_sheets["settings"]
+        settings_df["version"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        # Store the id as text rather than a UUID object
+        settings_df["form_id"] = existing_id if existing_id else str(uuid4())
+        settings_df["form_title"] = form_category
+
+    # Append select_one_from_file for additional entities
+    if additional_entities:
+        for entity_name in additional_entities:
+            custom_sheets["survey"] = append_select_one_from_file_row(custom_sheets["survey"], entity_name)
+
+    # Append task id rows to choices sheet
+    if task_count:
+        custom_sheets["choices"] = append_task_ids_to_choices_sheet(custom_sheets["choices"], task_count)
+
+    # Return spreadsheet wrapped as BytesIO memory object
+    output = BytesIO()
+    with pd.ExcelWriter(output, engine="openpyxl") as writer:
+        for sheet_name, df in custom_sheets.items():
+            df.to_excel(writer, sheet_name=sheet_name, index=False)
+
+    output.seek(0)
+    return output
diff --git a/osm_fieldwork/xlsforms/buildings.xls b/osm_fieldwork/xlsforms/buildings.xls
index 6551bc31..cfc6e46d 100644
Binary files a/osm_fieldwork/xlsforms/buildings.xls and b/osm_fieldwork/xlsforms/buildings.xls differ
diff --git a/osm_fieldwork/xlsforms/fmtm/mandatory_fields.xls b/osm_fieldwork/xlsforms/fmtm/mandatory_fields.xls
index d297c2a6..699ad317 100644
Binary files a/osm_fieldwork/xlsforms/fmtm/mandatory_fields.xls and b/osm_fieldwork/xlsforms/fmtm/mandatory_fields.xls differ
diff --git a/osm_fieldwork/xlsforms/health.xls b/osm_fieldwork/xlsforms/health.xls
index 8479710c..5357d9c2 100644
Binary files a/osm_fieldwork/xlsforms/health.xls and b/osm_fieldwork/xlsforms/health.xls differ
diff --git a/tests/test_update_form.py b/tests/test_update_form.py
deleted file mode 100644
index 73542081..00000000
--- a/tests/test_update_form.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# Copyright (c) 2022, 2023 Humanitarian OpenStreetMap Team
-#
-# This file is part of osm_fieldwork.
-#
-# osm-fieldwork is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# osm-fieldwork is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with osm_fieldwork. If not, see .
-#
-"""Test functionalty of update_form.py."""
-
-from io import BytesIO
-from pathlib import Path
-
-from openpyxl import load_workbook
-
-from osm_fieldwork.update_form import update_xls_form
-
-
-def test_merge_mandatory_fields():
- """Merge the mandatory fields XLSForm to a test survey form."""
- test_form = Path(__file__).parent / "testdata" / "test_form_for_mandatory_fields.xls"
- with open(test_form, "rb") as xlsform:
- form_bytes = BytesIO(xlsform.read())
-
- updated_form = update_xls_form(form_bytes)
- workbook = load_workbook(filename=BytesIO(updated_form.getvalue()))
-
- # Check the 'survey' sheet
- if "survey" not in workbook.sheetnames:
- raise ValueError("The 'survey' sheet was not found in the workbook")
- survey_sheet = workbook["survey"]
-
- # Find the index of the 'name' column
- name_col = None
- for col in survey_sheet.iter_cols(1, survey_sheet.max_column):
- if col[0].value == "name":
- name_col = col
- break
- assert name_col is not None, "The 'name' column was not found."
-
- # Check if certain fields are present in the 'name' column (skip the header)
- existing_field = any(cell.value == "existing" for cell in name_col[1:])
- assert existing_field, "'existing' field not found in the 'name' column."
-
- status_field = any(cell.value == "status" for cell in name_col[1:])
- assert status_field, "'status' field not found in the 'name' column."
-
- digitisation_correct_field = any(cell.value == "digitisation_correct" for cell in name_col[1:])
- assert digitisation_correct_field, "'digitisation_correct' field not found in the 'name' column."
-
- # Check that the 'name' column does not have a duplicate entry for 'username'
- username_count = sum(1 for cell in name_col[1:] if cell.value == "username")
- assert username_count <= 1, "Duplicate 'username' entry found in the 'name' column."
-
- # Check the 'choices' sheet
- if "choices" not in workbook.sheetnames:
- raise ValueError("The 'choices' sheet was not found in the workbook")
- choices_sheet = workbook["choices"]
-
- # Find the index of the 'name' column in the 'choices' sheet
- choices_name_col = None
- for col in choices_sheet.iter_cols(1, choices_sheet.max_column):
- if col[0].value == "name":
- choices_name_col = col
- break
-
- assert choices_name_col is not None, "'name' column was not found in the 'choices' sheet."
-
- # Test: Check that the 'choices' sheet does not have duplicate entries for 'yes' and 'no'
- yes_count = sum(1 for cell in choices_name_col[1:] if cell.value == "yes")
- no_count = sum(1 for cell in choices_name_col[1:] if cell.value == "no")
- assert yes_count <= 1, "Duplicate 'yes' entry found in the 'value' column of 'choices' sheet."
- assert no_count <= 1, "Duplicate 'no' entry found in the 'value' column of 'choices' sheet."
-
- # Check the 'entities' sheet
- if "entities" not in workbook.sheetnames:
- raise ValueError("The 'entities' sheet was not found in the workbook")
- entities_sheet = workbook["entities"]
-
- # Find the index of the 'label' column in the 'entities' sheet
- entities_label_col = None
- for col in entities_sheet.iter_cols(1, entities_sheet.max_column):
- if col[0].value == "label":
- entities_label_col = col
- break
-
- assert entities_label_col is not None, "'label' column was not found in the 'entities' sheet."
-
- # Check that the 'entities' label value of 'test label' is replaced by required value
- test_label_present = any(cell.value == "test label" for cell in entities_label_col[1:])
- assert not test_label_present, "'test label' found in the 'label' column of 'entities' sheet."
-
- # TODO add test to check that digitisation questions come at end of sheet
diff --git a/tests/test_update_xlsform.py b/tests/test_update_xlsform.py
new file mode 100644
index 00000000..fcb17db4
--- /dev/null
+++ b/tests/test_update_xlsform.py
@@ -0,0 +1,148 @@
+# Copyright (c) 2022, 2023 Humanitarian OpenStreetMap Team
+#
+# This file is part of osm_fieldwork.
+#
+# osm-fieldwork is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# osm-fieldwork is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with osm_fieldwork.  If not, see <https://www.gnu.org/licenses/>.
+#
+"""Test functionality of update_xlsform.py."""
+
+from io import BytesIO
+from pathlib import Path
+
+from openpyxl import load_workbook
+
+from osm_fieldwork.update_xlsform import append_mandatory_fields
+
+
+def get_column_index(sheet, column_name):
+    """Return the 1-based index of the first column whose header cell equals column_name.
+
+    Raises:
+        ValueError: if no column has that header.
+    """
+    matching_positions = (
+        position
+        for position, column in enumerate(sheet.iter_cols(1, sheet.max_column), start=1)
+        if column[0].value == column_name
+    )
+    found = next(matching_positions, None)
+    if found is None:
+        raise ValueError(f"Column '{column_name}' not found.")
+    return found
+
+
+def get_row_index(sheet, column_index, value):
+    """Return the 1-based index of the first row holding ``value`` in the given column.
+
+    Raises:
+        ValueError: if the value does not occur in that column.
+    """
+    column_cells = sheet.iter_rows(min_col=column_index, max_col=column_index)
+    matching_positions = (
+        position for position, row in enumerate(column_cells, start=1) if row[0].value == value
+    )
+    found = next(matching_positions, None)
+    if found is None:
+        raise ValueError(f"Value '{value}' not found in column {column_index}.")
+    return found
+
+
+async def test_merge_mandatory_fields():
+    """Merge the mandatory fields into a test survey form and inspect each sheet.
+
+    Covers the hardcoded form_category calculation, de-duplicated yes/no
+    choices, the replaced entities sheet and the form_title setting.
+    """
+    form_path = Path(__file__).parent / "testdata" / "test_form_for_mandatory_fields.xls"
+    form_bytes = BytesIO(form_path.read_bytes())
+
+    updated_form = await append_mandatory_fields(form_bytes, "buildings")
+    workbook = load_workbook(filename=BytesIO(updated_form.getvalue()))
+
+    # 'survey' sheet: the form_category calculation uses the singular category
+    if "survey" not in workbook.sheetnames:
+        raise ValueError("The 'survey' sheet was not found in the workbook")
+    survey_sheet = workbook["survey"]
+
+    name_col = get_column_index(survey_sheet, "name")
+    calculation_col = get_column_index(survey_sheet, "calculation")
+    category_row = get_row_index(survey_sheet, name_col, "form_category")
+
+    form_category_calculation = survey_sheet.cell(row=category_row, column=calculation_col).value
+    expected_calculation = "once('building')"
+    assert (
+        form_category_calculation == expected_calculation
+    ), f"Expected 'calculation' value for 'form_category' to be '{expected_calculation}', but got '{form_category_calculation}'."
+
+    # 'choices' sheet: 'yes' and 'no' must not be duplicated by the merge
+    if "choices" not in workbook.sheetnames:
+        raise ValueError("The 'choices' sheet was not found in the workbook")
+    choices_sheet = workbook["choices"]
+
+    choices_name_col = get_column_index(choices_sheet, "name")
+    choice_names = [
+        row[0].value
+        for row in choices_sheet.iter_rows(min_col=choices_name_col, max_col=choices_name_col, min_row=2)
+    ]
+    yes_count = choice_names.count("yes")
+    no_count = choice_names.count("no")
+    assert yes_count <= 1, "Duplicate 'yes' entry found in the 'value' column of 'choices' sheet."
+    assert no_count <= 1, "Duplicate 'no' entry found in the 'value' column of 'choices' sheet."
+
+    # 'entities' sheet: the custom 'test label' value must have been replaced
+    if "entities" not in workbook.sheetnames:
+        raise ValueError("The 'entities' sheet was not found in the workbook")
+    entities_sheet = workbook["entities"]
+
+    entities_label_col = get_column_index(entities_sheet, "label")
+    entity_labels = [
+        row[0].value
+        for row in entities_sheet.iter_rows(min_col=entities_label_col, max_col=entities_label_col, min_row=2)
+    ]
+    assert "test label" not in entity_labels, "'test label' found in the 'label' column of 'entities' sheet."
+
+    # 'settings' sheet: form_title is set to the category passed in
+    if "settings" not in workbook.sheetnames:
+        raise ValueError("The 'settings' sheet was not found in the workbook")
+    settings_sheet = workbook["settings"]
+
+    form_title_col = get_column_index(settings_sheet, "form_title")
+    form_title_value = settings_sheet.cell(row=2, column=form_title_col).value
+    assert form_title_value == "buildings", "form_title field is not set to 'buildings'"
+
+    # TODO add test to check that digitisation questions come at end of sheet
+
+
+async def test_add_extra_select_from_file():
+    """Append extra select_one_from_file questions based on Entity list names.
+
+    Each plural Entity list name should produce a survey field named after
+    its singular form.
+    """
+    test_form = Path(__file__).parent / "testdata" / "test_form_for_mandatory_fields.xls"
+    with open(test_form, "rb") as xlsform:
+        form_bytes = BytesIO(xlsform.read())
+
+    updated_form = await append_mandatory_fields(form_bytes, "buildings", additional_entities=["roads", "waterpoints"])
+    workbook = load_workbook(filename=BytesIO(updated_form.getvalue()))
+
+    survey_sheet = workbook["survey"]
+    # Locate the 'name' column by its header rather than assuming it is column B
+    name_col_index = get_column_index(survey_sheet, "name")
+    name_column = [
+        row[0].value
+        for row in survey_sheet.iter_rows(min_col=name_col_index, max_col=name_col_index, min_row=2)
+    ]
+    assert "road" in name_column, "The 'road' field was not added to the survey sheet."
+    assert "waterpoint" in name_column, "The 'waterpoint' field was not added to the survey sheet."
+
+
+async def test_add_task_ids_to_choices():
+    """Test appending each task id (1..task_count) as a row in the choices sheet."""
+    test_form = Path(__file__).parent / "testdata" / "test_form_for_mandatory_fields.xls"
+    with open(test_form, "rb") as xlsform:
+        form_bytes = BytesIO(xlsform.read())
+
+    task_count = 7
+    updated_form = await append_mandatory_fields(form_bytes, "buildings", task_count=task_count)
+    workbook = load_workbook(filename=BytesIO(updated_form.getvalue()))
+
+    choices_sheet = workbook["choices"]
+    # Locate the 'name' column by its header rather than assuming it is column B
+    name_col_index = get_column_index(choices_sheet, "name")
+    name_column = [
+        row[0].value
+        for row in choices_sheet.iter_rows(min_col=name_col_index, max_col=name_col_index, min_row=2)
+    ]
+
+    # Derive the expected ids from task_count so the two cannot drift apart
+    for task_id in range(1, task_count + 1):
+        assert task_id in name_column, f"Task ID {task_id} not found in the choices sheet."