diff --git a/docs/api/update_xlsform.md b/docs/api/update_xlsform.md
new file mode 100644
index 00000000..1c6be481
--- /dev/null
+++ b/docs/api/update_xlsform.md
@@ -0,0 +1,6 @@
+# update_xlsform.py
+
+::: osm_fieldwork.update_xlsform.append_mandatory_fields
+    options:
+      show_source: false
+      heading_level: 3
diff --git a/mkdocs.yml b/mkdocs.yml
index 87027284..2337e5b0 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -91,6 +91,7 @@ nav:
       - ODKInstance: api/ODKInstance.md
       - ODKForm: api/ODKForm.md
       - filter_data: api/filter_data.md
+      - update_xlsform: api/update_xlsform.md
   - Deep Tech Dives:
       - External Data: about/externaldata.md
       - Data conflation: about/conflation.md
diff --git a/osm_fieldwork/update_form.py b/osm_fieldwork/update_form.py
deleted file mode 100644
index c6a226a0..00000000
--- a/osm_fieldwork/update_form.py
+++ /dev/null
@@ -1,123 +0,0 @@
-from datetime import datetime
-from io import BytesIO
-
-import pandas as pd
-from python_calamine.pandas import pandas_monkeypatch
-
-from osm_fieldwork.xlsforms import xlsforms_path
-
-# Monkeypatch pandas to add calamine driver
-pandas_monkeypatch()
-
-
-def merge_sheets(mandatory_df, custom_df, digitisation_df, is_survey_sheet=False):
-    # Remove rows with None in 'name' column
-    if "name" in mandatory_df.columns:
-        mandatory_df = mandatory_df.dropna(subset=["name"])
-    if "name" in custom_df.columns:
-        custom_df = custom_df.dropna(subset=["name"])
-    if "name" in digitisation_df.columns:
-        digitisation_df = digitisation_df.dropna(subset=["name"])
-
-    # Identify common fields between custom_df and mandatory_df or digitisation_df
-    common_fields = (
-        set(custom_df["name"])
-        .intersection(set(mandatory_df["name"]))
-        .union(set(custom_df["name"]).intersection(set(digitisation_df["name"])))
-    )
-
-    # Keep common fields from custom_df in their original order
-    custom_common_df = custom_df[custom_df["name"].isin(common_fields)]
-    custom_non_common_df = custom_df[~custom_df["name"].isin(common_fields)]
-
-    # Filter out the common fields from the mandatory_df and digitisation_df
-    mandatory_df_filtered = mandatory_df[~mandatory_df["name"].isin(common_fields)]
-    digitisation_df_filtered = digitisation_df[~digitisation_df["name"].isin(common_fields)]
-
-    if not is_survey_sheet:
-        return pd.concat(
-            [
-                custom_common_df,
-                mandatory_df_filtered,
-                custom_non_common_df,
-                digitisation_df_filtered,
-            ],
-            ignore_index=True,
-        )
-    survey_group_row = pd.DataFrame(
-        {
-            "type": ["begin group"],
-            "name": ["survey_questions"],
-            "label": ["Survey Form"],
-            "relevant": [
-                "(${new_feature} = 'yes') or (${building_exists} = 'yes')"
-            ],  # Add the relevant condition to display this group only if "Yes" is selected
-        }
-    )
-    survey_end_group_row = pd.DataFrame({"type": ["end group"], "name": ["end_survey_questions"], "label": ["End Survey Form"]})
-    digitisation_group = pd.DataFrame(
-        {
-            "type": ["begin group"],
-            "name": ["verification"],
-            "label": ["Verification Form"],
-            "relevant": ["(${new_feature} = 'yes') or (${building_exists} = 'yes')"],
-        }
-    )
-    digitisation_end_group = pd.DataFrame({"type": ["end group"], "name": ["end_verification"], "label": ["End Verification Form"]})
-
-    # Concatenate: mandatory fields at the top, custom common fields, remaining custom fields, and finally append form fields
-    return pd.concat(
-        [
-            custom_common_df,
-            mandatory_df_filtered,
-            survey_group_row,
-            custom_non_common_df,
-            survey_end_group_row,
-            digitisation_group,
-            digitisation_df_filtered,
-            digitisation_end_group,
-        ],
-        ignore_index=True,
-    )
-
-
-def update_xls_form(custom_form: BytesIO) -> BytesIO:
-    custom_sheets = pd.read_excel(custom_form, sheet_name=None, engine="calamine")
-    default_form_path = f"{xlsforms_path}/fmtm/mandatory_fields.xls"
-    digitisation_form_path = f"{xlsforms_path}/fmtm/digitisation_fields.xls"
-    digitisation_sheets = pd.read_excel(digitisation_form_path, sheet_name=None, engine="calamine")
-    mandatory_sheets = pd.read_excel(default_form_path, sheet_name=None, engine="calamine")
-
-    # Process and merge the 'survey' sheet if present in all forms
-    if "survey" in mandatory_sheets and "survey" in digitisation_sheets and "survey" in custom_sheets:
-        custom_sheets["survey"] = merge_sheets(
-            mandatory_sheets["survey"], custom_sheets["survey"], digitisation_sheets["survey"], True
-        )
-
-    # Process and merge the 'choices' sheet if present in all forms
-    if "choices" in mandatory_sheets and "choices" in digitisation_sheets and "choices" in custom_sheets:
-        custom_sheets["choices"] = merge_sheets(
-            mandatory_sheets["choices"], custom_sheets["choices"], digitisation_sheets["choices"]
-        )
-
-    # Append or overwrite the existing entities sheet
-    if "entities" in mandatory_sheets:
-        custom_sheets["entities"] = mandatory_sheets["entities"]
-
-    if "settings" in mandatory_sheets:
-        custom_sheets["settings"] = mandatory_sheets["settings"]
-        current_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
-        # Set the 'version' column to the current timestamp (if 'version' column exists in 'settings')
-        if "version" in custom_sheets["settings"].columns:
-            # set column type to str
-            custom_sheets["settings"]["version"] = custom_sheets["settings"]["version"].astype(str)
-            custom_sheets["settings"].loc[:, "version"] = current_timestamp
-
-    output = BytesIO()
-    with pd.ExcelWriter(output, engine="openpyxl") as writer:
-        for sheet_name, df in custom_sheets.items():
-            df.to_excel(writer, sheet_name=sheet_name, index=False)
-
-    output.seek(0)
-    return output
diff --git a/osm_fieldwork/update_xlsform.py b/osm_fieldwork/update_xlsform.py
new file mode 100644
index 00000000..31ff22df
--- /dev/null
+++ b/osm_fieldwork/update_xlsform.py
@@ -0,0 +1,213 @@
+"""Update an existing XLSForm with additional fields useful for field mapping."""
+
+from datetime import datetime
+from io import BytesIO
+from uuid import uuid4
+
+import pandas as pd
+from python_calamine.pandas import pandas_monkeypatch
+
+from osm_fieldwork.xlsforms import xlsforms_path
+
+# Monkeypatch pandas to add calamine driver
+pandas_monkeypatch()
+
+# Constants: the name column, the 'feature' field name, and the names of the two survey groups
+FEATURE_COLUMN = "feature"
+NAME_COLUMN = "name"
+SURVEY_GROUP_NAME = "survey_questions"
+DIGITISATION_GROUP_NAME = "verification"
+
+
+def filter_df_empty_rows(df, column=NAME_COLUMN):
+    """Drop rows whose `column` value is missing, but keep group rows when a 'type' column exists."""
+    if column not in df.columns:
+        return df
+    keep_mask = df[column].notna()
+    if "type" in df.columns:
+        # Group marker rows survive the filter even when their `column` cell is empty
+        keep_mask = keep_mask | df["type"].isin(["begin group", "end group", "begin_group", "end_group"])
+    return df[keep_mask]
+
+
+def merge_dataframes(mandatory_df, custom_df, digitisation_df, is_survey_sheet=False):
+    """Combine the mandatory, custom and digitisation sheets into one dataframe.
+
+    A name present in the custom sheet and in either bundled sheet is kept from the
+    custom sheet only. For the survey sheet, the remaining custom rows and the
+    digitisation rows are each wrapped in begin/end group rows.
+    """
+    # Drop rows without a name (group rows are retained by the filter)
+    mandatory_df, custom_df, digitisation_df = (
+        filter_df_empty_rows(sheet) for sheet in (mandatory_df, custom_df, digitisation_df)
+    )
+
+    # Names the custom sheet shares with the mandatory or digitisation sheet
+    bundled_names = set(mandatory_df[NAME_COLUMN]) | set(digitisation_df[NAME_COLUMN])
+    common_fields = set(custom_df[NAME_COLUMN]) & bundled_names
+
+    # Partition the custom rows by whether their name is shared
+    is_common = custom_df[NAME_COLUMN].isin(common_fields)
+    custom_common_df = custom_df[is_common]
+    custom_non_common_df = custom_df[~is_common]
+
+    # The custom definition wins: remove shared names from the other two sheets
+    mandatory_df_filtered = mandatory_df[~mandatory_df[NAME_COLUMN].isin(common_fields)]
+    digitisation_df_filtered = digitisation_df[~digitisation_df[NAME_COLUMN].isin(common_fields)]
+
+    if is_survey_sheet:
+        # Custom rows go inside the survey group and digitisation rows inside the
+        # verification group; only the latter is given a 'relevant' condition
+        survey_group = create_group(SURVEY_GROUP_NAME)
+        digitisation_group = create_group(DIGITISATION_GROUP_NAME)
+        digitisation_group["start"]["relevant"] = ["(${new_feature} = 'yes') or (${building_exists} = 'yes')"]
+        ordered_frames = [
+            custom_common_df,
+            mandatory_df_filtered,
+            survey_group["start"],
+            custom_non_common_df,
+            survey_group["end"],
+            digitisation_group["start"],
+            digitisation_df_filtered,
+            digitisation_group["end"],
+        ]
+    else:
+        ordered_frames = [custom_common_df, mandatory_df_filtered, custom_non_common_df, digitisation_df_filtered]
+
+    return pd.concat(ordered_frames, ignore_index=True)
+
+
+def create_group(name: str) -> dict[str, pd.DataFrame]:
+    """Build the single-row 'begin group' and 'end group' dataframes for an XLSForm group."""
+    begin_row = {"type": ["begin group"], "name": [name]}
+    end_row = {"type": ["end group"], "name": [f"end of {name}"]}
+    return {"start": pd.DataFrame(begin_row), "end": pd.DataFrame(end_row)}
+
+
+def append_select_one_from_file_row(df: pd.DataFrame, entity_name: str) -> pd.DataFrame:
+    """Insert a select_one_from_file question for an Entity list below the 'feature' row.
+
+    A trailing 's' is removed from `entity_name` before it is used for the CSV
+    file name, the field name and the labels.
+
+    Raises:
+        ValueError: if no row of `df` is named 'feature'.
+    """
+    feature_rows = df.index[df[NAME_COLUMN] == FEATURE_COLUMN].tolist()
+    if not feature_rows:
+        raise ValueError(f"Row with '{NAME_COLUMN}' == '{FEATURE_COLUMN}' not found in survey sheet.")
+
+    # Split directly below the first 'feature' row. The index label is used as a
+    # position for iloc, so `df` is expected to have a default 0..n-1 index.
+    split_at = feature_rows[0] + 1
+    entity_name = entity_name[:-1] if entity_name.endswith("s") else entity_name
+
+    new_row = pd.DataFrame(
+        {
+            "type": [f"select_one_from_file {entity_name}.csv"],
+            "name": [entity_name],
+            "label::English(en)": [entity_name],
+            "appearance": ["map"],
+            "choice_filter": ["selected(${task_filter}, '') or task_id=${task_filter}"],
+            "trigger": ["${task_filter}"],
+            "label::Swahili(sw)": [entity_name],
+            "label::French(fr)": [entity_name],
+            "label::Spanish(es)": [entity_name],
+        }
+    )
+
+    return pd.concat([df.iloc[:split_at], new_row, df.iloc[split_at:]], ignore_index=True)
+
+
+def append_task_ids_to_choices_sheet(df: pd.DataFrame, task_count: int) -> pd.DataFrame:
+    """Append one 'task_filter' choice row per task id (1 to task_count) to the choices sheet.
+
+    Each task id is used both as the choice name and as its label in every language column.
+    """
+    ids = [*range(1, task_count + 1)]
+    task_filter_rows = pd.DataFrame(
+        {
+            "list_name": ["task_filter"] * task_count,
+            "name": ids,
+            "label::English(en)": ids,
+            "label::Swahili(sw)": ids,
+            "label::French(fr)": ids,
+            "label::Spanish(es)": ids,
+        }
+    )
+    return pd.concat([df, task_filter_rows], ignore_index=True)
+
+
+async def append_mandatory_fields(
+    custom_form: BytesIO,
+    form_category: str,
+    additional_entities: list[str] = None,
+    task_count: int = None,
+    existing_id: str = None,
+) -> BytesIO:
+    """Append mandatory fields to the XLSForm for use in FMTM.
+
+    Args:
+        custom_form(BytesIO): the XLSForm data uploaded, wrapped in BytesIO.
+        form_category(str): the form category name. Used as the form_title and,
+            with any trailing 's' removed, in the 'form_category' calculation.
+        additional_entities(list[str]): add extra select_one_from_file fields to
+            reference an additional Entity list (set of geometries). The values
+            should be plural, so that 's' will be stripped in the field name.
+        task_count(int): number of tasks, used to generate task_id entries in choices
+            sheet. These are used to filter Entities by task id in ODK Collect.
+        existing_id(str): an existing UUID to use for the form_id, else random uuid4.
+
+    Returns:
+        BytesIO: the updated XLSForm, wrapped in BytesIO.
+    """
+    custom_sheets = pd.read_excel(custom_form, sheet_name=None, engine="calamine")
+    mandatory_sheets = pd.read_excel(f"{xlsforms_path}/fmtm/mandatory_fields.xls", sheet_name=None, engine="calamine")
+    digitisation_sheets = pd.read_excel(f"{xlsforms_path}/fmtm/digitisation_fields.xls", sheet_name=None, engine="calamine")
+
+    # Merge 'survey' and 'choices' sheets
+    if "survey" in custom_sheets:
+        custom_sheets["survey"] = merge_dataframes(
+            mandatory_sheets.get("survey"), custom_sheets.get("survey"), digitisation_sheets.get("survey"), True
+        )
+        # Hardcode the form_category value for the start instructions.
+        # Plural to singular; a category without a trailing 's' (e.g. 'health') is
+        # used unchanged, so form_category_singular is always assigned.
+        form_category_singular = form_category[:-1] if form_category.endswith("s") else form_category
+        form_category_row = custom_sheets["survey"].loc[custom_sheets["survey"]["name"] == "form_category"]
+        if not form_category_row.empty:
+            custom_sheets["survey"].loc[custom_sheets["survey"]["name"] == "form_category", "calculation"] = (
+                f"once('{form_category_singular}')"
+            )
+
+    if "choices" in custom_sheets:
+        custom_sheets["choices"] = merge_dataframes(
+            mandatory_sheets.get("choices"), custom_sheets.get("choices"), digitisation_sheets.get("choices")
+        )
+
+    # Append or overwrite 'entities' and 'settings' sheets
+    custom_sheets.update({key: mandatory_sheets[key] for key in ["entities", "settings"] if key in mandatory_sheets})
+
+    # Set the version (current timestamp), form_id and form_title in the 'settings' sheet
+    if "settings" in custom_sheets:
+        custom_sheets["settings"]["version"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        custom_sheets["settings"]["form_id"] = existing_id or str(uuid4())
+        custom_sheets["settings"]["form_title"] = form_category
+
+    # Append select_one_from_file for additional entities
+    if additional_entities:
+        for entity_name in additional_entities:
+            custom_sheets["survey"] = append_select_one_from_file_row(custom_sheets["survey"], entity_name)
+
+    # Append task id rows to choices sheet
+    if task_count:
+        custom_sheets["choices"] = append_task_ids_to_choices_sheet(custom_sheets["choices"], task_count)
+
+    # Return spreadsheet wrapped as BytesIO memory object
+    output = BytesIO()
+    with pd.ExcelWriter(output, engine="openpyxl") as writer:
+        for sheet_name, df in custom_sheets.items():
+            df.to_excel(writer, sheet_name=sheet_name, index=False)
+
+    output.seek(0)
+    return output
diff --git a/osm_fieldwork/xlsforms/buildings.xls b/osm_fieldwork/xlsforms/buildings.xls
index 6551bc31..cfc6e46d 100644
Binary files a/osm_fieldwork/xlsforms/buildings.xls and b/osm_fieldwork/xlsforms/buildings.xls differ
diff --git a/osm_fieldwork/xlsforms/fmtm/mandatory_fields.xls b/osm_fieldwork/xlsforms/fmtm/mandatory_fields.xls
index d297c2a6..699ad317 100644
Binary files a/osm_fieldwork/xlsforms/fmtm/mandatory_fields.xls and b/osm_fieldwork/xlsforms/fmtm/mandatory_fields.xls differ
diff --git a/osm_fieldwork/xlsforms/health.xls b/osm_fieldwork/xlsforms/health.xls
index 8479710c..5357d9c2 100644
Binary files a/osm_fieldwork/xlsforms/health.xls and b/osm_fieldwork/xlsforms/health.xls differ
diff --git a/tests/test_update_form.py b/tests/test_update_form.py
deleted file mode 100644
index 73542081..00000000
--- a/tests/test_update_form.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# Copyright (c) 2022, 2023 Humanitarian OpenStreetMap Team
-#
-# This file is part of osm_fieldwork.
-#
-#     osm-fieldwork is free software: you can redistribute it and/or modify
-#     it under the terms of the GNU General Public License as published by
-#     the Free Software Foundation, either version 3 of the License, or
-#     (at your option) any later version.
-#
-#     osm-fieldwork is distributed in the hope that it will be useful,
-#     but WITHOUT ANY WARRANTY; without even the implied warranty of
-#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#     GNU General Public License for more details.
-#
-#     You should have received a copy of the GNU General Public License
-#     along with osm_fieldwork.  If not, see <https:#www.gnu.org/licenses/>.
-#
-"""Test functionalty of update_form.py."""
-
-from io import BytesIO
-from pathlib import Path
-
-from openpyxl import load_workbook
-
-from osm_fieldwork.update_form import update_xls_form
-
-
-def test_merge_mandatory_fields():
-    """Merge the mandatory fields XLSForm to a test survey form."""
-    test_form = Path(__file__).parent / "testdata" / "test_form_for_mandatory_fields.xls"
-    with open(test_form, "rb") as xlsform:
-        form_bytes = BytesIO(xlsform.read())
-
-    updated_form = update_xls_form(form_bytes)
-    workbook = load_workbook(filename=BytesIO(updated_form.getvalue()))
-
-    # Check the 'survey' sheet
-    if "survey" not in workbook.sheetnames:
-        raise ValueError("The 'survey' sheet was not found in the workbook")
-    survey_sheet = workbook["survey"]
-
-    # Find the index of the 'name' column
-    name_col = None
-    for col in survey_sheet.iter_cols(1, survey_sheet.max_column):
-        if col[0].value == "name":
-            name_col = col
-            break
-    assert name_col is not None, "The 'name' column was not found."
-
-    # Check if certain fields are present in the 'name' column (skip the header)
-    existing_field = any(cell.value == "existing" for cell in name_col[1:])
-    assert existing_field, "'existing' field not found in the 'name' column."
-
-    status_field = any(cell.value == "status" for cell in name_col[1:])
-    assert status_field, "'status' field not found in the 'name' column."
-
-    digitisation_correct_field = any(cell.value == "digitisation_correct" for cell in name_col[1:])
-    assert digitisation_correct_field, "'digitisation_correct' field not found in the 'name' column."
-
-    # Check that the 'name' column does not have a duplicate entry for 'username'
-    username_count = sum(1 for cell in name_col[1:] if cell.value == "username")
-    assert username_count <= 1, "Duplicate 'username' entry found in the 'name' column."
-
-    # Check the 'choices' sheet
-    if "choices" not in workbook.sheetnames:
-        raise ValueError("The 'choices' sheet was not found in the workbook")
-    choices_sheet = workbook["choices"]
-
-    # Find the index of the 'name' column in the 'choices' sheet
-    choices_name_col = None
-    for col in choices_sheet.iter_cols(1, choices_sheet.max_column):
-        if col[0].value == "name":
-            choices_name_col = col
-            break
-
-    assert choices_name_col is not None, "'name' column was not found in the 'choices' sheet."
-
-    # Test: Check that the 'choices' sheet does not have duplicate entries for 'yes' and 'no'
-    yes_count = sum(1 for cell in choices_name_col[1:] if cell.value == "yes")
-    no_count = sum(1 for cell in choices_name_col[1:] if cell.value == "no")
-    assert yes_count <= 1, "Duplicate 'yes' entry found in the 'value' column of 'choices' sheet."
-    assert no_count <= 1, "Duplicate 'no' entry found in the 'value' column of 'choices' sheet."
-
-    # Check the 'entities' sheet
-    if "entities" not in workbook.sheetnames:
-        raise ValueError("The 'entities' sheet was not found in the workbook")
-    entities_sheet = workbook["entities"]
-
-    # Find the index of the 'label' column in the 'entities' sheet
-    entities_label_col = None
-    for col in entities_sheet.iter_cols(1, entities_sheet.max_column):
-        if col[0].value == "label":
-            entities_label_col = col
-            break
-
-    assert entities_label_col is not None, "'label' column was not found in the 'entities' sheet."
-
-    # Check that the 'entities' label value of 'test label' is replaced by required value
-    test_label_present = any(cell.value == "test label" for cell in entities_label_col[1:])
-    assert not test_label_present, "'test label' found in the 'label' column of 'entities' sheet."
-
-    # TODO add test to check that digitisation questions come at end of sheet
diff --git a/tests/test_update_xlsform.py b/tests/test_update_xlsform.py
new file mode 100644
index 00000000..fcb17db4
--- /dev/null
+++ b/tests/test_update_xlsform.py
@@ -0,0 +1,148 @@
+# Copyright (c) 2022, 2023 Humanitarian OpenStreetMap Team
+#
+# This file is part of osm_fieldwork.
+#
+#     osm-fieldwork is free software: you can redistribute it and/or modify
+#     it under the terms of the GNU General Public License as published by
+#     the Free Software Foundation, either version 3 of the License, or
+#     (at your option) any later version.
+#
+#     osm-fieldwork is distributed in the hope that it will be useful,
+#     but WITHOUT ANY WARRANTY; without even the implied warranty of
+#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#     GNU General Public License for more details.
+#
+#     You should have received a copy of the GNU General Public License
+#     along with osm_fieldwork.  If not, see <https:#www.gnu.org/licenses/>.
+#
+"""Test functionality of update_xlsform.py."""
+
+from io import BytesIO
+from pathlib import Path
+
+from openpyxl import load_workbook
+
+from osm_fieldwork.update_xlsform import append_mandatory_fields
+
+
+def get_column_index(sheet, column_name):
+    """Return the 1-based index of the column whose first-row cell equals `column_name`."""
+    headers = [column[0].value for column in sheet.iter_cols(1, sheet.max_column)]
+    if column_name not in headers:
+        raise ValueError(f"Column '{column_name}' not found.")
+    return headers.index(column_name) + 1
+
+
+def get_row_index(sheet, column_index, value):
+    """Return the 1-based index of the first row whose cell in `column_index` equals `value`."""
+    column_values = [row[0].value for row in sheet.iter_rows(min_col=column_index, max_col=column_index)]
+    if value not in column_values:
+        raise ValueError(f"Value '{value}' not found in column {column_index}.")
+    return column_values.index(value) + 1
+
+
+async def test_merge_mandatory_fields():
+    """Merge the mandatory fields XLSForm into a test survey form and check every sheet.
+
+    Checks, in order:
+      - survey: the 'form_category' calculation is once('building').
+      - choices: 'yes' and 'no' each occur at most once in the 'name' column.
+      - entities: no 'label' cell below the header holds 'test label'.
+      - settings: 'form_title' in row 2 is 'buildings'.
+    """
+    test_form = Path(__file__).parent / "testdata" / "test_form_for_mandatory_fields.xls"
+    form_bytes = BytesIO(test_form.read_bytes())
+
+    updated_form = await append_mandatory_fields(form_bytes, "buildings")
+    workbook = load_workbook(filename=BytesIO(updated_form.getvalue()))
+
+    # Survey sheet
+    if "survey" not in workbook.sheetnames:
+        raise ValueError("The 'survey' sheet was not found in the workbook")
+    survey_sheet = workbook["survey"]
+
+    # Find the 'form_category' row, then read its cell in the 'calculation' column
+    name_col_index = get_column_index(survey_sheet, "name")
+    calculation_col_index = get_column_index(survey_sheet, "calculation")
+    form_category_row_index = get_row_index(survey_sheet, name_col_index, "form_category")
+
+    expected_calculation = "once('building')"
+    form_category_calculation = survey_sheet.cell(row=form_category_row_index, column=calculation_col_index).value
+    assert (
+        form_category_calculation == expected_calculation
+    ), f"Expected 'calculation' value for 'form_category' to be '{expected_calculation}', but got '{form_category_calculation}'."
+
+    # Choices sheet
+    if "choices" not in workbook.sheetnames:
+        raise ValueError("The 'choices' sheet was not found in the workbook")
+    choices_sheet = workbook["choices"]
+
+    # Collect every value of the 'name' column below the header row
+    choices_name_col_index = get_column_index(choices_sheet, "name")
+    choice_names = [
+        row[0].value
+        for row in choices_sheet.iter_rows(min_col=choices_name_col_index, max_col=choices_name_col_index, min_row=2)
+    ]
+    assert choice_names.count("yes") <= 1, "Duplicate 'yes' entry found in the 'value' column of 'choices' sheet."
+    assert choice_names.count("no") <= 1, "Duplicate 'no' entry found in the 'value' column of 'choices' sheet."
+
+    # Entities sheet
+    if "entities" not in workbook.sheetnames:
+        raise ValueError("The 'entities' sheet was not found in the workbook")
+    entities_sheet = workbook["entities"]
+
+    # 'test label' must no longer be present in the 'label' column
+    entities_label_col_index = get_column_index(entities_sheet, "label")
+    entity_labels = [
+        row[0].value
+        for row in entities_sheet.iter_rows(min_col=entities_label_col_index, max_col=entities_label_col_index, min_row=2)
+    ]
+    assert "test label" not in entity_labels, "'test label' found in the 'label' column of 'entities' sheet."
+
+    # Settings sheet
+    if "settings" not in workbook.sheetnames:
+        raise ValueError("The 'settings' sheet was not found in the workbook")
+    settings_sheet = workbook["settings"]
+
+    # form_title is read from the first data row (row 2)
+    form_title_col_index = get_column_index(settings_sheet, "form_title")
+    form_title_value = settings_sheet.cell(row=2, column=form_title_col_index).value
+    assert form_title_value == "buildings", "form_title field is not set to 'buildings'"
+
+    # TODO add test to check that digitisation questions come at end of sheet
+
+
+async def test_add_extra_select_from_file():
+    """Append extra select_one_from_file questions, named after the singular Entity list names."""
+    test_form = Path(__file__).parent / "testdata" / "test_form_for_mandatory_fields.xls"
+    form_bytes = BytesIO(test_form.read_bytes())
+
+    updated_form = await append_mandatory_fields(form_bytes, "buildings", additional_entities=["roads", "waterpoints"])
+    workbook = load_workbook(filename=BytesIO(updated_form.getvalue()))
+
+    survey_sheet = workbook["survey"]
+    # Locate 'name' by its header instead of assuming a fixed column letter
+    name_col_index = get_column_index(survey_sheet, "name")
+    name_column = [row[0].value for row in survey_sheet.iter_rows(min_col=name_col_index, max_col=name_col_index)]
+    assert "road" in name_column, "The 'road' field was not added to the survey sheet."
+    assert "waterpoint" in name_column, "The 'waterpoint' field was not added to the survey sheet."
+
+
+async def test_add_task_ids_to_choices():
+    """Test appending each task id as a row in choices sheet."""
+    test_form = Path(__file__).parent / "testdata" / "test_form_for_mandatory_fields.xls"
+    with open(test_form, "rb") as xlsform:
+        form_bytes = BytesIO(xlsform.read())
+
+    task_count = 7
+    updated_form = await append_mandatory_fields(form_bytes, "buildings", task_count=task_count)
+    workbook = load_workbook(filename=BytesIO(updated_form.getvalue()))
+
+    choices_sheet = workbook["choices"]
+    # Locate 'name' by its header instead of assuming a fixed column letter
+    name_col_index = get_column_index(choices_sheet, "name")
+    name_column = [row[0].value for row in choices_sheet.iter_rows(min_col=name_col_index, max_col=name_col_index)]
+
+    # Every task id from 1 to task_count must be present as a choice name
+    for task_id in range(1, task_count + 1):
+        assert task_id in name_column, f"Task ID {task_id} not found in the choices sheet."