diff --git a/owid/datautils/dataframes.py b/owid/datautils/dataframes.py index f5f6971..aa2ab50 100644 --- a/owid/datautils/dataframes.py +++ b/owid/datautils/dataframes.py @@ -1,10 +1,15 @@ """Objects related to pandas dataframes.""" -from typing import Tuple, Union, List, Any, Dict, Optional, cast, Callable +from typing import Generator, Tuple, Union, List, Any, Dict, Optional, cast, Callable import numpy as np import pandas as pd from pandas.api.types import union_categoricals +from owid.datautils.utils import ( + yield_formatted_if_not_empty, + get_compact_list_description, + yield_list_lines, +) from owid.datautils.common import ExceptionFromDocstring, warn_on_list_of_entities @@ -218,6 +223,444 @@ def are_equal( return equal, compared +class HighLevelDiff: + """Class for comparing two dataframes. + + It assumes that all nans are identical, and compares floats by means of certain absolute and relative tolerances. + Construct this class by passing two dataframes of possibly different shape. Then check the are_structurally_equal + property to see if the column and row sets of the two dataframes match and/or check the are_equal flag to also + check for equality of values. The other fields give detailed information on what is different between the two + dataframes. + + For cases where there is a difference, various member fields on this class give indications of what is different + (e.g. columns missing in dataframe 1 or 2, index values missing in dataframe 1 or 2, etc.). + + The get_description_lines method fetches a list of strings that compactly describe the differences for humans. + + Parameters + ---------- + df1 : pd.DataFrame + First dataframe. + df2 : pd.DataFrame + Second dataframe. + absolute_tolerance : float + Absolute tolerance to assume in the comparison of each cell in the dataframes. A value a of an element in df1 is + considered equal to the corresponding element b at the same position in df2, if: + abs(a - b) <= absolute_tolerance + relative_tolerance : float + Relative tolerance to assume in the comparison of each cell in the dataframes. A value a of an element in df1 is + considered equal to the corresponding element b at the same position in df2, if: + abs(a - b) / abs(b) <= relative_tolerance + + """ + + df1: pd.DataFrame + df2: pd.DataFrame + columns_missing_in_df1: List[str] + columns_missing_in_df2: List[str] + columns_shared: List[str] + index_columns_missing_in_df1: List[str] + index_columns_missing_in_df2: List[str] + index_columns_shared: List[str] + index_values_missing_in_df1: pd.Index + index_values_missing_in_df2: pd.Index + index_values_shared: pd.Index + duplicate_index_values_in_df1: pd.Series + duplicate_index_values_in_df2: pd.Series + value_differences: Optional[pd.DataFrame] = None + + def __init__( + self, + df1: pd.DataFrame, + df2: pd.DataFrame, + absolute_tolerance: float = 1e-08, + relative_tolerance: float = 1e-05, + ): + self.df1 = df1 + self.df2 = df2 + self.absolute_tolerance = absolute_tolerance + self.relative_tolerance = relative_tolerance + self._diff() + + @property + def value_differences_count(self) -> int: + """Get number of cells in the structural overlap of the two dataframes that differ by more than tolerance.""" + if self.value_differences is None: + return 0 + else: + return int(self.value_differences.sum().sum()) + + @property + def columns_with_differences(self) -> Any: + """Get the columns that are different in the two dataframes. + + This will be an array of index values. 
If the index is a MultiIndex, the index values will be tuples. + """ + if self.value_differences is None: + return np.array([]) + return self.value_differences.columns.values + + @property + def rows_with_differences(self) -> Any: + """Return the row indices that are different in the two dataframes. + + This will be an array of index values. If the index is a MultiIndex, the index values will be tuples. + """ + if self.value_differences is None: + return np.array([]) + return self.value_differences.index.values + + def _diff(self) -> None: + """Diff the two dataframes. + + This can be a somewhat slow operation + """ + df1_columns_set = set(self.df1.columns) + df2_columns_set = set(self.df2.columns) + self.columns_missing_in_df1 = sorted(df2_columns_set - df1_columns_set) + self.columns_missing_in_df2 = sorted(df1_columns_set - df2_columns_set) + self.columns_shared = sorted(df1_columns_set.intersection(df2_columns_set)) + + df1_index_names = set(self.df1.index.names) + df2_index_names = set(self.df2.index.names) + self.index_columns_missing_in_df1 = sorted(df2_index_names - df1_index_names) + self.index_columns_missing_in_df2 = sorted(df1_index_names - df2_index_names) + self.index_columns_shared = sorted( + df1_index_names.intersection(df2_index_names) + ) + + self.index_values_missing_in_df1 = self.df2.index.difference(self.df1.index) + self.index_values_missing_in_df2 = self.df1.index.difference(self.df2.index) + self.index_values_shared = self.df2.index.intersection(self.df1.index) + self.duplicate_index_values_in_df1 = self.df1[ + self.df1.index.duplicated() + ].index.values + self.duplicate_index_values_in_df2 = self.df2[ + self.df2.index.duplicated() + ].index.values + + # Now we calculate the value differences in the intersection of the two dataframes. + if self.columns_shared and any(self.index_values_shared): + df1_intersected = self.df1.loc[ + self.index_values_shared, list(self.columns_shared) + ] + df2_intersected = self.df2.loc[ + self.index_values_shared, list(self.columns_shared) + ] + # We don't use the compare function here from above because it builds a new + # dataframe and we want to leave indices intact so we can know which rows and columns + # were different once we drop the ones with no differences + diffs = df1_intersected.eq(df2_intersected) + + # Eq above does not take tolerance into account so compare again with tolerance + # for columns that are numeric. this could probably be sped up with a check on any on + # the column first but would have to be benchmarked + for col in diffs.columns: + if (df1_intersected[col].dtype in (object, "category")) or ( + df2_intersected[col].dtype in (object, "category") + ): + # Apply a direct comparison for strings or categories + pass + else: + # For numeric data, consider them equal within certain absolute and relative tolerances. + compared_values = np.isclose( + df1_intersected[col].values, + df2_intersected[col].values, + atol=self.absolute_tolerance, + rtol=self.relative_tolerance, + ) + # Treat nans as equal. + compared_values[ + pd.isnull(df1_intersected[col].values) + & pd.isnull(df2_intersected[col].values) + ] = True + diffs[col] = compared_values + + # We now have a dataframe with the same shape and indices as df1 and df2, filled with + # True where the values are the same. We want to use true for different values, so invert + # element-wise now + diffs = ~diffs + + if diffs.empty: + self.value_differences = None + else: + # Get a copy of diffs with all rows dropped where all values in a row are False + # (i.e. 
+                # where df1 and df2 have identical values for all columns)
+                rows_with_diffs = diffs[diffs.any(axis=1)]
+                if rows_with_diffs.empty or not rows_with_diffs.any().any():
+                    self.value_differences = None
+                else:
+                    # Now figure out all columns where there is at least one difference
+                    columns_with_diffs = diffs.any(axis=0)
+                    if not columns_with_diffs.any():
+                        self.value_differences = None
+                    else:
+                        # Here we drop the columns that did not have differences. We are left with a dataframe
+                        # with the original indices and only the rows and columns with differences.
+                        self.value_differences = rows_with_diffs.loc[
+                            :, columns_with_diffs
+                        ]
+
+    @property
+    def are_structurally_equal(self) -> bool:
+        """Check if the two dataframes are structurally equal (i.e. same columns, same index values, ...)."""
+        return not (
+            any(self.columns_missing_in_df1)
+            or any(self.columns_missing_in_df2)
+            or any(self.index_columns_missing_in_df1)
+            or any(self.index_columns_missing_in_df2)
+            or any(self.index_values_missing_in_df1)
+            or any(self.index_values_missing_in_df2)
+            or any(self.duplicate_index_values_in_df1)
+            or any(self.duplicate_index_values_in_df2)
+        )
+
+    @property
+    def are_equal(self) -> bool:
+        """Check if the two dataframes are equal, both structurally and cell-wise."""
+        return self.are_structurally_equal and self.are_overlapping_values_equal
+
+    @property
+    def are_overlapping_values_equal(self) -> bool:
+        """Check if the values within the overlapping columns and rows of the two dataframes are equal."""
+        return self.value_differences is None
+
+    @property
+    def df1_value_differences(self) -> Optional[pd.DataFrame]:
+        """Get a sliced version of df1 that contains only the columns and rows that differ from df2.
+
+        Note that this only includes the part of the dataframe that has structural overlap with
+        the other dataframe (i.e. extra columns or rows are not included).
+        """
+        if self.value_differences is None:
+            return None
+        return cast(
+            pd.DataFrame,
+            self.df1.loc[self.value_differences.index, self.value_differences.columns],
+        )
+
+    @property
+    def df2_value_differences(self) -> Optional[pd.DataFrame]:
+        """Get a sliced version of df2 that contains only the columns and rows that differ from df1.
+
+        Note that this only includes the part of the dataframe that has structural overlap with
+        the other dataframe (i.e. extra columns or rows are not included).
+        """
+        if self.value_differences is None:
+            return None
+        return cast(
+            pd.DataFrame,
+            self.df2.loc[self.value_differences.index, self.value_differences.columns],
+        )
+
+    def get_description_lines_for_diff(
+        self,
+        df1_label: str,
+        df2_label: str,
+        use_color_tags: bool = False,
+        preview_different_dataframe_values: bool = False,
+        show_shared: bool = False,
+        truncate_lists_longer_than: int = 20,
+    ) -> Generator[str, None, None]:
+        """Generate a human-readable description of the differences between the two dataframes.
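+
+        A minimal usage sketch (illustrative only; ``df_old`` and ``df_new`` are hypothetical dataframes):
+
+            diff = HighLevelDiff(df_old, df_new)
+            if not diff.are_equal:
+                for line in diff.get_description_lines_for_diff("old", "new"):
+                    print(line)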
+ + It is returned as a generator of strings, roughly one line per string yielded + (dataframe printing is done by pandas as one string and is returned as a single yielded item) + """ + red, red_end = ("[red]", "[/red]") if use_color_tags else ("", "") + green, green_end = ("[green]", "[/green]") if use_color_tags else ("", "") + blue, blue_end = ("[blue]", "[/blue]") if use_color_tags else ("", "") + + if self.are_equal: + yield (f"{green}{df1_label} is equal to {df2_label}{green_end}") + else: + yield (f"{red}{df1_label} is not equal to {df2_label}{red_end}") + + if self.are_structurally_equal: + yield (f"The structure is {green}identical{green_end}") + else: + yield (f"The structure is {red}different{red_end}") + + # The structure below works like this: we have a property that is a list + # (e.g. self.columns_missing_in_df1) that can be empty or have elements. + # If the list is empty we don't want to yield any lines. If the list has elements + # we want to yield a line. Additionally, we also want to truncate lines with many + # elements if they are too long. We use yield_formatted_if_not_empty on most of the + # member properties to output the differences if there are any. + + # Structural differences + if show_shared: + yield from yield_formatted_if_not_empty( + self.columns_shared, + lambda item: yield_list_lines( + f"{blue}Shared columns{blue_end}", + get_compact_list_description( + item, max_items=truncate_lists_longer_than + ), + ), + f"{red}No shared columns{red_end}", + ) + yield from yield_formatted_if_not_empty( + self.columns_missing_in_df1, + lambda item: yield_list_lines( + f"Columns missing in {df1_label}", + get_compact_list_description( + item, max_items=truncate_lists_longer_than + ), + ), + ) + yield from yield_formatted_if_not_empty( + self.columns_missing_in_df2, + lambda item: yield_list_lines( + f"Columns missing in {df2_label}", + get_compact_list_description( + item, max_items=truncate_lists_longer_than + ), + ), + ) + if show_shared: + yield from yield_formatted_if_not_empty( + self.index_columns_shared, + lambda item: yield_list_lines( + f"{blue}Shared index columns{blue_end}", + get_compact_list_description( + item, max_items=truncate_lists_longer_than + ), + ), + f"{red}No shared index columns{red_end}", + ) + yield from yield_formatted_if_not_empty( + self.index_columns_missing_in_df1, + lambda item: yield_list_lines( + f"Index columns missing in {df1_label}", + get_compact_list_description( + item, max_items=truncate_lists_longer_than + ), + ), + ) + yield from yield_formatted_if_not_empty( + self.index_columns_missing_in_df2, + lambda item: yield_list_lines( + f"Index columns missing in {df2_label}", + get_compact_list_description( + item, max_items=truncate_lists_longer_than + ), + ), + ) + if show_shared: + yield from yield_formatted_if_not_empty( + self.index_values_shared, + lambda item: yield_list_lines( + f"{blue}Shared index values{blue_end}", + get_compact_list_description( + item, max_items=truncate_lists_longer_than + ), + ), + f"{red}No shared index values{red_end}", + ) + yield from yield_formatted_if_not_empty( + self.index_values_missing_in_df1, + lambda item: yield_list_lines( + f"Index values missing in {df1_label}", + get_compact_list_description( + item, + self.df1.index.names, + max_items=truncate_lists_longer_than, + ), + ), + ) + yield from yield_formatted_if_not_empty( + self.index_values_missing_in_df2, + lambda item: yield_list_lines( + f"Index values missing in {df2_label}", + get_compact_list_description( + item, + 
+                        self.df2.index.names,
+                        max_items=truncate_lists_longer_than,
+                    ),
+                ),
+            )
+        yield from yield_formatted_if_not_empty(
+            self.duplicate_index_values_in_df1,
+            lambda item: yield_list_lines(
+                f"Duplicate index values in {df1_label}",
+                get_compact_list_description(
+                    item,
+                    self.df1.index.names,
+                    max_items=truncate_lists_longer_than,
+                ),
+            ),
+        )
+        yield from yield_formatted_if_not_empty(
+            self.duplicate_index_values_in_df2,
+            lambda item: yield_list_lines(
+                f"Duplicate index values in {df2_label}",
+                get_compact_list_description(
+                    item,
+                    self.df2.index.names,
+                    max_items=truncate_lists_longer_than,
+                ),
+            ),
+        )
+
+        # Show "coordinates" where there are value differences.
+        # This is done in compact form, e.g. if you have 10 new years for 200 countries
+        # that would be 2000 cells; instead of listing them all, we unpack the hierarchical
+        # index tuples and show a (shortened) list of the 200 countries and the 10 new years.
+        if self.value_differences is not None:
+            yield (
+                f"Values in the shared columns/rows are {red}different{red_end}. "
+                + f"({self.value_differences_count} different cells)"
+            )
+            yield from yield_formatted_if_not_empty(
+                self.columns_with_differences,
+                lambda item: yield_list_lines(
+                    "Columns with diffs",
+                    get_compact_list_description(
+                        item, max_items=truncate_lists_longer_than
+                    ),
+                ),
+            )
+            yield from yield_formatted_if_not_empty(
+                self.rows_with_differences,
+                lambda item: yield_list_lines(
+                    "Rows with diffs",
+                    get_compact_list_description(
+                        item,
+                        self.df1.index.names,
+                        max_items=truncate_lists_longer_than,
+                    ),
+                ),
+            )
+
+        # This prints the two dataframes one after the other, sliced to
+        # only the area where they have differences.
+        if preview_different_dataframe_values:
+            if (
+                self.value_differences is not None
+                and self.columns_shared
+                and any(self.index_values_shared)
+            ):
+                yield f"Values with differences in {df1_label}:"
+                yield (
+                    str(
+                        self.df1.loc[
+                            self.value_differences.index, self.value_differences.columns
+                        ]
+                    )
+                )
+                yield f"Values with differences in {df2_label}:"
+                yield (
+                    str(
+                        self.df2.loc[
+                            self.value_differences.index, self.value_differences.columns
+                        ]
+                    )
+                )
+            else:
+                yield "The datasets have no overlapping columns/rows."
+
+
 def groupby_agg(
     df: pd.DataFrame,
     groupby_columns: Union[List[str], str],
diff --git a/owid/datautils/utils.py b/owid/datautils/utils.py
new file mode 100644
index 0000000..d78d9dd
--- /dev/null
+++ b/owid/datautils/utils.py
@@ -0,0 +1,91 @@
+"""Functions related to the dataframe HighLevelDiff class."""
+
+from typing import Callable, Generator, Iterable, List, Any, Optional
+
+
+def get_list_description_with_max_length(items: List[Any], max_items: int = 20) -> str:
+    """Return a string representation for a list, potentially shortened in the middle."""
+    if len(items) > max_items:
+        return (
+            f"[{len(items)} items] "
+            + f'{", ".join(str(item) for item in items[:int(max_items/2)])} ... '
+            + f'{", ".join(str(item) for item in items[-int(max_items/2):])}'
+        )
+    else:
+        return ", ".join(str(item) for item in items)
+
+
+def yield_list_lines(
+    description: str, items: Iterable[Any]
+) -> Generator[str, None, None]:
+    """Yield a list of lines for a list of items.
+
+    If the list contains a single item then no newline is inserted. If the list has more than one item
+    then the description is printed as a header and the items are printed on separate lines with a slight indent.
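+
+    For example (an illustrative sketch of the behaviour described above):
+        yield_list_lines("Missing columns", ["a", "b"]) yields "Missing columns:" followed by "a" and "b" on indented lines.
+        yield_list_lines("Missing columns", ["a"]) yields the single line "Missing columns: a".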
+    """
+    sublines = [item for item in items]
+    if len(sublines) > 1:
+        yield f"{description}:"
+        for subline in sublines:
+            if subline != "":
+                yield f" {subline}"
+    elif len(sublines) == 1:
+        yield f"{description}: {sublines[0]}"
+
+
+def get_compact_list_description(
+    items_iterable: Iterable[Any],
+    tuple_headers: Optional[List[str]] = None,
+    max_items: int = 20,
+) -> Generator[str, None, None]:
+    """Get a compact description of a list.
+
+    If the list is numeric and forms a contiguous range then it gets compacted into a range like 2000-2015. If
+    the list contains tuples then the tuples are deconstructed into their components and the
+    components are compacted individually. Long lists (above max_items items) are
+    shortened in the middle.
+    """
+    items = set(items_iterable)
+    if not items:
+        yield "[]"
+    elif all(isinstance(item, int) for item in items):
+        sorted_items = sorted(items)
+        if len(items) == 1:
+            yield str(sorted_items[0])
+        if len(items) == 2:
+            yield f"{sorted_items[0]}, {sorted_items[1]}"
+        if len(items) > 2:
+            if len(items) == sorted_items[-1] - sorted_items[0]:
+                yield f"{sorted_items[0]}-{sorted_items[-1]}"
+            else:
+                yield get_list_description_with_max_length(sorted_items, max_items)
+    elif all(isinstance(item, tuple) for item in items):
+        transposed = zip(*items)
+        lines = [
+            line for item in transposed for line in get_compact_list_description(item)
+        ]
+        if tuple_headers and len(tuple_headers) == len(lines):
+            yield from (
+                f"{header}: {line}" for header, line in zip(tuple_headers, lines)
+            )
+        else:
+            yield from lines
+    else:
+        sorted_items = sorted(items)
+        yield get_list_description_with_max_length(sorted_items, max_items)
+
+
+def yield_formatted_if_not_empty(
+    item: Any,
+    format_function: Callable[[Any], Generator[str, None, None]],
+    fallback_message: str = "",
+) -> Generator[str, None, None]:
+    """Yield an item formatted with the given function if it is not empty.
+
+    This is a useful helper to avoid duplicating property/function access in if blocks and
+    then again in the block body.
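+
+    A rough example (illustrative only; ``format_fn`` stands for any formatting generator):
+        yield_formatted_if_not_empty([], format_fn, "nothing to report") yields only "nothing to report".
+        yield_formatted_if_not_empty(["a", "b"], format_fn) yields whatever lines format_fn(["a", "b"]) produces.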
+ """ + if item is not None and any(item): + yield from format_function(item) + elif fallback_message != "": + yield fallback_message diff --git a/tests/test_dataframes.py b/tests/test_dataframes.py index e17196a..be00503 100644 --- a/tests/test_dataframes.py +++ b/tests/test_dataframes.py @@ -223,6 +223,386 @@ def test_on_dataframes_with_object_columns_with_nans(self): )[0] +class TestAreDataFramesEqualWithHighLevelDiff: + def are_equal(self, df1: pd.DataFrame, df2: pd.DataFrame, **kwargs: float) -> bool: + return dataframes.HighLevelDiff(df1, df2, **kwargs).are_equal + + def test_on_equal_dataframes_with_one_integer_column(self): + assert self.are_equal( + df1=pd.DataFrame({"col_01": [1, 2, 3]}), + df2=pd.DataFrame({"col_01": [1, 2, 3]}), + ) + + def test_on_almost_equal_dataframes_but_differing_by_one_element(self): + assert not self.are_equal( + df1=pd.DataFrame({"col_01": [1, 2, 3]}), + df2=pd.DataFrame({"col_01": [1, 2, 0]}), + ) + + def test_on_almost_equal_dataframes_but_differing_by_type(self): + assert not self.are_equal( + df1=pd.DataFrame({"col_01": [1, 2, 3]}), + df2=pd.DataFrame({"col_01": [1, 2, 3.0]}), + ) + + def test_on_equal_dataframes_containing_nans(self): + assert self.are_equal( + df1=pd.DataFrame({"col_01": [1, 2, np.nan]}), + df2=pd.DataFrame({"col_01": [1, 2, np.nan]}), + ) + + def test_on_equal_dataframes_containing_only_nans(self): + assert self.are_equal( + df1=pd.DataFrame({"col_01": [np.nan, np.nan]}), + df2=pd.DataFrame({"col_01": [np.nan, np.nan]}), + ) + + def test_on_equal_dataframes_both_empty(self): + assert self.are_equal(df1=pd.DataFrame(), df2=pd.DataFrame()) + + def test_on_equal_dataframes_with_various_types_of_columns(self): + assert self.are_equal( + df1=pd.DataFrame( + { + "col_01": [1, 2], + "col_02": [0.1, 0.2], + "col_03": ["1", "2"], + "col_04": [True, False], + } + ), + df2=pd.DataFrame( + { + "col_01": [1, 2], + "col_02": [0.1, 0.2], + "col_03": ["1", "2"], + "col_04": [True, False], + } + ), + ) + + def test_on_almost_equal_dataframes_but_columns_sorted_differently(self): + assert not self.are_equal( + df1=pd.DataFrame( + { + "col_01": [1, 2], + "col_02": [0.1, 0.2], + "col_03": ["1", "2"], + "col_04": [True, False], + } + ), + df2=pd.DataFrame( + { + "col_02": [0.1, 0.2], + "col_01": [1, 2], + "col_03": ["1", "2"], + "col_04": [True, False], + } + ), + ) + + def test_on_unequal_dataframes_with_all_columns_different(self): + assert not self.are_equal( + df1=pd.DataFrame({"col_01": [1, 2], "col_02": [0.1, 0.2]}), + df2=pd.DataFrame({"col_03": [0.1, 0.2], "col_04": [1, 2]}), + ) + + def test_on_unequal_dataframes_with_some_common_columns(self): + assert not self.are_equal( + df1=pd.DataFrame({"col_01": [1, 2], "col_02": [0.1, 0.2]}), + df2=pd.DataFrame({"col_01": [1, 2], "col_03": [1, 2]}), + ) + + def test_on_equal_dataframes_given_large_absolute_tolerance(self): + assert self.are_equal( + df1=pd.DataFrame({"col_01": [10, 20]}), + df2=pd.DataFrame({"col_01": [11, 21]}), + absolute_tolerance=1, + relative_tolerance=1e-8, + ) + + def test_on_unequal_dataframes_given_large_absolute_tolerance(self): + assert not self.are_equal( + df1=pd.DataFrame({"col_01": [10, 20]}), + df2=pd.DataFrame({"col_01": [11, 21]}), + absolute_tolerance=0.9, + relative_tolerance=1e-8, + ) + + def test_on_equal_dataframes_given_large_relative_tolerance(self): + assert self.are_equal( + df1=pd.DataFrame({"col_01": [1]}), + df2=pd.DataFrame({"col_01": [2]}), + absolute_tolerance=1e-8, + relative_tolerance=0.5, + ) + + def 
test_on_unequal_dataframes_given_large_relative_tolerance(self): + assert not self.are_equal( + df1=pd.DataFrame({"col_01": [1]}), + df2=pd.DataFrame({"col_01": [2]}), + absolute_tolerance=1e-8, + relative_tolerance=0.49, + ) + + def test_on_equal_dataframes_with_non_numeric_indexes(self): + assert self.are_equal( + df1=pd.DataFrame({"col_01": [1, 2], "col_02": ["a", "b"]}).set_index( + "col_02" + ), + df2=pd.DataFrame({"col_01": [1, 2], "col_02": ["a", "b"]}).set_index( + "col_02" + ), + ) + + def test_on_dataframes_of_equal_values_but_different_indexes(self): + assert not self.are_equal( + df1=pd.DataFrame({"col_01": [1, 2], "col_02": ["a", "b"]}).set_index( + "col_02" + ), + df2=pd.DataFrame({"col_01": [1, 2], "col_02": ["a", "c"]}).set_index( + "col_02" + ), + ) + + def test_on_dataframes_with_object_columns_with_nans(self): + assert self.are_equal( + df1=pd.DataFrame({"col_01": [np.nan, "b", "c"]}), + df2=pd.DataFrame({"col_01": [np.nan, "b", "c"]}), + ) + + +class TestHighLevelDiff: + def test_simple_equal_dataframes_are_equal(self): + df = pd.DataFrame( + { + "year": [2001, 2003, 2003, 2003, 2002, 2002], + "value_01": [1, 2, 3, 4, 5, 6], + } + ) + diff = dataframes.HighLevelDiff( + df, + df, + absolute_tolerance=1e-8, + relative_tolerance=0.5, + ) + assert any(diff.columns_missing_in_df1) == False + assert any(diff.columns_missing_in_df2) == False + assert any(diff.index_columns_missing_in_df1) == False + assert any(diff.index_columns_missing_in_df2) == False + assert any(diff.index_values_missing_in_df1) == False + assert any(diff.index_values_missing_in_df2) == False + assert any(diff.duplicate_index_values_in_df1) == False + assert any(diff.duplicate_index_values_in_df2) == False + assert diff.are_structurally_equal == True + assert diff.are_equal + + def test_more_complex_equal_dataframes_are_equal(self): + df = pd.DataFrame( + { + "year": [2001, 2002, 2003, 2004, 2005, 2006] * 2, + "country": ["a"] * 6 + ["b"] * 6, + "value_01": [1, 2, 3, 4, 5, 6] * 2, + } + ) + df.set_index(["year", "country"], inplace=True) + diff = dataframes.HighLevelDiff( + df, + df, + absolute_tolerance=1e-8, + relative_tolerance=0.5, + ) + assert any(diff.columns_missing_in_df1) == False + assert any(diff.columns_missing_in_df2) == False + assert any(diff.index_columns_missing_in_df1) == False + assert any(diff.index_columns_missing_in_df2) == False + assert any(diff.index_values_missing_in_df1) == False + assert any(diff.index_values_missing_in_df2) == False + assert any(diff.duplicate_index_values_in_df1) == False + assert any(diff.duplicate_index_values_in_df2) == False + assert diff.are_structurally_equal == True + assert diff.are_equal + + def test_detects_duplicate_index_values(self): + df = pd.DataFrame( + { + "year": [2001] * 12, + "country": ["a"] * 6 + ["b"] * 6, + "value_01": [1, 2, 3, 4, 5, 6] * 2, + } + ) + df.set_index(["year", "country"], inplace=True) + diff = dataframes.HighLevelDiff( + df, + df, + absolute_tolerance=1e-8, + relative_tolerance=0.5, + ) + assert any(diff.columns_missing_in_df1) == False + assert any(diff.columns_missing_in_df2) == False + assert any(diff.index_columns_missing_in_df1) == False + assert any(diff.index_columns_missing_in_df2) == False + assert any(diff.index_values_missing_in_df1) == False + assert any(diff.index_values_missing_in_df2) == False + assert any(diff.duplicate_index_values_in_df1) == True + assert any(diff.duplicate_index_values_in_df2) == True + assert diff.are_structurally_equal == False + assert not diff.are_equal + + def 
test_detects_missing_index(self): + df = pd.DataFrame( + { + "year": [2001, 2002, 2003, 2004, 2005, 2006] * 2, + "country": ["a"] * 6 + ["b"] * 6, + "value_01": [1, 2, 3, 4, 5, 6] * 2, + } + ) + df2 = df.set_index(["year", "country"], inplace=False) + diff = dataframes.HighLevelDiff( + df, + df2, + absolute_tolerance=1e-8, + relative_tolerance=0.5, + ) + assert any(diff.columns_missing_in_df1) == False + assert any(diff.columns_missing_in_df2) == True + assert any(diff.index_columns_missing_in_df1) == True + assert any(diff.index_columns_missing_in_df2) == False + assert any(diff.index_values_missing_in_df1) == True + assert any(diff.index_values_missing_in_df2) == True + assert any(diff.duplicate_index_values_in_df1) == False + assert any(diff.duplicate_index_values_in_df2) == False + assert diff.are_structurally_equal == False + assert not diff.are_equal + + def test_detects_missing_index_values(self): + df = pd.DataFrame( + { + "year": [2001, 2002, 2003, 2004, 2005, 2006] * 2, + "country": ["a"] * 6 + ["b"] * 6, + "value_01": [1, 2, 3, 4, 5, 6] * 2, + } + ) + df.set_index(["year", "country"], inplace=True) + df2 = df.copy() + df2.drop((2006, "b"), inplace=True) + diff = dataframes.HighLevelDiff( + df, + df2, + absolute_tolerance=1e-8, + relative_tolerance=0.5, + ) + assert any(diff.columns_missing_in_df1) == False + assert any(diff.columns_missing_in_df2) == False + assert any(diff.index_columns_missing_in_df1) == False + assert any(diff.index_columns_missing_in_df2) == False + assert any(diff.index_values_missing_in_df1) == False + assert any(diff.index_values_missing_in_df2) == True + assert any(diff.duplicate_index_values_in_df1) == False + assert any(diff.duplicate_index_values_in_df2) == False + assert diff.are_structurally_equal == False + assert not diff.are_equal + + def test_detects_data_changes(self): + df = pd.DataFrame( + { + "year": [2001, 2002, 2003, 2004, 2005, 2006] * 2, + "country": ["a"] * 6 + ["b"] * 6, + "value_01": [1, 2, 3, 4, 5, 6] * 2, + } + ) + df.set_index(["year", "country"], inplace=True) + df2 = df.copy() + df2.loc[(2006, "b"), "value_01"] = 7 + diff = dataframes.HighLevelDiff( + df, + df2, + absolute_tolerance=1e-8, + relative_tolerance=0.05, + ) + assert any(diff.columns_missing_in_df1) == False + assert any(diff.columns_missing_in_df2) == False + assert any(diff.index_columns_missing_in_df1) == False + assert any(diff.index_columns_missing_in_df2) == False + assert any(diff.index_values_missing_in_df1) == False + assert any(diff.index_values_missing_in_df2) == False + assert any(diff.duplicate_index_values_in_df1) == False + assert any(diff.duplicate_index_values_in_df2) == False + assert diff.are_structurally_equal == True + assert not diff.are_equal + assert diff.value_differences is not None and diff.value_differences.shape == ( + 1, + 1, + ) + assert type(diff.value_differences.index) == pd.MultiIndex + assert list(diff.rows_with_differences) == [(2006, "b")] + assert list(diff.columns_with_differences) == ["value_01"] + + def test_detects_data_changes_with_enough_tolerance(self): + df = pd.DataFrame( + { + "year": [2001, 2002, 2003, 2004, 2005, 2006] * 2, + "country": ["a"] * 6 + ["b"] * 6, + "value_01": [1, 2, 3, 4, 5, 6] * 2, + } + ) + df.set_index(["year", "country"], inplace=True) + df2 = df.copy() + df2.loc[(2006, "b"), "value_01"] = 7 + diff = dataframes.HighLevelDiff( + df, + df2, + absolute_tolerance=1e-8, + relative_tolerance=0.3, + ) + assert any(diff.columns_missing_in_df1) == False + assert any(diff.columns_missing_in_df2) == False + 
assert any(diff.index_columns_missing_in_df1) == False + assert any(diff.index_columns_missing_in_df2) == False + assert any(diff.index_values_missing_in_df1) == False + assert any(diff.index_values_missing_in_df2) == False + assert any(diff.duplicate_index_values_in_df1) == False + assert any(diff.duplicate_index_values_in_df2) == False + assert diff.are_structurally_equal == True + assert diff.are_equal + assert diff.value_differences is None + + def test_detects_data_changes2(self): + df = pd.DataFrame( + { + "year": [2001, 2002, 2003, 2004, 2005, 2006] * 2, + "country": ["a"] * 6 + ["b"] * 6, + "value_01": [1, 2, 3, 4, 5, 6] * 2, + } + ) + df.set_index(["year", "country"], inplace=True) + df2 = df.copy() + df2.loc[(2006, "b"), "value_01"] = 7 + df2.loc[(2006, "a"), "value_01"] = 8 + diff = dataframes.HighLevelDiff( + df, + df2, + absolute_tolerance=1e-8, + relative_tolerance=0.05, + ) + assert any(diff.columns_missing_in_df1) == False + assert any(diff.columns_missing_in_df2) == False + assert any(diff.index_columns_missing_in_df1) == False + assert any(diff.index_columns_missing_in_df2) == False + assert any(diff.index_values_missing_in_df1) == False + assert any(diff.index_values_missing_in_df2) == False + assert any(diff.duplicate_index_values_in_df1) == False + assert any(diff.duplicate_index_values_in_df2) == False + assert diff.are_structurally_equal == True + assert not diff.are_equal + assert diff.value_differences is not None and diff.value_differences.shape == ( + 2, + 1, + ) + assert type(diff.value_differences.index) == pd.MultiIndex + assert list(diff.rows_with_differences) == [(2006, "a"), (2006, "b")] + assert list(diff.columns_with_differences) == ["value_01"] + + class TestGroupbyAggregate: def test_default_aggregate_single_groupby_column_as_string(self): df_in = pd.DataFrame(