Skip to content

Commit

Permalink
fix(pandas): don't silently ignore result column name mismatches
Browse files Browse the repository at this point in the history
  • Loading branch information
jcrist committed Sep 5, 2024
1 parent 1936437 commit d5ca729
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 6 deletions.
9 changes: 3 additions & 6 deletions ibis/formats/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,11 @@ def infer_table(cls, df):

@classmethod
def convert_table(cls, df, schema):
if len(schema) != len(df.columns):
raise ValueError(
"schema column count does not match input data column count"
)
if schema.names != tuple(df.columns):
raise ValueError("schema names don't match input data columns")

columns = {
name: cls.convert_column(series, dtype)
for (name, dtype), (_, series) in zip(schema.items(), df.items())
name: cls.convert_column(df[name], dtype) for name, dtype in schema.items()
}
df = pd.DataFrame(columns)

Expand Down
7 changes: 7 additions & 0 deletions ibis/formats/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,3 +433,10 @@ def test_convert_dataframe_with_timezone():
desired_schema = ibis.schema(dict(time='timestamp("EST")'))
result = PandasData.convert_table(df.copy(), desired_schema)
tm.assert_frame_equal(expected, result)


def test_schema_doesnt_match_input_columns():
    """Check that convert_table raises ValueError on mismatched column names."""
    frame = pd.DataFrame({"x": [1], "y": [2]})
    mismatched = sch.Schema({"a": "int64", "b": "int64"})

    # Both sides have two columns, so only the names ("x", "y" vs "a", "b")
    # differ; the error must come from the name comparison.
    with pytest.raises(ValueError, match="schema names don't match"):
        PandasData.convert_table(frame, mismatched)

0 comments on commit d5ca729

Please sign in to comment.