Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/dlt_iceberg/schema_casting.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,11 @@ def cast_table_safe(
f"to schema with {len(target_schema)} fields"
)

# Reorder columns to match target schema before casting
# PyArrow's cast() requires fields to be in the same order
target_field_names = [field.name for field in target_schema]
table = table.select(target_field_names)

try:
casted_table = table.cast(target_schema)
logger.info("Cast completed successfully")
Expand Down
47 changes: 47 additions & 0 deletions tests/test_schema_casting.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,3 +456,50 @@ def test_real_world_iceberg_scenario():
result = cast_table_safe(table, iceberg_schema, strict=True)
assert len(result) == 3
assert result.schema == iceberg_schema


def test_cast_table_safe_different_field_order():
    """
    Regression test: cast_table_safe must tolerate column-order mismatches.

    cast_table_safe used to raise ("Target schema's field names are not
    matching the table's field names") whenever the source table listed its
    fields in a different order than the target schema, even though every
    field name and type matched exactly. JSON-sourced data hits this often,
    since field order is not guaranteed across API responses or extraction
    runs.
    """
    # Build the source table with columns laid out as a, b, c.
    input_schema = pa.schema(
        [
            pa.field("a", pa.int64()),
            pa.field("b", pa.string()),
            pa.field("c", pa.float64()),
        ]
    )
    input_table = pa.table(
        {"a": [1, 2, 3], "b": ["x", "y", "z"], "c": [1.1, 2.2, 3.3]},
        schema=input_schema,
    )

    # Identical fields, but the target orders them c, b, a.
    reordered_schema = pa.schema(
        [
            pa.field("c", pa.float64()),
            pa.field("b", pa.string()),
            pa.field("a", pa.int64()),
        ]
    )

    # Names and types all match, so the cast must succeed despite the order.
    result = cast_table_safe(input_table, reordered_schema, strict=True)

    assert len(result) == 3
    assert result.schema == reordered_schema
    # Columns must come back in the target order with their data intact.
    assert result.column_names == ["c", "b", "a"]
    assert result["a"].to_pylist() == [1, 2, 3]
    assert result["b"].to_pylist() == ["x", "y", "z"]
    assert result["c"].to_pylist() == [1.1, 2.2, 3.3]