Skip to content

Commit

Permalink
fix: null column should be nullable (#279)
Browse files Browse the repository at this point in the history
  • Loading branch information
PrettyWood committed Aug 23, 2024
1 parent c3cd0d9 commit cd13946
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ __pycache__
*.pyc
*.so
*.dat
.DS_Store

.python-version
.venv
Expand Down
Binary file added python/tests/fixtures/null-column.xlsx
Binary file not shown.
5 changes: 5 additions & 0 deletions python/tests/test_fastexcel.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,3 +531,8 @@ def test_null_values_in_cells() -> None:
pd_expected = pd.DataFrame(expected)
pd_expected["Date"] = pd_expected["Date"].dt.as_unit("ms")
pd_assert_frame_equal(sheet.to_pandas(), pd_expected)


def test_null_column_is_nullable() -> None:
sheet = fastexcel.read_excel(path_for_fixture("null-column.xlsx")).load_sheet(0)
assert sheet.to_arrow().schema.field("nullonly").nullable is True
13 changes: 9 additions & 4 deletions src/types/python/excelsheet/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use sheet_data::ExcelSheetData;
use std::{cmp, collections::HashSet, fmt::Debug, str::FromStr, sync::Arc};

use arrow::{
array::NullArray,
array::{Array, NullArray},
datatypes::{Field, Schema},
pyarrow::ToPyArrow,
record_batch::RecordBatch,
Expand Down Expand Up @@ -521,9 +521,14 @@ impl TryFrom<&ExcelSheet> for RecordBatch {
let schema: Schema = sheet.into();
Ok(RecordBatch::new_empty(Arc::new(schema)))
} else {
RecordBatch::try_from_iter(iter)
.map_err(|err| FastExcelErrorKind::ArrowError(err.to_string()).into())
.with_context(|| format!("could not convert sheet {} to RecordBatch", sheet.name))
// We use `try_from_iter_with_nullable` because `try_from_iter` relies on `array.null_count() > 0;`
// to determine if the array is nullable. This is not the case for `NullArray` which has no nulls.
RecordBatch::try_from_iter_with_nullable(iter.map(|(field_name, array)| {
let nullable = array.is_nullable();
(field_name, array, nullable)
}))
.map_err(|err| FastExcelErrorKind::ArrowError(err.to_string()).into())
.with_context(|| format!("could not convert sheet {} to RecordBatch", sheet.name))
}
}
}
Expand Down

0 comments on commit cd13946

Please sign in to comment.