Skip to content

Commit

Permalink
perf(selectors): speed up the c selector by caching the column existence check
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored Jul 23, 2024
1 parent b23c5a3 commit fdaeb5c
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 5 deletions.
12 changes: 8 additions & 4 deletions ibis/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,13 +362,17 @@ def c(*names: str | ir.Column) -> Predicate:
"""Select specific column names."""
names = frozenset(col if isinstance(col, str) else col.get_name() for col in names)

def func(col: ir.Value) -> bool:
schema = col.op().rel.schema
if extra_cols := (names - schema.keys()):
@functools.cache
def check_delta(schema):
if extra_cols := names - schema._name_locs.keys():
raise exc.IbisInputError(
f"Columns {extra_cols} are not present in {schema.names}"
)
return col.get_name() in names

def func(col: ir.Value) -> bool:
op = col.op()
check_delta(op.rel.schema)
return op.name in names

return where(func)

Expand Down
11 changes: 10 additions & 1 deletion ibis/tests/benchmarks/test_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.types as ir
import ibis.selectors as s
from ibis.backends import _get_backend_names

pytestmark = [pytest.mark.benchmark, pytest.mark.timeout(30)]
pytestmark = [pytest.mark.benchmark]


def make_t():
Expand Down Expand Up @@ -967,3 +968,11 @@ def test_duckdb_timestamp_conversion(benchmark):
con = ibis.duckdb.connect()
series = benchmark(con.execute, expr)
assert series.size == (stop - start).total_seconds()


@pytest.mark.parametrize("cols", [1_000, 10_000])
def test_selectors(benchmark, cols):
    """Benchmark expanding an ``across`` selector on a wide table.

    Builds an unbound table ``t`` with ``cols`` integer columns named
    ``col0`` .. ``col{cols-1}``, selects the first ``cols - cols // 10`` of
    them by name with ``s.c`` and applies a cast to ``"str"`` to each one via
    ``s.across``. ``sel.expand`` and the table are handed to the ``benchmark``
    fixture for timing.
    """
    # Build the column names once; they are used for the schema and, as a
    # prefix slice, for the selector.
    column_names = [f"col{i}" for i in range(cols)]
    t = ibis.table(name="t", schema=dict.fromkeys(column_names, "int"))

    # Leave the last tenth of the columns unselected.
    n = cols - cols // 10
    selected = s.c(*column_names[:n])
    sel = s.across(selected, lambda c: c.cast("str"))

    benchmark(sel.expand, t)

0 comments on commit fdaeb5c

Please sign in to comment.