From 2036b9940234cff7b7859fba4c139d0bf8dc5027 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Mon, 15 Jul 2024 18:46:31 +0100 Subject: [PATCH] feat: make pandas and NumPy optional dependencies, don't require PyArrow for plotting with Polars/Modin/cuDF (#3452) * feat: use Narwhals to make NumPy and pandas optional, and to support Narwhals-compliant libraries natively without PyArrow conversions * fix ibis-date32 bug too, make sure to only convert to arrow once * stable api * update docstring * fixup * silence warning for old pandas * improve skipif condition, simplify * lint * explicitly set ibis backend * use importlib / packaging * xfail ibis date32 test on windows * wip * wip * wip * wip date32 * reorder imports * typing * bump version * typo, `strict=False` comments, use consistent typevar name * lint * special-case Polars in `.dt.to_string` for Date * rename test file --- altair/_magics.py | 4 +- altair/utils/__init__.py | 12 +- altair/utils/_vegafusion_data.py | 13 +- altair/utils/core.py | 180 +++++++++--------- altair/utils/data.py | 130 +++++++------ altair/utils/schemapi.py | 62 ++++-- altair/vegalite/data.py | 4 +- altair/vegalite/v5/api.py | 10 +- altair/vegalite/v5/schema/channels.py | 4 +- pyproject.toml | 12 +- tests/utils/test_core.py | 2 +- tests/utils/test_data.py | 7 +- ...erchange.py => test_to_values_narwhals.py} | 12 +- tests/utils/test_utils.py | 31 +-- tests/vegalite/v5/test_api.py | 48 ++++- tools/generate_schema_wrapper.py | 5 +- tools/schemapi/schemapi.py | 62 ++++-- 17 files changed, 365 insertions(+), 233 deletions(-) rename tests/utils/{test_dataframe_interchange.py => test_to_values_narwhals.py} (85%) diff --git a/altair/_magics.py b/altair/_magics.py index 28e6d832f..638400c78 100644 --- a/altair/_magics.py +++ b/altair/_magics.py @@ -9,7 +9,7 @@ import IPython from IPython.core import magic_arguments -import pandas as pd +from narwhals.dependencies import is_pandas_dataframe as _is_pandas_dataframe from altair.vegalite import v5 as vegalite_v5 @@ -39,7 +39,7 @@ def _prepare_data(data, data_transformers): """Convert input data to data for use within schema""" if data is None or isinstance(data, dict): return data - elif isinstance(data, pd.DataFrame): + elif _is_pandas_dataframe(data): if func := data_transformers.get(): data = func(data) return data diff --git a/altair/utils/__init__.py b/altair/utils/__init__.py index 64d6f4566..36d35bca4 100644 --- a/altair/utils/__init__.py +++ b/altair/utils/__init__.py @@ -1,8 +1,8 @@ from .core import ( - infer_vegalite_type, + infer_vegalite_type_for_pandas, infer_encoding_types, - sanitize_dataframe, - sanitize_arrow_table, + sanitize_pandas_dataframe, + sanitize_narwhals_dataframe, parse_shorthand, use_signature, update_nested, @@ -23,10 +23,10 @@ "Undefined", "display_traceback", "infer_encoding_types", - "infer_vegalite_type", + "infer_vegalite_type_for_pandas", "parse_shorthand", - "sanitize_arrow_table", - "sanitize_dataframe", + "sanitize_narwhals_dataframe", + "sanitize_pandas_dataframe", "spec_to_html", "update_nested", "use_signature", diff --git a/altair/utils/_vegafusion_data.py b/altair/utils/_vegafusion_data.py index ea1ae6dad..c9f127378 100644 --- a/altair/utils/_vegafusion_data.py +++ b/altair/utils/_vegafusion_data.py @@ -12,19 +12,20 @@ Callable, ) +import narwhals.stable.v1 as nw + from altair.utils._importers import import_vegafusion -from altair.utils.core import DataFrameLike from altair.utils.data import ( DataType, ToValuesReturnType, MaxRowsError, SupportsGeoInterface, ) +from 
altair.utils.core import DataFrameLike from altair.vegalite.data import default_data_transformer - if TYPE_CHECKING: - import pandas as pd + from narwhals.typing import IntoDataFrame from vegafusion.runtime import ChartState # type: ignore # Temporary storage for dataframes that have been extracted @@ -60,7 +61,7 @@ def vegafusion_data_transformer( @overload def vegafusion_data_transformer( - data: dict | pd.DataFrame | SupportsGeoInterface, max_rows: int = ... + data: dict | IntoDataFrame | SupportsGeoInterface, max_rows: int = ... ) -> _VegaFusionReturnType: ... @@ -68,6 +69,10 @@ def vegafusion_data_transformer( data: DataType | None = None, max_rows: int = 100000 ) -> Callable[..., Any] | _VegaFusionReturnType: """VegaFusion Data Transformer""" + # Vegafusion does not support Narwhals, so if `data` is a Narwhals + # object, we make sure to extract the native object and let Vegafusion handle it. + # `strict=False` passes `data` through as-is if it is not a Narwhals object. + data = nw.to_native(data, strict=False) if data is None: return vegafusion_data_transformer elif isinstance(data, DataFrameLike) and not isinstance(data, SupportsGeoInterface): diff --git a/altair/utils/core.py b/altair/utils/core.py index a001f7a12..9046a75d5 100644 --- a/altair/utils/core.py +++ b/altair/utils/core.py @@ -27,12 +27,11 @@ from operator import itemgetter import jsonschema -import pandas as pd -import numpy as np -from pandas.api.types import infer_dtype +import narwhals.stable.v1 as nw +from narwhals.dependencies import is_pandas_dataframe, get_polars +from narwhals.typing import IntoDataFrame from altair.utils.schemapi import SchemaBase, Undefined -from altair.utils._dfi_types import Column, DtypeKind, DataFrame as DfiDataFrame if sys.version_info >= (3, 10): from typing import ParamSpec @@ -43,11 +42,14 @@ if TYPE_CHECKING: from types import ModuleType import typing as t - from pandas.core.interchange.dataframe_protocol import Column as PandasColumn - import pyarrow as pa + from altair.vegalite.v5.schema._typing import StandardType_T as InferredVegaLiteType + from altair.utils._dfi_types import DataFrame as DfiDataFrame + from narwhals.typing import IntoExpr + import pandas as pd V = TypeVar("V") P = ParamSpec("P") +TIntoDataFrame = TypeVar("TIntoDataFrame", bound=IntoDataFrame) @runtime_checkable @@ -198,10 +200,7 @@ def __dataframe__( ] -InferredVegaLiteType = Literal["ordinal", "nominal", "quantitative", "temporal"] - - -def infer_vegalite_type( +def infer_vegalite_type_for_pandas( data: object, ) -> InferredVegaLiteType | tuple[InferredVegaLiteType, list[Any]]: """ @@ -212,6 +211,9 @@ def infer_vegalite_type( ---------- data: object """ + # This is safe to import here, as this function is only called on pandas input. + from pandas.api.types import infer_dtype + typ = infer_dtype(data, skipna=False) if typ in { @@ -297,13 +299,16 @@ def sanitize_geo_interface(geo: t.MutableMapping[Any, Any]) -> dict[str, Any]: def numpy_is_subtype(dtype: Any, subtype: Any) -> bool: + # This is only called on `numpy` inputs, so it's safe to import it here. + import numpy as np + try: return np.issubdtype(dtype, subtype) except (NotImplementedError, TypeError): return False -def sanitize_dataframe(df: pd.DataFrame) -> pd.DataFrame: +def sanitize_pandas_dataframe(df: pd.DataFrame) -> pd.DataFrame: """Sanitize a DataFrame to prepare it for serialization. 
* Make a copy @@ -320,6 +325,11 @@ def sanitize_dataframe(df: pd.DataFrame) -> pd.DataFrame: * convert dedicated string column to objects and replace NaN with None * Raise a ValueError for TimeDelta dtypes """ + # This is safe to import here, as this function is only called on pandas input. + # NumPy is a required dependency of pandas so is also safe to import. + import pandas as pd + import numpy as np + df = df.copy() if isinstance(df.columns, pd.RangeIndex): @@ -429,30 +439,54 @@ def to_list_if_array(val): return df -def sanitize_arrow_table(pa_table: pa.Table) -> pa.Table: - """Sanitize arrow table for JSON serialization""" - import pyarrow as pa - import pyarrow.compute as pc - - arrays = [] - schema = pa_table.schema - for name in schema.names: - array = pa_table[name] - dtype_name = str(schema.field(name).type) - if dtype_name.startswith(("timestamp", "date")): - arrays.append(pc.strftime(array)) - elif dtype_name.startswith("duration"): +def sanitize_narwhals_dataframe( + data: nw.DataFrame[TIntoDataFrame], +) -> nw.DataFrame[TIntoDataFrame]: + """Sanitize narwhals.DataFrame for JSON serialization""" + schema = data.schema + columns: list[IntoExpr] = [] + # See https://github.com/vega/altair/issues/1027 for why this is necessary. + local_iso_fmt_string = "%Y-%m-%dT%H:%M:%S" + for name, dtype in schema.items(): + if dtype == nw.Date and nw.get_native_namespace(data) is get_polars(): + # Polars doesn't allow formatting `Date` with time directives. + # The date -> datetime cast is extremely fast compared with `to_string` + columns.append( + nw.col(name).cast(nw.Datetime).dt.to_string(local_iso_fmt_string) + ) + elif dtype == nw.Date: + columns.append(nw.col(name).dt.to_string(local_iso_fmt_string)) + elif dtype == nw.Datetime: + columns.append(nw.col(name).dt.to_string(f"{local_iso_fmt_string}%.f")) + elif dtype == nw.Duration: msg = ( - f'Field "{name}" has type "{dtype_name}" which is ' + f'Field "{name}" has type "{dtype}" which is ' "not supported by Altair. Please convert to " "either a timestamp or a numerical value." "" ) raise ValueError(msg) else: - arrays.append(array) + columns.append(name) + return data.select(columns) + - return pa.Table.from_arrays(arrays, names=schema.names) +def to_eager_narwhals_dataframe(data: IntoDataFrame) -> nw.DataFrame[Any]: + """Wrap `data` in `narwhals.DataFrame`. + + If `data` is not supported by Narwhals, but it is convertible + to a PyArrow table, then first convert to a PyArrow Table, + and then wrap in `narwhals.DataFrame`. + """ + data_nw = nw.from_native(data, eager_or_interchange_only=True) + if nw.get_level(data_nw) == "interchange": + # If Narwhals' support for `data`'s class is only metadata-level, then we + # use the interchange protocol to convert to a PyArrow Table. + from altair.utils.data import arrow_table_from_dfi_dataframe + + pa_table = arrow_table_from_dfi_dataframe(data) # type: ignore[arg-type] + data_nw = nw.from_native(pa_table, eager_only=True) + return data_nw def parse_shorthand( @@ -498,6 +532,7 @@ def parse_shorthand( Examples -------- + >>> import pandas as pd >>> data = pd.DataFrame({'foo': ['A', 'B', 'A', 'B'], ... 
'bar': [1, 2, 3, 4]}) @@ -537,7 +572,7 @@ def parse_shorthand( >>> parse_shorthand('count()', data) == {'aggregate': 'count', 'type': 'quantitative'} True """ - from altair.utils._importers import pyarrow_available + from altair.utils.data import is_data_type if not shorthand: return {} @@ -597,39 +632,22 @@ def parse_shorthand( attrs["type"] = "temporal" # if data is specified and type is not, infer type from data - if "type" not in attrs: - if pyarrow_available() and data is not None and isinstance(data, DataFrameLike): - dfi = data.__dataframe__() - if "field" in attrs: - unescaped_field = attrs["field"].replace("\\", "") - if unescaped_field in dfi.column_names(): - column = dfi.get_column_by_name(unescaped_field) - try: - attrs["type"] = infer_vegalite_type_for_dfi_column(column) - except (NotImplementedError, AttributeError, ValueError): - # Fall back to pandas-based inference. - # Note: The AttributeError catch is a workaround for - # https://github.com/pandas-dev/pandas/issues/55332 - if isinstance(data, pd.DataFrame): - attrs["type"] = infer_vegalite_type(data[unescaped_field]) - else: - raise - - if isinstance(attrs["type"], tuple): - attrs["sort"] = attrs["type"][1] - attrs["type"] = attrs["type"][0] - elif isinstance(data, pd.DataFrame): - # Fallback if pyarrow is not installed or if pandas is older than 1.5 - # - # Remove escape sequences so that types can be inferred for columns with special characters - if "field" in attrs and attrs["field"].replace("\\", "") in data.columns: - attrs["type"] = infer_vegalite_type( - data[attrs["field"].replace("\\", "")] - ) - # ordered categorical dataframe columns return the type and sort order as a tuple - if isinstance(attrs["type"], tuple): - attrs["sort"] = attrs["type"][1] - attrs["type"] = attrs["type"][0] + if "type" not in attrs and is_data_type(data): + unescaped_field = attrs["field"].replace("\\", "") + data_nw = nw.from_native(data, eager_or_interchange_only=True) + schema = data_nw.schema + if unescaped_field in schema: + column = data_nw[unescaped_field] + if schema[unescaped_field] in { + nw.Object, + nw.Unknown, + } and is_pandas_dataframe(nw.to_native(data_nw)): + attrs["type"] = infer_vegalite_type_for_pandas(nw.to_native(column)) + else: + attrs["type"] = infer_vegalite_type_for_narwhals(column) + if isinstance(attrs["type"], tuple): + attrs["sort"] = attrs["type"][1] + attrs["type"] = attrs["type"][0] # If an unescaped colon is still present, it's often due to an incorrect data type specification # but could also be due to using a column name with ":" in it. @@ -650,41 +668,23 @@ def parse_shorthand( return attrs -def infer_vegalite_type_for_dfi_column( - column: Column | PandasColumn, -) -> InferredVegaLiteType | tuple[InferredVegaLiteType, list[Any]]: - from pyarrow.interchange.from_dataframe import column_to_array - - try: - kind = column.dtype[0] - except NotImplementedError as e: - # Edge case hack: - # dtype access fails for pandas column with datetime64[ns, UTC] type, - # but all we need to know is that its temporal, so check the - # error message for the presence of datetime64. 
-        #
-        # See https://github.com/pandas-dev/pandas/issues/54239
-        if "datetime64" in e.args[0] or "timestamp" in e.args[0]:
-            return "temporal"
-        raise e
-
+def infer_vegalite_type_for_narwhals(
+    column: nw.Series,
+) -> InferredVegaLiteType | tuple[InferredVegaLiteType, list[Any]]:
+    dtype = column.dtype
     if (
-        kind == DtypeKind.CATEGORICAL
-        and column.describe_categorical["is_ordered"]
-        and column.describe_categorical["categories"] is not None
+        nw.is_ordered_categorical(column)
+        and not (categories := column.cat.get_categories()).is_empty()
     ):
-        # Treat ordered categorical column as Vega-Lite ordinal
-        categories_column = column.describe_categorical["categories"]
-        categories_array = column_to_array(categories_column)
-        return "ordinal", categories_array.to_pylist()
-    if kind in {DtypeKind.STRING, DtypeKind.CATEGORICAL, DtypeKind.BOOL}:
+        return "ordinal", categories.to_list()
+    if dtype in {nw.String, nw.Categorical, nw.Boolean}:
         return "nominal"
-    elif kind in {DtypeKind.INT, DtypeKind.UINT, DtypeKind.FLOAT}:
+    elif dtype.is_numeric():
         return "quantitative"
-    elif kind == DtypeKind.DATETIME:
+    elif dtype in {nw.Datetime, nw.Date}:
         return "temporal"
     else:
-        msg = f"Unexpected DtypeKind: {kind}"
+        msg = f"Unexpected dtype: {dtype}"
         raise ValueError(msg)
diff --git a/altair/utils/data.py b/altair/utils/data.py
index 61923231c..daf37393d 100644
--- a/altair/utils/data.py
+++ b/altair/utils/data.py
@@ -22,10 +22,17 @@
 from functools import partial
 import sys
 
-import pandas as pd
+import narwhals.stable.v1 as nw
+from narwhals.dependencies import is_pandas_dataframe as _is_pandas_dataframe
+from narwhals.typing import IntoDataFrame
 
 from ._importers import import_pyarrow_interchange
-from .core import sanitize_dataframe, sanitize_arrow_table, DataFrameLike
+from .core import (
+    sanitize_pandas_dataframe,
+    DataFrameLike,
+    sanitize_narwhals_dataframe,
+    to_eager_narwhals_dataframe,
+)
 from .core import sanitize_geo_interface
 from .plugin_registry import PluginRegistry
 
@@ -36,6 +43,7 @@
 
 if TYPE_CHECKING:
     import pyarrow as pa
+    import pandas as pd
 
 
 @runtime_checkable
@@ -44,20 +52,23 @@ class SupportsGeoInterface(Protocol):
 
 
 DataType: TypeAlias = Union[
-    Dict[Any, Any], pd.DataFrame, SupportsGeoInterface, DataFrameLike
+    Dict[Any, Any], IntoDataFrame, SupportsGeoInterface, DataFrameLike
 ]
 TDataType = TypeVar("TDataType", bound=DataType)
+TIntoDataFrame = TypeVar("TIntoDataFrame", bound=IntoDataFrame)
 
 VegaLiteDataDict: TypeAlias = Dict[
     str, Union[str, Dict[Any, Any], List[Dict[Any, Any]]]
 ]
 ToValuesReturnType: TypeAlias = Dict[str, Union[Dict[Any, Any], List[Dict[Any, Any]]]]
-SampleReturnType = Union[pd.DataFrame, Dict[str, Sequence], "pa.lib.Table", None]
+SampleReturnType = Union[IntoDataFrame, Dict[str, Sequence], None]
 
 
 def is_data_type(obj: Any) -> TypeIs[DataType]:
-    return isinstance(obj, (dict, pd.DataFrame, DataFrameLike, SupportsGeoInterface))
+    return _is_pandas_dataframe(obj) or isinstance(
+        obj, (dict, DataFrameLike, SupportsGeoInterface, nw.DataFrame)
+    )
 
 
 # ==============================================================================
@@ -133,20 +144,14 @@ def raise_max_rows_error():
             values = data.__geo_interface__["features"]
         else:
             values = data.__geo_interface__
-    elif isinstance(data, pd.DataFrame):
-        values = data
     elif isinstance(data, dict):
         if "values" in data:
             values = data["values"]
         else:
             return data
-    elif isinstance(data, DataFrameLike):
-        pa_table = arrow_table_from_dfi_dataframe(data)
-        if max_rows is not None and pa_table.num_rows > max_rows:
-            
raise_max_rows_error() - # Return pyarrow Table instead of input since the - # `arrow_table_from_dfi_dataframe` call above may be expensive - return pa_table + else: + data = to_eager_narwhals_dataframe(data) + values = data if max_rows is not None and len(values) > max_rows: raise_max_rows_error() @@ -159,6 +164,10 @@ def sample( data: None = ..., n: int | None = ..., frac: float | None = ... ) -> partial: ... @overload +def sample( + data: TIntoDataFrame, n: int | None = ..., frac: float | None = ... +) -> TIntoDataFrame: ... +@overload def sample( data: DataType, n: int | None = ..., frac: float | None = ... ) -> SampleReturnType: ... @@ -171,7 +180,7 @@ def sample( if data is None: return partial(sample, n=n, frac=frac) check_data_type(data) - if isinstance(data, pd.DataFrame): + if _is_pandas_dataframe(data): return data.sample(n=n, frac=frac) elif isinstance(data, dict): if "values" in data: @@ -186,19 +195,14 @@ def sample( else: # Maybe this should raise an error or return something useful? return None - elif isinstance(data, DataFrameLike): - pa_table = arrow_table_from_dfi_dataframe(data) - if not n: - if frac is None: - msg = "frac cannot be None if n is None with this data input type" - raise ValueError(msg) - n = int(frac * len(pa_table)) - indices = random.sample(range(len(pa_table)), n) - return pa_table.take(indices) - else: - # Maybe this should raise an error or return something useful? Currently, - # if data is of type SupportsGeoInterface it lands here - return None + data = nw.from_native(data, eager_only=True) + if not n: + if frac is None: + msg = "frac cannot be None if n is None with this data input type" + raise ValueError(msg) + n = int(frac * len(data)) + indices = random.sample(range(len(data)), n) + return nw.to_native(data[indices]) _FormatType = Literal["csv", "json"] @@ -309,24 +313,26 @@ def _to_text_kwds(prefix: str, extension: str, filename: str, urlpath: str, /) - def to_values(data: DataType) -> ToValuesReturnType: """Replace a DataFrame by a data model with values.""" check_data_type(data) - if isinstance(data, SupportsGeoInterface): - if isinstance(data, pd.DataFrame): - data = sanitize_dataframe(data) + # `strict=False` passes `data` through as-is if it is not a Narwhals object. + data_native = nw.to_native(data, strict=False) + if isinstance(data_native, SupportsGeoInterface): + if _is_pandas_dataframe(data_native): + data_native = sanitize_pandas_dataframe(data_native) # Maybe the type could be further clarified here that it is # SupportGeoInterface and then the ignore statement is not needed? - data_sanitized = sanitize_geo_interface(data.__geo_interface__) # type: ignore[arg-type] + data_sanitized = sanitize_geo_interface(data_native.__geo_interface__) return {"values": data_sanitized} - elif isinstance(data, pd.DataFrame): - data = sanitize_dataframe(data) - return {"values": data.to_dict(orient="records")} - elif isinstance(data, dict): - if "values" not in data: + elif _is_pandas_dataframe(data_native): + data_native = sanitize_pandas_dataframe(data_native) + return {"values": data_native.to_dict(orient="records")} + elif isinstance(data_native, dict): + if "values" not in data_native: msg = "values expected in data dict, but not present." 
            raise KeyError(msg)
-        return data
-    elif isinstance(data, DataFrameLike):
-        pa_table = sanitize_arrow_table(arrow_table_from_dfi_dataframe(data))
-        return {"values": pa_table.to_pylist()}
+        return data_native
+    elif isinstance(data, nw.DataFrame):
+        data = sanitize_narwhals_dataframe(data)
+        return {"values": data.rows(named=True)}
     else:
         # Should never reach this state as tested by check_data_type
         msg = f"Unrecognized data type: {type(data)}"
@@ -349,24 +355,23 @@ def _compute_data_hash(data_str: str) -> str:
 def _data_to_json_string(data: DataType) -> str:
     """Return a JSON string representation of the input data"""
     check_data_type(data)
-    if isinstance(data, SupportsGeoInterface):
-        if isinstance(data, pd.DataFrame):
-            data = sanitize_dataframe(data)
-        # Maybe the type could be further clarified here that it is
-        # SupportGeoInterface and then the ignore statement is not needed?
-        data = sanitize_geo_interface(data.__geo_interface__)  # type: ignore[arg-type]
-        return json.dumps(data)
-    elif isinstance(data, pd.DataFrame):
-        data = sanitize_dataframe(data)
-        return data.to_json(orient="records", double_precision=15)
-    elif isinstance(data, dict):
-        if "values" not in data:
+    # `strict=False` passes `data` through as-is if it is not a Narwhals object.
+    data_native = nw.to_native(data, strict=False)
+    if isinstance(data_native, SupportsGeoInterface):
+        if _is_pandas_dataframe(data_native):
+            data_native = sanitize_pandas_dataframe(data_native)
+        data_native = sanitize_geo_interface(data_native.__geo_interface__)
+        return json.dumps(data_native)
+    elif _is_pandas_dataframe(data_native):
+        data_native = sanitize_pandas_dataframe(data_native)
+        return data_native.to_json(orient="records", double_precision=15)
+    elif isinstance(data_native, dict):
+        if "values" not in data_native:
             msg = "values expected in data dict, but not present."
             raise KeyError(msg)
-        return json.dumps(data["values"], sort_keys=True)
-    elif isinstance(data, DataFrameLike):
-        pa_table = arrow_table_from_dfi_dataframe(data)
-        return json.dumps(pa_table.to_pylist())
+        return json.dumps(data_native["values"], sort_keys=True)
+    elif isinstance(data, nw.DataFrame):
+        return json.dumps(data.rows(named=True))
     else:
         msg = "to_json only works with data expressed as " "a DataFrame or as a dict"
         raise NotImplementedError(msg)
@@ -382,13 +387,18 @@ def _data_to_csv_string(data: dict | pd.DataFrame | DataFrameLike) -> str:
             f"See https://github.com/vega/altair/issues/3441"
         )
         raise NotImplementedError(msg)
-    elif isinstance(data, pd.DataFrame):
-        data = sanitize_dataframe(data)
+    elif _is_pandas_dataframe(data):
+        data = sanitize_pandas_dataframe(data)
         return data.to_csv(index=False)
     elif isinstance(data, dict):
         if "values" not in data:
             msg = "values expected in data dict, but not present"
             raise KeyError(msg)
+        try:
+            import pandas as pd
+        except ImportError as exc:
+            msg = "pandas is required to convert a dict to a CSV string"
+            raise ImportError(msg) from exc
         return pd.DataFrame.from_dict(data["values"]).to_csv(index=False)
     elif isinstance(data, DataFrameLike):
         # experimental interchange dataframe support
@@ -413,7 +423,7 @@ def arrow_table_from_dfi_dataframe(dfi_df: DataFrameLike) -> pa.Table:
     # has more control over the conversion, and may have broader compatibility.
# This is the case for Polars, which supports Date32 columns in direct conversion # while pyarrow does not yet support this type in from_dataframe - for convert_method_name in ("arrow", "to_arrow", "to_arrow_table"): + for convert_method_name in ("arrow", "to_arrow", "to_arrow_table", "to_pyarrow"): convert_method = getattr(dfi_df, convert_method_name, None) if callable(convert_method): result = convert_method() diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index cbfaacf79..583e42e4a 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -7,9 +7,11 @@ import inspect import json import textwrap +from math import ceil from collections import defaultdict from importlib.metadata import version as importlib_version from itertools import chain, zip_longest +import sys from typing import ( TYPE_CHECKING, Any, @@ -29,8 +31,6 @@ import jsonschema import jsonschema.exceptions import jsonschema.validators -import numpy as np -import pandas as pd from packaging.version import Version # This leads to circular imports with the vegalite module. Currently, this works @@ -39,8 +39,6 @@ from altair import vegalite if TYPE_CHECKING: - import sys - from referencing import Registry from altair import ChartType @@ -56,7 +54,6 @@ else: from typing_extensions import Self, Never - ValidationErrorList: TypeAlias = List[jsonschema.exceptions.ValidationError] GroupedValidationErrors: TypeAlias = Dict[str, ValidationErrorList] @@ -477,20 +474,34 @@ def _subclasses(cls: type[Any]) -> Iterator[type[Any]]: yield cls -def _todict(obj: Any, context: dict[str, Any] | None) -> Any: +def _todict(obj: Any, context: dict[str, Any] | None, np_opt: Any, pd_opt: Any) -> Any: """Convert an object to a dict representation.""" + if np_opt is not None: + np = np_opt + if isinstance(obj, np.ndarray): + return [_todict(v, context, np_opt, pd_opt) for v in obj] + elif isinstance(obj, np.number): + return float(obj) + elif isinstance(obj, np.datetime64): + result = str(obj) + if "T" not in result: + # See https://github.com/vega/altair/issues/1027 for why this is necessary. 
+ result += "T00:00:00" + return result if isinstance(obj, SchemaBase): return obj.to_dict(validate=False, context=context) - elif isinstance(obj, (list, tuple, np.ndarray)): - return [_todict(v, context) for v in obj] + elif isinstance(obj, (list, tuple)): + return [_todict(v, context, np_opt, pd_opt) for v in obj] elif isinstance(obj, dict): - return {k: _todict(v, context) for k, v in obj.items() if v is not Undefined} + return { + k: _todict(v, context, np_opt, pd_opt) + for k, v in obj.items() + if v is not Undefined + } elif hasattr(obj, "to_dict"): return obj.to_dict() - elif isinstance(obj, np.number): - return float(obj) - elif isinstance(obj, (pd.Timestamp, np.datetime64)): - return pd.Timestamp(obj).isoformat() + elif pd_opt is not None and isinstance(obj, pd_opt.Timestamp): + return pd_opt.Timestamp(obj).isoformat() else: return obj @@ -636,7 +647,7 @@ def _format_params_as_table(param_dict_keys: Iterable[str]) -> str: max_column_width = 80 # Output a square table if not too big (since it is easier to read) num_param_names = len(param_names) - square_columns = int(np.ceil(num_param_names**0.5)) + square_columns = int(ceil(num_param_names**0.5)) columns = min(max_column_width // max_name_length, square_columns) # Compute roughly equal column heights to evenly divide the param names @@ -965,9 +976,17 @@ def to_dict( context = {} if ignore is None: ignore = [] + # The following return the package only if it has already been + # imported - otherwise they return None. This is useful for + # isinstance checks - for example, if pandas has not been imported, + # then an object is definitely not a `pandas.Timestamp`. + pd_opt = sys.modules.get("pandas") + np_opt = sys.modules.get("numpy") if self._args and not self._kwds: - result = _todict(self._args[0], context=context) + result = _todict( + self._args[0], context=context, np_opt=np_opt, pd_opt=pd_opt + ) elif not self._args: kwds = self._kwds.copy() # parsed_shorthand is added by FieldChannelMixin. @@ -995,10 +1014,7 @@ def to_dict( } if "mark" in kwds and isinstance(kwds["mark"], str): kwds["mark"] = {"type": kwds["mark"]} - result = _todict( - kwds, - context=context, - ) + result = _todict(kwds, context=context, np_opt=np_opt, pd_opt=pd_opt) else: msg = ( f"{self.__class__} instance has both a value and properties : " @@ -1169,7 +1185,13 @@ def validate_property( Validate a property against property schema in the context of the rootschema """ - value = _todict(value, context={}) + # The following return the package only if it has already been + # imported - otherwise they return None. This is useful for + # isinstance checks - for example, if pandas has not been imported, + # then an object is definitely not a `pandas.Timestamp`. 
+ pd_opt = sys.modules.get("pandas") + np_opt = sys.modules.get("numpy") + value = _todict(value, context={}, np_opt=np_opt, pd_opt=pd_opt) props = cls.resolve_references(schema or cls._schema).get("properties", {}) return validate_jsonschema( value, props.get(name, {}), rootschema=cls._rootschema or cls._schema diff --git a/altair/vegalite/data.py b/altair/vegalite/data.py index db5b4bcdc..19371fc87 100644 --- a/altair/vegalite/data.py +++ b/altair/vegalite/data.py @@ -1,7 +1,7 @@ from __future__ import annotations from typing import TYPE_CHECKING, overload, Callable -from ..utils.core import sanitize_dataframe +from ..utils.core import sanitize_pandas_dataframe from ..utils.data import ( MaxRowsError, limit_rows, @@ -58,7 +58,7 @@ def disable_max_rows(self) -> PluginEnabler: "default_data_transformer", "limit_rows", "sample", - "sanitize_dataframe", + "sanitize_pandas_dataframe", "to_csv", "to_json", "to_values", diff --git a/altair/vegalite/v5/api.py b/altair/vegalite/v5/api.py index 23138613f..635577db8 100644 --- a/altair/vegalite/v5/api.py +++ b/altair/vegalite/v5/api.py @@ -25,6 +25,7 @@ ) from ...utils.data import DataType, is_data_type as _is_data_type from ...utils.deprecation import AltairDeprecationWarning +from ...utils.core import to_eager_narwhals_dataframe as _to_eager_narwhals_dataframe if TYPE_CHECKING: from ...utils.core import DataFrameLike @@ -1007,10 +1008,15 @@ def to_dict( # Altair is set up this should hold. Too complex to type hint right now copy = self.copy(deep=False) # type: ignore[attr-defined] original_data = getattr(copy, "data", Undefined) - copy.data = _prepare_data(original_data, context) + try: + data: Any = _to_eager_narwhals_dataframe(original_data) # type: ignore[arg-type] + except TypeError: + # Non-narwhalifiable type supported by Altair, such as dict + data = original_data + copy.data = _prepare_data(data, context) if original_data is not Undefined: - context["data"] = original_data + context["data"] = data # remaining to_dict calls are not at top level context["top_level"] = False diff --git a/altair/vegalite/v5/schema/channels.py b/altair/vegalite/v5/schema/channels.py index 55dfc1e2d..bbd6e0d51 100644 --- a/altair/vegalite/v5/schema/channels.py +++ b/altair/vegalite/v5/schema/channels.py @@ -13,7 +13,7 @@ from typing import TYPE_CHECKING, Any, Literal, Sequence, overload -import pandas as pd +from narwhals.dependencies import is_pandas_dataframe as _is_pandas_dataframe from altair.utils import infer_encoding_types as _infer_encoding_types from altair.utils import parse_shorthand @@ -72,7 +72,7 @@ def to_dict( # We still parse it out of the shorthand, but drop it here. parsed.pop("type", None) elif not (type_in_shorthand or type_defined_explicitly): - if isinstance(context.get("data", None), pd.DataFrame): + if _is_pandas_dataframe(context.get("data", None)): msg = ( f'Unable to determine data type for the field "{shorthand}";' " verify that the field name is not misspelled." diff --git a/pyproject.toml b/pyproject.toml index e0b31c1ac..04430f426 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,10 +19,8 @@ dependencies = [ "jinja2", # If you update the minimum required jsonschema version, also update it in build.yml "jsonschema>=3.0", - "numpy<2.0.0", - # If you update the minimum required pandas version, also update it in build.yml - "pandas>=0.25", - "packaging" + "packaging", + "narwhals>=1.1.0" ] description = "Vega-Altair: A declarative statistical visualization library for Python." 
readme = "README.md" @@ -59,6 +57,8 @@ Source = "https://github.com/vega/altair" all = [ "vega_datasets>=0.9.0", "vl-convert-python>=1.3.0", + "pandas>=0.25.3", + "numpy<2.0.0", "pyarrow>=11", "vegafusion[embed]>=1.6.6", "anywidget>=0.9.0", @@ -67,7 +67,9 @@ all = [ dev = [ "hatch", "ruff>=0.5.1", + "ibis-framework", "ipython", + "pandas>=0.25.3", "pytest", "pytest-cov", "pytest-xdist[psutil]~=3.5", @@ -77,6 +79,7 @@ dev = [ "types-jsonschema", "types-setuptools", "geopandas", + "polars>=0.20.3", ] doc = [ "sphinx", @@ -350,6 +353,7 @@ module = [ "geopandas.*", "nbformat.*", "ipykernel.*", + "ibis.*", "m2r.*", # This refers to schemapi in the tools folder which is imported # by the tools scripts such as generate_schema_wrapper.py diff --git a/tests/utils/test_core.py b/tests/utils/test_core.py index 8327d3afe..a2344a218 100644 --- a/tests/utils/test_core.py +++ b/tests/utils/test_core.py @@ -8,7 +8,7 @@ import altair as alt from altair.utils.core import parse_shorthand, update_nested, infer_encoding_types -from altair.utils.core import infer_dtype +from pandas.api.types import infer_dtype json_schema_specification = alt.load_schema()["$schema"] json_schema_dict_str = f'{{"$schema": "{json_schema_specification}"}}' diff --git a/tests/utils/test_data.py b/tests/utils/test_data.py index e90474d83..f58fc9f10 100644 --- a/tests/utils/test_data.py +++ b/tests/utils/test_data.py @@ -3,6 +3,8 @@ from typing import Any, Callable import pytest import pandas as pd +import polars as pl +import narwhals.stable.v1 as nw from altair.utils.data import ( limit_rows, MaxRowsError, @@ -33,7 +35,7 @@ def _create_data_with_values(N): def test_limit_rows(): """Test the limit_rows data transformer.""" - data = _create_dataframe(10) + data = nw.from_native(_create_dataframe(10), eager_only=True) result = limit_rows(data, max_rows=20) assert data is result with pytest.raises(MaxRowsError): @@ -65,6 +67,9 @@ def test_sample(): assert isinstance(result, dict) assert "values" in result assert len(result["values"]) == 10 + result = sample(pl.DataFrame(data), n=10) + assert isinstance(result, pl.DataFrame) + assert len(result) == 10 def test_to_values(): diff --git a/tests/utils/test_dataframe_interchange.py b/tests/utils/test_to_values_narwhals.py similarity index 85% rename from tests/utils/test_dataframe_interchange.py rename to tests/utils/test_to_values_narwhals.py index 56e6499ff..3e7977d18 100644 --- a/tests/utils/test_dataframe_interchange.py +++ b/tests/utils/test_to_values_narwhals.py @@ -3,6 +3,7 @@ import pandas as pd import pytest import sys +import narwhals.stable.v1 as nw try: import pyarrow as pa @@ -36,8 +37,9 @@ def test_arrow_timestamp_conversion(): "value": [102, 129, 139], } pa_table = pa.table(data) + nw_frame = nw.from_native(pa_table) - values = to_values(pa_table) + values = to_values(nw_frame) expected_values = { "values": [ {"date": "2004-08-01T00:00:00.000000", "value": 102}, @@ -54,9 +56,13 @@ def test_duration_raises(): df = pd.DataFrame(td).reset_index() df.columns = ["id", "timedelta"] pa_table = pa.table(df) + nw_frame = nw.from_native(pa_table) with pytest.raises(ValueError) as e: # noqa: PT011 - to_values(pa_table) + to_values(nw_frame) # Check that exception mentions the duration[ns] type, # which is what the pandas timedelta is converted into - assert "duration[ns]" in e.value.args[0] + assert ( + 'Field "timedelta" has type "Duration" which is not supported by Altair' + in e.value.args[0] + ) diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 
875647214..c3a73acb7 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -3,11 +3,16 @@ import sys import warnings +import narwhals.stable.v1 as nw import numpy as np import pandas as pd import pytest -from altair.utils import infer_vegalite_type, sanitize_dataframe, sanitize_arrow_table +from altair.utils import ( + infer_vegalite_type_for_pandas, + sanitize_pandas_dataframe, + sanitize_narwhals_dataframe, +) try: import pyarrow as pa @@ -17,7 +22,7 @@ def test_infer_vegalite_type(): def _check(arr, typ): - assert infer_vegalite_type(arr) == typ + assert infer_vegalite_type_for_pandas(arr) == typ _check(np.arange(5, dtype=float), "quantitative") _check(np.arange(5, dtype=int), "quantitative") @@ -64,7 +69,7 @@ def test_sanitize_dataframe(): # JSON serialize. This will fail on non-sanitized dataframes print(df[["s", "c2"]]) - df_clean = sanitize_dataframe(df) + df_clean = sanitize_pandas_dataframe(df) print(df_clean[["s", "c2"]]) print(df_clean[["s", "c2"]].to_dict()) s = json.dumps(df_clean.to_dict(orient="records")) @@ -107,7 +112,7 @@ def test_sanitize_dataframe_arrow_columns(): } ) df_arrow = pa.Table.from_pandas(df).to_pandas(types_mapper=pd.ArrowDtype) - df_clean = sanitize_dataframe(df_arrow) + df_clean = sanitize_pandas_dataframe(df_arrow) records = df_clean.to_dict(orient="records") assert records[0] == { "s": "a", @@ -157,8 +162,8 @@ def test_sanitize_pyarrow_table_columns() -> None: ] ), ) - sanitized = sanitize_arrow_table(pa_table) - values = sanitized.to_pylist() + sanitized = sanitize_narwhals_dataframe(nw.from_native(pa_table, eager_only=True)) + values = sanitized.rows(named=True) assert values[0] == { "s": "a", @@ -178,26 +183,26 @@ def test_sanitize_dataframe_colnames(): df = pd.DataFrame(np.arange(12).reshape(4, 3)) # Test that RangeIndex is converted to strings - df = sanitize_dataframe(df) + df = sanitize_pandas_dataframe(df) assert [isinstance(col, str) for col in df.columns] # Test that non-string columns result in an error df.columns = [4, "foo", "bar"] with pytest.raises(ValueError) as err: # noqa: PT011 - sanitize_dataframe(df) + sanitize_pandas_dataframe(df) assert str(err.value).startswith("Dataframe contains invalid column name: 4.") def test_sanitize_dataframe_timedelta(): df = pd.DataFrame({"r": pd.timedelta_range(start="1 day", periods=4)}) with pytest.raises(ValueError) as err: # noqa: PT011 - sanitize_dataframe(df) + sanitize_pandas_dataframe(df) assert str(err.value).startswith('Field "r" has type "timedelta') def test_sanitize_dataframe_infs(): df = pd.DataFrame({"x": [0, 1, 2, np.inf, -np.inf, np.nan]}) - df_clean = sanitize_dataframe(df) + df_clean = sanitize_pandas_dataframe(df) assert list(df_clean.dtypes) == [object] assert list(df_clean["x"]) == [0, 1, 2, None, None, None] @@ -218,7 +223,7 @@ def test_sanitize_nullable_integers(): } ) - df_clean = sanitize_dataframe(df) + df_clean = sanitize_pandas_dataframe(df) assert {col.dtype.name for _, col in df_clean.items()} == {"object"} result_python = {col_name: list(col) for col_name, col in df_clean.items()} @@ -246,7 +251,7 @@ def test_sanitize_string_dtype(): } ) - df_clean = sanitize_dataframe(df) + df_clean = sanitize_pandas_dataframe(df) assert {col.dtype.name for _, col in df_clean.items()} == {"object"} result_python = {col_name: list(col) for col_name, col in df_clean.items()} @@ -271,7 +276,7 @@ def test_sanitize_boolean_dtype(): } ) - df_clean = sanitize_dataframe(df) + df_clean = sanitize_pandas_dataframe(df) assert {col.dtype.name for _, col in df_clean.items()} == 
{"object"} result_python = {col_name: list(col) for col_name, col in df_clean.items()} diff --git a/tests/vegalite/v5/test_api.py b/tests/vegalite/v5/test_api.py index a7197e0b1..d00cc9849 100644 --- a/tests/vegalite/v5/test_api.py +++ b/tests/vegalite/v5/test_api.py @@ -1,15 +1,22 @@ """Unit tests for altair API""" +from datetime import date import io +import ibis +import sys import json import operator import os import pathlib import tempfile +from importlib.metadata import version as importlib_version +from packaging.version import Version import jsonschema +import narwhals.stable.v1 as nw import pytest import pandas as pd +import polars as pl import altair.vegalite.v5 as alt @@ -18,6 +25,10 @@ except ImportError: vlc = None +ibis.set_backend("polars") + +PANDAS_VERSION = Version(importlib_version("pandas")) + def getargs(*args, **kwargs): return args, kwargs @@ -737,7 +748,7 @@ def test_selection_property(): def test_LookupData(): - df = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + df = nw.from_native(pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})) lookup = alt.LookupData(data=df, key="x") dct = lookup.to_dict() @@ -1065,3 +1076,38 @@ def test_validate_dataset(): jsn = chart.to_json() assert jsn + + +def test_polars_with_pandas_nor_pyarrow(monkeypatch: pytest.MonkeyPatch): + monkeypatch.delitem(sys.modules, "pandas") + monkeypatch.delitem(sys.modules, "numpy") + monkeypatch.delitem(sys.modules, "pyarrow", raising=False) + + df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + _ = alt.Chart(df).mark_line().encode(x="a", y="b").to_json() + # Check pandas and PyArrow weren't imported anywhere along the way, + # confirming that the plot above would work without pandas no PyArrow + # installed. + assert "pandas" not in sys.modules + assert "pyarrow" not in sys.modules + assert "numpy" not in sys.modules + + +@pytest.mark.skipif( + Version("1.5") > PANDAS_VERSION, + reason="A warning is thrown on old pandas versions", +) +@pytest.mark.xfail( + sys.platform == "win32", reason="Timezone database is not installed on Windows" +) +def test_ibis_with_date_32(): + df = pl.DataFrame( + {"a": [1, 2, 3], "b": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)]} + ) + tbl = ibis.memtable(df) + result = alt.Chart(tbl).mark_line().encode(x="a", y="b").to_dict() + assert next(iter(result["datasets"].values())) == [ + {"a": 1, "b": "2020-01-01T00:00:00"}, + {"a": 2, "b": "2020-01-02T00:00:00"}, + {"a": 3, "b": "2020-01-03T00:00:00"}, + ] diff --git a/tools/generate_schema_wrapper.py b/tools/generate_schema_wrapper.py index 9b15f2900..462029f81 100644 --- a/tools/generate_schema_wrapper.py +++ b/tools/generate_schema_wrapper.py @@ -108,7 +108,7 @@ def to_dict( # We still parse it out of the shorthand, but drop it here. parsed.pop("type", None) elif not (type_in_shorthand or type_defined_explicitly): - if isinstance(context.get("data", None), pd.DataFrame): + if _is_pandas_dataframe(context.get("data", None)): msg = ( f'Unable to determine data type for the field "{shorthand}";' " verify that the field name is not misspelled." 
@@ -514,6 +514,7 @@ def generate_vegalite_schema_wrapper(schema_file: Path) -> str: "from typing import Any, Literal, Union, Protocol, Sequence, List, Iterator, TYPE_CHECKING", "import pkgutil", "import json\n", + "from narwhals.dependencies import is_pandas_dataframe as _is_pandas_dataframe", "from altair.utils.schemapi import SchemaBase, Undefined, UndefinedType, _subclasses # noqa: F401\n", _type_checking_only_imports( "from altair import Parameter", @@ -564,7 +565,7 @@ def generate_vegalite_channel_wrappers( imports = imports or [ "from __future__ import annotations\n", "from typing import Any, overload, Sequence, List, Literal, Union, TYPE_CHECKING", - "import pandas as pd", + "from narwhals.dependencies import is_pandas_dataframe as _is_pandas_dataframe", "from altair.utils.schemapi import Undefined, with_property_setters", "from altair.utils import infer_encoding_types as _infer_encoding_types", "from altair.utils import parse_shorthand", diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 9c5081e2c..6a2cce78d 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -5,9 +5,11 @@ import inspect import json import textwrap +from math import ceil from collections import defaultdict from importlib.metadata import version as importlib_version from itertools import chain, zip_longest +import sys from typing import ( TYPE_CHECKING, Any, @@ -27,8 +29,6 @@ import jsonschema import jsonschema.exceptions import jsonschema.validators -import numpy as np -import pandas as pd from packaging.version import Version # This leads to circular imports with the vegalite module. Currently, this works @@ -37,8 +37,6 @@ from altair import vegalite if TYPE_CHECKING: - import sys - from referencing import Registry from altair import ChartType @@ -54,7 +52,6 @@ else: from typing_extensions import Self, Never - ValidationErrorList: TypeAlias = List[jsonschema.exceptions.ValidationError] GroupedValidationErrors: TypeAlias = Dict[str, ValidationErrorList] @@ -475,20 +472,34 @@ def _subclasses(cls: type[Any]) -> Iterator[type[Any]]: yield cls -def _todict(obj: Any, context: dict[str, Any] | None) -> Any: +def _todict(obj: Any, context: dict[str, Any] | None, np_opt: Any, pd_opt: Any) -> Any: """Convert an object to a dict representation.""" + if np_opt is not None: + np = np_opt + if isinstance(obj, np.ndarray): + return [_todict(v, context, np_opt, pd_opt) for v in obj] + elif isinstance(obj, np.number): + return float(obj) + elif isinstance(obj, np.datetime64): + result = str(obj) + if "T" not in result: + # See https://github.com/vega/altair/issues/1027 for why this is necessary. 
+ result += "T00:00:00" + return result if isinstance(obj, SchemaBase): return obj.to_dict(validate=False, context=context) - elif isinstance(obj, (list, tuple, np.ndarray)): - return [_todict(v, context) for v in obj] + elif isinstance(obj, (list, tuple)): + return [_todict(v, context, np_opt, pd_opt) for v in obj] elif isinstance(obj, dict): - return {k: _todict(v, context) for k, v in obj.items() if v is not Undefined} + return { + k: _todict(v, context, np_opt, pd_opt) + for k, v in obj.items() + if v is not Undefined + } elif hasattr(obj, "to_dict"): return obj.to_dict() - elif isinstance(obj, np.number): - return float(obj) - elif isinstance(obj, (pd.Timestamp, np.datetime64)): - return pd.Timestamp(obj).isoformat() + elif pd_opt is not None and isinstance(obj, pd_opt.Timestamp): + return pd_opt.Timestamp(obj).isoformat() else: return obj @@ -634,7 +645,7 @@ def _format_params_as_table(param_dict_keys: Iterable[str]) -> str: max_column_width = 80 # Output a square table if not too big (since it is easier to read) num_param_names = len(param_names) - square_columns = int(np.ceil(num_param_names**0.5)) + square_columns = int(ceil(num_param_names**0.5)) columns = min(max_column_width // max_name_length, square_columns) # Compute roughly equal column heights to evenly divide the param names @@ -963,9 +974,17 @@ def to_dict( context = {} if ignore is None: ignore = [] + # The following return the package only if it has already been + # imported - otherwise they return None. This is useful for + # isinstance checks - for example, if pandas has not been imported, + # then an object is definitely not a `pandas.Timestamp`. + pd_opt = sys.modules.get("pandas") + np_opt = sys.modules.get("numpy") if self._args and not self._kwds: - result = _todict(self._args[0], context=context) + result = _todict( + self._args[0], context=context, np_opt=np_opt, pd_opt=pd_opt + ) elif not self._args: kwds = self._kwds.copy() # parsed_shorthand is added by FieldChannelMixin. @@ -993,10 +1012,7 @@ def to_dict( } if "mark" in kwds and isinstance(kwds["mark"], str): kwds["mark"] = {"type": kwds["mark"]} - result = _todict( - kwds, - context=context, - ) + result = _todict(kwds, context=context, np_opt=np_opt, pd_opt=pd_opt) else: msg = ( f"{self.__class__} instance has both a value and properties : " @@ -1167,7 +1183,13 @@ def validate_property( Validate a property against property schema in the context of the rootschema """ - value = _todict(value, context={}) + # The following return the package only if it has already been + # imported - otherwise they return None. This is useful for + # isinstance checks - for example, if pandas has not been imported, + # then an object is definitely not a `pandas.Timestamp`. + pd_opt = sys.modules.get("pandas") + np_opt = sys.modules.get("numpy") + value = _todict(value, context={}, np_opt=np_opt, pd_opt=pd_opt) props = cls.resolve_references(schema or cls._schema).get("properties", {}) return validate_jsonschema( value, props.get(name, {}), rootschema=cls._rootschema or cls._schema
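

Note on the two techniques this patch leans on. First, `_todict` looks pandas and
NumPy up in `sys.modules` instead of importing them: the lookup returns the module
only if the caller has already imported it, so the isinstance checks run without
ever pulling either package in. A minimal, self-contained sketch of the same idea
(`to_jsonable` is a hypothetical name used here for illustration, not part of
Altair):

    import sys
    from typing import Any

    def to_jsonable(obj: Any) -> Any:
        """Convert `obj` to a JSON-serializable value without importing
        pandas or NumPy ourselves."""
        # `sys.modules.get` returns None when the module was never imported;
        # in that case no instance of its types can exist, so the checks
        # below are safely skipped.
        np_opt = sys.modules.get("numpy")
        pd_opt = sys.modules.get("pandas")
        if np_opt is not None:
            if isinstance(obj, np_opt.ndarray):
                return [to_jsonable(v) for v in obj]
            if isinstance(obj, np_opt.number):
                return float(obj)
        if pd_opt is not None and isinstance(obj, pd_opt.Timestamp):
            return obj.isoformat()
        return obj

Second, serialization now goes through Narwhals rather than pandas or PyArrow. A
sketch of the `Date` handling in `sanitize_narwhals_dataframe`, assuming Polars is
installed (illustrative only; it mirrors the cast-then-format workaround in the
diff, since Polars refuses to format a `Date` with time directives):

    from datetime import date

    import narwhals.stable.v1 as nw
    import polars as pl

    frame = nw.from_native(
        pl.DataFrame({"d": [date(2020, 1, 1), date(2020, 1, 2)]}), eager_only=True
    )
    # Cast Date -> Datetime before formatting with an ISO timestamp string.
    out = frame.select(
        nw.col("d").cast(nw.Datetime).dt.to_string("%Y-%m-%dT%H:%M:%S")
    )
    print(out.rows(named=True))
    # [{'d': '2020-01-01T00:00:00'}, {'d': '2020-01-02T00:00:00'}]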