Skip to content

Commit

Permalink
Clean-up alongside dependency pinning updates (#487)
Browse files Browse the repository at this point in the history
* Update to py3.11 as an upper bound
* Update dependencies 
* Add ruff as a linter and add to pre-commit
* Remove `pandas.use_inf_as_na` and update null + inf masking
* Allow for skipping some of the slowest tests
* Use `ISO8601` as the default datetime format string

---------

Co-authored-by: Stefan Pfenninger <[email protected]>
  • Loading branch information
brynpickering and sjpfenninger authored Oct 23, 2023
1 parent 889a229 commit cbed4c8
Show file tree
Hide file tree
Showing 42 changed files with 421 additions and 281 deletions.
12 changes: 8 additions & 4 deletions .azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,16 @@ strategy:
IMAGE_NAME: ubuntu-latest
PYTHON_VERSION: 3.9
CODECOV: True # Only run on one build
macos-py3.9:
linux-py3.11:
IMAGE_NAME: ubuntu-latest
PYTHON_VERSION: 3.11
CODECOV: True # Only run on one build
macos-py3.11:
IMAGE_NAME: macOS-latest
PYTHON_VERSION: 3.9
windows-py3.9:
PYTHON_VERSION: 3.11
windows-py3.11:
IMAGE_NAME: windows-latest
PYTHON_VERSION: 3.9
PYTHON_VERSION: 3.11

steps:
- bash: echo "##vso[task.prependpath]$CONDA/bin"
Expand Down
24 changes: 19 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,25 @@
default_language_version:
python: python3

repos:
- repo: https://github.com/psf/black
rev: stable
rev: 23.10.0
hooks:
- id: black

- repo: https://github.com/pycqa/isort
rev: 5.11.2
- repo: https://github.com/astral-sh/ruff-pre-commit # https://beta.ruff.rs/docs/usage/#github-action
rev: v0.1.1
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]

- repo: https://github.com/nbQA-dev/nbQA
rev: 1.7.0
hooks:
- id: isort
name: isort (python)
- id: nbqa-black
- id: nbqa-ruff
args: [--fix, --exit-non-zero-on-fix]

ci: # https://pre-commit.ci/
autofix_prs: false
autoupdate_schedule: monthly
21 changes: 12 additions & 9 deletions calliope/backend/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -1076,17 +1076,20 @@ def _to_pyomo_param(
If both `val` and `default` are np.nan/None, return np.nan.
Otherwise return ObjParameter(val/default).
"""
with pd.option_context("mode.use_inf_as_na", use_inf_as_na):
if pd.isnull(val):
if pd.isnull(default):
param = np.nan
else:
param = ObjParameter(default)
self._instance.parameters[name].append(param)
if use_inf_as_na:
val = np.nan if val in [np.inf, -np.inf] else val
default = np.nan if default in [np.inf, -np.inf] else default

if pd.isnull(val):
if pd.isnull(default):
param = np.nan
else:
param = ObjParameter(val)
param = ObjParameter(default)
self._instance.parameters[name].append(param)
return param
else:
param = ObjParameter(val)
self._instance.parameters[name].append(param)
return param

def _to_pyomo_constraint(
self,
Expand Down
63 changes: 37 additions & 26 deletions calliope/backend/helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
Functions that can be used to process data in math `where` and `expression` strings.
"""
import functools
import re
from abc import ABC, abstractmethod
from typing import Any, Literal, Mapping, Union, overload

import pandas as pd
import numpy as np
import xarray as xr

from calliope.exceptions import BackendError
Expand Down Expand Up @@ -189,12 +190,11 @@ def as_array(self, parameter: str, *, over: Union[str, list[str]]) -> xr.DataArr
"""
if parameter in self._kwargs["model_data"].data_vars:
parameter_da = self._kwargs["model_data"][parameter]
with pd.option_context("mode.use_inf_as_na", True):
bool_parameter_da = (
parameter_da.where(pd.notnull(parameter_da)) # type: ignore
.notnull()
.any(dim=over, keep_attrs=True)
)
bool_parameter_da = (
parameter_da.notnull()
& (parameter_da != np.inf)
& (parameter_da != -np.inf)
).any(dim=over, keep_attrs=True)
else:
bool_parameter_da = xr.DataArray(False)
return bool_parameter_da
Expand Down Expand Up @@ -227,7 +227,8 @@ def as_array(
Returns:
xr.DataArray:
Array with dimensions reduced by applying a summation over the dimensions given in `over`.
NaNs are ignored (xarray.DataArray.sum arg: `skipna: True`) and if all values along the dimension(s) are NaN, the summation will lead to a NaN (xarray.DataArray.sum arg: `min_count=1`).
NaNs are ignored (xarray.DataArray.sum arg: `skipna: True`) and if all values along the dimension(s) are NaN,
the summation will lead to a NaN (xarray.DataArray.sum arg: `min_count=1`).
"""
return array.sum(over, min_count=1, skipna=True)

Expand Down Expand Up @@ -282,7 +283,8 @@ def as_array(
self, array: xr.DataArray, carrier_tier: Literal["in", "out"]
) -> xr.DataArray:
"""Reduce expression array data by selecting the carrier that corresponds to the primary carrier and then dropping the `carriers` dimension.
This function is only valid for `conversion_plus` technologies, so should only be included in a math component if the `where` string includes `inheritance(conversion_plus)` or an equivalent expression.
This function is only valid for `conversion_plus` technologies,
so should only be included in a math component if the `where` string includes `inheritance(conversion_plus)` or an equivalent expression.
Args:
array (xr.DataArray): Expression array.
Expand Down Expand Up @@ -353,6 +355,9 @@ def as_array(
The lookup array assigns the value at "B" to "A" and vice versa.
"C" is masked since the lookup array value is NaN.
"""
# Inspired by https://github.com/pydata/xarray/issues/1553#issuecomment-748491929
# Reindex does not presently support vectorized lookups: https://github.com/pydata/xarray/issues/1553
# Sel does (e.g. https://github.com/pydata/xarray/issues/4630) but can't handle missing keys

dims = set(lookup_arrays.keys())
missing_dims_in_component = dims.difference(array.dims)
Expand All @@ -368,24 +373,30 @@ def as_array(
f"All lookup arrays used to select items from `{array.name}` must be indexed over the dimensions {dims}"
)

stacked_and_dense_lookup_arrays = {
# Although we have the lookup array, its values are backend objects,
# so we grab the same array from the unadulterated model data.
# FIXME: do not add lookup tables as backend objects.
dim_name: self._kwargs["model_data"][lookup.name]
# Stacking ensures that the dimensions on `component` are not reordered on calling `.sel()`.
.stack(idx=list(dims))
# Cannot select on NaNs, so we drop them all.
.dropna("idx")
for dim_name, lookup in lookup_arrays.items()
}
sliced_component = array.sel(stacked_and_dense_lookup_arrays)
dim = "dim_0"
ixs = {}
masks = []

# Turn string lookup values to numeric ones.
# We stack the dimensions to handle multidimensional lookups
for index_dim, index in lookup_arrays.items():
stacked_lookup = self._kwargs["model_data"][index.name].stack({dim: dims})
ix = array.indexes[index_dim].get_indexer(stacked_lookup)
ixs[index_dim] = xr.DataArray(
np.fmax(0, ix),
coords={dim: stacked_lookup[dim]},
)
masks.append(ix >= 0)

return (
sliced_component.drop_vars(dims)
.unstack("idx")
.reindex_like(array, copy=False)
)
# Create a mask to nullify any lookup values that are not given (i.e., are np.nan in the lookup array)
mask = functools.reduce(lambda x, y: x & y, masks)

result = array[ixs]

if not mask.all():
result[{dim: ~mask}] = np.nan
unstacked_result = result.drop_vars(dims).unstack(dim)
return unstacked_result


class GetValAtIndex(ParsingHelperFunction):
Expand Down
4 changes: 3 additions & 1 deletion calliope/backend/latex_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ def write( # noqa: F811
If given, will write the built mathematical formulation to a file with the given extension as the file format. Defaults to None.
format (Optional["tex", "rst", "md"], optional):
Not required if filename is given (as the format will be automatically inferred). Required if expecting a string return from calling this function. The LaTeX math will be embedded in a document of the given format (tex=LaTeX, rst=reStructuredText, md=Markdown). Defaults to None.
Not required if filename is given (as the format will be automatically inferred).
Required if expecting a string return from calling this function. The LaTeX math will be embedded in a document of the given format (tex=LaTeX, rst=reStructuredText, md=Markdown).
Defaults to None.
Raises:
exceptions.ModelError: Math strings need to be built first (`build`)
Expand Down
15 changes: 8 additions & 7 deletions calliope/backend/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,8 @@ def extend_equation_list_with_expression_group(
Returns:
list[ParsedBackendEquation]:
Expanded list of parsed equations with the product of all references to items from the `expression_group` producing a new equation object. E.g., if the input equation object has a reference to a slice which itself has two expression options, two equation objects will be added to the return list.
Expanded list of parsed equations with the product of all references to items from the `expression_group` producing a new equation object.
E.g., if the input equation object has a reference to a slice which itself has two expression options, two equation objects will be added to the return list.
"""
if expression_group == "sub_expressions":
equation_items = parsed_equation.find_sub_expressions()
Expand All @@ -643,11 +644,10 @@ def extend_equation_list_with_expression_group(
]

def combine_exists_and_foreach(self, model_data: xr.Dataset) -> xr.DataArray:
"""
Generate a multi-dimensional boolean array based on the sets
over which the constraint is to be built (defined by "foreach") and the
model `exists` array.
The `exists` array is a boolean array defining the structure of the model and is True for valid combinations of technologies consuming/producing specific carriers at specific nodes. It is indexed over ["nodes", "techs", "carriers", "carrier_tiers"].
"""Generate a multi-dimensional boolean array based on the sets over which the constraint is to be built (defined by "foreach") and the model `exists` array.
The `exists` array is a boolean array defining the structure of the model and is True for valid combinations of technologies consuming/producing specific carriers at specific nodes.
It is indexed over ["nodes", "techs", "carriers", "carrier_tiers"].
Args:
model_data (xr.Dataset): Calliope model dataset.
Expand Down Expand Up @@ -682,7 +682,8 @@ def generate_top_level_where_array(
Args:
model_data (xr.Dataset): Calliope model input data.
align_to_foreach_sets (bool, optional):
By default, all foreach arrays have the dimensions ("nodes", "techs", "carriers", "carrier_tiers") as well as any additional dimensions provided by the component's "foreach" key. If this argument is True, the dimensions not included in "foreach" are removed from the array.
By default, all foreach arrays have the dimensions ("nodes", "techs", "carriers", "carrier_tiers") as well as any additional dimensions provided by the component's "foreach" key.
If this argument is True, the dimensions not included in "foreach" are removed from the array.
Defaults to True.
break_early (bool, optional):
If any intermediate array has no valid elements (i.e. all are False), the function will return that array rather than continuing - this saves time and memory on large models.
Expand Down
6 changes: 2 additions & 4 deletions calliope/backend/where_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from typing import Any, Union

import numpy as np
import pandas as pd
import pyparsing as pp
import xarray as xr

Expand Down Expand Up @@ -179,9 +178,8 @@ def as_latex(self, model_data: xr.Dataset, apply_where: bool = True) -> str:

def _data_var_exists(self, model_data: xr.Dataset) -> xr.DataArray:
"mask by setting all (NaN | INF/-INF) to False, otherwise True"
model_data_var = model_data.get(self.data_var, xr.DataArray(None))
with pd.option_context("mode.use_inf_as_na", True):
return model_data_var.where(pd.notnull(model_data_var)).notnull() # type: ignore
var = model_data.get(self.data_var, xr.DataArray(np.nan))
return var.notnull() & (var != np.inf) & (var != -np.inf)

def _data_var_with_default(self, model_data: xr.Dataset) -> xr.DataArray:
"Access data var and fill with default values. Return default value as an array if var does not exist"
Expand Down
2 changes: 1 addition & 1 deletion calliope/config/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ model:
time: {} # Optional settings to adjust time resolution, see :ref:`time_clustering` for the available options
timeseries_data_path: null # Path to time series data
timeseries_data: null # Dict of dataframes with time series data (when passing in dicts rather than YAML files to Model constructor)
timeseries_dateformat: "%Y-%m-%d %H:%M:%S" # Timestamp format of all time series data when read from file
timeseries_dateformat: "ISO8601" # Timestamp format of all time series data when read from file. "ISO8601" means "YYYY-mm-dd HH:MM:SS".
file_allowed: [
"clustering_func",
"energy_eff",
Expand Down
6 changes: 5 additions & 1 deletion calliope/core/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@

import os

# We import netCDF4 before xarray to mitigate a numpy warning:
# https://github.com/pydata/xarray/issues/7259
import netCDF4 # noqa: F401
import numpy as np
import pandas as pd
import xarray as xr
Expand All @@ -27,7 +30,7 @@ def read_netcdf(path):

calliope_version = model_data.attrs.get("calliope_version", False)
if calliope_version:
if not str(calliope_version) in __version__:
if str(calliope_version) not in __version__:
exceptions.warn(
"This model data was created with Calliope version {}, "
"but you are running {}. Proceed with caution!".format(
Expand All @@ -43,6 +46,7 @@ def read_netcdf(path):

# Convert empty strings back to np.NaN
# TODO: revert when this issue is solved: https://github.com/pydata/xarray/issues/1647
# which it might be once this is merged: https://github.com/pydata/xarray/pull/7869
for var_name, var_array in model_data.data_vars.items():
if var_array.dtype.kind in ["U", "O"]:
model_data[var_name] = var_array.where(lambda x: x != "")
Expand Down
4 changes: 3 additions & 1 deletion calliope/core/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,9 @@ def info(self) -> str:
return "\n".join(info_strings)

def validate_math_strings(self, math_dict: dict) -> None:
"""Validate that `expression` and `where` strings of a dictionary containing string mathematical formulations can be successfully parsed. This function can be used to test custom math before attempting to build the optimisation problem.
"""Validate that `expression` and `where` strings of a dictionary containing string mathematical formulations can be successfully parsed.
This function can be used to test custom math before attempting to build the optimisation problem.
NOTE: strings are not checked for evaluation validity. Evaluation issues will be raised only on calling `Model.build()`.
Expand Down
6 changes: 4 additions & 2 deletions calliope/core/util/generate_runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,10 @@ def generate_sbatch_script(
)

if ":" not in cluster_time:
# Assuming time given as minutes, so needs changing to %H:%M%S
cluster_time = pd.to_datetime(cluster_time, unit="m").strftime("%H:%M:%S")
# Assuming time given as minutes, so needs changing to %H:%M:%S
cluster_time = pd.to_datetime(float(cluster_time), unit="m").strftime(
"%H:%M:%S"
)

lines = [
"#!/bin/bash",
Expand Down
3 changes: 1 addition & 2 deletions calliope/core/util/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@
import sys
from typing import Callable, TypeVar

from typing_extensions import ParamSpec

import jsonschema
from typing_extensions import ParamSpec

from calliope.exceptions import print_warnings_and_raise_errors

Expand Down
4 changes: 3 additions & 1 deletion calliope/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@ def print_warnings_and_raise_errors(
List of error strings or dictionary of error strings.
If None or an empty list, no errors will be raised.
Defaults to None.
during (str, optional): substring that will be placed at the top of the concatenated list of warnings/errors to indicate during which phase of data processing they occurred. Defaults to "model processing".
during (str, optional):
Substring that will be placed at the top of the concatenated list of warnings/errors to indicate during which phase of data processing they occurred.
Defaults to "model processing".
bullet (str, optional): Type of bullet points to use. Defaults to " * ".
Raises:
Expand Down
2 changes: 1 addition & 1 deletion calliope/preprocess/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def check_initial(config_model):
# Check for version mismatch
model_version = config_model.model.get("calliope_version", False)
if model_version:
if not str(model_version) in __version__:
if str(model_version) not in __version__:
model_warnings.append(
"Model configuration specifies calliope_version={}, "
"but you are running {}. Proceed with caution!".format(
Expand Down
4 changes: 2 additions & 2 deletions calliope/preprocess/model_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ def load_timeseries_from_dataframe(timeseries_dataframes, tskey):


def _parser(x, dtformat):
return pd.to_datetime(x, format=dtformat, exact=False)
return pd.to_datetime(x, format=dtformat)


def _get_names(config):
Expand Down Expand Up @@ -603,7 +603,7 @@ def process_timeseries_data(config_model, model_run, timeseries_dataframes):
if subset_time_config is not None:
# Test parsing dates first, to make sure they fit our required subset format
try:
subset_time = _parser(subset_time_config, "%Y-%m-%d %H:%M:%S")
subset_time = _parser(subset_time_config, "ISO8601")
except ValueError as e:
raise exceptions.ModelError(
"Timeseries subset must be in ISO format (anything up to the "
Expand Down
Loading

0 comments on commit cbed4c8

Please sign in to comment.