Skip to content

Commit

Permalink
Clean-up alongside dependency pinning updates (#487)
Browse files Browse the repository at this point in the history
* Update to py3.11 as an upper bound
* Update dependencies 
* Add ruff as a linter and add to pre-commit
* Remove `pandas.use_inf_as_na` and update null + inf masking
* Allow for skipping some of the slowest tests
* Use `ISO8601` as the default datetime format string

---------

Co-authored-by: Stefan Pfenninger <[email protected]>
  • Loading branch information
brynpickering and sjpfenninger authored Oct 23, 2023
1 parent 889a229 commit cbed4c8
Show file tree
Hide file tree
Showing 42 changed files with 421 additions and 281 deletions.
12 changes: 8 additions & 4 deletions .azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,16 @@ strategy:
IMAGE_NAME: ubuntu-latest
PYTHON_VERSION: 3.9
CODECOV: True # Only run on one build
macos-py3.9:
linux-py3.11:
IMAGE_NAME: ubuntu-latest
PYTHON_VERSION: 3.11
CODECOV: True # Only run on one build
macos-py3.11:
IMAGE_NAME: macOS-latest
PYTHON_VERSION: 3.9
windows-py3.9:
PYTHON_VERSION: 3.11
windows-py3.11:
IMAGE_NAME: windows-latest
PYTHON_VERSION: 3.9
PYTHON_VERSION: 3.11

steps:
- bash: echo "##vso[task.prependpath]$CONDA/bin"
Expand Down
24 changes: 19 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,25 @@
default_language_version:
python: python3

repos:
- repo: https://github.com/psf/black
rev: stable
rev: 23.10.0
hooks:
- id: black

- repo: https://github.com/pycqa/isort
rev: 5.11.2
- repo: https://github.com/astral-sh/ruff-pre-commit # https://beta.ruff.rs/docs/usage/#github-action
rev: v0.1.1
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]

- repo: https://github.com/nbQA-dev/nbQA
rev: 1.7.0
hooks:
- id: isort
name: isort (python)
- id: nbqa-black
- id: nbqa-ruff
args: [--fix, --exit-non-zero-on-fix]

ci: # https://pre-commit.ci/
autofix_prs: false
autoupdate_schedule: monthly
21 changes: 12 additions & 9 deletions calliope/backend/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -1076,17 +1076,20 @@ def _to_pyomo_param(
If both `val` and `default` are np.nan/None, return np.nan.
Otherwise return ObjParameter(val/default).
"""
with pd.option_context("mode.use_inf_as_na", use_inf_as_na):
if pd.isnull(val):
if pd.isnull(default):
param = np.nan
else:
param = ObjParameter(default)
self._instance.parameters[name].append(param)
if use_inf_as_na:
val = np.nan if val in [np.inf, -np.inf] else val
default = np.nan if default in [np.inf, -np.inf] else default

if pd.isnull(val):
if pd.isnull(default):
param = np.nan
else:
param = ObjParameter(val)
param = ObjParameter(default)
self._instance.parameters[name].append(param)
return param
else:
param = ObjParameter(val)
self._instance.parameters[name].append(param)
return param

def _to_pyomo_constraint(
self,
Expand Down
63 changes: 37 additions & 26 deletions calliope/backend/helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
Functions that can be used to process data in math `where` and `expression` strings.
"""
import functools
import re
from abc import ABC, abstractmethod
from typing import Any, Literal, Mapping, Union, overload

import pandas as pd
import numpy as np
import xarray as xr

from calliope.exceptions import BackendError
Expand Down Expand Up @@ -189,12 +190,11 @@ def as_array(self, parameter: str, *, over: Union[str, list[str]]) -> xr.DataArr
"""
if parameter in self._kwargs["model_data"].data_vars:
parameter_da = self._kwargs["model_data"][parameter]
with pd.option_context("mode.use_inf_as_na", True):
bool_parameter_da = (
parameter_da.where(pd.notnull(parameter_da)) # type: ignore
.notnull()
.any(dim=over, keep_attrs=True)
)
bool_parameter_da = (
parameter_da.notnull()
& (parameter_da != np.inf)
& (parameter_da != -np.inf)
).any(dim=over, keep_attrs=True)
else:
bool_parameter_da = xr.DataArray(False)
return bool_parameter_da
Expand Down Expand Up @@ -227,7 +227,8 @@ def as_array(
Returns:
xr.DataArray:
Array with dimensions reduced by applying a summation over the dimensions given in `over`.
NaNs are ignored (xarray.DataArray.sum arg: `skipna: True`) and if all values along the dimension(s) are NaN, the summation will lead to a NaN (xarray.DataArray.sum arg: `min_count=1`).
NaNs are ignored (xarray.DataArray.sum arg: `skipna: True`) and if all values along the dimension(s) are NaN,
the summation will lead to a NaN (xarray.DataArray.sum arg: `min_count=1`).
"""
return array.sum(over, min_count=1, skipna=True)

Expand Down Expand Up @@ -282,7 +283,8 @@ def as_array(
self, array: xr.DataArray, carrier_tier: Literal["in", "out"]
) -> xr.DataArray:
"""Reduce expression array data by selecting the carrier that corresponds to the primary carrier and then dropping the `carriers` dimension.
This function is only valid for `conversion_plus` technologies, so should only be included in a math component if the `where` string includes `inheritance(conversion_plus)` or an equivalent expression.
This function is only valid for `conversion_plus` technologies,
so should only be included in a math component if the `where` string includes `inheritance(conversion_plus)` or an equivalent expression.
Args:
array (xr.DataArray): Expression array.
Expand Down Expand Up @@ -353,6 +355,9 @@ def as_array(
The lookup array assigns the value at "B" to "A" and vice versa.
"C" is masked since the lookup array value is NaN.
"""
# Inspired by https://github.com/pydata/xarray/issues/1553#issuecomment-748491929
# Reindex does not presently support vectorized lookups: https://github.com/pydata/xarray/issues/1553
# Sel does (e.g. https://github.com/pydata/xarray/issues/4630) but can't handle missing keys

dims = set(lookup_arrays.keys())
missing_dims_in_component = dims.difference(array.dims)
Expand All @@ -368,24 +373,30 @@ def as_array(
f"All lookup arrays used to select items from `{array.name}` must be indexed over the dimensions {dims}"
)

stacked_and_dense_lookup_arrays = {
# Although we have the lookup array, its values are backend objects,
# so we grab the same array from the unadulterated model data.
# FIXME: do not add lookup tables as backend objects.
dim_name: self._kwargs["model_data"][lookup.name]
# Stacking ensures that the dimensions on `component` are not reordered on calling `.sel()`.
.stack(idx=list(dims))
# Cannot select on NaNs, so we drop them all.
.dropna("idx")
for dim_name, lookup in lookup_arrays.items()
}
sliced_component = array.sel(stacked_and_dense_lookup_arrays)
dim = "dim_0"
ixs = {}
masks = []

# Turn string lookup values to numeric ones.
# We stack the dimensions to handle multidimensional lookups
for index_dim, index in lookup_arrays.items():
stacked_lookup = self._kwargs["model_data"][index.name].stack({dim: dims})
ix = array.indexes[index_dim].get_indexer(stacked_lookup)
ixs[index_dim] = xr.DataArray(
np.fmax(0, ix),
coords={dim: stacked_lookup[dim]},
)
masks.append(ix >= 0)

return (
sliced_component.drop_vars(dims)
.unstack("idx")
.reindex_like(array, copy=False)
)
# Create a mask to nullify any lookup values that are not given (i.e., are np.nan in the lookup array)
mask = functools.reduce(lambda x, y: x & y, masks)

result = array[ixs]

if not mask.all():
result[{dim: ~mask}] = np.nan
unstacked_result = result.drop_vars(dims).unstack(dim)
return unstacked_result


class GetValAtIndex(ParsingHelperFunction):
Expand Down
4 changes: 3 additions & 1 deletion calliope/backend/latex_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ def write( # noqa: F811
If given, will write the built mathematical formulation to a file with the given extension as the file format. Defaults to None.
format (Optional["tex", "rst", "md"], optional):
Not required if filename is given (as the format will be automatically inferred). Required if expecting a string return from calling this function. The LaTeX math will be embedded in a document of the given format (tex=LaTeX, rst=reStructuredText, md=Markdown). Defaults to None.
Not required if filename is given (as the format will be automatically inferred).
Required if expecting a string return from calling this function. The LaTeX math will be embedded in a document of the given format (tex=LaTeX, rst=reStructuredText, md=Markdown).
Defaults to None.
Raises:
exceptions.ModelError: Math strings need to be built first (`build`)
Expand Down
15 changes: 8 additions & 7 deletions calliope/backend/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,8 @@ def extend_equation_list_with_expression_group(
Returns:
list[ParsedBackendEquation]:
Expanded list of parsed equations with the product of all references to items from the `expression_group` producing a new equation object. E.g., if the input equation object has a reference to a slice which itself has two expression options, two equation objects will be added to the return list.
Expanded list of parsed equations with the product of all references to items from the `expression_group` producing a new equation object.
E.g., if the input equation object has a reference to a slice which itself has two expression options, two equation objects will be added to the return list.
"""
if expression_group == "sub_expressions":
equation_items = parsed_equation.find_sub_expressions()
Expand All @@ -643,11 +644,10 @@ def extend_equation_list_with_expression_group(
]

def combine_exists_and_foreach(self, model_data: xr.Dataset) -> xr.DataArray:
"""
Generate a multi-dimensional boolean array based on the sets
over which the constraint is to be built (defined by "foreach") and the
model `exists` array.
The `exists` array is a boolean array defining the structure of the model and is True for valid combinations of technologies consuming/producing specific carriers at specific nodes. It is indexed over ["nodes", "techs", "carriers", "carrier_tiers"].
"""Generate a multi-dimensional boolean array based on the sets over which the constraint is to be built (defined by "foreach") and the model `exists` array.
The `exists` array is a boolean array defining the structure of the model and is True for valid combinations of technologies consuming/producing specific carriers at specific nodes.
It is indexed over ["nodes", "techs", "carriers", "carrier_tiers"].
Args:
model_data (xr.Dataset): Calliope model dataset.
Expand Down Expand Up @@ -682,7 +682,8 @@ def generate_top_level_where_array(
Args:
model_data (xr.Dataset): Calliope model input data.
align_to_foreach_sets (bool, optional):
By default, all foreach arrays have the dimensions ("nodes", "techs", "carriers", "carrier_tiers") as well as any additional dimensions provided by the component's "foreach" key. If this argument is True, the dimensions not included in "foreach" are removed from the array.
By default, all foreach arrays have the dimensions ("nodes", "techs", "carriers", "carrier_tiers") as well as any additional dimensions provided by the component's "foreach" key.
If this argument is True, the dimensions not included in "foreach" are removed from the array.
Defaults to True.
break_early (bool, optional):
If any intermediate array has no valid elements (i.e. all are False), the function will return that array rather than continuing - this saves time and memory on large models.
Expand Down
6 changes: 2 additions & 4 deletions calliope/backend/where_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from typing import Any, Union

import numpy as np
import pandas as pd
import pyparsing as pp
import xarray as xr

Expand Down Expand Up @@ -179,9 +178,8 @@ def as_latex(self, model_data: xr.Dataset, apply_where: bool = True) -> str:

def _data_var_exists(self, model_data: xr.Dataset) -> xr.DataArray:
"mask by setting all (NaN | INF/-INF) to False, otherwise True"
model_data_var = model_data.get(self.data_var, xr.DataArray(None))
with pd.option_context("mode.use_inf_as_na", True):
return model_data_var.where(pd.notnull(model_data_var)).notnull() # type: ignore
var = model_data.get(self.data_var, xr.DataArray(np.nan))
return var.notnull() & (var != np.inf) & (var != -np.inf)

def _data_var_with_default(self, model_data: xr.Dataset) -> xr.DataArray:
"Access data var and fill with default values. Return default value as an array if var does not exist"
Expand Down
2 changes: 1 addition & 1 deletion calliope/config/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ model:
time: {} # Optional settings to adjust time resolution, see :ref:`time_clustering` for the available options
timeseries_data_path: null # Path to time series data
timeseries_data: null # Dict of dataframes with time series data (when passing in dicts rather than YAML files to Model constructor)
timeseries_dateformat: "%Y-%m-%d %H:%M:%S" # Timestamp format of all time series data when read from file
timeseries_dateformat: "ISO8601" # Timestamp format of all time series data when read from file. "ISO8601" means "YYYY-mm-dd HH:MM:SS".
file_allowed: [
"clustering_func",
"energy_eff",
Expand Down
6 changes: 5 additions & 1 deletion calliope/core/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@

import os

# We import netCDF4 before xarray to mitigate a numpy warning:
# https://github.com/pydata/xarray/issues/7259
import netCDF4 # noqa: F401
import numpy as np
import pandas as pd
import xarray as xr
Expand All @@ -27,7 +30,7 @@ def read_netcdf(path):

calliope_version = model_data.attrs.get("calliope_version", False)
if calliope_version:
if not str(calliope_version) in __version__:
if str(calliope_version) not in __version__:
exceptions.warn(
"This model data was created with Calliope version {}, "
"but you are running {}. Proceed with caution!".format(
Expand All @@ -43,6 +46,7 @@ def read_netcdf(path):

# Convert empty strings back to np.NaN
# TODO: revert when this issue is solved: https://github.com/pydata/xarray/issues/1647
# which it might be once this is merged: https://github.com/pydata/xarray/pull/7869
for var_name, var_array in model_data.data_vars.items():
if var_array.dtype.kind in ["U", "O"]:
model_data[var_name] = var_array.where(lambda x: x != "")
Expand Down
4 changes: 3 additions & 1 deletion calliope/core/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,9 @@ def info(self) -> str:
return "\n".join(info_strings)

def validate_math_strings(self, math_dict: dict) -> None:
"""Validate that `expression` and `where` strings of a dictionary containing string mathematical formulations can be successfully parsed. This function can be used to test custom math before attempting to build the optimisation problem.
"""Validate that `expression` and `where` strings of a dictionary containing string mathematical formulations can be successfully parsed.
This function can be used to test custom math before attempting to build the optimisation problem.
NOTE: strings are not checked for evaluation validity. Evaluation issues will be raised only on calling `Model.build()`.
Expand Down
6 changes: 4 additions & 2 deletions calliope/core/util/generate_runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,10 @@ def generate_sbatch_script(
)

if ":" not in cluster_time:
# Assuming time given as minutes, so needs changing to %H:%M%S
cluster_time = pd.to_datetime(cluster_time, unit="m").strftime("%H:%M:%S")
# Assuming time given as minutes, so needs changing to %H:%M:%S
cluster_time = pd.to_datetime(float(cluster_time), unit="m").strftime(
"%H:%M:%S"
)

lines = [
"#!/bin/bash",
Expand Down
3 changes: 1 addition & 2 deletions calliope/core/util/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@
import sys
from typing import Callable, TypeVar

from typing_extensions import ParamSpec

import jsonschema
from typing_extensions import ParamSpec

from calliope.exceptions import print_warnings_and_raise_errors

Expand Down
4 changes: 3 additions & 1 deletion calliope/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@ def print_warnings_and_raise_errors(
List of error strings or dictionary of error strings.
If None or an empty list, no errors will be raised.
Defaults to None.
during (str, optional): substring that will be placed at the top of the concatenated list of warnings/errors to indicate during which phase of data processing they occurred. Defaults to "model processing".
during (str, optional):
Substring that will be placed at the top of the concatenated list of warnings/errors to indicate during which phase of data processing they occurred.
Defaults to "model processing".
bullet (str, optional): Type of bullet points to use. Defaults to " * ".
Raises:
Expand Down
2 changes: 1 addition & 1 deletion calliope/preprocess/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def check_initial(config_model):
# Check for version mismatch
model_version = config_model.model.get("calliope_version", False)
if model_version:
if not str(model_version) in __version__:
if str(model_version) not in __version__:
model_warnings.append(
"Model configuration specifies calliope_version={}, "
"but you are running {}. Proceed with caution!".format(
Expand Down
4 changes: 2 additions & 2 deletions calliope/preprocess/model_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ def load_timeseries_from_dataframe(timeseries_dataframes, tskey):


def _parser(x, dtformat):
return pd.to_datetime(x, format=dtformat, exact=False)
return pd.to_datetime(x, format=dtformat)


def _get_names(config):
Expand Down Expand Up @@ -603,7 +603,7 @@ def process_timeseries_data(config_model, model_run, timeseries_dataframes):
if subset_time_config is not None:
# Test parsing dates first, to make sure they fit our required subset format
try:
subset_time = _parser(subset_time_config, "%Y-%m-%d %H:%M:%S")
subset_time = _parser(subset_time_config, "ISO8601")
except ValueError as e:
raise exceptions.ModelError(
"Timeseries subset must be in ISO format (anything up to the "
Expand Down
Loading

0 comments on commit cbed4c8

Please sign in to comment.