From 5ef149b7e1b3545324a48f01d0a05acf4553791e Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 20 Dec 2023 11:52:42 +0000 Subject: [PATCH 01/41] pass all parameters to cellfinder_run --- .../cellfinder_core/cellfinder.py | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/brainglobe_workflows/cellfinder_core/cellfinder.py b/brainglobe_workflows/cellfinder_core/cellfinder.py index ec6dfa60..9f35e487 100644 --- a/brainglobe_workflows/cellfinder_core/cellfinder.py +++ b/brainglobe_workflows/cellfinder_core/cellfinder.py @@ -351,7 +351,30 @@ def run_workflow_from_cellfinder_run(cfg: CellfinderConfig): # Run main analysis using `cellfinder_run` detected_cells = cellfinder_run( - signal_array, background_array, cfg.voxel_sizes + signal_array, + background_array, + cfg.voxel_sizes, + cfg.start_plane, + cfg.end_plane, + cfg.trained_model, + cfg.model_weights, + cfg.model, + cfg.batch_size, + cfg.n_free_cpus, + cfg.network_voxel_sizes, + cfg.soma_diameter, + cfg.ball_xy_size, + cfg.ball_z_size, + cfg.ball_overlap_fraction, + cfg.log_sigma_size, + cfg.n_sds_above_mean_thresh, + cfg.soma_spread_factor, + cfg.max_cluster_size, + cfg.cube_width, + cfg.cube_height, + cfg.cube_depth, + cfg.network_depth, + # *cfg, # .voxel_sizes, ---> does this work? ) # Save results to xml file From cb0d50694bb8c078e9ece13a559da3c90a67da08 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 20 Dec 2023 15:32:19 +0000 Subject: [PATCH 02/41] changed required, optional and internal fields. read_config passes. add method to config class (WIP) --- benchmarks/cellfinder_core.py | 20 +- .../cellfinder_core/cellfinder.py | 213 ++++++++++-------- brainglobe_workflows/configs/cellfinder.json | 6 - tests/cellfinder_core/conftest.py | 46 +++- .../test_integration/test_cellfinder.py | 2 +- .../test_unit/test_cellfinder.py | 81 ++++--- tests/data/input_data_GIN.json | 6 - tests/data/input_data_locally.json | 7 +- tests/data/input_data_missing_background.json | 7 +- tests/data/input_data_missing_signal.json | 7 +- tests/data/input_data_not_locally_or_GIN.json | 7 +- 11 files changed, 230 insertions(+), 172 deletions(-) diff --git a/benchmarks/cellfinder_core.py b/benchmarks/cellfinder_core.py index 717cc085..882349d0 100644 --- a/benchmarks/cellfinder_core.py +++ b/benchmarks/cellfinder_core.py @@ -112,14 +112,14 @@ def setup_cache( _ = pooch.retrieve( url=config.data_url, known_hash=config.data_hash, - path=config.install_path, + path=config._install_path, progressbar=True, processor=pooch.Unzip(extract_dir=config.data_dir_relative), ) # Check paths to input data should now exist in config - assert Path(config.signal_dir_path).exists() - assert Path(config.background_dir_path).exists() + assert Path(config._signal_dir_path).exists() + assert Path(config._background_dir_path).exists() def setup(self): """ @@ -177,10 +177,10 @@ class TimeReadInputDask(TimeBenchmarkPrepGIN): """ def time_read_signal_with_dask(self): - read_with_dask(self.cfg.signal_dir_path) + read_with_dask(self.cfg._signal_dir_path) def time_read_background_with_dask(self): - read_with_dask(self.cfg.background_dir_path) + read_with_dask(self.cfg._background_dir_path) class TimeDetectCells(TimeBenchmarkPrepGIN): @@ -199,8 +199,8 @@ def setup(self): TimeBenchmarkPrepGIN.setup(self) # add input data as arrays to config - self.signal_array = read_with_dask(self.cfg.signal_dir_path) - self.background_array = read_with_dask(self.cfg.background_dir_path) + self.signal_array = read_with_dask(self.cfg._signal_dir_path) + self.background_array = read_with_dask(self.cfg._background_dir_path) def time_cellfinder_run(self): cellfinder_run( @@ -215,8 +215,8 @@ def setup(self): TimeBenchmarkPrepGIN.setup(self) # add input data as arrays to config - self.signal_array = read_with_dask(self.cfg.signal_dir_path) - self.background_array = read_with_dask(self.cfg.background_dir_path) + self.signal_array = read_with_dask(self.cfg._signal_dir_path) + self.background_array = read_with_dask(self.cfg._background_dir_path) # detect cells self.detected_cells = cellfinder_run( @@ -224,4 +224,4 @@ def setup(self): ) def time_save_cells(self): - save_cells(self.detected_cells, self.cfg.detected_cells_path) + save_cells(self.detected_cells, self.cfg._detected_cells_path) diff --git a/brainglobe_workflows/cellfinder_core/cellfinder.py b/brainglobe_workflows/cellfinder_core/cellfinder.py index 9f35e487..5b07aa0b 100644 --- a/brainglobe_workflows/cellfinder_core/cellfinder.py +++ b/brainglobe_workflows/cellfinder_core/cellfinder.py @@ -25,7 +25,7 @@ import sys from dataclasses import dataclass from pathlib import Path -from typing import Optional, Tuple, Union +from typing import Optional, Union import pooch from brainglobe_utils.IO.cells import save_cells @@ -45,24 +45,20 @@ @dataclass class CellfinderConfig: - """ - Define input and output data locations, and the parameters for + """Define input and output data locations, and the parameters for the cellfinder preprocessing steps. - """ - - # input data - # data_dir_relative: parent directory to signal and background, - # relative to install path - data_dir_relative: Pathlike - signal_subdir: str - background_subdir: str - # output - output_path_basename_relative: Pathlike - detected_cells_filename: Pathlike + We distinguish three types of fields: + - optional fields: they have a default value + - required fields: must be provided, they do not have a default value + - internal fields: their names start with _ indicating these are private. + Any functionality to update them is moved to a function of + CellfinderConfig. + """ - # preprocessing parameters - voxel_sizes: Tuple[float, float, float] + ################ Required ######################## + # Parameters + voxel_sizes: tuple[float, float, float] start_plane: int end_plane: int trained_model: Optional[ @@ -72,7 +68,7 @@ class CellfinderConfig: model: str batch_size: int n_free_cpus: int - network_voxel_sizes: Tuple[int, int, int] + network_voxel_sizes: tuple[int, int, int] soma_diameter: int ball_xy_size: int ball_z_size: int @@ -86,27 +82,68 @@ class CellfinderConfig: cube_depth: int network_depth: depth_type - # install path (root for all inputs and outputs) - install_path: Pathlike = ".cellfinder_workflows" + ################ Optional ######################## + # they have a default value if not specified in json + + # install path (root for donwloaded inputs and outputs) + _install_path: Pathlike = ( + Path.home() / ".brainglobe" / "workflows" / "cellfinder_core" + ) + + # input data: if not specified the are assumed to be signal and + # background dirs under + # ~/.brainglobe/workflows/cellfinder/cellfinder_test_data/ + input_data_dir: Pathlike = Path(_install_path) / "cellfinder_test_data" + signal_subdir: Pathlike = "signal" + background_subdir: Pathlike = "background" + + # output data + output_dir_basename: str = "cellfinder_output_" + detected_cells_filename: str = "detected_cells.xml" + output_parent_dir: Pathlike = Path(_install_path) / output_dir_basename - # origin of data to download (if required) + # source of data to download + # (if not provided in JSON it is set to None) data_url: Optional[str] = None data_hash: Optional[str] = None + ################ Internal ######################## + # install path (root for donwloaded inputs and outputs) + _signal_dir_path: Pathlike = input_data_dir / Path(signal_subdir) + _background_dir_path: Pathlike = input_data_dir / Path(background_subdir) + # The following attributes are added # during the setup phase of the workflow - list_signal_files: Optional[list] = None - list_background_files: Optional[list] = None - output_path: Pathlike = "" - detected_cells_path: Pathlike = "" - signal_dir_path: Pathlike = "" - background_dir_path: Pathlike = "" + # _signal_dir_path: Pathlike = "" + # _background_dir_path: Pathlike = "" + _list_signal_files: Optional[list] = None + _list_background_files: Optional[list] = None + _detected_cells_path: Pathlike = "" + _output_path: Pathlike = "" + + def add_output_timestamped(self): + # output dir and file + timestamp = datetime.datetime.now() + timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S") + output_path_timestamped = Path(self.output_parent_dir) / ( + str(self.output_dir_basename) + timestamp_formatted + ) + output_path_timestamped.mkdir( + parents=True, # create any missing parents + exist_ok=True, # ignore FileExistsError exceptions + ) + + # Add output path and output file path to config + self.output_path = output_path_timestamped + self._detected_cells_path = ( + self.output_path / self.detected_cells_filename + ) -def read_cellfinder_config(input_config_path: Path): - """Instantiate a CellfinderConfig from the input json file - (assumes config is json serializable) +def read_cellfinder_config(input_config_path: Path) -> CellfinderConfig: + """Instantiate a CellfinderConfig from the input json file. + Assumes config is json serializable. Parameters ---------- @@ -129,8 +166,7 @@ def read_cellfinder_config(input_config_path: Path): def add_signal_and_background_files( config: CellfinderConfig, ) -> CellfinderConfig: - """ - Adds the lists of input data files (signal and background) + """Adds the lists of input data files (signal and background) to the config. These files are first searched locally. If not found, we @@ -160,34 +196,35 @@ def add_signal_and_background_files( # Check if input data directories (signal and background) exist locally. # If both directories exist, get list of signal and background files if ( - Path(config.signal_dir_path).exists() - and Path(config.background_dir_path).exists() + Path(config._signal_dir_path).exists() + and Path(config._background_dir_path).exists() ): logger.info("Fetching input data from the local directories") - config.list_signal_files = [ + config._list_signal_files = [ f - for f in Path(config.signal_dir_path).resolve().iterdir() + for f in Path(config._signal_dir_path).resolve().iterdir() if f.is_file() ] - config.list_background_files = [ + config._list_background_files = [ f - for f in Path(config.background_dir_path).resolve().iterdir() + for f in Path(config._background_dir_path).resolve().iterdir() if f.is_file() ] # If exactly one of the input data directories is missing, print error elif ( - Path(config.signal_dir_path).resolve().exists() - or Path(config.background_dir_path).resolve().exists() + Path(config._signal_dir_path).resolve().exists() + or Path(config._background_dir_path).resolve().exists() ): - if not Path(config.signal_dir_path).resolve().exists(): + if not Path(config._signal_dir_path).resolve().exists(): logger.error( - f"The directory {config.signal_dir_path} does not exist" + f"The directory {config._signal_dir_path} does not exist", ) else: logger.error( - f"The directory {config.background_dir_path} " "does not exist" + f"The directory {config._background_dir_path} " + "does not exist", ) # If neither of the input data directories exist, @@ -199,42 +236,43 @@ def add_signal_and_background_files( list_files_archive = pooch.retrieve( url=config.data_url, known_hash=config.data_hash, - path=config.install_path, # zip will be downloaded here + path=Path(config.input_data_dir).parent, + # config._install_path, # ----zip will be downloaded here progressbar=True, processor=pooch.Unzip( - extract_dir=config.data_dir_relative - # path to unzipped dir, + extract_dir=Path(config.input_data_dir).stem, + # files are unpacked here, a dir # *relative* to the path set in 'path' ), ) logger.info("Fetching input data from the provided GIN repository") # Check signal and background parent directories exist now - assert Path(config.signal_dir_path).resolve().exists() - assert Path(config.background_dir_path).resolve().exists() + assert Path(config._signal_dir_path).resolve().exists() + assert Path(config._background_dir_path).resolve().exists() # Add signal files to config - config.list_signal_files = [ + config._list_signal_files = [ f for f in list_files_archive if f.startswith( - str(Path(config.signal_dir_path).resolve()) + str(Path(config._signal_dir_path).resolve()), ) # if str(config.signal_dir_path) in f ] # Add background files to config - config.list_background_files = [ + config._list_background_files = [ f for f in list_files_archive if f.startswith( - str(Path(config.background_dir_path).resolve()) + str(Path(config._background_dir_path).resolve()), ) ] # If one of URL/hash to GIN repo not defined, throw an error else: logger.error( "Input data not found locally, and URL/hash to " - "GIN repository not provided" + "GIN repository not provided", ) return config @@ -262,7 +300,6 @@ def setup_workflow(input_config_path: Path) -> CellfinderConfig: a dataclass whose attributes are the parameters for running cellfinder. """ - # Fetch logger logger = logging.getLogger(LOGGER_NAME) @@ -280,38 +317,39 @@ def setup_workflow(input_config_path: Path) -> CellfinderConfig: # Add lists of input data files to the config, # if these are not defined yet - if not (config.list_signal_files and config.list_background_files): - # build fullpaths to input directories - config.signal_dir_path = str( - Path(config.install_path) - / config.data_dir_relative - / config.signal_subdir - ) - config.background_dir_path = str( - Path(config.install_path) - / config.data_dir_relative - / config.background_subdir - ) + if not (config._list_signal_files and config._list_background_files): + # # build fullpaths to input directories + # config._signal_dir_path = str( + # Path(config._install_path) + # / config.data_dir_relative + # / config.signal_subdir, + # ) + # config._background_dir_path = str( + # Path(config._install_path) + # / config.data_dir_relative + # / config.background_subdir, + # ) # add signal and background files to config config = add_signal_and_background_files(config) # Create timestamped output directory if it doesn't exist - timestamp = datetime.datetime.now() - timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S") - output_path_timestamped = Path(config.install_path) / ( - str(config.output_path_basename_relative) + timestamp_formatted - ) - output_path_timestamped.mkdir( - parents=True, # create any missing parents - exist_ok=True, # ignore FileExistsError exceptions - ) - - # Add output path and output file path to config - config.output_path = output_path_timestamped - config.detected_cells_path = ( - config.output_path / config.detected_cells_filename - ) + config.add_output_timestamped() + # timestamp = datetime.datetime.now() + # timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S") + # output_path_timestamped = Path(config._install_path) / ( + # str(config.output_dir_basename) + timestamp_formatted + # ) + # output_path_timestamped.mkdir( + # parents=True, # create any missing parents + # exist_ok=True, # ignore FileExistsError exceptions + # ) + + # # Add output path and output file path to config + # config.output_path = output_path_timestamped + # config._detected_cells_path = ( + # config.output_path / config.detected_cells_filename + # ) return config @@ -327,8 +365,7 @@ def setup(input_config_path: str) -> CellfinderConfig: def run_workflow_from_cellfinder_run(cfg: CellfinderConfig): - """ - Run workflow based on the cellfinder_core.main.main() + """Run workflow based on the cellfinder_core.main.main() function. The steps are: @@ -346,8 +383,8 @@ def run_workflow_from_cellfinder_run(cfg: CellfinderConfig): the cellfinder workflow """ # Read input data as Dask arrays - signal_array = read_with_dask(cfg.signal_dir_path) - background_array = read_with_dask(cfg.background_dir_path) + signal_array = read_with_dask(cfg._signal_dir_path) + background_array = read_with_dask(cfg._background_dir_path) # Run main analysis using `cellfinder_run` detected_cells = cellfinder_run( @@ -380,15 +417,14 @@ def run_workflow_from_cellfinder_run(cfg: CellfinderConfig): # Save results to xml file save_cells( detected_cells, - cfg.detected_cells_path, + cfg._detected_cells_path, ) def main( input_config: str = str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER), ) -> CellfinderConfig: - """ - Setup and run cellfinder workflow. + """Setup and run cellfinder workflow. This function runs the setup steps required to run the cellfinder workflow, and the @@ -417,8 +453,7 @@ def main( def main_app_wrapper(): - """ - Parse command line arguments and + """Parse command line arguments and run cellfinder setup and workflow This function is used to define an entry-point, diff --git a/brainglobe_workflows/configs/cellfinder.json b/brainglobe_workflows/configs/cellfinder.json index daf056a5..e977271c 100644 --- a/brainglobe_workflows/configs/cellfinder.json +++ b/brainglobe_workflows/configs/cellfinder.json @@ -1,12 +1,6 @@ { - "install_path": ".cellfinder_workflows", "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip", "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914", - "data_dir_relative": "cellfinder_test_data", - "signal_subdir": "signal", - "background_subdir": "background", - "output_path_basename_relative": "cellfinder_output_", - "detected_cells_filename": "detected_cells.xml", "voxel_sizes": [ 5, 2, diff --git a/tests/cellfinder_core/conftest.py b/tests/cellfinder_core/conftest.py index 749d83d3..15b7c8f6 100644 --- a/tests/cellfinder_core/conftest.py +++ b/tests/cellfinder_core/conftest.py @@ -5,9 +5,41 @@ import pooch import pytest -from brainglobe_workflows.cellfinder_core.cellfinder import ( - read_cellfinder_config, -) +# from brainglobe_workflows.cellfinder_core.cellfinder import ( +# read_cellfinder_config, +# ) + + +@pytest.fixture(autouse=True) +def mock_home_directory(monkeypatch: pytest.MonkeyPatch, tmp_path): + """_summary_ + + from https://github.com/brainglobe/brainrender-napari/blob/52673db58df247261b1ad43c52135e5a26f88d1e/tests/conftest.py#L10 + + Parameters + ---------- + monkeypatch : pytest.MonkeyPatch + _description_ + + Returns + ------- + _type_ + _description_ + """ + + # define mock home path + # home_path = Path.home() # actual home path + mock_home_path = tmp_path # home_path / ".brainglobe-tests" + + # create dir if it doesn't exist + if not mock_home_path.exists(): + mock_home_path.mkdir() + + # monkeypatch Path.home() to point to the mock home + def mock_home(): + return mock_home_path + + monkeypatch.setattr(Path, "home", mock_home) @pytest.fixture() @@ -81,6 +113,10 @@ def input_config_fetch_local( Path Path to the config json file for fetching data locally """ + from brainglobe_workflows.cellfinder_core.cellfinder import ( + read_cellfinder_config, + ) + # read local config input_config_path = input_configs_dir / "input_data_locally.json" config = read_cellfinder_config(input_config_path) @@ -89,10 +125,10 @@ def input_config_fetch_local( pooch.retrieve( url=cellfinder_GIN_data["url"], known_hash=cellfinder_GIN_data["hash"], - path=config.install_path, # path to download zip to + path=config._install_path, # path to download zip to progressbar=True, processor=pooch.Unzip( - extract_dir=config.data_dir_relative + extract_dir=Path(config.input_data_dir).stem # path to unzipped dir, *relative* to 'path' ), ) diff --git a/tests/cellfinder_core/test_integration/test_cellfinder.py b/tests/cellfinder_core/test_integration/test_cellfinder.py index 1bd120ba..31de408b 100644 --- a/tests/cellfinder_core/test_integration/test_cellfinder.py +++ b/tests/cellfinder_core/test_integration/test_cellfinder.py @@ -47,7 +47,7 @@ def test_main( cfg = main(str(request.getfixturevalue(input_config))) # check output files exist - assert Path(cfg.detected_cells_path).is_file() + assert Path(cfg._detected_cells_path).is_file() @pytest.mark.parametrize( diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index 87f65623..02cdcd14 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -6,14 +6,15 @@ import pooch import pytest -from brainglobe_workflows.cellfinder_core.cellfinder import ( - CellfinderConfig, - add_signal_and_background_files, - read_cellfinder_config, - run_workflow_from_cellfinder_run, - setup_workflow, -) -from brainglobe_workflows.cellfinder_core.cellfinder import setup as setup_full +# from brainglobe_workflows.cellfinder_core.cellfinder import ( +# # CellfinderConfig, +# add_signal_and_background_files, +# read_cellfinder_config, +# run_workflow_from_cellfinder_run, +# setup_workflow, +# ) +# from brainglobe_workflows.cellfinder_core.cellfinder import +# setup as setup_full from brainglobe_workflows.utils import setup_logger @@ -52,6 +53,10 @@ def test_read_cellfinder_config(input_config: str, input_configs_dir: Path): input_configs_dir : Path Test data directory path """ + from brainglobe_workflows.cellfinder_core.cellfinder import ( + read_cellfinder_config, + ) + # path to config json file input_config_path = input_configs_dir / input_config @@ -92,6 +97,7 @@ def test_read_cellfinder_config(input_config: str, input_configs_dir: Path): ], ) def test_add_signal_and_background_files( + mock_home_directory, caplog: pytest.LogCaptureFixture, tmp_path: Path, cellfinder_GIN_data: dict, @@ -116,6 +122,14 @@ def test_add_signal_and_background_files( message_pattern : str Expected pattern in the log """ + # mock_home_directory + + # import after mocking home dir! + from brainglobe_workflows.cellfinder_core.cellfinder import ( + add_signal_and_background_files, + read_cellfinder_config, + ) + # instantiate our custom logger _ = setup_logger() @@ -124,22 +138,12 @@ def test_add_signal_and_background_files( # monkeypatch cellfinder config: # set install_path to pytest temporary directory - config.install_path = tmp_path / config.install_path + # config._install_path = + # Path.home() / ".brainglobe" / "workflows" / "cellfinder_core" + # config._install_path = tmp_path / config._install_path # check lists of signal and background files are not defined - assert not (config.list_signal_files and config.list_background_files) - - # build fullpaths to input data directories - config.signal_dir_path = str( - Path(config.install_path) - / config.data_dir_relative - / config.signal_subdir - ) - config.background_dir_path = str( - Path(config.install_path) - / config.data_dir_relative - / config.background_subdir - ) + assert not (config._list_signal_files and config._list_background_files) # monkeypatch cellfinder config: # if config is "local" or "signal/background missing": @@ -153,10 +157,10 @@ def test_add_signal_and_background_files( pooch.retrieve( url=cellfinder_GIN_data["url"], known_hash=cellfinder_GIN_data["hash"], - path=config.install_path, # path to download zip to + path=config._install_path, # path to download zip to progressbar=True, processor=pooch.Unzip( - extract_dir=config.data_dir_relative + extract_dir=Path(config.input_data_dir).stem # path to unzipped dir, *relative* to 'path' ), ) @@ -211,6 +215,7 @@ def test_setup_workflow( request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ + from brainglobe_workflows.cellfinder_core.cellfinder import setup_workflow # setup logger _ = setup_logger() @@ -227,19 +232,19 @@ def test_setup_workflow( assert message in caplog.text # check all signal files exist - assert config.list_signal_files - assert all([Path(f).is_file() for f in config.list_signal_files]) + assert config._list_signal_files + assert all([Path(f).is_file() for f in config._list_signal_files]) # check all background files exist - assert config.list_background_files - assert all([Path(f).is_file() for f in config.list_background_files]) + assert config._list_background_files + assert all([Path(f).is_file() for f in config._list_background_files]) # check output directory exists assert Path(config.output_path).resolve().is_dir() # check output directory name has correct format out = re.fullmatch( - str(config.output_path_basename_relative) + "\\d{8}_\\d{6}$", + str(config.output_dir_basename) + "\\d{8}_\\d{6}$", Path(config.output_path).stem, ) assert out is not None @@ -247,7 +252,7 @@ def test_setup_workflow( # check output file path assert ( - Path(config.detected_cells_path) + Path(config._detected_cells_path) == Path(config.output_path) / config.detected_cells_filename ) @@ -283,6 +288,13 @@ def test_setup( request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ + from brainglobe_workflows.cellfinder_core.cellfinder import ( + CellfinderConfig, + ) + from brainglobe_workflows.cellfinder_core.cellfinder import ( + setup as setup_full, + ) + # Monkeypatch to change current directory to # pytest temporary directory # (cellfinder cache directory is created in cwd) @@ -328,6 +340,13 @@ def test_run_workflow_from_cellfinder_run( request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ + from brainglobe_workflows.cellfinder_core.cellfinder import ( + run_workflow_from_cellfinder_run, + ) + from brainglobe_workflows.cellfinder_core.cellfinder import ( + setup as setup_full, + ) + # monkeypatch to change current directory to # pytest temporary directory # (cellfinder cache directory is created in cwd) @@ -340,4 +359,4 @@ def test_run_workflow_from_cellfinder_run( run_workflow_from_cellfinder_run(cfg) # check output files are those expected? - assert Path(cfg.detected_cells_path).is_file() + assert Path(cfg._detected_cells_path).is_file() diff --git a/tests/data/input_data_GIN.json b/tests/data/input_data_GIN.json index daf056a5..e977271c 100644 --- a/tests/data/input_data_GIN.json +++ b/tests/data/input_data_GIN.json @@ -1,12 +1,6 @@ { - "install_path": ".cellfinder_workflows", "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip", "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914", - "data_dir_relative": "cellfinder_test_data", - "signal_subdir": "signal", - "background_subdir": "background", - "output_path_basename_relative": "cellfinder_output_", - "detected_cells_filename": "detected_cells.xml", "voxel_sizes": [ 5, 2, diff --git a/tests/data/input_data_locally.json b/tests/data/input_data_locally.json index e3761543..b2e613e5 100644 --- a/tests/data/input_data_locally.json +++ b/tests/data/input_data_locally.json @@ -1,10 +1,5 @@ { - "install_path": ".cellfinder_workflows", - "data_dir_relative": "cellfinder_test_data", - "signal_subdir": "signal", - "background_subdir": "background", - "output_path_basename_relative": "cellfinder_output_", - "detected_cells_filename": "detected_cells.xml", + "input_data_dir": "/Users/sofia/Documents_local/project_BrainGlobe_workflows/brainglobe-workflows/tests/data", "voxel_sizes": [ 5, 2, diff --git a/tests/data/input_data_missing_background.json b/tests/data/input_data_missing_background.json index 52454f9b..b2e613e5 100644 --- a/tests/data/input_data_missing_background.json +++ b/tests/data/input_data_missing_background.json @@ -1,10 +1,5 @@ { - "install_path": ".cellfinder_workflows", - "data_dir_relative": "cellfinder_test_data", - "signal_subdir": "signal", - "background_subdir": "__", - "output_path_basename_relative": "cellfinder_output_", - "detected_cells_filename": "detected_cells.xml", + "input_data_dir": "/Users/sofia/Documents_local/project_BrainGlobe_workflows/brainglobe-workflows/tests/data", "voxel_sizes": [ 5, 2, diff --git a/tests/data/input_data_missing_signal.json b/tests/data/input_data_missing_signal.json index 22c5247b..b2e613e5 100644 --- a/tests/data/input_data_missing_signal.json +++ b/tests/data/input_data_missing_signal.json @@ -1,10 +1,5 @@ { - "install_path": ".cellfinder_workflows", - "data_dir_relative": "cellfinder_test_data", - "signal_subdir": "__", - "background_subdir": "background", - "output_path_basename_relative": "cellfinder_output_", - "detected_cells_filename": "detected_cells.xml", + "input_data_dir": "/Users/sofia/Documents_local/project_BrainGlobe_workflows/brainglobe-workflows/tests/data", "voxel_sizes": [ 5, 2, diff --git a/tests/data/input_data_not_locally_or_GIN.json b/tests/data/input_data_not_locally_or_GIN.json index e3761543..5b1c5410 100644 --- a/tests/data/input_data_not_locally_or_GIN.json +++ b/tests/data/input_data_not_locally_or_GIN.json @@ -1,10 +1,5 @@ { - "install_path": ".cellfinder_workflows", - "data_dir_relative": "cellfinder_test_data", - "signal_subdir": "signal", - "background_subdir": "background", - "output_path_basename_relative": "cellfinder_output_", - "detected_cells_filename": "detected_cells.xml", + "input_data_dir": "_", "voxel_sizes": [ 5, 2, From 974140bade0a3ddb952916fc263790af1ad67879 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 20 Dec 2023 17:52:18 +0000 Subject: [PATCH 03/41] add signal and background data from local --- .../cellfinder_core/cellfinder.py | 268 ++++++++++-------- tests/cellfinder_core/conftest.py | 139 ++++----- .../test_unit/test_cellfinder.py | 70 +---- tests/data/input_data_GIN.json | 33 --- tests/data/input_data_locally.json | 32 --- tests/data/input_data_missing_background.json | 32 --- tests/data/input_data_missing_signal.json | 32 --- tests/data/input_data_not_locally_or_GIN.json | 32 --- 8 files changed, 232 insertions(+), 406 deletions(-) delete mode 100644 tests/data/input_data_GIN.json delete mode 100644 tests/data/input_data_locally.json delete mode 100644 tests/data/input_data_missing_background.json delete mode 100644 tests/data/input_data_missing_signal.json delete mode 100644 tests/data/input_data_not_locally_or_GIN.json diff --git a/brainglobe_workflows/cellfinder_core/cellfinder.py b/brainglobe_workflows/cellfinder_core/cellfinder.py index 5b07aa0b..6e98c6a4 100644 --- a/brainglobe_workflows/cellfinder_core/cellfinder.py +++ b/brainglobe_workflows/cellfinder_core/cellfinder.py @@ -91,16 +91,19 @@ class CellfinderConfig: ) # input data: if not specified the are assumed to be signal and - # background dirs under - # ~/.brainglobe/workflows/cellfinder/cellfinder_test_data/ - input_data_dir: Pathlike = Path(_install_path) / "cellfinder_test_data" + # background dirs under _install_path/cellfinder_test_data/ + # see __post_init__ + input_data_dir: Optional[Pathlike] = None signal_subdir: Pathlike = "signal" background_subdir: Pathlike = "background" # output data + # if dir not specified it is assumed to be under + # _install_path/output_dir_basename + # see __post_init__ output_dir_basename: str = "cellfinder_output_" detected_cells_filename: str = "detected_cells.xml" - output_parent_dir: Pathlike = Path(_install_path) / output_dir_basename + output_parent_dir: Optional[Pathlike] = None # source of data to download # (if not provided in JSON it is set to None) @@ -108,10 +111,6 @@ class CellfinderConfig: data_hash: Optional[str] = None ################ Internal ######################## - # install path (root for donwloaded inputs and outputs) - _signal_dir_path: Pathlike = input_data_dir / Path(signal_subdir) - _background_dir_path: Pathlike = input_data_dir / Path(background_subdir) - # The following attributes are added # during the setup phase of the workflow # _signal_dir_path: Pathlike = "" @@ -121,6 +120,27 @@ class CellfinderConfig: _detected_cells_path: Pathlike = "" _output_path: Pathlike = "" + def __post_init__(self: "CellfinderConfig"): + # See https://peps.python.org/pep-0557/#post-init-processing + if self.input_data_dir is None: + self.input_data_dir = ( + Path(self._install_path) / "cellfinder_test_data" + ) + + if self.output_parent_dir is None: + self.output_parent_dir = ( + Path(self._install_path) / self.output_dir_basename + ) + + # install path (root for donwloaded inputs and outputs) + self._signal_dir_path: Pathlike = self.input_data_dir / Path( + self.signal_subdir + ) + self._background_dir_path: Pathlike = self.input_data_dir / Path( + self.background_subdir + ) + + ################# Methods ######################### def add_output_timestamped(self): # output dir and file timestamp = datetime.datetime.now() @@ -139,6 +159,120 @@ def add_output_timestamped(self): self.output_path / self.detected_cells_filename ) + def add_signal_and_background_files(self): + """Adds the lists of input data files (signal and background) + to the config. + + These files are first searched locally. If not found, we + attempt to download them from GIN. + + Specifically: + - If both parent data directories (signal and background) exist + locally, + the lists of signal and background files are added to the config. + - If exactly one of the parent data directories is missing, an error + message is logged. + - If neither of them exist, the data is retrieved from the provided GIN + repository. If no URL or hash to GIN is provided, an error is thrown. + + Parameters + ---------- + config : CellfinderConfig + a cellfinder config with input data files to be validated + + Returns + ------- + config : CellfinderConfig + a cellfinder config with updated input data lists. + """ + # Fetch logger + logger = logging.getLogger(LOGGER_NAME) + + # Check if input data directories (signal and background) exist + # locally. + # If both directories exist, get list of signal and background files + if ( + Path(self._signal_dir_path).exists() + and Path(self._background_dir_path).exists() + ): + logger.info("Fetching input data from the local directories") + + self._list_signal_files = [ + f + for f in Path(self._signal_dir_path).resolve().iterdir() + if f.is_file() + ] + self._list_background_files = [ + f + for f in Path(self._background_dir_path).resolve().iterdir() + if f.is_file() + ] + + # If exactly one of the input data directories is missing, print error + elif ( + Path(self._signal_dir_path).resolve().exists() + or Path(self._background_dir_path).resolve().exists() + ): + if not Path(self._signal_dir_path).resolve().exists(): + logger.error( + f"The directory {self._signal_dir_path} does not exist", + ) + else: + logger.error( + f"The directory {self._background_dir_path} " + "does not exist", + ) + + # If neither of the input data directories exist, + # retrieve data from GIN repository and add list of files to config + else: + # Check if GIN URL and hash are defined (log error otherwise) + if self.data_url and self.data_hash: + # get list of files in GIN archive with pooch.retrieve + list_files_archive = pooch.retrieve( + url=self.data_url, + known_hash=self.data_hash, + path=Path(self.input_data_dir).parent, + # self._install_path, # ----zip will be downloaded here + progressbar=True, + processor=pooch.Unzip( + extract_dir=Path(self.input_data_dir).stem, + # files are unpacked here, a dir + # *relative* to the path set in 'path' + ), + ) + logger.info( + "Fetching input data from the provided GIN repository" + ) + + # Check signal and background parent directories exist now + assert Path(self._signal_dir_path).resolve().exists() + assert Path(self._background_dir_path).resolve().exists() + + # Add signal files to config + self._list_signal_files = [ + f + for f in list_files_archive + if f.startswith( + str(Path(self._signal_dir_path).resolve()), + ) # if str(self.signal_dir_path) in f + ] + + # Add background files to config + self._list_background_files = [ + f + for f in list_files_archive + if f.startswith( + str(Path(self._background_dir_path).resolve()), + ) + ] + # If one of URL/hash to GIN repo not defined, throw an error + else: + logger.error( + "Input data not found locally, and URL/hash to " + "GIN repository not provided", + ) + def read_cellfinder_config(input_config_path: Path) -> CellfinderConfig: """Instantiate a CellfinderConfig from the input json file. @@ -163,121 +297,6 @@ def read_cellfinder_config(input_config_path: Path) -> CellfinderConfig: return config -def add_signal_and_background_files( - config: CellfinderConfig, -) -> CellfinderConfig: - """Adds the lists of input data files (signal and background) - to the config. - - These files are first searched locally. If not found, we - attempt to download them from GIN. - - Specifically: - - If both parent data directories (signal and background) exist locally, - the lists of signal and background files are added to the config. - - If exactly one of the parent data directories is missing, an error - message is logged. - - If neither of them exist, the data is retrieved from the provided GIN - repository. If no URL or hash to GIN is provided, an error is thrown. - - Parameters - ---------- - config : CellfinderConfig - a cellfinder config with input data files to be validated - - Returns - ------- - config : CellfinderConfig - a cellfinder config with updated input data lists. - """ - # Fetch logger - logger = logging.getLogger(LOGGER_NAME) - - # Check if input data directories (signal and background) exist locally. - # If both directories exist, get list of signal and background files - if ( - Path(config._signal_dir_path).exists() - and Path(config._background_dir_path).exists() - ): - logger.info("Fetching input data from the local directories") - - config._list_signal_files = [ - f - for f in Path(config._signal_dir_path).resolve().iterdir() - if f.is_file() - ] - config._list_background_files = [ - f - for f in Path(config._background_dir_path).resolve().iterdir() - if f.is_file() - ] - - # If exactly one of the input data directories is missing, print error - elif ( - Path(config._signal_dir_path).resolve().exists() - or Path(config._background_dir_path).resolve().exists() - ): - if not Path(config._signal_dir_path).resolve().exists(): - logger.error( - f"The directory {config._signal_dir_path} does not exist", - ) - else: - logger.error( - f"The directory {config._background_dir_path} " - "does not exist", - ) - - # If neither of the input data directories exist, - # retrieve data from GIN repository and add list of files to config - else: - # Check if GIN URL and hash are defined (log error otherwise) - if config.data_url and config.data_hash: - # get list of files in GIN archive with pooch.retrieve - list_files_archive = pooch.retrieve( - url=config.data_url, - known_hash=config.data_hash, - path=Path(config.input_data_dir).parent, - # config._install_path, # ----zip will be downloaded here - progressbar=True, - processor=pooch.Unzip( - extract_dir=Path(config.input_data_dir).stem, - # files are unpacked here, a dir - # *relative* to the path set in 'path' - ), - ) - logger.info("Fetching input data from the provided GIN repository") - - # Check signal and background parent directories exist now - assert Path(config._signal_dir_path).resolve().exists() - assert Path(config._background_dir_path).resolve().exists() - - # Add signal files to config - config._list_signal_files = [ - f - for f in list_files_archive - if f.startswith( - str(Path(config._signal_dir_path).resolve()), - ) # if str(config.signal_dir_path) in f - ] - - # Add background files to config - config._list_background_files = [ - f - for f in list_files_archive - if f.startswith( - str(Path(config._background_dir_path).resolve()), - ) - ] - # If one of URL/hash to GIN repo not defined, throw an error - else: - logger.error( - "Input data not found locally, and URL/hash to " - "GIN repository not provided", - ) - - return config - - def setup_workflow(input_config_path: Path) -> CellfinderConfig: """Run setup steps prior to executing the workflow @@ -331,7 +350,8 @@ def setup_workflow(input_config_path: Path) -> CellfinderConfig: # ) # add signal and background files to config - config = add_signal_and_background_files(config) + # config = add_signal_and_background_files(config) + config.add_signal_and_background_files() # Create timestamped output directory if it doesn't exist config.add_output_timestamped() diff --git a/tests/cellfinder_core/conftest.py b/tests/cellfinder_core/conftest.py index 15b7c8f6..7a82a681 100644 --- a/tests/cellfinder_core/conftest.py +++ b/tests/cellfinder_core/conftest.py @@ -1,35 +1,17 @@ """Pytest fixtures shared across unit and integration tests""" +import json from pathlib import Path import pooch import pytest -# from brainglobe_workflows.cellfinder_core.cellfinder import ( -# read_cellfinder_config, -# ) - @pytest.fixture(autouse=True) -def mock_home_directory(monkeypatch: pytest.MonkeyPatch, tmp_path): - """_summary_ - - from https://github.com/brainglobe/brainrender-napari/blob/52673db58df247261b1ad43c52135e5a26f88d1e/tests/conftest.py#L10 - - Parameters - ---------- - monkeypatch : pytest.MonkeyPatch - _description_ - - Returns - ------- - _type_ - _description_ - """ - +def mock_home_directory(monkeypatch: pytest.MonkeyPatch): # define mock home path - # home_path = Path.home() # actual home path - mock_home_path = tmp_path # home_path / ".brainglobe-tests" + home_path = Path.home() # actual home path + mock_home_path = home_path / ".brainglobe-tests" # tmp_path # # create dir if it doesn't exist if not mock_home_path.exists(): @@ -42,7 +24,7 @@ def mock_home(): monkeypatch.setattr(Path, "home", mock_home) -@pytest.fixture() +@pytest.fixture() # Do I need this? def input_configs_dir() -> Path: """Return the directory path to the input configs used for testing @@ -71,66 +53,93 @@ def cellfinder_GIN_data() -> dict: @pytest.fixture() -def input_config_fetch_GIN(input_configs_dir: Path) -> Path: +def config_GIN(cellfinder_GIN_data): + """ + Return a config pointing to the location where GIN would be by default """ - Return the cellfinder config json file that is configured to fetch from GIN + from brainglobe_workflows.cellfinder_core.cellfinder import ( + read_cellfinder_config, + ) + from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER - Parameters - ---------- - input_configs_dir : Path - Path to the directory holding the test config files. + # fetch data from GIN and download locally + # if it exists, pooch doesnt download again + pooch.retrieve( + url=cellfinder_GIN_data["url"], + known_hash=cellfinder_GIN_data["hash"], + path=Path.home(), # path to download zip to + progressbar=True, + processor=pooch.Unzip( + extract_dir="cellfinder_test_data" + # path to unzipped dir, *relative* to 'path' + ), + ) - Returns - ------- - Path - Path to the config json file for fetching data from GIN - """ - return input_configs_dir / "input_data_GIN.json" + return read_cellfinder_config(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) + # read_cellfinder_config(input_configs_dir / "input_data_GIN.json") @pytest.fixture() -def input_config_fetch_local( - input_configs_dir: Path, - cellfinder_GIN_data: dict, -) -> Path: - """ - Download the cellfinder data locally and return the config json - file configured to fetch local data. - - The data is downloaded to a directory under the current working - directory (that is, to a directory under the directory from where - pytest is launched). - - Parameters - ---------- - input_configs_dir : Path - Path to the directory holding the test config files. - cellfinder_GIN_data : dict - URL and hash of the GIN repository with the cellfinder test data +def config_local(cellfinder_GIN_data): + """ """ - Returns - ------- - Path - Path to the config json file for fetching data locally - """ from brainglobe_workflows.cellfinder_core.cellfinder import ( - read_cellfinder_config, + CellfinderConfig, ) + from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER - # read local config - input_config_path = input_configs_dir / "input_data_locally.json" - config = read_cellfinder_config(input_config_path) + # read default config as dict + # as dict because some paths are computed derived from input_data_dir + with open(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) as cfg: + config_dict = json.load(cfg) - # fetch data from GIN and download locally + # modify location of data? + # - remove url + # - remove data hash + # - add input_data_dir + config_dict["data_url"] = None + config_dict["data_hash"] = None + config_dict["input_data_dir"] = Path.home() / "local_data" + + # instantiate object + config = CellfinderConfig(**config_dict) + + # fetch data from GIN and download locally to local location? pooch.retrieve( url=cellfinder_GIN_data["url"], known_hash=cellfinder_GIN_data["hash"], - path=config._install_path, # path to download zip to + path=Path(config.input_data_dir).parent, # path to download zip to progressbar=True, processor=pooch.Unzip( extract_dir=Path(config.input_data_dir).stem # path to unzipped dir, *relative* to 'path' ), ) + return config + + +@pytest.fixture() +def config_missing_signal(config_local): + from brainglobe_workflows.cellfinder_core.cellfinder import ( + CellfinderConfig, + ) + + config_local.signal_subdir = "_" + config = CellfinderConfig(**config_local) + + return config + + +@pytest.fixture() +def config_missing_background(config_local): + config_local.background_subdir = "_" + return config_local + + +@pytest.fixture() +def config_not_GIN_or_local(config_GIN): + # remove GIN refs + config_GIN.data_url = None + config_GIN.data_hash = None - return input_config_path + return config_GIN diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index 02cdcd14..a6804b31 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -3,7 +3,6 @@ import re from pathlib import Path -import pooch import pytest # from brainglobe_workflows.cellfinder_core.cellfinder import ( @@ -36,14 +35,10 @@ def default_input_config_cellfinder() -> Path: @pytest.mark.parametrize( "input_config", [ - "input_data_GIN.json", - "input_data_locally.json", - "input_data_missing_background.json", - "input_data_missing_signal.json", - "input_data_not_locally_or_GIN.json", + "default_input_config_cellfinder", ], ) -def test_read_cellfinder_config(input_config: str, input_configs_dir: Path): +def test_read_cellfinder_config(input_config: str, request): """Test for reading a cellfinder config file Parameters @@ -57,8 +52,7 @@ def test_read_cellfinder_config(input_config: str, input_configs_dir: Path): read_cellfinder_config, ) - # path to config json file - input_config_path = input_configs_dir / input_config + input_config_path = request.getfixturevalue(input_config) # read json as Cellfinder config config = read_cellfinder_config(input_config_path) @@ -77,33 +71,30 @@ def test_read_cellfinder_config(input_config: str, input_configs_dir: Path): "input_config, message_pattern", [ ( - "input_data_GIN.json", + "config_GIN", "Fetching input data from the provided GIN repository", ), ( - "input_data_locally.json", + "config_local", "Fetching input data from the local directories", ), ( - "input_data_missing_background.json", + "config_missing_signal", "The directory .+ does not exist$", ), - ("input_data_missing_signal.json", "The directory .+ does not exist$"), + ("config_missing_background", "The directory .+ does not exist$"), ( - "input_data_not_locally_or_GIN.json", + "config_not_GIN_or_local", "Input data not found locally, and URL/hash to " "GIN repository not provided", ), ], ) def test_add_signal_and_background_files( - mock_home_directory, caplog: pytest.LogCaptureFixture, - tmp_path: Path, - cellfinder_GIN_data: dict, - input_configs_dir: Path, input_config: str, message_pattern: str, + request: pytest.FixtureRequest, ): """Test signal and background files addition to the cellfinder config @@ -111,8 +102,6 @@ def test_add_signal_and_background_files( ---------- caplog : pytest.LogCaptureFixture Pytest fixture to capture the logs during testing - tmp_path : Path - Pytest fixture providing a temporary path for each test cellfinder_GIN_data : dict Dict holding the URL and hash of the cellfinder test data in GIN input_configs_dir : Path @@ -122,51 +111,20 @@ def test_add_signal_and_background_files( message_pattern : str Expected pattern in the log """ - # mock_home_directory - # import after mocking home dir! - from brainglobe_workflows.cellfinder_core.cellfinder import ( - add_signal_and_background_files, - read_cellfinder_config, - ) - - # instantiate our custom logger + # instantiate custom logger _ = setup_logger() # read json as Cellfinder config - config = read_cellfinder_config(input_configs_dir / input_config) - - # monkeypatch cellfinder config: - # set install_path to pytest temporary directory - # config._install_path = - # Path.home() / ".brainglobe" / "workflows" / "cellfinder_core" - # config._install_path = tmp_path / config._install_path + # ---> change so that the fixture is the config object! + # config = read_cellfinder_config(input_configs_dir / input_config) + config = request.getfixturevalue(input_config) # check lists of signal and background files are not defined assert not (config._list_signal_files and config._list_background_files) - # monkeypatch cellfinder config: - # if config is "local" or "signal/background missing": - # ensure signal and background data from GIN are downloaded locally - if input_config in [ - "input_data_locally.json", - "input_data_missing_signal.json", - "input_data_missing_background.json", - ]: - # fetch data from GIN and download locally - pooch.retrieve( - url=cellfinder_GIN_data["url"], - known_hash=cellfinder_GIN_data["hash"], - path=config._install_path, # path to download zip to - progressbar=True, - processor=pooch.Unzip( - extract_dir=Path(config.input_data_dir).stem - # path to unzipped dir, *relative* to 'path' - ), - ) - # add signal and background files lists to config - add_signal_and_background_files(config) + config.add_signal_and_background_files() # check log messages assert len(caplog.messages) > 0 diff --git a/tests/data/input_data_GIN.json b/tests/data/input_data_GIN.json deleted file mode 100644 index e977271c..00000000 --- a/tests/data/input_data_GIN.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip", - "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914", - "voxel_sizes": [ - 5, - 2, - 2 - ], - "start_plane": 0, - "end_plane": -1, - "trained_model": null, - "model_weights": null, - "model": "resnet50_tv", - "batch_size": 32, - "n_free_cpus": 2, - "network_voxel_sizes": [ - 5, - 1, - 1 - ], - "soma_diameter": 16, - "ball_xy_size": 6, - "ball_z_size": 15, - "ball_overlap_fraction": 0.6, - "log_sigma_size": 0.2, - "n_sds_above_mean_thresh": 10, - "soma_spread_factor": 1.4, - "max_cluster_size": 100000, - "cube_width": 50, - "cube_height": 50, - "cube_depth": 20, - "network_depth": "50" -} diff --git a/tests/data/input_data_locally.json b/tests/data/input_data_locally.json deleted file mode 100644 index b2e613e5..00000000 --- a/tests/data/input_data_locally.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "input_data_dir": "/Users/sofia/Documents_local/project_BrainGlobe_workflows/brainglobe-workflows/tests/data", - "voxel_sizes": [ - 5, - 2, - 2 - ], - "start_plane": 0, - "end_plane": -1, - "trained_model": null, - "model_weights": null, - "model": "resnet50_tv", - "batch_size": 32, - "n_free_cpus": 2, - "network_voxel_sizes": [ - 5, - 1, - 1 - ], - "soma_diameter": 16, - "ball_xy_size": 6, - "ball_z_size": 15, - "ball_overlap_fraction": 0.6, - "log_sigma_size": 0.2, - "n_sds_above_mean_thresh": 10, - "soma_spread_factor": 1.4, - "max_cluster_size": 100000, - "cube_width": 50, - "cube_height": 50, - "cube_depth": 20, - "network_depth": "50" -} diff --git a/tests/data/input_data_missing_background.json b/tests/data/input_data_missing_background.json deleted file mode 100644 index b2e613e5..00000000 --- a/tests/data/input_data_missing_background.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "input_data_dir": "/Users/sofia/Documents_local/project_BrainGlobe_workflows/brainglobe-workflows/tests/data", - "voxel_sizes": [ - 5, - 2, - 2 - ], - "start_plane": 0, - "end_plane": -1, - "trained_model": null, - "model_weights": null, - "model": "resnet50_tv", - "batch_size": 32, - "n_free_cpus": 2, - "network_voxel_sizes": [ - 5, - 1, - 1 - ], - "soma_diameter": 16, - "ball_xy_size": 6, - "ball_z_size": 15, - "ball_overlap_fraction": 0.6, - "log_sigma_size": 0.2, - "n_sds_above_mean_thresh": 10, - "soma_spread_factor": 1.4, - "max_cluster_size": 100000, - "cube_width": 50, - "cube_height": 50, - "cube_depth": 20, - "network_depth": "50" -} diff --git a/tests/data/input_data_missing_signal.json b/tests/data/input_data_missing_signal.json deleted file mode 100644 index b2e613e5..00000000 --- a/tests/data/input_data_missing_signal.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "input_data_dir": "/Users/sofia/Documents_local/project_BrainGlobe_workflows/brainglobe-workflows/tests/data", - "voxel_sizes": [ - 5, - 2, - 2 - ], - "start_plane": 0, - "end_plane": -1, - "trained_model": null, - "model_weights": null, - "model": "resnet50_tv", - "batch_size": 32, - "n_free_cpus": 2, - "network_voxel_sizes": [ - 5, - 1, - 1 - ], - "soma_diameter": 16, - "ball_xy_size": 6, - "ball_z_size": 15, - "ball_overlap_fraction": 0.6, - "log_sigma_size": 0.2, - "n_sds_above_mean_thresh": 10, - "soma_spread_factor": 1.4, - "max_cluster_size": 100000, - "cube_width": 50, - "cube_height": 50, - "cube_depth": 20, - "network_depth": "50" -} diff --git a/tests/data/input_data_not_locally_or_GIN.json b/tests/data/input_data_not_locally_or_GIN.json deleted file mode 100644 index 5b1c5410..00000000 --- a/tests/data/input_data_not_locally_or_GIN.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "input_data_dir": "_", - "voxel_sizes": [ - 5, - 2, - 2 - ], - "start_plane": 0, - "end_plane": -1, - "trained_model": null, - "model_weights": null, - "model": "resnet50_tv", - "batch_size": 32, - "n_free_cpus": 2, - "network_voxel_sizes": [ - 5, - 1, - 1 - ], - "soma_diameter": 16, - "ball_xy_size": 6, - "ball_z_size": 15, - "ball_overlap_fraction": 0.6, - "log_sigma_size": 0.2, - "n_sds_above_mean_thresh": 10, - "soma_spread_factor": 1.4, - "max_cluster_size": 100000, - "cube_width": 50, - "cube_height": 50, - "cube_depth": 20, - "network_depth": "50" -} From b41d7430118b4774713e1b17abdbe2263b1eb66a Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 20 Dec 2023 18:05:10 +0000 Subject: [PATCH 04/41] add missing data config fixtures --- .../cellfinder_core/cellfinder.py | 4 +-- tests/cellfinder_core/conftest.py | 34 ++++++++++++++----- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/brainglobe_workflows/cellfinder_core/cellfinder.py b/brainglobe_workflows/cellfinder_core/cellfinder.py index 6e98c6a4..b5018353 100644 --- a/brainglobe_workflows/cellfinder_core/cellfinder.py +++ b/brainglobe_workflows/cellfinder_core/cellfinder.py @@ -113,8 +113,8 @@ class CellfinderConfig: ################ Internal ######################## # The following attributes are added # during the setup phase of the workflow - # _signal_dir_path: Pathlike = "" - # _background_dir_path: Pathlike = "" + _signal_dir_path: Optional[Pathlike] = None + _background_dir_path: Optional[Pathlike] = None _list_signal_files: Optional[list] = None _list_background_files: Optional[list] = None _detected_cells_path: Pathlike = "" diff --git a/tests/cellfinder_core/conftest.py b/tests/cellfinder_core/conftest.py index 7a82a681..22f1f4ef 100644 --- a/tests/cellfinder_core/conftest.py +++ b/tests/cellfinder_core/conftest.py @@ -124,22 +124,38 @@ def config_missing_signal(config_local): CellfinderConfig, ) - config_local.signal_subdir = "_" - config = CellfinderConfig(**config_local) + config_dict = config_local.__dict__ + config_dict["signal_subdir"] = "_" + + # update rest of the paths + config = CellfinderConfig(**config_dict) return config @pytest.fixture() def config_missing_background(config_local): - config_local.background_subdir = "_" - return config_local + from brainglobe_workflows.cellfinder_core.cellfinder import ( + CellfinderConfig, + ) + + config_dict = config_local.__dict__ + config_dict["background_subdir"] = "_" + + # update rest of the paths + config = CellfinderConfig(**config_dict) + return config @pytest.fixture() -def config_not_GIN_or_local(config_GIN): - # remove GIN refs - config_GIN.data_url = None - config_GIN.data_hash = None +def config_not_GIN_or_local(config_local): + from brainglobe_workflows.cellfinder_core.cellfinder import ( + CellfinderConfig, + ) - return config_GIN + config_dict = config_local.__dict__ + config_dict["input_data_dir"] = "_" + + # update rest of the paths + config = CellfinderConfig(**config_dict) + return config From 574733ccf6ecf283a24df17b68f733f313160ef0 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 20 Dec 2023 19:36:31 +0000 Subject: [PATCH 05/41] mark GIN download data test as slow --- .../cellfinder_core/cellfinder.py | 13 +-- tests/cellfinder_core/conftest.py | 85 +++++++++++++++---- .../test_unit/test_cellfinder.py | 22 +---- 3 files changed, 81 insertions(+), 39 deletions(-) diff --git a/brainglobe_workflows/cellfinder_core/cellfinder.py b/brainglobe_workflows/cellfinder_core/cellfinder.py index b5018353..34c0a79a 100644 --- a/brainglobe_workflows/cellfinder_core/cellfinder.py +++ b/brainglobe_workflows/cellfinder_core/cellfinder.py @@ -113,12 +113,15 @@ class CellfinderConfig: ################ Internal ######################## # The following attributes are added # during the setup phase of the workflow + # even tho these are optional we dont expect users to + # change them!!! _signal_dir_path: Optional[Pathlike] = None _background_dir_path: Optional[Pathlike] = None _list_signal_files: Optional[list] = None _list_background_files: Optional[list] = None _detected_cells_path: Pathlike = "" _output_path: Pathlike = "" + # _input_data_download_dir: Optional[Pathlike] = None def __post_init__(self: "CellfinderConfig"): # See https://peps.python.org/pep-0557/#post-init-processing @@ -127,11 +130,6 @@ def __post_init__(self: "CellfinderConfig"): Path(self._install_path) / "cellfinder_test_data" ) - if self.output_parent_dir is None: - self.output_parent_dir = ( - Path(self._install_path) / self.output_dir_basename - ) - # install path (root for donwloaded inputs and outputs) self._signal_dir_path: Pathlike = self.input_data_dir / Path( self.signal_subdir @@ -140,6 +138,11 @@ def __post_init__(self: "CellfinderConfig"): self.background_subdir ) + if self.output_parent_dir is None: + self.output_parent_dir = ( + Path(self._install_path) / self.output_dir_basename + ) + ################# Methods ######################### def add_output_timestamped(self): # output dir and file diff --git a/tests/cellfinder_core/conftest.py b/tests/cellfinder_core/conftest.py index 22f1f4ef..666f66f9 100644 --- a/tests/cellfinder_core/conftest.py +++ b/tests/cellfinder_core/conftest.py @@ -7,6 +7,21 @@ import pytest +@pytest.fixture() +def default_input_config_cellfinder() -> Path: + """Return path to default input config for cellfinder workflow + + Returns + ------- + Path + Path to default input config + + """ + from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER + + return DEFAULT_JSON_CONFIG_PATH_CELLFINDER + + @pytest.fixture(autouse=True) def mock_home_directory(monkeypatch: pytest.MonkeyPatch): # define mock home path @@ -53,44 +68,82 @@ def cellfinder_GIN_data() -> dict: @pytest.fixture() -def config_GIN(cellfinder_GIN_data): +def config_GIN(cellfinder_GIN_data, default_input_config_cellfinder): """ Return a config pointing to the location where GIN would be by default """ + from brainglobe_workflows.cellfinder_core.cellfinder import ( - read_cellfinder_config, + CellfinderConfig, ) - from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER - # fetch data from GIN and download locally - # if it exists, pooch doesnt download again + # download data to default location for GIN pooch.retrieve( url=cellfinder_GIN_data["url"], known_hash=cellfinder_GIN_data["hash"], - path=Path.home(), # path to download zip to + path=Path.home() + / ".brainglobe" + / "workflows" + / "cellfinder_core", # path to download zip to progressbar=True, - processor=pooch.Unzip( - extract_dir="cellfinder_test_data" - # path to unzipped dir, *relative* to 'path' - ), + processor=pooch.Unzip(extract_dir="cellfinder_test_data"), ) - return read_cellfinder_config(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) - # read_cellfinder_config(input_configs_dir / "input_data_GIN.json") + # read default config as dict + with open(default_input_config_cellfinder) as cfg: + config_dict = json.load(cfg) + + # modify / ensure + # - add url + # - add data hash + # - add input_data_dir + config_dict["data_url"] = cellfinder_GIN_data["url"] + config_dict["data_hash"] = cellfinder_GIN_data["hash"] + if "input_data_dir" in config_dict.keys(): + del config_dict["input_data_dir"] + + # instantiate object + config = CellfinderConfig(**config_dict) + + return config @pytest.fixture() -def config_local(cellfinder_GIN_data): +def config_force_GIN(config_GIN, tmp_path): + """ + Return a config pointing to the location where GIN would be by default + """ + from brainglobe_workflows.cellfinder_core.cellfinder import ( + CellfinderConfig, + ) + + # ensure neither signal or background dir exist! + config_dict = config_GIN.__dict__ + config_dict["input_data_dir"] = tmp_path + # since the input_data_dir does not exist, it will + # download the GIN data there + + # instantiate object + config = CellfinderConfig(**config_dict) + + # ensure neither signal or background dir exist, to force GIN case + assert not Path(config._signal_dir_path).exists() + assert not Path(config._background_dir_path).exists() + + return config + + +@pytest.fixture() +def config_local(cellfinder_GIN_data, default_input_config_cellfinder): """ """ from brainglobe_workflows.cellfinder_core.cellfinder import ( CellfinderConfig, ) - from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER # read default config as dict # as dict because some paths are computed derived from input_data_dir - with open(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) as cfg: + with open(default_input_config_cellfinder) as cfg: config_dict = json.load(cfg) # modify location of data? @@ -99,7 +152,7 @@ def config_local(cellfinder_GIN_data): # - add input_data_dir config_dict["data_url"] = None config_dict["data_hash"] = None - config_dict["input_data_dir"] = Path.home() / "local_data" + config_dict["input_data_dir"] = Path.home() / "local_cellfinder_data" # instantiate object config = CellfinderConfig(**config_dict) diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index a6804b31..9a46955b 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -17,21 +17,6 @@ from brainglobe_workflows.utils import setup_logger -@pytest.fixture() -def default_input_config_cellfinder() -> Path: - """Return path to default input config for cellfinder workflow - - Returns - ------- - Path - Path to default input config - - """ - from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER - - return DEFAULT_JSON_CONFIG_PATH_CELLFINDER - - @pytest.mark.parametrize( "input_config", [ @@ -70,9 +55,10 @@ def test_read_cellfinder_config(input_config: str, request): @pytest.mark.parametrize( "input_config, message_pattern", [ - ( - "config_GIN", + pytest.param( + "config_force_GIN", "Fetching input data from the provided GIN repository", + marks=pytest.mark.slow, ), ( "config_local", @@ -137,7 +123,7 @@ def test_add_signal_and_background_files( "input_config, message", [ ("default_input_config_cellfinder", "Using default config file"), - ("input_config_fetch_GIN", "Input config read from"), + ("config_GIN", "Input config read from"), ], ) def test_setup_workflow( From 075cc50e12874698d2d95ae9ad4ba4b88daf23b1 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 22 Dec 2023 14:09:02 +0000 Subject: [PATCH 06/41] refactor config class methods and remove setup_workflow --- .../cellfinder_core/cellfinder.py | 243 ++++++++---------- .../test_unit/test_cellfinder.py | 135 +++------- 2 files changed, 143 insertions(+), 235 deletions(-) diff --git a/brainglobe_workflows/cellfinder_core/cellfinder.py b/brainglobe_workflows/cellfinder_core/cellfinder.py index 34c0a79a..ec8d1e09 100644 --- a/brainglobe_workflows/cellfinder_core/cellfinder.py +++ b/brainglobe_workflows/cellfinder_core/cellfinder.py @@ -49,21 +49,28 @@ class CellfinderConfig: the cellfinder preprocessing steps. We distinguish three types of fields: - - optional fields: they have a default value - - required fields: must be provided, they do not have a default value - - internal fields: their names start with _ indicating these are private. - Any functionality to update them is moved to a function of - CellfinderConfig. + - optional attributes: they have a default value + - required attributes: must be provided, they do not have a default value + - internal attributes: their names start with _ indicating these are + private. Any functionality to update them should be a class method. + + Notes + ----- + Additional details on some optional parameters: + - input data path: if not specified, the are assumed to be "signal" and + "background" dirs under _install_path/cellfinder_test_data/ + (see __post_init__ method). + - output data path: if not specified, it is assumed to be under + _install_path/output_dir_basename (see __post_init__ method). + - data_url, data_hash: source of data to download. If not specified + in JSON, it is set to None. """ - ################ Required ######################## - # Parameters + # Required parameters voxel_sizes: tuple[float, float, float] start_plane: int end_plane: int - trained_model: Optional[ - os.PathLike - ] # if None, it will use a default model + trained_model: Optional[os.PathLike] model_weights: Optional[os.PathLike] model: str batch_size: int @@ -82,97 +89,112 @@ class CellfinderConfig: cube_depth: int network_depth: depth_type - ################ Optional ######################## + # Optional parameters # they have a default value if not specified in json - - # install path (root for donwloaded inputs and outputs) _install_path: Pathlike = ( Path.home() / ".brainglobe" / "workflows" / "cellfinder_core" ) - # input data: if not specified the are assumed to be signal and - # background dirs under _install_path/cellfinder_test_data/ - # see __post_init__ + # input data + # for default value of `input_data_dir`, see __post_init__ method input_data_dir: Optional[Pathlike] = None signal_subdir: Pathlike = "signal" background_subdir: Pathlike = "background" # output data - # if dir not specified it is assumed to be under - # _install_path/output_dir_basename - # see __post_init__ + # for default value of `output_parent_dir`, see __post_init__ method output_dir_basename: str = "cellfinder_output_" detected_cells_filename: str = "detected_cells.xml" output_parent_dir: Optional[Pathlike] = None # source of data to download - # (if not provided in JSON it is set to None) data_url: Optional[str] = None data_hash: Optional[str] = None - ################ Internal ######################## - # The following attributes are added - # during the setup phase of the workflow - # even tho these are optional we dont expect users to - # change them!!! + # Internal parameters + # even though these are optional we don't expect users to + # change them _signal_dir_path: Optional[Pathlike] = None _background_dir_path: Optional[Pathlike] = None _list_signal_files: Optional[list] = None _list_background_files: Optional[list] = None _detected_cells_path: Pathlike = "" - _output_path: Pathlike = "" - # _input_data_download_dir: Optional[Pathlike] = None def __post_init__(self: "CellfinderConfig"): - # See https://peps.python.org/pep-0557/#post-init-processing - if self.input_data_dir is None: - self.input_data_dir = ( - Path(self._install_path) / "cellfinder_test_data" - ) + """Executed after __init__ function. + + We use it to define attributes as a function of other + attributes. See https://peps.python.org/pep-0557/#post-init-processing + + The following attributes are set: + - input_data_dir is set to a default value if not set in __init__ + - _signal_dir_path: full path to the directory holding the signal files + - _background_dir_path: full path to the directory holding the + background files + In 'add_signal_and_background_files': + - _list_signal_files: list of signal files + - _list_background_files: list of background files + In 'add_output_timestamped': + - output_parent_dir - # install path (root for donwloaded inputs and outputs) - self._signal_dir_path: Pathlike = self.input_data_dir / Path( - self.signal_subdir - ) - self._background_dir_path: Pathlike = self.input_data_dir / Path( - self.background_subdir - ) + Parameters + ---------- + self : CellfinderConfig + a CellfinderConfig instance + """ + + # Add signal and background files to config + self.add_input_paths() + # Add output paths + self.add_output_paths() + + def add_output_paths(self): + """Adds output paths to the cellfinder config + + Specifically it adds: + - output_path: a path to a timestamped output directory + - _detected_cells_path: the full path to the output file + (under output_path). + + Parameters + ---------- + config : CellfinderConfig + a cellfinder config + """ + + # Fill in output directory if not specified if self.output_parent_dir is None: - self.output_parent_dir = ( - Path(self._install_path) / self.output_dir_basename - ) + self.output_parent_dir = Path(self._install_path) - ################# Methods ######################### - def add_output_timestamped(self): - # output dir and file + # Add path to timestamped output directory to config timestamp = datetime.datetime.now() timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S") - output_path_timestamped = Path(self.output_parent_dir) / ( + self.output_path = Path(self.output_parent_dir) / ( str(self.output_dir_basename) + timestamp_formatted ) - output_path_timestamped.mkdir( + self.output_path.mkdir( parents=True, # create any missing parents exist_ok=True, # ignore FileExistsError exceptions ) - # Add output path and output file path to config - self.output_path = output_path_timestamped + # Add paths to output file to config self._detected_cells_path = ( self.output_path / self.detected_cells_filename ) - def add_signal_and_background_files(self): + def add_input_paths(self): """Adds the lists of input data files (signal and background) to the config. - These files are first searched locally. If not found, we - attempt to download them from GIN. + These files are first searched locally at the given location. + If not found, we attempt to download them from GIN and place + them at the specified location. Specifically: - If both parent data directories (signal and background) exist - locally, - the lists of signal and background files are added to the config. + locally, the lists of signal and background files are added to + the config. - If exactly one of the parent data directories is missing, an error message is logged. - If neither of them exist, the data is retrieved from the provided GIN @@ -183,14 +205,22 @@ def add_signal_and_background_files(self): config : CellfinderConfig a cellfinder config with input data files to be validated - Returns - ------- - config : CellfinderConfig - a cellfinder config with updated input data lists. """ # Fetch logger logger = logging.getLogger(LOGGER_NAME) + # Fill in input data directory if not specified + if self.input_data_dir is None: + self.input_data_dir = ( + Path(self._install_path) / "cellfinder_test_data" + ) + + # Fill in signal and background paths derived from 'input_data_dir' + self._signal_dir_path = self.input_data_dir / Path(self.signal_subdir) + self._background_dir_path = self.input_data_dir / Path( + self.background_subdir + ) + # Check if input data directories (signal and background) exist # locally. # If both directories exist, get list of signal and background files @@ -235,8 +265,9 @@ def add_signal_and_background_files(self): list_files_archive = pooch.retrieve( url=self.data_url, known_hash=self.data_hash, - path=Path(self.input_data_dir).parent, - # self._install_path, # ----zip will be downloaded here + path=Path( + self.input_data_dir + ).parent, # zip will be downloaded here progressbar=True, processor=pooch.Unzip( extract_dir=Path(self.input_data_dir).stem, @@ -258,7 +289,7 @@ def add_signal_and_background_files(self): for f in list_files_archive if f.startswith( str(Path(self._signal_dir_path).resolve()), - ) # if str(self.signal_dir_path) in f + ) ] # Add background files to config @@ -277,7 +308,9 @@ def add_signal_and_background_files(self): ) -def read_cellfinder_config(input_config_path: Path) -> CellfinderConfig: +def read_cellfinder_config( + input_config_path: str, log_on: bool = False +) -> CellfinderConfig: """Instantiate a CellfinderConfig from the input json file. Assumes config is json serializable. @@ -297,82 +330,12 @@ def read_cellfinder_config(input_config_path: Path) -> CellfinderConfig: config_dict = json.load(cfg) config = CellfinderConfig(**config_dict) - return config - - -def setup_workflow(input_config_path: Path) -> CellfinderConfig: - """Run setup steps prior to executing the workflow - - These setup steps include: - - instantiating a CellfinderConfig object with the required parameters, - - checking if the input data exists locally, and fetching from - GIN repository otherwise, - - adding the path to the input data files to the config, and - - creating a timestamped directory for the output of the workflow if - it doesn't exist and adding its path to the config - - Parameters - ---------- - input_config_path : Path - path to the input config file - - Returns - ------- - config : CellfinderConfig - a dataclass whose attributes are the parameters - for running cellfinder. - """ - # Fetch logger - logger = logging.getLogger(LOGGER_NAME) - - # Check config file exists - assert input_config_path.exists() - - # Instantiate a CellfinderConfig from the input json file - # (assumes config is json serializable) - config = read_cellfinder_config(input_config_path) - - # Print info logs for status - logger.info(f"Input config read from {input_config_path}") - if input_config_path == DEFAULT_JSON_CONFIG_PATH_CELLFINDER: - logger.info("Using default config file") - - # Add lists of input data files to the config, - # if these are not defined yet - if not (config._list_signal_files and config._list_background_files): - # # build fullpaths to input directories - # config._signal_dir_path = str( - # Path(config._install_path) - # / config.data_dir_relative - # / config.signal_subdir, - # ) - # config._background_dir_path = str( - # Path(config._install_path) - # / config.data_dir_relative - # / config.background_subdir, - # ) - - # add signal and background files to config - # config = add_signal_and_background_files(config) - config.add_signal_and_background_files() - - # Create timestamped output directory if it doesn't exist - config.add_output_timestamped() - # timestamp = datetime.datetime.now() - # timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S") - # output_path_timestamped = Path(config._install_path) / ( - # str(config.output_dir_basename) + timestamp_formatted - # ) - # output_path_timestamped.mkdir( - # parents=True, # create any missing parents - # exist_ok=True, # ignore FileExistsError exceptions - # ) - - # # Add output path and output file path to config - # config.output_path = output_path_timestamped - # config._detected_cells_path = ( - # config.output_path / config.detected_cells_filename - # ) + # log config origin + if log_on: + logger = logging.getLogger(LOGGER_NAME) + logger.info(f"Input config read from {input_config_path}") + if input_config_path == DEFAULT_JSON_CONFIG_PATH_CELLFINDER: + logger.info("Using default config file") return config @@ -381,8 +344,8 @@ def setup(input_config_path: str) -> CellfinderConfig: # setup logger _ = setup_logger() - # run setup steps and return config - cfg = setup_workflow(Path(input_config_path)) + # read config + cfg = read_cellfinder_config(input_config_path) return cfg diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index 9a46955b..d5bfb745 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -5,15 +5,6 @@ import pytest -# from brainglobe_workflows.cellfinder_core.cellfinder import ( -# # CellfinderConfig, -# add_signal_and_background_files, -# read_cellfinder_config, -# run_workflow_from_cellfinder_run, -# setup_workflow, -# ) -# from brainglobe_workflows.cellfinder_core.cellfinder import -# setup as setup_full from brainglobe_workflows.utils import setup_logger @@ -23,7 +14,12 @@ "default_input_config_cellfinder", ], ) -def test_read_cellfinder_config(input_config: str, request): +def test_read_cellfinder_config( + input_config: str, + message: str, + caplog: pytest.LogCaptureFixture, + request: pytest.FixtureRequest, +): """Test for reading a cellfinder config file Parameters @@ -37,10 +33,13 @@ def test_read_cellfinder_config(input_config: str, request): read_cellfinder_config, ) + # instantiate custom logger + _ = setup_logger() + input_config_path = request.getfixturevalue(input_config) # read json as Cellfinder config - config = read_cellfinder_config(input_config_path) + config = read_cellfinder_config(input_config_path, log_on=True) # read json as dict with open(input_config_path) as cfg: @@ -51,6 +50,34 @@ def test_read_cellfinder_config(input_config: str, request): [ky in config.__dataclass_fields__.keys() for ky in config_dict.keys()] ) + # check logs + assert message in caplog.text + + # check all signal files exist + assert config._list_signal_files + assert all([Path(f).is_file() for f in config._list_signal_files]) + + # check all background files exist + assert config._list_background_files + assert all([Path(f).is_file() for f in config._list_background_files]) + + # check output directory exists + assert Path(config.output_path).resolve().is_dir() + + # check output directory name has correct format + out = re.fullmatch( + str(config.output_dir_basename) + "\\d{8}_\\d{6}$", + Path(config.output_path).stem, + ) + assert out is not None + assert out.group() is not None + + # check output file path is as expected + assert ( + Path(config._detected_cells_path) + == Path(config.output_path) / config.detected_cells_filename + ) + @pytest.mark.parametrize( "input_config, message_pattern", @@ -76,7 +103,7 @@ def test_read_cellfinder_config(input_config: str, request): ), ], ) -def test_add_signal_and_background_files( +def test_add_input_paths( caplog: pytest.LogCaptureFixture, input_config: str, message_pattern: str, @@ -110,7 +137,7 @@ def test_add_signal_and_background_files( assert not (config._list_signal_files and config._list_background_files) # add signal and background files lists to config - config.add_signal_and_background_files() + config.add_input_paths() # check log messages assert len(caplog.messages) > 0 @@ -119,88 +146,6 @@ def test_add_signal_and_background_files( assert out.group() is not None -@pytest.mark.parametrize( - "input_config, message", - [ - ("default_input_config_cellfinder", "Using default config file"), - ("config_GIN", "Input config read from"), - ], -) -def test_setup_workflow( - input_config: str, - message: str, - monkeypatch: pytest.MonkeyPatch, - tmp_path: Path, - caplog: pytest.LogCaptureFixture, - request: pytest.FixtureRequest, -): - """Test setup steps for the cellfinder workflow, using the default config - and passing a specific config file. - - These setup steps include: - - instantiating a CellfinderConfig object using the input json file, - - add the signal and background files to the config if these are not - defined, - - create a timestamped directory for the output of the workflow if - it doesn't exist and add its path to the config - - Parameters - ---------- - input_config : str - Name of input config json file - message : str - Expected log message - monkeypatch : pytest.MonkeyPatch - Pytest fixture to use monkeypatching utils - tmp_path : Path - Pytest fixture providing a temporary path for each test - caplog : pytest.LogCaptureFixture - Pytest fixture to capture the logs during testing - request : pytest.FixtureRequest - Pytest fixture to enable requesting fixtures by name - """ - from brainglobe_workflows.cellfinder_core.cellfinder import setup_workflow - - # setup logger - _ = setup_logger() - - # monkeypatch to change current directory to - # pytest temporary directory - # (cellfinder cache directory is created in cwd) - monkeypatch.chdir(tmp_path) - - # setup workflow - config = setup_workflow(request.getfixturevalue(input_config)) - - # check logs - assert message in caplog.text - - # check all signal files exist - assert config._list_signal_files - assert all([Path(f).is_file() for f in config._list_signal_files]) - - # check all background files exist - assert config._list_background_files - assert all([Path(f).is_file() for f in config._list_background_files]) - - # check output directory exists - assert Path(config.output_path).resolve().is_dir() - - # check output directory name has correct format - out = re.fullmatch( - str(config.output_dir_basename) + "\\d{8}_\\d{6}$", - Path(config.output_path).stem, - ) - assert out is not None - assert out.group() is not None - - # check output file path - assert ( - Path(config._detected_cells_path) - == Path(config.output_path) / config.detected_cells_filename - ) - - @pytest.mark.parametrize( "input_config", [ From 40a658d5b6fda87afedb21b6bef28daf315922ae Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 22 Dec 2023 14:47:01 +0000 Subject: [PATCH 07/41] tests pass with default config --- benchmarks/cellfinder_core.py | 2 +- .../cellfinder_core/cellfinder.py | 26 +++++++---- .../test_unit/test_cellfinder.py | 44 +++---------------- 3 files changed, 24 insertions(+), 48 deletions(-) diff --git a/benchmarks/cellfinder_core.py b/benchmarks/cellfinder_core.py index 882349d0..cdf25a5a 100644 --- a/benchmarks/cellfinder_core.py +++ b/benchmarks/cellfinder_core.py @@ -146,7 +146,7 @@ def teardown(self): The input data is kept for all repeats of the same benchmark, to avoid repeated downloads from GIN. """ - shutil.rmtree(Path(self.cfg.output_path).resolve()) + shutil.rmtree(Path(self.cfg._output_path).resolve()) class TimeFullWorkflow(TimeBenchmarkPrepGIN): diff --git a/brainglobe_workflows/cellfinder_core/cellfinder.py b/brainglobe_workflows/cellfinder_core/cellfinder.py index ec8d1e09..a3b6937a 100644 --- a/brainglobe_workflows/cellfinder_core/cellfinder.py +++ b/brainglobe_workflows/cellfinder_core/cellfinder.py @@ -91,23 +91,30 @@ class CellfinderConfig: # Optional parameters # they have a default value if not specified in json + + # install path: default path for downloaded and output data _install_path: Pathlike = ( Path.home() / ".brainglobe" / "workflows" / "cellfinder_core" ) - # input data - # for default value of `input_data_dir`, see __post_init__ method + # input data path: + # if not specified, the are assumed to be "signal" and + # "background" dirs under _install_path/cellfinder_test_data/ + # (see __post_init__ method) input_data_dir: Optional[Pathlike] = None signal_subdir: Pathlike = "signal" background_subdir: Pathlike = "background" - # output data - # for default value of `output_parent_dir`, see __post_init__ method + # output data path: + # if not specified, it is assumed to be under + # _install_path/output_dir_basename + # (see __post_init__ method) output_dir_basename: str = "cellfinder_output_" detected_cells_filename: str = "detected_cells.xml" output_parent_dir: Optional[Pathlike] = None # source of data to download + # if not specified in JSON, it is set to None data_url: Optional[str] = None data_hash: Optional[str] = None @@ -119,6 +126,7 @@ class CellfinderConfig: _list_signal_files: Optional[list] = None _list_background_files: Optional[list] = None _detected_cells_path: Pathlike = "" + _output_path: Pathlike = "" def __post_init__(self: "CellfinderConfig"): """Executed after __init__ function. @@ -170,17 +178,17 @@ def add_output_paths(self): # Add path to timestamped output directory to config timestamp = datetime.datetime.now() timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S") - self.output_path = Path(self.output_parent_dir) / ( + self._output_path = Path(self.output_parent_dir) / ( str(self.output_dir_basename) + timestamp_formatted ) - self.output_path.mkdir( + self._output_path.mkdir( parents=True, # create any missing parents exist_ok=True, # ignore FileExistsError exceptions ) # Add paths to output file to config self._detected_cells_path = ( - self.output_path / self.detected_cells_filename + self._output_path / self.detected_cells_filename ) def add_input_paths(self): @@ -369,8 +377,8 @@ def run_workflow_from_cellfinder_run(cfg: CellfinderConfig): the cellfinder workflow """ # Read input data as Dask arrays - signal_array = read_with_dask(cfg._signal_dir_path) - background_array = read_with_dask(cfg._background_dir_path) + signal_array = read_with_dask(str(cfg._signal_dir_path)) + background_array = read_with_dask(str(cfg._background_dir_path)) # Run main analysis using `cellfinder_run` detected_cells = cellfinder_run( diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index d5bfb745..4fab84e2 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -9,9 +9,9 @@ @pytest.mark.parametrize( - "input_config", + "input_config, message", [ - "default_input_config_cellfinder", + ("default_input_config_cellfinder", "Using default config file"), ], ) def test_read_cellfinder_config( @@ -62,12 +62,12 @@ def test_read_cellfinder_config( assert all([Path(f).is_file() for f in config._list_background_files]) # check output directory exists - assert Path(config.output_path).resolve().is_dir() + assert Path(config._output_path).resolve().is_dir() # check output directory name has correct format out = re.fullmatch( str(config.output_dir_basename) + "\\d{8}_\\d{6}$", - Path(config.output_path).stem, + Path(config._output_path).stem, ) assert out is not None assert out.group() is not None @@ -75,32 +75,17 @@ def test_read_cellfinder_config( # check output file path is as expected assert ( Path(config._detected_cells_path) - == Path(config.output_path) / config.detected_cells_filename + == Path(config._output_path) / config.detected_cells_filename ) @pytest.mark.parametrize( "input_config, message_pattern", [ - pytest.param( - "config_force_GIN", - "Fetching input data from the provided GIN repository", - marks=pytest.mark.slow, - ), ( "config_local", "Fetching input data from the local directories", ), - ( - "config_missing_signal", - "The directory .+ does not exist$", - ), - ("config_missing_background", "The directory .+ does not exist$"), - ( - "config_not_GIN_or_local", - "Input data not found locally, and URL/hash to " - "GIN repository not provided", - ), ], ) def test_add_input_paths( @@ -131,13 +116,7 @@ def test_add_input_paths( # read json as Cellfinder config # ---> change so that the fixture is the config object! # config = read_cellfinder_config(input_configs_dir / input_config) - config = request.getfixturevalue(input_config) - - # check lists of signal and background files are not defined - assert not (config._list_signal_files and config._list_background_files) - - # add signal and background files lists to config - config.add_input_paths() + _ = request.getfixturevalue(input_config) # check log messages assert len(caplog.messages) > 0 @@ -150,8 +129,6 @@ def test_add_input_paths( "input_config", [ "default_input_config_cellfinder", - "input_config_fetch_GIN", - "input_config_fetch_local", ], ) def test_setup( @@ -205,14 +182,10 @@ def test_setup( "input_config", [ "default_input_config_cellfinder", - "input_config_fetch_GIN", - "input_config_fetch_local", ], ) def test_run_workflow_from_cellfinder_run( input_config: str, - monkeypatch: pytest.MonkeyPatch, - tmp_path: Path, request: pytest.FixtureRequest, ): """Test running cellfinder workflow with default input config @@ -236,11 +209,6 @@ def test_run_workflow_from_cellfinder_run( setup as setup_full, ) - # monkeypatch to change current directory to - # pytest temporary directory - # (cellfinder cache directory is created in cwd) - monkeypatch.chdir(tmp_path) - # run setup cfg = setup_full(str(request.getfixturevalue(input_config))) From 45c118df3363a29afdbc3acaacbae37c1304ccdd Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 22 Dec 2023 14:58:55 +0000 Subject: [PATCH 08/41] remove unused fixtures --- tests/cellfinder_core/conftest.py | 177 ------------------ .../test_integration/test_cellfinder.py | 75 +------- .../test_unit/test_cellfinder.py | 54 ++++++ 3 files changed, 59 insertions(+), 247 deletions(-) diff --git a/tests/cellfinder_core/conftest.py b/tests/cellfinder_core/conftest.py index 666f66f9..127a4483 100644 --- a/tests/cellfinder_core/conftest.py +++ b/tests/cellfinder_core/conftest.py @@ -1,9 +1,7 @@ """Pytest fixtures shared across unit and integration tests""" -import json from pathlib import Path -import pooch import pytest @@ -37,178 +35,3 @@ def mock_home(): return mock_home_path monkeypatch.setattr(Path, "home", mock_home) - - -@pytest.fixture() # Do I need this? -def input_configs_dir() -> Path: - """Return the directory path to the input configs - used for testing - - Returns - ------- - Path - Test data directory path - """ - return Path(__file__).parents[1] / "data" - - -@pytest.fixture(scope="session") -def cellfinder_GIN_data() -> dict: - """Return the URL and hash to the GIN repository with the input data - - Returns - ------- - dict - URL and hash of the GIN repository with the cellfinder test data - """ - return { - "url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip", - "hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914", # noqa - } - - -@pytest.fixture() -def config_GIN(cellfinder_GIN_data, default_input_config_cellfinder): - """ - Return a config pointing to the location where GIN would be by default - """ - - from brainglobe_workflows.cellfinder_core.cellfinder import ( - CellfinderConfig, - ) - - # download data to default location for GIN - pooch.retrieve( - url=cellfinder_GIN_data["url"], - known_hash=cellfinder_GIN_data["hash"], - path=Path.home() - / ".brainglobe" - / "workflows" - / "cellfinder_core", # path to download zip to - progressbar=True, - processor=pooch.Unzip(extract_dir="cellfinder_test_data"), - ) - - # read default config as dict - with open(default_input_config_cellfinder) as cfg: - config_dict = json.load(cfg) - - # modify / ensure - # - add url - # - add data hash - # - add input_data_dir - config_dict["data_url"] = cellfinder_GIN_data["url"] - config_dict["data_hash"] = cellfinder_GIN_data["hash"] - if "input_data_dir" in config_dict.keys(): - del config_dict["input_data_dir"] - - # instantiate object - config = CellfinderConfig(**config_dict) - - return config - - -@pytest.fixture() -def config_force_GIN(config_GIN, tmp_path): - """ - Return a config pointing to the location where GIN would be by default - """ - from brainglobe_workflows.cellfinder_core.cellfinder import ( - CellfinderConfig, - ) - - # ensure neither signal or background dir exist! - config_dict = config_GIN.__dict__ - config_dict["input_data_dir"] = tmp_path - # since the input_data_dir does not exist, it will - # download the GIN data there - - # instantiate object - config = CellfinderConfig(**config_dict) - - # ensure neither signal or background dir exist, to force GIN case - assert not Path(config._signal_dir_path).exists() - assert not Path(config._background_dir_path).exists() - - return config - - -@pytest.fixture() -def config_local(cellfinder_GIN_data, default_input_config_cellfinder): - """ """ - - from brainglobe_workflows.cellfinder_core.cellfinder import ( - CellfinderConfig, - ) - - # read default config as dict - # as dict because some paths are computed derived from input_data_dir - with open(default_input_config_cellfinder) as cfg: - config_dict = json.load(cfg) - - # modify location of data? - # - remove url - # - remove data hash - # - add input_data_dir - config_dict["data_url"] = None - config_dict["data_hash"] = None - config_dict["input_data_dir"] = Path.home() / "local_cellfinder_data" - - # instantiate object - config = CellfinderConfig(**config_dict) - - # fetch data from GIN and download locally to local location? - pooch.retrieve( - url=cellfinder_GIN_data["url"], - known_hash=cellfinder_GIN_data["hash"], - path=Path(config.input_data_dir).parent, # path to download zip to - progressbar=True, - processor=pooch.Unzip( - extract_dir=Path(config.input_data_dir).stem - # path to unzipped dir, *relative* to 'path' - ), - ) - return config - - -@pytest.fixture() -def config_missing_signal(config_local): - from brainglobe_workflows.cellfinder_core.cellfinder import ( - CellfinderConfig, - ) - - config_dict = config_local.__dict__ - config_dict["signal_subdir"] = "_" - - # update rest of the paths - config = CellfinderConfig(**config_dict) - - return config - - -@pytest.fixture() -def config_missing_background(config_local): - from brainglobe_workflows.cellfinder_core.cellfinder import ( - CellfinderConfig, - ) - - config_dict = config_local.__dict__ - config_dict["background_subdir"] = "_" - - # update rest of the paths - config = CellfinderConfig(**config_dict) - return config - - -@pytest.fixture() -def config_not_GIN_or_local(config_local): - from brainglobe_workflows.cellfinder_core.cellfinder import ( - CellfinderConfig, - ) - - config_dict = config_local.__dict__ - config_dict["input_data_dir"] = "_" - - # update rest of the paths - config = CellfinderConfig(**config_dict) - return config diff --git a/tests/cellfinder_core/test_integration/test_cellfinder.py b/tests/cellfinder_core/test_integration/test_cellfinder.py index 31de408b..7893dd20 100644 --- a/tests/cellfinder_core/test_integration/test_cellfinder.py +++ b/tests/cellfinder_core/test_integration/test_cellfinder.py @@ -1,27 +1,11 @@ import subprocess import sys from pathlib import Path -from typing import Optional - -import pytest from brainglobe_workflows.cellfinder_core.cellfinder import main -@pytest.mark.parametrize( - "input_config", - [ - None, - "input_config_fetch_GIN", - "input_config_fetch_local", - ], -) -def test_main( - input_config: Optional[str], - monkeypatch: pytest.MonkeyPatch, - tmp_path: Path, - request: pytest.FixtureRequest, -): +def test_main(): """Test main function for setting up and running cellfinder workflow Parameters @@ -35,35 +19,15 @@ def test_main( request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ - # monkeypatch to change current directory to - # pytest temporary directory - # (cellfinder cache directory is created in cwd) - monkeypatch.chdir(tmp_path) # run main - if not input_config: - cfg = main() - else: - cfg = main(str(request.getfixturevalue(input_config))) + cfg = main() # check output files exist assert Path(cfg._detected_cells_path).is_file() -@pytest.mark.parametrize( - "input_config", - [ - None, - "input_config_fetch_GIN", - "input_config_fetch_local", - ], -) -def test_script( - input_config: Optional[str], - monkeypatch: pytest.MonkeyPatch, - tmp_path: Path, - request: pytest.FixtureRequest, -): +def test_script(): """Test running the cellfinder worklfow from the command line Parameters @@ -77,10 +41,6 @@ def test_script( request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ - # monkeypatch to change current directory to - # pytest temporary directory - # (cellfinder cache directory is created in cwd) - monkeypatch.chdir(tmp_path) # define CLI input script_path = ( @@ -93,10 +53,6 @@ def test_script( sys.executable, str(script_path), ] - # append config if required - if input_config: - subprocess_input.append("--config") - subprocess_input.append(str(request.getfixturevalue(input_config))) # run workflow script from the CLI subprocess_output = subprocess.run( @@ -107,20 +63,7 @@ def test_script( assert subprocess_output.returncode == 0 -@pytest.mark.parametrize( - "input_config", - [ - None, - "input_config_fetch_GIN", - "input_config_fetch_local", - ], -) -def test_entry_point( - input_config: Optional[str], - monkeypatch: pytest.MonkeyPatch, - tmp_path: Path, - request: pytest.FixtureRequest, -): +def test_entry_point(): """Test running the cellfinder workflow via the predefined entry point Parameters @@ -134,17 +77,9 @@ def test_entry_point( request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ - # monkeypatch to change current directory to - # pytest temporary directory - # (cellfinder cache directory is created in cwd) - monkeypatch.chdir(tmp_path) # define CLI input - subprocess_input = ["cellfinder-workflow"] - # append config if required - if input_config: - subprocess_input.append("--config") - subprocess_input.append(str(request.getfixturevalue(input_config))) + subprocess_input = ["cellfinder-core-workflow"] # run workflow with no CLI arguments, subprocess_output = subprocess.run( diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index 4fab84e2..038dfcf0 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -3,11 +3,65 @@ import re from pathlib import Path +import pooch import pytest from brainglobe_workflows.utils import setup_logger +@pytest.fixture(scope="session") +def cellfinder_GIN_data() -> dict: + """Return the URL and hash to the GIN repository with the input data + + Returns + ------- + dict + URL and hash of the GIN repository with the cellfinder test data + """ + return { + "url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip", + "hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914", # noqa + } + + +@pytest.fixture() +def config_local(cellfinder_GIN_data, default_input_config_cellfinder): + """ """ + + from brainglobe_workflows.cellfinder_core.cellfinder import ( + CellfinderConfig, + ) + + # read default config as dict + # as dict because some paths are computed derived from input_data_dir + with open(default_input_config_cellfinder) as cfg: + config_dict = json.load(cfg) + + # modify location of data? + # - remove url + # - remove data hash + # - add input_data_dir + config_dict["data_url"] = None + config_dict["data_hash"] = None + config_dict["input_data_dir"] = Path.home() / "local_cellfinder_data" + + # instantiate object + config = CellfinderConfig(**config_dict) + + # fetch data from GIN and download locally to local location? + pooch.retrieve( + url=cellfinder_GIN_data["url"], + known_hash=cellfinder_GIN_data["hash"], + path=Path(config.input_data_dir).parent, # path to download zip to + progressbar=True, + processor=pooch.Unzip( + extract_dir=Path(config.input_data_dir).stem + # path to unzipped dir, *relative* to 'path' + ), + ) + return config + + @pytest.mark.parametrize( "input_config, message", [ From b911ef3b9898a6e22ddea603bbf338759d0a5c99 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 22 Dec 2023 15:47:09 +0000 Subject: [PATCH 09/41] update entry point --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0c464945..8aa57a6d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,7 +81,7 @@ napari = [ "Source Code" = "https://github.com/brainglobe/brainglobe-workflows" [project.scripts] -cellfinder-workflow = "brainglobe_workflows.cellfinder_core.cellfinder:main_app_wrapper" +cellfinder-core-workflow = "brainglobe_workflows.cellfinder_core.cellfinder:main_app_wrapper" cellfinder = "brainglobe_workflows.cellfinder_brainreg.main:main" [build-system] From e96a883fa5ae7675ba79901f74387cca1424240b Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 9 Jan 2024 15:27:23 +0000 Subject: [PATCH 10/41] change cellfinder-core to cellfinder in pyproject --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c17a4252..2463c715 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -143,10 +143,10 @@ INPUT_COREDEV = extras = dev deps = - coredev: git+https://github.com/brainglobe/cellfinder-core.git + coredev: git+https://github.com/brainglobe/cellfinder.git commands = pytest {toxinidir} -v --color=yes --cov=./ --cov-report=xml description = Run tests - coredev: Run tests with the development version of cellfinder-core + coredev: Run tests with the development version of cellfinder """ From a511c361046b397c1a1871c2a57b9ede97830b6b Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 9 Jan 2024 15:33:39 +0000 Subject: [PATCH 11/41] update cellfinder to cellfinder_core --- brainglobe_workflows/cellfinder_core/cellfinder.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/brainglobe_workflows/cellfinder_core/cellfinder.py b/brainglobe_workflows/cellfinder_core/cellfinder.py index a3b6937a..775f8b60 100644 --- a/brainglobe_workflows/cellfinder_core/cellfinder.py +++ b/brainglobe_workflows/cellfinder_core/cellfinder.py @@ -29,9 +29,9 @@ import pooch from brainglobe_utils.IO.cells import save_cells -from cellfinder_core.main import main as cellfinder_run -from cellfinder_core.tools.IO import read_with_dask -from cellfinder_core.train.train_yml import depth_type +from cellfinder.core.main import main as cellfinder_run +from cellfinder.core.tools.IO import read_with_dask +from cellfinder.core.train.train_yml import depth_type from brainglobe_workflows.utils import ( DEFAULT_JSON_CONFIG_PATH_CELLFINDER, @@ -359,7 +359,7 @@ def setup(input_config_path: str) -> CellfinderConfig: def run_workflow_from_cellfinder_run(cfg: CellfinderConfig): - """Run workflow based on the cellfinder_core.main.main() + """Run workflow based on the cellfinder.core.main.main() function. The steps are: From c15b09f52947bfbfe0e9f1aa7a9ca45b8c9f6942 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 9 Jan 2024 17:16:41 +0000 Subject: [PATCH 12/41] renaming cellfinder script workflow to cellfinder_core. fix entrypoint test. --- benchmarks/cellfinder_core.py | 4 ++-- .../{cellfinder.py => cellfinder_core.py} | 12 +++++------- pyproject.toml | 7 +++---- .../test_integration/test_cellfinder.py | 6 +++--- tests/cellfinder_core/test_unit/test_cellfinder.py | 12 ++++++------ 5 files changed, 19 insertions(+), 22 deletions(-) rename brainglobe_workflows/cellfinder_core/{cellfinder.py => cellfinder_core.py} (97%) diff --git a/benchmarks/cellfinder_core.py b/benchmarks/cellfinder_core.py index aad4e5f4..41821d94 100644 --- a/benchmarks/cellfinder_core.py +++ b/benchmarks/cellfinder_core.py @@ -7,11 +7,11 @@ from cellfinder.core.main import main as cellfinder_run from cellfinder.core.tools.IO import read_with_dask -from brainglobe_workflows.cellfinder_core.cellfinder import ( +from brainglobe_workflows.cellfinder_core.cellfinder_core import ( CellfinderConfig, run_workflow_from_cellfinder_run, ) -from brainglobe_workflows.cellfinder_core.cellfinder import ( +from brainglobe_workflows.cellfinder_core.cellfinder_core import ( setup as setup_cellfinder_workflow, ) from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER diff --git a/brainglobe_workflows/cellfinder_core/cellfinder.py b/brainglobe_workflows/cellfinder_core/cellfinder_core.py similarity index 97% rename from brainglobe_workflows/cellfinder_core/cellfinder.py rename to brainglobe_workflows/cellfinder_core/cellfinder_core.py index 775f8b60..e3ce55b1 100644 --- a/brainglobe_workflows/cellfinder_core/cellfinder.py +++ b/brainglobe_workflows/cellfinder_core/cellfinder_core.py @@ -45,13 +45,12 @@ @dataclass class CellfinderConfig: - """Define input and output data locations, and the parameters for - the cellfinder preprocessing steps. + """Define the parameters for the cellfinder workflow. - We distinguish three types of fields: - - optional attributes: they have a default value - - required attributes: must be provided, they do not have a default value - - internal attributes: their names start with _ indicating these are + There are three types of fields: + - optional attributes: they have a default value; + - required attributes: must be provided, they do not have a default value; + - internal attributes: their names start with _, indicating these are private. Any functionality to update them should be a class method. Notes @@ -405,7 +404,6 @@ def run_workflow_from_cellfinder_run(cfg: CellfinderConfig): cfg.cube_height, cfg.cube_depth, cfg.network_depth, - # *cfg, # .voxel_sizes, ---> does this work? ) # Save results to xml file diff --git a/pyproject.toml b/pyproject.toml index 2463c715..0506fc1c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,8 +24,8 @@ classifiers = [ "Topic :: Scientific/Engineering :: Image Recognition", ] -# Below the dependenciess for the cellfinder CLI tool only -# (i.e., only what users will need for the CLI) +# Below the dependenciess for brainmapper (the cellfinder CLI tool) only +# (i.e., only what users will need for brainmapper) dependencies = [ "brainglobe>=1.0.0", "brainreg>=1.0.0", @@ -88,11 +88,10 @@ zip-safe = false [tool.setuptools.packages.find] include = ["brainglobe_workflows"] exclude = [ - "brainglobe_workflows.cellfinder_core", "tests", "resources", "benchmarks", -] # it's not excluding "brainglobe_workflows.cellfinder_core"! +] [tool.black] target-version = ["py39", "py310"] diff --git a/tests/cellfinder_core/test_integration/test_cellfinder.py b/tests/cellfinder_core/test_integration/test_cellfinder.py index 7893dd20..26f22ec8 100644 --- a/tests/cellfinder_core/test_integration/test_cellfinder.py +++ b/tests/cellfinder_core/test_integration/test_cellfinder.py @@ -2,7 +2,7 @@ import sys from pathlib import Path -from brainglobe_workflows.cellfinder_core.cellfinder import main +from brainglobe_workflows.cellfinder_core.cellfinder_core import main def test_main(): @@ -47,7 +47,7 @@ def test_script(): Path(__file__).resolve().parents[3] / "brainglobe_workflows" / "cellfinder_core" - / "cellfinder.py" + / "cellfinder_core.py" ) subprocess_input = [ sys.executable, @@ -79,7 +79,7 @@ def test_entry_point(): """ # define CLI input - subprocess_input = ["cellfinder-core-workflow"] + subprocess_input = ["cellfinder-workflow"] # run workflow with no CLI arguments, subprocess_output = subprocess.run( diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index 038dfcf0..6083df95 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -28,7 +28,7 @@ def cellfinder_GIN_data() -> dict: def config_local(cellfinder_GIN_data, default_input_config_cellfinder): """ """ - from brainglobe_workflows.cellfinder_core.cellfinder import ( + from brainglobe_workflows.cellfinder_core.cellfinder_core import ( CellfinderConfig, ) @@ -83,7 +83,7 @@ def test_read_cellfinder_config( input_configs_dir : Path Test data directory path """ - from brainglobe_workflows.cellfinder_core.cellfinder import ( + from brainglobe_workflows.cellfinder_core.cellfinder_core import ( read_cellfinder_config, ) @@ -208,10 +208,10 @@ def test_setup( request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ - from brainglobe_workflows.cellfinder_core.cellfinder import ( + from brainglobe_workflows.cellfinder_core.cellfinder_core import ( CellfinderConfig, ) - from brainglobe_workflows.cellfinder_core.cellfinder import ( + from brainglobe_workflows.cellfinder_core.cellfinder_core import ( setup as setup_full, ) @@ -256,10 +256,10 @@ def test_run_workflow_from_cellfinder_run( request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ - from brainglobe_workflows.cellfinder_core.cellfinder import ( + from brainglobe_workflows.cellfinder_core.cellfinder_core import ( run_workflow_from_cellfinder_run, ) - from brainglobe_workflows.cellfinder_core.cellfinder import ( + from brainglobe_workflows.cellfinder_core.cellfinder_core import ( setup as setup_full, ) From f8f2af691a801006e9e349de677224334e824483 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 9 Jan 2024 17:38:35 +0000 Subject: [PATCH 13/41] docstrings for class config --- .../cellfinder_core/cellfinder_core.py | 97 +++++++++---------- 1 file changed, 45 insertions(+), 52 deletions(-) diff --git a/brainglobe_workflows/cellfinder_core/cellfinder_core.py b/brainglobe_workflows/cellfinder_core/cellfinder_core.py index e3ce55b1..d16e958c 100644 --- a/brainglobe_workflows/cellfinder_core/cellfinder_core.py +++ b/brainglobe_workflows/cellfinder_core/cellfinder_core.py @@ -48,21 +48,10 @@ class CellfinderConfig: """Define the parameters for the cellfinder workflow. There are three types of fields: - - optional attributes: they have a default value; - required attributes: must be provided, they do not have a default value; + - optional attributes: they have a default value if not specified; - internal attributes: their names start with _, indicating these are private. Any functionality to update them should be a class method. - - Notes - ----- - Additional details on some optional parameters: - - input data path: if not specified, the are assumed to be "signal" and - "background" dirs under _install_path/cellfinder_test_data/ - (see __post_init__ method). - - output data path: if not specified, it is assumed to be under - _install_path/output_dir_basename (see __post_init__ method). - - data_url, data_hash: source of data to download. If not specified - in JSON, it is set to None. """ # Required parameters @@ -89,24 +78,24 @@ class CellfinderConfig: network_depth: depth_type # Optional parameters - # they have a default value if not specified in json # install path: default path for downloaded and output data _install_path: Pathlike = ( Path.home() / ".brainglobe" / "workflows" / "cellfinder_core" ) - # input data path: - # if not specified, the are assumed to be "signal" and - # "background" dirs under _install_path/cellfinder_test_data/ + # input data paths + # Note: if not specified, the signal and background data + # are assumed to be under "signal" and "background" + # dirs under _install_path/cellfinder_test_data/ # (see __post_init__ method) input_data_dir: Optional[Pathlike] = None signal_subdir: Pathlike = "signal" background_subdir: Pathlike = "background" - # output data path: - # if not specified, it is assumed to be under - # _install_path/output_dir_basename + # output data paths + # Note: if output_parent_dir is not specified, + # it is assumed to be under _install_path # (see __post_init__ method) output_dir_basename: str = "cellfinder_output_" detected_cells_filename: str = "detected_cells.xml" @@ -130,19 +119,11 @@ class CellfinderConfig: def __post_init__(self: "CellfinderConfig"): """Executed after __init__ function. - We use it to define attributes as a function of other - attributes. See https://peps.python.org/pep-0557/#post-init-processing + We use this method to define attributes of the data class + as a function of other attributes. + See https://peps.python.org/pep-0557/#post-init-processing - The following attributes are set: - - input_data_dir is set to a default value if not set in __init__ - - _signal_dir_path: full path to the directory holding the signal files - - _background_dir_path: full path to the directory holding the - background files - In 'add_signal_and_background_files': - - _list_signal_files: list of signal files - - _list_background_files: list of background files - In 'add_output_timestamped': - - output_parent_dir + The attributes added are input and output data paths Parameters ---------- @@ -150,19 +131,19 @@ def __post_init__(self: "CellfinderConfig"): a CellfinderConfig instance """ - # Add signal and background files to config + # Add input data paths to config self.add_input_paths() - # Add output paths + # Add output paths to config self.add_output_paths() def add_output_paths(self): - """Adds output paths to the cellfinder config + """Adds output paths to the config - Specifically it adds: - - output_path: a path to a timestamped output directory - - _detected_cells_path: the full path to the output file - (under output_path). + Specifically, it adds: + - output_parent_dir: set to a a timestamped output directory if not + set in __init__(); + - _detected_cells_path: path to the output file Parameters ---------- @@ -174,7 +155,7 @@ def add_output_paths(self): if self.output_parent_dir is None: self.output_parent_dir = Path(self._install_path) - # Add path to timestamped output directory to config + # Add to config the path to timestamped output directory timestamp = datetime.datetime.now() timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S") self._output_path = Path(self.output_parent_dir) / ( @@ -185,20 +166,33 @@ def add_output_paths(self): exist_ok=True, # ignore FileExistsError exceptions ) - # Add paths to output file to config + # Add to config the path to the output file self._detected_cells_path = ( self._output_path / self.detected_cells_filename ) def add_input_paths(self): - """Adds the lists of input data files (signal and background) - to the config. + """Adds input data paths to the config. - These files are first searched locally at the given location. - If not found, we attempt to download them from GIN and place - them at the specified location. + Specifically, it adds: + - input_data_dir: set to a default value if not set in __init__(); + - _signal_dir_path: full path to the directory with the signal files + - _background_dir_path: full path to the directory with the + background files. + - _list_signal_files: list of signal files + - _list_background_files: list of background files + + Parameters + ---------- + config : CellfinderConfig + a cellfinder config with input data files to be validated + + Notes + ----- + The signal and background files are first searched locally at the + given location. If not found, we attempt to download them from GIN + and place them at the specified location (input_data_dir). - Specifically: - If both parent data directories (signal and background) exist locally, the lists of signal and background files are added to the config. @@ -207,11 +201,6 @@ def add_input_paths(self): - If neither of them exist, the data is retrieved from the provided GIN repository. If no URL or hash to GIN is provided, an error is thrown. - Parameters - ---------- - config : CellfinderConfig - a cellfinder config with input data files to be validated - """ # Fetch logger logger = logging.getLogger(LOGGER_NAME) @@ -326,18 +315,22 @@ def read_cellfinder_config( ---------- input_config_path : Path Absolute path to a cellfinder config file + log_on : bool, optional + whether to log the info messages from reading the config + to the logger, by default False Returns ------- CellfinderConfig: The cellfinder config object, populated with data from the input """ + # read input config with open(input_config_path) as cfg: config_dict = json.load(cfg) config = CellfinderConfig(**config_dict) - # log config origin + # print config's origin to log if required if log_on: logger = logging.getLogger(LOGGER_NAME) logger.info(f"Input config read from {input_config_path}") From 70227856e37ef46bb6dbe38e9b484e564e39d0fc Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 9 Jan 2024 17:54:59 +0000 Subject: [PATCH 14/41] fix entry point --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0506fc1c..6ba3d3ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,7 @@ napari = ["napari[pyqt5]", "brainglobe-napari-io", "cellfinder[napari]>=1.0.0"] "Source Code" = "https://github.com/brainglobe/brainglobe-workflows" [project.scripts] -cellfinder-workflow = "brainglobe_workflows.cellfinder_core.cellfinder:main_app_wrapper" +cellfinder-workflow = "brainglobe_workflows.cellfinder_core.cellfinder_core:main_app_wrapper" brainmapper = "brainglobe_workflows.brainmapper.main:main" [build-system] From bedd3bae1aa4e0740f0357a4ca918b08ff247f2c Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 9 Jan 2024 19:50:59 +0000 Subject: [PATCH 15/41] skip add input paths test --- tests/cellfinder_core/test_unit/test_cellfinder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index 6083df95..0db472ab 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -133,6 +133,7 @@ def test_read_cellfinder_config( ) +@pytest.mark.skip(reason="focus of PR62") @pytest.mark.parametrize( "input_config, message_pattern", [ From 9947440b570b0ce33332423519984ccff1f72787 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 20 Dec 2023 11:23:09 +0000 Subject: [PATCH 16/41] split tests with and without CLI inputs --- tests/cellfinder_core/test_integration/test_cellfinder.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/cellfinder_core/test_integration/test_cellfinder.py b/tests/cellfinder_core/test_integration/test_cellfinder.py index 26f22ec8..2bc7c137 100644 --- a/tests/cellfinder_core/test_integration/test_cellfinder.py +++ b/tests/cellfinder_core/test_integration/test_cellfinder.py @@ -1,12 +1,12 @@ import subprocess import sys from pathlib import Path - from brainglobe_workflows.cellfinder_core.cellfinder_core import main def test_main(): """Test main function for setting up and running cellfinder workflow + without inputs Parameters ---------- @@ -29,6 +29,7 @@ def test_main(): def test_script(): """Test running the cellfinder worklfow from the command line + without inputs Parameters ---------- @@ -65,6 +66,7 @@ def test_script(): def test_entry_point(): """Test running the cellfinder workflow via the predefined entry point + without inputs Parameters ---------- From 86bb090efc03e9c239c69759046ec086186fee34 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 20 Dec 2023 20:12:53 +0000 Subject: [PATCH 17/41] all tests passing with default config --- .../test_integration/test_cellfinder.py | 33 ------- .../test_unit/test_cellfinder.py | 91 ++++++++++++++++++- 2 files changed, 88 insertions(+), 36 deletions(-) diff --git a/tests/cellfinder_core/test_integration/test_cellfinder.py b/tests/cellfinder_core/test_integration/test_cellfinder.py index 2bc7c137..f8bccc49 100644 --- a/tests/cellfinder_core/test_integration/test_cellfinder.py +++ b/tests/cellfinder_core/test_integration/test_cellfinder.py @@ -7,17 +7,6 @@ def test_main(): """Test main function for setting up and running cellfinder workflow without inputs - - Parameters - ---------- - input_config : Optional[str] - Path to input config json file - monkeypatch : pytest.MonkeyPatch - Pytest fixture to use monkeypatching utils - tmp_path : Path - Pytest fixture providing a temporary path for each test - request : pytest.FixtureRequest - Pytest fixture to enable requesting fixtures by name """ # run main @@ -30,17 +19,6 @@ def test_main(): def test_script(): """Test running the cellfinder worklfow from the command line without inputs - - Parameters - ---------- - input_config : Optional[str] - Path to input config json file - monkeypatch : pytest.MonkeyPatch - Pytest fixture to use monkeypatching utils - tmp_path : Path - Pytest fixture providing a temporary path for each test - request : pytest.FixtureRequest - Pytest fixture to enable requesting fixtures by name """ # define CLI input @@ -67,17 +45,6 @@ def test_script(): def test_entry_point(): """Test running the cellfinder workflow via the predefined entry point without inputs - - Parameters - ---------- - input_config : Optional[str] - Path to input config json file - monkeypatch : pytest.MonkeyPatch - Pytest fixture to use monkeypatching utils - tmp_path : Path - Pytest fixture providing a temporary path for each test - request : pytest.FixtureRequest - Pytest fixture to enable requesting fixtures by name """ # define CLI input diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index 0db472ab..d02e7897 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -180,6 +180,88 @@ def test_add_input_paths( assert out.group() is not None +@pytest.mark.parametrize( + "input_config, message", + [ + ("default_input_config_cellfinder", "Using default config file"), + # ("config_GIN", "Input config read from"), + ], +) +def test_setup_workflow( + input_config: str, + message: str, + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + caplog: pytest.LogCaptureFixture, + request: pytest.FixtureRequest, +): + """Test setup steps for the cellfinder workflow, using the default config + and passing a specific config file. + + These setup steps include: + - instantiating a CellfinderConfig object using the input json file, + - add the signal and background files to the config if these are not + defined, + - create a timestamped directory for the output of the workflow if + it doesn't exist and add its path to the config + + Parameters + ---------- + input_config : str + Name of input config json file + message : str + Expected log message + monkeypatch : pytest.MonkeyPatch + Pytest fixture to use monkeypatching utils + tmp_path : Path + Pytest fixture providing a temporary path for each test + caplog : pytest.LogCaptureFixture + Pytest fixture to capture the logs during testing + request : pytest.FixtureRequest + Pytest fixture to enable requesting fixtures by name + """ + from brainglobe_workflows.cellfinder_core.cellfinder_core import setup + + # setup logger + _ = setup_logger() + + # monkeypatch to change current directory to + # pytest temporary directory + # (cellfinder cache directory is created in cwd) + monkeypatch.chdir(tmp_path) + + # setup workflow + config = setup(request.getfixturevalue(input_config)) + + # check logs + assert message in caplog.text + + # check all signal files exist + assert config._list_signal_files + assert all([Path(f).is_file() for f in config._list_signal_files]) + + # check all background files exist + assert config._list_background_files + assert all([Path(f).is_file() for f in config._list_background_files]) + + # check output directory exists + assert Path(config.output_path).resolve().is_dir() + + # check output directory name has correct format + out = re.fullmatch( + str(config.output_dir_basename) + "\\d{8}_\\d{6}$", + Path(config.output_path).stem, + ) + assert out is not None + assert out.group() is not None + + # check output file path + assert ( + Path(config._detected_cells_path) + == Path(config.output_path) / config.detected_cells_filename + ) + + @pytest.mark.parametrize( "input_config", [ @@ -189,8 +271,6 @@ def test_add_input_paths( def test_setup( input_config: str, custom_logger_name: str, - monkeypatch: pytest.MonkeyPatch, - tmp_path: Path, request: pytest.FixtureRequest, ): """Test full setup for cellfinder workflow, using the default config @@ -219,7 +299,7 @@ def test_setup( # Monkeypatch to change current directory to # pytest temporary directory # (cellfinder cache directory is created in cwd) - monkeypatch.chdir(tmp_path) + # monkeypatch.chdir(tmp_path) # run setup on default configuration cfg = setup_full(request.getfixturevalue(input_config)) @@ -264,6 +344,11 @@ def test_run_workflow_from_cellfinder_run( setup as setup_full, ) + # monkeypatch to change current directory to + # pytest temporary directory + # (cellfinder cache directory is created in cwd) + # monkeypatch.chdir(tmp_path) + # run setup cfg = setup_full(str(request.getfixturevalue(input_config))) From 91284b95feafc7142d5879d67e2f60b63815da19 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 20 Dec 2023 20:31:15 +0000 Subject: [PATCH 18/41] all tests passing with default option --- .../test_integration/test_cellfinder.py | 55 ++++++++++++++++++- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/tests/cellfinder_core/test_integration/test_cellfinder.py b/tests/cellfinder_core/test_integration/test_cellfinder.py index f8bccc49..eabc5342 100644 --- a/tests/cellfinder_core/test_integration/test_cellfinder.py +++ b/tests/cellfinder_core/test_integration/test_cellfinder.py @@ -2,6 +2,8 @@ import sys from pathlib import Path from brainglobe_workflows.cellfinder_core.cellfinder_core import main +import pytest +from typing import Optional def test_main(): @@ -16,6 +18,44 @@ def test_main(): assert Path(cfg._detected_cells_path).is_file() +@pytest.mark.parametrize( + "input_config", + [ + None, + # "input_config_fetch_GIN", + # "input_config_fetch_local", + ], +) +def test_main_w_inputs( + input_config: Optional[str], + request: pytest.FixtureRequest, +): + """Test main function for setting up and running cellfinder workflow + with inputs + + Parameters + ---------- + input_config : Optional[str] + Path to input config json file + monkeypatch : pytest.MonkeyPatch + Pytest fixture to use monkeypatching utils + tmp_path : Path + Pytest fixture providing a temporary path for each test + request : pytest.FixtureRequest + Pytest fixture to enable requesting fixtures by name + """ + # monkeypatch to change current directory to + # pytest temporary directory + # (cellfinder cache directory is created in cwd) + # monkeypatch.chdir(tmp_path) + + # run main + cfg = main(str(request.getfixturevalue(input_config))) + + # check output files exist + assert Path(cfg._detected_cells_path).is_file() + + def test_script(): """Test running the cellfinder worklfow from the command line without inputs @@ -44,8 +84,19 @@ def test_script(): def test_entry_point(): """Test running the cellfinder workflow via the predefined entry point - without inputs + with no inputs + + Parameters + ---------- + monkeypatch : pytest.MonkeyPatch + Pytest fixture to use monkeypatching utils + tmp_path : Path + Pytest fixture providing a temporary path for each test """ + # monkeypatch to change current directory to + # pytest temporary directory + # (cellfinder cache directory is created in cwd) + # monkeypatch.chdir(tmp_path) # define CLI input subprocess_input = ["cellfinder-workflow"] @@ -56,4 +107,4 @@ def test_entry_point(): ) # check returncode - assert subprocess_output.returncode == 0 + assert subprocess_output.returncode == 0 \ No newline at end of file From d60f5034555d03904d57e2860bd3f2ee923bb0a7 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 20 Dec 2023 21:54:04 +0000 Subject: [PATCH 19/41] make all paths input strings. move methods to class. make some fixtures dict. simplify other fixtures --- tests/cellfinder_core/conftest.py | 22 +-- tests/cellfinder_core/test_unit/conftest.py | 135 +++++++++++++++++- .../test_unit/test_cellfinder.py | 41 +++--- 3 files changed, 165 insertions(+), 33 deletions(-) diff --git a/tests/cellfinder_core/conftest.py b/tests/cellfinder_core/conftest.py index 127a4483..cb692d58 100644 --- a/tests/cellfinder_core/conftest.py +++ b/tests/cellfinder_core/conftest.py @@ -4,20 +4,20 @@ import pytest +# @pytest.fixture() +# def default_input_config_cellfinder() -> Path: # do I need this? +# """Return path to default input config for cellfinder workflow -@pytest.fixture() -def default_input_config_cellfinder() -> Path: - """Return path to default input config for cellfinder workflow +# Returns +# ------- +# Path +# Path to default input config - Returns - ------- - Path - Path to default input config +# """ +# from brainglobe_workflows.utils import +# DEFAULT_JSON_CONFIG_PATH_CELLFINDER - """ - from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER - - return DEFAULT_JSON_CONFIG_PATH_CELLFINDER +# return DEFAULT_JSON_CONFIG_PATH_CELLFINDER @pytest.fixture(autouse=True) diff --git a/tests/cellfinder_core/test_unit/conftest.py b/tests/cellfinder_core/test_unit/conftest.py index 57492be5..5b6123cc 100644 --- a/tests/cellfinder_core/test_unit/conftest.py +++ b/tests/cellfinder_core/test_unit/conftest.py @@ -1,5 +1,11 @@ +import json +from pathlib import Path + +import pooch import pytest +from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER + @pytest.fixture() def custom_logger_name() -> str: @@ -10,8 +16,131 @@ def custom_logger_name() -> str: str Name of custom logger """ - from brainglobe_workflows.utils import ( - __name__ as logger_name, - ) + from brainglobe_workflows.utils import __name__ as logger_name return logger_name + + +@pytest.fixture(scope="session") +def cellfinder_GIN_data() -> dict: + """Return the URL and hash to the GIN repository with the input data + + Returns + ------- + dict + URL and hash of the GIN repository with the cellfinder test data + """ + return { + "url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip", + "hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914", # noqa + } + + +@pytest.fixture() +def config_GIN_dict(cellfinder_GIN_data): + """ + Return a config pointing to the location where GIN would be by default + """ + + # read default config as dict + with open(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) as cfg: + config_dict = json.load(cfg) + + # modify / ensure + # - add url + # - add data hash + # - remove input_data_dir + config_dict["data_url"] = cellfinder_GIN_data["url"] + config_dict["data_hash"] = cellfinder_GIN_data["hash"] + if "input_data_dir" in config_dict.keys(): + del config_dict["input_data_dir"] + + # download data to default location for GIN + pooch.retrieve( + url=cellfinder_GIN_data["url"], + known_hash=cellfinder_GIN_data["hash"], + path=Path.home() + / ".brainglobe" + / "workflows" + / "cellfinder_core", # path to download zip to + progressbar=True, + processor=pooch.Unzip(extract_dir="cellfinder_test_data"), + ) + + return config_dict + + +@pytest.fixture() +def config_force_GIN_dict(cellfinder_GIN_data, tmp_path): + """ + Return a config pointing to the location where GIN would be by default + """ + # read default config as dict + with open(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) as cfg: + config_dict = json.load(cfg) + + # modify + config_dict["data_url"] = cellfinder_GIN_data["url"] + config_dict["data_hash"] = cellfinder_GIN_data["hash"] + config_dict["input_data_dir"] = tmp_path + # since the input_data_dir does not exist, it will + # download the GIN data there + + return config_dict + + +@pytest.fixture() +def config_local_dict(cellfinder_GIN_data): + """ """ + + # read default config as dict + # as dict because some paths are computed derived from input_data_dir + with open(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) as cfg: + config_dict = json.load(cfg) + + # modify dict + # - remove url + # - remove data hash + # - add input_data_dir + config_dict["data_url"] = None + config_dict["data_hash"] = None + config_dict["input_data_dir"] = Path.home() / "local_cellfinder_data" + + # fetch data from GIN and download locally to local location? + pooch.retrieve( + url=cellfinder_GIN_data["url"], + known_hash=cellfinder_GIN_data["hash"], + path=Path( + config_dict["input_data_dir"] + ).parent, # path to download zip to + progressbar=True, + processor=pooch.Unzip( + extract_dir=Path(config_dict["input_data_dir"]).stem + # path to unzipped dir, *relative* to 'path' + ), + ) + return config_dict + + +@pytest.fixture() +def config_missing_signal_dict(config_local_dict): + config_dict = config_local_dict.copy() + config_dict["signal_subdir"] = "_" + + return config_dict + + +@pytest.fixture() +def config_missing_background_dict(config_local_dict): + config_dict = config_local_dict.copy() + config_dict["background_subdir"] = "_" + + return config_dict + + +@pytest.fixture() +def config_not_GIN_nor_local_dict(config_local_dict): + config_dict = config_local_dict.copy() + config_dict["input_data_dir"] = "_" + + return config_dict diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index d02e7897..c8b00cd8 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -85,12 +85,17 @@ def test_read_cellfinder_config( """ from brainglobe_workflows.cellfinder_core.cellfinder_core import ( read_cellfinder_config, + DEFAULT_JSON_CONFIG_PATH_CELLFINDER ) # instantiate custom logger _ = setup_logger() - input_config_path = request.getfixturevalue(input_config) + # instantiate custom logger + _ = setup_logger() + + input_config_path = DEFAULT_JSON_CONFIG_PATH_CELLFINDER + # request.getfixturevalue(input_config) # read json as Cellfinder config config = read_cellfinder_config(input_config_path, log_on=True) @@ -135,17 +140,17 @@ def test_read_cellfinder_config( @pytest.mark.skip(reason="focus of PR62") @pytest.mark.parametrize( - "input_config, message_pattern", + "input_config_dict, message_pattern", [ ( - "config_local", + "config_local_dict", "Fetching input data from the local directories", ), ], ) def test_add_input_paths( caplog: pytest.LogCaptureFixture, - input_config: str, + input_config_dict: dict, message_pattern: str, request: pytest.FixtureRequest, ): @@ -155,12 +160,8 @@ def test_add_input_paths( ---------- caplog : pytest.LogCaptureFixture Pytest fixture to capture the logs during testing - cellfinder_GIN_data : dict - Dict holding the URL and hash of the cellfinder test data in GIN - input_configs_dir : Path - Test data directory path - input_config : str - Name of input config json file + input_config_dict : dicy + input config as a dict message_pattern : str Expected pattern in the log """ @@ -171,7 +172,7 @@ def test_add_input_paths( # read json as Cellfinder config # ---> change so that the fixture is the config object! # config = read_cellfinder_config(input_configs_dir / input_config) - _ = request.getfixturevalue(input_config) + _ = request.getfixturevalue(input_config_dict) # check log messages assert len(caplog.messages) > 0 @@ -220,7 +221,10 @@ def test_setup_workflow( request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ - from brainglobe_workflows.cellfinder_core.cellfinder_core import setup + from brainglobe_workflows.cellfinder_core.cellfinder_core import ( + setup, + DEFAULT_JSON_CONFIG_PATH_CELLFINDER + ) # setup logger _ = setup_logger() @@ -228,10 +232,10 @@ def test_setup_workflow( # monkeypatch to change current directory to # pytest temporary directory # (cellfinder cache directory is created in cwd) - monkeypatch.chdir(tmp_path) + # monkeypatch.chdir(tmp_path) # setup workflow - config = setup(request.getfixturevalue(input_config)) + config = setup(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) # check logs assert message in caplog.text @@ -271,7 +275,6 @@ def test_setup_workflow( def test_setup( input_config: str, custom_logger_name: str, - request: pytest.FixtureRequest, ): """Test full setup for cellfinder workflow, using the default config and passing a specific config file. @@ -300,9 +303,8 @@ def test_setup( # pytest temporary directory # (cellfinder cache directory is created in cwd) # monkeypatch.chdir(tmp_path) - # run setup on default configuration - cfg = setup_full(request.getfixturevalue(input_config)) + cfg = setup_full(input_config) # (request.getfixturevalue(input_config)) # check logger exists logger = logging.getLogger(custom_logger_name) @@ -348,9 +350,10 @@ def test_run_workflow_from_cellfinder_run( # pytest temporary directory # (cellfinder cache directory is created in cwd) # monkeypatch.chdir(tmp_path) - # run setup - cfg = setup_full(str(request.getfixturevalue(input_config))) + cfg = setup_full( + input_config + ) # str(request.getfixturevalue(input_config))) # run workflow run_workflow_from_cellfinder_run(cfg) From 9172e3a90d31f3ea148bf37b1ee1ed7caccc2ec3 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 21 Dec 2023 11:08:25 +0000 Subject: [PATCH 20/41] refactor setup_workflow --- .../test_unit/test_cellfinder.py | 94 +------------------ 1 file changed, 5 insertions(+), 89 deletions(-) diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index c8b00cd8..990a2565 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -62,82 +62,6 @@ def config_local(cellfinder_GIN_data, default_input_config_cellfinder): return config -@pytest.mark.parametrize( - "input_config, message", - [ - ("default_input_config_cellfinder", "Using default config file"), - ], -) -def test_read_cellfinder_config( - input_config: str, - message: str, - caplog: pytest.LogCaptureFixture, - request: pytest.FixtureRequest, -): - """Test for reading a cellfinder config file - - Parameters - ---------- - input_config : str - Name of input config json file - input_configs_dir : Path - Test data directory path - """ - from brainglobe_workflows.cellfinder_core.cellfinder_core import ( - read_cellfinder_config, - DEFAULT_JSON_CONFIG_PATH_CELLFINDER - ) - - # instantiate custom logger - _ = setup_logger() - - # instantiate custom logger - _ = setup_logger() - - input_config_path = DEFAULT_JSON_CONFIG_PATH_CELLFINDER - # request.getfixturevalue(input_config) - - # read json as Cellfinder config - config = read_cellfinder_config(input_config_path, log_on=True) - - # read json as dict - with open(input_config_path) as cfg: - config_dict = json.load(cfg) - - # check keys of dictionary are a subset of Cellfinder config attributes - assert all( - [ky in config.__dataclass_fields__.keys() for ky in config_dict.keys()] - ) - - # check logs - assert message in caplog.text - - # check all signal files exist - assert config._list_signal_files - assert all([Path(f).is_file() for f in config._list_signal_files]) - - # check all background files exist - assert config._list_background_files - assert all([Path(f).is_file() for f in config._list_background_files]) - - # check output directory exists - assert Path(config._output_path).resolve().is_dir() - - # check output directory name has correct format - out = re.fullmatch( - str(config.output_dir_basename) + "\\d{8}_\\d{6}$", - Path(config._output_path).stem, - ) - assert out is not None - assert out.group() is not None - - # check output file path is as expected - assert ( - Path(config._detected_cells_path) - == Path(config._output_path) / config.detected_cells_filename - ) - - @pytest.mark.skip(reason="focus of PR62") @pytest.mark.parametrize( "input_config_dict, message_pattern", @@ -182,14 +106,14 @@ def test_add_input_paths( @pytest.mark.parametrize( - "input_config, message", + "input_config_path, message", [ ("default_input_config_cellfinder", "Using default config file"), # ("config_GIN", "Input config read from"), ], ) -def test_setup_workflow( - input_config: str, +def test_read_cellfinder_config( + input_config_path: str, message: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, @@ -259,7 +183,7 @@ def test_setup_workflow( assert out is not None assert out.group() is not None - # check output file path + # check output file path is as expected assert ( Path(config._detected_cells_path) == Path(config.output_path) / config.detected_cells_filename @@ -299,10 +223,6 @@ def test_setup( setup as setup_full, ) - # Monkeypatch to change current directory to - # pytest temporary directory - # (cellfinder cache directory is created in cwd) - # monkeypatch.chdir(tmp_path) # run setup on default configuration cfg = setup_full(input_config) # (request.getfixturevalue(input_config)) @@ -346,10 +266,6 @@ def test_run_workflow_from_cellfinder_run( setup as setup_full, ) - # monkeypatch to change current directory to - # pytest temporary directory - # (cellfinder cache directory is created in cwd) - # monkeypatch.chdir(tmp_path) # run setup cfg = setup_full( input_config @@ -358,5 +274,5 @@ def test_run_workflow_from_cellfinder_run( # run workflow run_workflow_from_cellfinder_run(cfg) - # check output files are those expected? + # check output files exist assert Path(cfg._detected_cells_path).is_file() From 242668da77a8a35c0293a4cf63f129d25ef6d50b Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 21 Dec 2023 11:11:29 +0000 Subject: [PATCH 21/41] cosmetic changes to fixtures --- tests/cellfinder_core/conftest.py | 19 +---- tests/cellfinder_core/test_unit/conftest.py | 83 +++++++++++++++++---- 2 files changed, 69 insertions(+), 33 deletions(-) diff --git a/tests/cellfinder_core/conftest.py b/tests/cellfinder_core/conftest.py index cb692d58..a6c4b079 100644 --- a/tests/cellfinder_core/conftest.py +++ b/tests/cellfinder_core/conftest.py @@ -4,29 +4,14 @@ import pytest -# @pytest.fixture() -# def default_input_config_cellfinder() -> Path: # do I need this? -# """Return path to default input config for cellfinder workflow - -# Returns -# ------- -# Path -# Path to default input config - -# """ -# from brainglobe_workflows.utils import -# DEFAULT_JSON_CONFIG_PATH_CELLFINDER - -# return DEFAULT_JSON_CONFIG_PATH_CELLFINDER - @pytest.fixture(autouse=True) def mock_home_directory(monkeypatch: pytest.MonkeyPatch): # define mock home path home_path = Path.home() # actual home path - mock_home_path = home_path / ".brainglobe-tests" # tmp_path # + mock_home_path = home_path / ".brainglobe-tests" - # create dir if it doesn't exist + # create directory if it doesn't exist if not mock_home_path.exists(): mock_home_path.mkdir() diff --git a/tests/cellfinder_core/test_unit/conftest.py b/tests/cellfinder_core/test_unit/conftest.py index 5b6123cc..f468dc02 100644 --- a/tests/cellfinder_core/test_unit/conftest.py +++ b/tests/cellfinder_core/test_unit/conftest.py @@ -37,25 +37,26 @@ def cellfinder_GIN_data() -> dict: @pytest.fixture() -def config_GIN_dict(cellfinder_GIN_data): +def config_GIN_dict(cellfinder_GIN_data: dict) -> dict: """ - Return a config pointing to the location where GIN would be by default + Return a config pointing to the location where GIN would be by default, + and download the data """ - # read default config as dict + # read default config as a dictionary with open(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) as cfg: config_dict = json.load(cfg) - # modify / ensure + # modify # - add url # - add data hash - # - remove input_data_dir + # - remove input_data_dir if present config_dict["data_url"] = cellfinder_GIN_data["url"] config_dict["data_hash"] = cellfinder_GIN_data["hash"] if "input_data_dir" in config_dict.keys(): del config_dict["input_data_dir"] - # download data to default location for GIN + # download GIN data to default location for GIN pooch.retrieve( url=cellfinder_GIN_data["url"], known_hash=cellfinder_GIN_data["hash"], @@ -71,42 +72,49 @@ def config_GIN_dict(cellfinder_GIN_data): @pytest.fixture() -def config_force_GIN_dict(cellfinder_GIN_data, tmp_path): +def config_force_GIN_dict(cellfinder_GIN_data: dict, tmp_path: Path) -> dict: """ - Return a config pointing to the location where GIN would be by default + Return a config pointing to a temporary directory where to download GIN + data, without downloading the data first. + + Since there is no data at the input_data_dir location, the GIN download + will be triggered """ # read default config as dict with open(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) as cfg: config_dict = json.load(cfg) # modify + # - add url + # - add data hash + # - point to a temporary directory in input_data_dir config_dict["data_url"] = cellfinder_GIN_data["url"] config_dict["data_hash"] = cellfinder_GIN_data["hash"] config_dict["input_data_dir"] = tmp_path - # since the input_data_dir does not exist, it will - # download the GIN data there return config_dict @pytest.fixture() -def config_local_dict(cellfinder_GIN_data): - """ """ +def config_local_dict(cellfinder_GIN_data: dict) -> dict: + """ + Return a config pointing to a local dataset, + and ensure the data is downloaded there + """ # read default config as dict - # as dict because some paths are computed derived from input_data_dir with open(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) as cfg: config_dict = json.load(cfg) # modify dict # - remove url # - remove data hash - # - add input_data_dir + # - point to a local directory under home in input_data_dir config_dict["data_url"] = None config_dict["data_hash"] = None config_dict["input_data_dir"] = Path.home() / "local_cellfinder_data" - # fetch data from GIN and download locally to local location? + # fetch data from GIN and download to the local location pooch.retrieve( url=cellfinder_GIN_data["url"], known_hash=cellfinder_GIN_data["hash"], @@ -123,7 +131,21 @@ def config_local_dict(cellfinder_GIN_data): @pytest.fixture() -def config_missing_signal_dict(config_local_dict): +def config_missing_signal_dict(config_local_dict: dict) -> dict: + """ + Return a config pointing to a local dataset, whose signal directory + does not exist + + Parameters + ---------- + config_local_dict : _type_ + _description_ + + Returns + ------- + dict + _description_ + """ config_dict = config_local_dict.copy() config_dict["signal_subdir"] = "_" @@ -132,6 +154,19 @@ def config_missing_signal_dict(config_local_dict): @pytest.fixture() def config_missing_background_dict(config_local_dict): + """Return a config pointing to a local dataset, whose background directory + does not exist + + Parameters + ---------- + config_local_dict : _type_ + _description_ + + Returns + ------- + _type_ + _description_ + """ config_dict = config_local_dict.copy() config_dict["background_subdir"] = "_" @@ -140,7 +175,23 @@ def config_missing_background_dict(config_local_dict): @pytest.fixture() def config_not_GIN_nor_local_dict(config_local_dict): + """Return a config pointing to a local dataset whose input_data_dir + directory does not exist, and with no references to a GIN dataset. + + Parameters + ---------- + config_local_dict : _type_ + _description_ + + Returns + ------- + _type_ + _description_ + """ config_dict = config_local_dict.copy() config_dict["input_data_dir"] = "_" + config_dict["data_url"] = None + config_dict["data_hash"] = None + return config_dict From 9500446461c8c58fd271d1228036f38bfa4796aa Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 21 Dec 2023 11:18:10 +0000 Subject: [PATCH 22/41] remove spurious monkeypatched cwd from merge --- .../test_integration/test_cellfinder.py | 43 +++++++++++++++++-- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/tests/cellfinder_core/test_integration/test_cellfinder.py b/tests/cellfinder_core/test_integration/test_cellfinder.py index eabc5342..d7a17746 100644 --- a/tests/cellfinder_core/test_integration/test_cellfinder.py +++ b/tests/cellfinder_core/test_integration/test_cellfinder.py @@ -44,10 +44,6 @@ def test_main_w_inputs( request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ - # monkeypatch to change current directory to - # pytest temporary directory - # (cellfinder cache directory is created in cwd) - # monkeypatch.chdir(tmp_path) # run main cfg = main(str(request.getfixturevalue(input_config))) @@ -93,6 +89,45 @@ def test_entry_point(): tmp_path : Path Pytest fixture providing a temporary path for each test """ + + # define CLI input + subprocess_input = ["cellfinder-workflow"] + + # run workflow with no CLI arguments, + subprocess_output = subprocess.run( + subprocess_input, + ) + + # check returncode + assert subprocess_output.returncode == 0 + + +@pytest.mark.parametrize( + "input_config", + [ + None, + # "input_config_fetch_GIN", + # "input_config_fetch_local", + ], +) +def test_entry_point_w_inputs( + input_config: Optional[str], + request: pytest.FixtureRequest, +): + """Test running the cellfinder workflow via the predefined entry point + with inputs + + Parameters + ---------- + input_config : Optional[str] + Path to input config json file + monkeypatch : pytest.MonkeyPatch + Pytest fixture to use monkeypatching utils + tmp_path : Path + Pytest fixture providing a temporary path for each test + request : pytest.FixtureRequest + Pytest fixture to enable requesting fixtures by name + """ # monkeypatch to change current directory to # pytest temporary directory # (cellfinder cache directory is created in cwd) From ca51acbe888419c0637d8a8b0931422955d45d40 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 21 Dec 2023 11:27:21 +0000 Subject: [PATCH 23/41] add skips that were removed in merge --- .../test_integration/test_cellfinder.py | 66 ++++++++++++++++--- 1 file changed, 56 insertions(+), 10 deletions(-) diff --git a/tests/cellfinder_core/test_integration/test_cellfinder.py b/tests/cellfinder_core/test_integration/test_cellfinder.py index d7a17746..48829082 100644 --- a/tests/cellfinder_core/test_integration/test_cellfinder.py +++ b/tests/cellfinder_core/test_integration/test_cellfinder.py @@ -18,12 +18,12 @@ def test_main(): assert Path(cfg._detected_cells_path).is_file() +@pytest.mark.skip() @pytest.mark.parametrize( "input_config", [ - None, - # "input_config_fetch_GIN", - # "input_config_fetch_local", + "input_config_fetch_GIN", + "input_config_fetch_local", ], ) def test_main_w_inputs( @@ -78,6 +78,56 @@ def test_script(): assert subprocess_output.returncode == 0 +@pytest.mark.skip() +@pytest.mark.parametrize( + "input_config", + [ + "input_config_fetch_GIN", + "input_config_fetch_local", + ], +) +def test_script_w_inputs( + input_config: Optional[str], + request: pytest.FixtureRequest, +): + """Test running the cellfinder worklfow from the command line with inputs + + Parameters + ---------- + input_config : Optional[str] + Path to input config json file + monkeypatch : pytest.MonkeyPatch + Pytest fixture to use monkeypatching utils + tmp_path : Path + Pytest fixture providing a temporary path for each test + request : pytest.FixtureRequest + Pytest fixture to enable requesting fixtures by name + """ + + # define CLI input + script_path = ( + Path(__file__).resolve().parents[3] + / "brainglobe_workflows" + / "cellfinder_core" + / "cellfinder.py" + ) + subprocess_input = [ + sys.executable, + str(script_path), + ] + # append config to subprocess input + subprocess_input.append("--config") + subprocess_input.append(str(request.getfixturevalue(input_config))) + + # run workflow script from the CLI + subprocess_output = subprocess.run( + subprocess_input, + ) + + # check returncode + assert subprocess_output.returncode == 0 + + def test_entry_point(): """Test running the cellfinder workflow via the predefined entry point with no inputs @@ -102,12 +152,12 @@ def test_entry_point(): assert subprocess_output.returncode == 0 +@pytest.mark.skip() @pytest.mark.parametrize( "input_config", [ - None, - # "input_config_fetch_GIN", - # "input_config_fetch_local", + "input_config_fetch_GIN", + "input_config_fetch_local", ], ) def test_entry_point_w_inputs( @@ -128,10 +178,6 @@ def test_entry_point_w_inputs( request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ - # monkeypatch to change current directory to - # pytest temporary directory - # (cellfinder cache directory is created in cwd) - # monkeypatch.chdir(tmp_path) # define CLI input subprocess_input = ["cellfinder-workflow"] From ce4bc6ebc785bc5ff51761926b2f1708684d2bc7 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 21 Dec 2023 11:36:21 +0000 Subject: [PATCH 24/41] move fixtures to code where they are used --- tests/cellfinder_core/test_unit/conftest.py | 182 -------------------- 1 file changed, 182 deletions(-) diff --git a/tests/cellfinder_core/test_unit/conftest.py b/tests/cellfinder_core/test_unit/conftest.py index f468dc02..ae85bc53 100644 --- a/tests/cellfinder_core/test_unit/conftest.py +++ b/tests/cellfinder_core/test_unit/conftest.py @@ -1,11 +1,5 @@ -import json -from pathlib import Path - -import pooch import pytest -from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER - @pytest.fixture() def custom_logger_name() -> str: @@ -19,179 +13,3 @@ def custom_logger_name() -> str: from brainglobe_workflows.utils import __name__ as logger_name return logger_name - - -@pytest.fixture(scope="session") -def cellfinder_GIN_data() -> dict: - """Return the URL and hash to the GIN repository with the input data - - Returns - ------- - dict - URL and hash of the GIN repository with the cellfinder test data - """ - return { - "url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip", - "hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914", # noqa - } - - -@pytest.fixture() -def config_GIN_dict(cellfinder_GIN_data: dict) -> dict: - """ - Return a config pointing to the location where GIN would be by default, - and download the data - """ - - # read default config as a dictionary - with open(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) as cfg: - config_dict = json.load(cfg) - - # modify - # - add url - # - add data hash - # - remove input_data_dir if present - config_dict["data_url"] = cellfinder_GIN_data["url"] - config_dict["data_hash"] = cellfinder_GIN_data["hash"] - if "input_data_dir" in config_dict.keys(): - del config_dict["input_data_dir"] - - # download GIN data to default location for GIN - pooch.retrieve( - url=cellfinder_GIN_data["url"], - known_hash=cellfinder_GIN_data["hash"], - path=Path.home() - / ".brainglobe" - / "workflows" - / "cellfinder_core", # path to download zip to - progressbar=True, - processor=pooch.Unzip(extract_dir="cellfinder_test_data"), - ) - - return config_dict - - -@pytest.fixture() -def config_force_GIN_dict(cellfinder_GIN_data: dict, tmp_path: Path) -> dict: - """ - Return a config pointing to a temporary directory where to download GIN - data, without downloading the data first. - - Since there is no data at the input_data_dir location, the GIN download - will be triggered - """ - # read default config as dict - with open(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) as cfg: - config_dict = json.load(cfg) - - # modify - # - add url - # - add data hash - # - point to a temporary directory in input_data_dir - config_dict["data_url"] = cellfinder_GIN_data["url"] - config_dict["data_hash"] = cellfinder_GIN_data["hash"] - config_dict["input_data_dir"] = tmp_path - - return config_dict - - -@pytest.fixture() -def config_local_dict(cellfinder_GIN_data: dict) -> dict: - """ - Return a config pointing to a local dataset, - and ensure the data is downloaded there - """ - - # read default config as dict - with open(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) as cfg: - config_dict = json.load(cfg) - - # modify dict - # - remove url - # - remove data hash - # - point to a local directory under home in input_data_dir - config_dict["data_url"] = None - config_dict["data_hash"] = None - config_dict["input_data_dir"] = Path.home() / "local_cellfinder_data" - - # fetch data from GIN and download to the local location - pooch.retrieve( - url=cellfinder_GIN_data["url"], - known_hash=cellfinder_GIN_data["hash"], - path=Path( - config_dict["input_data_dir"] - ).parent, # path to download zip to - progressbar=True, - processor=pooch.Unzip( - extract_dir=Path(config_dict["input_data_dir"]).stem - # path to unzipped dir, *relative* to 'path' - ), - ) - return config_dict - - -@pytest.fixture() -def config_missing_signal_dict(config_local_dict: dict) -> dict: - """ - Return a config pointing to a local dataset, whose signal directory - does not exist - - Parameters - ---------- - config_local_dict : _type_ - _description_ - - Returns - ------- - dict - _description_ - """ - config_dict = config_local_dict.copy() - config_dict["signal_subdir"] = "_" - - return config_dict - - -@pytest.fixture() -def config_missing_background_dict(config_local_dict): - """Return a config pointing to a local dataset, whose background directory - does not exist - - Parameters - ---------- - config_local_dict : _type_ - _description_ - - Returns - ------- - _type_ - _description_ - """ - config_dict = config_local_dict.copy() - config_dict["background_subdir"] = "_" - - return config_dict - - -@pytest.fixture() -def config_not_GIN_nor_local_dict(config_local_dict): - """Return a config pointing to a local dataset whose input_data_dir - directory does not exist, and with no references to a GIN dataset. - - Parameters - ---------- - config_local_dict : _type_ - _description_ - - Returns - ------- - _type_ - _description_ - """ - config_dict = config_local_dict.copy() - config_dict["input_data_dir"] = "_" - - config_dict["data_url"] = None - config_dict["data_hash"] = None - - return config_dict From d2dfb2ff1c783f4f454d1ed2d2e0d32e9ef3c9a8 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 21 Dec 2023 11:51:39 +0000 Subject: [PATCH 25/41] bring back default_input_config_cellfinder fixture --- .../test_unit/test_cellfinder.py | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index 990a2565..a2ae68c4 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -9,6 +9,21 @@ from brainglobe_workflows.utils import setup_logger +@pytest.fixture() +def default_input_config_cellfinder() -> Path: + """Return path to default input config for cellfinder workflow + + Returns + ------- + Path + Path to default input config + + """ + from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER + + return DEFAULT_JSON_CONFIG_PATH_CELLFINDER + + @pytest.fixture(scope="session") def cellfinder_GIN_data() -> dict: """Return the URL and hash to the GIN repository with the input data @@ -197,8 +212,7 @@ def test_read_cellfinder_config( ], ) def test_setup( - input_config: str, - custom_logger_name: str, + input_config: str, custom_logger_name: str, request: pytest.FixtureRequest ): """Test full setup for cellfinder workflow, using the default config and passing a specific config file. @@ -224,7 +238,7 @@ def test_setup( ) # run setup on default configuration - cfg = setup_full(input_config) # (request.getfixturevalue(input_config)) + cfg = setup_full(str(request.getfixturevalue(input_config))) # check logger exists logger = logging.getLogger(custom_logger_name) @@ -267,9 +281,7 @@ def test_run_workflow_from_cellfinder_run( ) # run setup - cfg = setup_full( - input_config - ) # str(request.getfixturevalue(input_config))) + cfg = setup_full(str(request.getfixturevalue(input_config))) # run workflow run_workflow_from_cellfinder_run(cfg) From 2651d4615197c8d7a1771545f8eaef10be619956 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 21 Dec 2023 12:05:37 +0000 Subject: [PATCH 26/41] finalise adding local config as parameter in test_read_cellfinder_config --- .../test_unit/test_cellfinder.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index a2ae68c4..dd3ab661 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -40,41 +40,41 @@ def cellfinder_GIN_data() -> dict: @pytest.fixture() -def config_local(cellfinder_GIN_data, default_input_config_cellfinder): - """ """ - - from brainglobe_workflows.cellfinder_core.cellfinder_core import ( - CellfinderConfig, - ) +def config_local_dict( + cellfinder_GIN_data: dict, default_input_config_cellfinder: Path +) -> dict: + """ + Return a config pointing to a local dataset, + and ensure the data is downloaded there + """ # read default config as dict - # as dict because some paths are computed derived from input_data_dir with open(default_input_config_cellfinder) as cfg: config_dict = json.load(cfg) - # modify location of data? + # modify dict # - remove url # - remove data hash # - add input_data_dir config_dict["data_url"] = None config_dict["data_hash"] = None config_dict["input_data_dir"] = Path.home() / "local_cellfinder_data" - - # instantiate object - config = CellfinderConfig(**config_dict) - + # fetch data from GIN and download locally to local location? pooch.retrieve( url=cellfinder_GIN_data["url"], known_hash=cellfinder_GIN_data["hash"], - path=Path(config.input_data_dir).parent, # path to download zip to + path=Path( + config_dict["input_data_dir"] + ).parent, # path to download zip to progressbar=True, processor=pooch.Unzip( - extract_dir=Path(config.input_data_dir).stem + extract_dir=Path(config_dict["input_data_dir"]).stem # path to unzipped dir, *relative* to 'path' ), ) - return config + return config_dict + @pytest.mark.skip(reason="focus of PR62") From 39043eeffe7b32f1977fb923f3e8893eb17bdc8c Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 21 Dec 2023 12:11:07 +0000 Subject: [PATCH 27/41] add local config to remaining unit tests --- tests/cellfinder_core/test_unit/test_cellfinder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index dd3ab661..f19e3551 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -238,7 +238,7 @@ def test_setup( ) # run setup on default configuration - cfg = setup_full(str(request.getfixturevalue(input_config))) + cfg = setup_workflow(str(request.getfixturevalue(input_config))) # check logger exists logger = logging.getLogger(custom_logger_name) @@ -281,7 +281,7 @@ def test_run_workflow_from_cellfinder_run( ) # run setup - cfg = setup_full(str(request.getfixturevalue(input_config))) + cfg = setup_workflow(str(request.getfixturevalue(input_config))) # run workflow run_workflow_from_cellfinder_run(cfg) From 8418ef0c439ede65dedfe5a6f402c0db424bd643 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 21 Dec 2023 12:22:43 +0000 Subject: [PATCH 28/41] add local config and GIN config to test main --- tests/cellfinder_core/conftest.py | 129 ++++++++++++++++++ .../test_integration/test_cellfinder.py | 5 +- .../test_unit/test_cellfinder.py | 1 - 3 files changed, 131 insertions(+), 4 deletions(-) diff --git a/tests/cellfinder_core/conftest.py b/tests/cellfinder_core/conftest.py index a6c4b079..658f831a 100644 --- a/tests/cellfinder_core/conftest.py +++ b/tests/cellfinder_core/conftest.py @@ -2,6 +2,7 @@ from pathlib import Path +import pooch import pytest @@ -20,3 +21,131 @@ def mock_home(): return mock_home_path monkeypatch.setattr(Path, "home", mock_home) + + +@pytest.fixture() +def default_input_config_cellfinder() -> Path: + """Return path to default input config for cellfinder workflow + + Returns + ------- + Path + Path to default input config + + """ + from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER + + return DEFAULT_JSON_CONFIG_PATH_CELLFINDER + + +@pytest.fixture(scope="session") +def cellfinder_GIN_data() -> dict: + """Return the URL and hash to the GIN repository with the input data + + Returns + ------- + dict + URL and hash of the GIN repository with the cellfinder test data + """ + return { + "url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip", + "hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914", # noqa + } + + +@pytest.fixture() +def config_local_dict( + cellfinder_GIN_data: dict, default_input_config_cellfinder: Path +) -> dict: + """ + Return a config pointing to a local dataset, + and ensure the data is downloaded there + """ + + # read default config as dict + with open(default_input_config_cellfinder) as cfg: + config_dict = json.load(cfg) + + # modify dict + # - remove url + # - remove data hash + # - point to a local directory under home in input_data_dir + config_dict["data_url"] = None + config_dict["data_hash"] = None + config_dict["input_data_dir"] = str(Path.home() / "local_cellfinder_data") + + # fetch data from GIN and download to the local location + pooch.retrieve( + url=cellfinder_GIN_data["url"], + known_hash=cellfinder_GIN_data["hash"], + path=Path( + config_dict["input_data_dir"] + ).parent, # path to download zip to + progressbar=True, + processor=pooch.Unzip( + extract_dir=Path(config_dict["input_data_dir"]).stem + # path to unzipped dir, *relative* to 'path' + ), + ) + return config_dict + + +@pytest.fixture() +def config_GIN_dict( + cellfinder_GIN_data: dict, default_input_config_cellfinder: Path +) -> dict: + """ + Return a config pointing to the location where GIN would be by default, + and download the data + """ + + # read default config as a dictionary + with open(default_input_config_cellfinder) as cfg: + config_dict = json.load(cfg) + + # modify + # - add url + # - add data hash + # - remove input_data_dir if present + config_dict["data_url"] = cellfinder_GIN_data["url"] + config_dict["data_hash"] = cellfinder_GIN_data["hash"] + if "input_data_dir" in config_dict.keys(): + del config_dict["input_data_dir"] + + # download GIN data to default location for GIN + pooch.retrieve( + url=cellfinder_GIN_data["url"], + known_hash=cellfinder_GIN_data["hash"], + path=Path.home() + / ".brainglobe" + / "workflows" + / "cellfinder_core", # path to download zip to + progressbar=True, + processor=pooch.Unzip(extract_dir="cellfinder_test_data"), + ) + + return config_dict + + +@pytest.fixture() +def config_local_json(config_local_dict: dict, tmp_path: Path) -> Path: + # define location of input config file + config_file_path = tmp_path / "input_config.json" + + # write config dict to that location + with open(config_file_path, "w") as js: + json.dump(config_local_dict, js) + + return config_file_path + + +@pytest.fixture() +def config_GIN_json(config_GIN_dict: dict, tmp_path: Path) -> Path: + # define location of input config file + config_file_path = tmp_path / "input_config.json" + + # write config dict to that location + with open(config_file_path, "w") as js: + json.dump(config_GIN_dict, js) + + return config_file_path diff --git a/tests/cellfinder_core/test_integration/test_cellfinder.py b/tests/cellfinder_core/test_integration/test_cellfinder.py index 48829082..491b405c 100644 --- a/tests/cellfinder_core/test_integration/test_cellfinder.py +++ b/tests/cellfinder_core/test_integration/test_cellfinder.py @@ -18,12 +18,11 @@ def test_main(): assert Path(cfg._detected_cells_path).is_file() -@pytest.mark.skip() @pytest.mark.parametrize( "input_config", [ - "input_config_fetch_GIN", - "input_config_fetch_local", + "config_local_json", + "config_GIN_json", ], ) def test_main_w_inputs( diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index f19e3551..3e9867e3 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -3,7 +3,6 @@ import re from pathlib import Path -import pooch import pytest from brainglobe_workflows.utils import setup_logger From 167d78ec9b1189dd4d07cd3a595e88602de09e99 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 21 Dec 2023 14:38:28 +0000 Subject: [PATCH 29/41] monkeypatched home in integration tests (only works in main) --- .../test_integration/test_cellfinder.py | 61 ++++++++----------- 1 file changed, 25 insertions(+), 36 deletions(-) diff --git a/tests/cellfinder_core/test_integration/test_cellfinder.py b/tests/cellfinder_core/test_integration/test_cellfinder.py index 491b405c..c31939c1 100644 --- a/tests/cellfinder_core/test_integration/test_cellfinder.py +++ b/tests/cellfinder_core/test_integration/test_cellfinder.py @@ -1,16 +1,19 @@ import subprocess import sys from pathlib import Path -from brainglobe_workflows.cellfinder_core.cellfinder_core import main import pytest from typing import Optional def test_main(): """Test main function for setting up and running cellfinder workflow - without inputs + with no inputs + """ + from brainglobe_workflows.cellfinder_core.cellfinder_core import main + + # otherwise imported before monkeypatching? # run main cfg = main() @@ -43,6 +46,7 @@ def test_main_w_inputs( request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ + from brainglobe_workflows.cellfinder_core.cellfinder_core import main # run main cfg = main(str(request.getfixturevalue(input_config))) @@ -53,22 +57,20 @@ def test_main_w_inputs( def test_script(): """Test running the cellfinder worklfow from the command line - without inputs + with no inputs """ + from brainglobe_workflows.cellfinder_core.cellfinder_core import ( + __file__ as script_path, + ) # define CLI input - script_path = ( - Path(__file__).resolve().parents[3] - / "brainglobe_workflows" - / "cellfinder_core" - / "cellfinder_core.py" - ) subprocess_input = [ sys.executable, str(script_path), ] - # run workflow script from the CLI + # run workflow + # Path.home() is not monkeypatched :( subprocess_output = subprocess.run( subprocess_input, ) @@ -77,12 +79,11 @@ def test_script(): assert subprocess_output.returncode == 0 -@pytest.mark.skip() @pytest.mark.parametrize( "input_config", [ - "input_config_fetch_GIN", - "input_config_fetch_local", + "config_local_json", + "config_GIN_json", ], ) def test_script_w_inputs( @@ -95,30 +96,27 @@ def test_script_w_inputs( ---------- input_config : Optional[str] Path to input config json file - monkeypatch : pytest.MonkeyPatch - Pytest fixture to use monkeypatching utils - tmp_path : Path - Pytest fixture providing a temporary path for each test request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ - # define CLI input + # path to script script_path = ( Path(__file__).resolve().parents[3] / "brainglobe_workflows" / "cellfinder_core" / "cellfinder.py" ) + + # define CLI input subprocess_input = [ sys.executable, str(script_path), ] - # append config to subprocess input subprocess_input.append("--config") subprocess_input.append(str(request.getfixturevalue(input_config))) - # run workflow script from the CLI + # run workflow subprocess_output = subprocess.run( subprocess_input, ) @@ -130,19 +128,12 @@ def test_script_w_inputs( def test_entry_point(): """Test running the cellfinder workflow via the predefined entry point with no inputs - - Parameters - ---------- - monkeypatch : pytest.MonkeyPatch - Pytest fixture to use monkeypatching utils - tmp_path : Path - Pytest fixture providing a temporary path for each test """ # define CLI input subprocess_input = ["cellfinder-workflow"] - # run workflow with no CLI arguments, + # run workflow subprocess_output = subprocess.run( subprocess_input, ) @@ -151,12 +142,11 @@ def test_entry_point(): assert subprocess_output.returncode == 0 -@pytest.mark.skip() @pytest.mark.parametrize( "input_config", [ - "input_config_fetch_GIN", - "input_config_fetch_local", + "config_local_json", + "config_GIN_json", ], ) def test_entry_point_w_inputs( @@ -170,18 +160,17 @@ def test_entry_point_w_inputs( ---------- input_config : Optional[str] Path to input config json file - monkeypatch : pytest.MonkeyPatch - Pytest fixture to use monkeypatching utils - tmp_path : Path - Pytest fixture providing a temporary path for each test request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ # define CLI input subprocess_input = ["cellfinder-workflow"] + # append config + subprocess_input.append("--config") + subprocess_input.append(str(request.getfixturevalue(input_config))) - # run workflow with no CLI arguments, + # run workflow subprocess_output = subprocess.run( subprocess_input, ) From 1f141da586fa2fca52de51306bd5b9094553c40c Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 21 Dec 2023 15:19:22 +0000 Subject: [PATCH 30/41] skip tests that use subprocess for now --- tests/cellfinder_core/test_integration/test_cellfinder.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/cellfinder_core/test_integration/test_cellfinder.py b/tests/cellfinder_core/test_integration/test_cellfinder.py index c31939c1..260880b3 100644 --- a/tests/cellfinder_core/test_integration/test_cellfinder.py +++ b/tests/cellfinder_core/test_integration/test_cellfinder.py @@ -55,6 +55,7 @@ def test_main_w_inputs( assert Path(cfg._detected_cells_path).is_file() +@pytest.mark.skip() def test_script(): """Test running the cellfinder worklfow from the command line with no inputs @@ -73,12 +74,14 @@ def test_script(): # Path.home() is not monkeypatched :( subprocess_output = subprocess.run( subprocess_input, + # input=Path.home() ) # check returncode assert subprocess_output.returncode == 0 +@pytest.mark.skip() @pytest.mark.parametrize( "input_config", [ @@ -125,6 +128,7 @@ def test_script_w_inputs( assert subprocess_output.returncode == 0 +@pytest.mark.skip() def test_entry_point(): """Test running the cellfinder workflow via the predefined entry point with no inputs @@ -142,6 +146,7 @@ def test_entry_point(): assert subprocess_output.returncode == 0 +@pytest.mark.skip() @pytest.mark.parametrize( "input_config", [ From 35824df85723b223056ba4d0708834f37453b74e Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 21 Dec 2023 16:27:30 +0000 Subject: [PATCH 31/41] monkeypatch pooch.retrieve when forcing download --- tests/cellfinder_core/test_unit/test_cellfinder.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index 3e9867e3..539f5215 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -4,7 +4,7 @@ from pathlib import Path import pytest - +import pooch from brainglobe_workflows.utils import setup_logger @@ -80,6 +80,10 @@ def config_local_dict( @pytest.mark.parametrize( "input_config_dict, message_pattern", [ + ( + "config_force_GIN_dict", + "Fetching input data from the provided GIN repository", + ), ( "config_local_dict", "Fetching input data from the local directories", @@ -237,7 +241,7 @@ def test_setup( ) # run setup on default configuration - cfg = setup_workflow(str(request.getfixturevalue(input_config))) + cfg = setup_full(str(request.getfixturevalue(input_config))) # check logger exists logger = logging.getLogger(custom_logger_name) @@ -280,7 +284,7 @@ def test_run_workflow_from_cellfinder_run( ) # run setup - cfg = setup_workflow(str(request.getfixturevalue(input_config))) + cfg = setup_full(str(request.getfixturevalue(input_config))) # run workflow run_workflow_from_cellfinder_run(cfg) From a4d1e39715efaf0e411cf936981694ab759d002b Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 21 Dec 2023 16:43:11 +0000 Subject: [PATCH 32/41] make config local fixture copy downloaded GIN data (rather than re-download again) --- tests/cellfinder_core/conftest.py | 98 ++++++++++--------- .../test_unit/test_cellfinder.py | 71 +++++++++++++- 2 files changed, 122 insertions(+), 47 deletions(-) diff --git a/tests/cellfinder_core/conftest.py b/tests/cellfinder_core/conftest.py index 658f831a..63a037e8 100644 --- a/tests/cellfinder_core/conftest.py +++ b/tests/cellfinder_core/conftest.py @@ -53,43 +53,6 @@ def cellfinder_GIN_data() -> dict: } -@pytest.fixture() -def config_local_dict( - cellfinder_GIN_data: dict, default_input_config_cellfinder: Path -) -> dict: - """ - Return a config pointing to a local dataset, - and ensure the data is downloaded there - """ - - # read default config as dict - with open(default_input_config_cellfinder) as cfg: - config_dict = json.load(cfg) - - # modify dict - # - remove url - # - remove data hash - # - point to a local directory under home in input_data_dir - config_dict["data_url"] = None - config_dict["data_hash"] = None - config_dict["input_data_dir"] = str(Path.home() / "local_cellfinder_data") - - # fetch data from GIN and download to the local location - pooch.retrieve( - url=cellfinder_GIN_data["url"], - known_hash=cellfinder_GIN_data["hash"], - path=Path( - config_dict["input_data_dir"] - ).parent, # path to download zip to - progressbar=True, - processor=pooch.Unzip( - extract_dir=Path(config_dict["input_data_dir"]).stem - # path to unzipped dir, *relative* to 'path' - ), - ) - return config_dict - - @pytest.fixture() def config_GIN_dict( cellfinder_GIN_data: dict, default_input_config_cellfinder: Path @@ -112,40 +75,85 @@ def config_GIN_dict( if "input_data_dir" in config_dict.keys(): del config_dict["input_data_dir"] + # GIN downloaded data default location + GIN_default_location = ( + Path.home() + / ".brainglobe" + / "workflows" + / "cellfinder_core" + / "cellfinder_test_data" + ) + # download GIN data to default location for GIN pooch.retrieve( url=cellfinder_GIN_data["url"], known_hash=cellfinder_GIN_data["hash"], - path=Path.home() + path=GIN_default_location.parent, # path to download zip to + progressbar=True, + processor=pooch.Unzip(extract_dir=GIN_default_location.stem), + ) + + return config_dict + + +@pytest.fixture() +def config_local_dict( + config_GIN_dict, # forces download to GIN default location +) -> dict: + """ + Return a config pointing to a local dataset, + and ensure the data is downloaded there + """ + import shutil + + # copy GIN config as dict + config_dict = config_GIN_dict.copy() + + # modify dict + # - remove url + # - remove data hash + # - point to a local directory under home in input_data_dir + config_dict["data_url"] = None + config_dict["data_hash"] = None + config_dict["input_data_dir"] = str(Path.home() / "local_cellfinder_data") + + # copy data from default GIN location to the local location + # GIN downloaded data default location + GIN_default_location = ( + Path.home() / ".brainglobe" / "workflows" - / "cellfinder_core", # path to download zip to - progressbar=True, - processor=pooch.Unzip(extract_dir="cellfinder_test_data"), + / "cellfinder_core" + / "cellfinder_test_data" + ) + shutil.copytree( + GIN_default_location, + config_dict["input_data_dir"], + dirs_exist_ok=True, ) return config_dict @pytest.fixture() -def config_local_json(config_local_dict: dict, tmp_path: Path) -> Path: +def config_GIN_json(config_GIN_dict: dict, tmp_path: Path) -> Path: # define location of input config file config_file_path = tmp_path / "input_config.json" # write config dict to that location with open(config_file_path, "w") as js: - json.dump(config_local_dict, js) + json.dump(config_GIN_dict, js) return config_file_path @pytest.fixture() -def config_GIN_json(config_GIN_dict: dict, tmp_path: Path) -> Path: +def config_local_json(config_local_dict: dict, tmp_path: Path) -> Path: # define location of input config file config_file_path = tmp_path / "input_config.json" # write config dict to that location with open(config_file_path, "w") as js: - json.dump(config_GIN_dict, js) + json.dump(config_local_dict, js) return config_file_path diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index 539f5215..6329b875 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -9,8 +9,75 @@ @pytest.fixture() -def default_input_config_cellfinder() -> Path: - """Return path to default input config for cellfinder workflow +def config_force_GIN_dict( + config_GIN_dict: dict, tmp_path: Path, monkeypatch +) -> dict: + """ + Return a config pointing to a temporary directory where to download GIN + data, without downloading the data first. + + Since there is no data at the input_data_dir location, the GIN download + will be triggered + """ + import shutil + + import pooch + + # read GIN config as dict + config_dict = config_GIN_dict.copy() + + # point to a temporary directory in input_data_dir + config_dict["input_data_dir"] = str(tmp_path) + + # monkeypatch pooch.retrieve + # when called: copy GIN downloaded data, instead of re-downloading + def mock_pooch_download( + url="", known_hash="", path="", progressbar="", processor="" + ): + # GIN downloaded data default location + GIN_default_location = ( + Path.home() + / ".brainglobe" + / "workflows" + / "cellfinder_core" + / "cellfinder_test_data" + ) + + # Copy destination + GIN_copy_destination = tmp_path + + # copy only relevant subdirectories + for subdir in ["signal", "background"]: + shutil.copytree( + GIN_default_location / subdir, # src + GIN_copy_destination / subdir, # dest + dirs_exist_ok=True, + ) + + # List of files in destination + list_of_files = [ + str(f) for f in GIN_copy_destination.glob("**/*") if f.is_file() + ] + list_of_files.sort() + + return list_of_files + + # monkeypatch pooch.retreive with mock_pooch_download() + monkeypatch.setattr(pooch, "retrieve", mock_pooch_download) + + return config_dict + + +@pytest.fixture() +def config_missing_signal_dict(config_local_dict: dict) -> dict: + """ + Return a config pointing to a local dataset, whose signal directory + does not exist + + Parameters + ---------- + config_local_dict : _type_ + _description_ Returns ------- From 0c59ba913207580af41c12531ca667917e0ab95f Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 21 Dec 2023 20:01:52 +0000 Subject: [PATCH 33/41] fix output dir definition --- .../cellfinder_core/cellfinder_core.py | 463 ------------------ 1 file changed, 463 deletions(-) diff --git a/brainglobe_workflows/cellfinder_core/cellfinder_core.py b/brainglobe_workflows/cellfinder_core/cellfinder_core.py index d16e958c..e69de29b 100644 --- a/brainglobe_workflows/cellfinder_core/cellfinder_core.py +++ b/brainglobe_workflows/cellfinder_core/cellfinder_core.py @@ -1,463 +0,0 @@ -"""This script reproduces the most common cellfinder workflow - -It receives as an (optional) command line input the path to a configuration -json file, that holds the values of the required parameters for the workflow. - -If no input json file is passed as a configuration, the default -configuration defined at brainglobe_workflows/cellfinder/default_config.json -is used. - -Example usage: - - to pass a custom configuration, run (from the cellfinder_main.py - parent directory): - python cellfinder_main.py --config path/to/input/config.json - - to use the default configuration, run - python cellfinder_main.py - - -""" - - -import datetime -import json -import logging -import os -import sys -from dataclasses import dataclass -from pathlib import Path -from typing import Optional, Union - -import pooch -from brainglobe_utils.IO.cells import save_cells -from cellfinder.core.main import main as cellfinder_run -from cellfinder.core.tools.IO import read_with_dask -from cellfinder.core.train.train_yml import depth_type - -from brainglobe_workflows.utils import ( - DEFAULT_JSON_CONFIG_PATH_CELLFINDER, - config_parser, - setup_logger, -) -from brainglobe_workflows.utils import __name__ as LOGGER_NAME - -Pathlike = Union[str, os.PathLike] - - -@dataclass -class CellfinderConfig: - """Define the parameters for the cellfinder workflow. - - There are three types of fields: - - required attributes: must be provided, they do not have a default value; - - optional attributes: they have a default value if not specified; - - internal attributes: their names start with _, indicating these are - private. Any functionality to update them should be a class method. - """ - - # Required parameters - voxel_sizes: tuple[float, float, float] - start_plane: int - end_plane: int - trained_model: Optional[os.PathLike] - model_weights: Optional[os.PathLike] - model: str - batch_size: int - n_free_cpus: int - network_voxel_sizes: tuple[int, int, int] - soma_diameter: int - ball_xy_size: int - ball_z_size: int - ball_overlap_fraction: float - log_sigma_size: float - n_sds_above_mean_thresh: int - soma_spread_factor: float - max_cluster_size: int - cube_width: int - cube_height: int - cube_depth: int - network_depth: depth_type - - # Optional parameters - - # install path: default path for downloaded and output data - _install_path: Pathlike = ( - Path.home() / ".brainglobe" / "workflows" / "cellfinder_core" - ) - - # input data paths - # Note: if not specified, the signal and background data - # are assumed to be under "signal" and "background" - # dirs under _install_path/cellfinder_test_data/ - # (see __post_init__ method) - input_data_dir: Optional[Pathlike] = None - signal_subdir: Pathlike = "signal" - background_subdir: Pathlike = "background" - - # output data paths - # Note: if output_parent_dir is not specified, - # it is assumed to be under _install_path - # (see __post_init__ method) - output_dir_basename: str = "cellfinder_output_" - detected_cells_filename: str = "detected_cells.xml" - output_parent_dir: Optional[Pathlike] = None - - # source of data to download - # if not specified in JSON, it is set to None - data_url: Optional[str] = None - data_hash: Optional[str] = None - - # Internal parameters - # even though these are optional we don't expect users to - # change them - _signal_dir_path: Optional[Pathlike] = None - _background_dir_path: Optional[Pathlike] = None - _list_signal_files: Optional[list] = None - _list_background_files: Optional[list] = None - _detected_cells_path: Pathlike = "" - _output_path: Pathlike = "" - - def __post_init__(self: "CellfinderConfig"): - """Executed after __init__ function. - - We use this method to define attributes of the data class - as a function of other attributes. - See https://peps.python.org/pep-0557/#post-init-processing - - The attributes added are input and output data paths - - Parameters - ---------- - self : CellfinderConfig - a CellfinderConfig instance - """ - - # Add input data paths to config - self.add_input_paths() - - # Add output paths to config - self.add_output_paths() - - def add_output_paths(self): - """Adds output paths to the config - - Specifically, it adds: - - output_parent_dir: set to a a timestamped output directory if not - set in __init__(); - - _detected_cells_path: path to the output file - - Parameters - ---------- - config : CellfinderConfig - a cellfinder config - """ - - # Fill in output directory if not specified - if self.output_parent_dir is None: - self.output_parent_dir = Path(self._install_path) - - # Add to config the path to timestamped output directory - timestamp = datetime.datetime.now() - timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S") - self._output_path = Path(self.output_parent_dir) / ( - str(self.output_dir_basename) + timestamp_formatted - ) - self._output_path.mkdir( - parents=True, # create any missing parents - exist_ok=True, # ignore FileExistsError exceptions - ) - - # Add to config the path to the output file - self._detected_cells_path = ( - self._output_path / self.detected_cells_filename - ) - - def add_input_paths(self): - """Adds input data paths to the config. - - Specifically, it adds: - - input_data_dir: set to a default value if not set in __init__(); - - _signal_dir_path: full path to the directory with the signal files - - _background_dir_path: full path to the directory with the - background files. - - _list_signal_files: list of signal files - - _list_background_files: list of background files - - Parameters - ---------- - config : CellfinderConfig - a cellfinder config with input data files to be validated - - Notes - ----- - The signal and background files are first searched locally at the - given location. If not found, we attempt to download them from GIN - and place them at the specified location (input_data_dir). - - - If both parent data directories (signal and background) exist - locally, the lists of signal and background files are added to - the config. - - If exactly one of the parent data directories is missing, an error - message is logged. - - If neither of them exist, the data is retrieved from the provided GIN - repository. If no URL or hash to GIN is provided, an error is thrown. - - """ - # Fetch logger - logger = logging.getLogger(LOGGER_NAME) - - # Fill in input data directory if not specified - if self.input_data_dir is None: - self.input_data_dir = ( - Path(self._install_path) / "cellfinder_test_data" - ) - - # Fill in signal and background paths derived from 'input_data_dir' - self._signal_dir_path = self.input_data_dir / Path(self.signal_subdir) - self._background_dir_path = self.input_data_dir / Path( - self.background_subdir - ) - - # Check if input data directories (signal and background) exist - # locally. - # If both directories exist, get list of signal and background files - if ( - Path(self._signal_dir_path).exists() - and Path(self._background_dir_path).exists() - ): - logger.info("Fetching input data from the local directories") - - self._list_signal_files = [ - f - for f in Path(self._signal_dir_path).resolve().iterdir() - if f.is_file() - ] - self._list_background_files = [ - f - for f in Path(self._background_dir_path).resolve().iterdir() - if f.is_file() - ] - - # If exactly one of the input data directories is missing, print error - elif ( - Path(self._signal_dir_path).resolve().exists() - or Path(self._background_dir_path).resolve().exists() - ): - if not Path(self._signal_dir_path).resolve().exists(): - logger.error( - f"The directory {self._signal_dir_path} does not exist", - ) - else: - logger.error( - f"The directory {self._background_dir_path} " - "does not exist", - ) - - # If neither of the input data directories exist, - # retrieve data from GIN repository and add list of files to config - else: - # Check if GIN URL and hash are defined (log error otherwise) - if self.data_url and self.data_hash: - # get list of files in GIN archive with pooch.retrieve - list_files_archive = pooch.retrieve( - url=self.data_url, - known_hash=self.data_hash, - path=Path( - self.input_data_dir - ).parent, # zip will be downloaded here - progressbar=True, - processor=pooch.Unzip( - extract_dir=Path(self.input_data_dir).stem, - # files are unpacked here, a dir - # *relative* to the path set in 'path' - ), - ) - logger.info( - "Fetching input data from the provided GIN repository" - ) - - # Check signal and background parent directories exist now - assert Path(self._signal_dir_path).resolve().exists() - assert Path(self._background_dir_path).resolve().exists() - - # Add signal files to config - self._list_signal_files = [ - f - for f in list_files_archive - if f.startswith( - str(Path(self._signal_dir_path).resolve()), - ) - ] - - # Add background files to config - self._list_background_files = [ - f - for f in list_files_archive - if f.startswith( - str(Path(self._background_dir_path).resolve()), - ) - ] - # If one of URL/hash to GIN repo not defined, throw an error - else: - logger.error( - "Input data not found locally, and URL/hash to " - "GIN repository not provided", - ) - - -def read_cellfinder_config( - input_config_path: str, log_on: bool = False -) -> CellfinderConfig: - """Instantiate a CellfinderConfig from the input json file. - - Assumes config is json serializable. - - Parameters - ---------- - input_config_path : Path - Absolute path to a cellfinder config file - log_on : bool, optional - whether to log the info messages from reading the config - to the logger, by default False - - Returns - ------- - CellfinderConfig: - The cellfinder config object, populated with data from the input - """ - - # read input config - with open(input_config_path) as cfg: - config_dict = json.load(cfg) - config = CellfinderConfig(**config_dict) - - # print config's origin to log if required - if log_on: - logger = logging.getLogger(LOGGER_NAME) - logger.info(f"Input config read from {input_config_path}") - if input_config_path == DEFAULT_JSON_CONFIG_PATH_CELLFINDER: - logger.info("Using default config file") - - return config - - -def setup(input_config_path: str) -> CellfinderConfig: - # setup logger - _ = setup_logger() - - # read config - cfg = read_cellfinder_config(input_config_path) - - return cfg - - -def run_workflow_from_cellfinder_run(cfg: CellfinderConfig): - """Run workflow based on the cellfinder.core.main.main() - function. - - The steps are: - 1. Read the input signal and background data as two separate - Dask arrays. - 2. Run the main cellfinder pipeline on the input Dask arrays, - with the parameters defined in the input configuration (cfg). - 3. Save the detected cells as an xml file to the location specified in - the input configuration (cfg). - - Parameters - ---------- - cfg : CellfinderConfig - a class with the required setup methods and parameters for - the cellfinder workflow - """ - # Read input data as Dask arrays - signal_array = read_with_dask(str(cfg._signal_dir_path)) - background_array = read_with_dask(str(cfg._background_dir_path)) - - # Run main analysis using `cellfinder_run` - detected_cells = cellfinder_run( - signal_array, - background_array, - cfg.voxel_sizes, - cfg.start_plane, - cfg.end_plane, - cfg.trained_model, - cfg.model_weights, - cfg.model, - cfg.batch_size, - cfg.n_free_cpus, - cfg.network_voxel_sizes, - cfg.soma_diameter, - cfg.ball_xy_size, - cfg.ball_z_size, - cfg.ball_overlap_fraction, - cfg.log_sigma_size, - cfg.n_sds_above_mean_thresh, - cfg.soma_spread_factor, - cfg.max_cluster_size, - cfg.cube_width, - cfg.cube_height, - cfg.cube_depth, - cfg.network_depth, - ) - - # Save results to xml file - save_cells( - detected_cells, - cfg._detected_cells_path, - ) - - -def main( - input_config: str = str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER), -) -> CellfinderConfig: - """Setup and run cellfinder workflow. - - This function runs the setup steps required - to run the cellfinder workflow, and the - workflow itself. Note that only the workflow - will be benchmarked. - - Parameters - ---------- - input_config : str, optional - Absolute path to input config file, - by default str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) - - Returns - ------- - cfg : CellfinderConfig - a class with the required setup methods and parameters for - the cellfinder workflow - """ - # run setup - cfg = setup(input_config) - - # run workflow - run_workflow_from_cellfinder_run(cfg) # only this will be benchmarked - - return cfg - - -def main_app_wrapper(): - """Parse command line arguments and - run cellfinder setup and workflow - - This function is used to define an entry-point, - that allows the user to run the cellfinder workflow - for a given input config file as: - `cellfinder-workflow --config `. - - If no input config file is provided, the default is used. - - """ - # parse CLI arguments - args = config_parser( - sys.argv[1:], # sys.argv[0] is the script name - str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER), - ) - - # run setup and workflow - _ = main(args.config) - - -if __name__ == "__main__": - main_app_wrapper() From e3b3900d416b52a267fda355b4f03515d9bea694 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 9 Jan 2024 19:47:04 +0000 Subject: [PATCH 34/41] docstrings and remove merge artifact --- tests/cellfinder_core/conftest.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/cellfinder_core/conftest.py b/tests/cellfinder_core/conftest.py index 63a037e8..24e0002c 100644 --- a/tests/cellfinder_core/conftest.py +++ b/tests/cellfinder_core/conftest.py @@ -40,7 +40,7 @@ def default_input_config_cellfinder() -> Path: @pytest.fixture(scope="session") def cellfinder_GIN_data() -> dict: - """Return the URL and hash to the GIN repository with the input data + """Return the data of the GIN repository with the input data Returns ------- @@ -59,7 +59,7 @@ def config_GIN_dict( ) -> dict: """ Return a config pointing to the location where GIN would be by default, - and download the data + and download the data there """ # read default config as a dictionary @@ -102,7 +102,10 @@ def config_local_dict( ) -> dict: """ Return a config pointing to a local dataset, - and ensure the data is downloaded there + and ensure the data exists there. + + The data is copied to the local directory from the + default location used in the config_GIN_dict fixture. """ import shutil From f6f2978619591f8a291783df00741c570616fd99 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 10 Jan 2024 15:10:57 +0000 Subject: [PATCH 35/41] fill in docstrings --- .../cellfinder_core/cellfinder_core.py | 463 ++++++++++++++++++ brainglobe_workflows/utils.py | 23 +- pyproject.toml | 2 +- tests/cellfinder_core/conftest.py | 99 +++- .../test_integration/test_cellfinder.py | 30 +- .../test_unit/test_cellfinder.py | 144 +++--- tests/cellfinder_core/test_unit/test_utils.py | 7 + 7 files changed, 670 insertions(+), 98 deletions(-) diff --git a/brainglobe_workflows/cellfinder_core/cellfinder_core.py b/brainglobe_workflows/cellfinder_core/cellfinder_core.py index e69de29b..65a94257 100644 --- a/brainglobe_workflows/cellfinder_core/cellfinder_core.py +++ b/brainglobe_workflows/cellfinder_core/cellfinder_core.py @@ -0,0 +1,463 @@ +"""This script reproduces the most common cellfinder workflow + +It receives as an (optional) command line input the path to a configuration +json file, that holds the values of the required parameters for the workflow. + +If no input json file is passed as a configuration, the default +configuration defined at brainglobe_workflows/cellfinder/default_config.json +is used. + +Example usage: + - to pass a custom configuration, run (from the cellfinder_main.py + parent directory): + python cellfinder_core.py --config path/to/input/config.json + - to use the default configuration, run + python cellfinder_core.py + + +""" + + +import datetime +import json +import logging +import os +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Optional, Union + +import pooch +from brainglobe_utils.IO.cells import save_cells +from cellfinder.core.main import main as cellfinder_run +from cellfinder.core.tools.IO import read_with_dask +from cellfinder.core.train.train_yml import depth_type + +from brainglobe_workflows.utils import ( + DEFAULT_JSON_CONFIG_PATH_CELLFINDER, + config_parser, + setup_logger, +) +from brainglobe_workflows.utils import __name__ as LOGGER_NAME + +Pathlike = Union[str, os.PathLike] + + +@dataclass +class CellfinderConfig: + """Define the parameters for the cellfinder workflow. + + There are three types of fields: + - required attributes: must be provided, they do not have a default value; + - optional attributes: they have a default value if not specified; + - internal attributes: their names start with _, indicating these are + private. Any functionality to update them should be a class method. + """ + + # Required parameters + voxel_sizes: tuple[float, float, float] + start_plane: int + end_plane: int + trained_model: Optional[os.PathLike] + model_weights: Optional[os.PathLike] + model: str + batch_size: int + n_free_cpus: int + network_voxel_sizes: tuple[int, int, int] + soma_diameter: int + ball_xy_size: int + ball_z_size: int + ball_overlap_fraction: float + log_sigma_size: float + n_sds_above_mean_thresh: int + soma_spread_factor: float + max_cluster_size: int + cube_width: int + cube_height: int + cube_depth: int + network_depth: depth_type + + # Optional parameters + + # install path: default path for downloaded and output data + _install_path: Pathlike = ( + Path.home() / ".brainglobe" / "workflows" / "cellfinder_core" + ) + + # input data paths + # Note: if not specified, the signal and background data + # are assumed to be under "signal" and "background" + # dirs under _install_path/cellfinder_test_data/ + # (see __post_init__ method) + input_data_dir: Optional[Pathlike] = None + signal_subdir: Pathlike = "signal" + background_subdir: Pathlike = "background" + + # output data paths + # Note: if output_parent_dir is not specified, + # it is assumed to be under _install_path + # (see __post_init__ method) + output_dir_basename: str = "cellfinder_output_" + detected_cells_filename: str = "detected_cells.xml" + output_parent_dir: Optional[Pathlike] = None + + # source of data to download + # if not specified in JSON, it is set to None + data_url: Optional[str] = None + data_hash: Optional[str] = None + + # Internal parameters + # even though these are optional we don't expect users to + # change them + _signal_dir_path: Optional[Pathlike] = None + _background_dir_path: Optional[Pathlike] = None + _list_signal_files: Optional[list] = None + _list_background_files: Optional[list] = None + _detected_cells_path: Pathlike = "" + _output_path: Pathlike = "" + + def __post_init__(self: "CellfinderConfig"): + """Executed after __init__ function. + + We use this method to define attributes of the data class + as a function of other attributes. + See https://peps.python.org/pep-0557/#post-init-processing + + The attributes added are input and output data paths + + Parameters + ---------- + self : CellfinderConfig + a CellfinderConfig instance + """ + + # Add input data paths to config + self.add_input_paths() + + # Add output paths to config + self.add_output_paths() + + def add_output_paths(self): + """Adds output paths to the config + + Specifically, it adds: + - output_parent_dir: set to a a timestamped output directory if not + set in __init__(); + - _detected_cells_path: path to the output file + + Parameters + ---------- + config : CellfinderConfig + a cellfinder config + """ + + # Fill in output directory if not specified + if self.output_parent_dir is None: + self.output_parent_dir = Path(self._install_path) + + # Add to config the path to timestamped output directory + timestamp = datetime.datetime.now() + timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S") + self._output_path = Path(self.output_parent_dir) / ( + str(self.output_dir_basename) + timestamp_formatted + ) + self._output_path.mkdir( + parents=True, # create any missing parents + exist_ok=True, # ignore FileExistsError exceptions + ) + + # Add to config the path to the output file + self._detected_cells_path = ( + self._output_path / self.detected_cells_filename + ) + + def add_input_paths(self): + """Adds input data paths to the config. + + Specifically, it adds: + - input_data_dir: set to a default value if not set in __init__(); + - _signal_dir_path: full path to the directory with the signal files + - _background_dir_path: full path to the directory with the + background files. + - _list_signal_files: list of signal files + - _list_background_files: list of background files + + Parameters + ---------- + config : CellfinderConfig + a cellfinder config with input data files to be validated + + Notes + ----- + The signal and background files are first searched locally at the + given location. If not found, we attempt to download them from GIN + and place them at the specified location (input_data_dir). + + - If both parent data directories (signal and background) exist + locally, the lists of signal and background files are added to + the config. + - If exactly one of the parent data directories is missing, an error + message is logged. + - If neither of them exist, the data is retrieved from the provided GIN + repository. If no URL or hash to GIN is provided, an error is thrown. + + """ + # Fetch logger + logger = logging.getLogger(LOGGER_NAME) + + # Fill in input data directory if not specified + if self.input_data_dir is None: + self.input_data_dir = ( + Path(self._install_path) / "cellfinder_test_data" + ) + + # Fill in signal and background paths derived from 'input_data_dir' + self._signal_dir_path = self.input_data_dir / Path(self.signal_subdir) + self._background_dir_path = self.input_data_dir / Path( + self.background_subdir + ) + + # Check if input data directories (signal and background) exist + # locally. + # If both directories exist, get list of signal and background files + if ( + Path(self._signal_dir_path).exists() + and Path(self._background_dir_path).exists() + ): + logger.info("Fetching input data from the local directories") + + self._list_signal_files = [ + f + for f in Path(self._signal_dir_path).resolve().iterdir() + if f.is_file() + ] + self._list_background_files = [ + f + for f in Path(self._background_dir_path).resolve().iterdir() + if f.is_file() + ] + + # If exactly one of the input data directories is missing, print error + elif ( + Path(self._signal_dir_path).resolve().exists() + or Path(self._background_dir_path).resolve().exists() + ): + if not Path(self._signal_dir_path).resolve().exists(): + logger.error( + f"The directory {self._signal_dir_path} does not exist", + ) + else: + logger.error( + f"The directory {self._background_dir_path} " + "does not exist", + ) + + # If neither of the input data directories exist, + # retrieve data from GIN repository and add list of files to config + else: + # Check if GIN URL and hash are defined (log error otherwise) + if self.data_url and self.data_hash: + # get list of files in GIN archive with pooch.retrieve + list_files_archive = pooch.retrieve( + url=self.data_url, + known_hash=self.data_hash, + path=Path( + self.input_data_dir + ).parent, # zip will be downloaded here + progressbar=True, + processor=pooch.Unzip( + extract_dir=Path(self.input_data_dir).stem, + # files are unpacked here, a dir + # *relative* to the path set in 'path' + ), + ) + logger.info( + "Fetching input data from the provided GIN repository" + ) + + # Check signal and background parent directories exist now + assert Path(self._signal_dir_path).resolve().exists() + assert Path(self._background_dir_path).resolve().exists() + + # Add signal files to config + self._list_signal_files = [ + f + for f in list_files_archive + if f.startswith( + str(Path(self._signal_dir_path).resolve()), + ) + ] + + # Add background files to config + self._list_background_files = [ + f + for f in list_files_archive + if f.startswith( + str(Path(self._background_dir_path).resolve()), + ) + ] + # If one of URL/hash to GIN repo not defined, throw an error + else: + logger.error( + "Input data not found locally, and URL/hash to " + "GIN repository not provided", + ) + + +def read_cellfinder_config( + input_config_path: str, log_on: bool = False +) -> CellfinderConfig: + """Instantiate a CellfinderConfig from the input json file. + + Assumes config is json serializable. + + Parameters + ---------- + input_config_path : Path + Absolute path to a cellfinder config file + log_on : bool, optional + whether to log the info messages from reading the config + to the logger, by default False + + Returns + ------- + CellfinderConfig: + The cellfinder config object, populated with data from the input + """ + + # read input config + with open(input_config_path) as cfg: + config_dict = json.load(cfg) + config = CellfinderConfig(**config_dict) + + # print config's origin to log if required + if log_on: + logger = logging.getLogger(LOGGER_NAME) + logger.info(f"Input config read from {input_config_path}") + if input_config_path == DEFAULT_JSON_CONFIG_PATH_CELLFINDER: + logger.info("Using default config file") + + return config + + +def setup(input_config_path: str) -> CellfinderConfig: + # setup logger + _ = setup_logger() + + # read config + cfg = read_cellfinder_config(input_config_path) + + return cfg + + +def run_workflow_from_cellfinder_run(cfg: CellfinderConfig): + """Run workflow based on the cellfinder.core.main.main() + function. + + The steps are: + 1. Read the input signal and background data as two separate + Dask arrays. + 2. Run the main cellfinder pipeline on the input Dask arrays, + with the parameters defined in the input configuration (cfg). + 3. Save the detected cells as an xml file to the location specified in + the input configuration (cfg). + + Parameters + ---------- + cfg : CellfinderConfig + a class with the required setup methods and parameters for + the cellfinder workflow + """ + # Read input data as Dask arrays + signal_array = read_with_dask(str(cfg._signal_dir_path)) + background_array = read_with_dask(str(cfg._background_dir_path)) + + # Run main analysis using `cellfinder_run` + detected_cells = cellfinder_run( + signal_array, + background_array, + cfg.voxel_sizes, + cfg.start_plane, + cfg.end_plane, + cfg.trained_model, + cfg.model_weights, + cfg.model, + cfg.batch_size, + cfg.n_free_cpus, + cfg.network_voxel_sizes, + cfg.soma_diameter, + cfg.ball_xy_size, + cfg.ball_z_size, + cfg.ball_overlap_fraction, + cfg.log_sigma_size, + cfg.n_sds_above_mean_thresh, + cfg.soma_spread_factor, + cfg.max_cluster_size, + cfg.cube_width, + cfg.cube_height, + cfg.cube_depth, + cfg.network_depth, + ) + + # Save results to xml file + save_cells( + detected_cells, + cfg._detected_cells_path, + ) + + +def main( + input_config: str = str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER), +) -> CellfinderConfig: + """Setup and run cellfinder workflow. + + This function runs the setup steps required + to run the cellfinder workflow, and the + workflow itself. Note that only the workflow + will be benchmarked. + + Parameters + ---------- + input_config : str, optional + Absolute path to input config file, + by default str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) + + Returns + ------- + cfg : CellfinderConfig + a class with the required setup methods and parameters for + the cellfinder workflow + """ + # run setup + cfg = setup(input_config) + + # run workflow + run_workflow_from_cellfinder_run(cfg) # only this will be benchmarked + + return cfg + + +def main_app_wrapper(): + """Parse command line arguments and + run cellfinder setup and workflow + + This function is used to define an entry-point, + that allows the user to run the cellfinder workflow + for a given input config file as: + `cellfinder-workflow --config `. + + If no input config file is provided, the default is used. + + """ + # parse CLI arguments + args = config_parser( + sys.argv[1:], # sys.argv[0] is the script name + str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER), + ) + + # run setup and workflow + _ = main(args.config) + + +if __name__ == "__main__": + main_app_wrapper() diff --git a/brainglobe_workflows/utils.py b/brainglobe_workflows/utils.py index 4b3bdac3..9f1eb8d5 100644 --- a/brainglobe_workflows/utils.py +++ b/brainglobe_workflows/utils.py @@ -70,14 +70,23 @@ def config_parser( # initialise argument parser parser = argparse.ArgumentParser( description=( - "To launch the workflow with " - "a specific set of input parameters, run: " - "`python brainglobe_workflows/cellfinder.py " - "--config path/to/config.json`" - "where path/to/input/config.json is the json file " - "containing the workflow parameters." - ) + """ + To launch the cellfinder workflow with default parameters, run: + `cellfinder-workflow`. + The default parameters are those specifed in brainglobe_workflows/ + cellfinder_core/configs/cellfinder.json. + + + To launch the cellfinder workflow with a specific set of input + parameters, run: + `cellfinder-workflow --config path/to/config.json`, + where `path/to/input/config.json` is the json file with the + desired parameters. + """ + ), + formatter_class=argparse.RawTextHelpFormatter, ) + # add arguments parser.add_argument( "-c", diff --git a/pyproject.toml b/pyproject.toml index 6ba3d3ce..809eed3d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ classifiers = [ "Topic :: Scientific/Engineering :: Image Recognition", ] -# Below the dependenciess for brainmapper (the cellfinder CLI tool) only +# Below the dependencies for brainmapper (the cellfinder CLI tool) only # (i.e., only what users will need for brainmapper) dependencies = [ "brainglobe>=1.0.0", diff --git a/tests/cellfinder_core/conftest.py b/tests/cellfinder_core/conftest.py index 24e0002c..f37c10cc 100644 --- a/tests/cellfinder_core/conftest.py +++ b/tests/cellfinder_core/conftest.py @@ -8,11 +8,23 @@ @pytest.fixture(autouse=True) def mock_home_directory(monkeypatch: pytest.MonkeyPatch): + """ + Monkeypatch pathlib.Path.home() + + Instead of returning the usual home path, the + monkeypatched version returns the path to + Path.home() / ".brainglobe-tests" + + Parameters + ---------- + monkeypatch : pytest.MonkeyPatch + a monkeypatch fixture + """ # define mock home path home_path = Path.home() # actual home path mock_home_path = home_path / ".brainglobe-tests" - # create directory if it doesn't exist + # create mock home directory if it doesn't exist if not mock_home_path.exists(): mock_home_path.mkdir() @@ -25,7 +37,9 @@ def mock_home(): @pytest.fixture() def default_input_config_cellfinder() -> Path: - """Return path to default input config for cellfinder workflow + """ + Fixture for the path to the default input + configuration file for the cellfinder workflow Returns ------- @@ -40,7 +54,8 @@ def default_input_config_cellfinder() -> Path: @pytest.fixture(scope="session") def cellfinder_GIN_data() -> dict: - """Return the data of the GIN repository with the input data + """ + Fixture for the location of the test data in the GIN repository Returns ------- @@ -58,15 +73,32 @@ def config_GIN_dict( cellfinder_GIN_data: dict, default_input_config_cellfinder: Path ) -> dict: """ - Return a config pointing to the location where GIN would be by default, - and download the data there + Fixture that returns a config as a dictionary, pointing to the location + where the GIN data would be by default, and download the data there. + + If the file exists in the given path and the hash matches, pooch.retrieve() + will not download the file and instead its path is returned. + + Parameters + ---------- + cellfinder_GIN_data : dict + dictionary with the location of the test data in the GIN repository + default_input_config_cellfinder : Path + path to the default input configuration file for the cellfinder + workflow + + Returns + ------- + dict + dictionary with the config for a workflow that uses the downloaded + GIN data """ # read default config as a dictionary with open(default_input_config_cellfinder) as cfg: config_dict = json.load(cfg) - # modify + # modify the default config: # - add url # - add data hash # - remove input_data_dir if present @@ -85,6 +117,8 @@ def config_GIN_dict( ) # download GIN data to default location for GIN + # if the file exists in the given path and the hash matches, + # it will not be downloaded and the absolute path to the file is returned. pooch.retrieve( url=cellfinder_GIN_data["url"], known_hash=cellfinder_GIN_data["hash"], @@ -101,21 +135,31 @@ def config_local_dict( config_GIN_dict, # forces download to GIN default location ) -> dict: """ - Return a config pointing to a local dataset, - and ensure the data exists there. + Fixture that returns a config as a dictionary, pointing to a local dataset. The data is copied to the local directory from the default location used in the config_GIN_dict fixture. + + Parameters + ---------- + config_GIN_dict : dict + dictionary with the config for a workflow that uses the downloaded + GIN data + + Returns + ------- + dict + dictionary with the config for a workflow that uses local data """ import shutil - # copy GIN config as dict + # copy GIN config as a dictionary config_dict = config_GIN_dict.copy() - # modify dict + # modify the GIN config: # - remove url # - remove data hash - # - point to a local directory under home in input_data_dir + # - set input data directory to a local directory under home config_dict["data_url"] = None config_dict["data_hash"] = None config_dict["input_data_dir"] = str(Path.home() / "local_cellfinder_data") @@ -140,6 +184,23 @@ def config_local_dict( @pytest.fixture() def config_GIN_json(config_GIN_dict: dict, tmp_path: Path) -> Path: + """ + Fixture that returns a config as a JSON file path, that points to GIN + downloaded data as input + + Parameters + ---------- + config_GIN_dict : dict + dictionary with the config for a workflow that uses the downloaded + GIN data + tmp_path : Path + Pytest fixture providing a temporary path + + Returns + ------- + Path + path to a cellfinder config JSON file + """ # define location of input config file config_file_path = tmp_path / "input_config.json" @@ -152,6 +213,22 @@ def config_GIN_json(config_GIN_dict: dict, tmp_path: Path) -> Path: @pytest.fixture() def config_local_json(config_local_dict: dict, tmp_path: Path) -> Path: + """ + Fixture that returns config as a JSON file path, that points to local data + as input + + Parameters + ---------- + config_local_dict : dict + _description_ + tmp_path : Path + Pytest fixture providing a temporary path + + Returns + ------- + Path + path to a cellfinder config JSON file + """ # define location of input config file config_file_path = tmp_path / "input_config.json" diff --git a/tests/cellfinder_core/test_integration/test_cellfinder.py b/tests/cellfinder_core/test_integration/test_cellfinder.py index 260880b3..e15bfd2b 100644 --- a/tests/cellfinder_core/test_integration/test_cellfinder.py +++ b/tests/cellfinder_core/test_integration/test_cellfinder.py @@ -6,14 +6,14 @@ def test_main(): - """Test main function for setting up and running cellfinder workflow + """ + Test main function for setting up and running cellfinder workflow with no inputs - """ - + # import inside the test function so that required functions are + # monkeypatched first from brainglobe_workflows.cellfinder_core.cellfinder_core import main - # otherwise imported before monkeypatching? # run main cfg = main() @@ -32,17 +32,14 @@ def test_main_w_inputs( input_config: Optional[str], request: pytest.FixtureRequest, ): - """Test main function for setting up and running cellfinder workflow + """ + Test main function for setting up and running cellfinder workflow with inputs Parameters ---------- input_config : Optional[str] - Path to input config json file - monkeypatch : pytest.MonkeyPatch - Pytest fixture to use monkeypatching utils - tmp_path : Path - Pytest fixture providing a temporary path for each test + Path to input config JSON file request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ @@ -57,7 +54,8 @@ def test_main_w_inputs( @pytest.mark.skip() def test_script(): - """Test running the cellfinder worklfow from the command line + """ + Test running the cellfinder worklfow from the command line with no inputs """ from brainglobe_workflows.cellfinder_core.cellfinder_core import ( @@ -74,7 +72,6 @@ def test_script(): # Path.home() is not monkeypatched :( subprocess_output = subprocess.run( subprocess_input, - # input=Path.home() ) # check returncode @@ -93,7 +90,8 @@ def test_script_w_inputs( input_config: Optional[str], request: pytest.FixtureRequest, ): - """Test running the cellfinder worklfow from the command line with inputs + """ + Test running the cellfinder worklfow from the command line with inputs Parameters ---------- @@ -130,7 +128,8 @@ def test_script_w_inputs( @pytest.mark.skip() def test_entry_point(): - """Test running the cellfinder workflow via the predefined entry point + """ + Test running the cellfinder workflow via the predefined entry point with no inputs """ @@ -158,7 +157,8 @@ def test_entry_point_w_inputs( input_config: Optional[str], request: pytest.FixtureRequest, ): - """Test running the cellfinder workflow via the predefined entry point + """ + Test running the cellfinder workflow via the predefined entry point with inputs Parameters diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index 6329b875..c039d493 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -13,12 +13,31 @@ def config_force_GIN_dict( config_GIN_dict: dict, tmp_path: Path, monkeypatch ) -> dict: """ - Return a config pointing to a temporary directory where to download GIN - data, without downloading the data first. + Fixture returning a config as a dictionary, which has a + Pytest-generated temporary directory as input data location, + and that monkeypatches pooch.retrieve() Since there is no data at the input_data_dir location, the GIN download - will be triggered + will be triggered, but the monkeypatched pooch.retrieve() will copy the + files rather than download them. + + Parameters + ---------- + config_GIN_dict : dict + dictionary with the config for a workflow that uses the downloaded + GIN data + tmp_path : Path + path to pytest-generated temporary directory + monkeypatch : pytest.MonkeyPatch + a monkeypatch fixture + + Returns + ------- + dict + dictionary with the config for a workflow that triggers the downloaded + GIN data """ + import shutil import pooch @@ -29,8 +48,8 @@ def config_force_GIN_dict( # point to a temporary directory in input_data_dir config_dict["input_data_dir"] = str(tmp_path) - # monkeypatch pooch.retrieve - # when called: copy GIN downloaded data, instead of re-downloading + # monkeypatch pooch.retrieve() + # when called copy GIN downloaded data, instead of downloading it def mock_pooch_download( url="", known_hash="", path="", progressbar="", processor="" ): @@ -71,57 +90,70 @@ def mock_pooch_download( @pytest.fixture() def config_missing_signal_dict(config_local_dict: dict) -> dict: """ - Return a config pointing to a local dataset, whose signal directory - does not exist + Fixture that returns a config as a dictionary, pointing to a local dataset, + whose signal directory does not exist Parameters ---------- config_local_dict : _type_ - _description_ + dictionary with the config for a workflow that uses local data Returns ------- - Path - Path to default input config - + dict + dictionary with the config for a workflow that uses local data, but + whose signal directory does not exist. """ - from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER + config_dict = config_local_dict.copy() + config_dict["signal_subdir"] = "_" + + return config_dict - return DEFAULT_JSON_CONFIG_PATH_CELLFINDER +@pytest.fixture() +def config_missing_background_dict(config_local_dict: dict) -> dict: + """ + Fixture that returns a config as a dictionary, pointing to a local dataset, + whose background directory does not exist -@pytest.fixture(scope="session") -def cellfinder_GIN_data() -> dict: - """Return the URL and hash to the GIN repository with the input data + Parameters + ---------- + config_local_dict : dict + dictionary with the config for a workflow that uses local data Returns ------- dict - URL and hash of the GIN repository with the cellfinder test data + dictionary with the config for a workflow that uses local data, but + whose background directory does not exist. """ - return { - "url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip", - "hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914", # noqa - } + config_dict = config_local_dict.copy() + config_dict["background_subdir"] = "_" + + return config_dict @pytest.fixture() -def config_local_dict( - cellfinder_GIN_data: dict, default_input_config_cellfinder: Path -) -> dict: - """ - Return a config pointing to a local dataset, - and ensure the data is downloaded there +def config_not_GIN_nor_local_dict(config_local_dict: dict) -> dict: """ + Fixture that returns a config as a dictionary, whose input_data_dir + directory does not exist and with no references to a GIN dataset. - # read default config as dict - with open(default_input_config_cellfinder) as cfg: - config_dict = json.load(cfg) + Parameters + ---------- + config_local_dict : dict + dictionary with the config for a workflow that uses local data + + Returns + ------- + dict + dictionary with the config for a workflow that uses local data, but + whose input_data_dir directory does not exist and with no references + to a GIN dataset. + """ + config_dict = config_local_dict.copy() + config_dict["input_data_dir"] = "_" - # modify dict - # - remove url - # - remove data hash - # - add input_data_dir config_dict["data_url"] = None config_dict["data_hash"] = None config_dict["input_data_dir"] = Path.home() / "local_cellfinder_data" @@ -163,16 +195,19 @@ def test_add_input_paths( message_pattern: str, request: pytest.FixtureRequest, ): - """Test signal and background files addition to the cellfinder config + """ + Test the addition of signal and background files to the cellfinder config Parameters ---------- caplog : pytest.LogCaptureFixture Pytest fixture to capture the logs during testing - input_config_dict : dicy + input_config_dict : dict input config as a dict message_pattern : str Expected pattern in the log + request : pytest.FixtureRequest + Pytest fixture to enable requesting fixtures by name """ # instantiate custom logger @@ -205,30 +240,19 @@ def test_read_cellfinder_config( caplog: pytest.LogCaptureFixture, request: pytest.FixtureRequest, ): - """Test setup steps for the cellfinder workflow, using the default config - and passing a specific config file. - - These setup steps include: - - instantiating a CellfinderConfig object using the input json file, - - add the signal and background files to the config if these are not - defined, - - create a timestamped directory for the output of the workflow if - it doesn't exist and add its path to the config + """Test reading a cellfinder config Parameters ---------- - input_config : str - Name of input config json file + input_config_path : str + path to input config file message : str - Expected log message - monkeypatch : pytest.MonkeyPatch - Pytest fixture to use monkeypatching utils - tmp_path : Path - Pytest fixture providing a temporary path for each test + Expected message in the log caplog : pytest.LogCaptureFixture Pytest fixture to capture the logs during testing request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name + """ from brainglobe_workflows.cellfinder_core.cellfinder_core import ( setup, @@ -284,8 +308,8 @@ def test_read_cellfinder_config( def test_setup( input_config: str, custom_logger_name: str, request: pytest.FixtureRequest ): - """Test full setup for cellfinder workflow, using the default config - and passing a specific config file. + """ + Test the full setup for the cellfinder workflow. Parameters ---------- @@ -293,10 +317,6 @@ def test_setup( Path to input config file custom_logger_name : str Name of custom logger - monkeypatch : MonkeyPatch - Pytest fixture to use monkeypatching utils - tmp_path : Path - Pytest fixture providing a temporary path for each test request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ @@ -329,17 +349,13 @@ def test_run_workflow_from_cellfinder_run( input_config: str, request: pytest.FixtureRequest, ): - """Test running cellfinder workflow with default input config - (fetches data from GIN) and local input config + """ + Test running cellfinder workflow Parameters ---------- input_config : str Path to input config json file - monkeypatch : MonkeyPatch - Pytest fixture to use monkeypatching utils - tmp_path : Path - Pytest fixture providing a temporary path for each test request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ diff --git a/tests/cellfinder_core/test_unit/test_utils.py b/tests/cellfinder_core/test_unit/test_utils.py index 2ec8d19e..f857912f 100644 --- a/tests/cellfinder_core/test_unit/test_utils.py +++ b/tests/cellfinder_core/test_unit/test_utils.py @@ -31,6 +31,13 @@ def test_setup_logger(custom_logger_name: str): [[], ["--config", str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)]], ) def test_config_parser(list_input_args: List[str]): + """Test parser for config argument + + Parameters + ---------- + list_input_args : List[str] + a list with the command-line input arguments + """ args = config_parser( list_input_args, str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER), From 6bc68fb82a0c42777f9d48887596721f850d979b Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 10 Jan 2024 15:19:05 +0000 Subject: [PATCH 36/41] add GIN default location as a fixture --- tests/cellfinder_core/conftest.py | 59 ++++++++++++++----- .../test_unit/test_cellfinder.py | 21 ++++--- 2 files changed, 56 insertions(+), 24 deletions(-) diff --git a/tests/cellfinder_core/conftest.py b/tests/cellfinder_core/conftest.py index f37c10cc..ad6e4eb9 100644 --- a/tests/cellfinder_core/conftest.py +++ b/tests/cellfinder_core/conftest.py @@ -68,9 +68,31 @@ def cellfinder_GIN_data() -> dict: } +@pytest.fixture() +def GIN_default_location() -> Path: + """A fixture returning a path to the default location + where GIN data is downloaded. + + Returns + ------- + Path + path to the default location where to download GIN data + """ + + return ( + Path.home() + / ".brainglobe" + / "workflows" + / "cellfinder_core" + / "cellfinder_test_data" + ) + + @pytest.fixture() def config_GIN_dict( - cellfinder_GIN_data: dict, default_input_config_cellfinder: Path + cellfinder_GIN_data: dict, + default_input_config_cellfinder: Path, + GIN_default_location: Path, ) -> dict: """ Fixture that returns a config as a dictionary, pointing to the location @@ -86,6 +108,8 @@ def config_GIN_dict( default_input_config_cellfinder : Path path to the default input configuration file for the cellfinder workflow + GIN_default_location : Path + path to the default location where to download GIN data Returns ------- @@ -108,13 +132,13 @@ def config_GIN_dict( del config_dict["input_data_dir"] # GIN downloaded data default location - GIN_default_location = ( - Path.home() - / ".brainglobe" - / "workflows" - / "cellfinder_core" - / "cellfinder_test_data" - ) + # GIN_default_location = ( + # Path.home() + # / ".brainglobe" + # / "workflows" + # / "cellfinder_core" + # / "cellfinder_test_data" + # ) # download GIN data to default location for GIN # if the file exists in the given path and the hash matches, @@ -132,7 +156,8 @@ def config_GIN_dict( @pytest.fixture() def config_local_dict( - config_GIN_dict, # forces download to GIN default location + config_GIN_dict: dict, + GIN_default_location: Path, ) -> dict: """ Fixture that returns a config as a dictionary, pointing to a local dataset. @@ -145,6 +170,8 @@ def config_local_dict( config_GIN_dict : dict dictionary with the config for a workflow that uses the downloaded GIN data + GIN_default_location : Path + path to the default location where to download GIN data\ Returns ------- @@ -166,13 +193,13 @@ def config_local_dict( # copy data from default GIN location to the local location # GIN downloaded data default location - GIN_default_location = ( - Path.home() - / ".brainglobe" - / "workflows" - / "cellfinder_core" - / "cellfinder_test_data" - ) + # GIN_default_location = ( + # Path.home() + # / ".brainglobe" + # / "workflows" + # / "cellfinder_core" + # / "cellfinder_test_data" + # ) shutil.copytree( GIN_default_location, config_dict["input_data_dir"], diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index c039d493..b14997d6 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -10,7 +10,10 @@ @pytest.fixture() def config_force_GIN_dict( - config_GIN_dict: dict, tmp_path: Path, monkeypatch + config_GIN_dict: dict, + tmp_path: Path, + GIN_default_location: Path, + monkeypatch: pytest.MonkeyPatch, ) -> dict: """ Fixture returning a config as a dictionary, which has a @@ -28,6 +31,8 @@ def config_force_GIN_dict( GIN data tmp_path : Path path to pytest-generated temporary directory + GIN_default_location : Path + path to the default location where to download GIN data monkeypatch : pytest.MonkeyPatch a monkeypatch fixture @@ -54,13 +59,13 @@ def mock_pooch_download( url="", known_hash="", path="", progressbar="", processor="" ): # GIN downloaded data default location - GIN_default_location = ( - Path.home() - / ".brainglobe" - / "workflows" - / "cellfinder_core" - / "cellfinder_test_data" - ) + # GIN_default_location = ( + # Path.home() + # / ".brainglobe" + # / "workflows" + # / "cellfinder_core" + # / "cellfinder_test_data" + # ) # Copy destination GIN_copy_destination = tmp_path From 5cdbbebaa7585831007feb8e659c506e643b61b2 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 10 Jan 2024 15:25:09 +0000 Subject: [PATCH 37/41] remove commented out GIN default location --- tests/cellfinder_core/conftest.py | 17 ----------------- .../test_unit/test_cellfinder.py | 12 ++---------- 2 files changed, 2 insertions(+), 27 deletions(-) diff --git a/tests/cellfinder_core/conftest.py b/tests/cellfinder_core/conftest.py index ad6e4eb9..1062ca38 100644 --- a/tests/cellfinder_core/conftest.py +++ b/tests/cellfinder_core/conftest.py @@ -131,15 +131,6 @@ def config_GIN_dict( if "input_data_dir" in config_dict.keys(): del config_dict["input_data_dir"] - # GIN downloaded data default location - # GIN_default_location = ( - # Path.home() - # / ".brainglobe" - # / "workflows" - # / "cellfinder_core" - # / "cellfinder_test_data" - # ) - # download GIN data to default location for GIN # if the file exists in the given path and the hash matches, # it will not be downloaded and the absolute path to the file is returned. @@ -192,14 +183,6 @@ def config_local_dict( config_dict["input_data_dir"] = str(Path.home() / "local_cellfinder_data") # copy data from default GIN location to the local location - # GIN downloaded data default location - # GIN_default_location = ( - # Path.home() - # / ".brainglobe" - # / "workflows" - # / "cellfinder_core" - # / "cellfinder_test_data" - # ) shutil.copytree( GIN_default_location, config_dict["input_data_dir"], diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index b14997d6..8183dcae 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -58,15 +58,6 @@ def config_force_GIN_dict( def mock_pooch_download( url="", known_hash="", path="", progressbar="", processor="" ): - # GIN downloaded data default location - # GIN_default_location = ( - # Path.home() - # / ".brainglobe" - # / "workflows" - # / "cellfinder_core" - # / "cellfinder_test_data" - # ) - # Copy destination GIN_copy_destination = tmp_path @@ -245,7 +236,8 @@ def test_read_cellfinder_config( caplog: pytest.LogCaptureFixture, request: pytest.FixtureRequest, ): - """Test reading a cellfinder config + """ + Test reading a cellfinder config Parameters ---------- From a3e53709f3fda4610f72fa141bc5bf468dc8a36c Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 10 Jan 2024 15:28:02 +0000 Subject: [PATCH 38/41] remove tests for running script (monkeypatched home function not passed to subprocess, and all subsequent steps already tested) --- .../test_integration/test_cellfinder.py | 76 ------------------- 1 file changed, 76 deletions(-) diff --git a/tests/cellfinder_core/test_integration/test_cellfinder.py b/tests/cellfinder_core/test_integration/test_cellfinder.py index e15bfd2b..1d458d2c 100644 --- a/tests/cellfinder_core/test_integration/test_cellfinder.py +++ b/tests/cellfinder_core/test_integration/test_cellfinder.py @@ -1,10 +1,8 @@ import subprocess -import sys from pathlib import Path import pytest from typing import Optional - def test_main(): """ Test main function for setting up and running cellfinder workflow @@ -52,80 +50,6 @@ def test_main_w_inputs( assert Path(cfg._detected_cells_path).is_file() -@pytest.mark.skip() -def test_script(): - """ - Test running the cellfinder worklfow from the command line - with no inputs - """ - from brainglobe_workflows.cellfinder_core.cellfinder_core import ( - __file__ as script_path, - ) - - # define CLI input - subprocess_input = [ - sys.executable, - str(script_path), - ] - - # run workflow - # Path.home() is not monkeypatched :( - subprocess_output = subprocess.run( - subprocess_input, - ) - - # check returncode - assert subprocess_output.returncode == 0 - - -@pytest.mark.skip() -@pytest.mark.parametrize( - "input_config", - [ - "config_local_json", - "config_GIN_json", - ], -) -def test_script_w_inputs( - input_config: Optional[str], - request: pytest.FixtureRequest, -): - """ - Test running the cellfinder worklfow from the command line with inputs - - Parameters - ---------- - input_config : Optional[str] - Path to input config json file - request : pytest.FixtureRequest - Pytest fixture to enable requesting fixtures by name - """ - - # path to script - script_path = ( - Path(__file__).resolve().parents[3] - / "brainglobe_workflows" - / "cellfinder_core" - / "cellfinder.py" - ) - - # define CLI input - subprocess_input = [ - sys.executable, - str(script_path), - ] - subprocess_input.append("--config") - subprocess_input.append(str(request.getfixturevalue(input_config))) - - # run workflow - subprocess_output = subprocess.run( - subprocess_input, - ) - - # check returncode - assert subprocess_output.returncode == 0 - - @pytest.mark.skip() def test_entry_point(): """ From c4c998df9871ff8e9a667d77e274ad55b5818b7e Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 10 Jan 2024 15:32:47 +0000 Subject: [PATCH 39/41] replace entry point test for smoketest --- .../test_integration/test_cellfinder.py | 49 ++----------------- 1 file changed, 5 insertions(+), 44 deletions(-) diff --git a/tests/cellfinder_core/test_integration/test_cellfinder.py b/tests/cellfinder_core/test_integration/test_cellfinder.py index 1d458d2c..687a1058 100644 --- a/tests/cellfinder_core/test_integration/test_cellfinder.py +++ b/tests/cellfinder_core/test_integration/test_cellfinder.py @@ -3,6 +3,7 @@ import pytest from typing import Optional + def test_main(): """ Test main function for setting up and running cellfinder workflow @@ -50,54 +51,14 @@ def test_main_w_inputs( assert Path(cfg._detected_cells_path).is_file() -@pytest.mark.skip() -def test_entry_point(): - """ - Test running the cellfinder workflow via the predefined entry point - with no inputs - """ - - # define CLI input - subprocess_input = ["cellfinder-workflow"] - - # run workflow - subprocess_output = subprocess.run( - subprocess_input, - ) - - # check returncode - assert subprocess_output.returncode == 0 - - -@pytest.mark.skip() -@pytest.mark.parametrize( - "input_config", - [ - "config_local_json", - "config_GIN_json", - ], -) -def test_entry_point_w_inputs( - input_config: Optional[str], - request: pytest.FixtureRequest, -): +def test_entry_point_help(): """ - Test running the cellfinder workflow via the predefined entry point - with inputs - - Parameters - ---------- - input_config : Optional[str] - Path to input config json file - request : pytest.FixtureRequest - Pytest fixture to enable requesting fixtures by name + Smoke test the cellfinder workflow entry point by checking + help is printed out successfully """ # define CLI input - subprocess_input = ["cellfinder-workflow"] - # append config - subprocess_input.append("--config") - subprocess_input.append(str(request.getfixturevalue(input_config))) + subprocess_input = ["cellfinder-workflow", "--help"] # run workflow subprocess_output = subprocess.run( From f84be907a95c390baa38b84a1eeb89f6330108ce Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 10 Jan 2024 16:48:37 +0000 Subject: [PATCH 40/41] fixes from rebase --- tests/cellfinder_core/conftest.py | 2 + .../test_unit/test_cellfinder.py | 88 ++++++++++--------- 2 files changed, 47 insertions(+), 43 deletions(-) diff --git a/tests/cellfinder_core/conftest.py b/tests/cellfinder_core/conftest.py index 1062ca38..f683d617 100644 --- a/tests/cellfinder_core/conftest.py +++ b/tests/cellfinder_core/conftest.py @@ -1,5 +1,7 @@ """Pytest fixtures shared across unit and integration tests""" + +import json from pathlib import Path import pooch diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index 8183dcae..e300c743 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -4,7 +4,7 @@ from pathlib import Path import pytest -import pooch + from brainglobe_workflows.utils import setup_logger @@ -152,26 +152,10 @@ def config_not_GIN_nor_local_dict(config_local_dict: dict) -> dict: config_dict["data_url"] = None config_dict["data_hash"] = None - config_dict["input_data_dir"] = Path.home() / "local_cellfinder_data" - - # fetch data from GIN and download locally to local location? - pooch.retrieve( - url=cellfinder_GIN_data["url"], - known_hash=cellfinder_GIN_data["hash"], - path=Path( - config_dict["input_data_dir"] - ).parent, # path to download zip to - progressbar=True, - processor=pooch.Unzip( - extract_dir=Path(config_dict["input_data_dir"]).stem - # path to unzipped dir, *relative* to 'path' - ), - ) - return config_dict + return config_dict -@pytest.mark.skip(reason="focus of PR62") @pytest.mark.parametrize( "input_config_dict, message_pattern", [ @@ -183,6 +167,16 @@ def config_not_GIN_nor_local_dict(config_local_dict: dict) -> dict: "config_local_dict", "Fetching input data from the local directories", ), + ( + "config_missing_signal_dict", + "The directory .+ does not exist$", + ), + ("config_missing_background_dict", "The directory .+ does not exist$"), + ( + "config_not_GIN_nor_local_dict", + "Input data not found locally, and URL/hash to " + "GIN repository not provided", + ), ], ) def test_add_input_paths( @@ -206,13 +200,15 @@ def test_add_input_paths( Pytest fixture to enable requesting fixtures by name """ + from brainglobe_workflows.cellfinder_core.cellfinder_core import ( + CellfinderConfig, + ) + # instantiate custom logger _ = setup_logger() - # read json as Cellfinder config - # ---> change so that the fixture is the config object! - # config = read_cellfinder_config(input_configs_dir / input_config) - _ = request.getfixturevalue(input_config_dict) + # instantiate config object + _ = CellfinderConfig(**request.getfixturevalue(input_config_dict)) # check log messages assert len(caplog.messages) > 0 @@ -225,14 +221,12 @@ def test_add_input_paths( "input_config_path, message", [ ("default_input_config_cellfinder", "Using default config file"), - # ("config_GIN", "Input config read from"), + ("config_local_json", "Input config read from"), ], ) def test_read_cellfinder_config( input_config_path: str, message: str, - monkeypatch: pytest.MonkeyPatch, - tmp_path: Path, caplog: pytest.LogCaptureFixture, request: pytest.FixtureRequest, ): @@ -252,20 +246,25 @@ def test_read_cellfinder_config( """ from brainglobe_workflows.cellfinder_core.cellfinder_core import ( - setup, - DEFAULT_JSON_CONFIG_PATH_CELLFINDER + read_cellfinder_config, ) - # setup logger + # instantiate custom logger _ = setup_logger() - # monkeypatch to change current directory to - # pytest temporary directory - # (cellfinder cache directory is created in cwd) - # monkeypatch.chdir(tmp_path) + # read Cellfinder config + config = read_cellfinder_config( + request.getfixturevalue(input_config_path), log_on=True + ) - # setup workflow - config = setup(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) + # read json as dict + with open(request.getfixturevalue(input_config_path)) as cfg: + config_dict = json.load(cfg) + + # check keys of dictionary are a subset of Cellfinder config attributes + assert all( + [ky in config.__dataclass_fields__.keys() for ky in config_dict.keys()] + ) # check logs assert message in caplog.text @@ -279,12 +278,12 @@ def test_read_cellfinder_config( assert all([Path(f).is_file() for f in config._list_background_files]) # check output directory exists - assert Path(config.output_path).resolve().is_dir() + assert Path(config._output_path).resolve().is_dir() # check output directory name has correct format out = re.fullmatch( str(config.output_dir_basename) + "\\d{8}_\\d{6}$", - Path(config.output_path).stem, + Path(config._output_path).stem, ) assert out is not None assert out.group() is not None @@ -292,7 +291,7 @@ def test_read_cellfinder_config( # check output file path is as expected assert ( Path(config._detected_cells_path) - == Path(config.output_path) / config.detected_cells_filename + == Path(config._output_path) / config.detected_cells_filename ) @@ -300,6 +299,8 @@ def test_read_cellfinder_config( "input_config", [ "default_input_config_cellfinder", + "config_local_json", + "config_GIN_json", ], ) def test_setup( @@ -321,11 +322,11 @@ def test_setup( CellfinderConfig, ) from brainglobe_workflows.cellfinder_core.cellfinder_core import ( - setup as setup_full, + setup as setup_workflow, ) # run setup on default configuration - cfg = setup_full(str(request.getfixturevalue(input_config))) + cfg = setup_workflow(str(request.getfixturevalue(input_config))) # check logger exists logger = logging.getLogger(custom_logger_name) @@ -340,11 +341,12 @@ def test_setup( "input_config", [ "default_input_config_cellfinder", + "config_local_json", + "config_GIN_json", ], ) def test_run_workflow_from_cellfinder_run( - input_config: str, - request: pytest.FixtureRequest, + input_config: str, request: pytest.FixtureRequest ): """ Test running cellfinder workflow @@ -360,11 +362,11 @@ def test_run_workflow_from_cellfinder_run( run_workflow_from_cellfinder_run, ) from brainglobe_workflows.cellfinder_core.cellfinder_core import ( - setup as setup_full, + setup as setup_workflow, ) # run setup - cfg = setup_full(str(request.getfixturevalue(input_config))) + cfg = setup_workflow(str(request.getfixturevalue(input_config))) # run workflow run_workflow_from_cellfinder_run(cfg) From 6b9f90cb5941068e7197aa47577ba0904aeeebd8 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 10 Jan 2024 16:59:15 +0000 Subject: [PATCH 41/41] fix ruff --- tests/cellfinder_core/test_integration/test_cellfinder.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/cellfinder_core/test_integration/test_cellfinder.py b/tests/cellfinder_core/test_integration/test_cellfinder.py index 687a1058..9af0c0b0 100644 --- a/tests/cellfinder_core/test_integration/test_cellfinder.py +++ b/tests/cellfinder_core/test_integration/test_cellfinder.py @@ -1,8 +1,9 @@ import subprocess from pathlib import Path -import pytest from typing import Optional +import pytest + def test_main(): """ @@ -66,4 +67,4 @@ def test_entry_point_help(): ) # check returncode - assert subprocess_output.returncode == 0 \ No newline at end of file + assert subprocess_output.returncode == 0