diff --git a/MANIFEST.in b/MANIFEST.in
index 4d2eeb43..2c5098f4 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -6,17 +6,18 @@ exclude *.yaml
 exclude *.yml
 exclude Dockerfile
 exclude *.ini
-exclude asv.conf.json
-exclude brainglobe_workflows/cellfinder/default_config.json
 
 recursive-include brainglobe_workflows *.py
+recursive-include brainglobe_workflows/configs *.json
+recursive-include brainglobe_benchmarks *.py
+recursive-exclude brainglobe_benchmarks/results *
+include asv.conf.json
 
 recursive-exclude * __pycache__
 recursive-exclude * *.py[co]
 
 global-include *.pxd
 
-prune benchmarks
 prune docs
 prune tests
 prune resources
diff --git a/asv.conf.json b/asv.conf.json
index d620a545..8ce9490d 100644
--- a/asv.conf.json
+++ b/asv.conf.json
@@ -11,7 +11,8 @@
 
     // The URL or local path of the source code repository for the
     // project being benchmarked
-    "repo": ".",
+    // "repo": ".",
+    "repo": "https://github.com/brainglobe/brainglobe-workflows",
 
     // The Python project's subdirectory in your repo.  If missing or
     // the empty string, the project is assumed to be located at the root
@@ -39,14 +40,14 @@
 
     // List of branches to benchmark. If not provided, defaults to "master"
     // (for git) or "default" (for mercurial).
-    "branches": ["main"], // for git
+    "branches": ["smg/tests-refactor"], // for git
     // "branches": ["default"],    // for mercurial
 
     // The DVCS being used.  If not set, it will be automatically
     // determined from "repo" by looking at the protocol in the URL
     // (if remote), or by looking for special directories, such as
     // ".git" (if local).
-    // "dvcs": "git",
+    "dvcs": "git",
 
     // The tool to use to create environments.  May be "conda",
     // "virtualenv", "mamba" (above 3.8)
@@ -146,7 +147,7 @@
 
     // The directory (relative to the current directory) that benchmarks are
     // stored in.  If not provided, defaults to "benchmarks"
-    // "benchmark_dir": "benchmarks",
+    "benchmark_dir": "brainglobe_benchmarks",
 
     // The directory (relative to the current directory) to cache the Python
     // environments in.  If not provided, defaults to "env"
@@ -154,11 +155,11 @@
 
     // The directory (relative to the current directory) that raw benchmark
     // results are stored in.  If not provided, defaults to "results".
-    "results_dir": "benchmarks/results",
+    "results_dir": "brainglobe_benchmarks/results",
 
     // The directory (relative to the current directory) that the html tree
     // should be written to.  If not provided, defaults to "html".
-    "html_dir": "benchmarks/html",
+    "html_dir": "brainglobe_benchmarks/html",
 
     // The number of characters to retain in the commit hashes.
     // "hash_length": 8,
diff --git a/benchmarks/__init__.py b/brainglobe_benchmarks/__init__.py
similarity index 100%
rename from benchmarks/__init__.py
rename to brainglobe_benchmarks/__init__.py
diff --git a/benchmarks/cellfinder.py b/brainglobe_benchmarks/cellfinder.py
similarity index 95%
rename from benchmarks/cellfinder.py
rename to brainglobe_benchmarks/cellfinder.py
index 76d364bc..e471aad7 100644
--- a/benchmarks/cellfinder.py
+++ b/brainglobe_benchmarks/cellfinder.py
@@ -7,14 +7,12 @@
 from cellfinder_core.main import main as cellfinder_run
 from cellfinder_core.tools.IO import read_with_dask
 
-from brainglobe_workflows.cellfinder.cellfinder_main import (
-    DEFAULT_JSON_CONFIG_PATH,
+from brainglobe_workflows.cellfinder import (
     CellfinderConfig,
     run_workflow_from_cellfinder_run,
 )
-from brainglobe_workflows.cellfinder.cellfinder_main import (
-    setup as setup_cellfinder_workflow,
-)
+from brainglobe_workflows.cellfinder import setup as setup_cellfinder_workflow
+from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
 
 
 class TimeBenchmarkPrepGIN:
@@ -79,7 +77,7 @@ class TimeBenchmarkPrepGIN:
     min_run_count = 2  # default:2
 
     # Custom attributes
-    input_config_path = str(DEFAULT_JSON_CONFIG_PATH)
+    input_config_path = str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)
 
     def setup_cache(
         self,
@@ -114,7 +112,7 @@ def setup_cache(
             known_hash=config.data_hash,
             path=config.install_path,
             progressbar=True,
-            processor=pooch.Unzip(extract_dir=config.extract_dir_relative),
+            processor=pooch.Unzip(extract_dir=config.data_dir_relative),
         )
 
         # Check paths to input data should now exist in config
diff --git a/brainglobe_workflows/cellfinder.py b/brainglobe_workflows/cellfinder.py
new file mode 100644
index 00000000..ec6dfa60
--- /dev/null
+++ b/brainglobe_workflows/cellfinder.py
@@ -0,0 +1,420 @@
+"""This script reproduces the most common cellfinder workflow
+
+It receives as an (optional) command line input the path to a configuration
+json file, that holds the values of the required parameters for the workflow.
+
+If no input json file is passed as a configuration, the default
+configuration defined at brainglobe_workflows/configs/cellfinder.json
+is used.
+
+Example usage:
+ - to pass a custom configuration, run (from the cellfinder.py
+   parent directory):
+    python cellfinder.py --config path/to/input/config.json
+ - to use the default configuration, run
+    python cellfinder.py
+
+
+"""
+
+
+import datetime
+import json
+import logging
+import os
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional, Tuple, Union
+
+import pooch
+from brainglobe_utils.IO.cells import save_cells
+from cellfinder_core.main import main as cellfinder_run
+from cellfinder_core.tools.IO import read_with_dask
+from cellfinder_core.train.train_yml import depth_type
+
+from brainglobe_workflows.utils import (
+    DEFAULT_JSON_CONFIG_PATH_CELLFINDER,
+    config_parser,
+    setup_logger,
+)
+from brainglobe_workflows.utils import __name__ as LOGGER_NAME
+
+Pathlike = Union[str, os.PathLike]  # a path given as str or os.PathLike
+
+
+@dataclass
+class CellfinderConfig:
+    """
+    Hold the input and output data locations, and the parameters of
+    the cellfinder workflow, as read from a json config file.
+    """
+
+    # input data
+    # data_dir_relative: parent directory of signal_subdir and
+    # background_subdir, relative to install_path
+    data_dir_relative: Pathlike
+    signal_subdir: str
+    background_subdir: str
+
+    # output: basename gets a timestamp appended, filename goes inside it
+    output_path_basename_relative: Pathlike
+    detected_cells_filename: Pathlike
+
+    # preprocessing parameters (this module only forwards voxel_sizes)
+    voxel_sizes: Tuple[float, float, float]
+    start_plane: int
+    end_plane: int
+    trained_model: Optional[
+        os.PathLike
+    ]  # if None, it will use a default model
+    model_weights: Optional[os.PathLike]
+    model: str
+    batch_size: int
+    n_free_cpus: int
+    network_voxel_sizes: Tuple[int, int, int]
+    soma_diameter: int
+    ball_xy_size: int
+    ball_z_size: int
+    ball_overlap_fraction: float
+    log_sigma_size: float
+    n_sds_above_mean_thresh: int
+    soma_spread_factor: float
+    max_cluster_size: int
+    cube_width: int
+    cube_height: int
+    cube_depth: int
+    network_depth: depth_type
+
+    # install path (root for all inputs and outputs)
+    install_path: Pathlike = ".cellfinder_workflows"
+
+    # origin of data to download (used only if no local data is found)
+    data_url: Optional[str] = None
+    data_hash: Optional[str] = None
+
+    # The following attributes are filled in by setup_workflow and
+    # add_signal_and_background_files, unless the config provides them
+    list_signal_files: Optional[list] = None
+    list_background_files: Optional[list] = None
+    output_path: Pathlike = ""
+    detected_cells_path: Pathlike = ""
+    signal_dir_path: Pathlike = ""
+    background_dir_path: Pathlike = ""
+
+
+def read_cellfinder_config(input_config_path: Path):
+    """Instantiate a CellfinderConfig from the input json file
+
+    The json keys are passed as keyword arguments to CellfinderConfig.
+
+    Parameters
+    ----------
+    input_config_path : Path
+        Path to a cellfinder config file (json)
+
+    Returns
+    -------
+    CellfinderConfig:
+        The cellfinder config object, populated with data from the input
+    """
+    # read input config
+    with open(input_config_path) as cfg:
+        config_dict = json.load(cfg)
+    config = CellfinderConfig(**config_dict)
+
+    return config
+
+
+def add_signal_and_background_files(
+    config: CellfinderConfig,
+) -> CellfinderConfig:
+    """
+    Adds the lists of input data files (signal and background)
+    to the config.
+
+    These files are first searched locally. If not found, we
+    attempt to download them from GIN.
+
+    Specifically:
+    - If both parent data directories (signal and background) exist locally,
+    the lists of signal and background files are added to the config.
+    - If exactly one of the parent data directories is missing, an error
+    message is logged and the file lists are left untouched.
+    - If neither of them exist, the data is retrieved from the provided GIN
+    repository. If no URL or hash to GIN is provided, an error is logged.
+
+    Parameters
+    ----------
+    config : CellfinderConfig
+        a config whose signal_dir_path and background_dir_path are set
+
+    Returns
+    -------
+    config : CellfinderConfig
+        the same config object, with the input data lists updated.
+    """
+    # Fetch logger
+    logger = logging.getLogger(LOGGER_NAME)
+
+    # Check if input data directories (signal and background) exist locally.
+    # If both directories exist, list their files (as resolved Path objects)
+    if (
+        Path(config.signal_dir_path).exists()
+        and Path(config.background_dir_path).exists()
+    ):
+        logger.info("Fetching input data from the local directories")
+
+        config.list_signal_files = [
+            f
+            for f in Path(config.signal_dir_path).resolve().iterdir()
+            if f.is_file()
+        ]
+        config.list_background_files = [
+            f
+            for f in Path(config.background_dir_path).resolve().iterdir()
+            if f.is_file()
+        ]
+
+    # If exactly one of the input data directories is missing, log an error
+    elif (
+        Path(config.signal_dir_path).resolve().exists()
+        or Path(config.background_dir_path).resolve().exists()
+    ):
+        if not Path(config.signal_dir_path).resolve().exists():
+            logger.error(
+                f"The directory {config.signal_dir_path} does not exist"
+            )
+        else:
+            logger.error(
+                f"The directory {config.background_dir_path} " "does not exist"
+            )
+
+    # If neither of the input data directories exist,
+    # retrieve data from GIN repository and add list of files to config
+    else:
+        # Check if GIN URL and hash are defined (log error otherwise)
+        if config.data_url and config.data_hash:
+            # get list of files in GIN archive with pooch.retrieve
+            list_files_archive = pooch.retrieve(
+                url=config.data_url,
+                known_hash=config.data_hash,
+                path=config.install_path,  # zip will be downloaded here
+                progressbar=True,
+                processor=pooch.Unzip(
+                    extract_dir=config.data_dir_relative
+                    # path to unzipped dir,
+                    # *relative* to the path set in 'path'
+                ),
+            )
+            logger.info("Fetching input data from the provided GIN repository")
+
+            # Check signal and background parent directories exist now
+            assert Path(config.signal_dir_path).resolve().exists()
+            assert Path(config.background_dir_path).resolve().exists()
+
+            # Add signal files to config (entries are path strings here)
+            config.list_signal_files = [
+                f
+                for f in list_files_archive
+                if f.startswith(
+                    str(Path(config.signal_dir_path).resolve())
+                )  # keep the files located under the signal directory
+            ]
+
+            # Add background files to config (entries are path strings here)
+            config.list_background_files = [
+                f
+                for f in list_files_archive
+                if f.startswith(
+                    str(Path(config.background_dir_path).resolve())
+                )
+            ]
+        # If one of URL/hash to GIN repo not defined, log an error
+        else:
+            logger.error(
+                "Input data not found locally, and URL/hash to "
+                "GIN repository not provided"
+            )
+
+    return config
+
+
+def setup_workflow(input_config_path: Path) -> CellfinderConfig:
+    """Run setup steps prior to executing the workflow
+
+    These setup steps include:
+    - instantiating a CellfinderConfig object from the input json file,
+    - building the full paths to the signal and background directories,
+    - adding the lists of input data files to the config (local data is
+    used if present, otherwise it is fetched from the GIN repository), and
+    - creating a timestamped directory for the output of the workflow
+    under the install path, and adding its path to the config
+
+    Parameters
+    ----------
+    input_config_path : Path
+        path to the input config file
+
+    Returns
+    -------
+    config : CellfinderConfig
+        a dataclass whose attributes are the parameters
+        for running cellfinder.
+    """
+
+    # Fetch logger
+    logger = logging.getLogger(LOGGER_NAME)
+
+    # Check config file exists (AssertionError otherwise; skipped under -O)
+    assert input_config_path.exists()
+
+    # Instantiate a CellfinderConfig from the input json file
+    # (assumes config is json serializable)
+    config = read_cellfinder_config(input_config_path)
+
+    # Log status (the default config is detected by plain Path equality)
+    logger.info(f"Input config read from {input_config_path}")
+    if input_config_path == DEFAULT_JSON_CONFIG_PATH_CELLFINDER:
+        logger.info("Using default config file")
+
+    # Add lists of input data files to the config,
+    # unless both lists are already provided by the config file
+    if not (config.list_signal_files and config.list_background_files):
+        # build fullpaths to input directories
+        config.signal_dir_path = str(
+            Path(config.install_path)
+            / config.data_dir_relative
+            / config.signal_subdir
+        )
+        config.background_dir_path = str(
+            Path(config.install_path)
+            / config.data_dir_relative
+            / config.background_subdir
+        )
+
+        # add signal and background files to config
+        config = add_signal_and_background_files(config)
+
+    # Create timestamped output directory (one-second resolution in the name)
+    timestamp = datetime.datetime.now()
+    timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S")
+    output_path_timestamped = Path(config.install_path) / (
+        str(config.output_path_basename_relative) + timestamp_formatted
+    )
+    output_path_timestamped.mkdir(
+        parents=True,  # create any missing parents
+        exist_ok=True,  # ignore FileExistsError exceptions
+    )
+
+    # Add output path and output file path to config (both as Path objects)
+    config.output_path = output_path_timestamped
+    config.detected_cells_path = (
+        config.output_path / config.detected_cells_filename
+    )
+
+    return config
+
+
+def setup(input_config_path: str) -> CellfinderConfig:
+    """Set up the logger and build the cellfinder config from a json file."""
+    # setup logger
+    _ = setup_logger()
+    # run setup steps and return config
+    cfg = setup_workflow(Path(input_config_path))
+
+    return cfg
+
+
+def run_workflow_from_cellfinder_run(cfg: CellfinderConfig):
+    """
+    Run workflow based on the cellfinder_core.main.main()
+    function.
+
+    The steps are:
+    1. Read the input signal and background data as two separate
+       Dask arrays.
+    2. Run the main cellfinder pipeline on the input Dask arrays,
+       passing only the voxel sizes from the input configuration (cfg).
+    3. Save the detected cells as an xml file to the location specified in
+       the input configuration (cfg).
+
+    Parameters
+    ----------
+    cfg : CellfinderConfig
+        a config with signal_dir_path, background_dir_path, voxel_sizes
+        and detected_cells_path set (see the setup steps of the workflow)
+    """
+    # Read input data as Dask arrays
+    signal_array = read_with_dask(cfg.signal_dir_path)
+    background_array = read_with_dask(cfg.background_dir_path)
+
+    # Run main analysis using `cellfinder_run` (only voxel_sizes is passed)
+    detected_cells = cellfinder_run(
+        signal_array, background_array, cfg.voxel_sizes
+    )
+
+    # Save results to xml file
+    save_cells(
+        detected_cells,
+        cfg.detected_cells_path,
+    )
+
+
+def main(
+    input_config: str = str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER),
+) -> CellfinderConfig:
+    """
+    Setup and run cellfinder workflow.
+
+    This function runs the setup steps required
+    to run the cellfinder workflow, and the
+    workflow itself. Note that only the workflow
+    will be benchmarked.
+
+    Parameters
+    ----------
+    input_config : str, optional
+        Path to the input config file (json),
+        by default str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)
+
+    Returns
+    -------
+    cfg : CellfinderConfig
+        the config built during setup, holding the parameters and the
+        input and output paths used by the cellfinder workflow
+    """
+    # run setup
+    cfg = setup(input_config)
+
+    # run workflow
+    run_workflow_from_cellfinder_run(cfg)  # only this will be benchmarked
+
+    return cfg
+
+
+def main_app_wrapper():
+    """
+    Parse command line arguments and
+    run cellfinder setup and workflow
+
+    This function is used to define an entry-point,
+    that allows the user to run the cellfinder workflow
+    for a given input config file as:
+    `cellfinder-workflow --config <path-to-input-config>`.
+
+    If no input config file is provided, the default one
+    (DEFAULT_JSON_CONFIG_PATH_CELLFINDER) is used.
+    """
+    # parse CLI arguments; the second argument is the default config path
+    args = config_parser(
+        sys.argv[1:],  # sys.argv[0] is the script name
+        str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER),
+    )
+
+    # run setup and workflow (the returned config is not needed here)
+    _ = main(args.config)
+
+
+if __name__ == "__main__":  # executed as a script rather than imported
+    main_app_wrapper()
diff --git a/brainglobe_workflows/cellfinder/cellfinder_main.py b/brainglobe_workflows/cellfinder/cellfinder_main.py
deleted file mode 100644
index fd19db34..00000000
--- a/brainglobe_workflows/cellfinder/cellfinder_main.py
+++ /dev/null
@@ -1,404 +0,0 @@
-"""This script reproduces the most common cellfinder workflow
-
-It receives as an (optional) command line input the path to a configuration
-json file, that holds the values of the required parameters for the workflow.
-
-If no input json file is passed as a configuration, the default
-configuration defined at brainglobe_workflows/cellfinder/default_config.json
-is used.
-
-Example usage:
- - to pass a custom configuration, run (from the cellfinder_main.py
-   parent directory):
-    python cellfinder_main.py --config path/to/input/config.json
- - to use the default configuration, run
-    python cellfinder_main.py
-
-
-"""
-
-import argparse
-import datetime
-import json
-import logging
-import os
-import sys
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Optional, Tuple, Union
-
-import pooch
-from brainglobe_utils.IO.cells import save_cells
-from cellfinder_core.main import main as cellfinder_run
-from cellfinder_core.tools.IO import read_with_dask
-from cellfinder_core.train.train_yml import depth_type
-
-Pathlike = Union[str, os.PathLike]
-
-DEFAULT_JSON_CONFIG_PATH = (
-    Path(__file__).resolve().parent / "default_config.json"
-)
-
-
-@dataclass
-class CellfinderConfig:
-    """
-    Define input and output data locations, and the parameters for
-    the cellfinder preprocessing steps.
-    """
-
-    # cellfinder workflows cache directory
-    install_path: Pathlike
-
-    # cached subdirectory to save data to
-    extract_dir_relative: Pathlike
-    signal_subdir: str
-    background_subdir: str
-    output_path_basename_relative: Pathlike
-    detected_cells_filename: Pathlike
-
-    # preprocessing parameters
-    voxel_sizes: Tuple[float, float, float]
-    start_plane: int
-    end_plane: int
-    trained_model: Optional[
-        os.PathLike
-    ]  # if None, it will use a default model
-    model_weights: Optional[os.PathLike]
-    model: str
-    batch_size: int
-    n_free_cpus: int
-    network_voxel_sizes: Tuple[int, int, int]
-    soma_diameter: int
-    ball_xy_size: int
-    ball_z_size: int
-    ball_overlap_fraction: float
-    log_sigma_size: float
-    n_sds_above_mean_thresh: int
-    soma_spread_factor: float
-    max_cluster_size: int
-    cube_width: int
-    cube_height: int
-    cube_depth: int
-    network_depth: depth_type
-
-    # origin of data to download (if required)
-    data_url: Optional[str] = None
-    data_hash: Optional[str] = None
-
-    # The following attributes are added
-    # during the setup phase of the workflow
-    list_signal_files: Optional[list] = None
-    list_background_files: Optional[list] = None
-    output_path: Pathlike = ""
-    signal_dir_path: Pathlike = ""
-    background_dir_path: Pathlike = ""
-    detected_cells_path: Pathlike = ""
-
-
-def setup(argv=None) -> CellfinderConfig:
-    def parse_cli_arguments(argv_) -> argparse.Namespace:
-        """Define argument parser for cellfinder
-        workflow script.
-
-        It expects a path to a json file with the
-        parameters required to run the workflow.
-        If none is provided, the default
-
-        Returns
-        -------
-        args : argparse.Namespace
-            command line input arguments parsed
-        """
-        # initialise argument parser
-        parser = argparse.ArgumentParser(
-            description=(
-                "To launch the workflow with "
-                "a specific set of input parameters, run: "
-                "`python cellfinder_main.py --config path/to/config.json`"
-                "where path/to/input/config.json is the json file "
-                "containing the workflow parameters."
-            )
-        )
-        # add arguments
-        parser.add_argument(
-            "-c",
-            "--config",
-            default=str(DEFAULT_JSON_CONFIG_PATH),
-            type=str,
-            metavar="CONFIG",  # a name for usage messages
-            help="",
-        )
-
-        # build parser object
-        args = parser.parse_args(argv_)
-
-        # print error if required arguments not provided
-        if not args.config:
-            logger.error("Paths to input config not provided.")
-            parser.print_help()
-
-        return args
-
-    def setup_logger() -> logging.Logger:
-        """Setup a logger for this script
-
-        The logger's level is set to DEBUG, and it
-        is linked to a handler that writes to the
-        console and whose level is
-
-        Returns
-        -------
-        logging.Logger
-            a logger object
-        """
-        # define handler that writes to stdout
-        console_handler = logging.StreamHandler(sys.stdout)
-        console_format = logging.Formatter(
-            "%(name)s %(levelname)s: %(message)s"
-        )
-        console_handler.setFormatter(console_format)
-
-        # define logger and link to handler
-        logger = logging.getLogger(
-            __name__
-        )  # if imported as a module, the logger is named after the module
-        logger.setLevel(logging.DEBUG)
-        logger.addHandler(console_handler)
-        return logger
-
-    def setup_workflow(input_config_path: Path) -> CellfinderConfig:
-        """Run setup steps prior to executing the workflow
-
-        These setup steps include:
-        - instantiating a CellfinderConfig object with the required parameters,
-        - checking if the input data exists locally, and fetching from
-        GIN repository otherwise,
-        - adding the path to the input data files to the config, and
-        - creating a timestamped directory for the output of the workflow if
-        it doesn't exist and adding its path to the config
-
-        Parameters
-        ----------
-        input_config_path : Path
-            path to the input config file
-
-        Returns
-        -------
-        config : CellfinderConfig
-            a dataclass whose attributes are the parameters
-            for running cellfinder.
-        """
-
-        # Check config file exists
-        assert input_config_path.exists()
-
-        # Instantiate a CellfinderConfig from the input json file
-        # (assumes config is json serializable)
-        with open(input_config_path) as cfg:
-            config_dict = json.load(cfg)
-        config = CellfinderConfig(**config_dict)
-
-        # Print info logs for status
-        logger.info(f"Input config read from {input_config_path}")
-        if input_config_path == DEFAULT_JSON_CONFIG_PATH:
-            logger.info("Using default config file")
-
-        # Retrieve and add lists of input data to the config,
-        # if these are defined yet
-        if not (config.list_signal_files and config.list_signal_files):
-            # build fullpaths to inputs
-            config.signal_dir_path = str(
-                Path(config.install_path)
-                / config.extract_dir_relative
-                / config.signal_subdir
-            )
-            config.background_dir_path = str(
-                Path(config.install_path)
-                / config.extract_dir_relative
-                / config.background_subdir
-            )
-            # retrieve data
-            config = retrieve_input_data(config)
-
-        # Create timestamped output directory if it doesn't exist
-        timestamp = datetime.datetime.now()
-        timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S")
-        output_path_timestamped = Path(config.install_path) / (
-            str(config.output_path_basename_relative) + timestamp_formatted
-        )
-        output_path_timestamped.mkdir(parents=True, exist_ok=True)
-
-        # Add output path and output file path to config
-        config.output_path = output_path_timestamped
-        config.detected_cells_path = (
-            config.output_path / config.detected_cells_filename
-        )
-
-        return config
-
-    def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig:
-        """
-        Adds the lists of input data files (signal and background)
-        to the config.
-
-        It first checks if the input data exists locally.
-        - If both directories (signal and background) exist, the lists of
-        signal and background files are added to the config.
-        - If exactly one of the input data directories is missing, an error
-        message is logged.
-        - If neither of them exist, the data is retrieved from the provided GIN
-        repository. If no URL or hash to GIN is provided, an error is shown.
-
-        Parameters
-        ----------
-        config : CellfinderConfig
-            a dataclass whose attributes are the parameters
-            for running cellfinder.
-
-        Returns
-        -------
-        config : CellfinderConfig
-            a dataclass whose attributes are the parameters
-            for running cellfinder.
-        """
-        # Check if input data (signal and background) exist locally.
-        # If both directories exist, get list of signal and background files
-        if (
-            Path(config.signal_dir_path).exists()
-            and Path(config.background_dir_path).exists()
-        ):
-            logger.info("Fetching input data from the local directories")
-
-            config.list_signal_files = [
-                f
-                for f in Path(config.signal_dir_path).resolve().iterdir()
-                if f.is_file()
-            ]
-            config.list_background_files = [
-                f
-                for f in Path(config.background_dir_path).resolve().iterdir()
-                if f.is_file()
-            ]
-
-        # If exactly one of the input data directories is missing, print error
-        elif (
-            Path(config.signal_dir_path).resolve().exists()
-            or Path(config.background_dir_path).resolve().exists()
-        ):
-            if not Path(config.signal_dir_path).resolve().exists():
-                logger.error(
-                    f"The directory {config.signal_dir_path} does not exist"
-                )
-            else:
-                logger.error(
-                    f"The directory {config.background_dir_path} "
-                    "does not exist"
-                )
-
-        # If neither of them exist, retrieve data from GIN repository
-        else:
-            # check if GIN URL and hash are defined (log error otherwise)
-            if (not config.data_url) or (not config.data_hash):
-                logger.error(
-                    "Input data not found locally, and URL/hash to "
-                    "GIN repository not provided"
-                )
-
-            else:
-                # get list of files in GIN archive with pooch.retrieve
-                list_files_archive = pooch.retrieve(
-                    url=config.data_url,
-                    known_hash=config.data_hash,
-                    path=config.install_path,  # zip will be downloaded here
-                    progressbar=True,
-                    processor=pooch.Unzip(
-                        extract_dir=config.extract_dir_relative
-                        # path to unzipped dir,
-                        # *relative* to the path set in 'path'
-                    ),
-                )
-                logger.info(
-                    "Fetching input data from the provided GIN repository"
-                )
-
-                # Check signal and background parent directories exist now
-                assert Path(config.signal_dir_path).resolve().exists()
-                assert Path(config.background_dir_path).resolve().exists()
-
-                # Add signal files to config
-                config.list_signal_files = [
-                    f
-                    for f in list_files_archive
-                    if f.startswith(
-                        str(Path(config.signal_dir_path).resolve())
-                    )  # if str(config.signal_dir_path) in f
-                ]
-
-                # Add background files to config
-                config.list_background_files = [
-                    f
-                    for f in list_files_archive
-                    if f.startswith(
-                        str(Path(config.background_dir_path).resolve())
-                    )  # if str(config.background_dir_path) in f
-                ]
-
-        return config
-
-    # parse command line input arguments:
-    # sys.argv in most cases except for testing
-    # see https://paiml.com/docs/home/books/testing-in-python/chapter08-monkeypatching/#the-simplest-monkeypatching
-    argv = argv or sys.argv[1:]
-    args = parse_cli_arguments(argv)
-
-    # setup logger
-    logger = setup_logger()
-
-    # run setup steps and return config
-    cfg = setup_workflow(Path(args.config))
-
-    return cfg
-
-
-def run_workflow_from_cellfinder_run(cfg: CellfinderConfig):
-    """
-    Run workflow based on the cellfinder_core.main.main()
-    function.
-
-    The steps are:
-    1. Read the input signal and background data as two separate
-       Dask arrays.
-    2. Run the main cellfinder pipeline on the input Dask arrays,
-       with the parameters defined in the input configuration (cfg).
-    3. Save the detected cells as an xml file to the location specified in
-       the input configuration (cfg).
-
-    Parameters
-    ----------
-    cfg : CellfinderConfig
-        a class with the required setup methods and parameters for
-        the cellfinder workflow
-    """
-    # Read input data as Dask arrays
-    signal_array = read_with_dask(cfg.signal_dir_path)
-    background_array = read_with_dask(cfg.background_dir_path)
-
-    # Run main analysis using `cellfinder_run`
-    detected_cells = cellfinder_run(
-        signal_array, background_array, cfg.voxel_sizes
-    )
-
-    # Save results to xml file
-    save_cells(
-        detected_cells,
-        cfg.detected_cells_path,
-    )
-
-
-if __name__ == "__main__":
-    # run setup
-    cfg = setup()
-
-    # run workflow
-    run_workflow_from_cellfinder_run(cfg)  # only this will be benchmarked
diff --git a/brainglobe_workflows/cellfinder/default_config.json b/brainglobe_workflows/configs/cellfinder.json
similarity index 95%
rename from brainglobe_workflows/cellfinder/default_config.json
rename to brainglobe_workflows/configs/cellfinder.json
index a80a4ba4..daf056a5 100644
--- a/brainglobe_workflows/cellfinder/default_config.json
+++ b/brainglobe_workflows/configs/cellfinder.json
@@ -2,7 +2,7 @@
   "install_path": ".cellfinder_workflows",
   "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
   "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",
-  "extract_dir_relative": "cellfinder_test_data",
+  "data_dir_relative": "cellfinder_test_data",
   "signal_subdir": "signal",
   "background_subdir": "background",
   "output_path_basename_relative": "cellfinder_output_",
diff --git a/brainglobe_workflows/utils.py b/brainglobe_workflows/utils.py
new file mode 100644
index 00000000..4b3bdac3
--- /dev/null
+++ b/brainglobe_workflows/utils.py
@@ -0,0 +1,94 @@
+import argparse
+import logging
+import sys
+from pathlib import Path
+from typing import List
+
+DEFAULT_JSON_CONFIGS_PATH = Path(__file__).resolve().parent / "configs"
+
+DEFAULT_JSON_CONFIG_PATH_CELLFINDER = (
+    DEFAULT_JSON_CONFIGS_PATH / "cellfinder.json"
+)
+
+
+def setup_logger() -> logging.Logger:
+    """Setup a logger for workflow runs
+
+    The logger's level is set to DEBUG, and it
+    is linked to a handler that writes to the
+    console. This utility function helps run
+    workflows, and test their logs, in a
+    consistent way.
+
+    Returns
+    -------
+    logging.Logger
+        a logger object configured for workflow runs
+    """
+    # define handler that writes to stdout
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_format = logging.Formatter("%(name)s %(levelname)s: %(message)s")
+    console_handler.setFormatter(console_format)
+    console_handler.set_name("console_handler")
+
+    # define logger and link to handler
+    logger = logging.getLogger(
+        __name__
+    )  # if imported as a module, the logger is named after the module
+    logger.setLevel(logging.DEBUG)
+    logger.addHandler(console_handler)
+    return logger
+
+
+def config_parser(
+    argv: List[str],
+    default_config: str,
+) -> argparse.Namespace:
+    """Define argument parser for a workflow script.
+
+    The only CLI argument defined in the parser is
+    the input config file. The list of input arguments
+    `argv` can be an empty list.
+
+    Both the list of input arguments and the default config to use if
+    no config is specified must be passed as an input to this
+    function.
+
+    Parameters
+    ----------
+    argv : List[str]
+        list of command line input arguments; it can be an empty list
+    default_config : str
+        default config file to use if none is passed via `--config`
+
+    Returns
+    -------
+    args : argparse.Namespace
+        command line input arguments parsed
+    """
+
+    # initialise argument parser
+    parser = argparse.ArgumentParser(
+        description=(
+            "To launch the workflow with "
+            "a specific set of input parameters, run: "
+            "`python brainglobe_workflows/cellfinder.py "
+            "--config path/to/config.json`"
+            "where path/to/input/config.json is the json file "
+            "containing the workflow parameters."
+        )
+    )
+    # add arguments
+    parser.add_argument(
+        "-c",
+        "--config",
+        default=default_config,
+        type=str,
+        metavar="CONFIG",  # a name for usage messages
+        help="",
+    )
+
+    # build parser object
+    args = parser.parse_args(argv)
+
+    return args
diff --git a/pyproject.toml b/pyproject.toml
index 9defcc92..6689ae2b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,8 +8,10 @@ authors = [
 ]
 description = "A collection of end-to-end data analysis workflows executed using BrainGlobe tools."
 readme = "README.md"
-license = { file = "LICENSE" }
+license = { file = "LICENSE" } #{text = "BSD-3-Clause"}
 requires-python = ">=3.9"
+dynamic = ["version"]
+
 classifiers = [
     "Development Status :: 3 - Alpha",
     "Intended Audience :: Developers",
@@ -21,6 +23,7 @@ classifiers = [
     "Programming Language :: Python",
     "Topic :: Scientific/Engineering :: Image Recognition",
 ]
+
 dependencies = [
     "brainreg>=1.0.0",
     "cellfinder-core>=0.2.4,<1.0.0",
@@ -37,11 +40,9 @@ dependencies = [
     "scikit-image",
     "tifffile",
     "tqdm",
+    "asv",
+    "pooch",
 ]
-dynamic = ["version"]
-
-[project.scripts]
-cellfinder = "brainglobe_workflows.main:main"
 
 [project.optional-dependencies]
 dev = [
@@ -65,10 +66,22 @@ napari = [
 "Homepage" = "https://brainglobe.info"
 "Source Code" = "https://github.com/brainglobe/brainglobe-workflows"
 
+[project.scripts]
+cellfinder-workflow = "brainglobe_workflows.cellfinder:main_app_wrapper"
+cellfinder = "brainglobe_workflows.main:main"
+
 [build-system]
 requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.2"]
 build-backend = "setuptools.build_meta"
 
+[tool.setuptools]
+include-package-data = true
+zip-safe = false
+
+[tool.setuptools.packages.find]
+include = ["brainglobe_workflows"]
+exclude = ["tests", "resources"]
+
 [tool.black]
 target-version = ["py39", "py310"]
 skip-string-normalization = false
@@ -92,14 +105,6 @@ exclude = ["__init__.py", "build", ".eggs"]
 select = ["I", "E", "F"]
 fix = true
 
-[tool.setuptools]
-include-package-data = true
-zip-safe = false
-
-[tool.setuptools.packages.find]
-include = ["brainglobe_workflows"]
-exclude = ["benchmarks", "tests", "resources"]
-
 [tool.setuptools_scm]
 
 [tool.tox]
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 00000000..83e05553
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,98 @@
+"""Pytest fixtures shared across unit and integration tests"""
+
+from pathlib import Path
+
+import pooch
+import pytest
+
+from brainglobe_workflows.cellfinder import read_cellfinder_config
+
+
+@pytest.fixture()
+def input_configs_dir() -> Path:
+    """Return the directory path to the input configs
+    used for testing
+
+    Returns
+    -------
+    Path
+        Test data directory path
+    """
+    return Path(__file__).parent / "data"
+
+
+@pytest.fixture(scope="session")
+def cellfinder_GIN_data() -> dict:
+    """Return the URL and hash to the GIN repository with the input data
+
+    Returns
+    -------
+    dict
+        URL and hash of the GIN repository with the cellfinder test data
+    """
+    return {
+        "url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
+        "hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",  # noqa
+    }
+
+
+@pytest.fixture()
+def input_config_fetch_GIN(input_configs_dir: Path) -> Path:
+    """
+    Return the cellfinder config json file that is configured to fetch from GIN
+
+    Parameters
+    ----------
+    input_configs_dir : Path
+        Path to the directory holding the test config files.
+
+    Returns
+    -------
+    Path
+        Path to the config json file for fetching data from GIN
+    """
+    return input_configs_dir / "input_data_GIN.json"
+
+
+@pytest.fixture()
+def input_config_fetch_local(
+    input_configs_dir: Path,
+    cellfinder_GIN_data: dict,
+) -> Path:
+    """
+    Download the cellfinder data locally and return the config json
+    file configured to fetch local data.
+
+    The data is downloaded to a directory under the current working
+    directory (that is, to a directory under the directory from where
+    pytest is launched).
+
+    Parameters
+    ----------
+    input_configs_dir : Path
+        Path to the directory holding the test config files.
+    cellfinder_GIN_data : dict
+        URL and hash of the GIN repository with the cellfinder test data
+
+    Returns
+    -------
+    Path
+        Path to the config json file for fetching data locally
+    """
+    # read local config
+    input_config_path = input_configs_dir / "input_data_locally.json"
+    config = read_cellfinder_config(input_config_path)
+
+    # fetch data from GIN and download locally
+    pooch.retrieve(
+        url=cellfinder_GIN_data["url"],
+        known_hash=cellfinder_GIN_data["hash"],
+        path=config.install_path,  # path to download zip to
+        progressbar=True,
+        processor=pooch.Unzip(
+            extract_dir=config.data_dir_relative
+            # path to unzipped dir, *relative*  to 'path'
+        ),
+    )
+
+    return input_config_path
diff --git a/tests/data/input_data_GIN.json b/tests/data/input_data_GIN.json
new file mode 100644
index 00000000..daf056a5
--- /dev/null
+++ b/tests/data/input_data_GIN.json
@@ -0,0 +1,39 @@
+{
+  "install_path": ".cellfinder_workflows",
+  "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
+  "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",
+  "data_dir_relative": "cellfinder_test_data",
+  "signal_subdir": "signal",
+  "background_subdir": "background",
+  "output_path_basename_relative": "cellfinder_output_",
+  "detected_cells_filename": "detected_cells.xml",
+  "voxel_sizes": [
+    5,
+    2,
+    2
+  ],
+  "start_plane": 0,
+  "end_plane": -1,
+  "trained_model": null,
+  "model_weights": null,
+  "model": "resnet50_tv",
+  "batch_size": 32,
+  "n_free_cpus": 2,
+  "network_voxel_sizes": [
+    5,
+    1,
+    1
+  ],
+  "soma_diameter": 16,
+  "ball_xy_size": 6,
+  "ball_z_size": 15,
+  "ball_overlap_fraction": 0.6,
+  "log_sigma_size": 0.2,
+  "n_sds_above_mean_thresh": 10,
+  "soma_spread_factor": 1.4,
+  "max_cluster_size": 100000,
+  "cube_width": 50,
+  "cube_height": 50,
+  "cube_depth": 20,
+  "network_depth": "50"
+}
diff --git a/tests/data/input_data_locally.json b/tests/data/input_data_locally.json
new file mode 100644
index 00000000..e3761543
--- /dev/null
+++ b/tests/data/input_data_locally.json
@@ -0,0 +1,37 @@
+{
+  "install_path": ".cellfinder_workflows",
+  "data_dir_relative": "cellfinder_test_data",
+  "signal_subdir": "signal",
+  "background_subdir": "background",
+  "output_path_basename_relative": "cellfinder_output_",
+  "detected_cells_filename": "detected_cells.xml",
+  "voxel_sizes": [
+    5,
+    2,
+    2
+  ],
+  "start_plane": 0,
+  "end_plane": -1,
+  "trained_model": null,
+  "model_weights": null,
+  "model": "resnet50_tv",
+  "batch_size": 32,
+  "n_free_cpus": 2,
+  "network_voxel_sizes": [
+    5,
+    1,
+    1
+  ],
+  "soma_diameter": 16,
+  "ball_xy_size": 6,
+  "ball_z_size": 15,
+  "ball_overlap_fraction": 0.6,
+  "log_sigma_size": 0.2,
+  "n_sds_above_mean_thresh": 10,
+  "soma_spread_factor": 1.4,
+  "max_cluster_size": 100000,
+  "cube_width": 50,
+  "cube_height": 50,
+  "cube_depth": 20,
+  "network_depth": "50"
+}
diff --git a/tests/data/input_data_missing_background.json b/tests/data/input_data_missing_background.json
new file mode 100644
index 00000000..52454f9b
--- /dev/null
+++ b/tests/data/input_data_missing_background.json
@@ -0,0 +1,37 @@
+{
+  "install_path": ".cellfinder_workflows",
+  "data_dir_relative": "cellfinder_test_data",
+  "signal_subdir": "signal",
+  "background_subdir": "__",
+  "output_path_basename_relative": "cellfinder_output_",
+  "detected_cells_filename": "detected_cells.xml",
+  "voxel_sizes": [
+    5,
+    2,
+    2
+  ],
+  "start_plane": 0,
+  "end_plane": -1,
+  "trained_model": null,
+  "model_weights": null,
+  "model": "resnet50_tv",
+  "batch_size": 32,
+  "n_free_cpus": 2,
+  "network_voxel_sizes": [
+    5,
+    1,
+    1
+  ],
+  "soma_diameter": 16,
+  "ball_xy_size": 6,
+  "ball_z_size": 15,
+  "ball_overlap_fraction": 0.6,
+  "log_sigma_size": 0.2,
+  "n_sds_above_mean_thresh": 10,
+  "soma_spread_factor": 1.4,
+  "max_cluster_size": 100000,
+  "cube_width": 50,
+  "cube_height": 50,
+  "cube_depth": 20,
+  "network_depth": "50"
+}
diff --git a/tests/data/input_data_missing_signal.json b/tests/data/input_data_missing_signal.json
new file mode 100644
index 00000000..22c5247b
--- /dev/null
+++ b/tests/data/input_data_missing_signal.json
@@ -0,0 +1,37 @@
+{
+  "install_path": ".cellfinder_workflows",
+  "data_dir_relative": "cellfinder_test_data",
+  "signal_subdir": "__",
+  "background_subdir": "background",
+  "output_path_basename_relative": "cellfinder_output_",
+  "detected_cells_filename": "detected_cells.xml",
+  "voxel_sizes": [
+    5,
+    2,
+    2
+  ],
+  "start_plane": 0,
+  "end_plane": -1,
+  "trained_model": null,
+  "model_weights": null,
+  "model": "resnet50_tv",
+  "batch_size": 32,
+  "n_free_cpus": 2,
+  "network_voxel_sizes": [
+    5,
+    1,
+    1
+  ],
+  "soma_diameter": 16,
+  "ball_xy_size": 6,
+  "ball_z_size": 15,
+  "ball_overlap_fraction": 0.6,
+  "log_sigma_size": 0.2,
+  "n_sds_above_mean_thresh": 10,
+  "soma_spread_factor": 1.4,
+  "max_cluster_size": 100000,
+  "cube_width": 50,
+  "cube_height": 50,
+  "cube_depth": 20,
+  "network_depth": "50"
+}
diff --git a/tests/data/input_data_not_locally_or_GIN.json b/tests/data/input_data_not_locally_or_GIN.json
new file mode 100644
index 00000000..e3761543
--- /dev/null
+++ b/tests/data/input_data_not_locally_or_GIN.json
@@ -0,0 +1,37 @@
+{
+  "install_path": ".cellfinder_workflows",
+  "data_dir_relative": "cellfinder_test_data",
+  "signal_subdir": "signal",
+  "background_subdir": "background",
+  "output_path_basename_relative": "cellfinder_output_",
+  "detected_cells_filename": "detected_cells.xml",
+  "voxel_sizes": [
+    5,
+    2,
+    2
+  ],
+  "start_plane": 0,
+  "end_plane": -1,
+  "trained_model": null,
+  "model_weights": null,
+  "model": "resnet50_tv",
+  "batch_size": 32,
+  "n_free_cpus": 2,
+  "network_voxel_sizes": [
+    5,
+    1,
+    1
+  ],
+  "soma_diameter": 16,
+  "ball_xy_size": 6,
+  "ball_z_size": 15,
+  "ball_overlap_fraction": 0.6,
+  "log_sigma_size": 0.2,
+  "n_sds_above_mean_thresh": 10,
+  "soma_spread_factor": 1.4,
+  "max_cluster_size": 100000,
+  "cube_width": 50,
+  "cube_height": 50,
+  "cube_depth": 20,
+  "network_depth": "50"
+}
diff --git a/brainglobe_workflows/cellfinder/__init__.py b/tests/test_integration/brainglobe_benchmarks/__init__.py
similarity index 100%
rename from brainglobe_workflows/cellfinder/__init__.py
rename to tests/test_integration/brainglobe_benchmarks/__init__.py
diff --git a/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py b/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
new file mode 100644
index 00000000..44d031cc
--- /dev/null
+++ b/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
@@ -0,0 +1,100 @@
+import json
+import subprocess
+from pathlib import Path
+
+import pytest
+from asv import util
+
+
+@pytest.fixture()
+def asv_config_monkeypatched_path(tmp_path: Path) -> str:
+    """
+    Create a monkeypatched asv.conf.json file
+    in a Pytest-generated temporary directory
+    and return its path
+
+    Parameters
+    ----------
+    tmp_path : Path
+        path to pytest-generated temporary directory
+
+    Returns
+    -------
+    str
+        Path to monkeypatched asv config file
+    """
+    # read reference asv config
+    asv_original_path = Path(__file__).resolve().parents[3] / "asv.conf.json"
+    asv_monkeypatched_dict = util.load_json(
+        asv_original_path, js_comments=True
+    )
+
+    # change directories
+    for ky in ["env_dir", "results_dir", "html_dir"]:
+        asv_monkeypatched_dict[ky] = str(
+            Path(tmp_path) / asv_monkeypatched_dict[ky]
+        )
+
+    # change repo to URL rather than local
+    asv_monkeypatched_dict[
+        "repo"
+    ] = "https://github.com/brainglobe/brainglobe-workflows.git"
+
+    # define path to a temp json file to dump config data
+    asv_monkeypatched_path = tmp_path / "asv.conf.json"
+
+    # save monkeypatched config data to json file
+    with open(asv_monkeypatched_path, "w") as js:
+        json.dump(asv_monkeypatched_dict, js)
+
+    # check json file exists
+    assert asv_monkeypatched_path.is_file()
+
+    return str(asv_monkeypatched_path)
+
+
+@pytest.mark.skip(reason="will be worked on a separate PR")
+def test_run_benchmarks(asv_config_monkeypatched_path):
+    # --- ideally monkeypatch an asv config so that results are in tmp_dir?
+
+    # set up machine (env_dir, results_dir, html_dir)
+    asv_machine_output = subprocess.run(
+        [
+            "asv",
+            "machine",
+            "--yes",
+            "--config",
+            asv_config_monkeypatched_path,
+        ]
+    )
+    assert asv_machine_output.returncode == 0
+
+    # run benchmarks
+    asv_benchmark_output = subprocess.run(
+        [
+            "asv",
+            "run",
+            "--config",
+            asv_config_monkeypatched_path,
+            # "--dry-run"
+            # # Do not save any results to disk? not truly testing then
+        ],
+        cwd=str(
+            Path(asv_config_monkeypatched_path).parent
+        ),  # run from where asv config is
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+        encoding="utf-8",
+    )
+    # STDOUT: "· Cloning project\n· Fetching recent changes\n·
+    # Creating environments\n· No __init__.py file in 'benchmarks'\n"
+
+    # check returncode
+    assert asv_benchmark_output.returncode == 0
+
+    # check logs?
+
+    # delete directories?
+    # check teardown after yield:
+    # https://docs.pytest.org/en/6.2.x/fixture.html#yield-fixtures-recommended
diff --git a/tests/test_integration/brainglobe_workflows/__init__.py b/tests/test_integration/brainglobe_workflows/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_integration/brainglobe_workflows/test_cellfinder.py b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
new file mode 100644
index 00000000..1f179b7b
--- /dev/null
+++ b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
@@ -0,0 +1,154 @@
+import subprocess
+import sys
+from pathlib import Path
+from typing import Optional
+
+import pytest
+
+from brainglobe_workflows.cellfinder import main
+
+
+@pytest.mark.parametrize(
+    "input_config",
+    [
+        None,
+        "input_config_fetch_GIN",
+        "input_config_fetch_local",
+    ],
+)
+def test_main(
+    input_config: Optional[str],
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    request: pytest.FixtureRequest,
+):
+    """Test main function for setting up and running cellfinder workflow
+
+    Parameters
+    ----------
+    input_config : Optional[str]
+        Name of the fixture that returns the input config json file path
+    monkeypatch : pytest.MonkeyPatch
+        Pytest fixture to use monkeypatching utils
+    tmp_path : Path
+        Pytest fixture providing a temporary path for each test
+    request : pytest.FixtureRequest
+        Pytest fixture to enable requesting fixtures by name
+    """
+    # monkeypatch to change current directory to
+    # pytest temporary directory
+    # (cellfinder cache directory is created in cwd)
+    monkeypatch.chdir(tmp_path)
+
+    # run main
+    if not input_config:
+        cfg = main()
+    else:
+        cfg = main(str(request.getfixturevalue(input_config)))
+
+    # check output files exist
+    assert Path(cfg.detected_cells_path).is_file()
+
+
+@pytest.mark.parametrize(
+    "input_config",
+    [
+        None,
+        "input_config_fetch_GIN",
+        "input_config_fetch_local",
+    ],
+)
+def test_script(
+    input_config: Optional[str],
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    request: pytest.FixtureRequest,
+):
+    """Test running the cellfinder workflow from the command line
+
+    Parameters
+    ----------
+    input_config : Optional[str]
+        Name of the fixture that returns the input config json file path
+    monkeypatch : pytest.MonkeyPatch
+        Pytest fixture to use monkeypatching utils
+    tmp_path : Path
+        Pytest fixture providing a temporary path for each test
+    request : pytest.FixtureRequest
+        Pytest fixture to enable requesting fixtures by name
+    """
+    # monkeypatch to change current directory to
+    # pytest temporary directory
+    # (cellfinder cache directory is created in cwd)
+    monkeypatch.chdir(tmp_path)
+
+    # define CLI input
+    script_path = (
+        Path(__file__).resolve().parents[3]
+        / "brainglobe_workflows"
+        / "cellfinder.py"
+    )
+    subprocess_input = [
+        sys.executable,
+        str(script_path),
+    ]
+    # append config if required
+    if input_config:
+        subprocess_input.append("--config")
+        subprocess_input.append(str(request.getfixturevalue(input_config)))
+
+    # run workflow script from the CLI
+    subprocess_output = subprocess.run(
+        subprocess_input,
+    )
+
+    # check returncode
+    assert subprocess_output.returncode == 0
+
+
+@pytest.mark.parametrize(
+    "input_config",
+    [
+        None,
+        "input_config_fetch_GIN",
+        "input_config_fetch_local",
+    ],
+)
+def test_entry_point(
+    input_config: Optional[str],
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    request: pytest.FixtureRequest,
+):
+    """Test running the cellfinder workflow via the predefined entry point
+
+    Parameters
+    ----------
+    input_config : Optional[str]
+        Name of the fixture that returns the input config json file path
+    monkeypatch : pytest.MonkeyPatch
+        Pytest fixture to use monkeypatching utils
+    tmp_path : Path
+        Pytest fixture providing a temporary path for each test
+    request : pytest.FixtureRequest
+        Pytest fixture to enable requesting fixtures by name
+    """
+    # monkeypatch to change current directory to
+    # pytest temporary directory
+    # (cellfinder cache directory is created in cwd)
+    monkeypatch.chdir(tmp_path)
+
+    # define CLI input
+    subprocess_input = ["cellfinder-workflow"]
+    # append config if required
+    if input_config:
+        subprocess_input.append("--config")
+        subprocess_input.append(str(request.getfixturevalue(input_config)))
+
+    # run workflow via the entry point, with the CLI arguments defined above
+    subprocess_output = subprocess.run(
+        subprocess_input,
+    )
+
+    # check returncode
+    assert subprocess_output.returncode == 0
diff --git a/tests/test_integration/conftest.py b/tests/test_integration/conftest.py
deleted file mode 100644
index d9207917..00000000
--- a/tests/test_integration/conftest.py
+++ /dev/null
@@ -1,290 +0,0 @@
-import json
-from pathlib import Path
-from typing import Any
-
-import pooch
-import pytest
-
-from brainglobe_workflows.cellfinder.cellfinder_main import CellfinderConfig
-
-
-def make_config_dict_fetch_from_local(cellfinder_cache_dir: Path) -> dict:
-    """Generate a config dictionary with the required parameters
-    for the workflow
-
-    The input data is assumed to be locally at cellfinder_cache_dir.
-    The results are saved in a timestamped output subdirectory under
-    cellfinder_cache_dir
-
-    Parameters
-    ----------
-    cellfinder_cache_dir : Path
-        Path to the directory where the downloaded input data will be unzipped,
-        and the output will be saved
-
-    Returns
-    -------
-    dict
-        dictionary with the required parameters for the workflow
-    """
-    return {
-        "install_path": cellfinder_cache_dir,
-        "extract_dir_relative": "cellfinder_test_data",  # relative path
-        "signal_subdir": "signal",
-        "background_subdir": "background",
-        "output_path_basename_relative": "cellfinder_output_",
-        "detected_cells_filename": "detected_cells.xml",
-        "voxel_sizes": [5, 2, 2],  # microns
-        "start_plane": 0,
-        "end_plane": -1,
-        "trained_model": None,  # if None, it will use a default model
-        "model_weights": None,
-        "model": "resnet50_tv",
-        "batch_size": 32,
-        "n_free_cpus": 2,
-        "network_voxel_sizes": [5, 1, 1],
-        "soma_diameter": 16,
-        "ball_xy_size": 6,
-        "ball_z_size": 15,
-        "ball_overlap_fraction": 0.6,
-        "log_sigma_size": 0.2,
-        "n_sds_above_mean_thresh": 10,
-        "soma_spread_factor": 1.4,
-        "max_cluster_size": 100000,
-        "cube_width": 50,
-        "cube_height": 50,
-        "cube_depth": 20,
-        "network_depth": "50",
-    }
-
-
-def make_config_dict_fetch_from_GIN(
-    cellfinder_cache_dir: Path,
-    data_url: str,
-    data_hash: str,
-) -> dict:
-    """Generate a config dictionary with the required parameters
-    for the workflow
-
-    The input data is fetched from GIN and downloaded to cellfinder_cache_dir.
-    The results are also saved in a timestamped output subdirectory under
-    cellfinder_cache_dir
-
-    Parameters
-    ----------
-    cellfinder_cache_dir : Path
-        Path to the directory where the downloaded input data will be unzipped,
-        and the output will be saved
-    data_url: str
-        URL to the GIN repository with the data to download
-    data_hash: str
-        Hash of the data to download
-
-    Returns
-    -------
-    dict
-        dictionary with the required parameters for the workflow
-    """
-
-    config = make_config_dict_fetch_from_local(cellfinder_cache_dir)
-    config["data_url"] = data_url
-    config["data_hash"] = data_hash
-
-    return config
-
-
-def prep_json(obj: Any) -> Any:
-    """
-    Returns a JSON encodable version of the input object.
-
-    It uses the JSON default encoder for all objects
-    except those of type `Path`.
-
-
-    Parameters
-    ----------
-    obj : Any
-        _description_
-
-    Returns
-    -------
-    Any
-        JSON serializable version of input object
-    """
-    if isinstance(obj, Path):
-        return str(obj)
-    else:
-        json_decoder = json.JSONEncoder()
-        return json_decoder.default(obj)
-
-
-@pytest.fixture(autouse=True)
-def cellfinder_cache_dir(tmp_path: Path) -> Path:
-    """Create a .cellfinder_workflows directory
-    under a temporary pytest directory and return
-    its path.
-
-    The temporary directory is available via pytest's tmp_path
-    fixture. A new temporary directory is created every function call
-    (i.e., scope="function")
-
-    Parameters
-    ----------
-    tmp_path : Path
-        path to pytest-generated temporary directory
-
-    Returns
-    -------
-    Path
-        path to the created cellfinder_workflows cache directory
-    """
-
-    return Path(tmp_path) / ".cellfinder_workflows"
-
-
-@pytest.fixture(scope="session")
-def data_url() -> str:
-    """Return the URL to the GIN repository with the input data
-
-    Returns
-    -------
-    str
-        URL to the GIN repository with the input data
-    """
-    return "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip"
-
-
-@pytest.fixture(scope="session")
-def data_hash() -> str:
-    """Return the hash of the GIN input data
-
-    Returns
-    -------
-    str
-        Hash to the GIN input data
-    """
-    return "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914"
-
-
-@pytest.fixture(scope="session")
-def default_json_config_path() -> Path:
-    """Return the path to the json file
-    with the default config parameters
-
-    Returns
-    -------
-    Path
-        path to the json file with the default config parameters
-    """
-    from brainglobe_workflows.cellfinder.cellfinder_main import (
-        DEFAULT_JSON_CONFIG_PATH,
-    )
-
-    return DEFAULT_JSON_CONFIG_PATH
-
-
-@pytest.fixture()
-def path_to_config_fetch_GIN(
-    tmp_path: Path, cellfinder_cache_dir: Path, data_url: str, data_hash: str
-) -> Path:
-    """Create an input config that fetches data from GIN and
-    return its path
-
-    Parameters
-    ----------
-    tmp_path : Path
-        path to a fresh pytest-generated temporary directory. The
-        generated config is saved here.
-
-    cellfinder_cache_dir : Path
-        path to the cellfinder cache directory, where the paths
-        in the config should point to.
-
-    data_url: str
-        URL to the GIN repository with the input data
-
-    data_hash: str
-        hash to the GIN input data
-
-    Returns
-    -------
-    input_config_path : Path
-        path to config file that fetches data from GIN
-    """
-    # create config dict
-    config_dict = make_config_dict_fetch_from_GIN(
-        cellfinder_cache_dir, data_url, data_hash
-    )
-
-    # create a temp json file to dump config data
-    input_config_path = (
-        tmp_path / "input_config.json"
-    )  # save it in a temp dir separate from cellfinder_cache_dir
-
-    # save config data to json file
-    with open(input_config_path, "w") as js:
-        json.dump(config_dict, js, default=prep_json)
-
-    # check json file exists
-    assert Path(input_config_path).is_file()
-
-    return input_config_path
-
-
-@pytest.fixture()
-def path_to_config_fetch_local(
-    tmp_path: Path, cellfinder_cache_dir: Path, data_url: str, data_hash: str
-) -> Path:
-    """Create an input config that points to local data and
-    return its path.
-
-    The local data is downloaded from GIN, but no reference
-    to the GIN repository is included in the config.
-
-    Parameters
-    ----------
-    tmp_path : Path
-        path to a fresh pytest-generated temporary directory. The
-        generated config is saved here.
-
-    cellfinder_cache_dir : Path
-        path to the cellfinder cache directory, where the paths
-        in the config should point to.
-
-    data_url: str
-        URL to the GIN repository with the input data
-
-    data_hash: str
-        hash to the GIN input data
-
-    Returns
-    -------
-    path_to_config_fetch_GIN : Path
-        path to a config file that fetches data from GIN
-    """
-
-    # instantiate basic config (assumes data is local)
-    config_dict = make_config_dict_fetch_from_local(cellfinder_cache_dir)
-    config = CellfinderConfig(**config_dict)
-
-    # download GIN data to specified local directory
-    pooch.retrieve(
-        url=data_url,
-        known_hash=data_hash,
-        path=config.install_path,  # path to download zip to
-        progressbar=True,
-        processor=pooch.Unzip(
-            extract_dir=config.extract_dir_relative
-            # path to unzipped dir, *relative*  to 'path'
-        ),
-    )
-
-    # save config to json
-    input_config_path = tmp_path / "input_config.json"
-    with open(input_config_path, "w") as js:
-        json.dump(config_dict, js, default=prep_json)
-
-    # check json file exists
-    assert Path(input_config_path).is_file()
-
-    return input_config_path
diff --git a/tests/test_integration/test_cellfinder_workflow.py b/tests/test_integration/test_cellfinder_workflow.py
deleted file mode 100644
index e55d0a46..00000000
--- a/tests/test_integration/test_cellfinder_workflow.py
+++ /dev/null
@@ -1,211 +0,0 @@
-import json
-import subprocess
-import sys
-from pathlib import Path
-
-from brainglobe_workflows.cellfinder.cellfinder_main import CellfinderConfig
-
-
-def test_run_with_default_config(tmp_path, default_json_config_path):
-    """Test workflow run with no command line arguments
-
-    If no command line arguments are provided, the default
-    config at brainglobe_workflows/cellfinder/default_config.json
-    should be used.
-
-    After the workflow is run we check that:
-    - there are no errors (via returncode),
-    - the logs reflect the default config file was used, and
-    - a single output directory exists with the expected
-      output file inside it
-
-    Parameters
-    ----------
-    tmp_path : Path
-        path to a pytest-generated temporary directory.
-    """
-
-    # run workflow with no CLI arguments,
-    # with cwd=tmp_path
-    subprocess_output = subprocess.run(
-        [
-            sys.executable,
-            Path(__file__).resolve().parents[2]
-            / "brainglobe_workflows"
-            / "cellfinder"
-            / "cellfinder_main.py",
-        ],
-        cwd=tmp_path,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT,
-        text=True,
-        encoding="utf-8",
-    )
-
-    # check returncode
-    assert subprocess_output.returncode == 0
-
-    # check logs
-    assert "Using default config file" in subprocess_output.stdout
-
-    # Check one output directory exists and has expected
-    # output file inside it
-    assert_outputs(default_json_config_path, tmp_path)
-
-
-def test_run_with_GIN_data(
-    path_to_config_fetch_GIN,
-):
-    """Test workflow runs when passing a config that fetches data
-    from the GIN repository
-
-    After the workflow is run we check that:
-    - there are no errors (via returncode),
-    - the logs reflect the input config file was used,
-    - the logs reflect the data was downloaded from GIN, and
-    - a single output directory exists with the expected
-      output file inside it
-
-    Parameters
-    ----------
-    tmp_path : Path
-        path to a pytest-generated temporary directory.
-    """
-    # run workflow with CLI and capture log
-    subprocess_output = subprocess.run(
-        [
-            sys.executable,
-            Path(__file__).resolve().parents[2]
-            / "brainglobe_workflows"
-            / "cellfinder"
-            / "cellfinder_main.py",
-            "--config",
-            str(path_to_config_fetch_GIN),
-        ],
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT,
-        text=True,
-        encoding="utf-8",
-    )
-
-    # check returncode
-    assert subprocess_output.returncode == 0
-
-    # check logs
-    assert (
-        f"Input config read from {str(path_to_config_fetch_GIN)}"
-        in subprocess_output.stdout
-    )
-    assert (
-        "Fetching input data from the provided GIN repository"
-        in subprocess_output.stdout
-    )
-
-    # check one output directory exists and
-    # has expected output file inside it
-    assert_outputs(path_to_config_fetch_GIN)
-
-
-def test_run_with_local_data(
-    path_to_config_fetch_local,
-):
-    """Test workflow runs when passing a config that uses
-    local data
-
-    After the workflow is run we check that:
-    - there are no errors (via returncode),
-    - the logs reflect the input config file was used,
-    - the logs reflect the data was found locally, and
-    - a single output directory exists with the expected
-      output file inside it
-
-    Parameters
-    ----------
-    tmp_path : Path
-        path to a pytest-generated temporary directory.
-    """
-
-    # run workflow with CLI
-    subprocess_output = subprocess.run(
-        [
-            sys.executable,
-            Path(__file__).resolve().parents[2]
-            / "brainglobe_workflows"
-            / "cellfinder"
-            / "cellfinder_main.py",
-            "--config",
-            str(path_to_config_fetch_local),
-        ],
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT,
-        text=True,
-        encoding="utf-8",
-    )
-
-    # check returncode
-    assert subprocess_output.returncode == 0
-
-    # check logs
-    assert (
-        f"Input config read from {str(path_to_config_fetch_local)}"
-        in subprocess_output.stdout
-    )
-    assert (
-        "Fetching input data from the local directories"
-        in subprocess_output.stdout
-    )
-
-    # check one output directory exists and
-    # has expected output file inside it
-    assert_outputs(path_to_config_fetch_local)
-
-
-def assert_outputs(path_to_config, parent_dir_of_install_path=""):
-    """Helper function to determine whether the output is
-    as expected.
-
-    It checks that:
-     - a single output directory exists, and
-     - the expected output file exists inside it
-
-    Note that config.output_path is only defined after the workflow
-    setup is run, because its name is timestamped. Therefore,
-    we search for an output directory based on config.output_path_basename.
-
-    Parameters
-    ----------
-    path_to_config : Path
-        path to the input config used to generate the
-        output.
-
-    parent_dir_of_install_path : str, optional
-        If the install_path in the input config is relative to the
-        directory the script is launched from (as is the case in the
-        default_config.json file), the absolute path to its parent_dir
-        must be specified here. If the paths to install_path is
-        absolute, this input is not required. By default "".
-    """
-
-    # load input config
-    with open(path_to_config) as config:
-        config_dict = json.load(config)
-    config = CellfinderConfig(**config_dict)
-
-    # check one output directory exists and
-    # it has expected output file inside it
-    output_path_without_timestamp = (
-        Path(parent_dir_of_install_path)
-        / config.install_path
-        / config.output_path_basename_relative
-    )
-    output_path_timestamped = [
-        x
-        for x in output_path_without_timestamp.parent.glob("*")
-        if x.is_dir() and x.name.startswith(output_path_without_timestamp.name)
-    ]
-
-    assert len(output_path_timestamped) == 1
-    assert (output_path_timestamped[0]).exists()
-    assert (
-        output_path_timestamped[0] / config.detected_cells_filename
-    ).is_file()
diff --git a/tests/test_unit/brainglobe_benchmarks/__init__.py b/tests/test_unit/brainglobe_benchmarks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_unit/brainglobe_workflows/__init__.py b/tests/test_unit/brainglobe_workflows/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_unit/brainglobe_workflows/conftest.py b/tests/test_unit/brainglobe_workflows/conftest.py
new file mode 100644
index 00000000..ae85bc53
--- /dev/null
+++ b/tests/test_unit/brainglobe_workflows/conftest.py
@@ -0,0 +1,15 @@
+import pytest
+
+
+@pytest.fixture()
+def custom_logger_name() -> str:
+    """Return the module name of ``brainglobe_workflows.utils``
+
+    Returns
+    -------
+    str
+        ``__name__`` of the utils module, used as the custom logger name
+    """
+    from brainglobe_workflows.utils import __name__ as logger_name
+
+    return logger_name
diff --git a/tests/test_unit/brainglobe_workflows/test_cellfinder.py b/tests/test_unit/brainglobe_workflows/test_cellfinder.py
new file mode 100644
index 00000000..ddb4c706
--- /dev/null
+++ b/tests/test_unit/brainglobe_workflows/test_cellfinder.py
@@ -0,0 +1,343 @@
+import json
+import logging
+import re
+from pathlib import Path
+
+import pooch
+import pytest
+
+from brainglobe_workflows.cellfinder import (
+    CellfinderConfig,
+    add_signal_and_background_files,
+    read_cellfinder_config,
+    run_workflow_from_cellfinder_run,
+    setup_workflow,
+)
+from brainglobe_workflows.cellfinder import setup as setup_full
+from brainglobe_workflows.utils import setup_logger
+
+
+@pytest.fixture()
+def default_input_config_cellfinder() -> Path:
+    """Return path to default input config for cellfinder workflow
+
+    Returns
+    -------
+    Path
+        Path to default input config, taken from
+        ``brainglobe_workflows.utils.DEFAULT_JSON_CONFIG_PATH_CELLFINDER``
+    """
+    from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
+
+    return DEFAULT_JSON_CONFIG_PATH_CELLFINDER
+
+
+@pytest.mark.parametrize(
+    "input_config",
+    [
+        "input_data_GIN.json",
+        "input_data_locally.json",
+        "input_data_missing_background.json",
+        "input_data_missing_signal.json",
+        "input_data_not_locally_or_GIN.json",
+    ],
+)
+def test_read_cellfinder_config(input_config: str, input_configs_dir: Path):
+    """Test for reading a cellfinder config file
+
+    Parameters
+    ----------
+    input_config : str
+        Name of input config json file
+    input_configs_dir : Path
+        Test data directory path
+    """
+    # path to config json file
+    input_config_path = input_configs_dir / input_config
+
+    # read json as Cellfinder config
+    config = read_cellfinder_config(input_config_path)
+
+    # read json as dict
+    with open(input_config_path) as cfg:
+        config_dict = json.load(cfg)
+
+    # check keys of dictionary are a subset of Cellfinder config attributes
+    config_fields = set(config.__dataclass_fields__)
+    unknown_keys = set(config_dict) - config_fields
+    assert not unknown_keys, f"keys not in CellfinderConfig: {unknown_keys}"
+
+
+@pytest.mark.parametrize(
+    "input_config, message_pattern",
+    [
+        (
+            "input_data_GIN.json",
+            "Fetching input data from the provided GIN repository",
+        ),
+        (
+            "input_data_locally.json",
+            "Fetching input data from the local directories",
+        ),
+        (
+            "input_data_missing_background.json",
+            "The directory .+ does not exist$",
+        ),
+        ("input_data_missing_signal.json", "The directory .+ does not exist$"),
+        (
+            "input_data_not_locally_or_GIN.json",
+            "Input data not found locally, and URL/hash to "
+            "GIN repository not provided",
+        ),
+    ],
+)
+def test_add_signal_and_background_files(
+    caplog: pytest.LogCaptureFixture,
+    tmp_path: Path,
+    cellfinder_GIN_data: dict,
+    input_configs_dir: Path,
+    input_config: str,
+    message_pattern: str,
+):
+    """Test signal and background files addition to the cellfinder config
+
+    Parameters
+    ----------
+    caplog : pytest.LogCaptureFixture
+        Pytest fixture to capture the logs during testing
+    tmp_path : Path
+        Pytest fixture providing a temporary path for each test
+    cellfinder_GIN_data : dict
+        Dict holding the URL and hash of the cellfinder test data in GIN
+    input_configs_dir : Path
+        Test data directory path
+    input_config : str
+        Name of input config json file
+    message_pattern : str
+        Regex that the last captured log message must fully match
+    """
+    # instantiate our custom logger
+    _ = setup_logger()
+
+    # read json as Cellfinder config
+    config = read_cellfinder_config(input_configs_dir / input_config)
+
+    # override cellfinder config (direct attribute assignment):
+    # set install_path to pytest temporary directory
+    config.install_path = tmp_path / config.install_path
+
+    # check the signal and background file lists are not both defined
+    assert not (config.list_signal_files and config.list_background_files)
+
+    # build fullpaths to input data directories
+    config.signal_dir_path = str(
+        Path(config.install_path)
+        / config.data_dir_relative
+        / config.signal_subdir
+    )
+    config.background_dir_path = str(
+        Path(config.install_path)
+        / config.data_dir_relative
+        / config.background_subdir
+    )
+
+    # prepare local input data:
+    # if config is "local" or "signal/background missing":
+    # ensure signal and background data from GIN are downloaded locally
+    if input_config in [
+        "input_data_locally.json",
+        "input_data_missing_signal.json",
+        "input_data_missing_background.json",
+    ]:
+        # fetch data from GIN and download locally
+        pooch.retrieve(
+            url=cellfinder_GIN_data["url"],
+            known_hash=cellfinder_GIN_data["hash"],
+            path=config.install_path,  # path to download zip to
+            progressbar=True,
+            processor=pooch.Unzip(
+                extract_dir=config.data_dir_relative
+                # path to unzipped dir, *relative*  to 'path'
+            ),
+        )
+
+    # add signal and background files lists to config
+    add_signal_and_background_files(config)
+
+    # check the last log message fully matches the expected pattern
+    assert len(caplog.messages) > 0
+    out = re.fullmatch(message_pattern, caplog.messages[-1])
+    assert out is not None
+    assert out.group() is not None
+
+
+@pytest.mark.parametrize(
+    "input_config, message",
+    [
+        ("default_input_config_cellfinder", "Using default config file"),
+        ("input_config_fetch_GIN", "Input config read from"),
+    ],
+)
+def test_setup_workflow(
+    input_config: str,
+    message: str,
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    caplog: pytest.LogCaptureFixture,
+    request: pytest.FixtureRequest,
+):
+    """Test setup steps for the cellfinder workflow, using the default config
+    and passing a specific config file.
+
+    These setup steps include:
+    - instantiating a CellfinderConfig object using the input json file,
+    - add the signal and background files to the config if these are not
+      defined,
+    - create a timestamped directory for the output of the workflow if
+      it doesn't exist and add its path to the config
+
+    Parameters
+    ----------
+    input_config : str
+        Name of the fixture providing the input config json path
+    message : str
+        Expected log message
+    monkeypatch : pytest.MonkeyPatch
+        Pytest fixture to use monkeypatching utils
+    tmp_path : Path
+        Pytest fixture providing a temporary path for each test
+    caplog : pytest.LogCaptureFixture
+        Pytest fixture to capture the logs during testing
+    request : pytest.FixtureRequest
+        Pytest fixture to enable requesting fixtures by name
+    """
+
+    # setup logger
+    _ = setup_logger()
+
+    # monkeypatch to change current directory to
+    # pytest temporary directory
+    # (cellfinder cache directory is created in cwd)
+    monkeypatch.chdir(tmp_path)
+
+    # setup workflow
+    config = setup_workflow(request.getfixturevalue(input_config))
+
+    # check logs
+    assert message in caplog.text
+
+    # check all signal files exist
+    assert config.list_signal_files
+    assert all(Path(f).is_file() for f in config.list_signal_files)
+
+    # check all background files exist
+    assert config.list_background_files
+    assert all(Path(f).is_file() for f in config.list_background_files)
+
+    # check output directory exists
+    assert Path(config.output_path).resolve().is_dir()
+
+    # check output directory name has correct format
+    # (the basename is escaped so that it is matched literally)
+    out = re.fullmatch(
+        re.escape(str(config.output_path_basename_relative)) + r"\d{8}_\d{6}",
+        Path(config.output_path).name,
+    )
+    assert out is not None
+
+    # check output file path
+    assert (
+        Path(config.detected_cells_path)
+        == Path(config.output_path) / config.detected_cells_filename
+    )
+
+
+@pytest.mark.parametrize(
+    "input_config",
+    [
+        "default_input_config_cellfinder",
+        "input_config_fetch_GIN",
+        "input_config_fetch_local",
+    ],
+)
+def test_setup(
+    input_config: str,
+    custom_logger_name: str,
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    request: pytest.FixtureRequest,
+):
+    """Test full setup for cellfinder workflow, using the default config
+    and passing a specific config file.
+
+    Parameters
+    ----------
+    input_config : str
+        Name of the fixture providing the input config file
+    custom_logger_name : str
+        Name of custom logger
+    monkeypatch : MonkeyPatch
+        Pytest fixture to use monkeypatching utils
+    tmp_path : Path
+        Pytest fixture providing a temporary path for each test
+    request : pytest.FixtureRequest
+        Pytest fixture to enable requesting fixtures by name
+    """
+    # Monkeypatch to change current directory to
+    # pytest temporary directory
+    # (cellfinder cache directory is created in cwd)
+    monkeypatch.chdir(tmp_path)
+
+    # run full setup on the requested configuration
+    cfg = setup_full(request.getfixturevalue(input_config))
+
+    # check logger exists
+    logger = logging.getLogger(custom_logger_name)
+    assert logger.level == logging.DEBUG
+    assert logger.hasHandlers()
+
+    # check config is CellfinderConfig
+    assert isinstance(cfg, CellfinderConfig)
+
+
+@pytest.mark.parametrize(
+    "input_config",
+    [
+        "default_input_config_cellfinder",
+        "input_config_fetch_GIN",
+        "input_config_fetch_local",
+    ],
+)
+def test_run_workflow_from_cellfinder_run(
+    input_config: str,
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    request: pytest.FixtureRequest,
+):
+    """Test running the cellfinder workflow end to end, once for each
+    of the parametrised input config fixtures
+
+    Parameters
+    ----------
+    input_config : str
+        Name of the fixture providing the input config json file
+    monkeypatch : MonkeyPatch
+        Pytest fixture to use monkeypatching utils
+    tmp_path : Path
+        Pytest fixture providing a temporary path for each test
+    request : pytest.FixtureRequest
+        Pytest fixture to enable requesting fixtures by name
+    """
+    # monkeypatch to change current directory to
+    # pytest temporary directory
+    # (cellfinder cache directory is created in cwd)
+    monkeypatch.chdir(tmp_path)
+
+    # run setup
+    cfg = setup_full(str(request.getfixturevalue(input_config)))
+
+    # run workflow
+    run_workflow_from_cellfinder_run(cfg)
+
+    # check the file at detected_cells_path was written
+    assert Path(cfg.detected_cells_path).is_file()
diff --git a/tests/test_unit/brainglobe_workflows/test_utils.py b/tests/test_unit/brainglobe_workflows/test_utils.py
new file mode 100644
index 00000000..2ec8d19e
--- /dev/null
+++ b/tests/test_unit/brainglobe_workflows/test_utils.py
@@ -0,0 +1,39 @@
+import logging
+from typing import List
+
+import pytest
+
+from brainglobe_workflows.utils import (
+    DEFAULT_JSON_CONFIG_PATH_CELLFINDER,
+    config_parser,
+    setup_logger,
+)
+
+
+def test_setup_logger(custom_logger_name: str):
+    """Test custom logger is created with the expected level, name and handler
+
+    Parameters
+    ----------
+    custom_logger_name : str
+        Pytest fixture for the custom logger name
+    """
+    logger = setup_logger()
+
+    assert logger.level == logging.DEBUG
+    assert logger.name == custom_logger_name
+    assert logger.hasHandlers()
+    assert logger.handlers[0].name == "console_handler"
+
+
+@pytest.mark.parametrize(
+    "list_input_args",
+    [[], ["--config", str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)]],
+)
+def test_config_parser(list_input_args: List[str]):
+    """Check the parser resolves ``--config`` to the default path."""
+    default_path = str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)
+    args = config_parser(list_input_args, default_path)
+
+    # with or without the explicit flag, the default path is expected
+    assert str(args.config) == default_path
diff --git a/tests/test_unit/test_placeholder.py b/tests/test_unit/test_placeholder.py
deleted file mode 100644
index 3ada1ee4..00000000
--- a/tests/test_unit/test_placeholder.py
+++ /dev/null
@@ -1,2 +0,0 @@
-def test_placeholder():
-    assert True