diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e4e5487..953b0fd 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -24,7 +24,7 @@ jobs: python -m pip install --upgrade pip python -m pip install ruff - name: Run style checks - run: ruff . + run: ruff check . run_tests: needs: check_style diff --git a/.gitignore b/.gitignore index 3bfe751..2a0ee24 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ docs/generated/ docs/auto_examples/ docs/modules/ +docs/sg_execution_times.rst # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/MANIFEST.in b/MANIFEST.in index 09d993a..8bd58d4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,5 @@ include README.rst LICENSE environment.yml requirements.txt -recursive-include netneurotools/data * include versioneer.py +include netneurotools/datasets/datasets.json +include netneurotools/datasets/references.json +include netneurotools/datasets/netneurotools.bib \ No newline at end of file diff --git a/dev_environment.yml b/dev_environment.yml deleted file mode 100644 index dd89823..0000000 --- a/dev_environment.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: netneurotools -channels: - - defaults - - conda-forge -dependencies: - - python>=3.6 - - flake8 - - matplotlib - - mayavi - - nibabel - - nilearn - - numba - - "numpy>=1.16" - - pandas - - pip - - "pytest>=3.6" - - pytest-cov - - scikit-learn - - "scipy>=1.4.0" - - "sphinx>=1.2" - - sphinx-gallery - - sphinx_rtd_theme - - versioneer - - pip: - - git+https://github.com/aestrivex/bctpy.git#egg=bctpy - - pysurfer diff --git a/docs/api.rst b/docs/api.rst index a5adc5c..601c92d 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -8,71 +8,102 @@ Python Reference API .. contents:: **List of modules** :local: -.. _ref_network: +.. _ref_datasets: -:mod:`netneurotools.networks` - Constructing networks ------------------------------------------------------ +:mod:`netneurotools.datasets` - Automatic dataset fetching +---------------------------------------------------------- -.. automodule:: netneurotools.networks +.. automodule:: netneurotools.datasets :no-members: :no-inherited-members: -.. currentmodule:: netneurotools.networks +.. currentmodule:: netneurotools.datasets + +To download templates .. autosummary:: :template: function.rst :toctree: generated/ - func_consensus - struct_consensus - threshold_network - binarize_network - match_length_degree_distribution - randmio_und - strength_preserving_rand_sa - strength_preserving_rand_sa_mse_opt - strength_preserving_rand_sa_dir -.. _ref_modularity: + fetch_fsaverage + fetch_hcp_standards + fetch_civet + fetch_conte69 + fetch_yerkes19 -:mod:`netneurotools.modularity` - Calculating network modularity ----------------------------------------------------------------- +To download atlases -.. automodule:: netneurotools.modularity - :no-members: - :no-inherited-members: +.. autosummary:: + :template: function.rst + :toctree: generated/ -.. currentmodule:: netneurotools.modularity + fetch_cammoun2012 + fetch_schaefer2018 + fetch_mmpall + fetch_pauli2018 + fetch_ye2020 + fetch_voneconomo + +To download project-related data .. 
autosummary:: :template: function.rst :toctree: generated/ - consensus_modularity - zrand - get_modularity - get_modularity_z - get_modularity_sig + fetch_vazquez_rodriguez2019 + fetch_mirchi2018 + fetch_hansen_manynetworks + fetch_hansen_receptors + fetch_hansen_genecognition + fetch_hansen_brainstem + fetch_shafiei_hcpmeg + fetch_suarez_mami + fetch_famous_gmat + fetch_neurosynth + -.. _ref_cluster: +.. _ref_network: -:mod:`netneurotools.cluster` - Working with clusters ----------------------------------------------------- +:mod:`netneurotools.networks` - Constructing networks +----------------------------------------------------- -.. automodule:: netneurotools.cluster +.. automodule:: netneurotools.networks :no-members: :no-inherited-members: -.. currentmodule:: netneurotools.cluster +.. currentmodule:: netneurotools.networks + +To construct consensus networks .. autosummary:: :template: function.rst :toctree: generated/ - find_consensus - match_assignments - reorder_assignments - match_cluster_labels + func_consensus + struct_consensus + +To randomize networks + +.. autosummary:: + :template: function.rst + :toctree: generated/ + + randmio_und + match_length_degree_distribution + strength_preserving_rand_sa + strength_preserving_rand_sa_mse_opt + strength_preserving_rand_sa_dir + +Convenient functions + +.. autosummary:: + :template: function.rst + :toctree: generated/ + + binarize_network + threshold_network + .. _ref_plotting: @@ -85,50 +116,42 @@ Python Reference API .. currentmodule:: netneurotools.plotting +PySurfer + .. autosummary:: :template: function.rst :toctree: generated/ - sort_communities - plot_mod_heatmap plot_conte69 plot_fslr plot_fsaverage plot_fsvertex - plot_point_brain -.. _ref_stats: +Pyvista -:mod:`netneurotools.stats` - General statistics functions ---------------------------------------------------------- +.. autosummary:: + :template: function.rst + :toctree: generated/ -.. automodule:: netneurotools.stats - :no-members: - :no-inherited-members: + pv_plot_surface -.. currentmodule:: netneurotools.stats +matplotlib .. autosummary:: :template: function.rst :toctree: generated/ - gen_spinsamples - residualize - get_mad_outliers - efficient_pearsonr - permtest_1samp - permtest_rel - permtest_pearsonr - get_dominance_stats - network_pearsonr - network_pearsonr_numba - network_pearsonr_pairwise - effective_resistance - network_polarisation - network_variance - network_variance_numba - network_covariance - network_covariance_numba + plot_point_brain + plot_mod_heatmap + +Fun color & colormap stuff + +.. autosummary:: + :template: function.rst + :toctree: generated/ + + available_cmaps + .. _ref_metrics: @@ -141,146 +164,172 @@ Python Reference API .. currentmodule:: netneurotools.metrics +Brain network metrics + .. autosummary:: :template: function.rst :toctree: generated/ - _binarize degrees_und degrees_dir distance_wei_floyd retrieve_shortest_path - communicability_bin - communicability_wei - rich_feeder_peripheral navigation_wu get_navigation_path_length - search_information + communicability_bin + communicability_wei path_transitivity - flow_graph + search_information mean_first_passage_time diffusion_efficiency resource_efficiency_bin + flow_graph + assortativity matching_ind_und - _graph_laplacian - -.. _ref_datasets: + rich_feeder_peripheral -:mod:`netneurotools.datasets` - Automatic dataset fetching ----------------------------------------------------------- +Network spreading -.. 
automodule:: netneurotools.datasets - :no-members: - :no-inherited-members: +.. autosummary:: + :template: function.rst + :toctree: generated/ -.. currentmodule:: netneurotools.datasets + simulate_atrophy -Functions to download atlases and templates +Statistical network metrics .. autosummary:: :template: function.rst :toctree: generated/ - fetch_cammoun2012 - fetch_civet - fetch_conte69 - fetch_fsaverage - fetch_pauli2018 - fetch_schaefer2018 - fetch_hcp_standards - fetch_voneconomo + network_pearsonr + network_pearsonr_numba + network_pearsonr_pairwise + effective_resistance + network_polarisation + network_variance + network_variance_numba + network_covariance + network_covariance_numba + -Functions to download real-world datasets +.. _ref_modularity: + +:mod:`netneurotools.modularity` - Calculating network modularity +---------------------------------------------------------------- + +.. automodule:: netneurotools.modularity + :no-members: + :no-inherited-members: + +.. currentmodule:: netneurotools.modularity .. autosummary:: :template: function.rst :toctree: generated/ - fetch_connectome - fetch_mirchi2018 - fetch_vazquez_rodriguez2019 + match_cluster_labels + match_assignments + reorder_assignments + find_consensus + consensus_modularity + zrand + get_modularity + get_modularity_z + get_modularity_sig + + +.. _ref_stats: -Functions to generate (pseudo-random) datasets +:mod:`netneurotools.stats` - General statistics functions +--------------------------------------------------------- + +.. automodule:: netneurotools.stats + :no-members: + :no-inherited-members: + +.. currentmodule:: netneurotools.stats + +Correlations .. autosummary:: :template: function.rst :toctree: generated/ - make_correlated_xy + efficient_pearsonr + weighted_pearsonr + make_correlated_xy -.. _ref_freesurfer: +Permutation tests -:mod:`netneurotools.freesurfer` - FreeSurfer compatibility functions --------------------------------------------------------------------- +.. autosummary:: + :template: function.rst + :toctree: generated/ -.. automodule:: netneurotools.freesurfer - :no-members: - :no-inherited-members: + permtest_1samp + permtest_rel + permtest_pearsonr -.. currentmodule:: netneurotools.freesurfer +Regressions .. autosummary:: :template: function.rst :toctree: generated/ - apply_prob_atlas - find_parcel_centroids - parcels_to_vertices - vertices_to_parcels - spin_data - spin_parcels + residualize + get_dominance_stats + -.. _ref_civet: +.. _ref_spatial: -:mod:`netneurotools.civet` - CIVET compatibility functions ----------------------------------------------------------- +:mod:`netneurotools.spatial` - Spatial statistics +------------------------------------------------- -.. automodule:: netneurotools.civet +.. automodule:: netneurotools.spatial :no-members: :no-inherited-members: -.. currentmodule:: netneurotools.civet +.. currentmodule:: netneurotools.spatial + +Calculating spatial statistics .. autosummary:: :template: function.rst :toctree: generated/ - read_civet - civet_to_freesurfer + morans_i + local_morans_i -.. _ref_utils: -:mod:`netneurotools.utils` - Miscellaneous, grab bag utilities --------------------------------------------------------------- +.. _ref_interface: -.. automodule:: netneurotools.utils +:mod:`netneurotools.interface` - Interface for external tools +------------------------------------------------------------- + +.. automodule:: netneurotools.interface :no-members: :no-inherited-members: -.. currentmodule:: netneurotools.utils +.. currentmodule:: netneurotools.interface .. 
autosummary:: :template: function.rst :toctree: generated/ - run - add_constant - get_triu - get_centroids -.. _ref_colors: +.. _ref_experimental: -:mod:`netneurotools.colors` - Useful colormaps --------------------------------------------------------------- +:mod:`netneurotools.experimental` - Functions in alpha stage +------------------------------------------------------------ -.. automodule:: netneurotools.colors +.. automodule:: netneurotools.experimental :no-members: :no-inherited-members: -.. currentmodule:: netneurotools.colors +.. currentmodule:: netneurotools.experimental .. autosummary:: :template: function.rst :toctree: generated/ - available_cmaps + diff --git a/docs/conf.py b/docs/conf.py index 37ee2e5..6ac2598 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -47,6 +47,7 @@ autodoc_default_options = {'members': True, 'inherited-members': True} numpydoc_show_class_members = False autoclass_content = "class" +napoleon_use_param = False # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 5984c5b..0000000 --- a/environment.yml +++ /dev/null @@ -1,15 +0,0 @@ -name: netneurotools -channels: - - defaults - - conda-forge -dependencies: - - python>=3.6 - - matplotlib - - nibabel - - nilearn - - "numpy>=1.16" - - pip - - scikit-learn - - "scipy>=1.4.0" - - pip: - - git+https://github.com/aestrivex/bctpy.git#egg=bctpy diff --git a/examples/plot_consensus_clustering.py b/examples/plot_consensus_clustering.py index 13faab1..69c337f 100644 --- a/examples/plot_consensus_clustering.py +++ b/examples/plot_consensus_clustering.py @@ -81,9 +81,9 @@ # We'll provide these different assignments to our consensus-finding algorithm # which will generate one final community assignment vector: -from netneurotools import cluster +from netneurotools import modularity -consensus = cluster.find_consensus(np.column_stack(ci), seed=1234) +consensus = modularity.find_consensus(np.column_stack(ci), seed=1234) plotting.plot_mod_heatmap(corr, consensus, cmap='viridis') ############################################################################### diff --git a/examples/plot_perm_pvals.py b/examples/plot_perm_pvals.py index 702d28a..6f87056 100644 --- a/examples/plot_perm_pvals.py +++ b/examples/plot_perm_pvals.py @@ -28,8 +28,8 @@ # We can use ``scipy.stats`` for a standard parametric test to assess whether # the array is different from zero: -from scipy import stats -print(stats.ttest_1samp(rvs, 0.0)) +import scipy.stats as sstats +print(sstats.ttest_1samp(rvs, 0.0)) ############################################################################### # And can do the same thing with permutations using ``netneurotools.stats``: @@ -88,7 +88,7 @@ # These two arrays shouldn't be meaningfully different, and we can test that # with a standard parametric test: -print(stats.ttest_rel(rvs1, rvs2)) +print(sstats.ttest_rel(rvs1, rvs2)) ############################################################################### # Or with a non-parametric permutation test: @@ -114,13 +114,12 @@ # # First, we'll generate two correlated variables: -from netneurotools import datasets -x, y = datasets.make_correlated_xy(corr=0.2, size=100) +x, y = nnstats.make_correlated_xy(corr=0.2, size=100) ############################################################################### # We can generate the Pearson correlation with the standard parametric p-value: -print(stats.pearsonr(x, y)) +print(sstats.pearsonr(x, y)) 
############################################################################### # Or use permutation testing to derive the p-value: @@ -132,7 +131,7 @@ # :func:`~.permtest_rel` apply here, so you can provide same-sized arrays and # correlations will only be calculated for paired columns: -a, b = datasets.make_correlated_xy(corr=0.9, size=100) +a, b = nnstats.make_correlated_xy(corr=0.9, size=100) arr1, arr2 = np.column_stack([x, a]), np.column_stack([y, b]) print(nnstats.permtest_pearsonr(arr1, arr2)) diff --git a/netneurotools/__init__.py b/netneurotools/__init__.py index 8163cc1..2ca326a 100644 --- a/netneurotools/__init__.py +++ b/netneurotools/__init__.py @@ -1,6 +1,7 @@ -__all__ = [ - '__version__', -] - -from . import _version -__version__ = _version.get_versions()['version'] + +from . import _version +__version__ = _version.get_versions()['version'] + +__all__ = [ + '__version__' +] diff --git a/netneurotools/civet.py b/netneurotools/civet.py deleted file mode 100644 index 5c247c0..0000000 --- a/netneurotools/civet.py +++ /dev/null @@ -1,104 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for working with CIVET data (ugh).""" - -import nibabel as nib -import numpy as np -from scipy.interpolate import griddata - -from .datasets import fetch_civet, fetch_fsaverage - -_MNI305to152 = np.array([[0.9975, -0.0073, 0.0176, -0.0429], - [0.0146, 1.0009, -0.0024, 1.5496], - [-0.0130, -0.0093, 0.9971, 1.1840], - [0.0000, 0.0000, 0.0000, 1.0000]]) - - -def read_civet(fname): - """ - Read a CIVET-style .obj geometry file. - - Parameters - ---------- - fname : str or os.PathLike - Filepath to .obj file - - Returns - ------- - vertices : (N, 3) - triangles : (T, 3) - """ - k, polygons = 0, [] - with open(fname, 'r') as src: - n_vert = int(src.readline().split()[6]) - vertices = np.zeros((n_vert, 3)) - for i, line in enumerate(src): - if i < n_vert: - vertices[i] = [float(i) for i in line.split()] - elif i >= (2 * n_vert) + 5: - if not line.strip(): - k = 1 - elif k == 1: - polygons.extend([int(i) for i in line.split()]) - - triangles = np.reshape(np.asarray(polygons), (-1, 3)) - - return vertices, triangles - - -def civet_to_freesurfer(brainmap, surface='mid', version='v1', - freesurfer='fsaverage6', method='nearest', - data_dir=None): - """ - Project `brainmap` in CIVET space to `freesurfer` fsaverage space. - - Uses a nearest-neighbor projection based on the geometry of the vertices - - Parameters - ---------- - brainmap : array_like - CIVET brainmap to be converted to freesurfer space - surface : {'white', 'mid'}, optional - Which CIVET surface to use for geometry of `brainmap`. Default: 'mid' - version : {'v1', 'v2'}, optional - Which CIVET version to use for geometry of `brainmap`. Default: 'v1' - freesurfer : str, optional - Which version of FreeSurfer space to project data to. Must be one of - {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6'}. - Default: 'fsaverage6' - method : {'nearest', 'linear'}, optional - What method of interpolation to use when projecting the data between - surfaces. Default: 'nearest' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. 
Default: None - - Returns - ------- - data : np.ndarray - Provided `brainmap` mapped to FreeSurfer - """ - brainmap = np.asarray(brainmap) - densities = (81924, 327684) - n_vert = brainmap.shape[0] - if n_vert not in densities: - raise ValueError('Unable to interpret `brainmap` space; provided ' - 'array must have length in {}. Received: {}' - .format(densities, n_vert)) - - n_vert = n_vert // 2 - icbm = fetch_civet(density='41k' if n_vert == 40962 else '164k', - version=version, data_dir=data_dir, verbose=0)[surface] - fsavg = fetch_fsaverage(version=freesurfer, data_dir=data_dir, verbose=0) - fsavg = fsavg['pial' if surface == 'mid' else surface] - - data = [] - for n, hemi in enumerate(('lh', 'rh')): - sl = slice(n_vert * n, n_vert * (n + 1)) - vert_cv, _ = read_civet(getattr(icbm, hemi)) - vert_fs = nib.affines.apply_affine( - _MNI305to152, nib.freesurfer.read_geometry(getattr(fsavg, hemi))[0] - ) - data.append(griddata(vert_cv, brainmap[sl], vert_fs, method=method)) - - return np.hstack(data) diff --git a/netneurotools/data/osf.json b/netneurotools/data/osf.json deleted file mode 100644 index 289ecbc..0000000 --- a/netneurotools/data/osf.json +++ /dev/null @@ -1,405 +0,0 @@ -{ - "atl-cammoun2012": { - "gcs": { - "url": [ - "mb37e", - "5ce6bb4423fec40017e82c5e" - ], - "md5": "266c4520af768e766328fb8e6648005d" - }, - "fsaverage": { - "url": [ - "mb37e", - "5ce6c30523fec40017e83439" - ], - "md5": "2a19eb4744c0ce6c243f721bd43ecff0" - }, - "fsaverage5": { - "url": [ - "mb37e", - "5e189a1c57341903868036dd" - ], - "md5": "2afb22e1887d47f1ca81c340fff7692b" - }, - "fsaverage6": { - "url": [ - "mb37e", - "5e189a1b5734190380804072" - ], - "md5": "1df743bff13316f67bd41d13ec691c97" - }, - "MNI152NLin2009aSym": { - "url": [ - "mb37e", - "5e2f4bf0e71ef800301880c2" - ], - "md5": "9da30bad22d732aa5f00a6d178d087c4" - }, - "fslr32k": { - "url": [ - "mb37e", - "5e2f4bf1e71ef80027189c56" - ], - "md5": "a5177319d5e0b8825a91d503ded1a59e" - } - }, - "atl-pauli2018": [ - { - "url": [ - "jkzwp", - "5b11fa3364f25a001973dce0" - ], - "md5": "62dd6ff405d3a8b89ee188cafa3a7f6a", - "name": "atl-pauli2018/atl-Pauli2018_space-MNI152NLin2009cAsym_hemi-both_probabilistic.nii.gz" - }, - { - "url": [ - "jkzwp", - "5b11fa2ff1f288000e625a7f" - ], - "md5": "5a5b6246921be08456304875447c68ed", - "name": "atl-pauli2018/atl-Pauli2018_space-MNI152NLin2009cAsym_hemi-both_deterministic.nii.gz" - }, - { - "url": [ - "mb37e", - "5c93b4f034062c001b1ef50d" - ], - "md5": "390a693abeb1a583151f30aa8798bab5", - "name": "atl-pauli2018/atl-Pauli2018_space-MNI152NLin2009cAsym_info.csv" - } - ], - "tpl-conte69": { - "url": [ - "fvuh8", - "5b198ec5ec24e20011b48548" - ], - "md5": "bd944e3f9f343e0e51e562b440960529" - }, - "tpl-yerkes19": { - "url": [ - "mb37e", - "60ae93d504e91a005f1761ab" - ], - "md5": "9ee4f1605fb690a85b04b61549d62925" - }, - "tpl-fsaverage": { - "fsaverage": { - "url": [ - "mb37e", - "5c82830a1d73810018bdacea" - ], - "md5": "1e82c52ed21d06d4e6e7341c725c5262" - }, - "fsaverage3": { - "url": [ - "mb37e", - "5d9f83b6f6b03e000e1ba285" - ], - "md5": "b4182495d341364e3f7c5b86284d8d20" - }, - "fsaverage4": { - "url": [ - "mb37e", - "5d9f83b7fcf91f00111c7473" - ], - "md5": "5a481421dc1286c7bd9b8a47db5fad0b" - }, - "fsaverage5": { - "url": [ - "mb37e", - "5d9f83b6f6b03e00101c932f" - ], - "md5": "cc75f7290c03970a8b8a06dfc215e925" - }, - "fsaverage6": { - "url": [ - "mb37e", - "5d9f83b7a7bc73000cea05f1" - ], - "md5": "8f75b95c0e47ae935d10745baefa2c49" - } - }, - "tpl-civet": { - "v1": { - "civet41k": { - "url": [ - "mb37e", - 
"601daffd84ecf800fe031868" - ], - "md5": "b27219c876464992e1b61da1c60d8d6e" - } - }, - "v2": { - "civet41k": { - "url": [ - "mb37e", - "601dafe77ad0a80119d9483c" - ], - "md5": "a47b015e471c6a800d236f107fda5b4a" - }, - "civet164k": { - "url": [ - "mb37e", - "601dafe87ad0a8011ad94938" - ], - "md5": "02537ea65d5366acd8de729022a34bab" - } - } - }, - "ds-connectomes": { - "celegans": { - "url": [ - "mb37e", - "5d9b8e4aa7bc73000be65508" - ], - "md5": "f35cd893bc1aff4e8184a528fcda14b9", - "keys": [ - "conn", - "dist", - "labels" - ] - }, - "drosophila": { - "url": [ - "mb37e", - "5d9b8e4aa7bc73000ce65d00" - ], - "md5": "6a67a4fc1b4f35b72c42cca4d0827249", - "keys": [ - "conn", - "coords", - "labels", - "networks" - ] - }, - "human_func_scale033": { - "url": [ - "mb37e", - "5d9b8e4afcf91f000f18f57b" - ], - "md5": "1988ab427d9bc0de075bbe600ce0a27f", - "keys": [ - "conn", - "coords", - "labels" - ] - }, - "human_func_scale060": { - "url": [ - "mb37e", - "5d9b8e4aa7bc73000de67117" - ], - "md5": "4191f5a2b0c5063dcba9935ea0ef0bfe", - "keys": [ - "conn", - "coords", - "labels" - ] - }, - "human_func_scale125": { - "url": [ - "mb37e", - "5d9b8e4b26eb50000e78c987" - ], - "md5": "533e11cf9fea67d536648c9ef939a5f5", - "keys": [ - "conn", - "coords", - "labels" - ] - }, - "human_func_scale250": { - "url": [ - "mb37e", - "5d9b8e4efcf91f0012190ba1" - ], - "md5": "4abc7324c2a9ae04ef6cf5555149b3f4", - "keys": [ - "conn", - "coords", - "labels" - ] - }, - "human_func_scale500": { - "url": [ - "mb37e", - "5d9b8e4ff6b03e000d18b5a1" - ], - "md5": "637c6057476b2508f15f244d528e156d", - "keys": [ - "conn", - "coords", - "labels" - ] - }, - "human_struct_scale033": { - "url": [ - "mb37e", - "5d9b8e4f26eb50000e78c993" - ], - "md5": "27a2101f2f04e0fc8de09a8248793235", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "human_struct_scale060": { - "url": [ - "mb37e", - "5d9b8e4da7bc73000be6550e" - ], - "md5": "9289265ab1bd0fa18611eeaf1afce745", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "human_struct_scale125": { - "url": [ - "mb37e", - "5d9b8e50f6b03e000e18aa37" - ], - "md5": "07e60b141809babe8c2645d93cd24984", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "human_struct_scale250": { - "url": [ - "mb37e", - "5d9b8e51fcf91f001118fdc2" - ], - "md5": "56f9ca8b4ecc63ef9aaf64a606755c09", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "human_struct_scale500": { - "url": [ - "mb37e", - "5d9b8e51a7bc73000ee65769" - ], - "md5": "94724e0446f8cb06207a4521ba1df20f", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "macaque_markov": { - "url": [ - "mb37e", - "5d9b8e56a7bc73000ce65d11" - ], - "md5": "5ce43182afc9c4f779db2c0306afb202", - "keys": [ - "conn", - "dist", - "labels" - ] - }, - "macaque_modha": { - "url": [ - "mb37e", - "5d9b8e5626eb50000d78abd0" - ], - "md5": "f467c62b2670feaf75c93d90d5ed5de6", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "mouse": { - "url": [ - "mb37e", - "5d9b8e5626eb50000e78c9a0" - ], - "md5": "dba5cbbb9e72c1cacda945086d77a125", - "keys": [ - "conn", - "coords", - "dist", - "labels", - "acronyms" - ] - }, - "rat": { - "url": [ - "mb37e", - "5d9b8e56f6b03e000f18d06f" - ], - "md5": "9e1f12ce4fa42082a76d62f89670f5d0", - "keys": [ - "conn", - "labels" - ] - } - }, - "ds-vazquez_rodriguez2019": { - "url": [ - "mb37e", - "5d9f5aa4f6b03e000e1b819e" - ], - "md5": "c710365a2cc5cddb8a2fbb5f6ae421a3" - }, - "atl-schaefer2018": { - "fsaverage": { - "url": [ - "mb37e", - "5dbc8d7dcfc96c000dc3581c" - ], - 
"md5": "74dfe4237efaccabf057897c49e8af94" - }, - "fsaverage5": { - "url": [ - "mb37e", - "5dbc8d7daf84c3000eebffb2" - ], - "md5": "45a8c784f1979eb33a119bdab912a51f" - }, - "fsaverage6": { - "url": [ - "mb37e", - "5dbc8d7bcfc96c000ec6dca2" - ], - "md5": "8738daccab4648c3e891a1c8d3a9ec1f" - }, - "fslr32k": { - "url": [ - "mb37e", - "5e3086e4af75930094bdd507" - ], - "md5": "d8378f33107ed5d98c27e8070ebb5aa2" - } - }, - "atl-mmpall": { - "fslr32k": { - "url": [ - "mb37e", - "6047bac259e910009b83114f" - ], - "md5": "fd641742685a239d9c3f60e19a280ca2" - } - }, - "atl-voneconomo_koskinas": { - "url": [ - "mb37e", - "5ed80005fabc45000d639900" - ], - "md5": "67085e2577d21dc3a742f4fcde6e3b18" - } -} diff --git a/netneurotools/datasets/__init__.py b/netneurotools/datasets/__init__.py index cfe50c1..0cd400e 100644 --- a/netneurotools/datasets/__init__.py +++ b/netneurotools/datasets/__init__.py @@ -1,16 +1,53 @@ -"""Functions for fetching and generating datasets.""" +"""Functions for handling datasets.""" + + +from .fetch_template import ( + fetch_fsaverage, fetch_hcp_standards, fetch_civet, + fetch_conte69, fetch_yerkes19 +) + + +from .fetch_atlas import ( + # cortical + fetch_cammoun2012, fetch_schaefer2018, fetch_mmpall, + # subcortical + fetch_pauli2018, fetch_ye2020, + # annotation + fetch_voneconomo +) + + +from .fetch_project import ( + # old projects + fetch_vazquez_rodriguez2019, fetch_mirchi2018, + # new projects + fetch_hansen_manynetworks, fetch_hansen_receptors, fetch_hansen_genecognition, + fetch_hansen_brainstem, fetch_shafiei_hcpmeg, fetch_suarez_mami, + # example data + fetch_famous_gmat, + # resources + fetch_neurosynth +) + +from .datasets_utils import ( + FREESURFER_IGNORE, _get_freesurfer_subjid +) + __all__ = [ - 'fetch_cammoun2012', 'fetch_pauli2018', 'fetch_fsaverage', 'fetch_conte69', - 'fetch_connectome', 'available_connectomes', 'fetch_vazquez_rodriguez2019', - 'fetch_mirchi2018', 'make_correlated_xy', 'fetch_schaefer2018', - 'fetch_hcp_standards', 'fetch_voneconomo', 'fetch_mmpall', 'fetch_civet' + # fetch_template + 'fetch_fsaverage', 'fetch_hcp_standards', 'fetch_civet', + 'fetch_conte69', 'fetch_yerkes19', + # fetch_atlas + 'fetch_cammoun2012', 'fetch_schaefer2018', 'fetch_mmpall', + 'fetch_pauli2018', 'fetch_ye2020', + 'fetch_voneconomo', + # fetch_project + 'fetch_vazquez_rodriguez2019', 'fetch_mirchi2018', + 'fetch_hansen_manynetworks', 'fetch_hansen_receptors', 'fetch_hansen_genecognition', + 'fetch_hansen_brainstem', 'fetch_shafiei_hcpmeg', 'fetch_suarez_mami', + 'fetch_famous_gmat', + 'fetch_neurosynth', + # datasets_utils + 'FREESURFER_IGNORE', '_get_freesurfer_subjid' ] - -from .fetchers import (fetch_cammoun2012, fetch_pauli2018, fetch_fsaverage, - fetch_conte69, fetch_yerkes19, fetch_connectome, - available_connectomes, fetch_vazquez_rodriguez2019, - fetch_schaefer2018, fetch_hcp_standards, - fetch_voneconomo, fetch_mmpall, fetch_civet) -from .generators import (make_correlated_xy) -from .mirchi import (fetch_mirchi2018) diff --git a/netneurotools/datasets/mirchi.py b/netneurotools/datasets/_mirchi2018.py similarity index 71% rename from netneurotools/datasets/mirchi.py rename to netneurotools/datasets/_mirchi2018.py index 190ae63..9addfea 100644 --- a/netneurotools/datasets/mirchi.py +++ b/netneurotools/datasets/_mirchi2018.py @@ -1,13 +1,9 @@ -# -*- coding: utf-8 -*- """Code for re-generating results from Mirchi et al., 2018 (SCAN).""" -import os from urllib.request import HTTPError, urlopen import numpy as np -from .utils import _get_data_dir - TIMESERIES = 
("https://s3.amazonaws.com/openneuro/ds000031/ds000031_R1.0.2" "/uncompressed/derivatives/sub-01/ses-{0}/" @@ -132,47 +128,3 @@ def _get_panas(data_dir=None, resume=True, verbose=1): measures[subscale] = measure.sum(axis=-1) return measures - - -def fetch_mirchi2018(data_dir=None, resume=True, verbose=1): - """ - Download (and creates) dataset for replicating Mirchi et al., 2018, SCAN. - - Parameters - ---------- - data_dir : str, optional - Directory to check for existing data files (if they exist) or to save - generated data files. Files should be named mirchi2018_fc.npy and - mirchi2018_panas.csv for the functional connectivity and behavioral - data, respectively. - - Returns - ------- - X : (73, 198135) numpy.ndarray - Functional connections from MyConnectome rsfMRI time series data - Y : (73, 13) numpy.ndarray - PANAS subscales from MyConnectome behavioral data - """ - data_dir = os.path.join(_get_data_dir(data_dir=data_dir), 'ds-mirchi2018') - os.makedirs(data_dir, exist_ok=True) - - X_fname = os.path.join(data_dir, 'myconnectome_fc.npy') - Y_fname = os.path.join(data_dir, 'myconnectome_panas.csv') - - if not os.path.exists(X_fname): - X = _get_fc(data_dir=data_dir, resume=resume, verbose=verbose) - np.save(X_fname, X, allow_pickle=False) - else: - X = np.load(X_fname, allow_pickle=False) - - if not os.path.exists(Y_fname): - Y = _get_panas(data_dir=data_dir, resume=resume, verbose=verbose) - np.savetxt(Y_fname, np.column_stack(list(Y.values())), - header=','.join(Y.keys()), delimiter=',', fmt='%i') - # convert dictionary to structured array before returning - Y = np.array([tuple(row) for row in np.column_stack(list(Y.values()))], - dtype=dict(names=list(Y.keys()), formats=['i8'] * len(Y))) - else: - Y = np.genfromtxt(Y_fname, delimiter=',', names=True, dtype=int) - - return X, Y diff --git a/netneurotools/datasets/datasets.json b/netneurotools/datasets/datasets.json new file mode 100644 index 0000000..e9fdd66 --- /dev/null +++ b/netneurotools/datasets/datasets.json @@ -0,0 +1,267 @@ +{ + "atl-cammoun2012": { + "gcs": { + "url-type": "osf", + "url": [ + "mb37e", + "5ce6bb4423fec40017e82c5e" + ], + "md5": "266c4520af768e766328fb8e6648005d" + }, + "fsaverage": { + "url-type": "osf", + "url": [ + "mb37e", + "5ce6c30523fec40017e83439" + ], + "md5": "2a19eb4744c0ce6c243f721bd43ecff0" + }, + "fsaverage5": { + "url-type": "osf", + "url": [ + "mb37e", + "5e189a1c57341903868036dd" + ], + "md5": "2afb22e1887d47f1ca81c340fff7692b" + }, + "fsaverage6": { + "url-type": "osf", + "url": [ + "mb37e", + "5e189a1b5734190380804072" + ], + "md5": "1df743bff13316f67bd41d13ec691c97" + }, + "MNI152NLin2009aSym": { + "url-type": "osf", + "url": [ + "mb37e", + "5e2f4bf0e71ef800301880c2" + ], + "md5": "9da30bad22d732aa5f00a6d178d087c4" + }, + "fslr32k": { + "url-type": "osf", + "url": [ + "mb37e", + "5e2f4bf1e71ef80027189c56" + ], + "md5": "a5177319d5e0b8825a91d503ded1a59e" + } + }, + "atl-pauli2018": { + "probabilistic": { + "url-type": "osf", + "url": [ + "jkzwp", + "5b11fa3364f25a001973dce0" + ], + "md5": "62dd6ff405d3a8b89ee188cafa3a7f6a", + "folder-name": "atl-pauli2018", + "file-name": "atl-Pauli2018_space-MNI152NLin2009cAsym_hemi-both_probabilistic.nii.gz" + }, + "deterministic": { + "url-type": "osf", + "url": [ + "jkzwp", + "5b11fa2ff1f288000e625a7f" + ], + "md5": "5a5b6246921be08456304875447c68ed", + "folder-name": "atl-pauli2018", + "file-name": "atl-Pauli2018_space-MNI152NLin2009cAsym_hemi-both_deterministic.nii.gz" + }, + "info": { + "url-type": "osf", + "url": [ + "mb37e", + 
"5c93b4f034062c001b1ef50d" + ], + "md5": "390a693abeb1a583151f30aa8798bab5", + "folder-name": "atl-pauli2018", + "file-name": "atl-Pauli2018_space-MNI152NLin2009cAsym_info.csv" + } + }, + "tpl-conte69": { + "url-type": "osf", + "url": [ + "fvuh8", + "5b198ec5ec24e20011b48548" + ], + "md5": "bd944e3f9f343e0e51e562b440960529" + }, + "tpl-yerkes19": { + "url-type": "osf", + "url": [ + "mb37e", + "60ae93d504e91a005f1761ab" + ], + "md5": "9ee4f1605fb690a85b04b61549d62925" + }, + "tpl-fsaverage": { + "fsaverage": { + "url-type": "osf", + "url": [ + "mb37e", + "5c82830a1d73810018bdacea" + ], + "md5": "1e82c52ed21d06d4e6e7341c725c5262" + }, + "fsaverage3": { + "url-type": "osf", + "url": [ + "mb37e", + "5d9f83b6f6b03e000e1ba285" + ], + "md5": "b4182495d341364e3f7c5b86284d8d20" + }, + "fsaverage4": { + "url-type": "osf", + "url": [ + "mb37e", + "5d9f83b7fcf91f00111c7473" + ], + "md5": "5a481421dc1286c7bd9b8a47db5fad0b" + }, + "fsaverage5": { + "url-type": "osf", + "url": [ + "mb37e", + "5d9f83b6f6b03e00101c932f" + ], + "md5": "cc75f7290c03970a8b8a06dfc215e925" + }, + "fsaverage6": { + "url-type": "osf", + "url": [ + "mb37e", + "5d9f83b7a7bc73000cea05f1" + ], + "md5": "8f75b95c0e47ae935d10745baefa2c49" + } + }, + "tpl-civet": { + "v1": { + "civet41k": { + "url-type": "osf", + "url": [ + "mb37e", + "601daffd84ecf800fe031868" + ], + "md5": "b27219c876464992e1b61da1c60d8d6e" + } + }, + "v2": { + "civet41k": { + "url-type": "osf", + "url": [ + "mb37e", + "601dafe77ad0a80119d9483c" + ], + "md5": "a47b015e471c6a800d236f107fda5b4a" + }, + "civet164k": { + "url-type": "osf", + "url": [ + "mb37e", + "601dafe87ad0a8011ad94938" + ], + "md5": "02537ea65d5366acd8de729022a34bab" + } + } + }, + "ds-famous_gmat": { + "url-type": "osf", + "url": [ + "mb37e", + "664683ca4664da9ebced6b70" + ], + "md5": "b803de1058579881a759f475704e9f35" + }, + "ds-vazquez_rodriguez2019": { + "url-type": "osf", + "url": [ + "mb37e", + "5d9f5aa4f6b03e000e1b819e" + ], + "md5": "c710365a2cc5cddb8a2fbb5f6ae421a3" + }, + "atl-schaefer2018": { + "fsaverage": { + "url-type": "osf", + "url": [ + "mb37e", + "5dbc8d7dcfc96c000dc3581c" + ], + "md5": "74dfe4237efaccabf057897c49e8af94" + }, + "fsaverage5": { + "url-type": "osf", + "url": [ + "mb37e", + "5dbc8d7daf84c3000eebffb2" + ], + "md5": "45a8c784f1979eb33a119bdab912a51f" + }, + "fsaverage6": { + "url-type": "osf", + "url": [ + "mb37e", + "5dbc8d7bcfc96c000ec6dca2" + ], + "md5": "8738daccab4648c3e891a1c8d3a9ec1f" + }, + "fslr32k": { + "url-type": "osf", + "url": [ + "mb37e", + "5e3086e4af75930094bdd507" + ], + "md5": "d8378f33107ed5d98c27e8070ebb5aa2" + } + }, + "atl-mmpall": { + "fslr32k": { + "url-type": "osf", + "url": [ + "mb37e", + "6047bac259e910009b83114f" + ], + "md5": "fd641742685a239d9c3f60e19a280ca2" + } + }, + "atl-voneconomo_koskinas": { + "url-type": "osf", + "url": [ + "mb37e", + "5ed80005fabc45000d639900" + ], + "md5": "67085e2577d21dc3a742f4fcde6e3b18" + }, + "tpl-hcp_standards": { + "standard_mesh_atlases": { + "url-type": "osf", + "url": [ + "mb37e", + "6643d2ab2eacc48a57097091" + ], + "md5": "806abac71f76b8dba8af467ef313c3f7", + "keys": [ + "fs_LR_32k", + "fsaverage", + "fsaverage5", + "fsaverage6", + "MNI152NLin2009cAsym" + ] + } + }, + "ds-hansen_manynetworks": { + "url-type": "github-release", + "url": [ + "netneurolab", + "hansen_many_networks", + "v1.0.0" + ], + "folder-name": "hansen_many_networks-1.0.0", + "md5": "9e503c759506293aa441054cfd206ccc" + } +} diff --git a/netneurotools/datasets/datasets_utils.py b/netneurotools/datasets/datasets_utils.py new file mode 
100644 index 0000000..82bd228 --- /dev/null +++ b/netneurotools/datasets/datasets_utils.py @@ -0,0 +1,291 @@ +"""Utilites for loading / creating datasets.""" + +import json +import os +from collections import namedtuple +import importlib.resources + + +SURFACE = namedtuple('Surface', ('lh', 'rh')) + +FREESURFER_IGNORE = [ + 'unknown', 'corpuscallosum', 'Background+FreeSurfer_Defined_Medial_Wall' +] + + +def _get_data_dir(data_dir=None): + """ + Get path to netneurotools data directory. + + Parameters + ---------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + + Returns + ------- + data_dir : str + Path to use as data directory + """ + if data_dir is None: + data_dir = os.environ.get('NNT_DATA', os.path.join('~', 'nnt-data')) + data_dir = os.path.expanduser(data_dir) + if not os.path.exists(data_dir): + os.makedirs(data_dir) + + return data_dir + + +def _decode_urls(data): + """ + Format `data` object with OSF API URL. + + Parameters + ---------- + data : object + If dict with a `url` key, will format OSF_API with relevant values + + Returns + ------- + data : object + Input data with all `url` dict keys formatted + """ + OSF_API = "https://files.osf.io/v1/resources/{}/providers/osfstorage/{}" + GITHUB_RELEASE_API = "https://github.com/{}/{}/archive/refs/tags/{}.tar.gz" + + if isinstance(data, str) or isinstance(data, list): + return data + elif 'url' in data: + if data['url-type'] == 'osf': + data['url'] = OSF_API.format(*data['url']) + elif data['url-type'] == 'github-release': + data['url'] = GITHUB_RELEASE_API.format(*data['url']) + else: + raise ValueError("URL type {} not recognized".format(data['url-type'])) + + for key, value in data.items(): + data[key] = _decode_urls(value) + + return data + + +def _load_resource_json(relative_path): + """ + Load JSON file from package resources. + + Parameters + ---------- + relative_path : str + Path to JSON file relative to package resources + + Returns + ------- + resource_json : dict + JSON file loaded as a dictionary + """ + # handling pkg_resources.resource_filename deprecation + if getattr(importlib.resources, 'files', None) is not None: + f_resource = importlib.resources.files("netneurotools") / relative_path + else: + from pkg_resources import resource_filename + f_resource = resource_filename('netneurotools', relative_path) + + with open(f_resource) as src: + resource_json = json.load(src) + + return resource_json + + +NNT_DATASETS = _load_resource_json('datasets/datasets.json') +NNT_DATASETS = _decode_urls(NNT_DATASETS) + + +def _get_dataset_info(name): + """ + Return url and MD5 checksum for dataset `name`. + + Parameters + ---------- + name : str + Name of dataset + + Returns + ------- + url : str + URL from which to download dataset + md5 : str + MD5 checksum for file downloade from `url` + """ + try: + return NNT_DATASETS[name] + except KeyError: + raise KeyError( + f"Provided dataset {name} is not valid. " + f"Must be one of: {sorted(NNT_DATASETS.keys())}" + ) from None + + +NNT_REFERENCES = _load_resource_json('datasets/references.json') + + +def _get_reference_info(name, verbose=1, return_dict=False): + """ + Return reference information for dataset `name`. 
+ + Parameters + ---------- + name : str + Name of dataset + + Returns + ------- + reference : str + Reference information for dataset + """ + try: + curr_refs = NNT_REFERENCES[name] + if verbose: + print("Please cite the following papers if you are using this function:") + for bib_category, bib_category_items in curr_refs.items(): + print(f" [{bib_category}]:") + for bib_item in bib_category_items: + print(f" {bib_item['citation']}") + + if return_dict: + return curr_refs + except KeyError: + raise KeyError( + f"Provided dataset {name} is not valid. " + f"Must be one of: {sorted(NNT_REFERENCES.keys())}" + ) from None + + +def _fill_reference_json(bib_file, json_file, overwrite=False, use_defaults=False): + """ + Fill in citation information for references in a JSON file. + + For internal use only. + + Parameters + ---------- + bib_file : str + Path to BibTeX file containing references + json_file : str + Path to JSON file containing references + overwrite : bool, optional + Whether to overwrite existing citation information. Default: False + use_defaults : bool, optional + Whether to use default paths for `bib_file` and `json_file`. Default: False + + Returns + ------- + None + """ + if use_defaults: + bib_file = \ + importlib.resources.files("netneurotools") / "datasets/netneurotools.bib" + json_file = \ + importlib.resources.files("netneurotools") / "datasets/references.json" + + from pybtex import PybtexEngine + engine = PybtexEngine() + + def _get_citation(key): + s = engine.format_from_file( + filename=bib_file, style="unsrt", + citations=[key], output_backend="plaintext" + ) + return s.strip("\n").replace("[1] ", "") + + with open(json_file) as src: + nnt_refs = json.load(src) + + for _, value in nnt_refs.items(): + for bib_category in value: + for bib_item in value[bib_category]: + if bib_item["bibkey"] not in ["", None]: + if bib_item["citation"] == "" or overwrite: + bib_item["citation"] = _get_citation(bib_item["bibkey"]) + + with open(json_file, "w") as dst: + json.dump(nnt_refs, dst, indent=4) + + +def _check_freesurfer_subjid(subject_id, subjects_dir=None): + """ + Check that `subject_id` exists in provided FreeSurfer `subjects_dir`. + + Parameters + ---------- + subject_id : str + FreeSurfer subject ID + subjects_dir : str, optional + Path to FreeSurfer subject directory. If not set, will inherit from + the environmental variable $SUBJECTS_DIR. Default: None + + Returns + ------- + subject_id : str + FreeSurfer subject ID, as provided + subjects_dir : str + Full filepath to `subjects_dir` + + Raises + ------ + FileNotFoundError + """ + # check inputs for subjects_dir and subject_id + if subjects_dir is None or not os.path.isdir(subjects_dir): + try: + subjects_dir = os.environ['SUBJECTS_DIR'] + except KeyError: + subjects_dir = os.getcwd() + else: + subjects_dir = os.path.abspath(subjects_dir) + + subjdir = os.path.join(subjects_dir, subject_id) + if not os.path.isdir(subjdir): + raise FileNotFoundError( + f'Cannot find specified subject id {subject_id} in ' + f'provided subject directory {subjects_dir}.' + ) + + return subject_id, subjects_dir + + +def _get_freesurfer_subjid(subject_id, subjects_dir=None): + """ + Get fsaverage version `subject_id`, fetching if required. + + Parameters + ---------- + subject_id : str + FreeSurfer subject ID + subjects_dir : str, optional + Path to FreeSurfer subject directory. If not set, will inherit from + the environmental variable $SUBJECTS_DIR. 
Default: None + + Returns + ------- + subject_id : str + FreeSurfer subject ID + subjects_dir : str + Path to subject directory with `subject_id` + """ + # check for FreeSurfer install w/fsaverage; otherwise, fetch required + try: + subject_id, subjects_dir = _check_freesurfer_subjid(subject_id, subjects_dir) + except FileNotFoundError: + if 'fsaverage' not in subject_id: + raise ValueError( + f'Provided subject {subject_id} does not exist in provided ' + f'subjects_dir {subjects_dir}' + ) from None + from .fetch_template import fetch_fsaverage + fetch_fsaverage(subject_id) + subjects_dir = os.path.join(_get_data_dir(), 'tpl-fsaverage') + subject_id, subjects_dir = _check_freesurfer_subjid(subject_id, subjects_dir) + + return subject_id, subjects_dir diff --git a/netneurotools/datasets/fetch_atlas.py b/netneurotools/datasets/fetch_atlas.py new file mode 100644 index 0000000..5df542c --- /dev/null +++ b/netneurotools/datasets/fetch_atlas.py @@ -0,0 +1,451 @@ +"""Functions for fetching atlas data.""" +import itertools +import warnings + +try: + # nilearn 0.10.3 + from nilearn.datasets._utils import fetch_files +except ImportError: + from nilearn.datasets.utils import _fetch_files as fetch_files + +from sklearn.utils import Bunch + +from .datasets_utils import ( + SURFACE, + _get_data_dir, _get_dataset_info, _get_reference_info +) + + +def fetch_cammoun2012( + version='MNI152NLin2009aSym', + data_dir=None, resume=True, verbose=1 + ): + """ + Download files for Cammoun et al., 2012 multiscale parcellation. + + This dataset contains + + If you used this data, please cite 1_. + + Parameters + ---------- + version : str, optional + Specifies which version of the dataset to download, where + 'MNI152NLin2009aSym' will return .nii.gz atlas files defined in MNI152 + space, 'fsaverageX' will return .annot files defined in fsaverageX + space (FreeSurfer 6.0.1), 'fslr32k' will return .label.gii files in + fs_LR_32k HCP space, and 'gcs' will return FreeSurfer-style .gcs + probabilistic atlas files for generating new, subject-specific + parcellations. Default: 'MNI152NLin2009aSym' + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['scale033', 'scale060', 'scale125', + 'scale250', 'scale500'], where corresponding values are lists of + filepaths to downloaded parcellation files. + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Notes + ----- + License: https://raw.githubusercontent.com/LTS5/cmp/master/COPYRIGHT + + References + ---------- + .. [1] Leila Cammoun, Xavier Gigandet, Djalel Meskaldji, Jean Philippe + Thiran, Olaf Sporns, Kim Q Do, Philippe Maeder, Reto Meuli, and Patric + Hagmann. Mapping the human connectome at multiple scales with diffusion + spectrum mri. Journal of neuroscience methods, 203(2):386\u2013397, + 2012. + """ + if version == 'surface': + warnings.warn('Providing `version="surface"` is deprecated and will ' + 'be removed in a future release. 
For consistent ' + 'behavior please use `version="fsaverage"` instead.', + DeprecationWarning, stacklevel=2) + version = 'fsaverage' + elif version == 'volume': + warnings.warn('Providing `version="volume"` is deprecated and will ' + 'be removed in a future release. For consistent ' + 'behavior please use `version="MNI152NLin2009aSym"` ' + 'instead.', + DeprecationWarning, stacklevel=2) + version = 'MNI152NLin2009aSym' + + versions = [ + 'gcs', 'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k', + 'MNI152NLin2009aSym' + ] + if version not in versions: + raise ValueError( + f'The version of Cammoun et al., 2012 parcellation ' + f'requested {version} does not exist. Must be one of {versions}' + ) + + dataset_name = 'atl-cammoun2012' + _get_reference_info(dataset_name, verbose=verbose) + + keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name)[version] + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{dataset_name}.tar.gz' + } + + # filenames differ based on selected version of dataset + if version == 'MNI152NLin2009aSym': + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-Cammoun2012_space-MNI152NLin2009aSym_res-{res[-3:]}' + f'_deterministic{suff}' + for res in keys for suff in ['.nii.gz'] + ] + [ + f'{dataset_name}/{version}/' + f'atl-Cammoun2012_space-MNI152NLin2009aSym_info.csv' + ] + elif version == 'fslr32k': + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-Cammoun2012_space-fslr32k_res-{res[-3:]}_hemi-{hemi}' + f'_deterministic{suff}' + for res in keys for hemi in ['L', 'R'] for suff in ['.label.gii'] + ] + elif version in ('fsaverage', 'fsaverage5', 'fsaverage6'): + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-Cammoun2012_space-{version}_res-{res[-3:]}_hemi-{hemi}' + f'_deterministic{suff}' + for res in keys for hemi in ['L', 'R'] for suff in ['.annot'] + ] + else: + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-Cammoun2012_res-{res[5:]}_hemi-{hemi}' + f'_probabilistic{suff}' + for res in keys[:-1] + ['scale500v1', 'scale500v2', 'scale500v3'] + for hemi in ['L', 'R'] for suff in ['.gcs', '.ctab'] + ] + _files = [(f, info['url'], opts) for f in _filenames] + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + + if version == 'MNI152NLin2009aSym': + keys += ['info'] + elif version in ('fslr32k', 'fsaverage', 'fsaverage5', 'fsaverage6'): + data = [SURFACE(*data[i:i + 2]) for i in range(0, len(data), 2)] + else: + data = [data[::2][i:i + 2] for i in range(0, len(data) // 2, 2)] + # deal with the fact that last scale is split into three files :sigh: + data = data[:-3] + [list(itertools.chain.from_iterable(data[-3:]))] + + return Bunch(**dict(zip(keys, data))) + + +def fetch_schaefer2018( + version='fsaverage', + data_dir=None, resume=True, verbose=1 + ): + """ + Download FreeSurfer .annot files for Schaefer et al., 2018 parcellation. + + This dataset contains + + If you used this data, please cite 1_. + + Parameters + ---------- + version : {'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'} + Specifies which surface annotation files should be matched to. Default: + 'fsaverage' + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys of format '{}Parcels{}Networks' where + corresponding values are the left/right hemisphere annotation files + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. 
If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Notes + ----- + License: https://github.com/ThomasYeoLab/CBIG/blob/master/LICENSE.md + + References + ---------- + .. [1] Alexander Schaefer, Ru Kong, Evan M Gordon, Timothy O Laumann, + Xi-Nian Zuo, Avram J Holmes, Simon B Eickhoff, and BT Thomas Yeo. + Local-global parcellation of the human cerebral cortex from intrinsic + functional connectivity mri. Cerebral cortex, 28(9):3095\u20133114, + 2018. + """ + versions = ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'] + if version not in versions: + raise ValueError( + f'The version of Schaefer et al., 2018 parcellation ' + f'requested "{version}" does not exist. Must be one of {versions}' + ) + + dataset_name = 'atl-schaefer2018' + _get_reference_info(dataset_name, verbose=verbose) + + keys = [ + f'{p}Parcels{n}Networks' + for p in range(100, 1001, 100) for n in [7, 17] + ] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name)[version] + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{dataset_name}.tar.gz' + } + + if version == 'fslr32k': + hemispheres, suffix = ['LR'], 'dlabel.nii' + else: + hemispheres, suffix = ['L', 'R'], 'annot' + + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-Schaefer2018_space-{version}_hemi-{hemi}_desc-{desc}' + f'_deterministic.{suffix}' + for desc in keys for hemi in hemispheres + ] + + _files = [(f, info['url'], opts) for f in _filenames] + + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + + if suffix == 'annot': + data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] + + return Bunch(**dict(zip(keys, data))) + + +def fetch_mmpall( + version='fslr32k', + data_dir=None, resume=True, verbose=1 + ): + """ + Download .label.gii files for Glasser et al., 2016 MMPAll atlas. + + This dataset contains + + If you used this data, please cite 1_. + + Parameters + ---------- + version : {'fslr32k'} + Specifies which surface annotation files should be matched to. Default: + 'fslr32k' + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Namedtuple with fields ('lh', 'rh') corresponding to filepaths to + left/right hemisphere parcellation files + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Notes + ----- + License: https://www.humanconnectome.org/study/hcp-young-adult/document/wu-minn-hcp-consortium-open-access-data-use-terms + + References + ---------- + .. [1] Matthew F Glasser, Timothy S Coalson, Emma C Robinson, Carl D Hacker, + John Harwell, Essa Yacoub, Kamil Ugurbil, Jesper Andersson, Christian F + Beckmann, Mark Jenkinson, and others. A multi-modal parcellation of + human cerebral cortex. Nature, 536(7615):171\u2013178, 2016. 
+ """ + versions = ['fslr32k'] + if version not in versions: + raise ValueError( + f'The version of Glasser et al., 2016 parcellation ' + f'requested "{version}" does not exist. Must be one of {versions}' + ) + + dataset_name = 'atl-mmpall' + _get_reference_info(dataset_name, verbose=verbose) + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name)[version] + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{dataset_name}.tar.gz' + } + + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-MMPAll_space-{version}_hemi-{hemi}_deterministic.label.gii' + for hemi in ['L', 'R'] + ] + _files = [(f, info['url'], opts) for f in _filenames] + + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + + return SURFACE(*data) + + +def fetch_pauli2018(data_dir=None, resume=True, verbose=1): + """ + Download files for Pauli et al., 2018 subcortical parcellation. + + This dataset contains + + If you used this data, please cite 1_. + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['probabilistic', 'deterministic'], + where corresponding values are filepaths to downloaded atlas files. + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Notes + ----- + License: CC-BY Attribution 4.0 International + + References + ---------- + .. [1] Wolfgang M Pauli, Amanda N Nili, and J Michael Tyszka. A + high-resolution probabilistic in vivo atlas of human subcortical brain + nuclei. Scientific data, 5(1):1\u201313, 2018. + """ + dataset_name = 'atl-pauli2018' + _get_reference_info(dataset_name, verbose=verbose) + + keys = ['probabilistic', 'deterministic', 'info'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name) + + _files = [] + for _, v in info.items(): + _f = f'{v["folder-name"]}/{v["file-name"]}' + _url = v['url'] + _opts = { + 'md5sum': v['md5'], + 'move': f'{v["folder-name"]}/{v["file-name"]}' + } + _files.append( + (_f, _url, _opts) + ) + + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + + return Bunch(**dict(zip(keys, data))) + + +def fetch_ye2020(): + """Fetch Ye et al., 2020 subcortical parcellation.""" + pass + + +def fetch_voneconomo(data_dir=None, url=None, resume=True, verbose=1): + """ + Fetch von-Economo Koskinas probabilistic FreeSurfer atlas. + + This dataset contains + + If you used this data, please cite 1_. + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['gcs', 'ctab', 'info'] + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Notes + ----- + License: CC-BY-NC-SA 4.0 + + References + ---------- + .. 
[1] Lianne H Scholtens, Marcel A de Reus, Siemon C de Lange, Ruben + Schmidt, and Martijn P van den Heuvel. An mri von economo\u2013koskinas + atlas. NeuroImage, 170:249\u2013256, 2018. + """ + dataset_name = 'atl-voneconomo_koskinas' + _get_reference_info(dataset_name, verbose=verbose) + + keys = ['gcs', 'ctab', 'info'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name) + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{dataset_name}.tar.gz' + } + + _filenames = [ + f'{dataset_name}/' + f'atl-vonEconomoKoskinas_hemi-{hemi}_probabilistic.{suff}' + for hemi in ['L', 'R'] for suff in ['gcs', 'ctab'] + ] + [ + f'{dataset_name}/atl-vonEconomoKoskinas_info.csv' + ] + _files = [(f, info['url'], opts) for f in _filenames] + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + + data = [SURFACE(*data[:-1:2])] + [SURFACE(*data[1:-1:2])] + [data[-1]] + + return Bunch(**dict(zip(keys, data))) diff --git a/netneurotools/datasets/fetch_project.py b/netneurotools/datasets/fetch_project.py new file mode 100644 index 0000000..ce3def1 --- /dev/null +++ b/netneurotools/datasets/fetch_project.py @@ -0,0 +1,346 @@ +"""Functions for fetching project data.""" +import os +from pathlib import Path +import numpy as np + +try: + # nilearn 0.10.3 + from nilearn.datasets._utils import fetch_files +except ImportError: + from nilearn.datasets.utils import _fetch_files as fetch_files + +from sklearn.utils import Bunch + +from .datasets_utils import ( + _get_data_dir, _get_dataset_info, _get_reference_info +) + +from ._mirchi2018 import _get_fc, _get_panas + + +def fetch_vazquez_rodriguez2019(data_dir=None, resume=True, verbose=1): + """ + Download files from Vazquez-Rodriguez et al., 2019, PNAS. + + This dataset contains one file: rsquared_gradient.csv, which contains + two columns: rsquared and gradient. + + If you used this data, please cite [1]_. + + Returns + ------- + data : :class:`sklearn.utils.Bunch` + Dictionary-like object with fetched data. + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. [1] Bertha V\u00e1zquez-Rodr\u00edguez, Laura E Su\u00e1rez, Ross D + Markello, Golia Shafiei, Casey Paquola, Patric Hagmann, Martijn P Van + Den Heuvel, Boris C Bernhardt, R Nathan Spreng, and Bratislav Misic. + Gradients of structure\u2013function tethering across neocortex. + Proceedings of the National Academy of Sciences, + 116(42):21219\u201321227, 2019. 
+    """
+    dataset_name = 'ds-vazquez_rodriguez2019'
+    _get_reference_info(dataset_name, verbose=verbose)
+
+    data_dir = _get_data_dir(data_dir=data_dir)
+    info = _get_dataset_info(dataset_name)
+    opts = {
+        'uncompress': True,
+        'md5sum': info['md5'],
+        'move': f'{dataset_name}.tar.gz'
+    }
+    fetched = fetch_files(
+        data_dir,
+        files=[(dataset_name, info['url'], opts)],
+        resume=resume, verbose=verbose
+    )
+    fetched = Path(fetched[0])
+
+    # load data
+    rsq, grad = np.loadtxt(
+        fetched / "rsquared_gradient.csv",
+        delimiter=',', skiprows=1
+    ).T
+    data = {
+        'rsquared': rsq,
+        'gradient': grad
+    }
+
+    return Bunch(**data)
+
+
+def fetch_mirchi2018(data_dir=None, resume=True, verbose=1):
+    """
+    Download (and create) the dataset for replicating Mirchi et al., 2018, SCAN.
+
+    Parameters
+    ----------
+    data_dir : str, optional
+        Directory to check for existing data files (if they exist) or to save
+        generated data files. Files should be named myconnectome_fc.npy and
+        myconnectome_panas.csv for the functional connectivity and behavioral
+        data, respectively.
+
+    Returns
+    -------
+    X : (73, 198135) numpy.ndarray
+        Functional connections from MyConnectome rsfMRI time series data
+    Y : (73, 13) numpy.ndarray
+        PANAS subscales from MyConnectome behavioral data
+    """
+    data_dir = os.path.join(_get_data_dir(data_dir=data_dir), 'ds-mirchi2018')
+    os.makedirs(data_dir, exist_ok=True)
+
+    X_fname = os.path.join(data_dir, 'myconnectome_fc.npy')
+    Y_fname = os.path.join(data_dir, 'myconnectome_panas.csv')
+
+    if not os.path.exists(X_fname):
+        X = _get_fc(data_dir=data_dir, resume=resume, verbose=verbose)
+        np.save(X_fname, X, allow_pickle=False)
+    else:
+        X = np.load(X_fname, allow_pickle=False)
+
+    if not os.path.exists(Y_fname):
+        Y = _get_panas(data_dir=data_dir, resume=resume, verbose=verbose)
+        np.savetxt(Y_fname, np.column_stack(list(Y.values())),
+                   header=','.join(Y.keys()), delimiter=',', fmt='%i')
+        # convert dictionary to structured array before returning
+        Y = np.array([tuple(row) for row in np.column_stack(list(Y.values()))],
+                     dtype=dict(names=list(Y.keys()), formats=['i8'] * len(Y)))
+    else:
+        Y = np.genfromtxt(Y_fname, delimiter=',', names=True, dtype=int)
+
+    return X, Y
+
+
+def fetch_hansen_manynetworks(data_dir=None, resume=True, verbose=1):
+    """
+    Download files from Hansen et al., 2023, PLOS Biology.
+
+    This dataset contains regional gene co-expression matrices for the
+    Cammoun033, Schaefer100, and Schaefer400 parcellations, along with a
+    functional co-activation matrix for the Cammoun033 parcellation.
+
+    If you used this data, please cite [1]_.
+
+    Returns
+    -------
+    filenames : :class:`sklearn.utils.Bunch`
+        Dictionary-like object with fetched data.
+
+    Other Parameters
+    ----------------
+    data_dir : str, optional
+        Path to use as data directory. If not specified, will check for
+        environmental variable 'NNT_DATA'; if that is not set, will use
+        `~/nnt-data` instead. Default: None
+    resume : bool, optional
+        Whether to attempt to resume partial download, if possible. Default: True
+    verbose : int, optional
+        Modifies verbosity of download, where higher numbers mean more updates.
+        Default: 1
+
+    References
+    ----------
+    .. 
[1] + """ + dataset_name = 'ds-hansen_manynetworks' + _get_reference_info(dataset_name, verbose=verbose) + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name) + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{dataset_name}/{dataset_name}.tar.gz' + } + # the download info["folder-name"].tar.gz was moved to + # {dataset_name}/{dataset_name}.tar.gz and uncompressed + # to keep the same structure as other datasets + fetched = fetch_files( + data_dir, + files=[(f'{dataset_name}/{info["folder-name"]}', info['url'], opts)], + resume=resume, verbose=verbose + ) + fetched = Path(fetched[0]) + + # load data + data = { + "cammoun033": { + "gene": fetched / "data/Cammoun033/gene_coexpression.npy", + "func": fetched / "data/Cammoun033/func_coactivation.npy", + }, + "schaefer100": { + "gene": fetched / "data/Schaefer100/gene_coexpression.npy", + }, + "schaefer400": { + "gene": fetched / "data/Schaefer400/gene_coexpression.npy", + } + } + + return Bunch(**data) + + +def fetch_hansen_receptors(): + """Download files from Hansen et al., 2022, Nature Neuroscience.""" + pass + + +def fetch_hansen_genecognition(): + """Download files from Hansen et al., 2021, Nature Human Behaviour.""" + pass + + +def fetch_hansen_brainstem(): + """Download files from Hansen et al., 2024.""" + pass + + +def fetch_shafiei_hcpmeg(): + """Download files from Shafiei et al., 2022 & Shafiei et al., 2023.""" + pass + + +def fetch_suarez_mami(): + """Download files from Suarez et al., 2022, eLife.""" + pass + + +def fetch_famous_gmat( + dataset, + data_dir=None, resume=True, verbose=1 + ): + """ + Download files from multi-species connectomes. + + This dataset contains + + If you used this data, please cite celegans [1]_, drosophila [2]_, human + [3]_, macaque_markov [4]_, macaque_modha [5]_, mouse [6]_, rat [7]_. + + Parameters + ---------- + dataset : str + Specifies which dataset to download. + + Returns + ------- + data : :class:`sklearn.utils.Bunch` + Dictionary-like object with, at a minimum, keys ['conn', 'labels', + 'ref'] providing connectivity / correlation matrix, region labels, and + relevant reference. Other possible keys include 'dist' (an array of + Euclidean distances between regions of 'conn'), 'coords' (an array of + xyz coordinates for regions of 'conn'), 'acronyms' (an array of + acronyms for regions of 'conn'), and 'networks' (an array of network + affiliations for regions of 'conn'). + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. [1] Lav R Varshney, Beth L Chen, Eric Paniagua, David H Hall, and Dmitri + B Chklovskii. Structural properties of the caenorhabditis elegans + neuronal network. PLoS computational biology, 7(2):e1001066, 2011. + .. [2] Ann-Shyn Chiang, Chih-Yung Lin, Chao-Chun Chuang, Hsiu-Ming Chang, + Chang-Huain Hsieh, Chang-Wei Yeh, Chi-Tin Shih, Jian-Jheng Wu, Guo-Tzau + Wang, Yung-Chang Chen, and others. Three-dimensional reconstruction of + brain-wide wiring networks in drosophila at single-cell resolution. + Current biology, 21(1):1\u201311, 2011. + .. 
[3] Alessandra Griffa, Yasser Alem\u00e1n-G\u00f3mez, and Patric Hagmann. + Structural and functional connectome from 70 young healthy adults [data + set]. Zenodo, 2019. + .. [4] Nikola T Markov, Maria Ercsey-Ravasz, Camille Lamy, Ana Rita Ribeiro + Gomes, Lo\u00efc Magrou, Pierre Misery, Pascale Giroud, Pascal Barone, + Colette Dehay, Zolt\u00e1n Toroczkai, and others. The role of long-range + connections on the specificity of the macaque interareal cortical + network. Proceedings of the National Academy of Sciences, + 110(13):5187\u20135192, 2013. + .. [5] Dharmendra S Modha and Raghavendra Singh. Network architecture of the + long-distance pathways in the macaque brain. Proceedings of the National + Academy of Sciences, 107(30):13485\u201313490, 2010. + .. [6] Mikail Rubinov, Rolf JF Ypma, Charles Watson, and Edward T Bullmore. + Wiring cost and topological participation of the mouse brain connectome. + Proceedings of the National Academy of Sciences, + 112(32):10032\u201310037, 2015. + .. [7] Mihail Bota, Olaf Sporns, and Larry W Swanson. Architecture of the + cerebral cortical association connectome underlying cognition. + Proceedings of the National Academy of Sciences, + 112(16):E2093\u2013E2101, 2015. + """ + available_connectomes = [ + 'celegans', + 'drosophila', + 'human_func_scale033', + 'human_func_scale060', + 'human_func_scale125', + 'human_func_scale250', + 'human_func_scale500', + 'human_struct_scale033', + 'human_struct_scale060', + 'human_struct_scale125', + 'human_struct_scale250', + 'human_struct_scale500', + 'macaque_markov', + 'macaque_modha', + 'mouse', + 'rat' + ] + + if dataset not in available_connectomes: + raise ValueError( + f'Provided dataset {dataset} not available; ' + f'must be one of {available_connectomes}' + ) + + base_dataset_name = 'ds-famous_gmat' + _get_reference_info(base_dataset_name, verbose=verbose) + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(base_dataset_name) + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{base_dataset_name}.tar.gz' + } + fetched = fetch_files( + data_dir, + files=[(base_dataset_name, info['url'], opts)], + resume=resume, verbose=verbose + ) + fetched = Path(fetched[0]) + + data = {} + for f in (fetched / dataset).glob("*.csv"): + try: + data[f.stem] = np.loadtxt(f, delimiter=',') + except ValueError: + data[f.stem] = np.loadtxt(f, delimiter=',', dtype=str) + + return Bunch(**data) + + +def fetch_neurosynth(): + """Download Neurosynth data.""" + pass diff --git a/netneurotools/datasets/fetch_template.py b/netneurotools/datasets/fetch_template.py new file mode 100644 index 0000000..fca331e --- /dev/null +++ b/netneurotools/datasets/fetch_template.py @@ -0,0 +1,409 @@ +"""Functions for fetching template data.""" + + +import json +from pathlib import Path +import os.path as op + +try: + # nilearn 0.10.3 + from nilearn.datasets._utils import fetch_files +except ImportError: + from nilearn.datasets.utils import _fetch_files as fetch_files + +from sklearn.utils import Bunch + +from .datasets_utils import ( + SURFACE, + _get_data_dir, _get_dataset_info, _get_reference_info, _check_freesurfer_subjid +) + + +def fetch_fsaverage( + version='fsaverage', + data_dir=None, resume=True, verbose=1 + ): + """ + Download files for fsaverage FreeSurfer template. + + This dataset contains + + If you used this data, please cite 1_, 2_, 3_. + + Parameters + ---------- + version : str, optional + One of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', + 'fsaverage6'}. 
Default: 'fsaverage' + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['surf'] where corresponding values + are length-2 lists downloaded template files (each list composed of + files for the left and right hemisphere). + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. [1] Anders M Dale, Bruce Fischl, and Martin I Sereno. Cortical + surface-based analysis: i. segmentation and surface reconstruction. + Neuroimage, 9(2):179\u2013194, 1999. + .. [2] Bruce Fischl, Martin I Sereno, and Anders M Dale. Cortical + surface-based analysis: ii: inflation, flattening, and a surface-based + coordinate system. Neuroimage, 9(2):195\u2013207, 1999. + .. [3] Bruce Fischl, Martin I Sereno, Roger BH Tootell, and Anders M Dale. + High-resolution intersubject averaging and a coordinate system for the + cortical surface. Human brain mapping, 8(4):272\u2013284, 1999. + """ + versions = [ + 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' + ] + if version not in versions: + raise ValueError( + f'The version of fsaverage requested {version} does not ' + f'exist. Must be one of {versions}' + ) + + dataset_name = 'tpl-fsaverage' + _get_reference_info(dataset_name, verbose=verbose) + + keys = ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name)[version] + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{dataset_name}.tar.gz' + } + + _filenames = [ + f"{version}/surf/{hemi}.{surf}" + for surf in keys for hemi in ['lh', 'rh'] + ] + + try: + # use local FreeSurfer data if available + data_dir = _check_freesurfer_subjid(version)[1] + data = [op.join(data_dir, f) for f in _filenames] + except FileNotFoundError: + _filenames = [f"{dataset_name}/{_}" for _ in _filenames] + _files = [(f, info['url'], opts) for f in _filenames] + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + + data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] + + return Bunch(**dict(zip(keys, data))) + + +def fetch_hcp_standards(data_dir=None, resume=True, verbose=1): + """ + Fetch HCP standard mesh atlases for converting between FreeSurfer and HCP. + + This dataset contains + + The original file was from 3_, but is no longer available. The archived + file is available from 4_. + + If you used this data, please cite 1_, 2_. + + Returns + ------- + standards : str + Filepath to standard_mesh_atlases directory + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. 
[1] David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna
+       Barch, Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen,
+       Maurizio Corbetta, Sandra W Curtiss, and others. The human connectome
+       project: a data acquisition perspective. Neuroimage,
+       62(4):2222\u20132231, 2012.
+    .. [2] Matthew F Glasser, Stamatios N Sotiropoulos, J Anthony Wilson,
+       Timothy S Coalson, Bruce Fischl, Jesper L Andersson, Junqian Xu, Saad
+       Jbabdi, Matthew Webster, Jonathan R Polimeni, and others. The minimal
+       preprocessing pipelines for the human connectome project. Neuroimage,
+       80:105\u2013124, 2013.
+    .. [3] http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip
+    .. [4] https://web.archive.org/web/20220121035833/http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip
+    """
+    dataset_name = 'tpl-hcp_standards'
+    _get_reference_info(dataset_name, verbose=verbose)
+
+    data_dir = _get_data_dir(data_dir=data_dir)
+    info = _get_dataset_info(dataset_name)["standard_mesh_atlases"]
+
+    opts = {
+        'uncompress': True,
+        'md5sum': info['md5'],
+        'move': f'{dataset_name}.tar.gz'
+    }
+    fetched = fetch_files(
+        data_dir,
+        files=[(f'{dataset_name}/standard_mesh_atlases', info['url'], opts)],
+        resume=resume, verbose=verbose
+    )
+    fetched = Path(fetched[0])
+
+    return fetched
+
+
+def fetch_civet(
+    density='41k', version='v1',
+    data_dir=None, resume=True, verbose=1
+    ):
+    """
+    Fetch CIVET surface files.
+
+    This dataset contains left and right hemisphere 'mid' and 'white' surface
+    geometry files (.obj) for the CIVET template in ICBM152 space.
+
+    If you used this data, please cite [1]_, [2]_, [3]_.
+
+    Parameters
+    ----------
+    density : {'41k', '164k'}, optional
+        Which density of the CIVET-space geometry files to fetch. The
+        high-resolution '164k' surface only exists for version 'v2'.
+    version : {'v1', 'v2'}, optional
+        Which version of the CIVET surfaces to use. Default: 'v1'
+
+    Returns
+    -------
+    filenames : :class:`sklearn.utils.Bunch`
+        Dictionary-like object with keys ['mid', 'white'] containing geometry
+        files for CIVET surface. Note for version 'v1' the 'mid' and 'white'
+        files are identical.
+
+    Other Parameters
+    ----------------
+    data_dir : str, optional
+        Path to use as data directory. If not specified, will check for
+        environmental variable 'NNT_DATA'; if that is not set, will use
+        `~/nnt-data` instead. Default: None
+    resume : bool, optional
+        Whether to attempt to resume partial download, if possible. Default: True
+    verbose : int, optional
+        Modifies verbosity of download, where higher numbers mean more updates.
+        Default: 1
+
+    Notes
+    -----
+    License: https://github.com/aces/CIVET_Full_Project/blob/master/LICENSE
+
+    References
+    ----------
+    .. [1] Oliver Lyttelton, Maxime Boucher, Steven Robbins, and Alan Evans. An
+       unbiased iterative group registration template for cortical surface
+       analysis. Neuroimage, 34(4):1535\u20131544, 2007.
+    .. [2] Vladimir S Fonov, Alan C Evans, Robert C McKinstry, C Robert Almli,
+       and DL Collins. Unbiased nonlinear average age-appropriate brain
+       templates from birth to adulthood. NeuroImage, 47:S102, 2009.
+    .. [3] Y Ad-Dab'bagh, O Lyttelton, J Muehlboeck, C Lepage, D Einarson, K
+       Mok, O Ivanov, R Vincent, J Lerch, and E Fombonne. The civet
+       image-processing environment: a fully automated comprehensive pipeline
+       for anatomical neuroimaging research. proceedings of the 12th annual
+       meeting of the organization for human brain mapping. Florence, Italy,
+       pages 2266, 2006.
+    """
+    densities = ['41k', '164k']
+    if density not in densities:
+        raise ValueError(
+            f'The density of CIVET requested "{density}" does not exist. 
' + f'Must be one of {densities}' + ) + versions = ['v1', 'v2'] + if version not in versions: + raise ValueError( + f'The version of CIVET requested "{version}" does not exist. ' + f'Must be one of {versions}' + ) + + if version == 'v1' and density == '164k': + raise ValueError('The "164k" density CIVET surface only exists for ' + 'version "v2"') + + dataset_name = 'tpl-civet' + _get_reference_info(dataset_name, verbose=verbose) + + keys = ['mid', 'white'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name)[version][f'civet{density}'] + + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{dataset_name}.tar.gz' + } + + _filenames = [ + f"{dataset_name}/{version}/civet{density}/" + f"tpl-civet_space-ICBM152_hemi-{hemi}_den-{density}_{surf}.obj" + for surf in keys for hemi in ['L', 'R'] + ] + _files = [(f, info['url'], opts) for f in _filenames] + + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + + data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] + + return Bunch(**dict(zip(keys, data))) + + +def fetch_conte69(data_dir=None, resume=True, verbose=1): + """ + Download files for Van Essen et al., 2012 Conte69 template. + + This dataset contains + + If you used this data, please cite 1_, 2_. + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['midthickness', 'inflated', + 'vinflated'], where corresponding values are lists of filepaths to + downloaded template files. + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. [1] David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna Barch, + Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, Maurizio + Corbetta, Sandra W Curtiss, and others. The human connectome project: a + data acquisition perspective. Neuroimage, 62(4):2222\u20132231, 2012. + .. [2] David C Van Essen, Matthew F Glasser, Donna L Dierker, John Harwell, + and Timothy Coalson. Parcellations and hemispheric asymmetries of human + cerebral cortex analyzed on surface-based atlases. Cerebral cortex, + 22(10):2241\u20132262, 2012. + .. 
[3] http://brainvis.wustl.edu/wiki/index.php//Caret:Atlases/Conte69_Atlas
+    """
+    dataset_name = 'tpl-conte69'
+    _get_reference_info(dataset_name, verbose=verbose)
+
+    keys = ['midthickness', 'inflated', 'vinflated']
+
+    data_dir = _get_data_dir(data_dir=data_dir)
+    info = _get_dataset_info(dataset_name)
+    opts = {
+        'uncompress': True,
+        'md5sum': info['md5'],
+        'move': f'{dataset_name}.tar.gz'
+    }
+
+    _filenames = [
+        f"{dataset_name}/tpl-conte69_space-MNI305_variant-fsLR32k_{res}.{hemi}.surf.gii"
+        for res in keys for hemi in ['L', 'R']
+    ] + [
+        f"{dataset_name}/template_description.json"
+    ]
+    _files = [(f, info['url'], opts) for f in _filenames]
+
+    data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose)
+
+    with open(data[-1], 'r') as src:
+        data[-1] = json.load(src)
+
+    # bundle hemispheres together
+    data = [SURFACE(*data[:-1][i:i + 2]) for i in range(0, 6, 2)] + [data[-1]]
+
+    return Bunch(**dict(zip(keys + ['info'], data)))
+
+
+def fetch_yerkes19(data_dir=None, resume=True, verbose=1):
+    """
+    Download files for Donahue et al., 2016 Yerkes19 template.
+
+    This dataset contains left and right hemisphere midthickness, inflated,
+    and very-inflated ('vinflated') surfaces for the Yerkes19 macaque template
+    in fsLR-32k space.
+
+    If you used this data, please cite [1]_.
+
+    Returns
+    -------
+    filenames : :class:`sklearn.utils.Bunch`
+        Dictionary-like object with keys ['midthickness', 'inflated',
+        'vinflated'], where corresponding values are lists of filepaths to
+        downloaded template files.
+
+    Other Parameters
+    ----------------
+    data_dir : str, optional
+        Path to use as data directory. If not specified, will check for
+        environmental variable 'NNT_DATA'; if that is not set, will use
+        `~/nnt-data` instead. Default: None
+    resume : bool, optional
+        Whether to attempt to resume partial download, if possible. Default: True
+    verbose : int, optional
+        Modifies verbosity of download, where higher numbers mean more updates.
+        Default: 1
+
+    References
+    ----------
+    .. [1] Chad J Donahue, Stamatios N Sotiropoulos, Saad Jbabdi, Moises
+       Hernandez-Fernandez, Timothy E Behrens, Tim B Dyrby, Timothy Coalson,
+       Henry Kennedy, Kenneth Knoblauch, David C Van Essen, and others. Using
+       diffusion tractography to predict cortical connection strength and
+       distance: a quantitative comparison with tracers in the monkey. Journal
+       of Neuroscience, 36(25):6758\u20136770, 2016.
+    .. 
[2] https://balsa.wustl.edu/reference/show/976nz + """ + dataset_name = 'tpl-yerkes19' + _get_reference_info(dataset_name, verbose=verbose) + + keys = ['midthickness', 'inflated', 'vinflated'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name) + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{dataset_name}.tar.gz' + } + _filenames = [ + f"{dataset_name}/tpl-yerkes19_space-fsLR32k_{res}.{hemi}.surf.gii" + for res in keys for hemi in ['L', 'R'] + + ] + _files = [(f, info['url'], opts) for f in _filenames] + + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + + # bundle hemispheres together + data = [SURFACE(*data[i:i + 2]) for i in range(0, 6, 2)] + + return Bunch(**dict(zip(keys + ['info'], data))) diff --git a/netneurotools/datasets/fetchers.py b/netneurotools/datasets/fetchers.py deleted file mode 100644 index b2fa95d..0000000 --- a/netneurotools/datasets/fetchers.py +++ /dev/null @@ -1,882 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for fetching datasets from the internet.""" - -from collections import namedtuple -import itertools -import json -import os.path as op -import warnings - -try: - # nilearn 0.10.3 - from nilearn.datasets._utils import fetch_files as _fetch_files -except ImportError: - from nilearn.datasets.utils import _fetch_files - -import numpy as np -from sklearn.utils import Bunch - -from .utils import _get_data_dir, _get_dataset_info -from ..utils import check_fs_subjid - -SURFACE = namedtuple('Surface', ('lh', 'rh')) - - -def fetch_cammoun2012(version='MNI152NLin2009aSym', data_dir=None, url=None, - resume=True, verbose=1): - """ - Download files for Cammoun et al., 2012 multiscale parcellation. - - Parameters - ---------- - version : str, optional - Specifies which version of the dataset to download, where - 'MNI152NLin2009aSym' will return .nii.gz atlas files defined in MNI152 - space, 'fsaverageX' will return .annot files defined in fsaverageX - space (FreeSurfer 6.0.1), 'fslr32k' will return .label.gii files in - fs_LR_32k HCP space, and 'gcs' will return FreeSurfer-style .gcs - probabilistic atlas files for generating new, subject-specific - parcellations. Default: 'MNI152NLin2009aSym' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['scale033', 'scale060', 'scale125', - 'scale250', 'scale500'], where corresponding values are lists of - filepaths to downloaded parcellation files. - - References - ---------- - Cammoun, L., Gigandet, X., Meskaldji, D., Thiran, J. P., Sporns, O., Do, K. - Q., Maeder, P., and Meuli, R., & Hagmann, P. (2012). Mapping the human - connectome at multiple scales with diffusion spectrum MRI. Journal of - Neuroscience Methods, 203(2), 386-397. - - Notes - ----- - License: https://raw.githubusercontent.com/LTS5/cmp/master/COPYRIGHT - """ - if version == 'surface': - warnings.warn('Providing `version="surface"` is deprecated and will ' - 'be removed in a future release. 
For consistent ' - 'behavior please use `version="fsaverage"` instead.', - DeprecationWarning, stacklevel=2) - version = 'fsaverage' - elif version == 'volume': - warnings.warn('Providing `version="volume"` is deprecated and will ' - 'be removed in a future release. For consistent ' - 'behavior please use `version="MNI152NLin2009aSym"` ' - 'instead.', - DeprecationWarning, stacklevel=2) - version = 'MNI152NLin2009aSym' - - versions = [ - 'gcs', 'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k', - 'MNI152NLin2009aSym' - ] - if version not in versions: - raise ValueError('The version of Cammoun et al., 2012 parcellation ' - 'requested "{}" does not exist. Must be one of {}' - .format(version, versions)) - - dataset_name = 'atl-cammoun2012' - keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version] - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - # filenames differ based on selected version of dataset - if version == 'MNI152NLin2009aSym': - filenames = [ - 'atl-Cammoun2012_space-MNI152NLin2009aSym_res-{}_deterministic{}' - .format(res[-3:], suff) for res in keys for suff in ['.nii.gz'] - ] + ['atl-Cammoun2012_space-MNI152NLin2009aSym_info.csv'] - elif version == 'fslr32k': - filenames = [ - 'atl-Cammoun2012_space-fslr32k_res-{}_hemi-{}_deterministic{}' - .format(res[-3:], hemi, suff) for res in keys - for hemi in ['L', 'R'] for suff in ['.label.gii'] - ] - elif version in ('fsaverage', 'fsaverage5', 'fsaverage6'): - filenames = [ - 'atl-Cammoun2012_space-{}_res-{}_hemi-{}_deterministic{}' - .format(version, res[-3:], hemi, suff) for res in keys - for hemi in ['L', 'R'] for suff in ['.annot'] - ] - else: - filenames = [ - 'atl-Cammoun2012_res-{}_hemi-{}_probabilistic{}' - .format(res[5:], hemi, suff) - for res in keys[:-1] + ['scale500v1', 'scale500v2', 'scale500v3'] - for hemi in ['L', 'R'] for suff in ['.gcs', '.ctab'] - ] - - files = [ - (op.join(dataset_name, version, f), url, opts) for f in filenames - ] - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - - if version == 'MNI152NLin2009aSym': - keys += ['info'] - elif version in ('fslr32k', 'fsaverage', 'fsaverage5', 'fsaverage6'): - data = [SURFACE(*data[i:i + 2]) for i in range(0, len(data), 2)] - else: - data = [data[::2][i:i + 2] for i in range(0, len(data) // 2, 2)] - # deal with the fact that last scale is split into three files :sigh: - data = data[:-3] + [list(itertools.chain.from_iterable(data[-3:]))] - - return Bunch(**dict(zip(keys, data))) - - -def fetch_conte69(data_dir=None, url=None, resume=True, verbose=1): - """ - Download files for Van Essen et al., 2012 Conte69 template. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. 
- Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['midthickness', 'inflated', - 'vinflated'], where corresponding values are lists of filepaths to - downloaded template files. - - References - ---------- - http://brainvis.wustl.edu/wiki/index.php//Caret:Atlases/Conte69_Atlas - - Van Essen, D. C., Glasser, M. F., Dierker, D. L., Harwell, J., & Coalson, - T. (2011). Parcellations and hemispheric asymmetries of human cerebral - cortex analyzed on surface-based atlases. Cerebral cortex, 22(10), - 2241-2262. - - Notes - ----- - License: ??? - """ - dataset_name = 'tpl-conte69' - keys = ['midthickness', 'inflated', 'vinflated'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - filenames = [ - 'tpl-conte69/tpl-conte69_space-MNI305_variant-fsLR32k_{}.{}.surf.gii' - .format(res, hemi) for res in keys for hemi in ['L', 'R'] - ] + ['tpl-conte69/template_description.json'] - - data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], - resume=resume, verbose=verbose) - - with open(data[-1], 'r') as src: - data[-1] = json.load(src) - - # bundle hemispheres together - data = [SURFACE(*data[:-1][i:i + 2]) for i in range(0, 6, 2)] + [data[-1]] - - return Bunch(**dict(zip(keys + ['info'], data))) - - -def fetch_yerkes19(data_dir=None, url=None, resume=None, verbose=1): - """ - Download files for Donahue et al., 2016 Yerkes19 template. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['midthickness', 'inflated', - 'vinflated'], where corresponding values are lists of filepaths to - downloaded template files. - - References - ---------- - https://balsa.wustl.edu/reference/show/976nz - - Donahue, C. J., Sotiropoulos, S. N., Jbabdi, S., Hernandez-Fernandez, M., - Behrens, T. E., Dyrby, T. B., ... & Glasser, M. F. (2016). Using diffusion - tractography to predict cortical connection strength and distance: a - quantitative comparison with tracers in the monkey. Journal of - Neuroscience, 36(25), 6758-6770. - - Notes - ----- - License: ??? 
- """ - dataset_name = 'tpl-yerkes19' - keys = ['midthickness', 'inflated', 'vinflated'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - filenames = [ - 'tpl-yerkes19/tpl-yerkes19_space-fsLR32k_{}.{}.surf.gii' - .format(res, hemi) for res in keys for hemi in ['L', 'R'] - ] - - data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], - resume=resume, verbose=verbose) - - # bundle hemispheres together - data = [SURFACE(*data[i:i + 2]) for i in range(0, 6, 2)] - - return Bunch(**dict(zip(keys + ['info'], data))) - - -def fetch_pauli2018(data_dir=None, url=None, resume=True, verbose=1): - """ - Download files for Pauli et al., 2018 subcortical parcellation. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['probabilistic', 'deterministic'], - where corresponding values are filepaths to downloaded atlas files. - - References - ---------- - Pauli, W. M., Nili, A. N., & Tyszka, J. M. (2018). A high-resolution - probabilistic in vivo atlas of human subcortical brain nuclei. Scientific - Data, 5, 180063. - - Notes - ----- - License: CC-BY Attribution 4.0 International - """ - dataset_name = 'atl-pauli2018' - keys = ['probabilistic', 'deterministic', 'info'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - - # format the query how _fetch_files() wants things and then download data - files = [ - (i['name'], i['url'], dict(md5sum=i['md5'], move=i['name'])) - for i in info - ] - - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - - return Bunch(**dict(zip(keys, data))) - - -def fetch_fsaverage(version='fsaverage', data_dir=None, url=None, resume=True, - verbose=1): - """ - Download files for fsaverage FreeSurfer template. - - Parameters - ---------- - version : str, optional - One of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', - 'fsaverage6'}. Default: 'fsaverage' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['surf'] where corresponding values - are length-2 lists downloaded template files (each list composed of - files for the left and right hemisphere). 
- """ - versions = [ - 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' - ] - if version not in versions: - raise ValueError('The version of fsaverage requested "{}" does not ' - 'exist. Must be one of {}'.format(version, versions)) - - dataset_name = 'tpl-fsaverage' - keys = ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version] - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - filenames = [ - op.join(version, 'surf', '{}.{}'.format(hemi, surf)) - for surf in keys for hemi in ['lh', 'rh'] - ] - - try: - data_dir = check_fs_subjid(version)[1] - data = [op.join(data_dir, f) for f in filenames] - except FileNotFoundError: - data = _fetch_files(data_dir, resume=resume, verbose=verbose, - files=[(op.join(dataset_name, f), url, opts) - for f in filenames]) - - data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] - - return Bunch(**dict(zip(keys, data))) - - -def available_connectomes(): - """ - List datasets available via :func:`~.fetch_connectome`. - - Returns - ------- - datasets : list of str - List of available datasets - """ - return sorted(_get_dataset_info('ds-connectomes').keys()) - - -def fetch_connectome(dataset, data_dir=None, url=None, resume=True, - verbose=1): - """ - Download files from multi-species connectomes. - - Parameters - ---------- - dataset : str - Specifies which dataset to download; must be one of the datasets listed - in :func:`netneurotools.datasets.available_connectomes()`. - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - data : :class:`sklearn.utils.Bunch` - Dictionary-like object with, at a minimum, keys ['conn', 'labels', - 'ref'] providing connectivity / correlation matrix, region labels, and - relevant reference. 
Other possible keys include 'dist' (an array of - Euclidean distances between regions of 'conn'), 'coords' (an array of - xyz coordinates for regions of 'conn'), 'acronyms' (an array of - acronyms for regions of 'conn'), and 'networks' (an array of network - affiliations for regions of 'conn') - - References - ---------- - See `ref` key of returned dictionary object for relevant dataset reference - """ - if dataset not in available_connectomes(): - raise ValueError('Provided dataset {} not available; must be one of {}' - .format(dataset, available_connectomes())) - - dataset_name = 'ds-connectomes' - - data_dir = op.join(_get_data_dir(data_dir=data_dir), dataset_name) - info = _get_dataset_info(dataset_name)[dataset] - if url is None: - url = info['url'] - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset) - } - - filenames = [ - op.join(dataset, '{}.csv'.format(fn)) for fn in info['keys'] - ] + [op.join(dataset, 'ref.txt')] - data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], - resume=resume, verbose=verbose) - - # load data - for n, arr in enumerate(data[:-1]): - try: - data[n] = np.loadtxt(arr, delimiter=',') - except ValueError: - data[n] = np.loadtxt(arr, delimiter=',', dtype=str) - with open(data[-1]) as src: - data[-1] = src.read().strip() - - return Bunch(**dict(zip(info['keys'] + ['ref'], data))) - - -def fetch_vazquez_rodriguez2019(data_dir=None, url=None, resume=True, - verbose=1): - """ - Download files from Vazquez-Rodriguez et al., 2019, PNAS. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - data : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['rsquared', 'gradient'] containing - 1000 values from - - References - ---------- - See `ref` key of returned dictionary object for relevant dataset reference - """ - dataset_name = 'ds-vazquez_rodriguez2019' - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - if url is None: - url = info['url'] - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - filenames = [ - op.join(dataset_name, 'rsquared_gradient.csv') - ] - data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], - resume=resume, verbose=verbose) - - # load data - rsq, grad = np.loadtxt(data[0], delimiter=',', skiprows=1).T - - return Bunch(rsquared=rsq, gradient=grad) - - -def fetch_schaefer2018(version='fsaverage', data_dir=None, url=None, - resume=True, verbose=1): - """ - Download FreeSurfer .annot files for Schaefer et al., 2018 parcellation. - - Parameters - ---------- - version : {'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'} - Specifies which surface annotation files should be matched to. Default: - 'fsaverage' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. 
Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys of format '{}Parcels{}Networks' where - corresponding values are the left/right hemisphere annotation files - - References - ---------- - Schaefer, A., Kong, R., Gordon, E. M., Laumann, T. O., Zuo, X. N., Holmes, - A. J., ... & Yeo, B. T. (2017). Local-global parcellation of the human - cerebral cortex from intrinsic functional connectivity MRI. Cerebral - Cortex, 28(9), 3095-3114. - - Notes - ----- - License: https://github.com/ThomasYeoLab/CBIG/blob/master/LICENSE.md - """ - versions = ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'] - if version not in versions: - raise ValueError('The version of Schaefer et al., 2018 parcellation ' - 'requested "{}" does not exist. Must be one of {}' - .format(version, versions)) - - dataset_name = 'atl-schaefer2018' - keys = [ - '{}Parcels{}Networks'.format(p, n) - for p in range(100, 1001, 100) for n in [7, 17] - ] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version] - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - if version == 'fslr32k': - hemispheres, suffix = ['LR'], 'dlabel.nii' - else: - hemispheres, suffix = ['L', 'R'], 'annot' - filenames = [ - 'atl-Schaefer2018_space-{}_hemi-{}_desc-{}_deterministic.{}' - .format(version, hemi, desc, suffix) - for desc in keys for hemi in hemispheres - ] - - files = [(op.join(dataset_name, version, f), url, opts) - for f in filenames] - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - - if suffix == 'annot': - data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] - - return Bunch(**dict(zip(keys, data))) - - -def fetch_hcp_standards(data_dir=None, url=None, resume=True, verbose=1): - """ - Fetch HCP standard mesh atlases for converting between FreeSurfer and HCP. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - standards : str - Filepath to standard_mesh_atlases directory - """ - if url is None: - url = 'https://web.archive.org/web/20220121035833/' + \ - 'http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip' - dataset_name = 'standard_mesh_atlases' - data_dir = _get_data_dir(data_dir=data_dir) - opts = { - 'uncompress': True, - 'move': '{}.zip'.format(dataset_name) - } - filenames = [ - 'L.sphere.32k_fs_LR.surf.gii', 'R.sphere.32k_fs_LR.surf.gii' - ] - files = [(op.join(dataset_name, f), url, opts) for f in filenames] - _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - - return op.join(data_dir, dataset_name) - - -def fetch_mmpall(version='fslr32k', data_dir=None, url=None, resume=True, - verbose=1): - """ - Download .label.gii files for Glasser et al., 2016 MMPAll atlas. 
- - Parameters - ---------- - version : {'fslr32k'} - Specifies which surface annotation files should be matched to. Default: - 'fslr32k' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Namedtuple with fields ('lh', 'rh') corresponding to filepaths to - left/right hemisphere parcellation files - - References - ---------- - Glasser, M. F., Coalson, T. S., Robinson, E. C., Hacker, C. D., Harwell, - J., Yacoub, E., ... & Van Essen, D. C. (2016). A multi-modal parcellation - of human cerebral cortex. Nature, 536(7615), 171-178. - - Notes - ----- - License: https://www.humanconnectome.org/study/hcp-young-adult/document/ - wu-minn-hcp-consortium-open-access-data-use-terms - """ - versions = ['fslr32k'] - if version not in versions: - raise ValueError('The version of Glasser et al., 2016 parcellation ' - 'requested "{}" does not exist. Must be one of {}' - .format(version, versions)) - - dataset_name = 'atl-mmpall' - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version] - if url is None: - url = info['url'] - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - hemispheres = ['L', 'R'] - filenames = [ - 'atl-MMPAll_space-{}_hemi-{}_deterministic.label.gii' - .format(version, hemi) for hemi in hemispheres - ] - - files = [(op.join(dataset_name, version, f), url, opts) for f in filenames] - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - - return SURFACE(*data) - - -def fetch_voneconomo(data_dir=None, url=None, resume=True, verbose=1): - """ - Fetch von-Economo Koskinas probabilistic FreeSurfer atlas. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['gcs', 'ctab', 'info'] - - References - ---------- - Scholtens, L. H., de Reus, M. A., de Lange, S. C., Schmidt, R., & van den - Heuvel, M. P. (2018). An MRI von Economo–Koskinas atlas. NeuroImage, 170, - 249-256. 
- - Notes - ----- - License: CC-BY-NC-SA 4.0 - """ - dataset_name = 'atl-voneconomo_koskinas' - keys = ['gcs', 'ctab', 'info'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - if url is None: - url = info['url'] - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - filenames = [ - 'atl-vonEconomoKoskinas_hemi-{}_probabilistic.{}'.format(hemi, suff) - for hemi in ['L', 'R'] for suff in ['gcs', 'ctab'] - ] + ['atl-vonEconomoKoskinas_info.csv'] - files = [(op.join(dataset_name, f), url, opts) for f in filenames] - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - data = [SURFACE(*data[:-1:2])] + [SURFACE(*data[1:-1:2])] + [data[-1]] - - return Bunch(**dict(zip(keys, data))) - - -def fetch_civet(density='41k', version='v1', data_dir=None, url=None, - resume=True, verbose=1): - """ - Fetch CIVET surface files. - - Parameters - ---------- - density : {'41k', '164k'}, optional - Which density of the CIVET-space geometry files to fetch. The - high-resolution '164k' surface only exists for version 'v2' - version : {'v1, 'v2'}, optional - Which version of the CIVET surfaces to use. Default: 'v2' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['mid', 'white'] containing geometry - files for CIVET surface. Note for version 'v1' the 'mid' and 'white' - files are identical. - - References - ---------- - Y. Ad-Dab’bagh, O. Lyttelton, J.-S. Muehlboeck, C. Lepage, D. Einarson, K. - Mok, O. Ivanov, R. Vincent, J. Lerch, E. Fombonne, A. C. Evans, The CIVET - image-processing environment: A fully automated comprehensive pipeline for - anatomical neuroimaging research. Proceedings of the 12th Annual Meeting of - the Organization for Human Brain Mapping (2006). - - Notes - ----- - License: https://github.com/aces/CIVET_Full_Project/blob/master/LICENSE - """ - densities = ['41k', '164k'] - if density not in densities: - raise ValueError('The density of CIVET requested "{}" does not exist. ' - 'Must be one of {}'.format(density, densities)) - versions = ['v1', 'v2'] - if version not in versions: - raise ValueError('The version of CIVET requested "{}" does not exist. 
' - 'Must be one of {}'.format(version, versions)) - - if version == 'v1' and density == '164k': - raise ValueError('The "164k" density CIVET surface only exists for ' - 'version "v2"') - - dataset_name = 'tpl-civet' - keys = ['mid', 'white'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version]['civet{}'.format(density)] - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - filenames = [ - op.join(dataset_name, version, 'civet{}'.format(density), - 'tpl-civet_space-ICBM152_hemi-{}_den-{}_{}.obj' - .format(hemi, density, surf)) - for surf in keys for hemi in ['L', 'R'] - ] - - data = _fetch_files(data_dir, resume=resume, verbose=verbose, - files=[(f, url, opts) for f in filenames]) - - data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] - - return Bunch(**dict(zip(keys, data))) diff --git a/netneurotools/datasets/generators.py b/netneurotools/datasets/generators.py deleted file mode 100644 index 42c4f56..0000000 --- a/netneurotools/datasets/generators.py +++ /dev/null @@ -1,93 +0,0 @@ - -# -*- coding: utf-8 -*- -"""Functions for making "random" datasets.""" - -import numpy as np -from sklearn.utils.validation import check_random_state - - -def make_correlated_xy(corr=0.85, size=10000, seed=None, tol=0.001): - """ - Generate random vectors that are correlated to approximately `corr`. - - Parameters - ---------- - corr : [-1, 1] float or (N, N) numpy.ndarray, optional - The approximate correlation desired. If a float is provided, two - vectors with the specified level of correlation will be generated. If - an array is provided, it is assumed to be a symmetrical correlation - matrix and ``len(corr)`` vectors with the specified levels of - correlation will be generated. Default: 0.85 - size : int or tuple, optional - Desired size of the generated vectors. Default: 1000 - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Default: None - tol : [0, 1] float, optional - Tolerance of correlation between generated `vectors` and specified - `corr`. Default: 0.001 - - Returns - ------- - vectors : numpy.ndarray - Random vectors of size `size` with correlation specified by `corr` - - Examples - -------- - >>> from netneurotools import datasets - - By default two vectors are generated with specified correlation - - >>> x, y = datasets.make_correlated_xy() - >>> np.corrcoef(x, y) # doctest: +SKIP - array([[1. , 0.85083661], - [0.85083661, 1. ]]) - >>> x, y = datasets.make_correlated_xy(corr=0.2) - >>> np.corrcoef(x, y) # doctest: +SKIP - array([[1. , 0.20069953], - [0.20069953, 1. ]]) - - You can also provide correlation matrices to generate more than two vectors - if desired. Note that this makes it more difficult to ensure the actual - correlations are close to the desired values: - - >>> corr = [[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]] - >>> out = datasets.make_correlated_xy(corr=corr) - >>> out.shape - (3, 10000) - >>> np.corrcoef(out) # doctest: +SKIP - array([[1. , 0.50965273, 0.30235686], - [0.50965273, 1. , 0.01089107], - [0.30235686, 0.01089107, 1. 
]]) - """ - rs = check_random_state(seed) - - # no correlations outside [-1, 1] bounds - if np.any(np.abs(corr) > 1): - raise ValueError('Provided `corr` must (all) be in range [-1, 1].') - - # if we're given a single number, assume two vectors are desired - if isinstance(corr, (int, float)): - covs = np.ones((2, 2)) * 0.111 - covs[(0, 1), (1, 0)] *= corr - # if we're given a correlation matrix, assume `N` vectors are desired - elif isinstance(corr, (list, np.ndarray)): - corr = np.asarray(corr) - if corr.ndim != 2 or len(corr) != len(corr.T): - raise ValueError('If `corr` is a list or array, must be a 2D ' - 'square array, not {}'.format(corr.shape)) - if np.any(np.diag(corr) != 1): - raise ValueError('Diagonal of `corr` must be 1.') - covs = corr * 0.111 - means = [0] * len(covs) - - # generate the variables - count = 0 - while count < 500: - vectors = rs.multivariate_normal(mean=means, cov=covs, size=size).T - flat = vectors.reshape(len(vectors), -1) - # if diff between actual and desired correlations less than tol, break - if np.all(np.abs(np.corrcoef(flat) - (covs / 0.111)) < tol): - break - count += 1 - - return vectors diff --git a/netneurotools/datasets/netneurotools.bib b/netneurotools/datasets/netneurotools.bib new file mode 100644 index 0000000..1e78958 --- /dev/null +++ b/netneurotools/datasets/netneurotools.bib @@ -0,0 +1,253 @@ +@article{cammoun2012mapping, + title={Mapping the human connectome at multiple scales with diffusion spectrum MRI}, + author={Cammoun, Leila and Gigandet, Xavier and Meskaldji, Djalel and Thiran, Jean Philippe and Sporns, Olaf and Do, Kim Q and Maeder, Philippe and Meuli, Reto and Hagmann, Patric}, + journal={Journal of neuroscience methods}, + volume={203}, + number={2}, + pages={386--397}, + year={2012}, + publisher={Elsevier} +} + +@article{pauli2018high, + title={A high-resolution probabilistic in vivo atlas of human subcortical brain nuclei}, + author={Pauli, Wolfgang M and Nili, Amanda N and Tyszka, J Michael}, + journal={Scientific data}, + volume={5}, + number={1}, + pages={1--13}, + year={2018}, + publisher={Nature Publishing Group} +} + +@article{van2012human, + title={The Human Connectome Project: a data acquisition perspective}, + author={Van Essen, David C and Ugurbil, Kamil and Auerbach, Edward and Barch, Deanna and Behrens, Timothy EJ and Bucholz, Richard and Chang, Acer and Chen, Liyong and Corbetta, Maurizio and Curtiss, Sandra W and others}, + journal={Neuroimage}, + volume={62}, + number={4}, + pages={2222--2231}, + year={2012}, + publisher={Elsevier} +} + +@article{van2012parcellations, + title={Parcellations and hemispheric asymmetries of human cerebral cortex analyzed on surface-based atlases}, + author={Van Essen, David C and Glasser, Matthew F and Dierker, Donna L and Harwell, John and Coalson, Timothy}, + journal={Cerebral cortex}, + volume={22}, + number={10}, + pages={2241--2262}, + year={2012}, + publisher={Oxford University Press} +} + +@article{glasser2013minimal, + title={The minimal preprocessing pipelines for the Human Connectome Project}, + author={Glasser, Matthew F and Sotiropoulos, Stamatios N and Wilson, J Anthony and Coalson, Timothy S and Fischl, Bruce and Andersson, Jesper L and Xu, Junqian and Jbabdi, Saad and Webster, Matthew and Polimeni, Jonathan R and others}, + journal={Neuroimage}, + volume={80}, + pages={105--124}, + year={2013}, + publisher={Elsevier} +} + +@article{donahue2016using, + title={Using diffusion tractography to predict cortical connection strength and distance: a quantitative comparison 
with tracers in the monkey}, + author={Donahue, Chad J and Sotiropoulos, Stamatios N and Jbabdi, Saad and Hernandez-Fernandez, Moises and Behrens, Timothy E and Dyrby, Tim B and Coalson, Timothy and Kennedy, Henry and Knoblauch, Kenneth and Van Essen, David C and others}, + journal={Journal of Neuroscience}, + volume={36}, + number={25}, + pages={6758--6770}, + year={2016}, + publisher={Soc Neuroscience} +} + +@article{dale1999cortical, + title={Cortical surface-based analysis: I. Segmentation and surface reconstruction}, + author={Dale, Anders M and Fischl, Bruce and Sereno, Martin I}, + journal={Neuroimage}, + volume={9}, + number={2}, + pages={179--194}, + year={1999}, + publisher={Elsevier} +} + +@article{fischl1999cortical, + title={Cortical surface-based analysis: II: inflation, flattening, and a surface-based coordinate system}, + author={Fischl, Bruce and Sereno, Martin I and Dale, Anders M}, + journal={Neuroimage}, + volume={9}, + number={2}, + pages={195--207}, + year={1999}, + publisher={Elsevier} +} + +@article{fischl1999high, + title={High-resolution intersubject averaging and a coordinate system for the cortical surface}, + author={Fischl, Bruce and Sereno, Martin I and Tootell, Roger BH and Dale, Anders M}, + journal={Human brain mapping}, + volume={8}, + number={4}, + pages={272--284}, + year={1999}, + publisher={Wiley Online Library} +} + +@article{lyttelton2007unbiased, + title={An unbiased iterative group registration template for cortical surface analysis}, + author={Lyttelton, Oliver and Boucher, Maxime and Robbins, Steven and Evans, Alan}, + journal={Neuroimage}, + volume={34}, + number={4}, + pages={1535--1544}, + year={2007}, + publisher={Elsevier} +} + +@article{fonov2009unbiased, + title={Unbiased nonlinear average age-appropriate brain templates from birth to adulthood}, + author={Fonov, Vladimir S and Evans, Alan C and McKinstry, Robert C and Almli, C Robert and Collins, DL}, + journal={NeuroImage}, + volume={47}, + pages={S102}, + year={2009}, + publisher={Elsevier} +} + +@article{ad2006civet, + title={The CIVET image-processing environment: A fully automated comprehensive pipeline for anatomical neuroimaging research. 
Proceedings of the 12th annual meeting of the organization for human brain mapping}, + author={Ad-Dab'bagh, Y and Lyttelton, O and Muehlboeck, J and Lepage, C and Einarson, D and Mok, K and Ivanov, O and Vincent, R and Lerch, J and Fombonne, E}, + journal={Florence, Italy}, + pages={2266}, + year={2006} +} + +@article{varshney2011structural, + title={Structural properties of the Caenorhabditis elegans neuronal network}, + author={Varshney, Lav R and Chen, Beth L and Paniagua, Eric and Hall, David H and Chklovskii, Dmitri B}, + journal={PLoS computational biology}, + volume={7}, + number={2}, + pages={e1001066}, + year={2011}, + publisher={Public Library of Science San Francisco, USA} +} + +@article{chiang2011three, + title={Three-dimensional reconstruction of brain-wide wiring networks in Drosophila at single-cell resolution}, + author={Chiang, Ann-Shyn and Lin, Chih-Yung and Chuang, Chao-Chun and Chang, Hsiu-Ming and Hsieh, Chang-Huain and Yeh, Chang-Wei and Shih, Chi-Tin and Wu, Jian-Jheng and Wang, Guo-Tzau and Chen, Yung-Chang and others}, + journal={Current biology}, + volume={21}, + number={1}, + pages={1--11}, + year={2011}, + publisher={Elsevier} +} + +@article{griffa2019lausanne, + title={Structural and functional connectome from 70 young healthy adults [data set]}, + author={Griffa, Alessandra and Alem{\'a}n-G{\'o}mez, Yasser and Hagmann, Patric}, + journal={Zenodo}, + year={2019} +} + +@article{markov2013role, + title={The role of long-range connections on the specificity of the macaque interareal cortical network}, + author={Markov, Nikola T and Ercsey-Ravasz, Maria and Lamy, Camille and Ribeiro Gomes, Ana Rita and Magrou, Lo{\"\i}c and Misery, Pierre and Giroud, Pascale and Barone, Pascal and Dehay, Colette and Toroczkai, Zolt{\'a}n and others}, + journal={Proceedings of the National Academy of Sciences}, + volume={110}, + number={13}, + pages={5187--5192}, + year={2013}, + publisher={National Acad Sciences} +} + +@article{modha2010network, + title={Network architecture of the long-distance pathways in the macaque brain}, + author={Modha, Dharmendra S and Singh, Raghavendra}, + journal={Proceedings of the National Academy of Sciences}, + volume={107}, + number={30}, + pages={13485--13490}, + year={2010}, + publisher={National Acad Sciences} +} + +@article{rubinov2015wiring, + title={Wiring cost and topological participation of the mouse brain connectome}, + author={Rubinov, Mikail and Ypma, Rolf JF and Watson, Charles and Bullmore, Edward T}, + journal={Proceedings of the National Academy of Sciences}, + volume={112}, + number={32}, + pages={10032--10037}, + year={2015}, + publisher={National Acad Sciences} +} + +@article{bota2015architecture, + title={Architecture of the cerebral cortical association connectome underlying cognition}, + author={Bota, Mihail and Sporns, Olaf and Swanson, Larry W}, + journal={Proceedings of the National Academy of Sciences}, + volume={112}, + number={16}, + pages={E2093--E2101}, + year={2015}, + publisher={National Acad Sciences} +} + +@article{vazquez2019gradients, + title={Gradients of structure--function tethering across neocortex}, + author={V{\'a}zquez-Rodr{\'\i}guez, Bertha and Su{\'a}rez, Laura E and Markello, Ross D and Shafiei, Golia and Paquola, Casey and Hagmann, Patric and Van Den Heuvel, Martijn P and Bernhardt, Boris C and Spreng, R Nathan and Misic, Bratislav}, + journal={Proceedings of the National Academy of Sciences}, + volume={116}, + number={42}, + pages={21219--21227}, + year={2019}, + publisher={National Acad Sciences} +} + 
+@article{hansen2023integrating, + title={Integrating multimodal and multiscale connectivity blueprints of the human cerebral cortex in health and disease}, + author={Hansen, Justine Y and Shafiei, Golia and Voigt, Katharina and Liang, Emma X and Cox, Sylvia ML and Leyton, Marco and Jamadar, Sharna D and Misic, Bratislav}, + journal={PLoS biology}, + volume={21}, + number={9}, + pages={e3002314}, + year={2023}, + publisher={Public Library of Science San Francisco, CA USA} +} + +@article{schaefer2018local, + title={Local-global parcellation of the human cerebral cortex from intrinsic functional connectivity MRI}, + author={Schaefer, Alexander and Kong, Ru and Gordon, Evan M and Laumann, Timothy O and Zuo, Xi-Nian and Holmes, Avram J and Eickhoff, Simon B and Yeo, BT Thomas}, + journal={Cerebral cortex}, + volume={28}, + number={9}, + pages={3095--3114}, + year={2018}, + publisher={Oxford University Press} +} + +@article{glasser2016multi, + title={A multi-modal parcellation of human cerebral cortex}, + author={Glasser, Matthew F and Coalson, Timothy S and Robinson, Emma C and Hacker, Carl D and Harwell, John and Yacoub, Essa and Ugurbil, Kamil and Andersson, Jesper and Beckmann, Christian F and Jenkinson, Mark and others}, + journal={Nature}, + volume={536}, + number={7615}, + pages={171--178}, + year={2016}, + publisher={Nature Publishing Group} +} + +@article{scholtens2018mri, + title={An mri von economo--koskinas atlas}, + author={Scholtens, Lianne H and de Reus, Marcel A and de Lange, Siemon C and Schmidt, Ruben and van den Heuvel, Martijn P}, + journal={NeuroImage}, + volume={170}, + pages={249--256}, + year={2018}, + publisher={Elsevier} +} diff --git a/netneurotools/datasets/references.json b/netneurotools/datasets/references.json new file mode 100644 index 0000000..b5214a9 --- /dev/null +++ b/netneurotools/datasets/references.json @@ -0,0 +1,220 @@ +{ + "atl-cammoun2012": { + "primary": [ + { + "citation": "Leila Cammoun, Xavier Gigandet, Djalel Meskaldji, Jean Philippe Thiran, Olaf Sporns, Kim Q Do, Philippe Maeder, Reto Meuli, and Patric Hagmann. Mapping the human connectome at multiple scales with diffusion spectrum mri. Journal of neuroscience methods, 203(2):386\u2013397, 2012.", + "bibkey": "cammoun2012mapping" + } + ] + }, + "atl-pauli2018": { + "primary": [ + { + "citation": "Wolfgang M Pauli, Amanda N Nili, and J Michael Tyszka. A high-resolution probabilistic in vivo atlas of human subcortical brain nuclei. Scientific data, 5(1):1\u201313, 2018.", + "bibkey": "pauli2018high" + } + ] + }, + "tpl-conte69": { + "primary": [ + { + "citation": "David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna Barch, Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, Maurizio Corbetta, Sandra W Curtiss, and others. The human connectome project: a data acquisition perspective. Neuroimage, 62(4):2222\u20132231, 2012.", + "bibkey": "van2012human" + }, + { + "citation": "David C Van Essen, Matthew F Glasser, Donna L Dierker, John Harwell, and Timothy Coalson. Parcellations and hemispheric asymmetries of human cerebral cortex analyzed on surface-based atlases. Cerebral cortex, 22(10):2241\u20132262, 2012.", + "bibkey": "van2012parcellations" + } + ] + }, + "tpl-yerkes19": { + "primary": [ + { + "citation": "Chad J Donahue, Stamatios N Sotiropoulos, Saad Jbabdi, Moises Hernandez-Fernandez, Timothy E Behrens, Tim B Dyrby, Timothy Coalson, Henry Kennedy, Kenneth Knoblauch, David C Van Essen, and others. 
Using diffusion tractography to predict cortical connection strength and distance: a quantitative comparison with tracers in the monkey. Journal of Neuroscience, 36(25):6758\u20136770, 2016.", + "bibkey": "donahue2016using" + } + ] + }, + "tpl-fsaverage": { + "primary": [ + { + "citation": "Anders M Dale, Bruce Fischl, and Martin I Sereno. Cortical surface-based analysis: i. segmentation and surface reconstruction. Neuroimage, 9(2):179\u2013194, 1999.", + "bibkey": "dale1999cortical" + }, + { + "citation": "Bruce Fischl, Martin I Sereno, and Anders M Dale. Cortical surface-based analysis: ii: inflation, flattening, and a surface-based coordinate system. Neuroimage, 9(2):195\u2013207, 1999.", + "bibkey": "fischl1999cortical" + }, + { + "citation": "Bruce Fischl, Martin I Sereno, Roger BH Tootell, and Anders M Dale. High-resolution intersubject averaging and a coordinate system for the cortical surface. Human brain mapping, 8(4):272\u2013284, 1999.", + "bibkey": "fischl1999high" + } + ] + }, + "tpl-civet": { + "primary": [ + { + "citation": "Oliver Lyttelton, Maxime Boucher, Steven Robbins, and Alan Evans. An unbiased iterative group registration template for cortical surface analysis. Neuroimage, 34(4):1535\u20131544, 2007.", + "bibkey": "lyttelton2007unbiased" + }, + { + "citation": "Vladimir S Fonov, Alan C Evans, Robert C McKinstry, C Robert Almli, and DL Collins. Unbiased nonlinear average age-appropriate brain templates from birth to adulthood. NeuroImage, 47:S102, 2009.", + "bibkey": "fonov2009unbiased" + }, + { + "citation": "Y Ad-Dab'bagh, O Lyttelton, J Muehlboeck, C Lepage, D Einarson, K Mok, O Ivanov, R Vincent, J Lerch, and E Fombonne. The civet image-processing environment: a fully automated comprehensive pipeline for anatomical neuroimaging research. proceedings of the 12th annual meeting of the organization for human brain mapping. Florence, Italy, pages 2266, 2006.", + "bibkey": "ad2006civet" + } + ] + }, + "ds-famous_gmat": { + "primary": [ + { + "citation": "", + "bibkey": "" + } + ], + "celegans": [ + { + "citation": "Lav R Varshney, Beth L Chen, Eric Paniagua, David H Hall, and Dmitri B Chklovskii. Structural properties of the caenorhabditis elegans neuronal network. PLoS computational biology, 7(2):e1001066, 2011.", + "bibkey": "varshney2011structural" + } + ], + "drosophila": [ + { + "citation": "Ann-Shyn Chiang, Chih-Yung Lin, Chao-Chun Chuang, Hsiu-Ming Chang, Chang-Huain Hsieh, Chang-Wei Yeh, Chi-Tin Shih, Jian-Jheng Wu, Guo-Tzau Wang, Yung-Chang Chen, and others. Three-dimensional reconstruction of brain-wide wiring networks in drosophila at single-cell resolution. Current biology, 21(1):1\u201311, 2011.", + "bibkey": "chiang2011three" + } + ], + "human": [ + { + "citation": "Alessandra Griffa, Yasser Alem\u00e1n-G\u00f3mez, and Patric Hagmann. Structural and functional connectome from 70 young healthy adults [data set]. Zenodo, 2019.", + "bibkey": "griffa2019lausanne" + } + ], + "macaque_markov": [ + { + "citation": "Nikola T Markov, Maria Ercsey-Ravasz, Camille Lamy, Ana Rita Ribeiro Gomes, Lo\u00efc Magrou, Pierre Misery, Pascale Giroud, Pascal Barone, Colette Dehay, Zolt\u00e1n Toroczkai, and others. The role of long-range connections on the specificity of the macaque interareal cortical network. Proceedings of the National Academy of Sciences, 110(13):5187\u20135192, 2013.", + "bibkey": "markov2013role" + } + ], + "macaque_modha": [ + { + "citation": "Dharmendra S Modha and Raghavendra Singh. 
Network architecture of the long-distance pathways in the macaque brain. Proceedings of the National Academy of Sciences, 107(30):13485\u201313490, 2010.", + "bibkey": "modha2010network" + } + ], + "mouse": [ + { + "citation": "Mikail Rubinov, Rolf JF Ypma, Charles Watson, and Edward T Bullmore. Wiring cost and topological participation of the mouse brain connectome. Proceedings of the National Academy of Sciences, 112(32):10032\u201310037, 2015.", + "bibkey": "rubinov2015wiring" + } + ], + "rat": [ + { + "citation": "Mihail Bota, Olaf Sporns, and Larry W Swanson. Architecture of the cerebral cortical association connectome underlying cognition. Proceedings of the National Academy of Sciences, 112(16):E2093\u2013E2101, 2015.", + "bibkey": "bota2015architecture" + } + ] + }, + "ds-vazquez_rodriguez2019": { + "primary": [ + { + "citation": "Bertha V\u00e1zquez-Rodr\u00edguez, Laura E Su\u00e1rez, Ross D Markello, Golia Shafiei, Casey Paquola, Patric Hagmann, Martijn P Van Den Heuvel, Boris C Bernhardt, R Nathan Spreng, and Bratislav Misic. Gradients of structure\u2013function tethering across neocortex. Proceedings of the National Academy of Sciences, 116(42):21219\u201321227, 2019.", + "bibkey": "vazquez2019gradients" + } + ] + }, + "atl-schaefer2018": { + "primary": [ + { + "citation": "Alexander Schaefer, Ru Kong, Evan M Gordon, Timothy O Laumann, Xi-Nian Zuo, Avram J Holmes, Simon B Eickhoff, and BT Thomas Yeo. Local-global parcellation of the human cerebral cortex from intrinsic functional connectivity mri. Cerebral cortex, 28(9):3095\u20133114, 2018.", + "bibkey": "schaefer2018local" + } + ] + }, + "atl-mmpall": { + "primary": [ + { + "citation": "Matthew F Glasser, Timothy S Coalson, Emma C Robinson, Carl D Hacker, John Harwell, Essa Yacoub, Kamil Ugurbil, Jesper Andersson, Christian F Beckmann, Mark Jenkinson, and others. A multi-modal parcellation of human cerebral cortex. Nature, 536(7615):171\u2013178, 2016.", + "bibkey": "glasser2016multi" + } + ] + }, + "atl-voneconomo_koskinas": { + "primary": [ + { + "citation": "Lianne H Scholtens, Marcel A de Reus, Siemon C de Lange, Ruben Schmidt, and Martijn P van den Heuvel. An mri von economo\u2013koskinas atlas. NeuroImage, 170:249\u2013256, 2018.", + "bibkey": "scholtens2018mri" + } + ] + }, + "tpl-hcp_standards": { + "primary": [ + { + "citation": "David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna Barch, Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, Maurizio Corbetta, Sandra W Curtiss, and others. The human connectome project: a data acquisition perspective. Neuroimage, 62(4):2222\u20132231, 2012.", + "bibkey": "van2012human" + }, + { + "citation": "Matthew F Glasser, Stamatios N Sotiropoulos, J Anthony Wilson, Timothy S Coalson, Bruce Fischl, Jesper L Andersson, Junqian Xu, Saad Jbabdi, Matthew Webster, Jonathan R Polimeni, and others. The minimal preprocessing pipelines for the human connectome project. Neuroimage, 80:105\u2013124, 2013.", + "bibkey": "glasser2013minimal" + } + ] + }, + "ds-hansen_manynetworks": { + "primary": [ + { + "citation": "Justine Y Hansen, Golia Shafiei, Katharina Voigt, Emma X Liang, Sylvia ML Cox, Marco Leyton, Sharna D Jamadar, and Bratislav Misic. Integrating multimodal and multiscale connectivity blueprints of the human cerebral cortex in health and disease. 
PLoS biology, 21(9):e3002314, 2023.", + "bibkey": "hansen2023integrating" + } + ], + "gene": [ + { + "citation": "", + "bibkey": "" + } + ], + "receptor": [ + { + "citation": "", + "bibkey": "" + } + ], + "larminar": [ + { + "citation": "", + "bibkey": "" + } + ], + "metabolic": [ + { + "citation": "", + "bibkey": "" + } + ], + "haemodynamic": [ + { + "citation": "", + "bibkey": "" + } + ], + "electrophysiological": [ + { + "citation": "", + "bibkey": "" + } + ], + "temporal": [ + { + "citation": "", + "bibkey": "" + } + ], + "cognitive": [ + { + "citation": "", + "bibkey": "" + } + ] + } +} \ No newline at end of file diff --git a/netneurotools/tests/__init__.py b/netneurotools/datasets/tests/__init__.py similarity index 100% rename from netneurotools/tests/__init__.py rename to netneurotools/datasets/tests/__init__.py diff --git a/netneurotools/datasets/tests/test_datasets_utils.py b/netneurotools/datasets/tests/test_datasets_utils.py new file mode 100644 index 0000000..9d5d85c --- /dev/null +++ b/netneurotools/datasets/tests/test_datasets_utils.py @@ -0,0 +1,35 @@ +"""For testing netneurotools.datasets.datasets_utils functionality.""" +import os + +import pytest + +from netneurotools.datasets import datasets_utils as utils + + +@pytest.mark.parametrize('dset, expected', [ + ('atl-cammoun2012', ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k', + 'MNI152NLin2009aSym', 'gcs']), + ('tpl-conte69', ['url', 'md5']), + ('atl-pauli2018', ['probabilistic', 'deterministic', 'info']), + ('tpl-fsaverage', ['fsaverage' + f for f in ['', '3', '4', '5', '6']]), + ('atl-schaefer2018', ['fsaverage', 'fsaverage6', 'fsaverage6']) +]) +def test_get_dataset_info(dset, expected): + """Test getting dataset info.""" + info = utils._get_dataset_info(dset) + if isinstance(info, dict): + assert all(k in info.keys() for k in expected) + elif isinstance(info, list): + for f in info: + assert all(k in f.keys() for k in expected) + else: + assert False + + with pytest.raises(KeyError): + utils._get_dataset_info('notvalid') + + +def test_get_data_dir(tmpdir): + """Test getting data directory.""" + data_dir = utils._get_data_dir(tmpdir) + assert os.path.isdir(data_dir) diff --git a/netneurotools/datasets/tests/test_fetch.py b/netneurotools/datasets/tests/test_fetch.py new file mode 100644 index 0000000..b55e58d --- /dev/null +++ b/netneurotools/datasets/tests/test_fetch.py @@ -0,0 +1,206 @@ +"""For testing netneurotools.datasets.fetch_* functionality.""" +import os +import pytest +from pathlib import Path +import numpy as np +from netneurotools import datasets + + +class TestFetchTemplate: + """Test fetching of template datasets.""" + + @pytest.mark.parametrize('version', [ + 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' + ]) + def test_fetch_fsaverage(self, tmpdir, version): + """Test fetching of fsaverage surfaces.""" + fsaverage = datasets.fetch_fsaverage( + version=version, data_dir=tmpdir, verbose=0 + ) + for k in ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere']: + assert hasattr(fsaverage, k) + assert len(fsaverage[k]) == 2 + assert all(os.path.isfile(hemi) for hemi in fsaverage[k]), fsaverage[k] + + def test_fetch_hcp_standards(self, tmpdir): + """Test fetching of HCP standard meshes.""" + hcp = datasets.fetch_hcp_standards(data_dir=tmpdir, verbose=0) + assert hcp.exists() + + @pytest.mark.parametrize('version', [ + 'v1', 'v2' + ]) + def test_fetch_civet(self, tmpdir, version): + """Test fetching of CIVET templates.""" + civet = datasets.fetch_civet(version=version, 
data_dir=tmpdir, verbose=0) + for key in ('mid', 'white'): + assert key in civet + for hemi in ('lh', 'rh'): + assert hasattr(civet[key], hemi) + assert os.path.isfile(getattr(civet[key], hemi)) + + def test_fetch_conte69(self, tmpdir): + """Test fetching of Conte69 surfaces.""" + conte = datasets.fetch_conte69(data_dir=tmpdir, verbose=0) + assert all(hasattr(conte, k) for k in + ['midthickness', 'inflated', 'vinflated', 'info']) + + def test_fetch_yerkes19(self, tmpdir): + """Test fetching of Yerkes19 surfaces.""" + yerkes19 = datasets.fetch_yerkes19(data_dir=tmpdir, verbose=0) + assert all(hasattr(yerkes19, k) for k in + ['midthickness', 'inflated', 'vinflated']) + + +class TestFetchAtlas: + """Test fetching of atlas datasets.""" + + @pytest.mark.parametrize('version, expected', [ + ('MNI152NLin2009aSym', [1, 1, 1, 1, 1]), + ('fsaverage', [2, 2, 2, 2, 2]), + ('fsaverage5', [2, 2, 2, 2, 2]), + ('fsaverage6', [2, 2, 2, 2, 2]), + ('fslr32k', [2, 2, 2, 2, 2]), + ('gcs', [2, 2, 2, 2, 6]) + ]) + def test_fetch_cammoun2012(self, tmpdir, version, expected): + """Test fetching of Cammoun2012 parcellations.""" + keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500'] + cammoun = datasets.fetch_cammoun2012(version, data_dir=tmpdir, verbose=0) + + # output has expected keys + assert all(hasattr(cammoun, k) for k in keys) + # and keys are expected lengths! + for k, e in zip(keys, expected): + out = getattr(cammoun, k) + if isinstance(out, (tuple, list)): + assert len(out) == e + else: + assert isinstance(out, str) and out.endswith('.nii.gz') + + if 'fsaverage' in version: + with pytest.warns(DeprecationWarning): + datasets.fetch_cammoun2012('surface', data_dir=tmpdir, verbose=0) + + @pytest.mark.parametrize('version', [ + 'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k' + ]) + def test_fetch_schaefer2018(self, tmpdir, version): + """Test fetching of Schaefer2018 parcellations.""" + keys = [ + f'{p}Parcels{n}Networks' + for p in range(100, 1001, 100) for n in [7, 17] + ] + schaefer = datasets.fetch_schaefer2018(version, data_dir=tmpdir, verbose=0) + + if version == 'fslr32k': + assert all(k in schaefer and os.path.isfile(schaefer[k]) for k in keys) + else: + for k in keys: + assert k in schaefer + assert len(schaefer[k]) == 2 + assert all(os.path.isfile(hemi) for hemi in schaefer[k]) + + def test_fetch_mmpall(self, tmpdir): + """Test fetching of MMPAll parcellations.""" + mmp = datasets.fetch_mmpall(data_dir=tmpdir, verbose=0) + assert len(mmp) == 2 + assert all(os.path.isfile(hemi) for hemi in mmp) + assert all(hasattr(mmp, attr) for attr in ('lh', 'rh')) + + def test_fetch_pauli2018(self, tmpdir): + """Test fetching of Pauli2018 parcellations.""" + pauli = datasets.fetch_pauli2018(data_dir=tmpdir, verbose=0) + assert all(hasattr(pauli, k) and os.path.isfile(pauli[k]) for k in + ['probabilistic', 'deterministic', 'info']) + + @pytest.mark.xfail + def test_fetch_ye2020(self, tmpdir): + """Test fetching of Ye2020 parcellations.""" + assert False + + def test_fetch_voneconomo(self, tmpdir): + """Test fetching of von Economo parcellations.""" + vek = datasets.fetch_voneconomo(data_dir=tmpdir, verbose=0) + assert all(hasattr(vek, k) and len(vek[k]) == 2 for k in ['gcs', 'ctab']) + assert isinstance(vek.get('info'), str) + + +class TestFetchProject: + """Test fetching of project datasets.""" + + def test_fetch_vazquez_rodriguez2019(self, tmpdir): + """Test fetching of Vazquez-Rodriguez2019 dataset.""" + vazquez = datasets.fetch_vazquez_rodriguez2019(data_dir=tmpdir, verbose=0) + for k in 
['rsquared', 'gradient']: + assert hasattr(vazquez, k) + assert isinstance(getattr(vazquez, k), np.ndarray) + + @pytest.mark.xfail + def test_fetch_mirchi2018(self, tmpdir): + """Test fetching of Mirchi2018 dataset.""" + assert False + + def test_fetch_hansen_manynetworks(self, tmpdir): + """Test fetching of Hansen et al., 2023 many-networks dataset.""" + hansen = datasets.fetch_hansen_manynetworks(data_dir=tmpdir, verbose=0) + assert "cammoun033" in hansen + assert "gene" in hansen["cammoun033"] + assert isinstance(hansen["cammoun033"]["gene"], Path) + + @pytest.mark.xfail + def test_fetch_hansen_receptors(self, tmpdir): + """Test fetching of Hansen et al., 2022 receptor dataset.""" + # hansen = datasets.fetch_hansen_receptors(data_dir=tmpdir, verbose=0) + assert False + + @pytest.mark.xfail + def test_fetch_hansen_genecognition(self, tmpdir): + """Test fetching of Hansen et al., 2021 gene-cognition dataset.""" + # hansen = datasets.fetch_hansen_genecognition(data_dir=tmpdir, verbose=0) + assert False + + @pytest.mark.xfail + def test_fetch_hansen_brainstem(self, tmpdir): + """Test fetching of Hansen et al., 2024 brainstem dataset.""" + # hansen = datasets.fetch_hansen_brainstem(data_dir=tmpdir, verbose=0) + assert False + + @pytest.mark.xfail + def test_fetch_shafiei_hcpmeg(self, tmpdir): + """Test fetching of Shafiei et al., 2022 & 2023 HCP-MEG dataset.""" + # shafiei = datasets.fetch_shafiei_hcpmeg(data_dir=tmpdir, verbose=0) + assert False + + @pytest.mark.xfail + def test_fetch_suarez_mami(self, tmpdir): + """Test fetching of Suarez et al., 2022 mami dataset.""" + # suarez = datasets.fetch_suarez_mami(data_dir=tmpdir, verbose=0) + assert False + + @pytest.mark.parametrize('dataset, expected', [ + ('celegans', ['conn', 'dist', 'labels', 'ref']), + ('drosophila', ['conn', 'coords', 'labels', 'networks', 'ref']), + ('human_func_scale033', ['conn', 'coords', 'labels', 'ref']), + ('human_func_scale060', ['conn', 'coords', 'labels', 'ref']), + ('human_func_scale125', ['conn', 'coords', 'labels', 'ref']), + ('human_func_scale250', ['conn', 'coords', 'labels', 'ref']), + ('human_func_scale500', ['conn', 'coords', 'labels', 'ref']), + ('human_struct_scale033', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('human_struct_scale060', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('human_struct_scale125', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('human_struct_scale250', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('human_struct_scale500', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('macaque_markov', ['conn', 'dist', 'labels', 'ref']), + ('macaque_modha', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('mouse', ['acronyms', 'conn', 'coords', 'dist', 'labels', 'ref']), + ('rat', ['conn', 'labels', 'ref']), + ]) + def test_fetch_famous_gmat(self, tmpdir, dataset, expected): + """Test fetching of famous G.mat datasets.""" + connectome = datasets.fetch_famous_gmat(dataset, data_dir=tmpdir, verbose=0) + + expected.remove("ref") + for key in expected: + assert (key in connectome) + assert isinstance(connectome[key], str if key == 'ref' else np.ndarray) diff --git a/netneurotools/datasets/utils.py b/netneurotools/datasets/utils.py deleted file mode 100644 index 4339c57..0000000 --- a/netneurotools/datasets/utils.py +++ /dev/null @@ -1,100 +0,0 @@ -# -*- coding: utf-8 -*- -"""Utilites for loading / creating datasets.""" - -import json -import os -import importlib.resources - -if getattr(importlib.resources, 'files', None) is not None: - _importlib_avail = True -else: - from 
pkg_resources import resource_filename - _importlib_avail = False - - -def _osfify_urls(data): - """ - Format `data` object with OSF API URL. - - Parameters - ---------- - data : object - If dict with a `url` key, will format OSF_API with relevant values - - Returns - ------- - data : object - Input data with all `url` dict keys formatted - """ - OSF_API = "https://files.osf.io/v1/resources/{}/providers/osfstorage/{}" - - if isinstance(data, str): - return data - elif 'url' in data: - data['url'] = OSF_API.format(*data['url']) - - try: - for key, value in data.items(): - data[key] = _osfify_urls(value) - except AttributeError: - for n, value in enumerate(data): - data[n] = _osfify_urls(value) - - return data - - -if _importlib_avail: - osf = importlib.resources.files("netneurotools") / "data/osf.json" -else: - osf = resource_filename('netneurotools', 'data/osf.json') - -with open(osf) as src: - OSF_RESOURCES = _osfify_urls(json.load(src)) - - -def _get_dataset_info(name): - """ - Return url and MD5 checksum for dataset `name`. - - Parameters - ---------- - name : str - Name of dataset - - Returns - ------- - url : str - URL from which to download dataset - md5 : str - MD5 checksum for file downloade from `url` - """ - try: - return OSF_RESOURCES[name] - except KeyError: - raise KeyError("Provided dataset '{}' is not valid. Must be one of: {}" - .format(name, sorted(OSF_RESOURCES.keys()))) from None - - -def _get_data_dir(data_dir=None): - """ - Get path to netneurotools data directory. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - - Returns - ------- - data_dir : str - Path to use as data directory - """ - if data_dir is None: - data_dir = os.environ.get('NNT_DATA', os.path.join('~', 'nnt-data')) - data_dir = os.path.expanduser(data_dir) - if not os.path.exists(data_dir): - os.makedirs(data_dir) - - return data_dir diff --git a/netneurotools/experimental/__init__.py b/netneurotools/experimental/__init__.py new file mode 100644 index 0000000..911c0f7 --- /dev/null +++ b/netneurotools/experimental/__init__.py @@ -0,0 +1,4 @@ +"""Functions in alpha stage.""" + + +__all__ = [] diff --git a/netneurotools/freesurfer.py b/netneurotools/freesurfer.py deleted file mode 100644 index 047590d..0000000 --- a/netneurotools/freesurfer.py +++ /dev/null @@ -1,662 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for working with FreeSurfer data and parcellations.""" - -import os -import os.path as op -import warnings - -from nibabel.freesurfer import read_annot, read_geometry -import numpy as np -from scipy import sparse -try: # scipy >= 1.8.0 - from scipy.ndimage._measurements import _stats, labeled_comprehension -except ImportError: # scipy < 1.8.0 - from scipy.ndimage.measurements import _stats, labeled_comprehension -from scipy.spatial.distance import cdist - -from .datasets import fetch_fsaverage -from .stats import gen_spinsamples -from .surface import make_surf_graph -from .utils import check_fs_subjid, run - -FSIGNORE = [ - 'unknown', 'corpuscallosum', 'Background+FreeSurfer_Defined_Medial_Wall' -] - - -def apply_prob_atlas(subject_id, gcs, hemi, *, orig='white', annot=None, - ctab=None, subjects_dir=None, use_cache=True, - quiet=False): - """ - Create an annotation file for `subject_id` by applying atlas in `gcs`. 
- - Runs subprocess calling FreeSurfer's "mris_ca_label" function; as such, - FreeSurfer must be installed and accesible on the local system path. - - Parameters - ---------- - subject_id : str - FreeSurfer subject ID - gcs : str - Filepath to .gcs file containing classifier array - hemi : {'lh', 'rh'} - Hemisphere corresponding to `gcs` file - orig : str, optional - Original surface to which to apply classifer. Default: 'white' - annot : str, optional - Path to output annotation file to generate. If set to None, the name is - created from the provided `hemi` and `gcs`. If provided as a - relative path, it is assumed to stem from `subjects_dir`/`subject_id`. - Default: None - ctab : str, optional - Path to colortable corresponding to `gcs`. Default: None - subjects_dir : str, optional - Path to FreeSurfer subject directory. If not set, will inherit from - the environmental variable $SUBJECTS_DIR. Default: None - use_cache : bool, optional - Whether to check for existence of `annot` in directory specified by - `{subjects_dir}/{subject_id}/label' and use that, if it exists. If - False, will create a new annot file. Default: True - quiet : bool, optional - Whether to restrict status messages. Default: False - - Returns - ------- - annot : str - Path to generated annotation file - """ - cmd = 'mris_ca_label {opts}{subject_id} {hemi} {hemi}.sphere.reg ' \ - '{gcs} {annot}' - - if hemi not in ['rh', 'lh']: - raise ValueError('Provided hemisphere designation `hemi` must be one ' - 'of \'rh\' or \'lh\'. Provided: {}'.format(hemi)) - if not op.isfile(gcs): - raise ValueError('Cannot find specified `gcs` file {}.'.format(gcs)) - - subject_id, subjects_dir = check_fs_subjid(subject_id, subjects_dir) - - # add all the options together, as specified - opts = '' - if ctab is not None and op.isfile(ctab): - opts += '-t {} '.format(ctab) - if orig is not None: - opts += '-orig {} '.format(orig) - if subjects_dir is not None: - opts += '-sdir {} '.format(subjects_dir) - else: - subjects_dir = os.environ['SUBJECTS_DIR'] - - # generate output filename - if annot is None: - base = '{}.{}.annot'.format(hemi, gcs[:-4]) - annot = op.join(subjects_dir, subject_id, 'label', base) - else: - # if not a full path, assume relative from subjects_dir/subject_id - if not annot.startswith(op.abspath(os.sep)): - annot = op.join(subjects_dir, subject_id, annot) - - # if annotation file doesn't exist or we explicitly want to make a new one - if not op.isfile(annot) or not use_cache: - run(cmd.format(opts=opts, subject_id=subject_id, hemi=hemi, - gcs=gcs, annot=annot), - quiet=quiet) - - return annot - - -def _decode_list(vals): - """List decoder.""" - return [val.decode() if hasattr(val, 'decode') else val for val in vals] - - -def find_parcel_centroids(*, lhannot, rhannot, method='surface', - version='fsaverage', surf='sphere', drop=None): - """ - Return vertex coords corresponding to centroids of parcels in annotations. - - Note that using any other `surf` besides the default of 'sphere' may result - in centroids that are not directly within the parcels themselves due to - sulcal folding patterns. - - Parameters - ---------- - {lh,rh}annot : str - Path to .annot file containing labels of parcels on the {left,right} - hemisphere. These must be specified as keyword arguments to avoid - accidental order switching. - method : {'average', 'surface', 'geodesic'}, optional - Method for calculation of parcel centroid. See Notes for more - information. 
Default: 'surface' - version : str, optional - Specifies which version of `fsaverage` provided annotation files - correspond to. Must be one of {'fsaverage', 'fsaverage3', 'fsaverage4', - 'fsaverage5', 'fsaverage6'}. Default: 'fsaverage' - surf : str, optional - Specifies which surface projection of fsaverage to use for finding - parcel centroids. Default: 'sphere' - drop : list, optional - Specifies regions in {lh,rh}annot for which the parcel centroid should - not be calculated. If not specified, centroids for parcels defined in - `netneurotools.freesurfer.FSIGNORE` are not calculated. Default: None - - Returns - ------- - centroids : (N, 3) numpy.ndarray - xyz coordinates of vertices closest to the centroid of each parcel - defined in `lhannot` and `rhannot` - hemiid : (N,) numpy.ndarray - Array denoting hemisphere designation of coordinates in `centroids`, - where `hemiid=0` denotes the left and `hemiid=1` the right hemisphere - - Notes - ----- - The following methods can be used for finding parcel centroids: - - 1. ``method='average'`` - - Uses the arithmetic mean of the coordinates for the vertices in each - parcel. Note that in this case the calculated centroids will not act - actually fall on the surface of `surf`. - - 2. ``method='surface'`` - - Calculates the 'average' coordinates and then finds the closest vertex - on `surf`, where closest is defined as the vertex with the minimum - Euclidean distance. - - 3. ``method='geodesic'`` - - Uses the coordinates of the vertex with the minimum average geodesic - distance to all other vertices in the parcel. Note that this is slightly - more time-consuming than the other two methods, especially for - high-resolution meshes. - """ - methods = ['average', 'surface', 'geodesic'] - if method not in methods: - raise ValueError('Provided method for centroid calculation {} is ' - 'invalid. Must be one of {}'.format(methods, methods)) - - if drop is None: - drop = FSIGNORE - drop = _decode_list(drop) - - surfaces = fetch_fsaverage(version)[surf] - - centroids, hemiid = [], [] - for n, (annot, surf) in enumerate(zip([lhannot, rhannot], surfaces)): - vertices, faces = read_geometry(surf) - labels, ctab, names = read_annot(annot) - names = _decode_list(names) - - for lab in np.unique(labels): - if names[lab] in drop: - continue - if method in ['average', 'surface']: - roi = np.atleast_2d(vertices[labels == lab].mean(axis=0)) - if method == 'surface': # find closest vertex on the sphere - roi = vertices[np.argmin(cdist(vertices, roi), axis=0)[0]] - elif method == 'geodesic': - inds, = np.where(labels == lab) - roi = _geodesic_parcel_centroid(vertices, faces, inds) - centroids.append(roi) - hemiid.append(n) - - return np.vstack(centroids), np.asarray(hemiid) - - -def _geodesic_parcel_centroid(vertices, faces, inds): - """ - Calculate parcel centroids based on surface distance. 
- - Parameters - ---------- - vertices : (N, 3) - Coordinates of vertices defining surface - faces : (F, 3) - Triangular faces defining surface - inds : (R,) - Indices of `vertices` that belong to parcel - - Returns - ------- - roi : (3,) numpy.ndarray - Vertex corresponding to centroid of parcel - """ - mask = np.ones(len(vertices), dtype=bool) - mask[inds] = False - mat = make_surf_graph(vertices, faces, mask=mask) - paths = sparse.csgraph.dijkstra(mat, directed=False, indices=inds)[:, inds] - - # the selected vertex is the one with the minimum average shortest path - # to the other vertices in the parcel - roi = vertices[inds[paths.mean(axis=1).argmin()]] - - return roi - - -def parcels_to_vertices(data, *, lhannot, rhannot, drop=None): - """ - Project parcellated `data` to vertices defined in annotation files. - - Assigns np.nan to all ROIs in `drop` - - Parameters - ---------- - data : (N,) numpy.ndarray - Parcellated data to be projected to vertices. Parcels should be ordered - by [left, right] hemisphere; ordering within hemisphere should - correspond to the provided annotation files. - {lh,rh}annot : str - Path to .annot file containing labels of parcels on the {left,right} - hemisphere. These must be specified as keyword arguments to avoid - accidental order switching. - drop : list, optional - Specifies regions in {lh,rh}annot that are not present in `data`. NaNs - will be inserted in place of the these regions in the returned data. If - not specified, parcels defined in `netneurotools.freesurfer.FSIGNORE` - are assumed to not be present. Default: None - - Returns - ------- - projected : numpy.ndarray - Vertex-level data - """ - if drop is None: - drop = FSIGNORE - drop = _decode_list(drop) - - data = np.vstack(data).astype(float) - - # check this so we're not unduly surprised by anything... - n_vert = expected = 0 - for a in [lhannot, rhannot]: - vn, _, names = read_annot(a) - n_vert += len(vn) - names = _decode_list(names) - expected += len(names) - len(set(drop) & set(names)) - if expected != len(data): - raise ValueError('Number of parcels in provided annotation files ' - 'differs from size of parcellated data array.\n' - ' EXPECTED: {} parcels\n' - ' RECEIVED: {} parcels' - .format(expected, len(data))) - - projected = np.zeros((n_vert, data.shape[-1]), dtype=data.dtype) - start = end = n_vert = 0 - for annot in [lhannot, rhannot]: - # read files and update end index for `data` - labels, ctab, names = read_annot(annot) - names = _decode_list(names) - todrop = set(names) & set(drop) - end += len(names) - len(todrop) # unknown and corpuscallosum - - # get indices of unknown and corpuscallosum and insert NaN values - inds = sorted([names.index(f) for f in todrop]) - inds = [f - n for n, f in enumerate(inds)] - currdata = np.insert(data[start:end], inds, np.nan, axis=0) - - # project to vertices and store - projected[n_vert:n_vert + len(labels), :] = currdata[labels] - start = end - n_vert += len(labels) - - return np.squeeze(projected) - - -def vertices_to_parcels(data, *, lhannot, rhannot, drop=None): - """ - Reduce vertex-level `data` to parcels defined in annotation files. - - Takes average of vertices within each parcel, excluding np.nan values - (i.e., np.nanmean). Assigns np.nan to parcels for which all vertices are - np.nan. 
- - Parameters - ---------- - data : (N,) numpy.ndarray - Vertex-level data to be reduced to parcels - {lh,rh}annot : str - Path to .annot file containing labels to parcels on the {left,right} - hemisphere - drop : list, optional - Specifies regions in {lh,rh}annot that should be removed from the - parcellated version of `data`. If not specified, vertices corresponding - to parcels defined in `netneurotools.freesurfer.FSIGNORE` will be - removed. Default: None - - Returns - ------- - reduced : numpy.ndarray - Parcellated `data`, without regions specified in `drop` - """ - if drop is None: - drop = FSIGNORE - drop = _decode_list(drop) - - data = np.vstack(data) - - n_parc = expected = 0 - for a in [lhannot, rhannot]: - vn, _, names = read_annot(a) - expected += len(vn) - names = _decode_list(names) - n_parc += len(names) - len(set(drop) & set(names)) - if expected != len(data): - raise ValueError('Number of vertices in provided annotation files ' - 'differs from size of vertex-level data array.\n' - ' EXPECTED: {} vertices\n' - ' RECEIVED: {} vertices' - .format(expected, len(data))) - - reduced = np.zeros((n_parc, data.shape[-1]), dtype=data.dtype) - start = end = n_parc = 0 - for annot in [lhannot, rhannot]: - # read files and update end index for `data` - labels, ctab, names = read_annot(annot) - names = _decode_list(names) - - indices = np.unique(labels) - end += len(labels) - - for idx in range(data.shape[-1]): - # get average of vertex-level data within parcels - # set all NaN values to 0 before calling `_stats` because we are - # returning sums, so the 0 values won't impact the sums (if we left - # the NaNs then all parcels with even one NaN entry would be NaN) - currdata = np.squeeze(data[start:end, idx]) - isna = np.isnan(currdata) - counts, sums = _stats(np.nan_to_num(currdata), labels, indices) - - # however, we do need to account for the NaN values in the counts - # so that our means are similar to what we'd get from e.g., - # np.nanmean here, our "sums" are the counts of NaN values in our - # parcels - _, nacounts = _stats(isna, labels, indices) - counts = (np.asanyarray(counts, dtype=float) - - np.asanyarray(nacounts, dtype=float)) - - with np.errstate(divide='ignore', invalid='ignore'): - currdata = sums / counts - - # get indices of unkown and corpuscallosum and delete from parcels - inds = sorted([names.index(f) for f in set(drop) & set(names)]) - currdata = np.delete(currdata, inds) - - # store parcellated data - reduced[n_parc:n_parc + len(names) - len(inds), idx] = currdata - - start = end - n_parc += len(names) - len(inds) - - return np.squeeze(reduced) - - -def _get_fsaverage_coords(version='fsaverage', surface='sphere'): - """ - Get vertex coordinates for specified `surface` of fsaverage `version`. - - Parameters - ---------- - version : str, optional - One of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', - 'fsaverage6'}. Default: 'fsaverage' - surface : str, optional - Surface for which to return vertex coordinates. 
Default: 'sphere' - - Returns - ------- - coords : (N, 3) numpy.ndarray - xyz coordinates of vertices for {left,right} hemisphere - hemiid : (N,) numpy.ndarray - Array denoting hemisphere designation of entries in `coords`, where - `hemiid=0` denotes the left and `hemiid=1` the right hemisphere - """ - # get coordinates and hemisphere designation for spin generation - lhsphere, rhsphere = fetch_fsaverage(version)[surface] - coords, hemi = [], [] - for n, sphere in enumerate([lhsphere, rhsphere]): - coords.append(read_geometry(sphere)[0]) - hemi.append(np.ones(len(coords[-1])) * n) - - return np.vstack(coords), np.hstack(hemi) - - -def _get_fsaverage_spins(version='fsaverage', spins=None, n_rotate=1000, - **kwargs): - """ - Generate spatial permutation resamples for fsaverage `version`. - - If `spins` are provided then performs checks to confirm they are valid - - Parameters - ---------- - version : str, optional - Specifies which version of `fsaverage` for which to generate spins. - Must be one of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', - 'fsaverage6'}. Default: 'fsaverage' - spins : array_like, optional - Pre-computed spins to use instead of generating them on the fly. If not - provided will use other provided parameters to create them. Default: - None - n_rotate : int, optional - Number of rotations to generate. Default: 1000 - return_cost : bool, optional - Whether to return cost array (specified as Euclidean distance) for each - coordinate for each rotation. Currently this option is not supported if - pre-computed `spins` are provided. Default: True - kwargs : key-value pairs - Keyword arguments passed to `netneurotools.stats.gen_spinsamples` - - Returns - ------- - spins : (N, S) numpy.ndarray - Resampling array - """ - if spins is None: - coords, hemiid = _get_fsaverage_coords(version, 'sphere') - spins = gen_spinsamples(coords, hemiid, n_rotate=n_rotate, - **kwargs) - if kwargs.get('return_cost'): - return spins - - spins = np.asarray(spins, dtype='int32') - if spins.shape[-1] != n_rotate: - warnings.warn('Shape of provided `spins` array does not match ' - 'number of rotations requested with `n_rotate`. ' - 'Ignoring specified `n_rotate` parameter and using ' - 'all provided `spins`.', stacklevel=2) - n_rotate = spins.shape[-1] - - return spins, None - - -def spin_data(data, *, lhannot, rhannot, version='fsaverage', n_rotate=1000, - spins=None, drop=None, verbose=False, **kwargs): - """ - Project parcellated `data` to surface, rotates, and re-parcellates. - - Projection to the surface uses `{lh,rh}annot` files. Rotation uses vertex - coordinates from the specified fsaverage `version` and relies on - :func:`netneurotools.stats.gen_spinsamples`. Re-parcellated data will not - be exactly identical to original values due to re-averaging process. - Parcels subsumed by regions in `drop` will be listed as NaN. - - Parameters - ---------- - data : (N,) numpy.ndarray - Parcellated data to be rotated. Parcels should be ordered by [left, - right] hemisphere; ordering within hemisphere should correspond to the - provided `{lh,rh}annot` annotation files. - {lh,rh}annot : str - Path to .annot file containing labels to parcels on the {left,right} - hemisphere - version : str, optional - Specifies which version of `fsaverage` provided annotation files - correspond to. Must be one of {'fsaverage', 'fsaverage3', 'fsaverage4', - 'fsaverage5', 'fsaverage6'}. Default: 'fsaverage' - n_rotate : int, optional - Number of rotations to generate. 
Default: 1000 - spins : array_like, optional - Pre-computed spins to use instead of generating them on the fly. If not - provided will use other provided parameters to create them. Default: - None - drop : list, optional - Specifies regions in {lh,rh}annot that are not present in `data`. NaNs - will be inserted in place of the these regions in the returned data. If - not specified, parcels defined in `netneurotools.freesurfer.FSIGNORE` - are assumed to not be present. Default: None - verbose : bool, optional - Whether to print occasional status messages. Default: False - kwargs : key-value pairs - Keyword arguments passed to `netneurotools.stats.gen_spinsamples` - - Returns - ------- - rotated : (N, `n_rotate`) numpy.ndarray - Rotated `data - cost : (N, `n_rotate`,) numpy.ndarray - Cost (specified as Euclidean distance) of re-assigning each coordinate - for every rotation in `spinsamples`. Only provided if `return_cost` is - True. - """ - if drop is None: - drop = FSIGNORE - - # get coordinates and hemisphere designation for spin generation - vertices = parcels_to_vertices(data, lhannot=lhannot, rhannot=rhannot, - drop=drop) - - # get spins + cost (if requested) - spins, cost = _get_fsaverage_spins(version=version, spins=spins, - n_rotate=n_rotate, - verbose=verbose, **kwargs) - if len(vertices) != len(spins): - raise ValueError('Provided annotation files have a different ' - 'number of vertices than the specified fsaverage ' - 'surface.\n ANNOTATION: {} vertices\n ' - 'FSAVERAGE: {} vertices' - .format(len(vertices), len(spins))) - - spun = np.zeros(data.shape + (n_rotate,)) - for n in range(n_rotate): - if verbose: - msg = f'Reducing vertices to parcels: {n:>5}/{n_rotate}' - print(msg, end='\b' * len(msg), flush=True) - spun[..., n] = vertices_to_parcels(vertices[spins[:, n]], - lhannot=lhannot, rhannot=rhannot, - drop=drop) - - if verbose: - print(' ' * len(msg) + '\b' * len(msg), end='', flush=True) - - if kwargs.get('return_cost'): - return spun, cost - - return spun - - -def spin_parcels(*, lhannot, rhannot, version='fsaverage', n_rotate=1000, - spins=None, drop=None, verbose=False, **kwargs): - """ - Rotate parcels in `{lh,rh}annot` and re-assigns based on maximum overlap. - - Vertex labels are rotated with :func:`netneurotools.stats.gen_spinsamples` - and a new label is assigned to each *parcel* based on the region maximally - overlapping with its boundaries. - - Parameters - ---------- - {lh,rh}annot : str - Path to .annot file containing labels to parcels on the {left,right} - hemisphere - version : str, optional - Specifies which version of `fsaverage` provided annotation files - correspond to. Must be one of {'fsaverage', 'fsaverage3', 'fsaverage4', - 'fsaverage5', 'fsaverage6'}. Default: 'fsaverage' - n_rotate : int, optional - Number of rotations to generate. Default: 1000 - spins : array_like, optional - Pre-computed spins to use instead of generating them on the fly. If not - provided will use other provided parameters to create them. Default: - None - drop : list, optional - Specifies regions in {lh,rh}annot that are not present in `data`. NaNs - will be inserted in place of the these regions in the returned data. If - not specified, parcels defined in `netneurotools.freesurfer.FSIGNORE` - are assumed to not be present. Default: None - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Default: None - verbose : bool, optional - Whether to print occasional status messages. 
Default: False - return_cost : bool, optional - Whether to return cost array (specified as Euclidean distance) for each - coordinate for each rotation. Default: True - kwargs : key-value pairs - Keyword arguments passed to `netneurotools.stats.gen_spinsamples` - - Returns - ------- - spinsamples : (N, `n_rotate`) numpy.ndarray - Resampling matrix to use in permuting data parcellated with labels from - {lh,rh}annot, where `N` is the number of parcels. Indices of -1 - indicate that the parcel was completely encompassed by regions in - `drop` and should be ignored. - cost : (N, `n_rotate`,) numpy.ndarray - Cost (specified as Euclidean distance) of re-assigning each coordinate - for every rotation in `spinsamples`. Only provided if `return_cost` is - True. - """ - - def overlap(vals): - """Return most common non-negative value in `vals`; -1 if all neg.""" - vals = np.asarray(vals) - vals, counts = np.unique(vals[vals > 0], return_counts=True) - try: - return vals[counts.argmax()] - except ValueError: - return -1 - - if drop is None: - drop = FSIGNORE - drop = _decode_list(drop) - - # get vertex-level labels (set drop labels to - values) - vertices, end = [], 0 - for n, annot in enumerate([lhannot, rhannot]): - labels, ctab, names = read_annot(annot) - names = _decode_list(names) - todrop = set(names) & set(drop) - inds = [names.index(f) - n for n, f in enumerate(todrop)] - labs = np.arange(len(names) - len(inds)) + (end - (len(inds) * n)) - insert = np.arange(-1, -(len(inds) + 1), -1) - vertices.append(np.insert(labs, inds, insert)[labels]) - end += len(names) - vertices = np.hstack(vertices) - labels = np.unique(vertices) - mask = labels > -1 - - # get spins + cost (if requested) - spins, cost = _get_fsaverage_spins(version=version, spins=spins, - n_rotate=n_rotate, verbose=verbose, - **kwargs) - if len(vertices) != len(spins): - raise ValueError('Provided annotation files have a different ' - 'number of vertices than the specified fsaverage ' - 'surface.\n ANNOTATION: {} vertices\n ' - 'FSAVERAGE: {} vertices' - .format(len(vertices), len(spins))) - - # spin and assign regions based on max overlap - regions = np.zeros((len(labels[mask]), n_rotate), dtype='int32') - for n in range(n_rotate): - if verbose: - msg = f'Calculating parcel overlap: {n:>5}/{n_rotate}' - print(msg, end='\b' * len(msg), flush=True) - regions[:, n] = labeled_comprehension(vertices[spins[:, n]], vertices, - labels, overlap, int, -1)[mask] - - if kwargs.get('return_cost'): - return regions, cost - - return regions diff --git a/netneurotools/interface/__init__.py b/netneurotools/interface/__init__.py new file mode 100644 index 0000000..1b474b1 --- /dev/null +++ b/netneurotools/interface/__init__.py @@ -0,0 +1,3 @@ +"""Functions for interfacing with common tools.""" + +__all__ = [] diff --git a/netneurotools/interface/freesurfer.py b/netneurotools/interface/freesurfer.py new file mode 100644 index 0000000..9efc02f --- /dev/null +++ b/netneurotools/interface/freesurfer.py @@ -0,0 +1 @@ +"""Functions for working with FreeSurfer data and parcellations.""" diff --git a/netneurotools/interface/tests/__init__.py b/netneurotools/interface/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/interface/tests/test_freesurfer.py b/netneurotools/interface/tests/test_freesurfer.py new file mode 100644 index 0000000..fcdcd23 --- /dev/null +++ b/netneurotools/interface/tests/test_freesurfer.py @@ -0,0 +1 @@ +"""For testing netneurotools.interface.freesurfer functionality.""" diff --git 
a/netneurotools/metrics/__init__.py b/netneurotools/metrics/__init__.py new file mode 100644 index 0000000..51d8181 --- /dev/null +++ b/netneurotools/metrics/__init__.py @@ -0,0 +1,66 @@ +"""Magics on networks.""" + + +from .bct import ( + # routing + degrees_und, degrees_dir, + distance_wei_floyd, retrieve_shortest_path, + navigation_wu, get_navigation_path_length, + # diffusion + communicability_bin, communicability_wei, + path_transitivity, search_information, + mean_first_passage_time, diffusion_efficiency, + resource_efficiency_bin, flow_graph, + # other + assortativity, + matching_ind_und, + rich_feeder_peripheral +) + + +from .metrics_utils import ( + _fast_binarize, + _graph_laplacian, +) + + +from .spreading import ( + simulate_atrophy +) + + +from .statistical import ( + network_pearsonr, + network_pearsonr_numba, + network_pearsonr_pairwise, + effective_resistance, + network_polarisation, + network_variance, + network_variance_numba, + network_covariance, + network_covariance_numba +) + + +__all__ = [ + # bct + 'degrees_und', 'degrees_dir', + 'distance_wei_floyd', 'retrieve_shortest_path', + 'navigation_wu', 'get_navigation_path_length', + 'communicability_bin', 'communicability_wei', + 'path_transitivity', 'search_information', + 'mean_first_passage_time', 'diffusion_efficiency', + 'resource_efficiency_bin', 'flow_graph', + 'assortativity', 'matching_ind_und', + 'rich_feeder_peripheral', + # metrics_utils + '_fast_binarize', '_graph_laplacian', + # spreading + 'simulate_atrophy', + # statistical + 'network_pearsonr', 'network_pearsonr_numba', + 'network_pearsonr_pairwise', 'effective_resistance', + 'network_polarisation', 'network_variance', + 'network_variance_numba', 'network_covariance', + 'network_covariance_numba' +] diff --git a/netneurotools/metrics.py b/netneurotools/metrics/bct.py similarity index 95% rename from netneurotools/metrics.py rename to netneurotools/metrics/bct.py index 2b8abf2..3996350 100644 --- a/netneurotools/metrics.py +++ b/netneurotools/metrics/bct.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- """ -Functions for calculating network metrics. +Functions for calculating brain connectivity metrics. Uses naming conventions adopted from the Brain Connectivity Toolbox (https://sites.google.com/site/bctnet/). @@ -18,26 +17,7 @@ except ImportError: use_numba = False - -def _binarize(W): - """ - Binarize a matrix. - - Parameters - ---------- - W : (N, N) array_like - Matrix to be binarized - - Returns - ------- - binarized : (N, N) numpy.ndarray - Binarized matrix - """ - return (W > 0) * 1 - - -if use_numba: - _binarize = njit(_binarize) +from .metrics_utils import _fast_binarize def degrees_und(W): @@ -56,7 +36,7 @@ def degrees_und(W): deg : (N,) numpy.ndarray Degree of each node in `W` """ - return np.sum(_binarize(W), axis=0) + return np.sum(_fast_binarize(W), axis=0) def degrees_dir(W): @@ -78,7 +58,7 @@ def degrees_dir(W): deg : (N,) numpy.ndarray Degree (in-degree + out-degree) of each node in `W` """ - W_bin = _binarize(W) + W_bin = _fast_binarize(W) deg_in = np.sum(W_bin, axis=0) deg_out = np.sum(W_bin, axis=1) deg = deg_in + deg_out @@ -165,179 +145,6 @@ def retrieve_shortest_path(s, t, p_mat): retrieve_shortest_path = njit(retrieve_shortest_path) -def communicability_bin(adjacency, normalize=False): - """ - Compute the communicability of pairs of nodes in `adjacency`. 
- - Parameters - ---------- - adjacency : (N, N) array_like - Unweighted, direct/undirected connection weight/length array - normalize : bool, optional - Whether to normalize `adjacency` by largest eigenvalue prior to - calculation of communicability metric. Default: False - - Returns - ------- - comm : (N, N) numpy.ndarray - Symmetric array representing communicability of nodes {i, j} - - References - ---------- - Estrada, E., & Hatano, N. (2008). Communicability in complex networks. - Physical Review E, 77(3), 036111. - - Examples - -------- - >>> from netneurotools import metrics - - >>> A = np.array([[1, 0, 1], [0, 1, 1], [1, 0, 1]]) - >>> Q = metrics.communicability_bin(A) - >>> Q - array([[4.19452805, 0. , 3.19452805], - [1.47624622, 2.71828183, 3.19452805], - [3.19452805, 0. , 4.19452805]]) - """ - if not np.any(np.logical_or(adjacency == 0, adjacency == 1)): - raise ValueError('Provided adjancecy matrix must be unweighted.') - - # normalize by largest eigenvalue to prevent communicability metric from - # "blowing up" - if normalize: - norm = np.linalg.eigvals(adjacency).max() - adjacency = adjacency / norm - - return scipy.sparse.linalg.expm(adjacency) - - -def communicability_wei(adjacency): - """ - Compute the communicability of pairs of nodes in `adjacency`. - - Parameters - ---------- - adjacency : (N, N) array_like - Weighted, direct/undirected connection weight/length array - - Returns - ------- - cmc : (N, N) numpy.ndarray - Symmetric array representing communicability of nodes {i, j} - - References - ---------- - Crofts, J. J., & Higham, D. J. (2009). A weighted communicability measure - applied to complex brain networks. Journal of the Royal Society Interface, - 6(33), 411-414. - - Examples - -------- - >>> from netneurotools import metrics - - >>> A = np.array([[2, 0, 3], [0, 2, 1], [0.5, 0, 1]]) - >>> Q = metrics.communicability_wei(A) - >>> Q - array([[0. , 0. , 1.93581903], - [0.07810379, 0. , 0.94712177], - [0.32263651, 0. , 0. ]]) - """ - # negative square root of nodal degrees - row_sum = adjacency.sum(1) - neg_sqrt = np.power(row_sum, -0.5) - square_sqrt = np.diag(neg_sqrt) - - # normalize input matrix - for_expm = square_sqrt @ adjacency @ square_sqrt - - # calculate matrix exponential of normalized matrix - cmc = scipy.sparse.linalg.expm(for_expm) - cmc[np.diag_indices_from(cmc)] = 0 - - return cmc - - -def rich_feeder_peripheral(x, sc, stat='median'): - """ - Calculate connectivity values in rich, feeder, and peripheral edges. - - Parameters - ---------- - x : (N, N) numpy.ndarray - Symmetric correlation or connectivity matrix - sc : (N, N) numpy.ndarray - Binary structural connectivity matrix - stat : {'mean', 'median'}, optional - Statistic to use over rich/feeder/peripheral links. Default: 'median' - - Returns - ------- - rfp : (3, k) numpy.ndarray - Array of median rich (0), feeder (1), and peripheral (2) - values, defined by `x`. `k` is the maximum degree defined on `sc`. - pvals : (3, k) numpy.ndarray - p-value for each link, computed using Welch's t-test. - Rich links are compared against non-rich links. Feeder links are - compared against peripheral links. Peripheral links are compared - against feeder links. T-test is one-sided. - - Notes - ----- - This code was written by Justine Hansen who promises to fix and even - optimize the code should any issues arise, provided you let her know. 
- """ - stats = ['mean', 'median'] - if stat not in stats: - raise ValueError(f'Provided stat {stat} not valid.\ - Must be one of {stats}') - - nnodes = len(sc) - mask = np.triu(np.ones(nnodes), 1) > 0 - node_degree = degrees_und(sc) - k = np.max(node_degree).astype(np.int64) - rfp_label = np.zeros((len(sc[mask]), k)) - - for degthresh in range(k): # for each degree threshold - hub_idx = np.where(node_degree >= degthresh) # find the hubs - hub = np.zeros([nnodes, 1]) - hub[hub_idx, :] = 1 - - rfp = np.zeros([nnodes, nnodes]) # for each link, define rfp - for edge1 in range(nnodes): - for edge2 in range(nnodes): - if hub[edge1] + hub[edge2] == 2: - rfp[edge1, edge2] = 1 # rich - if hub[edge1] + hub[edge2] == 1: - rfp[edge1, edge2] = 2 # feeder - if hub[edge1] + hub[edge2] == 0: - rfp[edge1, edge2] = 3 # peripheral - rfp_label[:, degthresh] = rfp[mask] - - rfp = np.zeros([3, k]) - pvals = np.zeros([3, k]) - for degthresh in range(k): - - redfunc = np.median if stat == 'median' else np.mean - for linktype in range(3): - rfp[linktype, degthresh] = redfunc(x[mask][rfp_label[:, degthresh] - == linktype + 1]) - - # p-value (one-sided Welch's t-test) - _, pvals[0, degthresh] = ttest_ind( - x[mask][rfp_label[:, degthresh] == 1], - x[mask][rfp_label[:, degthresh] != 1], - equal_var=False, alternative='greater') - _, pvals[1, degthresh] = ttest_ind( - x[mask][rfp_label[:, degthresh] == 2], - x[mask][rfp_label[:, degthresh] == 3], - equal_var=False, alternative='greater') - _, pvals[2, degthresh] = ttest_ind( - x[mask][rfp_label[:, degthresh] == 3], - x[mask][rfp_label[:, degthresh] == 2], - equal_var=False, alternative='greater') - - return rfp, pvals - - def navigation_wu(nav_dist_mat, sc_mat): """ Compute network navigation. @@ -461,20 +268,170 @@ def get_navigation_path_length(nav_paths, alt_dist_mat): `pl_dis = get_navigation_path_length(nav_paths, D)` D is Euclidean distance between node centroids. - See Also - -------- - netneurotools.metrics.navigation_wu - """ - nav_path_len = np.zeros_like(alt_dist_mat) - for nav_item in nav_paths: - i, j, _, hop, path = nav_item - if hop != -1: - nav_path_len[i, j] = np.sum( - [alt_dist_mat[path[_], path[_ + 1]] for _ in range(hop)] - ) - else: - nav_path_len[i, j] = np.inf - return nav_path_len + See Also + -------- + netneurotools.metrics.navigation_wu + """ + nav_path_len = np.zeros_like(alt_dist_mat) + for nav_item in nav_paths: + i, j, _, hop, path = nav_item + if hop != -1: + nav_path_len[i, j] = np.sum( + [alt_dist_mat[path[_], path[_ + 1]] for _ in range(hop)] + ) + else: + nav_path_len[i, j] = np.inf + return nav_path_len + + +def communicability_bin(adjacency, normalize=False): + """ + Compute the communicability of pairs of nodes in `adjacency`. + + Parameters + ---------- + adjacency : (N, N) array_like + Unweighted, direct/undirected connection weight/length array + normalize : bool, optional + Whether to normalize `adjacency` by largest eigenvalue prior to + calculation of communicability metric. Default: False + + Returns + ------- + comm : (N, N) numpy.ndarray + Symmetric array representing communicability of nodes {i, j} + + References + ---------- + Estrada, E., & Hatano, N. (2008). Communicability in complex networks. + Physical Review E, 77(3), 036111. + + Examples + -------- + >>> from netneurotools import metrics + + >>> A = np.array([[1, 0, 1], [0, 1, 1], [1, 0, 1]]) + >>> Q = metrics.communicability_bin(A) + >>> Q + array([[4.19452805, 0. , 3.19452805], + [1.47624622, 2.71828183, 3.19452805], + [3.19452805, 0. 
, 4.19452805]]) + """ + if not np.any(np.logical_or(adjacency == 0, adjacency == 1)): + raise ValueError('Provided adjancecy matrix must be unweighted.') + + # normalize by largest eigenvalue to prevent communicability metric from + # "blowing up" + if normalize: + norm = np.linalg.eigvals(adjacency).max() + adjacency = adjacency / norm + + return scipy.sparse.linalg.expm(adjacency) + + +def communicability_wei(adjacency): + """ + Compute the communicability of pairs of nodes in `adjacency`. + + Parameters + ---------- + adjacency : (N, N) array_like + Weighted, direct/undirected connection weight/length array + + Returns + ------- + cmc : (N, N) numpy.ndarray + Symmetric array representing communicability of nodes {i, j} + + References + ---------- + Crofts, J. J., & Higham, D. J. (2009). A weighted communicability measure + applied to complex brain networks. Journal of the Royal Society Interface, + 6(33), 411-414. + + Examples + -------- + >>> from netneurotools import metrics + + >>> A = np.array([[2, 0, 3], [0, 2, 1], [0.5, 0, 1]]) + >>> Q = metrics.communicability_wei(A) + >>> Q + array([[0. , 0. , 1.93581903], + [0.07810379, 0. , 0.94712177], + [0.32263651, 0. , 0. ]]) + """ + # negative square root of nodal degrees + row_sum = adjacency.sum(1) + neg_sqrt = np.power(row_sum, -0.5) + square_sqrt = np.diag(neg_sqrt) + + # normalize input matrix + for_expm = square_sqrt @ adjacency @ square_sqrt + + # calculate matrix exponential of normalized matrix + cmc = scipy.sparse.linalg.expm(for_expm) + cmc[np.diag_indices_from(cmc)] = 0 + + return cmc + + +def path_transitivity(D): + """ + Calculate path transitivity. + + This function implements path transitivity, calculating the density of + local detours (triangles) that are available along the shortest paths + between all pairs of nodes. + + This function is adapted and optimized from the Brain Connectivity Toolbox. + + .. warning:: + Test before use. + + Parameters + ---------- + D : (N, N) ndarray + Weight or connection length matrix. Length matrix is recommended and + transform should have been applied. + + Returns + ------- + T_mat : (N, N) ndarray + Path transitivity matrix + + References + ---------- + .. [1] Goñi, J., Van Den Heuvel, M. P., Avena-Koenigsberger, + A., Velez de Mendizabal, N., Betzel, R. F., Griffa, A., ... & + Sporns, O. (2014). Resting-brain functional connectivity predicted + by analytic measures of network communication. Proceedings of the + National Academy of Sciences, 111(2), 833-838. + """ + n = len(D) + m = np.zeros((n, n)) + T_mat = np.zeros((n, n)) + + deg_wu = np.sum(D, axis=0) + + for i in range(n - 1): + for j in range(i + 1, n): + sig_and = np.logical_and(D[i, :], D[j, :]) + m[i, j] = np.dot(D[i, :] + D[j, :], sig_and) \ + / (deg_wu[i] + deg_wu[j] - 2 * D[i, j]) + m += m.transpose() + + _, p_mat = distance_wei_floyd(D) + + for i in range(n - 1): + for j in range(i + 1, n): + path = retrieve_shortest_path(i, j, p_mat) + K = len(path) + T_mat[i, j] = 2 \ + * sum([m[i, j] for i, j in itertools.combinations(path, 2)]) \ + / (K * (K - 1)) + T_mat += T_mat.transpose() + + return T_mat def search_information(W, D, has_memory=False): @@ -580,116 +537,6 @@ def search_information(W, D, has_memory=False): return SI -def path_transitivity(D): - """ - Calculate path transitivity. - - This function implements path transitivity, calculating the density of - local detours (triangles) that are available along the shortest paths - between all pairs of nodes. 
- - This function is adapted and optimized from the Brain Connectivity Toolbox. - - .. warning:: - Test before use. - - Parameters - ---------- - D : (N, N) ndarray - Weight or connection length matrix. Length matrix is recommended and - transform should have been applied. - - Returns - ------- - T_mat : (N, N) ndarray - Path transitivity matrix - - References - ---------- - .. [1] Goñi, J., Van Den Heuvel, M. P., Avena-Koenigsberger, - A., Velez de Mendizabal, N., Betzel, R. F., Griffa, A., ... & - Sporns, O. (2014). Resting-brain functional connectivity predicted - by analytic measures of network communication. Proceedings of the - National Academy of Sciences, 111(2), 833-838. - """ - n = len(D) - m = np.zeros((n, n)) - T_mat = np.zeros((n, n)) - - deg_wu = np.sum(D, axis=0) - - for i in range(n - 1): - for j in range(i + 1, n): - sig_and = np.logical_and(D[i, :], D[j, :]) - m[i, j] = np.dot(D[i, :] + D[j, :], sig_and) \ - / (deg_wu[i] + deg_wu[j] - 2 * D[i, j]) - m += m.transpose() - - _, p_mat = distance_wei_floyd(D) - - for i in range(n - 1): - for j in range(i + 1, n): - path = retrieve_shortest_path(i, j, p_mat) - K = len(path) - T_mat[i, j] = 2 \ - * sum([m[i, j] for i, j in itertools.combinations(path, 2)]) \ - / (K * (K - 1)) - T_mat += T_mat.transpose() - - return T_mat - - -def flow_graph(W, r=None, t=1): - """ - Calculate flow graph. - - This function implements flow graph, instantiates a continuous - time random walk on network. Waiting time for walkers at each - node are distributed as Poisson with rate parameter r. - This function returns the flow graph at time t. - - .. warning:: - Test before use. - - Parameters - ---------- - W : (N, N) ndarray - Symmetric adjacency matrix. - r : (N,) or (N, 1) ndarray, optional - Rate parameter. Will be set to np.ones((N, 1)) if not specified. - Default: None - t : int, optional - Markov time. Default: 1 - - Returns - ------- - dyn : (N, N) ndarray - flow graph at time T - - References - ---------- - .. [1] Lambiotte, R., Sinatra, R., Delvenne, J. C., Evans, T. S., - Barahona, M., & Latora, V. (2011). Flow graphs: Interweaving - dynamics and structure. Physical Review E, 84(1), 017102. - .. [2] https://github.com/brain-networks/local_scfc/blob/main/fcn/fcn_flow_graph.m - """ - if r is None: - r = np.ones((W.shape[0], 1)) - else: - if r.ndim == 1: - r = r[:, None] - deg_wu = np.sum(W, axis=0, keepdims=True) # (1, N) - deg_rate = np.sum(deg_wu / r, axis=0, keepdims=True) # (N, N) => (1, N) - ps = deg_wu / (deg_rate * r) # (1, N) / (N, N) => (N, N) - laplacian = np.diagflat(r) - np.multiply(np.divide(W, deg_wu), r) # elementwise - dyn = np.multiply( - deg_rate * scipy.sparse.linalg.expm(-t * laplacian), - ps - ) # elementwise - dyn = (dyn + dyn.T) / 2 - return dyn - - def mean_first_passage_time(W, tol=1e-3): """ Calculate mean first passage time. @@ -824,7 +671,7 @@ def resource_efficiency_bin(W_bin, lambda_prob=0.5): morphospace of communication efficiency in complex networks. PLoS One, 8(3), e58070. """ - W_bin = _binarize(W_bin) + W_bin = _fast_binarize(W_bin) if not (0 < lambda_prob < 1): raise ValueError("lambda_prob must be between 0 and 1.") @@ -871,6 +718,62 @@ def resource_efficiency_bin(W_bin, lambda_prob=0.5): return E_res, prob_spl +def flow_graph(W, r=None, t=1): + """ + Calculate flow graph. + + This function implements flow graph, instantiates a continuous + time random walk on network. Waiting time for walkers at each + node are distributed as Poisson with rate parameter r. 
+ This function returns the flow graph at time t. + + .. warning:: + Test before use. + + Parameters + ---------- + W : (N, N) ndarray + Symmetric adjacency matrix. + r : (N,) or (N, 1) ndarray, optional + Rate parameter. Will be set to np.ones((N, 1)) if not specified. + Default: None + t : int, optional + Markov time. Default: 1 + + Returns + ------- + dyn : (N, N) ndarray + flow graph at time T + + References + ---------- + .. [1] Lambiotte, R., Sinatra, R., Delvenne, J. C., Evans, T. S., + Barahona, M., & Latora, V. (2011). Flow graphs: Interweaving + dynamics and structure. Physical Review E, 84(1), 017102. + .. [2] https://github.com/brain-networks/local_scfc/blob/main/fcn/fcn_flow_graph.m + """ + if r is None: + r = np.ones((W.shape[0], 1)) + else: + if r.ndim == 1: + r = r[:, None] + deg_wu = np.sum(W, axis=0, keepdims=True) # (1, N) + deg_rate = np.sum(deg_wu / r, axis=0, keepdims=True) # (N, N) => (1, N) + ps = deg_wu / (deg_rate * r) # (1, N) / (N, N) => (N, N) + laplacian = np.diagflat(r) - np.multiply(np.divide(W, deg_wu), r) # elementwise + dyn = np.multiply( + deg_rate * scipy.sparse.linalg.expm(-t * laplacian), + ps + ) # elementwise + dyn = (dyn + dyn.T) / 2 + return dyn + + +def assortativity(W, r=None): + """Calculate assortativity.""" + pass + + def matching_ind_und(W): """ Calculate undirected matching index. @@ -928,37 +831,83 @@ def matching_ind_und(W): return M0 -def _graph_laplacian(W): - r""" - Compute the graph Laplacian of a weighted adjacency matrix. - - Graph Laplacian is defined as the degree matrix minus the adjacency - matrix :math:`L = D - W`, where :math:`D` is the degree matrix and - is defined as :math:`D_{ii} = \sum_j W_{ij}`. - - The graph Laplacian matrix :math:`L` has the form of - - .. math:: - L = \begin{bmatrix} - d_1 & -w_{12} & \cdots & -w_{1n} \\ - -w_{21} & d_2 & \cdots & -w_{2n} \\ - \vdots & \vdots & \ddots & \vdots \\ - -w_{n1} & -w_{n2} & \cdots & d_n - \end{bmatrix} +def rich_feeder_peripheral(x, sc, stat='median'): + """ + Calculate connectivity values in rich, feeder, and peripheral edges. Parameters ---------- - W : (N, N) array_like - Weighted, directed/undirected connection weight/length array + x : (N, N) numpy.ndarray + Symmetric correlation or connectivity matrix + sc : (N, N) numpy.ndarray + Binary structural connectivity matrix + stat : {'mean', 'median'}, optional + Statistic to use over rich/feeder/peripheral links. Default: 'median' Returns ------- - L : (N, N) numpy.ndarray - Graph Laplacian of `W` + rfp : (3, k) numpy.ndarray + Array of median rich (0), feeder (1), and peripheral (2) + values, defined by `x`. `k` is the maximum degree defined on `sc`. + pvals : (3, k) numpy.ndarray + p-value for each link, computed using Welch's t-test. + Rich links are compared against non-rich links. Feeder links are + compared against peripheral links. Peripheral links are compared + against feeder links. T-test is one-sided. + + Notes + ----- + This code was written by Justine Hansen who promises to fix and even + optimize the code should any issues arise, provided you let her know. 
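+
+    A minimal, illustrative sketch of the expected call (``fc`` and ``sc``
+    below are random placeholders, not data shipped with the package):
+
+    .. code:: python
+
+        import numpy as np
+        from netneurotools import metrics
+
+        rng = np.random.default_rng(1234)
+        fc = np.corrcoef(rng.standard_normal((90, 200)))  # symmetric (N, N) matrix
+        sc = (rng.random((90, 90)) < 0.1).astype(int)     # ~10% density
+        sc = np.triu(sc, 1) + np.triu(sc, 1).T            # binary, symmetric, no self-loops
+        rfp, pvals = metrics.rich_feeder_peripheral(fc, sc, stat='median')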
""" - D = np.diag(np.sum(W, axis=0)) - return D - W + stats = ['mean', 'median'] + if stat not in stats: + raise ValueError(f'Provided stat {stat} not valid.\ + Must be one of {stats}') + nnodes = len(sc) + mask = np.triu(np.ones(nnodes), 1) > 0 + node_degree = degrees_und(sc) + k = np.max(node_degree).astype(np.int64) + rfp_label = np.zeros((len(sc[mask]), k)) -if use_numba: - _graph_laplacian = njit(_graph_laplacian) # ("float64[:,::1](float64[:,::1])") + for degthresh in range(k): # for each degree threshold + hub_idx = np.where(node_degree >= degthresh) # find the hubs + hub = np.zeros([nnodes, 1]) + hub[hub_idx, :] = 1 + + rfp = np.zeros([nnodes, nnodes]) # for each link, define rfp + for edge1 in range(nnodes): + for edge2 in range(nnodes): + if hub[edge1] + hub[edge2] == 2: + rfp[edge1, edge2] = 1 # rich + if hub[edge1] + hub[edge2] == 1: + rfp[edge1, edge2] = 2 # feeder + if hub[edge1] + hub[edge2] == 0: + rfp[edge1, edge2] = 3 # peripheral + rfp_label[:, degthresh] = rfp[mask] + + rfp = np.zeros([3, k]) + pvals = np.zeros([3, k]) + for degthresh in range(k): + + redfunc = np.median if stat == 'median' else np.mean + for linktype in range(3): + rfp[linktype, degthresh] = redfunc(x[mask][rfp_label[:, degthresh] + == linktype + 1]) + + # p-value (one-sided Welch's t-test) + _, pvals[0, degthresh] = ttest_ind( + x[mask][rfp_label[:, degthresh] == 1], + x[mask][rfp_label[:, degthresh] != 1], + equal_var=False, alternative='greater') + _, pvals[1, degthresh] = ttest_ind( + x[mask][rfp_label[:, degthresh] == 2], + x[mask][rfp_label[:, degthresh] == 3], + equal_var=False, alternative='greater') + _, pvals[2, degthresh] = ttest_ind( + x[mask][rfp_label[:, degthresh] == 3], + x[mask][rfp_label[:, degthresh] == 2], + equal_var=False, alternative='greater') + + return rfp, pvals diff --git a/netneurotools/metrics/communication.py b/netneurotools/metrics/communication.py new file mode 100644 index 0000000..6d126b9 --- /dev/null +++ b/netneurotools/metrics/communication.py @@ -0,0 +1 @@ +"""Functions for calculating network communication metrics.""" diff --git a/netneurotools/metrics/control.py b/netneurotools/metrics/control.py new file mode 100644 index 0000000..b1e4b1c --- /dev/null +++ b/netneurotools/metrics/control.py @@ -0,0 +1 @@ +"""Functions for calculating network control metrics.""" diff --git a/netneurotools/metrics/metrics_utils.py b/netneurotools/metrics/metrics_utils.py new file mode 100644 index 0000000..b32f632 --- /dev/null +++ b/netneurotools/metrics/metrics_utils.py @@ -0,0 +1,66 @@ +"""Functions for supporting network metrics.""" + +import numpy as np + +try: + from numba import njit + use_numba = True +except ImportError: + use_numba = False + + +def _fast_binarize(W): + """ + Binarize a matrix. + + Parameters + ---------- + W : (N, N) array_like + Matrix to be binarized + + Returns + ------- + binarized : (N, N) numpy.ndarray + Binarized matrix + """ + return (W > 0) * 1 + + +if use_numba: + _fast_binarize = njit(_fast_binarize) + + +def _graph_laplacian(W): + r""" + Compute the graph Laplacian of a weighted adjacency matrix. + + Graph Laplacian is defined as the degree matrix minus the adjacency + matrix :math:`L = D - W`, where :math:`D` is the degree matrix and + is defined as :math:`D_{ii} = \sum_j W_{ij}`. + + The graph Laplacian matrix :math:`L` has the form of + + .. 
math:: + L = \begin{bmatrix} + d_1 & -w_{12} & \cdots & -w_{1n} \\ + -w_{21} & d_2 & \cdots & -w_{2n} \\ + \vdots & \vdots & \ddots & \vdots \\ + -w_{n1} & -w_{n2} & \cdots & d_n + \end{bmatrix} + + Parameters + ---------- + W : (N, N) array_like + Weighted, directed/undirected connection weight/length array + + Returns + ------- + L : (N, N) numpy.ndarray + Graph Laplacian of `W` + """ + D = np.diag(np.sum(W, axis=0)) + return D - W + + +if use_numba: + _graph_laplacian = njit(_graph_laplacian) # ("float64[:,::1](float64[:,::1])") diff --git a/netneurotools/metrics/spreading.py b/netneurotools/metrics/spreading.py new file mode 100644 index 0000000..8a5fb79 --- /dev/null +++ b/netneurotools/metrics/spreading.py @@ -0,0 +1,6 @@ +"""Functions for calculating network spreading models.""" + + +def simulate_atrophy(): + """Simulate atrophy in a network.""" + pass diff --git a/netneurotools/metrics/statistical.py b/netneurotools/metrics/statistical.py new file mode 100644 index 0000000..db1bed8 --- /dev/null +++ b/netneurotools/metrics/statistical.py @@ -0,0 +1,661 @@ +"""Functions for calculating statistical network metrics.""" + +import numpy as np + +try: + from numba import njit + use_numba = True +except ImportError: + use_numba = False + +from .metrics_utils import _graph_laplacian + + +def network_pearsonr(annot1, annot2, weight): + r""" + Calculate pearson correlation between two annotation vectors. + + .. warning:: + Test before use. + + Parameters + ---------- + annot1 : (N,) array_like + First annotation vector, demean will be applied. + annot2 : (N,) array_like + Second annotation vector, demean will be applied. + weight : (N, N) array_like + Weight matrix. Diagonal elements should be 1. + + Returns + ------- + corr : float + Network correlation between `annot1` and `annot2` + + Notes + ----- + If Pearson correlation is represented as + + .. math:: + \rho_{x,y} = \dfrac{ + \mathrm{sum}(I \times (\hat{x} \otimes \hat{y})) + }{ + \sigma_x \sigma_y + } + + The network correlation is defined analogously as + + .. math:: + \rho_{x,y,G} = \dfrac{ + \mathrm{sum}(W \times (\hat{x} \otimes \hat{y})) + }{ + \sigma_{x,W} \sigma_{y,W} + } + + where :math:`\hat{x}` and :math:`\hat{y}` are the demeaned annotation vectors, + + The weight matrix :math:`W` is used to represent the network structure. + It is usually in the form of :math:`W = \\exp(-kL)` where :math:`L` is the + length matrix and :math:`k` is a decay parameter. + + Example using shortest path length as weight + + .. code:: python + + spl, _ = distance_wei_floyd(D) # input should be distance matrix + spl_wei = 1 / np.exp(spl) + netcorr = network_pearsonr(annot1, annot2, spl_wei) + + Example using (inverse) effective resistance as weight + + .. code:: python + + R_eff = effective_resistance(W) + R_eff_norm = R_eff / np.max(R_eff) + W = 1 / R_eff_norm + W = W / np.max(W) + np.fill_diagonal(W, 1.0) + netcorr = network_pearsonr(annot1, annot2, W) + + References + ---------- + .. [1] Coscia, M. (2021). Pearson correlations on complex networks. + Journal of Complex Networks, 9(6), cnab036. 
+ https://doi.org/10.1093/comnet/cnab036 + + + See Also + -------- + netneurotools.stats.network_pearsonr_pairwise + """ + annot1 = annot1 - np.mean(annot1) + annot2 = annot2 - np.mean(annot2) + upper = np.sum(np.multiply(weight, np.outer(annot1, annot2))) + lower1 = np.sum(np.multiply(weight, np.outer(annot1, annot1))) + lower2 = np.sum(np.multiply(weight, np.outer(annot2, annot2))) + return upper / np.sqrt(lower1) / np.sqrt(lower2) + + +def network_pearsonr_numba(annot1, annot2, weight): + """ + Numba version of :meth:`netneurotools.stats.network_pearsonr`. + + .. warning:: + Test before use. + + Parameters + ---------- + annot1 : (N,) array_like + First annotation vector, demean will be applied. + annot2 : (N,) array_like + Second annotation vector, demean will be applied. + weight : (N, N) array_like + Weight matrix. Diagonal elements should be 1. + + Returns + ------- + corr : float + Network correlation between `annot1` and `annot2` + """ + n = annot1.shape[0] + annot1 = annot1 - np.mean(annot1) + annot2 = annot2 - np.mean(annot2) + upper, lower1, lower2 = 0.0, 0.0, 0.0 + for i in range(n): + for j in range(n): + upper += annot1[i] * annot2[j] * weight[i, j] + lower1 += annot1[i] * annot1[j] * weight[i, j] + lower2 += annot2[i] * annot2[j] * weight[i, j] + return upper / np.sqrt(lower1) / np.sqrt(lower2) + + +if use_numba: + network_pearsonr_numba = njit(network_pearsonr_numba) + + +def _cross_outer(annot_mat): + """ + Calculate cross outer product of input matrix. + + This functions is only used in `network_pearsonr_pairwise`. + + Parameters + ---------- + annot_mat : (N, D) array_like + Input matrix + + Returns + ------- + cross_outer : (N, N, D, D) numpy.ndarray + Cross outer product of `annot_mat` + """ + n_samp, n_feat = annot_mat.shape + cross_outer = np.empty((n_samp, n_samp, n_feat, n_feat), annot_mat.dtype) + for a in range(n_samp): + for b in range(n_samp): + for c in range(n_feat): + for d in range(n_feat): + cross_outer[a, b, c, d] = annot_mat[a, c] * annot_mat[b, d] + return cross_outer + + +if use_numba: + # ("float64[:,:,:,::1](float64[:,::1])") + _cross_outer = njit(_cross_outer) + + +def _multiply_sum(cross_outer, weight): + """ + Multiply and sum cross outer product. + + This functions is only used in `network_pearsonr_pairwise`. + + Parameters + ---------- + cross_outer : (N, N, D, D) array_like + Cross outer product of `annot_mat` + weight : (D, D) array_like + Weight matrix + + Returns + ------- + cross_outer_after : (N, N) numpy.ndarray + Result of multiplying and summing `cross_outer` + """ + n_samp, _, n_dim, _ = cross_outer.shape + cross_outer_after = np.empty((n_samp, n_samp), cross_outer.dtype) + for i in range(n_samp): + for j in range(n_samp): + curr_sum = 0.0 + for k in range(n_dim): + for l in range(n_dim): # noqa: E741 + curr_sum += weight[k, l] * cross_outer[i, j, k, l] + cross_outer_after[i, j] = curr_sum + return cross_outer_after + + +if use_numba: + # ("float64[:,::1](float64[:,:,:,::1],float64[:,::1])") + _multiply_sum = njit(_multiply_sum) + + +def network_pearsonr_pairwise(annot_mat, weight): + """ + Calculate pairwise network correlation between rows of `annot_mat`. + + .. warning:: + Test before use. + + Parameters + ---------- + annot_mat : (N, D) array_like + Input matrix + weight : (D, D) array_like + Weight matrix. Diagonal elements should be 1. 
+ + Returns + ------- + corr_mat : (N, N) numpy.ndarray + Pairwise network correlation matrix + + Notes + ----- + This is a faster version of :meth:`netneurotools.stats.network_pearsonr` + for calculating pairwise network correlation between rows of `annot_mat`. + Check :meth:`netneurotools.stats.network_pearsonr` for details. + + See Also + -------- + netneurotools.stats.network_pearsonr + """ + annot_mat_demean = annot_mat - np.mean(annot_mat, axis=1, keepdims=True) + if use_numba: + cross_outer = _cross_outer(annot_mat_demean) + cross_outer_after = _multiply_sum(cross_outer, weight) + else: + # https://stackoverflow.com/questions/24839481/python-matrix-outer-product + cross_outer = np.einsum('ac,bd->abcd', annot_mat_demean, annot_mat_demean) + cross_outer_after = np.sum(np.multiply(cross_outer, weight), axis=(2, 3)) + # translating the two lines below in numba does not speed up much + lower = np.sqrt(np.diagonal(cross_outer_after)) + return cross_outer_after / np.einsum('i,j', lower, lower) + + +def _onehot_quadratic_form_broadcast(Q_star): + """ + Calculate one-hot quadratic form of input matrix. + + This functions is only used in `effective_resistance`. + + Parameters + ---------- + Q_star : (N, N) array_like + Input matrix + + Returns + ------- + R_eff : (N, N) numpy.ndarray + One-hot quadratic form of `Q_star` + """ + n = Q_star.shape[0] + R_eff = np.empty((n, n), Q_star.dtype) + for i in range(n): + for j in range(n): + R_eff[i, j] = Q_star[i, i] - Q_star[j, i] - Q_star[i, j] + Q_star[j, j] + return R_eff + + +if use_numba: + # ("float64[:,::1](float64[:,::1])") + _onehot_quadratic_form_broadcast = njit(_onehot_quadratic_form_broadcast) + + +def effective_resistance(W, directed=True): + """ + Calculate effective resistance matrix. + + The effective resistance between two nodes in a graph, often used in the context + of electrical networks, is a measure that stems from the inverse of the Laplacian + matrix of the graph. + + .. warning:: + Test before use. + + Parameters + ---------- + W : (N, N) array_like + Weight matrix. + directed : bool, optional + Whether the graph is directed. This is used to determine whether to turn on + the :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. When you are + using a symmetric weight matrix (while real-valued implying hermitian), you + can set this to False for better performance. Default: True + + Returns + ------- + R_eff : (N, N) numpy.ndarray + Effective resistance matrix + + Notes + ----- + The effective resistance between two nodes :math:`i` and :math:`j` is defined as + + .. math:: + R_{ij} = (e_i - e_j)^T Q^* (e_i - e_j) + + where :math:`Q^*` is the Moore-Penrose pseudoinverse of the Laplacian matrix + :math:`L` of the graph, and :math:`e_i` is the :math:`i`-th standard basis vector. + + References + ---------- + .. [1] Ellens, W., Spieksma, F. M., Van Mieghem, P., Jamakovic, A., & Kooij, + R. E. (2011). Effective graph resistance. Linear Algebra and Its Applications, + 435(10), 2491–2506. https://doi.org/10.1016/j.laa.2011.02.024 + + See Also + -------- + netneurotools.stats.network_polarisation + """ + L = _graph_laplacian(W) + Q_star = np.linalg.pinv(L, hermitian=not directed) + if use_numba: + R_eff = _onehot_quadratic_form_broadcast(Q_star) + else: + Q_star_diag = np.diag(Q_star) + R_eff = \ + Q_star_diag[:, np.newaxis] \ + - Q_star \ + - Q_star.T \ + + Q_star_diag[np.newaxis, :] + return R_eff + + +def _polariz_diff(vec): + """ + Calculate difference between positive and negative parts of a vector. 
+ + This functions is only used in `network_polarisation`. + + Parameters + ---------- + vec : (N,) array_like + Input vector. Must have both positive and negative values. + + Returns + ------- + vec_diff : (N,) numpy.ndarray + Difference between positive and negative parts of `vec` + """ + # + vec_pos = np.maximum(vec, 0.0) + vec_pos /= np.max(vec_pos) + # + vec_neg = np.minimum(vec, 0.0) + vec_neg = np.abs(vec_neg) + vec_neg /= np.max(vec_neg) + return (vec_pos - vec_neg) + + +if use_numba: + _polariz_diff = njit(_polariz_diff) + + +def _quadratic_form(W, vec_left, vec_right, squared=False): + """ + Calculate quadratic form :math:`v_{left}^T W v_{right}`. + + Parameters + ---------- + W : (N, N) array_like + Input matrix. + vec_left : (N,) array_like + Left weight vector. + vec_right : (N,) array_like + Right weight vector. + squared : bool, optional + Whether to square the input weight matrix. Default: False + + Returns + ------- + quadratic_form : float + Quadratic form from `W`, `vec_left`, and `vec_right` + """ + # [numpy] + + # (vec_left.T @ W @ vec_right)[0, 0] + # [numba] + # vec = np.ascontiguousarray(vec[np.newaxis, :]) + n = W.shape[0] + ret = 0.0 + for i in range(n): + for j in range(n): + if squared: + ret += vec_left[i] * vec_right[j] * W[i, j]**2 + else: + ret += vec_left[i] * vec_right[j] * W[i, j] + return ret + + +if use_numba: + _quadratic_form = njit(_quadratic_form) + + +def network_polarisation(vec, W, directed=True): + r""" + Calculate polarisation of a vector on a graph. + + Network polarisation is a measure of polizzartion taken into account all the + three factors below [1]_: + + - how extreme the opinions of the people are + - how much they organize into echo chambers, and + - how these echo chambers organize in the network + + .. warning:: + Test before use. + + Parameters + ---------- + vec : (N,) array_like + Polarization vector. Must have both positive and negative values. Will be + normalized between -1 and 1 internally. + W : (N, N) array_like + Weight matrix. + directed : bool, optional + Whether the graph is directed. This is used to determine whether to turn on + the :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. When you are + using a symmetric weight matrix (while real-valued implying hermitian), you + can set this to False for better performance. Default: True + + Returns + ------- + polariz : float + Polarization of `vec` on `W` + + Notes + ----- + The measure is based on the genralized Eucledian distance, defined as + + .. math:: + \delta_{G, o} = \sqrt{(o^+ - o^-)^T Q^* (o^+ - o^-)} + + where :math:`o^+` and :math:`o^-` are the positive and negative parts of the + polarization vector, and :math:`Q^*` is the Moore-Penrose pseudoinverse + of the Laplacian matrix :math:`L` of the graph. Check :func:`effective_resistance` + for similarity. + + References + ---------- + .. [1] Hohmann, M., Devriendt, K., & Coscia, M. (2023). Quantifying ideological + polarization on a network using generalized Euclidean distance. Science Advances, + 9(9), eabq2044. https://doi.org/10.1126/sciadv.abq2044 + + See Also + -------- + netneurotools.stats.effective_resistance + """ + L = _graph_laplacian(W) + Q_star = np.linalg.pinv(L, hermitian=not directed) + diff = _polariz_diff(vec) + if use_numba: + polariz_sq = _quadratic_form(Q_star, diff, diff, squared=False) + else: + polariz_sq = (diff.T @ Q_star @ diff) + return np.sqrt(polariz_sq) + + +def network_variance(vec, D): + r""" + Calculate variance of a vector on a graph. 
+ + Network variance is a measure of variance taken into account the network + structure. + + .. warning:: + Test before use. + + Parameters + ---------- + vec : (N,) array_like + Input vector. Must be all positive. + Will be normalized internally as a probability distribution. + D : (N, N) array_like + Distance matrix. + + Returns + ------- + network_variance : float + Network variance of `vec` on `D` + + Notes + ----- + The network variance is defined as + + .. math:: + var(p) = \frac{1}{2} \sum_{i, j} p(i) p(j) d^2(i,j) + + where :math:`p` is the probability distribution of `vec`, and :math:`d(i,j)` + is the distance between node :math:`i` and :math:`j`. + + The distance matrix :math:`D` can make use of effective resistance or its + square root. + + Example using effective resistance as weight matrix + + .. code:: python + + R_eff = effective_resistance(W) + netvar = network_variance(vec, R_eff) + + References + ---------- + .. [1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022). + Variance and covariance of distributions on graphs. SIAM Review, 64(2), + 343–359. https://doi.org/10.1137/20M1361328 + + See Also + -------- + netneurotools.stats.network_covariance + """ + p = vec / np.sum(vec) + return 0.5 * (p.T @ np.multiply(D, D) @ p) + + +def network_variance_numba(vec, D): + """ + Numba version of :meth:`netneurotools.stats.network_variance`. + + Network variance is a measure of variance taken into account the network + structure. + + .. warning:: + Test before use. + + Parameters + ---------- + vec : (N,) array_like + Input vector. Must be all positive. + Will be normalized internally as a probability distribution. + D : (N, N) array_like + Distance matrix. + + Returns + ------- + network_variance : float + Network variance of `vec` on `D` + """ + p = vec / np.sum(vec) + return 0.5 * _quadratic_form(D, p, p, squared=True) + + +if use_numba: + network_variance_numba = njit(network_variance_numba) + + +def network_covariance(joint_pmat, D, calc_marginal=True): + r""" + Calculate covariance of a joint probability matrix on a graph. + + .. warning:: + Test before use. + + Parameters + ---------- + joint_pmat : (N, N) array_like + Joint probability matrix. Please make sure that it is valid. + D : (N, N) array_like + Distance matrix. + calc_marginal : bool, optional + Whether to calculate marginal variance. It will be marginally faster if + :code:`calc_marginal=False` (returning marginal variances as 0). Default: True + + Returns + ------- + network_covariance : float + Covariance of `joint_pmat` on `D` + var_p : float + Marginal variance of `joint_pmat` on `D`. + Will be 0 if :code:`calc_marginal=False` + var_q : float + Marginal variance of `joint_pmat` on `D`. + Will be 0 if :code:`calc_marginal=False` + + Notes + ----- + The network variance is defined as + + .. math:: + cov(P) = \frac{1}{2} \sum_{i, j} [p(i) q(j) - P(i,j)] d^2(i,j) + + where :math:`P` is the joint probability matrix, :math:`p` and :math:`q` + are the marginal probability distributions of `joint_pmat`, and :math:`d(i,j)` + is the distance between node :math:`i` and :math:`j`. + + Check :func:`network_variance` for usage. + + References + ---------- + .. [1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022). + Variance and covariance of distributions on graphs. SIAM Review, 64(2), + 343–359. 
https://doi.org/10.1137/20M1361328 + + See Also + -------- + netneurotools.stats.network_variance + """ + p = np.sum(joint_pmat, axis=1) + q = np.sum(joint_pmat, axis=0) + D_sq = np.multiply(D, D) + cov = p.T @ D_sq @ q - np.sum(np.multiply(joint_pmat, D_sq)) + if calc_marginal: + var_p = p.T @ D_sq @ p + var_q = q.T @ D_sq @ q + else: + var_p, var_q = 0, 0 + return 0.5 * cov, 0.5 * var_p, 0.5 * var_q + + +def network_covariance_numba(joint_pmat, D, calc_marginal=True): + """ + Numba version of :meth:`netneurotools.stats.network_covariance`. + + .. warning:: + Test before use. + + Parameters + ---------- + joint_pmat : (N, N) array_like + Joint probability matrix. Please make sure that it is valid. + D : (N, N) array_like + Distance matrix. + calc_marginal : bool, optional + Whether to calculate marginal variance. It will be marginally faster if + :code:`calc_marginal=False` (returning marginal variances as 0). Default: True + + Returns + ------- + network_covariance : float + Covariance of `joint_pmat` on `D` + var_p : float + Marginal variance of `joint_pmat` on `D`. + Will be 0 if :code:`calc_marginal=False` + var_q : float + Marginal variance of `joint_pmat` on `D`. + Will be 0 if :code:`calc_marginal=False` + """ + n = joint_pmat.shape[0] + p = np.sum(joint_pmat, axis=1) + q = np.sum(joint_pmat, axis=0) + cov = 0.0 + var_p, var_q = 0.0, 0.0 + for i in range(n): + for j in range(n): + cov += (p[i] * q[j] - joint_pmat[i, j]) * D[i, j]**2 + if calc_marginal: + var_p += p[i] * p[j] * D[i, j]**2 + var_q += q[i] * q[j] * D[i, j]**2 + return 0.5 * cov, 0.5 * var_p, 0.5 * var_q + + +if use_numba: + network_covariance_numba = njit(network_covariance_numba) diff --git a/netneurotools/metrics/tests/__init__.py b/netneurotools/metrics/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/tests/test_metrics.py b/netneurotools/metrics/tests/test_bct.py similarity index 72% rename from netneurotools/tests/test_metrics.py rename to netneurotools/metrics/tests/test_bct.py index 253da0f..f83ab6a 100644 --- a/netneurotools/tests/test_metrics.py +++ b/netneurotools/metrics/tests/test_bct.py @@ -1,15 +1,15 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.metrics functionality.""" +"""For testing netneurotools.metrics.bct functionality.""" -import numpy as np import pytest +import numpy as np from netneurotools import metrics rs = np.random.RandomState(1234) -def test_communicability(): +def test_communicability_bin(): + """Test communicability_bin function.""" comm = metrics.communicability_bin(rs.choice([0, 1], size=(100, 100))) assert comm.shape == (100, 100) @@ -18,6 +18,7 @@ def test_communicability(): def test_communicability_wei(): + """Test communicability_wei function.""" comm = metrics.communicability_wei(rs.rand(100, 100)) assert comm.shape == (100, 100) assert np.allclose(np.diag(comm), 0) diff --git a/netneurotools/metrics/tests/test_communication.py b/netneurotools/metrics/tests/test_communication.py new file mode 100644 index 0000000..dd066f8 --- /dev/null +++ b/netneurotools/metrics/tests/test_communication.py @@ -0,0 +1 @@ +"""For testing netneurotools.metrics.communication functionality.""" diff --git a/netneurotools/metrics/tests/test_control.py b/netneurotools/metrics/tests/test_control.py new file mode 100644 index 0000000..28ad7c2 --- /dev/null +++ b/netneurotools/metrics/tests/test_control.py @@ -0,0 +1 @@ +"""For testing netneurotools.metrics.control functionality.""" diff --git a/netneurotools/metrics/tests/test_spreading.py 
b/netneurotools/metrics/tests/test_spreading.py new file mode 100644 index 0000000..216c638 --- /dev/null +++ b/netneurotools/metrics/tests/test_spreading.py @@ -0,0 +1 @@ +"""For testing netneurotools.metrics.spreading functionality.""" diff --git a/netneurotools/metrics/tests/test_statistical.py b/netneurotools/metrics/tests/test_statistical.py new file mode 100644 index 0000000..73caf2d --- /dev/null +++ b/netneurotools/metrics/tests/test_statistical.py @@ -0,0 +1 @@ +"""For testing netneurotools.metrics.statistical functionality.""" diff --git a/netneurotools/modularity.py b/netneurotools/modularity.py deleted file mode 100644 index 1831dd9..0000000 --- a/netneurotools/modularity.py +++ /dev/null @@ -1,316 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for working with network modularity.""" - -import bct -import numpy as np -from sklearn.utils.validation import check_random_state -from . import cluster - -try: - from numba import njit, prange - use_numba = True -except ImportError: - prange = range - use_numba = False - - -def consensus_modularity(adjacency, gamma=1, B='modularity', - repeats=250, null_func=np.mean, seed=None): - """ - Find community assignments from `adjacency` through consensus. - - Performs `repeats` iterations of community detection on `adjacency` and - then uses consensus clustering on the resulting community assignments. - - Parameters - ---------- - adjacency : (N, N) array_like - Adjacency matrix (weighted/non-weighted) on which to perform consensus - community detection. - gamma : float, optional - Resolution parameter for modularity maximization. Default: 1 - B : str or (N, N) array_like, optional - Null model to use for consensus clustering. If `str`, must be one of - ['modularity', 'potts', 'negative_sym', 'negative_asym']. Default: - 'modularity' - repeats : int, optional - Number of times to repeat Louvain algorithm clustering. Default: 250 - null_func : callable, optional - Function used to generate null model when performing consensus-based - clustering. Must accept a 2D array as input and return a single value. - Default: `np.mean` - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Default: None - - Returns - ------- - consensus : (N,) np.ndarray - Consensus-derived community assignments - Q_all : array_like - Optimized modularity over all `repeats` community assignments - zrand_all : array_like - z-Rand score over all pairs of `repeats` community assignment vectors - - References - ---------- - Bassett, D. S., Porter, M. A., Wymbs, N. F., Grafton, S. T., Carlson, - J. M., & Mucha, P. J. (2013). Robust detection of dynamic community - structure in networks. Chaos: An Interdisciplinary Journal of Nonlinear - Science, 23(1), 013142. - """ - # generate community partitions `repeat` times - comms, Q_all = zip(*[bct.community_louvain(adjacency, gamma=gamma, B=B) - for i in range(repeats)]) - comms = np.column_stack(comms) - - # find consensus cluster assignments across all partitoning solutions - consensus = cluster.find_consensus(comms, null_func=null_func, seed=seed) - - # get z-rand statistics for partition similarity (n.b. can take a while) - zrand_all = _zrand_partitions(comms) - - return consensus, np.array(Q_all), zrand_all - - -def _dummyvar(labels): - """ - Generate dummy-coded array from provided community assignment `labels`. 
- - Parameters - ---------- - labels : (N,) array_like - Labels assigning `N` samples to `G` groups - - Returns - ------- - ci : (N, G) numpy.ndarray - Dummy-coded array where 1 indicates that a sample belongs to a group - """ - comms = np.unique(labels) - - ci = np.zeros((len(labels), len(comms))) - for n, grp in enumerate(comms): - ci[:, n] = labels == grp - - return ci - - -def zrand(X, Y): - """ - Calculate the z-Rand index of two community assignments. - - Parameters - ---------- - X, Y : (n, 1) array_like - Community assignment vectors to compare - - Returns - ------- - z_rand : float - Z-rand index - - References - ---------- - Amanda L. Traud, Eric D. Kelsic, Peter J. Mucha, and Mason A. Porter. - (2011). Comparing Community Structure to Characteristics in Online - Collegiate Social Networks. SIAM Review, 53, 526-543. - """ - if X.ndim > 1 or Y.ndim > 1: - if X.shape[-1] > 1 or Y.shape[-1] > 1: - raise ValueError('X and Y must have only one-dimension each. ' - 'Please check inputs.') - - Xf = X.flatten() - Yf = Y.flatten() - - n = len(Xf) - indx, indy = _dummyvar(Xf), _dummyvar(Yf) - Xa = indx.dot(indx.T) - Ya = indy.dot(indy.T) - - M = n * (n - 1) / 2 - M1 = Xa.nonzero()[0].size / 2 - M2 = Ya.nonzero()[0].size / 2 - - wab = np.logical_and(Xa, Ya).nonzero()[0].size / 2 - - mod = n * (n**2 - 3 * n - 2) - C1 = mod - (8 * (n + 1) * M1) + (4 * np.power(indx.sum(0), 3).sum()) - C2 = mod - (8 * (n + 1) * M2) + (4 * np.power(indy.sum(0), 3).sum()) - - a = M / 16 - b = ((4 * M1 - 2 * M)**2) * ((4 * M2 - 2 * M)**2) / (256 * (M**2)) - c = C1 * C2 / (16 * n * (n - 1) * (n - 2)) - d = ((((4 * M1 - 2 * M)**2) - (4 * C1) - (4 * M)) - * (((4 * M2 - 2 * M)**2) - (4 * C2) - (4 * M)) - / (64 * n * (n - 1) * (n - 2) * (n - 3))) - - sigw2 = a - b + c + d - # catch any negatives - if sigw2 < 0: - return 0 - z_rand = (wab - ((M1 * M2) / M)) / np.sqrt(sigw2) - - return z_rand - - -def _zrand_partitions(communities): - """ - Calculate z-Rand for all pairs of assignments in `communities`. - - Iterates through every pair of community assignment vectors in - `communities` and calculates the z-Rand score to assess their similarity. - - Parameters - ---------- - communities : (S, R) array_like - Community assignments for `S` samples over `R` partitions - - Returns - ------- - all_zrand : array_like - z-Rand score over all pairs of `R` partitions of community assignments - """ - n_partitions = communities.shape[-1] - all_zrand = np.zeros(int(n_partitions * (n_partitions - 1) / 2)) - - for c1 in prange(n_partitions): - for c2 in prange(c1 + 1, n_partitions): - idx = int((c1 * n_partitions) + c2 - ((c1 + 1) * (c1 + 2) // 2)) - all_zrand[idx] = zrand(communities[:, c1], communities[:, c2]) - - return all_zrand - - -if use_numba: - _dummyvar = njit(_dummyvar) - zrand = njit(zrand) - _zrand_partitions = njit(_zrand_partitions, parallel=True) - - -def get_modularity(adjacency, comm, gamma=1): - """ - Calculate modularity contribution for each community in `comm`. - - Parameters - ---------- - adjacency : (N, N) array_like - Adjacency (e.g., correlation) matrix - comm : (N,) array_like - Community assignment vector splitting `N` subjects into `G` groups - gamma : float, optional - Resolution parameter used in original modularity maximization. 
- Default: 1 - - Returns - ------- - comm_q : (G,) ndarray - Relative modularity for each community - - See Also - -------- - netneurotools.modularity.get_modularity_z - netneurotools.modularity.get_modularity_sig - """ - adjacency, comm = np.asarray(adjacency), np.asarray(comm) - s = adjacency.sum() - B = adjacency - (gamma * np.outer(adjacency.sum(axis=1), - adjacency.sum(axis=0)) / s) - - # find modularity contribution of each community - communities = np.unique(comm) - comm_q = np.empty(shape=communities.size) - for n, ci in enumerate(communities): - inds = comm == ci - comm_q[n] = B[np.ix_(inds, inds)].sum() / s - - return comm_q - - -def get_modularity_z(adjacency, comm, gamma=1, n_perm=10000, seed=None): - """ - Calculate average z-score of community assignments by permutation. - - Parameters - ---------- - adjacency : (N, N) array_like - Adjacency (correlation) matrix - comm : (N,) array_like - Community assignment vector splitting `N` subjects into `G` groups - gamma : float, optional - Resolution parameter used in original modularity maximization. - Default: 1 - n_perm : int, optional - Number of permutations. Default: 10000 - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Default: None - - Returns - ------- - q_z : float - Average Z-score of modularity of communities - - See Also - -------- - netneurotools.modularity.get_modularity - netneurotools.modularity.get_modularity_sig - """ - rs = check_random_state(seed) - - real_qs = get_modularity(adjacency, comm, gamma) - simu_qs = np.empty(shape=(np.unique(comm).size, n_perm)) - for perm in range(n_perm): - simu_qs[:, perm] = get_modularity(adjacency, - rs.permutation(comm), - gamma) - - # avoid instances where dist.std(1) == 0 - std = simu_qs.std(axis=1) - if std == 0: - return np.mean(real_qs - simu_qs.mean(axis=1)) - else: - return np.mean((real_qs - simu_qs.mean(axis=1)) / std) - - -def get_modularity_sig(adjacency, comm, gamma=1, n_perm=10000, alpha=0.01, - seed=None): - """ - Calculate significance of community assignments in `comm` by permutation. - - Parameters - ---------- - adjacency : (N, N) array_like - Adjacency (correlation) matrix - comm : (N,) array_like - Community assignment vector - gamma : float - Resolution parameter used in original modularity maximization - n_perm : int, optional - Number of permutations to test against. Default: 10000 - alpha : (0,1) float, optional - Alpha level to assess significance. Default: 0.01 - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. 
Default: None - - Returns - ------- - ndarray - Significance of each community in `comm` (boolean) - - See Also - -------- - netneurotools.modularity.get_modularity_z - netneurotools.modularity.get_modularity_sig - """ - rs = check_random_state(seed) - - real_qs = get_modularity(adjacency, comm, gamma) - simu_qs = np.empty(shape=(np.unique(comm).size, n_perm)) - for perm in range(n_perm): - simu_qs[:, perm] = get_modularity(adjacency, - rs.permutation(comm), - gamma) - - q_sig = real_qs > np.percentile(simu_qs, 100 * (1 - alpha), axis=1) - - return q_sig diff --git a/netneurotools/modularity/__init__.py b/netneurotools/modularity/__init__.py new file mode 100644 index 0000000..2fe84ad --- /dev/null +++ b/netneurotools/modularity/__init__.py @@ -0,0 +1,25 @@ +"""Functions for working with network modularity.""" + + +from .modules import ( + match_cluster_labels, + match_assignments, + reorder_assignments, + find_consensus, + consensus_modularity, + _dummyvar, + zrand, + _zrand_partitions, + get_modularity, + get_modularity_z, + get_modularity_sig, +) + + +__all__ = [ + # modules + 'match_cluster_labels', 'match_assignments', 'reorder_assignments', + 'find_consensus', 'consensus_modularity', '_dummyvar', 'zrand', + '_zrand_partitions', 'get_modularity', 'get_modularity_z', + 'get_modularity_sig', +] diff --git a/netneurotools/cluster.py b/netneurotools/modularity/modules.py similarity index 56% rename from netneurotools/cluster.py rename to netneurotools/modularity/modules.py index 4b46a9d..120e984 100644 --- a/netneurotools/cluster.py +++ b/netneurotools/modularity/modules.py @@ -1,11 +1,17 @@ -# -*- coding: utf-8 -*- -"""Functions for clustering and working with cluster solutions.""" +"""Functions for working with network modules.""" import bct import numpy as np +from sklearn.utils.validation import check_random_state from scipy import optimize from scipy.cluster import hierarchy -from sklearn.utils.validation import check_random_state + +try: + from numba import njit, prange + use_numba = True +except ImportError: + prange = range + use_numba = False def _get_relabels(c1, c2): @@ -64,14 +70,14 @@ def match_cluster_labels(source, target): Examples -------- - >>> from netneurotools import cluster + >>> from netneurotools import modularity When cluster labels are perfectly matched but e.g., inverted the function will find a perfect mapping: >>> a = np.array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0]) >>> b = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1]) - >>> cluster.match_cluster_labels(a, b) + >>> modularity.match_cluster_labels(a, b) array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1]) However, the mapping will work even when cluster assignments between the @@ -80,13 +86,13 @@ def match_cluster_labels(source, target): >>> a = np.array([0, 0, 0, 2, 2, 2, 2, 1, 1, 1]) >>> b = np.array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0]) - >>> cluster.match_cluster_labels(a, b) + >>> modularity.match_cluster_labels(a, b) array([1, 1, 1, 0, 0, 0, 0, 2, 2, 2]) If the source assignment has fewer clusters than the target the returned values may be discontinuous: - >>> cluster.match_cluster_labels(b, a) + >>> modularity.match_cluster_labels(b, a) array([0, 0, 0, 2, 2, 2, 2, 2, 2, 2]) """ # try and match the source to target @@ -137,7 +143,7 @@ def match_assignments(assignments, target=None, seed=None): Examples -------- - >>> from netneurotools import cluster + >>> from netneurotools import modularity First we can construct a matrix of `N` samples clustered `M` times (in this case, `M` is three) . 
Since cluster labels are generally arbitrary we can @@ -157,7 +163,7 @@ def match_assignments(assignments, target=None, seed=None): of the columns will be randomly picked as the "target" solution, we provide a `seed` to ensure reproducibility in the selection: - >>> cluster.match_assignments(assignments, seed=1234) + >>> modularity.match_assignments(assignments, seed=1234) array([[1, 1, 1], [1, 1, 1], [1, 1, 1], @@ -179,7 +185,7 @@ def match_assignments(assignments, target=None, seed=None): ... [1, 2, 0], ... [1, 1, 2], ... [1, 1, 2]]) - >>> cluster.match_assignments(assignments) + >>> modularity.match_assignments(assignments) array([[0, 0, 0], [0, 0, 0], [0, 0, 0], @@ -362,3 +368,305 @@ def find_consensus(assignments, null_func=np.mean, return_agreement=False, return consensus.astype(int), agreement * (agreement > threshold) return consensus.astype(int) + + +def consensus_modularity(adjacency, gamma=1, B='modularity', + repeats=250, null_func=np.mean, seed=None): + """ + Find community assignments from `adjacency` through consensus. + + Performs `repeats` iterations of community detection on `adjacency` and + then uses consensus clustering on the resulting community assignments. + + Parameters + ---------- + adjacency : (N, N) array_like + Adjacency matrix (weighted/non-weighted) on which to perform consensus + community detection. + gamma : float, optional + Resolution parameter for modularity maximization. Default: 1 + B : str or (N, N) array_like, optional + Null model to use for consensus clustering. If `str`, must be one of + ['modularity', 'potts', 'negative_sym', 'negative_asym']. Default: + 'modularity' + repeats : int, optional + Number of times to repeat Louvain algorithm clustering. Default: 250 + null_func : callable, optional + Function used to generate null model when performing consensus-based + clustering. Must accept a 2D array as input and return a single value. + Default: `np.mean` + seed : {int, np.random.RandomState instance, None}, optional + Seed for random number generation. Default: None + + Returns + ------- + consensus : (N,) np.ndarray + Consensus-derived community assignments + Q_all : array_like + Optimized modularity over all `repeats` community assignments + zrand_all : array_like + z-Rand score over all pairs of `repeats` community assignment vectors + + References + ---------- + Bassett, D. S., Porter, M. A., Wymbs, N. F., Grafton, S. T., Carlson, + J. M., & Mucha, P. J. (2013). Robust detection of dynamic community + structure in networks. Chaos: An Interdisciplinary Journal of Nonlinear + Science, 23(1), 013142. + """ + # generate community partitions `repeat` times + comms, Q_all = zip(*[bct.community_louvain(adjacency, gamma=gamma, B=B) + for i in range(repeats)]) + comms = np.column_stack(comms) + + # find consensus cluster assignments across all partitoning solutions + consensus = find_consensus(comms, null_func=null_func, seed=seed) + + # get z-rand statistics for partition similarity (n.b. can take a while) + zrand_all = _zrand_partitions(comms) + + return consensus, np.array(Q_all), zrand_all + + +def _dummyvar(labels): + """ + Generate dummy-coded array from provided community assignment `labels`. 
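+
+    For example, labels ``[1, 1, 2, 3, 3]`` are expanded to the dummy-coded
+    array ``[[1, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 0, 1]]`` (the
+    same case exercised in the accompanying tests).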
+ + Parameters + ---------- + labels : (N,) array_like + Labels assigning `N` samples to `G` groups + + Returns + ------- + ci : (N, G) numpy.ndarray + Dummy-coded array where 1 indicates that a sample belongs to a group + """ + comms = np.unique(labels) + + ci = np.zeros((len(labels), len(comms))) + for n, grp in enumerate(comms): + ci[:, n] = labels == grp + + return ci + + +def zrand(X, Y): + """ + Calculate the z-Rand index of two community assignments. + + Parameters + ---------- + X, Y : (n, 1) array_like + Community assignment vectors to compare + + Returns + ------- + z_rand : float + Z-rand index + + References + ---------- + Amanda L. Traud, Eric D. Kelsic, Peter J. Mucha, and Mason A. Porter. + (2011). Comparing Community Structure to Characteristics in Online + Collegiate Social Networks. SIAM Review, 53, 526-543. + """ + if X.ndim > 1 or Y.ndim > 1: + if X.shape[-1] > 1 or Y.shape[-1] > 1: + raise ValueError('X and Y must have only one-dimension each. ' + 'Please check inputs.') + + Xf = X.flatten() + Yf = Y.flatten() + + n = len(Xf) + indx, indy = _dummyvar(Xf), _dummyvar(Yf) + Xa = indx.dot(indx.T) + Ya = indy.dot(indy.T) + + M = n * (n - 1) / 2 + M1 = Xa.nonzero()[0].size / 2 + M2 = Ya.nonzero()[0].size / 2 + + wab = np.logical_and(Xa, Ya).nonzero()[0].size / 2 + + mod = n * (n**2 - 3 * n - 2) + C1 = mod - (8 * (n + 1) * M1) + (4 * np.power(indx.sum(0), 3).sum()) + C2 = mod - (8 * (n + 1) * M2) + (4 * np.power(indy.sum(0), 3).sum()) + + a = M / 16 + b = ((4 * M1 - 2 * M)**2) * ((4 * M2 - 2 * M)**2) / (256 * (M**2)) + c = C1 * C2 / (16 * n * (n - 1) * (n - 2)) + d = ((((4 * M1 - 2 * M)**2) - (4 * C1) - (4 * M)) + * (((4 * M2 - 2 * M)**2) - (4 * C2) - (4 * M)) + / (64 * n * (n - 1) * (n - 2) * (n - 3))) + + sigw2 = a - b + c + d + # catch any negatives + if sigw2 < 0: + return 0 + z_rand = (wab - ((M1 * M2) / M)) / np.sqrt(sigw2) + + return z_rand + + +def _zrand_partitions(communities): + """ + Calculate z-Rand for all pairs of assignments in `communities`. + + Iterates through every pair of community assignment vectors in + `communities` and calculates the z-Rand score to assess their similarity. + + Parameters + ---------- + communities : (S, R) array_like + Community assignments for `S` samples over `R` partitions + + Returns + ------- + all_zrand : array_like + z-Rand score over all pairs of `R` partitions of community assignments + """ + n_partitions = communities.shape[-1] + all_zrand = np.zeros(int(n_partitions * (n_partitions - 1) / 2)) + + for c1 in prange(n_partitions): + for c2 in prange(c1 + 1, n_partitions): + idx = int((c1 * n_partitions) + c2 - ((c1 + 1) * (c1 + 2) // 2)) + all_zrand[idx] = zrand(communities[:, c1], communities[:, c2]) + + return all_zrand + + +if use_numba: + _dummyvar = njit(_dummyvar) + zrand = njit(zrand) + _zrand_partitions = njit(_zrand_partitions, parallel=True) + + +def get_modularity(adjacency, comm, gamma=1): + """ + Calculate modularity contribution for each community in `comm`. + + Parameters + ---------- + adjacency : (N, N) array_like + Adjacency (e.g., correlation) matrix + comm : (N,) array_like + Community assignment vector splitting `N` subjects into `G` groups + gamma : float, optional + Resolution parameter used in original modularity maximization. 
+ Default: 1 + + Returns + ------- + comm_q : (G,) ndarray + Relative modularity for each community + + See Also + -------- + netneurotools.modularity.get_modularity_z + netneurotools.modularity.get_modularity_sig + """ + adjacency, comm = np.asarray(adjacency), np.asarray(comm) + s = adjacency.sum() + B = adjacency - (gamma * np.outer(adjacency.sum(axis=1), + adjacency.sum(axis=0)) / s) + + # find modularity contribution of each community + communities = np.unique(comm) + comm_q = np.empty(shape=communities.size) + for n, ci in enumerate(communities): + inds = comm == ci + comm_q[n] = B[np.ix_(inds, inds)].sum() / s + + return comm_q + + +def get_modularity_z(adjacency, comm, gamma=1, n_perm=10000, seed=None): + """ + Calculate average z-score of community assignments by permutation. + + Parameters + ---------- + adjacency : (N, N) array_like + Adjacency (correlation) matrix + comm : (N,) array_like + Community assignment vector splitting `N` subjects into `G` groups + gamma : float, optional + Resolution parameter used in original modularity maximization. + Default: 1 + n_perm : int, optional + Number of permutations. Default: 10000 + seed : {int, np.random.RandomState instance, None}, optional + Seed for random number generation. Default: None + + Returns + ------- + q_z : float + Average Z-score of modularity of communities + + See Also + -------- + netneurotools.modularity.get_modularity + netneurotools.modularity.get_modularity_sig + """ + rs = check_random_state(seed) + + real_qs = get_modularity(adjacency, comm, gamma) + simu_qs = np.empty(shape=(np.unique(comm).size, n_perm)) + for perm in range(n_perm): + simu_qs[:, perm] = get_modularity(adjacency, + rs.permutation(comm), + gamma) + + # avoid instances where dist.std(1) == 0 + std = simu_qs.std(axis=1) + if std == 0: + return np.mean(real_qs - simu_qs.mean(axis=1)) + else: + return np.mean((real_qs - simu_qs.mean(axis=1)) / std) + + +def get_modularity_sig(adjacency, comm, gamma=1, n_perm=10000, alpha=0.01, + seed=None): + """ + Calculate significance of community assignments in `comm` by permutation. + + Parameters + ---------- + adjacency : (N, N) array_like + Adjacency (correlation) matrix + comm : (N,) array_like + Community assignment vector + gamma : float + Resolution parameter used in original modularity maximization + n_perm : int, optional + Number of permutations to test against. Default: 10000 + alpha : (0,1) float, optional + Alpha level to assess significance. Default: 0.01 + seed : {int, np.random.RandomState instance, None}, optional + Seed for random number generation. 
Default: None + + Returns + ------- + ndarray + Significance of each community in `comm` (boolean) + + See Also + -------- + netneurotools.modularity.get_modularity_z + netneurotools.modularity.get_modularity_sig + """ + rs = check_random_state(seed) + + real_qs = get_modularity(adjacency, comm, gamma) + simu_qs = np.empty(shape=(np.unique(comm).size, n_perm)) + for perm in range(n_perm): + simu_qs[:, perm] = get_modularity(adjacency, + rs.permutation(comm), + gamma) + + q_sig = real_qs > np.percentile(simu_qs, 100 * (1 - alpha), axis=1) + + return q_sig diff --git a/netneurotools/modularity/tests/__init__.py b/netneurotools/modularity/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/tests/test_cluster.py b/netneurotools/modularity/tests/test_modules.py similarity index 58% rename from netneurotools/tests/test_cluster.py rename to netneurotools/modularity/tests/test_modules.py index 59b9f8a..64248b9 100644 --- a/netneurotools/tests/test_cluster.py +++ b/netneurotools/modularity/tests/test_modules.py @@ -1,12 +1,13 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.cluster functionality.""" +"""For testing netneurotools.modularity.modules functionality.""" import bct -import numpy as np import pytest +import numpy as np from sklearn.cluster import k_means, spectral_clustering -from netneurotools import cluster +from netneurotools import modularity + +rs = np.random.RandomState(1234) @pytest.mark.parametrize('c1, c2, out', [ @@ -28,10 +29,12 @@ np.array([1, 1, 1, 3, 3, 3, 2, 2, 2])) ]) def test_match_cluster_labels(c1, c2, out): - assert np.all(cluster.match_cluster_labels(c1, c2) == out) + """Test matching of cluster labels.""" + assert np.all(modularity.match_cluster_labels(c1, c2) == out) def test_match_assignments(): + """Test matching of clustering assignments.""" # generate some random data to be clustered (must be symmetric) rs = np.random.RandomState(1234) data = rs.rand(100, 100) @@ -48,7 +51,7 @@ def test_match_assignments(): # match labels and assert that we got perfect matches (this is not 100% # guaranteed with spectral clustering but it is...pretty likely) - matched = cluster.match_assignments(assignments, seed=rs) + matched = modularity.match_assignments(assignments, seed=rs) assert np.all(matched[:, [0]] == matched) # check that we didn't _actually_ change cluster assignments with matching; @@ -58,6 +61,7 @@ def test_match_assignments(): def test_reorder_assignments(): + """Test re-ordering of clustering assignments.""" # generate a bunch of ~random(ish) clustering assignments that have a bit # of consistency but aren't all identical rs = np.random.RandomState(1234) @@ -72,11 +76,11 @@ def test_reorder_assignments(): # (we're re-labelling the matrix but k-means does not provide stable # clustering assignments so we shouldn't get identical assignments even # after "matching") - reordered, idx = cluster.reorder_assignments(assignments, seed=1234) + reordered, idx = modularity.reorder_assignments(assignments, seed=1234) assert not np.all(reordered[:, [0]] == reordered) # make sure that the returned idx does exactly what it's supposed to - matched = cluster.match_assignments(assignments, seed=1234)[idx] + matched = modularity.match_assignments(assignments, seed=1234)[idx] assert np.all(matched == reordered) @@ -87,4 +91,44 @@ def test_reorder_assignments(): np.array([1, 1, 1, 2, 2, 2, 3, 3, 3])) ]) def test_find_consensus(assignments, clusters): - assert np.all(cluster.find_consensus(assignments) == clusters) + """Test finding 
consensus clustering.""" + assert np.all(modularity.find_consensus(assignments) == clusters) + + +def test_dummyvar(): + """Test generation of dummy variables.""" + # generate small example dummy variable code + out = modularity._dummyvar(np.array([1, 1, 2, 3, 3])) + assert np.all(out == np.array([[1, 0, 0], + [1, 0, 0], + [0, 1, 0], + [0, 0, 1], + [0, 0, 1]])) + + allones = np.array([1, 1, 1, 1, 1, 1, 1, 1]) + assert np.all(modularity._dummyvar(allones) == allones) + + +def test_zrand(): + """Test calculation of zrand.""" + # make the same two-group community assignments (with different labels) + label = np.ones((100, 1)) + X, Y = np.vstack((label, label * 2)), np.vstack((label * 2, label)) + # compare + assert modularity.zrand(X, Y) == modularity.zrand(X, Y[::-1]) + random = rs.choice([0, 1], size=X.shape) + assert modularity.zrand(X, Y) > modularity.zrand(X, random) + assert modularity.zrand(X, Y) == modularity.zrand(X[:, 0], Y[:, 0]) + + +def test_zrand_partitions(): + """Test calculation of zrand for partitions.""" + # make random communities + comm = rs.choice(range(6), size=(10, 100)) + all_diff = modularity._zrand_partitions(comm) + all_same = modularity._zrand_partitions(np.repeat(comm[:, [0]], 10, axis=1)) + + # partition of labels that are all the same should have higher average + # zrand and lower stdev zrand + assert np.nanmean(all_same) > np.nanmean(all_diff) + assert np.nanstd(all_same) < np.nanstd(all_diff) diff --git a/netneurotools/networks/__init__.py b/netneurotools/networks/__init__.py new file mode 100644 index 0000000..fb7d82a --- /dev/null +++ b/netneurotools/networks/__init__.py @@ -0,0 +1,33 @@ +"""Functions for constucting networks.""" + + +from .consensus import ( + func_consensus, struct_consensus +) + + +from .randomize import ( + randmio_und, + match_length_degree_distribution, + strength_preserving_rand_sa, + strength_preserving_rand_sa_mse_opt, + strength_preserving_rand_sa_dir +) + + +from .networks_utils import ( + binarize_network, threshold_network, get_triu +) + + +__all__ = [ + # consensus + 'func_consensus', 'struct_consensus', + # generative + # randomize + 'randmio_und', 'match_length_degree_distribution', + 'strength_preserving_rand_sa', 'strength_preserving_rand_sa_mse_opt', + 'strength_preserving_rand_sa_dir', + # networks_utils + 'binarize_network', 'threshold_network', 'get_triu' +] diff --git a/netneurotools/networks/consensus.py b/netneurotools/networks/consensus.py new file mode 100644 index 0000000..cc48baf --- /dev/null +++ b/netneurotools/networks/consensus.py @@ -0,0 +1,294 @@ +"""Functions for generating consensus networks.""" + +import numpy as np +from sklearn.utils.validation import ( + check_random_state, check_array, check_consistent_length +) + + +def func_consensus(data, n_boot=1000, ci=95, seed=None): + """ + Calculate thresholded group consensus functional connectivity graph. + + This function concatenates all time series in `data` and computes a group + correlation matrix based on this extended time series. It then generates + length `T` bootstrapped samples from the concatenated matrix and estimates + confidence intervals for all correlations. Correlations whose sign is + consistent across bootstraps are retained; inconsistent correlations are + set to zero. + + If `n_boot` is set to 0 or None a simple, group-averaged functional + connectivity matrix is estimated, instead. 
+ + Parameters + ---------- + data : (N, T, S) array_like (or a list of S arrays, each shaped as (N, T)) + Pre-processed functional time series, where `N` is the number of nodes, + `T` is the number of volumes in the time series, and `S` is the number + of subjects. + n_boot : int, optional + Number of bootstraps for which to generate correlation. Default: 1000 + ci : (0, 100) float, optional + Confidence interval for which to assess the reliability of correlations + with bootstraps. Default: 95 + seed : int, optional + Random seed. Default: None + + Returns + ------- + consensus : (N, N) numpy.ndarray + Thresholded, group-level correlation matrix + + References + ---------- + Mišić, B., Betzel, R. F., Nematzadeh, A., Goni, J., Griffa, A., Hagmann, + P., Flammini, A., Ahn, Y.-Y., & Sporns, O. (2015). Cooperative and + competitive spreading dynamics on the human connectome. Neuron, 86(6), + 1518-1529. + """ + # check inputs + rs = check_random_state(seed) + if ci > 100 or ci < 0: + raise ValueError("`ci` must be between 0 and 100.") + + # group-average functional connectivity matrix desired instead of bootstrap + if n_boot == 0 or n_boot is None: + if isinstance(data, list): + corrs = [np.corrcoef(sub) for sub in data] + else: + corrs = [np.corrcoef(data[..., sub]) for sub in + range(data.shape[-1])] + return np.nanmean(corrs, axis=0) + + if isinstance(data, list): + collapsed_data = np.hstack(data) + nsample = int(collapsed_data.shape[-1] / len(data)) + else: + collapsed_data = data.reshape((len(data), -1), order='F') + nsample = data.shape[1] + + consensus = np.corrcoef(collapsed_data) + + # only keep the upper triangle for the bootstraps to save on memory usage + triu_inds = np.triu_indices_from(consensus, k=1) + bootstrapped_corrmat = np.zeros((len(triu_inds[0]), n_boot)) + + # generate `n_boot` bootstrap correlation matrices by sampling `t` time + # points from the concatenated time series + for boot in range(n_boot): + inds = rs.randint(collapsed_data.shape[-1], size=nsample) + bootstrapped_corrmat[..., boot] = \ + np.corrcoef(collapsed_data[:, inds])[triu_inds] + + # extract the CIs from the bootstrapped correlation matrices + # we don't need the input anymore so overwrite it + bootstrapped_ci = np.percentile(bootstrapped_corrmat, [100 - ci, ci], + axis=-1, overwrite_input=True) + + # remove unreliable (i.e., CI zero-crossing) correlations + # if the signs of the bootstrapped confidence intervals are different + # (i.e., their signs sum to 0), then we want to remove them + # so, take the logical not of the CI (CI = 0 ---> True) and create a mask + # then, set all connections from the consensus array inside the mask to 0 + remove_inds = np.logical_not(np.sign(bootstrapped_ci).sum(axis=0)) + mask = np.zeros_like(consensus, dtype=bool) + mask[triu_inds] = remove_inds + consensus[mask + mask.T] = 0 + + return consensus + + +def _ecdf(data): + """ + Estimate empirical cumulative distribution function of `data`. + + Taken directly from StackOverflow. See original answer at + https://stackoverflow.com/questions/33345780. 
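As a small worked illustration of this helper (note that the function body also prepends a zero probability, per its "match MATLAB" comment); the expected outputs are shown as comments, and the private import path is simply the module this diff creates:

import numpy as np
from netneurotools.networks.consensus import _ecdf

prob, quantiles = _ecdf(np.array([1, 1, 2, 3]))
# prob      -> array([0.  , 0.5 , 0.75, 1.  ])
# quantiles -> array([1, 1, 2, 3])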
+ + Parameters + ---------- + data : array_like + + Returns + ------- + prob : numpy.ndarray + Cumulative probability + quantiles : numpy.darray + Quantiles + """ + sample = np.atleast_1d(data) + + # find the unique values and their corresponding counts + quantiles, counts = np.unique(sample, return_counts=True) + + # take the cumulative sum of the counts and divide by the sample size to + # get the cumulative probabilities between 0 and 1 + prob = np.cumsum(counts).astype(float) / sample.size + + # match MATLAB + prob, quantiles = np.append([0], prob), np.append(quantiles[0], quantiles) + + return prob, quantiles + + +def struct_consensus(data, distance, hemiid, + conn_num_inter=None, + conn_num_intra=None, + weighted=False): + """ + Calculate distance-dependent group consensus structural connectivity graph. + + Takes as input a weighted stack of connectivity matrices with dimensions + (N, N, S) where `N` is the number of nodes and `S` is the number of + matrices or subjects. The matrices must be weighted, and ideally with + continuous weights (e.g. fractional anisotropy rather than streamline + count). The second input is a pairwise distance matrix, where distance(i,j) + is the Euclidean distance between nodes i and j. The final input is an + (N, 1) vector which labels nodes as belonging to the right (`hemiid==0`) or + left (`hemiid=1`) hemisphere (note that these values can be flipped as long + as `hemiid` contains only values of 0 and 1). + + This function estimates the average edge length distribution and builds + a group-averaged connectivity matrix that approximates this distribution + with density equal to the mean density across subjects. + + The algorithm works as follows: + + 1. Estimate the cumulative edge length distribution, + 2. Divide the distribution into M length bins, one for each edge that will + be added to the group-average matrix, and + 3. Within each bin, select the edge that is most consistently expressed + expressed across subjects, breaking ties according to average edge + weight (which is why the input matrix `data` must be weighted). + + The algorithm works separately on within/between hemisphere links. + M is the sum of `conn_num_inter` and `conn_num_intra`, if provided. + Otherwise, M is estimated from the data. + + Parameters + ---------- + data : (N, N, S) array_like + Weighted connectivity matrices (i.e., fractional anisotropy), where `N` + is nodes and `S` is subjects + distance : (N, N) array_like + Array where `distance[i, j]` is the Euclidean distance between nodes + `i` and `j` + hemiid : (N, 1) array_like + Hemisphere designation for `N` nodes where a value of 0/1 indicates + node `N_{i}` is in the right/left hemisphere, respectively + conn_num_inter : int, optional + Number of inter-hemispheric connections to include in the consensus + matrix. If `None`, the number of inter-hemispheric connections will be + estimated from the data. Default = `None`. + conn_num_intra : int, optional + Number of intra-hemispheric connections to include in the consensus + matrix. If `None`, the number of intra-hemispheric connections will be + estimated from the data. Default = `None`. + weighted : bool + Flag indicating whether or not to return a weighted consensus map. If + `True`, the consensus will be multiplied by the mean of `data`. + + Returns + ------- + consensus : (N, N) numpy.ndarray + Binary (default) or mean-weighted group-level connectivity matrix + + References + ---------- + Betzel, R. F., Griffa, A., Hagmann, P., & Mišić, B. (2018). 
Distance- + dependent consensus thresholds for generating group-representative + structural brain networks. Network Neuroscience, 1-22. + """ + # confirm input shapes are as expected + check_consistent_length(data, distance, hemiid) + try: + hemiid = check_array(hemiid, ensure_2d=True) + except ValueError: + raise ValueError('Provided hemiid must be a 2D array. Reshape your ' + 'data using array.reshape(-1, 1) and try again.') from None + + num_node, _, num_sub = data.shape # info on connectivity matrices + pos_data = data > 0 # location of + values in matrix + pos_data_count = pos_data.sum(axis=2) # num sub with + values at each node + + with np.errstate(divide='ignore', invalid='ignore'): + average_weights = data.sum(axis=2) / pos_data_count + + # empty array to hold inter/intra hemispheric connections + consensus = np.zeros((num_node, num_node, 2)) + + for conn_type in range(2): # iterate through inter/intra hemisphere conn + if conn_type == 0: # get inter hemisphere edges + inter_hemi = (hemiid == 0) @ (hemiid == 1).T + keep_conn = np.logical_or(inter_hemi, inter_hemi.T) + else: # get intra hemisphere edges + right_hemi = (hemiid == 0) @ (hemiid == 0).T + left_hemi = (hemiid == 1) @ (hemiid == 1).T + keep_conn = np.logical_or(right_hemi @ right_hemi.T, + left_hemi @ left_hemi.T) + + # mask the distance array for only those edges we want to examine + full_dist_conn = distance * keep_conn + upper_dist_conn = np.atleast_3d(np.triu(full_dist_conn)) + + # generate array of weighted (by distance), positive edges across subs + pos_dist = pos_data * upper_dist_conn + pos_dist = pos_dist[np.nonzero(pos_dist)] + + # determine average # of positive edges across subs + # we will use this to bin the edge weights + if conn_type == 0: + if conn_num_inter is None: + avg_conn_num = len(pos_dist) / num_sub + else: + avg_conn_num = conn_num_inter + else: + if conn_num_intra is None: + avg_conn_num = len(pos_dist) / num_sub + else: + avg_conn_num = conn_num_intra + + # estimate empirical CDF of weighted, positive edges across subs + cumprob, quantiles = _ecdf(pos_dist) + cumprob = np.round(cumprob * avg_conn_num).astype(int) + + # empty array to hold group-average matrix for current connection type + # (i.e., inter/intra hemispheric connections) + group_conn_type = np.zeros((num_node, num_node)) + + # iterate through bins (for edge weights) + for n in range(1, int(avg_conn_num) + 1): + # get current quantile of interest + curr_quant = quantiles[np.logical_and(cumprob >= (n - 1), + cumprob < n)] + if curr_quant.size == 0: + continue + + # find edges in distance connectivity matrix w/i current quantile + mask = np.logical_and(full_dist_conn >= curr_quant.min(), + full_dist_conn <= curr_quant.max()) + i, j = np.where(np.triu(mask)) # indices of edges of interest + + c = pos_data_count[i, j] # get num sub with + values at edges + w = average_weights[i, j] # get averaged weight of edges + + # find locations of edges most commonly represented across subs + indmax = np.argwhere(c == c.max()) + + # determine index of most frequent edge; break ties with higher + # weighted edge + if indmax.size == 1: # only one edge found + group_conn_type[i[indmax], j[indmax]] = 1 + else: # multiple edges found + indmax = indmax[np.argmax(w[indmax])] + group_conn_type[i[indmax], j[indmax]] = 1 + + consensus[:, :, conn_type] = group_conn_type + + # collapse across hemispheric connections types and make symmetrical array + consensus = consensus.sum(axis=2) + consensus = np.logical_or(consensus, consensus.T).astype(int) + + if 
weighted: + consensus = consensus * np.mean(data, axis=2) + return consensus diff --git a/netneurotools/networks/generative.py b/netneurotools/networks/generative.py new file mode 100644 index 0000000..7a7bff4 --- /dev/null +++ b/netneurotools/networks/generative.py @@ -0,0 +1 @@ +"""Functions for generative network models.""" diff --git a/netneurotools/networks/networks_utils.py b/netneurotools/networks/networks_utils.py new file mode 100644 index 0000000..5085e55 --- /dev/null +++ b/netneurotools/networks/networks_utils.py @@ -0,0 +1,132 @@ +"""Functions for supporting network constuction.""" + +import numpy as np +from scipy.sparse import csgraph + + +def get_triu(data, k=1): + """ + Return vectorized version of upper triangle from `data`. + + Parameters + ---------- + data : (N, N) array_like + Input data + k : int, optional + Which diagonal to select from (where primary diagonal is 0). Default: 1 + + Returns + ------- + triu : (N * N-1 / 2) numpy.ndarray + Upper triangle of `data` + + Examples + -------- + >>> from netneurotools import networks + + >>> X = np.array([[1, 0.5, 0.25], [0.5, 1, 0.33], [0.25, 0.33, 1]]) + >>> tri = networks.get_triu(X) + >>> tri + array([0.5 , 0.25, 0.33]) + """ + return data[np.triu_indices(len(data), k=k)].copy() + + +def binarize_network(network, retain=10, keep_diag=False): + """ + Keep top `retain` % of connections in `network` and binarizes. + + Uses the upper triangle for determining connection percentage, which may + result in disconnected nodes. If this behavior is not desired see + :py:func:`netneurotools.networks.threshold_network`. + + Parameters + ---------- + network : (N, N) array_like + Input graph + retain : [0, 100] float, optional + Percent connections to retain. Default: 10 + keep_diag : bool, optional + Whether to keep the diagonal instead of setting it to 0. Default: False + + Returns + ------- + binarized : (N, N) numpy.ndarray + Binarized, thresholded graph + + See Also + -------- + netneurotools.networks.threshold_network + """ + if retain < 0 or retain > 100: + raise ValueError( + f'Value provided for `retain` is outside [0, 100]: {retain}' + ) + + prctile = 100 - retain + triu = get_triu(network) + thresh = np.percentile(triu, prctile, axis=0, keepdims=True) + binarized = np.array(network > thresh, dtype=int) + + if not keep_diag: + binarized[np.diag_indices(len(binarized))] = 0 + + return binarized + + +def threshold_network(network, retain=10): + """ + Keep top `retain` % of connections in `network` and binarizes. + + Uses a minimum spanning tree to ensure that no nodes are disconnected from + the resulting thresholded graph + + Parameters + ---------- + network : (N, N) array_like + Input graph + retain : [0, 100] float, optional + Percent connections to retain. Default: 10 + + Returns + ------- + thresholded : (N, N) numpy.ndarray + Binarized, thresholded graph + + See Also + -------- + netneurotools.networks.binarize_network + """ + if retain < 0 or retain > 100: + raise ValueError( + f'Value provided for `retain` must be a percent ' + f'in range [0, 100]. 
Provided: {retain}' + ) + + # get number of nodes in graph and invert weights (MINIMUM spanning tree) + nodes = len(network) + graph = np.triu(network * -1) + + # find MST and count # of edges in graph + mst = csgraph.minimum_spanning_tree(graph).todense() + mst_edges = np.sum(mst != 0) + + # determine # of remaining edges and ensure we're not over the limit + remain = int((retain / 100) * ((nodes * (nodes - 1)) / 2)) - mst_edges + if remain < 0: + raise ValueError( + f'Minimum spanning tree with {mst_edges} edges exceeds desired ' + f'connection density of {retain}% ({remain + mst_edges} edges). Cannot ' + f'proceed with graph creation.' + ) + + # zero out edges already in MST and then get indices of next best edges + graph -= mst + inds = get_triu(graph).argsort()[:remain] + inds = tuple(e[inds] for e in np.triu_indices_from(graph, k=1)) + + # add edges to MST, symmetrize, and convert to binary matrix + mst[inds] = graph[inds] + mst = np.array((mst + mst.T) != 0, dtype=int) + + return mst diff --git a/netneurotools/networks.py b/netneurotools/networks/randomize.py similarity index 61% rename from netneurotools/networks.py rename to netneurotools/networks/randomize.py index 1803f8c..a91f9ec 100644 --- a/netneurotools/networks.py +++ b/netneurotools/networks/randomize.py @@ -1,14 +1,11 @@ -# -*- coding: utf-8 -*- -"""Functions for generating group-level networks from individual measurements.""" +"""Functions for generating randomized networks.""" import bct import numpy as np from tqdm import tqdm -from scipy.sparse import csgraph -from sklearn.utils.validation import (check_random_state, check_array, - check_consistent_length) - -from . import utils +from sklearn.utils.validation import ( + check_random_state +) try: from numba import njit @@ -17,388 +14,91 @@ use_numba = False -def func_consensus(data, n_boot=1000, ci=95, seed=None): - """ - Calculate thresholded group consensus functional connectivity graph. - - This function concatenates all time series in `data` and computes a group - correlation matrix based on this extended time series. It then generates - length `T` bootstrapped samples from the concatenated matrix and estimates - confidence intervals for all correlations. Correlations whose sign is - consistent across bootstraps are retained; inconsistent correlations are - set to zero. - - If `n_boot` is set to 0 or None a simple, group-averaged functional - connectivity matrix is estimated, instead. - - Parameters - ---------- - data : (N, T, S) array_like (or a list of S arrays, each shaped as (N, T)) - Pre-processed functional time series, where `N` is the number of nodes, - `T` is the number of volumes in the time series, and `S` is the number - of subjects. - n_boot : int, optional - Number of bootstraps for which to generate correlation. Default: 1000 - ci : (0, 100) float, optional - Confidence interval for which to assess the reliability of correlations - with bootstraps. Default: 95 - seed : int, optional - Random seed. Default: None - - Returns - ------- - consensus : (N, N) numpy.ndarray - Thresholded, group-level correlation matrix - - References - ---------- - Mišić, B., Betzel, R. F., Nematzadeh, A., Goni, J., Griffa, A., Hagmann, - P., Flammini, A., Ahn, Y.-Y., & Sporns, O. (2015). Cooperative and - competitive spreading dynamics on the human connectome. Neuron, 86(6), - 1518-1529. 
- """ - # check inputs - rs = check_random_state(seed) - if ci > 100 or ci < 0: - raise ValueError("`ci` must be between 0 and 100.") - - # group-average functional connectivity matrix desired instead of bootstrap - if n_boot == 0 or n_boot is None: - if isinstance(data, list): - corrs = [np.corrcoef(sub) for sub in data] - else: - corrs = [np.corrcoef(data[..., sub]) for sub in - range(data.shape[-1])] - return np.nanmean(corrs, axis=0) - - if isinstance(data, list): - collapsed_data = np.hstack(data) - nsample = int(collapsed_data.shape[-1] / len(data)) - else: - collapsed_data = data.reshape((len(data), -1), order='F') - nsample = data.shape[1] - - consensus = np.corrcoef(collapsed_data) - - # only keep the upper triangle for the bootstraps to save on memory usage - triu_inds = np.triu_indices_from(consensus, k=1) - bootstrapped_corrmat = np.zeros((len(triu_inds[0]), n_boot)) - - # generate `n_boot` bootstrap correlation matrices by sampling `t` time - # points from the concatenated time series - for boot in range(n_boot): - inds = rs.randint(collapsed_data.shape[-1], size=nsample) - bootstrapped_corrmat[..., boot] = \ - np.corrcoef(collapsed_data[:, inds])[triu_inds] - - # extract the CIs from the bootstrapped correlation matrices - # we don't need the input anymore so overwrite it - bootstrapped_ci = np.percentile(bootstrapped_corrmat, [100 - ci, ci], - axis=-1, overwrite_input=True) - - # remove unreliable (i.e., CI zero-crossing) correlations - # if the signs of the bootstrapped confidence intervals are different - # (i.e., their signs sum to 0), then we want to remove them - # so, take the logical not of the CI (CI = 0 ---> True) and create a mask - # then, set all connections from the consensus array inside the mask to 0 - remove_inds = np.logical_not(np.sign(bootstrapped_ci).sum(axis=0)) - mask = np.zeros_like(consensus, dtype=bool) - mask[triu_inds] = remove_inds - consensus[mask + mask.T] = 0 - - return consensus - - -def _ecdf(data): - """ - Estimate empirical cumulative distribution function of `data`. - - Taken directly from StackOverflow. See original answer at - https://stackoverflow.com/questions/33345780. - - Parameters - ---------- - data : array_like - - Returns - ------- - prob : numpy.ndarray - Cumulative probability - quantiles : numpy.darray - Quantiles - """ - sample = np.atleast_1d(data) - - # find the unique values and their corresponding counts - quantiles, counts = np.unique(sample, return_counts=True) - - # take the cumulative sum of the counts and divide by the sample size to - # get the cumulative probabilities between 0 and 1 - prob = np.cumsum(counts).astype(float) / sample.size - - # match MATLAB - prob, quantiles = np.append([0], prob), np.append(quantiles[0], quantiles) - - return prob, quantiles - - -def struct_consensus(data, distance, hemiid, - conn_num_inter=None, - conn_num_intra=None, - weighted=False): - """ - Calculate distance-dependent group consensus structural connectivity graph. - - Takes as input a weighted stack of connectivity matrices with dimensions - (N, N, S) where `N` is the number of nodes and `S` is the number of - matrices or subjects. The matrices must be weighted, and ideally with - continuous weights (e.g. fractional anisotropy rather than streamline - count). The second input is a pairwise distance matrix, where distance(i,j) - is the Euclidean distance between nodes i and j. 
The final input is an - (N, 1) vector which labels nodes as belonging to the right (`hemiid==0`) or - left (`hemiid=1`) hemisphere (note that these values can be flipped as long - as `hemiid` contains only values of 0 and 1). - - This function estimates the average edge length distribution and builds - a group-averaged connectivity matrix that approximates this distribution - with density equal to the mean density across subjects. - - The algorithm works as follows: - - 1. Estimate the cumulative edge length distribution, - 2. Divide the distribution into M length bins, one for each edge that will - be added to the group-average matrix, and - 3. Within each bin, select the edge that is most consistently expressed - expressed across subjects, breaking ties according to average edge - weight (which is why the input matrix `data` must be weighted). - - The algorithm works separately on within/between hemisphere links. - M is the sum of `conn_num_inter` and `conn_num_intra`, if provided. - Otherwise, M is estimated from the data. - - Parameters - ---------- - data : (N, N, S) array_like - Weighted connectivity matrices (i.e., fractional anisotropy), where `N` - is nodes and `S` is subjects - distance : (N, N) array_like - Array where `distance[i, j]` is the Euclidean distance between nodes - `i` and `j` - hemiid : (N, 1) array_like - Hemisphere designation for `N` nodes where a value of 0/1 indicates - node `N_{i}` is in the right/left hemisphere, respectively - conn_num_inter : int, optional - Number of inter-hemispheric connections to include in the consensus - matrix. If `None`, the number of inter-hemispheric connections will be - estimated from the data. Default = `None`. - conn_num_intra : int, optional - Number of intra-hemispheric connections to include in the consensus - matrix. If `None`, the number of intra-hemispheric connections will be - estimated from the data. Default = `None`. - weighted : bool - Flag indicating whether or not to return a weighted consensus map. If - `True`, the consensus will be multiplied by the mean of `data`. - - Returns - ------- - consensus : (N, N) numpy.ndarray - Binary (default) or mean-weighted group-level connectivity matrix - - References - ---------- - Betzel, R. F., Griffa, A., Hagmann, P., & Mišić, B. (2018). Distance- - dependent consensus thresholds for generating group-representative - structural brain networks. Network Neuroscience, 1-22. +def randmio_und(W, itr): """ - # confirm input shapes are as expected - check_consistent_length(data, distance, hemiid) - try: - hemiid = check_array(hemiid, ensure_2d=True) - except ValueError: - raise ValueError('Provided hemiid must be a 2D array. 
Reshape your ' - 'data using array.reshape(-1, 1) and try again.') from None - - num_node, _, num_sub = data.shape # info on connectivity matrices - pos_data = data > 0 # location of + values in matrix - pos_data_count = pos_data.sum(axis=2) # num sub with + values at each node - - with np.errstate(divide='ignore', invalid='ignore'): - average_weights = data.sum(axis=2) / pos_data_count - - # empty array to hold inter/intra hemispheric connections - consensus = np.zeros((num_node, num_node, 2)) - - for conn_type in range(2): # iterate through inter/intra hemisphere conn - if conn_type == 0: # get inter hemisphere edges - inter_hemi = (hemiid == 0) @ (hemiid == 1).T - keep_conn = np.logical_or(inter_hemi, inter_hemi.T) - else: # get intra hemisphere edges - right_hemi = (hemiid == 0) @ (hemiid == 0).T - left_hemi = (hemiid == 1) @ (hemiid == 1).T - keep_conn = np.logical_or(right_hemi @ right_hemi.T, - left_hemi @ left_hemi.T) - - # mask the distance array for only those edges we want to examine - full_dist_conn = distance * keep_conn - upper_dist_conn = np.atleast_3d(np.triu(full_dist_conn)) - - # generate array of weighted (by distance), positive edges across subs - pos_dist = pos_data * upper_dist_conn - pos_dist = pos_dist[np.nonzero(pos_dist)] - - # determine average # of positive edges across subs - # we will use this to bin the edge weights - if conn_type == 0: - if conn_num_inter is None: - avg_conn_num = len(pos_dist) / num_sub - else: - avg_conn_num = conn_num_inter - else: - if conn_num_intra is None: - avg_conn_num = len(pos_dist) / num_sub - else: - avg_conn_num = conn_num_intra - - # estimate empirical CDF of weighted, positive edges across subs - cumprob, quantiles = _ecdf(pos_dist) - cumprob = np.round(cumprob * avg_conn_num).astype(int) - - # empty array to hold group-average matrix for current connection type - # (i.e., inter/intra hemispheric connections) - group_conn_type = np.zeros((num_node, num_node)) - - # iterate through bins (for edge weights) - for n in range(1, int(avg_conn_num) + 1): - # get current quantile of interest - curr_quant = quantiles[np.logical_and(cumprob >= (n - 1), - cumprob < n)] - if curr_quant.size == 0: - continue - - # find edges in distance connectivity matrix w/i current quantile - mask = np.logical_and(full_dist_conn >= curr_quant.min(), - full_dist_conn <= curr_quant.max()) - i, j = np.where(np.triu(mask)) # indices of edges of interest - - c = pos_data_count[i, j] # get num sub with + values at edges - w = average_weights[i, j] # get averaged weight of edges - - # find locations of edges most commonly represented across subs - indmax = np.argwhere(c == c.max()) - - # determine index of most frequent edge; break ties with higher - # weighted edge - if indmax.size == 1: # only one edge found - group_conn_type[i[indmax], j[indmax]] = 1 - else: # multiple edges found - indmax = indmax[np.argmax(w[indmax])] - group_conn_type[i[indmax], j[indmax]] = 1 - - consensus[:, :, conn_type] = group_conn_type - - # collapse across hemispheric connections types and make symmetrical array - consensus = consensus.sum(axis=2) - consensus = np.logical_or(consensus, consensus.T).astype(int) - - if weighted: - consensus = consensus * np.mean(data, axis=2) - return consensus - + Optimized version of randmio_und. -def binarize_network(network, retain=10, keep_diag=False): - """ - Keep top `retain` % of connections in `network` and binarizes. + This function randomizes an undirected network, while preserving the + degree distribution. 
The function does not preserve the strength + distribution in weighted networks. - Uses the upper triangle for determining connection percentage, which may - result in disconnected nodes. If this behavior is not desired see - :py:func:`netneurotools.networks.threshold_network`. + This function is significantly faster if numba is enabled, because + the main overhead is `np.random.randint`, see `here `_ Parameters ---------- - network : (N, N) array_like - Input graph - retain : [0, 100] float, optional - Percent connections to retain. Default: 10 - keep_diag : bool, optional - Whether to keep the diagonal instead of setting it to 0. Default: False + W : (N, N) array-like + Undirected binary/weighted connection matrix + itr : int + rewiring parameter. Each edge is rewired approximately itr times. Returns ------- - binarized : (N, N) numpy.ndarray - Binarized, thresholded graph - - See Also - -------- - netneurotools.networks.threshold_network - """ - if retain < 0 or retain > 100: - raise ValueError('Value provided for `retain` is outside [0, 100]: {}' - .format(retain)) - - prctile = 100 - retain - triu = utils.get_triu(network) - thresh = np.percentile(triu, prctile, axis=0, keepdims=True) - binarized = np.array(network > thresh, dtype=int) - - if not keep_diag: - binarized[np.diag_indices(len(binarized))] = 0 - - return binarized - - -def threshold_network(network, retain=10): - """ - Keep top `retain` % of connections in `network` and binarizes. - - Uses a minimum spanning tree to ensure that no nodes are disconnected from - the resulting thresholded graph + W : (N, N) array-like + Randomized network + eff : int + number of actual rewirings carried out + """ # noqa: E501 + W = W.copy() + n = len(W) + i, j = np.where(np.triu(W > 0, 1)) + k = len(i) + itr *= k - Parameters - ---------- - network : (N, N) array_like - Input graph - retain : [0, 100] float, optional - Percent connections to retain. Default: 10 + # maximum number of rewiring attempts per iteration + max_attempts = np.round(n * k / (n * (n - 1))) + # actual number of successful rewirings + eff = 0 - Returns - ------- - thresholded : (N, N) numpy.ndarray - Binarized, thresholded graph + for _ in range(int(itr)): + att = 0 + while att <= max_attempts: # while not rewired + while True: + e1, e2 = np.random.randint(k), np.random.randint(k) + while e1 == e2: + e2 = np.random.randint(k) + a, b = i[e1], j[e1] + c, d = i[e2], j[e2] - See Also - -------- - netneurotools.networks.binarize_network - """ - if retain < 0 or retain > 100: - raise ValueError('Value provided for `retain` must be a percent ' - 'in range [0, 100]. 
Provided: {}'.format(retain)) + if a != c and a != d and b != c and b != d: + break # all 4 vertices must be different - # get number of nodes in graph and invert weights (MINIMUM spanning tree) - nodes = len(network) - graph = np.triu(network * -1) + # flip edge c-d with 50% probability + # to explore all potential rewirings + if np.random.random() > .5: + i[e2], j[e2] = d, c + c, d = d, c - # find MST and count # of edges in graph - mst = csgraph.minimum_spanning_tree(graph).todense() - mst_edges = np.sum(mst != 0) + # rewiring condition + # not flipped + # a--b a b + # TO X + # c--d c d + # if flipped + # a--b a--b a b + # TO TO X + # c--d d--c d c + if not (W[a, d] or W[c, b]): + W[a, d] = W[a, b] + W[a, b] = 0 + W[d, a] = W[b, a] + W[b, a] = 0 + W[c, b] = W[c, d] + W[c, d] = 0 + W[b, c] = W[d, c] + W[d, c] = 0 - # determine # of remaining edges and ensure we're not over the limit - remain = int((retain / 100) * ((nodes * (nodes - 1)) / 2)) - mst_edges - if remain < 0: - raise ValueError('Minimum spanning tree with {} edges exceeds desired ' - 'connection density of {}% ({} edges). Cannot ' - 'proceed with graph creation.' - .format(mst_edges, retain, remain + mst_edges)) + j[e1] = d + j[e2] = b # reassign edge indices + eff += 1 + break + att += 1 - # zero out edges already in MST and then get indices of next best edges - graph -= mst - inds = utils.get_triu(graph).argsort()[:remain] - inds = tuple(e[inds] for e in np.triu_indices_from(graph, k=1)) + return W, eff - # add edges to MST, symmetrize, and convert to binary matrix - mst[inds] = graph[inds] - mst = np.array((mst + mst.T) != 0, dtype=int) - return mst +if use_numba: + randmio_und = njit(randmio_und) def match_length_degree_distribution(W, D, nbins=10, nswap=1000, @@ -546,7 +246,7 @@ def match_length_degree_distribution(W, D, nbins=10, nswap=1000, if len(np.where(B != 0)[0]) != len(np.where(newB != 0)[0]): print( f"ERROR --- number of edges changed, \ - B:{len(np.where(B!=0)[0])}, newB:{len(np.where(newB!=0)[0])}") + B:{len(np.where(B != 0)[0])}, newB:{len(np.where(newB != 0)[0])}") # check that the degree of the nodes it's the same for i in range(N): if np.sum(B[i]) != np.sum(newB[i]): @@ -578,93 +278,6 @@ def match_length_degree_distribution(W, D, nbins=10, nswap=1000, return newB, newW, nr -def randmio_und(W, itr): - """ - Optimized version of randmio_und. - - This function randomizes an undirected network, while preserving the - degree distribution. The function does not preserve the strength - distribution in weighted networks. - - This function is significantly faster if numba is enabled, because - the main overhead is `np.random.randint`, see `here `_ - - Parameters - ---------- - W : (N, N) array-like - Undirected binary/weighted connection matrix - itr : int - rewiring parameter. Each edge is rewired approximately itr times. 
- - Returns - ------- - W : (N, N) array-like - Randomized network - eff : int - number of actual rewirings carried out - """ # noqa: E501 - W = W.copy() - n = len(W) - i, j = np.where(np.triu(W > 0, 1)) - k = len(i) - itr *= k - - # maximum number of rewiring attempts per iteration - max_attempts = np.round(n * k / (n * (n - 1))) - # actual number of successful rewirings - eff = 0 - - for _ in range(int(itr)): - att = 0 - while att <= max_attempts: # while not rewired - while True: - e1, e2 = np.random.randint(k), np.random.randint(k) - while e1 == e2: - e2 = np.random.randint(k) - a, b = i[e1], j[e1] - c, d = i[e2], j[e2] - - if a != c and a != d and b != c and b != d: - break # all 4 vertices must be different - - # flip edge c-d with 50% probability - # to explore all potential rewirings - if np.random.random() > .5: - i[e2], j[e2] = d, c - c, d = d, c - - # rewiring condition - # not flipped - # a--b a b - # TO X - # c--d c d - # if flipped - # a--b a--b a b - # TO TO X - # c--d d--c d c - if not (W[a, d] or W[c, b]): - W[a, d] = W[a, b] - W[a, b] = 0 - W[d, a] = W[b, a] - W[b, a] = 0 - W[c, b] = W[c, d] - W[c, d] = 0 - W[b, c] = W[d, c] - W[d, c] = 0 - - j[e1] = d - j[e2] = b # reassign edge indices - eff += 1 - break - att += 1 - - return W, eff - - -if use_numba: - randmio_und = njit(randmio_und) - - def strength_preserving_rand_sa(A, rewiring_iter=10, nstage=100, niter=10000, temp=1000, frac=0.5, @@ -762,11 +375,11 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, rs = check_random_state(seed) n = A.shape[0] - s = np.sum(A, axis=1) #strengths of A + s = np.sum(A, axis=1) # strengths of A - #Maslov & Sneppen rewiring + # Maslov & Sneppen rewiring if R is None: - #ensuring connectedness if the original network is connected + # ensuring connectedness if the original network is connected if connected is None: connected = False if bct.number_of_components(A) > 1 else True if connected: @@ -776,10 +389,10 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, else: B = R.copy() - u, v = np.triu(B, k=1).nonzero() #upper triangle indices - wts = np.triu(B, k=1)[(u, v)] #upper triangle values + u, v = np.triu(B, k=1).nonzero() # upper triangle indices + wts = np.triu(B, k=1)[(u, v)] # upper triangle values m = len(wts) - sb = np.sum(B, axis=1) #strengths of B + sb = np.sum(B, axis=1) # strengths of B if energy_func is not None: energy = energy_func(s, sb) @@ -809,7 +422,7 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, naccept = 0 for _ in range(niter): - #permutation + # permutation e1 = rs.randint(m) e2 = rs.randint(m) @@ -838,9 +451,9 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, "Received: {}.".format(energy_type)) raise ValueError(msg) - #permutation acceptance criterion + # permutation acceptance criterion if (energy_prime < energy or - rs.rand() < np.exp(-(energy_prime - energy)/temp)): + rs.rand() < np.exp(-(energy_prime - energy) / temp)): sb = sb_prime.copy() wts[[e1, e2]] = wts[[e2, e1]] energy = energy_prime @@ -849,13 +462,13 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, wtsmin = wts.copy() naccept = naccept + 1 - #temperature update - temp = temp*frac + # temperature update + temp = temp * frac if verbose: print('\nstage {:d}, temp {:.5f}, best energy {:.5f}, ' 'frac of accepted moves {:.3f}'.format(istage, temp, energymin, - naccept/niter)) + naccept / niter)) B = np.zeros((n, n)) B[(u, v)] = wtsmin @@ -947,11 +560,11 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, rs = check_random_state(seed) n = A.shape[0] - s = np.sum(A, 
axis=1) #strengths of A + s = np.sum(A, axis=1) # strengths of A - #Maslov & Sneppen rewiring + # Maslov & Sneppen rewiring if R is None: - #ensuring connectedness if the original network is connected + # ensuring connectedness if the original network is connected if connected is None: connected = False if bct.number_of_components(A) > 1 else True if connected: @@ -961,10 +574,10 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, else: B = R.copy() - u, v = np.triu(B, k=1).nonzero() #upper triangle indices - wts = np.triu(B, k=1)[(u, v)] #upper triangle values + u, v = np.triu(B, k=1).nonzero() # upper triangle indices + wts = np.triu(B, k=1)[(u, v)] # upper triangle values m = len(wts) - sb = np.sum(B, axis=1) #strengths of B + sb = np.sum(B, axis=1) # strengths of B energy = np.mean((s - sb)**2) @@ -980,7 +593,7 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, rs.rand(niter) ): - #permutation + # permutation a, b, c, d = u[e1], v[e1], u[e2], v[e2] wts_change = wts[e1] - wts[e2] delta_energy = (2 * wts_change * @@ -990,10 +603,10 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, (s[c] - sb[c]) - (s[d] - sb[d]) ) - )/n + ) / n - #permutation acceptance criterion - if (delta_energy < 0 or prob < np.e**(-(delta_energy)/temp)): + # permutation acceptance criterion + if (delta_energy < 0 or prob < np.e**(-(delta_energy) / temp)): sb[[a, b]] -= wts_change sb[[c, d]] += wts_change @@ -1006,13 +619,13 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, wtsmin = wts.copy() naccept = naccept + 1 - #temperature update - temp = temp*frac + # temperature update + temp = temp * frac if verbose: print('\nstage {:d}, temp {:.5f}, best energy {:.5f}, ' 'frac of accepted moves {:.3f}'.format(istage, temp, energymin, - naccept/niter)) + naccept / niter)) B = np.zeros((n, n)) B[(u, v)] = wtsmin @@ -1114,20 +727,20 @@ def strength_preserving_rand_sa_dir(A, rewiring_iter=10, rs = check_random_state(seed) n = A.shape[0] - s_in = np.sum(A, axis=0) #in-strengths of A - s_out = np.sum(A, axis=1) #out-strengths of A + s_in = np.sum(A, axis=0) # in-strengths of A + s_out = np.sum(A, axis=1) # out-strengths of A - #Maslov & Sneppen rewiring + # Maslov & Sneppen rewiring if connected: B = bct.randmio_dir_connected(A, rewiring_iter, seed=seed)[0] else: B = bct.randmio_dir(A, rewiring_iter, seed=seed)[0] - u, v = B.nonzero() #nonzero indices of B - wts = B[(u, v)] #nonzero values of B + u, v = B.nonzero() # nonzero indices of B + wts = B[(u, v)] # nonzero values of B m = len(wts) - sb_in = np.sum(B, axis=0) #in-strengths of B - sb_out = np.sum(B, axis=1) #out-strengths of B + sb_in = np.sum(B, axis=0) # in-strengths of B + sb_out = np.sum(B, axis=1) # out-strengths of B if energy_func is not None: energy = energy_func(s_in, sb_in) + energy_func(s_out, sb_out) @@ -1136,7 +749,7 @@ def strength_preserving_rand_sa_dir(A, rewiring_iter=10, elif energy_type == 'max': energy = np.max(np.abs(s_in - sb_in)) + np.max(np.abs(s_out - sb_out)) elif energy_type == 'mae': - energy= np.mean(np.abs(s_in - sb_in)) + np.mean(np.abs(s_out - sb_out)) + energy = np.mean(np.abs(s_in - sb_in)) + np.mean(np.abs(s_out - sb_out)) elif energy_type == 'mse': energy = np.mean((s_in - sb_in)**2) + np.mean((s_out - sb_out)**2) elif energy_type == 'rmse': @@ -1158,7 +771,7 @@ def strength_preserving_rand_sa_dir(A, rewiring_iter=10, naccept = 0 for _ in range(niter): - #permutation + # permutation e1 = rs.randint(m) e2 = rs.randint(m) @@ -1196,9 +809,9 @@ def strength_preserving_rand_sa_dir(A, 
rewiring_iter=10, "Received: {}.".format(energy_type)) raise ValueError(msg) - #permutation acceptance criterion + # permutation acceptance criterion if (energy_prime < energy or - rs.rand() < np.exp(-(energy_prime - energy)/temp)): + rs.rand() < np.exp(-(energy_prime - energy) / temp)): sb_in = sb_prime_in.copy() sb_out = sb_prime_out.copy() wts[[e1, e2]] = wts[[e2, e1]] @@ -1208,13 +821,13 @@ def strength_preserving_rand_sa_dir(A, rewiring_iter=10, wtsmin = wts.copy() naccept = naccept + 1 - #temperature update - temp = temp*frac + # temperature update + temp = temp * frac if verbose: print('\nstage {:d}, temp {:.5f}, best energy {:.5f}, ' 'frac of accepted moves {:.3f}'.format(istage, temp, energymin, - naccept/niter)) + naccept / niter)) B = np.zeros((n, n)) B[(u, v)] = wtsmin diff --git a/netneurotools/networks/tests/__init__.py b/netneurotools/networks/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/networks/tests/test_consensus.py b/netneurotools/networks/tests/test_consensus.py new file mode 100644 index 0000000..2c0fc23 --- /dev/null +++ b/netneurotools/networks/tests/test_consensus.py @@ -0,0 +1 @@ +"""For testing netneurotools.networks.consensus functionality.""" diff --git a/netneurotools/networks/tests/test_generative.py b/netneurotools/networks/tests/test_generative.py new file mode 100644 index 0000000..223d4bb --- /dev/null +++ b/netneurotools/networks/tests/test_generative.py @@ -0,0 +1 @@ +"""For testing netneurotools.networks.generative functionality.""" diff --git a/netneurotools/networks/tests/test_networks_utils.py b/netneurotools/networks/tests/test_networks_utils.py new file mode 100644 index 0000000..7bbcfef --- /dev/null +++ b/netneurotools/networks/tests/test_networks_utils.py @@ -0,0 +1,12 @@ +"""For testing netneurotools.networks.networks_utils functionality.""" + +import numpy as np + +from netneurotools import networks + + +def test_get_triu(): + """Test that get_triu returns correct values.""" + arr = np.arange(9).reshape(3, 3) + assert np.all(networks.get_triu(arr) == np.array([1, 2, 5])) + assert np.all(networks.get_triu(arr, k=0) == np.array([0, 1, 2, 4, 5, 8])) diff --git a/netneurotools/networks/tests/test_randomize.py b/netneurotools/networks/tests/test_randomize.py new file mode 100644 index 0000000..c2a4be2 --- /dev/null +++ b/netneurotools/networks/tests/test_randomize.py @@ -0,0 +1 @@ +"""For testing netneurotools.networks.randomize functionality.""" diff --git a/netneurotools/plotting/__init__.py b/netneurotools/plotting/__init__.py new file mode 100644 index 0000000..2124fe3 --- /dev/null +++ b/netneurotools/plotting/__init__.py @@ -0,0 +1,34 @@ +"""Functions for making pretty plots and whatnot.""" + + +from .pysurfer_plotters import ( + plot_conte69, plot_fslr, plot_fsaverage, plot_fsvertex +) + + +from .pyvista_plotters import ( + pv_plot_surface +) + + +from .mpl_plotters import ( + _grid_communities, _sort_communities, + plot_point_brain, plot_mod_heatmap, +) + + +from .color_utils import ( + available_cmaps +) + +__all__ = [ + # pysurfer_plotters + 'plot_conte69', 'plot_fslr', 'plot_fsaverage', 'plot_fsvertex', + # pyvista_plotters + 'pv_plot_surface', + # mpl_plotters + '_grid_communities', '_sort_communities', + 'plot_point_brain', 'plot_mod_heatmap', + # color_utils + 'available_cmaps' +] diff --git a/netneurotools/colors.py b/netneurotools/plotting/color_utils.py similarity index 97% rename from netneurotools/colors.py rename to netneurotools/plotting/color_utils.py index cf0b7d8..f9fb3d0 100644 
--- a/netneurotools/colors.py +++ b/netneurotools/plotting/color_utils.py @@ -1,6 +1,6 @@ -# -*- coding: utf-8 -*- -"""Useful colormaps.""" +"""Functions for working with colors and colormaps.""" +import matplotlib from matplotlib.colors import LinearSegmentedColormap, ListedColormap __all__ = ['parula', 'justine', 'dinosaur'] @@ -94,10 +94,8 @@ def available_cmaps(): def _register_cmaps(): """Register all colormaps in module so they are accessible via matplotlib.""" - from matplotlib.cm import register_cmap - for cmap in __all__: - register_cmap(cmap, globals()[cmap]) + matplotlib.colormaps.register(globals()[cmap], name=cmap) _register_cmaps() diff --git a/netneurotools/plotting/mpl_plotters.py b/netneurotools/plotting/mpl_plotters.py new file mode 100644 index 0000000..66dcd9b --- /dev/null +++ b/netneurotools/plotting/mpl_plotters.py @@ -0,0 +1,287 @@ +"""Functions for matplotlib-based plotting.""" + +from typing import Iterable +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches + + +def _grid_communities(communities): + """ + Generate boundaries of `communities`. + + Parameters + ---------- + communities : array_like + Community assignment vector + + Returns + ------- + bounds : list + Boundaries of communities + """ + communities = np.asarray(communities) + if 0 in communities: + communities = communities + 1 + + comm = communities[np.argsort(communities)] + bounds = [] + for i in np.unique(comm): + ind = np.where(comm == i) + if len(ind) > 0: + bounds.append(np.min(ind)) + + bounds.append(len(communities)) + + return bounds + + +def _sort_communities(consensus, communities): + """ + Sort `communities` in `consensus` according to strength. + + Parameters + ---------- + consensus : array_like + Correlation matrix + communities : array_like + Community assignments for `consensus` + + Returns + ------- + inds : np.ndarray + Index array for sorting `consensus` + """ + communities = np.asarray(communities) + if 0 in communities: + communities = communities + 1 + + bounds = _grid_communities(communities) + inds = np.argsort(communities) + + for n, f in enumerate(bounds[:-1]): + i = inds[f:bounds[n + 1]] + cco = i[consensus[np.ix_(i, i)].mean(axis=1).argsort()[::-1]] + inds[f:bounds[n + 1]] = cco + + return inds + + +def plot_mod_heatmap(data, communities, *, inds=None, edgecolor='black', + ax=None, figsize=(6.4, 4.8), xlabels=None, ylabels=None, + xlabelrotation=90, ylabelrotation=0, cbar=True, + square=True, xticklabels=None, yticklabels=None, + mask_diagonal=True, **kwargs): + """ + Plot `data` as heatmap with borders drawn around `communities`. + + Parameters + ---------- + data : (N, N) array_like + Correlation matrix + communities : (N,) array_like + Community assignments for `data` + inds : (N,) array_like, optional + Index array for sorting `data` within `communities`. If None, these + will be generated from `data`. Default: None + edgecolor : str, optional + Color for lines demarcating community boundaries. Default: 'black' + ax : matplotlib.axes.Axes, optional + Axis on which to plot the heatmap. If none provided, a new figure and + axis will be created. Default: None + figsize : tuple, optional + Size of figure to create if `ax` is not provided. Default: (20, 20) + {x,y}labels : list, optional + List of labels on {x,y}-axis for each community in `communities`. The + number of labels should match the number of unique communities. + Default: None + {x,y}labelrotation : float, optional + Angle of the rotation of the labels. 
Available only if `{x,y}labels` + provided. Default : xlabelrotation: 90, ylabelrotation: 0 + square : bool, optional + Setting the matrix with equal aspect. Default: True + {x,y}ticklabels : list, optional + Incompatible with `{x,y}labels`. List of labels for each entry (not + community) in `data`. Default: None + cbar : bool, optional + Whether to plot colorbar. Default: True + mask_diagonal : bool, optional + Whether to mask the diagonal in the plotted heatmap. Default: True + kwargs : key-value mapping + Keyword arguments for `plt.pcolormesh()` + + Returns + ------- + ax : matplotlib.axes.Axes + Axis object containing plot + """ + for t, label in zip([xticklabels, yticklabels], [xlabels, ylabels]): + if t is not None and label is not None: + raise ValueError('Cannot set both {x,y}labels and {x,y}ticklabels') + + # get indices for sorting consensus + if inds is None: + inds = _sort_communities(data, communities) + + if ax is None: + _, ax = plt.subplots(1, 1, figsize=figsize) + + # plot data re-ordered based on community and node strength + if mask_diagonal: + plot_data = np.ma.masked_where(np.eye(len(data)), + data[np.ix_(inds, inds)]) + else: + plot_data = data[np.ix_(inds, inds)] + + coll = ax.pcolormesh(plot_data, edgecolor='none', **kwargs) + ax.set(xlim=(0, plot_data.shape[1]), ylim=(0, plot_data.shape[0])) + + # set equal aspect + if square: + ax.set_aspect('equal') + + for side in ['top', 'right', 'left', 'bottom']: + ax.spines[side].set_visible(False) + + # invert the y-axis so it looks "as expected" + ax.invert_yaxis() + + # plot the colorbar + if cbar: + cb = ax.figure.colorbar(coll) + if kwargs.get('rasterized', False): + cb.solids.set_rasterized(True) + + # draw borders around communities + bounds = _grid_communities(communities) + bounds[0] += 0.2 + bounds[-1] -= 0.2 + for n, edge in enumerate(np.diff(bounds)): + ax.add_patch(mpatches.Rectangle((bounds[n], bounds[n]), + edge, edge, fill=False, linewidth=2, + edgecolor=edgecolor)) + + if xlabels is not None or ylabels is not None: + # find the tick locations + initloc = _grid_communities(communities) + tickloc = [] + for loc in range(len(initloc) - 1): + tickloc.append(np.mean((initloc[loc], initloc[loc + 1]))) + + if xlabels is not None: + # make sure number of labels match the number of ticks + if len(tickloc) != len(xlabels): + raise ValueError('Number of labels do not match the number of ' + 'unique communities.') + else: + ax.set_xticks(tickloc) + ax.set_xticklabels(labels=xlabels, rotation=xlabelrotation) + ax.tick_params(left=False, bottom=False) + if ylabels is not None: + # make sure number of labels match the number of ticks + if len(tickloc) != len(ylabels): + raise ValueError('Number of labels do not match the number of ' + 'unique communities.') + else: + ax.set_yticks(tickloc) + ax.set_yticklabels(labels=ylabels, rotation=ylabelrotation) + ax.tick_params(left=False, bottom=False) + + if xticklabels is not None: + labels_ind = [xticklabels[i] for i in inds] + ax.set_xticks(np.arange(len(labels_ind)) + 0.5) + ax.set_xticklabels(labels_ind, rotation=90) + if yticklabels is not None: + labels_ind = [yticklabels[i] for i in inds] + ax.set_yticks(np.arange(len(labels_ind)) + 0.5) + ax.set_yticklabels(labels_ind) + + return ax + + +def plot_point_brain(data, coords, views=None, views_orientation='vertical', + views_size=(4, 2.4), cbar=False, robust=True, size=50, + **kwargs): + """ + Plot `data` as a cloud of points in 3D space based on specified `coords`. 
+ + Parameters + ---------- + data : (N,) array_like + Data for an `N` node parcellation; determines color of points + coords : (N, 3) array_like + x, y, z coordinates for `N` node parcellation + views : list, optional + List specifying which views to use. Can be any of {'sagittal', 'sag', + 'coronal', 'cor', 'axial', 'ax'}. If not specified will use 'sagittal' + and 'axial'. Default: None + views_orientation: str, optional + Orientation of the views. Can be either 'vertical' or 'horizontal'. + Default: 'vertical'. + views_size : tuple, optional + Figure size of each view. Default: (4, 2.4) + cbar : bool, optional + Whether to also show colorbar. Default: False + robust : bool, optional + Whether to use robust calculation of `vmin` and `vmax` for color scale. + size : int, optional + Size of points on plot. Default: 50 + **kwargs + Key-value pairs passed to `matplotlib.axes.Axis.scatter` + + Returns + ------- + fig : :class:`matplotlib.figure.Figure` + """ + _views = dict(sagittal=(0, 180), sag=(0, 180), + axial=(90, 180), ax=(90, 180), + coronal=(0, 90), cor=(0, 90)) + + x, y, z = coords[:, 0], coords[:, 1], coords[:, 2] + + if views is None: + views = [_views[f] for f in ['sagittal', 'axial']] + else: + if not isinstance(views, Iterable) or isinstance(views, str): + views = [views] + views = [_views[f] for f in views] + + if views_orientation == 'vertical': + ncols, nrows = 1, len(views) + elif views_orientation == 'horizontal': + ncols, nrows = len(views), 1 + figsize = (ncols * views_size[0], nrows * views_size[1]) + + # create figure and axes (3d projections) + fig, axes = plt.subplots(ncols=ncols, nrows=nrows, + figsize=figsize, + subplot_kw=dict(projection='3d')) + + opts = dict(linewidth=0.5, edgecolor='gray', cmap='viridis') + if robust: + vmin, vmax = np.percentile(data, [2.5, 97.5]) + opts.update(dict(vmin=vmin, vmax=vmax)) + opts.update(kwargs) + + # iterate through saggital/axial views and plot, rotating as needed + for n, view in enumerate(views): + # if only one view then axes is not a list! + ax = axes[n] if len(views) > 1 else axes + # make the actual scatterplot and update the view / aspect ratios + col = ax.scatter(x, y, z, c=data, s=size, **opts) + ax.view_init(*view) + ax.axis('off') + scaling = np.array([ax.get_xlim(), + ax.get_ylim(), + ax.get_zlim()]) + ax.set_box_aspect(tuple(scaling[:, 1] - scaling[:, 0])) + + fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0, wspace=0) + + # add colorbar to axes + if cbar: + cbar = fig.colorbar(col, ax=axes.flatten(), + drawedges=False, shrink=0.7) + cbar.outline.set_linewidth(0) + + return fig diff --git a/netneurotools/plotting.py b/netneurotools/plotting/pysurfer_plotters.py similarity index 61% rename from netneurotools/plotting.py rename to netneurotools/plotting/pysurfer_plotters.py index 3886547..50e830b 100644 --- a/netneurotools/plotting.py +++ b/netneurotools/plotting/pysurfer_plotters.py @@ -1,209 +1,10 @@ -# -*- coding: utf-8 -*- -"""Functions for making pretty plots and whatnot.""" +"""Functions for pysurfer-based plotting.""" import os -from typing import Iterable - -import matplotlib.patches as patches -import matplotlib.pyplot as plt -from mpl_toolkits.mplot3d import Axes3D # noqa -import nibabel as nib import numpy as np +import nibabel as nib -from .freesurfer import FSIGNORE, _decode_list - - -def _grid_communities(communities): - """ - Generate boundaries of `communities`. 
- - Parameters - ---------- - communities : array_like - Community assignment vector - - Returns - ------- - bounds : list - Boundaries of communities - """ - communities = np.asarray(communities) - if 0 in communities: - communities = communities + 1 - - comm = communities[np.argsort(communities)] - bounds = [] - for i in np.unique(comm): - ind = np.where(comm == i) - if len(ind) > 0: - bounds.append(np.min(ind)) - - bounds.append(len(communities)) - - return bounds - - -def sort_communities(consensus, communities): - """ - Sort `communities` in `consensus` according to strength. - - Parameters - ---------- - consensus : array_like - Correlation matrix - communities : array_like - Community assignments for `consensus` - - Returns - ------- - inds : np.ndarray - Index array for sorting `consensus` - """ - communities = np.asarray(communities) - if 0 in communities: - communities = communities + 1 - - bounds = _grid_communities(communities) - inds = np.argsort(communities) - - for n, f in enumerate(bounds[:-1]): - i = inds[f:bounds[n + 1]] - cco = i[consensus[np.ix_(i, i)].mean(axis=1).argsort()[::-1]] - inds[f:bounds[n + 1]] = cco - - return inds - - -def plot_mod_heatmap(data, communities, *, inds=None, edgecolor='black', - ax=None, figsize=(6.4, 4.8), xlabels=None, ylabels=None, - xlabelrotation=90, ylabelrotation=0, cbar=True, - square=True, xticklabels=None, yticklabels=None, - mask_diagonal=True, **kwargs): - """ - Plot `data` as heatmap with borders drawn around `communities`. - - Parameters - ---------- - data : (N, N) array_like - Correlation matrix - communities : (N,) array_like - Community assignments for `data` - inds : (N,) array_like, optional - Index array for sorting `data` within `communities`. If None, these - will be generated from `data`. Default: None - edgecolor : str, optional - Color for lines demarcating community boundaries. Default: 'black' - ax : matplotlib.axes.Axes, optional - Axis on which to plot the heatmap. If none provided, a new figure and - axis will be created. Default: None - figsize : tuple, optional - Size of figure to create if `ax` is not provided. Default: (20, 20) - {x,y}labels : list, optional - List of labels on {x,y}-axis for each community in `communities`. The - number of labels should match the number of unique communities. - Default: None - {x,y}labelrotation : float, optional - Angle of the rotation of the labels. Available only if `{x,y}labels` - provided. Default : xlabelrotation: 90, ylabelrotation: 0 - square : bool, optional - Setting the matrix with equal aspect. Default: True - {x,y}ticklabels : list, optional - Incompatible with `{x,y}labels`. List of labels for each entry (not - community) in `data`. Default: None - cbar : bool, optional - Whether to plot colorbar. Default: True - mask_diagonal : bool, optional - Whether to mask the diagonal in the plotted heatmap. 
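# Small demonstration of the community-boundary helpers used by
# plot_mod_heatmap (exposed as the private _grid_communities /
# _sort_communities in this refactor); the values mirror the new test_mpl.py.
import numpy as np
from netneurotools import plotting

comms = np.asarray([0, 0, 0, 0, 1, 1, 1, 1, 2, 2])
# boundaries mark where each community block starts, plus the total length;
# 0- and 1-based community labels give the same result
assert np.allclose(plotting._grid_communities(comms), [0, 4, 8, 10])
assert np.allclose(plotting._grid_communities(comms + 1), [0, 4, 8, 10])

# within each community, nodes are re-ordered by descending mean connectivity
data = np.arange(9).reshape(3, 3)
assert np.allclose(plotting._sort_communities(data, np.asarray([0, 0, 2])), [1, 0, 2])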
Default: True - kwargs : key-value mapping - Keyword arguments for `plt.pcolormesh()` - - Returns - ------- - ax : matplotlib.axes.Axes - Axis object containing plot - """ - for t, label in zip([xticklabels, yticklabels], [xlabels, ylabels]): - if t is not None and label is not None: - raise ValueError('Cannot set both {x,y}labels and {x,y}ticklabels') - - # get indices for sorting consensus - if inds is None: - inds = sort_communities(data, communities) - - if ax is None: - fig, ax = plt.subplots(1, 1, figsize=figsize) - - # plot data re-ordered based on community and node strength - if mask_diagonal: - plot_data = np.ma.masked_where(np.eye(len(data)), - data[np.ix_(inds, inds)]) - else: - plot_data = data[np.ix_(inds, inds)] - - coll = ax.pcolormesh(plot_data, edgecolor='none', **kwargs) - ax.set(xlim=(0, plot_data.shape[1]), ylim=(0, plot_data.shape[0])) - - # set equal aspect - if square: - ax.set_aspect('equal') - - for side in ['top', 'right', 'left', 'bottom']: - ax.spines[side].set_visible(False) - - # invert the y-axis so it looks "as expected" - ax.invert_yaxis() - - # plot the colorbar - if cbar: - cb = ax.figure.colorbar(coll) - if kwargs.get('rasterized', False): - cb.solids.set_rasterized(True) - - # draw borders around communities - bounds = _grid_communities(communities) - bounds[0] += 0.2 - bounds[-1] -= 0.2 - for n, edge in enumerate(np.diff(bounds)): - ax.add_patch(patches.Rectangle((bounds[n], bounds[n]), - edge, edge, fill=False, linewidth=2, - edgecolor=edgecolor)) - - if xlabels is not None or ylabels is not None: - # find the tick locations - initloc = _grid_communities(communities) - tickloc = [] - for loc in range(len(initloc) - 1): - tickloc.append(np.mean((initloc[loc], initloc[loc + 1]))) - - if xlabels is not None: - # make sure number of labels match the number of ticks - if len(tickloc) != len(xlabels): - raise ValueError('Number of labels do not match the number of ' - 'unique communities.') - else: - ax.set_xticks(tickloc) - ax.set_xticklabels(labels=xlabels, rotation=xlabelrotation) - ax.tick_params(left=False, bottom=False) - if ylabels is not None: - # make sure number of labels match the number of ticks - if len(tickloc) != len(ylabels): - raise ValueError('Number of labels do not match the number of ' - 'unique communities.') - else: - ax.set_yticks(tickloc) - ax.set_yticklabels(labels=ylabels, rotation=ylabelrotation) - ax.tick_params(left=False, bottom=False) - - if xticklabels is not None: - labels_ind = [xticklabels[i] for i in inds] - ax.set_xticks(np.arange(len(labels_ind)) + 0.5) - ax.set_xticklabels(labels_ind, rotation=90) - if yticklabels is not None: - labels_ind = [yticklabels[i] for i in inds] - ax.set_yticks(np.arange(len(labels_ind)) + 0.5) - ax.set_yticklabels(labels_ind) - - return ax +from ..datasets import FREESURFER_IGNORE, _get_freesurfer_subjid def plot_conte69(data, lhlabel, rhlabel, surf='midthickness', @@ -319,7 +120,7 @@ def plot_fslr(data, lhlabel, rhlabel, surf_atlas='conte69', scene : mayavi.Scene Scene object containing plot """ - from .datasets import fetch_conte69, fetch_yerkes19 + from ..datasets import fetch_conte69, fetch_yerkes19 try: from mayavi import mlab except ImportError: @@ -388,44 +189,6 @@ def plot_fslr(data, lhlabel, rhlabel, surf_atlas='conte69', return lhplot, rhplot -def _get_fs_subjid(subject_id, subjects_dir=None): - """ - Get fsaverage version `subject_id`, fetching if required. 
- - Parameters - ---------- - subject_id : str - FreeSurfer subject ID - subjects_dir : str, optional - Path to FreeSurfer subject directory. If not set, will inherit from - the environmental variable $SUBJECTS_DIR. Default: None - - Returns - ------- - subject_id : str - FreeSurfer subject ID - subjects_dir : str - Path to subject directory with `subject_id` - """ - from netneurotools.utils import check_fs_subjid - - # check for FreeSurfer install w/fsaverage; otherwise, fetch required - try: - subject_id, subjects_dir = check_fs_subjid(subject_id, subjects_dir) - except FileNotFoundError: - if 'fsaverage' not in subject_id: - raise ValueError('Provided subject {} does not exist in provided ' - 'subjects_dir {}' - .format(subject_id, subjects_dir)) from None - from netneurotools.datasets import fetch_fsaverage - from netneurotools.datasets.utils import _get_data_dir - fetch_fsaverage(subject_id) - subjects_dir = os.path.join(_get_data_dir(), 'tpl-fsaverage') - subject_id, subjects_dir = check_fs_subjid(subject_id, subjects_dir) - - return subject_id, subjects_dir - - def plot_fsaverage(data, *, lhannot, rhannot, order='lr', mask=None, noplot=None, subject_id='fsaverage', subjects_dir=None, vmin=None, vmax=None, **kwargs): @@ -503,7 +266,11 @@ def plot_fsaverage(data, *, lhannot, rhannot, order='lr', mask=None, ... rhannot=schaefer.rh) # doctest: +SKIP """ - subject_id, subjects_dir = _get_fs_subjid(subject_id, subjects_dir) + def _decode_list(vals): + """List decoder.""" + return [val.decode() if hasattr(val, 'decode') else val for val in vals] + + subject_id, subjects_dir = _get_freesurfer_subjid(subject_id, subjects_dir) # cast data to float (required for NaNs) data = np.asarray(data, dtype='float') @@ -521,7 +288,7 @@ def plot_fsaverage(data, *, lhannot, rhannot, order='lr', mask=None, vmax = np.nanpercentile(data, 97.5) # parcels that should not be included in parcellation - drop = FSIGNORE.copy() + drop = FREESURFER_IGNORE.copy() if noplot is not None: if isinstance(noplot, str): noplot = [noplot] @@ -533,7 +300,7 @@ def plot_fsaverage(data, *, lhannot, rhannot, order='lr', mask=None, # loads annotation data for hemisphere, including vertex `labels`! if not annot.startswith(os.path.abspath(os.sep)): annot = os.path.join(subjects_dir, subject_id, 'label', annot) - labels, ctab, names = nib.freesurfer.read_annot(annot) + labels, _, names = nib.freesurfer.read_annot(annot) names = _decode_list(names) # get appropriate data, accounting for hemispheric asymmetry @@ -637,7 +404,7 @@ def plot_fsvertex(data, *, order='lr', surf='pial', views='lat', raise ImportError('Cannot use plot_fsaverage() if pysurfer is not ' 'installed. Please install pysurfer and try again.') from None - subject_id, subjects_dir = _get_fs_subjid(subject_id, subjects_dir) + subject_id, subjects_dir = _get_freesurfer_subjid(subject_id, subjects_dir) # cast data to float (required for NaNs) data = np.asarray(data, dtype='float') @@ -712,91 +479,3 @@ def plot_fsvertex(data, *, order='lr', surf='pial', views='lat', surf[n].render() return brain - - -def plot_point_brain(data, coords, views=None, views_orientation='vertical', - views_size=(4, 2.4), cbar=False, robust=True, size=50, - **kwargs): - """ - Plot `data` as a cloud of points in 3D space based on specified `coords`. 
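# Hedged usage sketch for plot_fsaverage after the move above, following the
# same pattern as the new pysurfer test: one value per Cammoun scale-033
# parcel plotted on fsaverage5. Requires pysurfer/mayavi; files are fetched
# on demand by the datasets module.
import numpy as np
from netneurotools import datasets, plotting

lhannot, rhannot = datasets.fetch_cammoun2012('fsaverage5')['scale033']
data = np.random.rand(68)                             # 68 cortical parcels at scale 033
brain = plotting.plot_fsaverage(data, lhannot=lhannot, rhannot=rhannot,
                                subject_id='fsaverage5')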
- - Parameters - ---------- - data : (N,) array_like - Data for an `N` node parcellation; determines color of points - coords : (N, 3) array_like - x, y, z coordinates for `N` node parcellation - views : list, optional - List specifying which views to use. Can be any of {'sagittal', 'sag', - 'coronal', 'cor', 'axial', 'ax'}. If not specified will use 'sagittal' - and 'axial'. Default: None - views_orientation: str, optional - Orientation of the views. Can be either 'vertical' or 'horizontal'. - Default: 'vertical'. - views_size : tuple, optional - Figure size of each view. Default: (4, 2.4) - cbar : bool, optional - Whether to also show colorbar. Default: False - robust : bool, optional - Whether to use robust calculation of `vmin` and `vmax` for color scale. - size : int, optional - Size of points on plot. Default: 50 - **kwargs - Key-value pairs passed to `matplotlib.axes.Axis.scatter` - - Returns - ------- - fig : :class:`matplotlib.figure.Figure` - """ - _views = dict(sagittal=(0, 180), sag=(0, 180), - axial=(90, 180), ax=(90, 180), - coronal=(0, 90), cor=(0, 90)) - - x, y, z = coords[:, 0], coords[:, 1], coords[:, 2] - - if views is None: - views = [_views[f] for f in ['sagittal', 'axial']] - else: - if not isinstance(views, Iterable) or isinstance(views, str): - views = [views] - views = [_views[f] for f in views] - - if views_orientation == 'vertical': - ncols, nrows = 1, len(views) - elif views_orientation == 'horizontal': - ncols, nrows = len(views), 1 - figsize = (ncols * views_size[0], nrows * views_size[1]) - - # create figure and axes (3d projections) - fig, axes = plt.subplots(ncols=ncols, nrows=nrows, - figsize=figsize, - subplot_kw=dict(projection='3d')) - - opts = dict(linewidth=0.5, edgecolor='gray', cmap='viridis') - if robust: - vmin, vmax = np.percentile(data, [2.5, 97.5]) - opts.update(dict(vmin=vmin, vmax=vmax)) - opts.update(kwargs) - - # iterate through saggital/axial views and plot, rotating as needed - for n, view in enumerate(views): - # if only one view then axes is not a list! 
- ax = axes[n] if len(views) > 1 else axes - # make the actual scatterplot and update the view / aspect ratios - col = ax.scatter(x, y, z, c=data, s=size, **opts) - ax.view_init(*view) - ax.axis('off') - scaling = np.array([ax.get_xlim(), - ax.get_ylim(), - ax.get_zlim()]) - ax.set_box_aspect(tuple(scaling[:, 1] - scaling[:, 0])) - - fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0, wspace=0) - - # add colorbar to axes - if cbar: - cbar = fig.colorbar(col, ax=axes.flatten(), - drawedges=False, shrink=0.7) - cbar.outline.set_linewidth(0) - - return fig diff --git a/netneurotools/plotting/pyvista_plotters.py b/netneurotools/plotting/pyvista_plotters.py new file mode 100644 index 0000000..7b17565 --- /dev/null +++ b/netneurotools/plotting/pyvista_plotters.py @@ -0,0 +1,6 @@ +"""Functions for pyvista-based plotting.""" + + +def pv_plot_surface(): + """Plot a surface using PyVista.""" + pass diff --git a/netneurotools/plotting/tests/__init__.py b/netneurotools/plotting/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/plotting/tests/test_color_utils.py b/netneurotools/plotting/tests/test_color_utils.py new file mode 100644 index 0000000..3bd55c1 --- /dev/null +++ b/netneurotools/plotting/tests/test_color_utils.py @@ -0,0 +1,10 @@ +"""For testing netneurotools.plotting.color_utils functionality.""" + + +def test_register_cmaps(): + """Test registering colormaps.""" + import matplotlib + if "justine" in matplotlib.colormaps: + assert True + else: + assert False diff --git a/netneurotools/plotting/tests/test_mpl.py b/netneurotools/plotting/tests/test_mpl.py new file mode 100644 index 0000000..1d7d79a --- /dev/null +++ b/netneurotools/plotting/tests/test_mpl.py @@ -0,0 +1,38 @@ +"""For testing netneurotools.plotting.mpl_plotters functionality.""" + +import numpy as np +import matplotlib.pyplot as plt +from netneurotools import plotting + + +def test_grid_communities(): + """Test _grid_communities function.""" + comms = np.asarray([0, 0, 0, 0, 1, 1, 1, 1, 2, 2]) + # check that comms with / without 0 community label yields same output + assert np.allclose(plotting._grid_communities(comms), [0, 4, 8, 10]) + assert np.allclose(plotting._grid_communities(comms + 1), [0, 4, 8, 10]) + + +def test_sort_communities(): + """Test sort_communities function.""" + data = np.arange(9).reshape(3, 3) + comms = np.asarray([0, 0, 2]) + # check that comms with / without 0 community label yields same output + assert np.allclose(plotting._sort_communities(data, comms), [1, 0, 2]) + assert np.allclose(plotting._sort_communities(data, comms + 1), [1, 0, 2]) + + +def test_plot_mod_heatmap(): + """Test plot_mod_heatmap function.""" + data = np.random.rand(100, 100) + comms = np.random.choice(4, size=(100,)) + ax = plotting.plot_mod_heatmap(data, comms) + assert isinstance(ax, plt.Axes) + + +def test_plot_point_brain(): + """Test plot_point_brain function.""" + data = np.random.rand(100) + coords = np.random.rand(100, 3) + out = plotting.plot_point_brain(data, coords) + assert isinstance(out, plt.Figure) diff --git a/netneurotools/plotting/tests/test_pysurfer.py b/netneurotools/plotting/tests/test_pysurfer.py new file mode 100644 index 0000000..3133b3c --- /dev/null +++ b/netneurotools/plotting/tests/test_pysurfer.py @@ -0,0 +1,28 @@ +"""For testing netneurotools.plotting.pysurfer_plotters functionality.""" + +import pytest +import numpy as np +from netneurotools import datasets, plotting + + +@pytest.mark.filterwarnings('ignore') +def test_plot_fsvertex(): + """Test 
plotting on a freesurfer vertex.""" + surfer = pytest.importorskip('surfer') + + data = np.random.rand(20484) + brain = plotting.plot_fsvertex(data, subject_id='fsaverage5', + offscreen=True) + assert isinstance(brain, surfer.Brain) + + +@pytest.mark.filterwarnings('ignore') +def test_plot_fsaverage(): + """Test plotting on a freesurfer average brain.""" + surfer = pytest.importorskip('surfer') + + data = np.random.rand(68) + lhannot, rhannot = datasets.fetch_cammoun2012('fsaverage5')['scale033'] + brain = plotting.plot_fsaverage(data, lhannot=lhannot, rhannot=rhannot, + subject_id='fsaverage5', offscreen=True) + assert isinstance(brain, surfer.Brain) diff --git a/netneurotools/plotting/tests/test_pyvista.py b/netneurotools/plotting/tests/test_pyvista.py new file mode 100644 index 0000000..0b87931 --- /dev/null +++ b/netneurotools/plotting/tests/test_pyvista.py @@ -0,0 +1 @@ +"""For testing netneurotools.plotting.pyvista_plotters functionality.""" diff --git a/netneurotools/spatial/__init__.py b/netneurotools/spatial/__init__.py new file mode 100644 index 0000000..a958655 --- /dev/null +++ b/netneurotools/spatial/__init__.py @@ -0,0 +1,12 @@ +"""Functions for handling spatial brain data.""" + + +from .spatial_stats import ( + morans_i, local_morans_i +) + + +__all__ = [ + # spatial_stats + 'morans_i', 'local_morans_i' +] diff --git a/netneurotools/spatial/gaussian_random_field.py b/netneurotools/spatial/gaussian_random_field.py new file mode 100644 index 0000000..7b40565 --- /dev/null +++ b/netneurotools/spatial/gaussian_random_field.py @@ -0,0 +1 @@ +"""Functions for working with Gaussian random fields.""" diff --git a/netneurotools/spatial/spatial_stats.py b/netneurotools/spatial/spatial_stats.py new file mode 100644 index 0000000..54baddf --- /dev/null +++ b/netneurotools/spatial/spatial_stats.py @@ -0,0 +1,11 @@ +"""Functions for calculating spatial statistics.""" + + +def morans_i(): + """Calculate Moran's I for spatial autocorrelation.""" + pass + + +def local_morans_i(): + """Calculate local Moran's I for spatial autocorrelation.""" + pass diff --git a/netneurotools/spatial/tests/__init__.py b/netneurotools/spatial/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/spatial/tests/test_grf.py b/netneurotools/spatial/tests/test_grf.py new file mode 100644 index 0000000..962bdb9 --- /dev/null +++ b/netneurotools/spatial/tests/test_grf.py @@ -0,0 +1 @@ +"""For testing netneurotools.spatial.gaussian_random_field functionality.""" diff --git a/netneurotools/spatial/tests/test_spatialstats.py b/netneurotools/spatial/tests/test_spatialstats.py new file mode 100644 index 0000000..fa9c7f6 --- /dev/null +++ b/netneurotools/spatial/tests/test_spatialstats.py @@ -0,0 +1 @@ +"""For testing netneurotools.spatial.spatial_stats functionality.""" diff --git a/netneurotools/stats.py b/netneurotools/stats.py deleted file mode 100644 index 952995b..0000000 --- a/netneurotools/stats.py +++ /dev/null @@ -1,1593 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for performing statistical preprocessing and analyses.""" - -import warnings - -import numpy as np -from tqdm import tqdm -from itertools import combinations -from scipy import optimize, spatial, special, stats as sstats -try: # scipy >= 1.8.0 - from scipy.stats._stats_py import _chk2_asarray -except ImportError: # scipy < 1.8.0 - from scipy.stats.stats import _chk2_asarray -from sklearn.utils.validation import check_random_state -from sklearn.linear_model import LinearRegression -from joblib import Parallel, 
delayed - - -from . import utils -from .metrics import _graph_laplacian - -try: - from numba import njit - use_numba = True -except ImportError: - use_numba = False - - -def residualize(X, Y, Xc=None, Yc=None, normalize=True, add_intercept=True): - """ - Return residuals of regression equation from `Y ~ X`. - - Parameters - ---------- - X : (N[, R]) array_like - Coefficient matrix of `R` variables for `N` subjects - Y : (N[, F]) array_like - Dependent variable matrix of `F` variables for `N` subjects - Xc : (M[, R]) array_like, optional - Coefficient matrix of `R` variables for `M` subjects. If not specified - then `X` is used to estimate betas. Default: None - Yc : (M[, F]) array_like, optional - Dependent variable matrix of `F` variables for `M` subjects. If not - specified then `Y` is used to estimate betas. Default: None - normalize : bool, optional - Whether to normalize (i.e., z-score) residuals. Will use residuals from - `Yc ~ Xc` for generating mean and variance. Default: True - add_intercept : bool, optional - Whether to add intercept to `X` (and `Xc`, if provided). The intercept - will not be removed, just used in beta estimation. Default: True - - Returns - ------- - Yr : (N, F) numpy.ndarray - Residuals of `Y ~ X` - - Notes - ----- - If both `Xc` and `Yc` are provided, these are used to calculate betas which - are then applied to `X` and `Y`. - """ - if ((Yc is None and Xc is not None) or (Yc is not None and Xc is None)): - raise ValueError('If processing against a comparative group, you must ' - 'provide both `Xc` and `Yc`.') - - X, Y = np.asarray(X), np.asarray(Y) - - if Yc is None: - Xc, Yc = X.copy(), Y.copy() - else: - Xc, Yc = np.asarray(Xc), np.asarray(Yc) - - # add intercept to regressors if requested and calculate fit - if add_intercept: - X, Xc = utils.add_constant(X), utils.add_constant(Xc) - betas, *rest = np.linalg.lstsq(Xc, Yc, rcond=None) - - # remove intercept from regressors and betas for calculation of residuals - if add_intercept: - betas = betas[:-1] - X, Xc = X[:, :-1], Xc[:, :-1] - - # calculate residuals - Yr = Y - (X @ betas) - Ycr = Yc - (Xc @ betas) - - if normalize: - Yr = sstats.zmap(Yr, compare=Ycr) - - return Yr - - -def get_mad_outliers(data, thresh=3.5): - """ - Determine which samples in `data` are outliers. - - Uses the Median Absolute Deviation for determining whether datapoints are - outliers - - Parameters - ---------- - data : (N, M) array_like - Data array where `N` is samples and `M` is features - thresh : float, optional - Modified z-score. Observations with a modified z-score (based on the - median absolute deviation) greater than this value will be classified - as outliers. Default: 3.5 - - Returns - ------- - outliers : (N,) numpy.ndarray - Boolean array where True indicates an outlier - - Notes - ----- - Taken directly from https://stackoverflow.com/a/22357811 - - References - ---------- - Boris Iglewicz and David Hoaglin (1993), "Volume 16: How to Detect and - Handle Outliers", The ASQC Basic References in Quality Control: Statistical - Techniques, Edward F. Mykytka, Ph.D., Editor. 
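# Hedged sketch of the residualize() routine shown above. The
# `netneurotools.stats` import path is the pre-refactor one (this patch
# removes stats.py), so the function may live elsewhere afterwards; the
# covariate/feature shapes below are arbitrary illustrations.
import numpy as np
from netneurotools import stats

rng = np.random.default_rng(42)
X = rng.normal(size=(100, 2))                         # covariates (e.g., age, motion)
Y = X @ np.array([[0.5, -0.2], [0.3, 0.8]]) + rng.normal(size=(100, 2))

Yr = stats.residualize(X, Y)                          # residuals of Y ~ X, z-scored
assert Yr.shape == (100, 2)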
- - Examples - -------- - >>> from netneurotools import stats - - Create array with three samples of four features each: - - >>> X = np.array([[0, 5, 10, 15], [1, 4, 11, 16], [100, 100, 100, 100]]) - >>> X - array([[ 0, 5, 10, 15], - [ 1, 4, 11, 16], - [100, 100, 100, 100]]) - - Determine which sample(s) is outlier: - - >>> outliers = stats.get_mad_outliers(X) - >>> outliers - array([False, False, True]) - """ - data = np.asarray(data) - - if data.ndim == 1: - data = np.vstack(data) - if data.ndim > 2: - data = data.reshape(len(data), -1) - - median = np.nanmedian(data, axis=0) - diff = np.nansum((data - median)**2, axis=-1) - diff = np.sqrt(diff) - med_abs_deviation = np.median(diff) - - modified_z_score = 0.6745 * diff / med_abs_deviation - - return modified_z_score > thresh - - -def permtest_1samp(a, popmean, axis=0, n_perm=1000, seed=0): - """ - Non-parametric equivalent of :py:func:`scipy.stats.ttest_1samp`. - - Generates two-tailed p-value for hypothesis of whether `a` differs from - `popmean` using permutation tests - - Parameters - ---------- - a : array_like - Sample observations - popmean : float or array_like - Expected valued in null hypothesis. If array_like then it must have the - same shape as `a` excluding the `axis` dimension - axis : int or None, optional - Axis along which to compute test. If None, compute over the whole array - of `a`. Default: 0 - n_perm : int, optional - Number of permutations to assess. Unless `a` is very small along `axis` - this will approximate a randomization test via Monte Carlo simulations. - Default: 1000 - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Set to None for "randomness". - Default: 0 - - Returns - ------- - stat : float or numpy.ndarray - Difference from `popmean` - pvalue : float or numpy.ndarray - Non-parametric p-value - - Notes - ----- - Providing multiple values to `popmean` to run *independent* tests in - parallel is not currently supported. - - The lowest p-value that can be returned by this function is equal to 1 / - (`n_perm` + 1). - - Examples - -------- - >>> from netneurotools import stats - >>> np.random.seed(7654567) # set random seed for reproducible results - >>> rvs = np.random.normal(loc=5, scale=10, size=(50, 2)) - - Test if mean of random sample is equal to true mean, and different mean. We - reject the null hypothesis in the second case and don't reject it in the - first case. - - >>> stats.permtest_1samp(rvs, 5.0) - (array([-0.985602 , -0.05204969]), array([0.48551449, 0.95904096])) - >>> stats.permtest_1samp(rvs, 0.0) - (array([4.014398 , 4.94795031]), array([0.00699301, 0.000999 ])) - - Example using axis and non-scalar dimension for population mean - - >>> stats.permtest_1samp(rvs, [5.0, 0.0]) - (array([-0.985602 , 4.94795031]), array([0.48551449, 0.000999 ])) - >>> stats.permtest_1samp(rvs.T, [5.0, 0.0], axis=1) - (array([-0.985602 , 4.94795031]), array([0.51548452, 0.000999 ])) - """ - a, popmean, axis = _chk2_asarray(a, popmean, axis) - rs = check_random_state(seed) - - if a.size == 0: - return np.nan, np.nan - - # ensure popmean will broadcast to `a` correctly - if popmean.ndim != a.ndim: - popmean = np.expand_dims(popmean, axis=axis) - - # center `a` around `popmean` and calculate original mean - zeroed = a - popmean - true_mean = zeroed.mean(axis=axis) / 1 - abs_mean = np.abs(true_mean) - - # this for loop is not _the fastest_ but is memory efficient - # the broadcasting alt. 
would mean storing zeroed.size * n_perm in memory - permutations = np.ones(true_mean.shape) - for _ in range(n_perm): - flipped = zeroed * rs.choice([-1, 1], size=zeroed.shape) # sign flip - permutations += np.abs(flipped.mean(axis=axis)) >= abs_mean - - pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_mean - - return true_mean, pvals - - -def permtest_rel(a, b, axis=0, n_perm=1000, seed=0): - """ - Non-parametric equivalent of :py:func:`scipy.stats.ttest_rel`. - - Generates two-tailed p-value for hypothesis of whether related samples `a` - and `b` differ using permutation tests - - Parameters - ---------- - a, b : array_like - Sample observations. These arrays must have the same shape. - axis : int or None, optional - Axis along which to compute test. If None, compute over whole arrays - of `a` and `b`. Default: 0 - n_perm : int, optional - Number of permutations to assess. Unless `a` and `b` are very small - along `axis` this will approximate a randomization test via Monte - Carlo simulations. Default: 1000 - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Set to None for "randomness". - Default: 0 - - Returns - ------- - stat : float or numpy.ndarray - Average difference between `a` and `b` - pvalue : float or numpy.ndarray - Non-parametric p-value - - Notes - ----- - The lowest p-value that can be returned by this function is equal to 1 / - (`n_perm` + 1). - - Examples - -------- - >>> from netneurotools import stats - - >>> np.random.seed(12345678) # set random seed for reproducible results - >>> rvs1 = np.random.normal(loc=5, scale=10, size=500) - >>> rvs2 = (np.random.normal(loc=5, scale=10, size=500) - ... + np.random.normal(scale=0.2, size=500)) - >>> stats.permtest_rel(rvs1, rvs2) # doctest: +SKIP - (-0.16506275161572695, 0.8021978021978022) - - >>> rvs3 = (np.random.normal(loc=8, scale=10, size=500) - ... + np.random.normal(scale=0.2, size=500)) - >>> stats.permtest_rel(rvs1, rvs3) # doctest: +SKIP - (2.40533726097883, 0.000999000999000999) - """ - a, b, axis = _chk2_asarray(a, b, axis) - rs = check_random_state(seed) - - if a.shape[axis] != b.shape[axis]: - raise ValueError('Provided arrays do not have same length along axis') - - if a.size == 0 or b.size == 0: - return np.nan, np.nan - - # calculate original difference in means - ab = np.stack([a, b], axis=0) - if ab.ndim < 3: - ab = np.expand_dims(ab, axis=-1) - true_diff = np.squeeze(np.diff(ab, axis=0)).mean(axis=axis) / 1 - abs_true = np.abs(true_diff) - - # idx array - reidx = np.meshgrid(*[range(f) for f in ab.shape], indexing='ij') - - permutations = np.ones(true_diff.shape) - for _ in range(n_perm): - # use this to re-index (i.e., swap along) the first axis of `ab` - swap = rs.random_sample(ab.shape[:-1]).argsort(axis=axis) - reidx[0] = np.repeat(swap[..., np.newaxis], ab.shape[-1], axis=-1) - # recompute difference between `a` and `b` (i.e., first axis of `ab`) - pdiff = np.squeeze(np.diff(ab[tuple(reidx)], axis=0)).mean(axis=axis) - permutations += np.abs(pdiff) >= abs_true - - pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_diff - - return true_diff, pvals - - -def permtest_pearsonr(a, b, axis=0, n_perm=1000, resamples=None, seed=0): - """ - Non-parametric equivalent of :py:func:`scipy.stats.pearsonr`. - - Generates two-tailed p-value for hypothesis of whether samples `a` and `b` - are correlated using permutation tests - - Parameters - ---------- - a,b : (N[, M]) array_like - Sample observations. 
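# Hedged sketch of the sign-flip null used by permtest_1samp above: center the
# sample on the hypothesised mean, flip signs at random, and count how often
# the permuted mean is at least as extreme as the observed one. The sample
# size and effect size are arbitrary illustrations.
import numpy as np

rng = np.random.RandomState(0)
sample = rng.normal(loc=0.3, scale=1.0, size=50)
zeroed = sample - 0.0                     # center on the null mean (popmean = 0)
true_mean = zeroed.mean()

n_perm, exceed = 1000, 1                  # start at 1 so the observed stat counts
for _ in range(n_perm):
    flipped = zeroed * rng.choice([-1, 1], size=zeroed.shape)
    exceed += np.abs(flipped.mean()) >= np.abs(true_mean)

pval = exceed / (n_perm + 1)              # smallest attainable p is 1 / (n_perm + 1)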
These arrays must have the same length and either - an equivalent number of columns or be broadcastable - axis : int or None, optional - Axis along which to compute test. If None, compute over whole arrays - of `a` and `b`. Default: 0 - n_perm : int, optional - Number of permutations to assess. Unless `a` and `b` are very small - along `axis` this will approximate a randomization test via Monte - Carlo simulations. Default: 1000 - resamples : (N, P) array_like, optional - Resampling array used to shuffle `a` when generating null distribution - of correlations. This array must have the same length as `a` and `b` - and should have at least the same number of columns as `n_perm` (if it - has more then only `n_perm` columns will be used. When not specified a - standard permutation is used to shuffle `a`. Default: None - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Set to None for "randomness". - Default: 0 - - Returns - ------- - corr : float or numpyndarray - Correlations - pvalue : float or numpy.ndarray - Non-parametric p-value - - Notes - ----- - The lowest p-value that can be returned by this function is equal to 1 / - (`n_perm` + 1). - - Examples - -------- - >>> from netneurotools import datasets, stats - - >>> np.random.seed(12345678) # set random seed for reproducible results - >>> x, y = datasets.make_correlated_xy(corr=0.1, size=100) - >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP - (0.10032564626876286, 0.3046953046953047) - - >>> x, y = datasets.make_correlated_xy(corr=0.5, size=100) - >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP - (0.500040365781984, 0.000999000999000999) - - Also works with multiple columns by either broadcasting the smaller array - to the larger: - - >>> z = x + np.random.normal(loc=1, size=100) - >>> stats.permtest_pearsonr(x, np.column_stack([y, z])) - (array([0.50004037, 0.25843187]), array([0.000999 , 0.01098901])) - - or by using matching columns in the two arrays (e.g., `x` and `y` vs - `a` and `b`): - - >>> a, b = datasets.make_correlated_xy(corr=0.9, size=100) - >>> stats.permtest_pearsonr(np.column_stack([x, a]), np.column_stack([y, b])) - (array([0.50004037, 0.89927523]), array([0.000999, 0.000999])) - """ # noqa - a, b, axis = _chk2_asarray(a, b, axis) - rs = check_random_state(seed) - - if len(a) != len(b): - raise ValueError('Provided arrays do not have same length') - - if a.size == 0 or b.size == 0: - return np.nan, np.nan - - if resamples is not None: - if n_perm > resamples.shape[-1]: - raise ValueError('Number of permutations requested exceeds size ' - 'of resampling array.') - - # divide by one forces coercion to float if ndim = 0 - true_corr = efficient_pearsonr(a, b)[0] / 1 - abs_true = np.abs(true_corr) - - permutations = np.ones(true_corr.shape) - for perm in range(n_perm): - # permute `a` and determine whether correlations exceed original - if resamples is None: - ap = a[rs.permutation(len(a))] - else: - ap = a[resamples[:, perm]] - permutations += np.abs(efficient_pearsonr(ap, b)[0]) >= abs_true - - pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_corr - - return true_corr, pvals - - -def efficient_pearsonr(a, b, ddof=1, nan_policy='propagate'): - """ - Compute correlation of matching columns in `a` and `b`. - - Parameters - ---------- - a,b : array_like - Sample observations. 
These arrays must have the same length and either - an equivalent number of columns or be broadcastable - ddof : int, optional - Degrees of freedom correction in the calculation of the standard - deviation. Default: 1 - nan_policy : bool, optional - Defines how to handle when input contains nan. 'propagate' returns nan, - 'raise' throws an error, 'omit' performs the calculations ignoring nan - values. Default: 'propagate' - - Returns - ------- - corr : float or numpy.ndarray - Pearson's correlation coefficient between matching columns of inputs - pval : float or numpy.ndarray - Two-tailed p-values - - Notes - ----- - If either input contains nan and nan_policy is set to 'omit', both arrays - will be masked to omit the nan entries. - - Examples - -------- - >>> from netneurotools import datasets, stats - - Generate some not-very-correlated and some highly-correlated data: - - >>> np.random.seed(12345678) # set random seed for reproducible results - >>> x1, y1 = datasets.make_correlated_xy(corr=0.1, size=100) - >>> x2, y2 = datasets.make_correlated_xy(corr=0.8, size=100) - - Calculate both correlations simultaneously: - - >>> stats.efficient_pearsonr(np.c_[x1, x2], np.c_[y1, y2]) - (array([0.10032565, 0.79961189]), array([3.20636135e-01, 1.97429944e-23])) - """ - a, b, axis = _chk2_asarray(a, b, 0) - if len(a) != len(b): - raise ValueError('Provided arrays do not have same length') - - if a.size == 0 or b.size == 0: - return np.nan, np.nan - - if nan_policy not in ('propagate', 'raise', 'omit'): - raise ValueError(f'Value for nan_policy "{nan_policy}" not allowed') - - a, b = a.reshape(len(a), -1), b.reshape(len(b), -1) - if (a.shape[1] != b.shape[1]): - a, b = np.broadcast_arrays(a, b) - - mask = np.logical_or(np.isnan(a), np.isnan(b)) - if nan_policy == 'raise' and np.any(mask): - raise ValueError('Input cannot contain NaN when nan_policy is "omit"') - elif nan_policy == 'omit': - # avoid making copies of the data, if possible - a = np.ma.masked_array(a, mask, copy=False, fill_value=np.nan) - b = np.ma.masked_array(b, mask, copy=False, fill_value=np.nan) - - with np.errstate(invalid='ignore'): - corr = (sstats.zscore(a, ddof=ddof, nan_policy=nan_policy) - * sstats.zscore(b, ddof=ddof, nan_policy=nan_policy)) - - sumfunc, n_obs = np.sum, len(a) - if nan_policy == 'omit': - corr = corr.filled(np.nan) - sumfunc = np.nansum - n_obs = np.squeeze(np.sum(np.logical_not(np.isnan(corr)), axis=0)) - - corr = sumfunc(corr, axis=0) / (n_obs - 1) - corr = np.squeeze(np.clip(corr, -1, 1)) / 1 - - # taken from scipy.stats - ab = (n_obs / 2) - 1 - prob = 2 * special.btdtr(ab, ab, 0.5 * (1 - np.abs(corr))) - - return corr, prob - - -def _gen_rotation(seed=None): - """ - Generate random matrix for rotating spherical coordinates. 
- - Parameters - ---------- - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation - - Returns - ------- - rotate_{l,r} : (3, 3) numpy.ndarray - Rotations for left and right hemisphere coordinates, respectively - """ - rs = check_random_state(seed) - - # for reflecting across Y-Z plane - reflect = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]]) - - # generate rotation for left - rotate_l, temp = np.linalg.qr(rs.normal(size=(3, 3))) - rotate_l = rotate_l @ np.diag(np.sign(np.diag(temp))) - if np.linalg.det(rotate_l) < 0: - rotate_l[:, 0] = -rotate_l[:, 0] - - # reflect the left rotation across Y-Z plane - rotate_r = reflect @ rotate_l @ reflect - - return rotate_l, rotate_r - - -def gen_spinsamples(coords, hemiid, n_rotate=1000, check_duplicates=True, - method='original', exact=False, seed=None, verbose=False, - return_cost=False): - """ - Return a resampling array for `coords` obtained from rotations / spins. - - Using the method initially proposed in [ST1]_ (and later modified + updated - based on findings in [ST2]_ and [ST3]_), this function applies random - rotations to the user-supplied `coords` in order to generate a resampling - array that preserves its spatial embedding. Rotations are generated for one - hemisphere and mirrored for the other (see `hemiid` for more information). - - Due to irregular sampling of `coords` and the randomness of the rotations - it is possible that some "rotations" may resample with replacement (i.e., - will not be a true permutation). The likelihood of this can be reduced by - either increasing the sampling density of `coords` or changing the - ``method`` parameter (see Notes for more information on the latter). - - Parameters - ---------- - coords : (N, 3) array_like - X, Y, Z coordinates of `N` nodes/parcels/regions/vertices defined on a - sphere - hemiid : (N,) array_like - Array denoting hemisphere designation of coordinates in `coords`, where - values should be {0, 1} denoting the different hemispheres. Rotations - are generated for one hemisphere and mirrored across the y-axis for the - other hemisphere. - n_rotate : int, optional - Number of rotations to generate. Default: 1000 - check_duplicates : bool, optional - Whether to check for and attempt to avoid duplicate resamplings. A - warnings will be raised if duplicates cannot be avoided. Setting to - True may increase the runtime of this function! Default: True - method : {'original', 'vasa', 'hungarian'}, optional - Method by which to match non- and rotated coordinates. Specifying - 'original' will use the method described in [ST1]_. Specfying 'vasa' - will use the method described in [ST4]_. Specfying 'hungarian' will use - the Hungarian algorithm to minimize the global cost of reassignment - (will dramatically increase runtime). Default: 'original' - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Default: None - verbose : bool, optional - Whether to print occasional status messages. Default: False - return_cost : bool, optional - Whether to return cost array (specified as Euclidean distance) for each - coordinate for each rotation Default: True - - Returns - ------- - spinsamples : (N, `n_rotate`) numpy.ndarray - Resampling matrix to use in permuting data based on supplied `coords`. - cost : (N, `n_rotate`,) numpy.ndarray - Cost (specified as Euclidean distance) of re-assigning each coordinate - for every rotation in `spinsamples`. Only provided if `return_cost` is - True. 
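# Hedged end-to-end sketch tying gen_spinsamples to the `resamples` argument
# of permtest_pearsonr (both part of the stats module removed in this patch,
# so the import path is the pre-refactor one). The coordinates are random
# stand-ins; real usage would pass parcel centroids on a spherical surface.
import numpy as np
from netneurotools import stats

rng = np.random.RandomState(1)
coords = rng.normal(size=(100, 3))
coords /= np.linalg.norm(coords, axis=1, keepdims=True)   # project onto unit sphere
hemi = (coords[:, 0] > 0).astype(int)                      # toy hemisphere labels

spins = stats.gen_spinsamples(coords, hemi, n_rotate=100, seed=1)
x, y = rng.normal(size=(2, 100))
corr, pval = stats.permtest_pearsonr(x, y, n_perm=100, resamples=spins)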
- - Notes - ----- - By default, this function uses the minimum Euclidean distance between the - original coordinates and the new, rotated coordinates to generate a - resampling array after each spin. Unfortunately, this can (with some - frequency) lead to multiple coordinates being re-assigned the same value: - - >>> from netneurotools import stats as nnstats - >>> coords = [[0, 0, 1], [1, 0, 0], [0, 0, 1], [1, 0, 0]] - >>> hemi = [0, 0, 1, 1] - >>> nnstats.gen_spinsamples(coords, hemi, n_rotate=1, seed=1, - ... method='original', check_duplicates=False) - array([[0], - [0], - [2], - [3]]) - - While this is reasonable in most circumstances, if you feel incredibly - strongly about having a perfect "permutation" (i.e., all indices appear - once and exactly once in the resampling), you can set the ``method`` - parameter to either 'vasa' or 'hungarian': - - >>> nnstats.gen_spinsamples(coords, hemi, n_rotate=1, seed=1, - ... method='vasa', check_duplicates=False) - array([[1], - [0], - [2], - [3]]) - >>> nnstats.gen_spinsamples(coords, hemi, n_rotate=1, seed=1, - ... method='hungarian', check_duplicates=False) - array([[0], - [1], - [2], - [3]]) - - Note that setting this parameter may increase the runtime of the function - (especially for `method='hungarian'`). Refer to [ST1]_ for information on - why the default (i.e., ``exact`` set to False) suffices in most cases. - - For the original MATLAB implementation of this function refer to [ST5]_. - - References - ---------- - .. [ST1] Alexander-Bloch, A., Shou, H., Liu, S., Satterthwaite, T. D., - Glahn, D. C., Shinohara, R. T., Vandekar, S. N., & Raznahan, A. (2018). - On testing for spatial correspondence between maps of human brain - structure and function. NeuroImage, 178, 540-51. - - .. [ST2] Blaser, R., & Fryzlewicz, P. (2016). Random Rotation Ensembles. - Journal of Machine Learning Research, 17(4), 1–26. - - .. [ST3] Lefèvre, J., Pepe, A., Muscato, J., De Guio, F., Girard, N., - Auzias, G., & Germanaud, D. (2018). SPANOL (SPectral ANalysis of Lobes): - A Spectral Clustering Framework for Individual and Group Parcellation of - Cortical Surfaces in Lobes. Frontiers in Neuroscience, 12, 354. - - .. [ST4] Váša, F., Seidlitz, J., Romero-Garcia, R., Whitaker, K. J., - Rosenthal, G., Vértes, P. E., ... & Jones, P. B. (2018). Adolescent - tuning of association cortex in human structural brain networks. - Cerebral Cortex, 28(1), 281-294. - - .. [ST5] https://github.com/spin-test/spin-test - """ - methods = ['original', 'vasa', 'hungarian'] - if method not in methods: - raise ValueError('Provided method "{}" invalid. Must be one of {}.' - .format(method, methods)) - - if exact: - warnings.warn('The `exact` parameter will no longer be supported in ' - 'an upcoming release. 
Please use the `method` parameter ' - 'instead.', DeprecationWarning, stacklevel=3) - if exact == 'vasa' and method == 'original': - method = 'vasa' - elif exact and method == 'original': - method = 'hungarian' - - seed = check_random_state(seed) - - coords = np.asanyarray(coords) - hemiid = np.squeeze(np.asanyarray(hemiid, dtype='int8')) - - # check supplied coordinate shape - if coords.shape[-1] != 3 or coords.squeeze().ndim != 2: - raise ValueError('Provided `coords` must be of shape (N, 3), not {}' - .format(coords.shape)) - - # ensure hemisphere designation array is correct - if hemiid.ndim != 1: - raise ValueError('Provided `hemiid` array must be one-dimensional.') - if len(coords) != len(hemiid): - raise ValueError('Provided `coords` and `hemiid` must have the same ' - 'length. Provided lengths: coords = {}, hemiid = {}' - .format(len(coords), len(hemiid))) - if np.max(hemiid) > 1 or np.min(hemiid) < 0: - raise ValueError('Hemiid must have values in {0, 1} denoting left and ' - 'right hemisphere coordinates, respectively. ' - + 'Provided array contains values: {}' - .format(np.unique(hemiid))) - - # empty array to store resampling indices - spinsamples = np.zeros((len(coords), n_rotate), dtype=int) - cost = np.zeros((len(coords), n_rotate)) - inds = np.arange(len(coords), dtype=int) - - # generate rotations and resampling array! - msg, warned = '', False - for n in range(n_rotate): - count, duplicated = 0, True - - if verbose: - msg = 'Generating spin {:>5} of {:>5}'.format(n, n_rotate) - print(msg, end='\r', flush=True) - - while duplicated and count < 500: - count, duplicated = count + 1, False - resampled = np.zeros(len(coords), dtype='int32') - - # rotate each hemisphere separately - for h, rot in enumerate(_gen_rotation(seed=seed)): - hinds = (hemiid == h) - coor = coords[hinds] - if len(coor) == 0: - continue - - # if we need an "exact" mapping (i.e., each node needs to be - # assigned EXACTLY once) then we have to calculate the full - # distance matrix which is a nightmare with respect to memory - # for anything that isn't parcellated data. - # that is, don't do this with vertex coordinates! - if method == 'vasa': - dist = spatial.distance_matrix(coor, coor @ rot) - # min of max a la Vasa et al., 2018 - col = np.zeros(len(coor), dtype='int32') - for _ in range(len(dist)): - # find parcel whose closest neighbor is farthest away - # overall; assign to that - row = dist.min(axis=1).argmax() - col[row] = dist[row].argmin() - cost[inds[hinds][row], n] = dist[row, col[row]] - # set to -inf and inf so they can't be assigned again - dist[row] = -np.inf - dist[:, col[row]] = np.inf - # optimization of total cost using Hungarian algorithm. 
this - # may result in certain parcels having higher cost than with - # `method='vasa'` but should always result in the total cost - # being lower #tradeoffs - elif method == 'hungarian': - dist = spatial.distance_matrix(coor, coor @ rot) - row, col = optimize.linear_sum_assignment(dist) - cost[hinds, n] = dist[row, col] - # if nodes can be assigned multiple targets, we can simply use - # the absolute minimum of the distances (no optimization - # required) which is _much_ lighter on memory - # huge thanks to https://stackoverflow.com/a/47779290 for this - # memory-efficient method - elif method == 'original': - dist, col = spatial.cKDTree(coor @ rot).query(coor, 1) - cost[hinds, n] = dist - - resampled[hinds] = inds[hinds][col] - - # if we want to check for duplicates ensure that we don't have any - if check_duplicates: - if np.any(np.all(resampled[:, None] == spinsamples[:, :n], 0)): - duplicated = True - # if our "spin" is identical to the input then that's no good - elif np.all(resampled == inds): - duplicated = True - - # if we broke out because we tried 500 rotations and couldn't generate - # a new one, warn that we're using duplicate rotations and give up. - # this should only be triggered if check_duplicates is set to True - if count == 500 and not warned: - warnings.warn( - 'Duplicate rotations used. Check resampling array ' - 'to determine real number of unique permutations.', stacklevel=2) - warned = True - - spinsamples[:, n] = resampled - - if verbose: - print(' ' * len(msg) + '\b' * len(msg), end='', flush=True) - - if return_cost: - return spinsamples, cost - - return spinsamples - - -def get_dominance_stats(X, y, use_adjusted_r_sq=True, verbose=False, n_jobs=1): - """ - Return the dominance analysis statistics for multilinear regression. - - This is a rewritten & simplified version of [DA1]_. It is briefly - tested against the original package, but still in early stages. - Please feel free to report any bugs. - - Warning: Still work-in-progress. Parameters might change! - - Parameters - ---------- - X : (N, M) array_like - Input data - y : (N,) array_like - Target values - use_adjusted_r_sq : bool, optional - Whether to use adjusted r squares. Default: True - verbose : bool, optional - Whether to print debug messages. Default: False - n_jobs : int, optional - The number of jobs to run in parallel. Default: 1 - - Returns - ------- - model_metrics : dict - The dominance metrics, currently containing `individual_dominance`, - `partial_dominance`, `total_dominance`, and `full_r_sq`. - model_r_sq : dict - Contains all model r squares - - Notes - ----- - Example usage - - .. code:: python - - from netneurotools.stats import get_dominance_stats - from sklearn.datasets import load_boston - X, y = load_boston(return_X_y=True) - model_metrics, model_r_sq = get_dominance_stats(X, y) - - To compare with [DA1]_, use `use_adjusted_r_sq=False` - - .. code:: python - - from dominance_analysis import Dominance_Datasets - from dominance_analysis import Dominance - boston_dataset=Dominance_Datasets.get_boston() - dominance_regression=Dominance(data=boston_dataset, - target='House_Price',objective=1) - incr_variable_rsquare=dominance_regression.incremental_rsquare() - dominance_regression.dominance_stats() - - References - ---------- - .. 
[DA1] https://github.com/dominance-analysis/dominance-analysis - - """ - # this helps to remove one element from a tuple - def remove_ret(tpl, elem): - lst = list(tpl) - lst.remove(elem) - return tuple(lst) - - # sklearn linear regression wrapper - def get_reg_r_sq(X, y, use_adjusted_r_sq=True): - lin_reg = LinearRegression() - lin_reg.fit(X, y) - yhat = lin_reg.predict(X) - SS_Residual = sum((y - yhat) ** 2) - SS_Total = sum((y - np.mean(y)) ** 2) - r_squared = 1 - (float(SS_Residual)) / SS_Total - adjusted_r_squared = 1 - (1 - r_squared) * \ - (len(y) - 1) / (len(y) - X.shape[1] - 1) - if use_adjusted_r_sq: - return adjusted_r_squared - else: - return r_squared - - # helper function to compute r_sq for a given idx_tuple - def compute_r_sq(idx_tuple): - return idx_tuple, get_reg_r_sq(X[:, idx_tuple], - y, - use_adjusted_r_sq=use_adjusted_r_sq) - - # generate all predictor combinations in list (num of predictors) of lists - n_predictor = X.shape[-1] - # n_comb_len_group = n_predictor - 1 - predictor_combs = [list(combinations(range(n_predictor), i)) - for i in range(1, n_predictor + 1)] - if verbose: - print(f"[Dominance analysis] Generated \ - {len([v for i in predictor_combs for v in i])} combinations") - - model_r_sq = dict() - results = Parallel(n_jobs=n_jobs)( - delayed(compute_r_sq)(idx_tuple) - for len_group in tqdm(predictor_combs, - desc='num-of-predictor loop', - disable=not verbose) - for idx_tuple in tqdm(len_group, - desc='insider loop', - disable=not verbose)) - - # extract r_sq from results - for idx_tuple, r_sq in results: - model_r_sq[idx_tuple] = r_sq - - if verbose: - print(f"[Dominance analysis] Acquired {len(model_r_sq)} r^2's") - - # getting all model metrics - model_metrics = dict([]) - - # individual dominance - individual_dominance = [] - for i_pred in range(n_predictor): - individual_dominance.append(model_r_sq[(i_pred,)]) - individual_dominance = np.array(individual_dominance).reshape(1, -1) - model_metrics["individual_dominance"] = individual_dominance - - # partial dominance - partial_dominance = [[] for _ in range(n_predictor - 1)] - for i_len in range(n_predictor - 1): - i_len_combs = list(combinations(range(n_predictor), i_len + 2)) - for j_node in range(n_predictor): - j_node_sel = [v for v in i_len_combs if j_node in v] - reduced_list = [remove_ret(comb, j_node) for comb in j_node_sel] - diff_values = [ - model_r_sq[j_node_sel[i]] - model_r_sq[reduced_list[i]] - for i in range(len(reduced_list))] - partial_dominance[i_len].append(np.mean(diff_values)) - - # save partial dominance - partial_dominance = np.array(partial_dominance) - model_metrics["partial_dominance"] = partial_dominance - # get total dominance - total_dominance = np.mean( - np.r_[individual_dominance, partial_dominance], axis=0) - # test and save total dominance - assert np.allclose(total_dominance.sum(), - model_r_sq[tuple(range(n_predictor))]), \ - "Sum of total dominance is not equal to full r square!" - model_metrics["total_dominance"] = total_dominance - # save full r^2 - model_metrics["full_r_sq"] = model_r_sq[tuple(range(n_predictor))] - - return model_metrics, model_r_sq - - -def network_pearsonr(annot1, annot2, weight): - r""" - Calculate pearson correlation between two annotation vectors. - - .. warning:: - Test before use. - - Parameters - ---------- - annot1 : (N,) array_like - First annotation vector, demean will be applied. - annot2 : (N,) array_like - Second annotation vector, demean will be applied. - weight : (N, N) array_like - Weight matrix. Diagonal elements should be 1. 
- - Returns - ------- - corr : float - Network correlation between `annot1` and `annot2` - - Notes - ----- - If Pearson correlation is represented as - - .. math:: - \rho_{x,y} = \dfrac{ - \mathrm{sum}(I \times (\hat{x} \otimes \hat{y})) - }{ - \sigma_x \sigma_y - } - - The network correlation is defined analogously as - - .. math:: - \rho_{x,y,G} = \dfrac{ - \mathrm{sum}(W \times (\hat{x} \otimes \hat{y})) - }{ - \sigma_{x,W} \sigma_{y,W} - } - - where :math:`\hat{x}` and :math:`\hat{y}` are the demeaned annotation vectors, - - The weight matrix :math:`W` is used to represent the network structure. - It is usually in the form of :math:`W = \\exp(-kL)` where :math:`L` is the - length matrix and :math:`k` is a decay parameter. - - Example using shortest path length as weight - - .. code:: python - - spl, _ = distance_wei_floyd(D) # input should be distance matrix - spl_wei = 1 / np.exp(spl) - netcorr = network_pearsonr(annot1, annot2, spl_wei) - - Example using (inverse) effective resistance as weight - - .. code:: python - - R_eff = effective_resistance(W) - R_eff_norm = R_eff / np.max(R_eff) - W = 1 / R_eff_norm - W = W / np.max(W) - np.fill_diagonal(W, 1.0) - netcorr = network_pearsonr(annot1, annot2, W) - - References - ---------- - .. [1] Coscia, M. (2021). Pearson correlations on complex networks. - Journal of Complex Networks, 9(6), cnab036. - https://doi.org/10.1093/comnet/cnab036 - - - See Also - -------- - netneurotools.stats.network_pearsonr_pairwise - """ - annot1 = annot1 - np.mean(annot1) - annot2 = annot2 - np.mean(annot2) - upper = np.sum(np.multiply(weight, np.outer(annot1, annot2))) - lower1 = np.sum(np.multiply(weight, np.outer(annot1, annot1))) - lower2 = np.sum(np.multiply(weight, np.outer(annot2, annot2))) - return upper / np.sqrt(lower1) / np.sqrt(lower2) - - -def network_pearsonr_numba(annot1, annot2, weight): - """ - Numba version of :meth:`netneurotools.stats.network_pearsonr`. - - .. warning:: - Test before use. - - Parameters - ---------- - annot1 : (N,) array_like - First annotation vector, demean will be applied. - annot2 : (N,) array_like - Second annotation vector, demean will be applied. - weight : (N, N) array_like - Weight matrix. Diagonal elements should be 1. - - Returns - ------- - corr : float - Network correlation between `annot1` and `annot2` - """ - n = annot1.shape[0] - annot1 = annot1 - np.mean(annot1) - annot2 = annot2 - np.mean(annot2) - upper, lower1, lower2 = 0.0, 0.0, 0.0 - for i in range(n): - for j in range(n): - upper += annot1[i] * annot2[j] * weight[i, j] - lower1 += annot1[i] * annot1[j] * weight[i, j] - lower2 += annot2[i] * annot2[j] * weight[i, j] - return upper / np.sqrt(lower1) / np.sqrt(lower2) - - -if use_numba: - network_pearsonr_numba = njit(network_pearsonr_numba) - - -def _cross_outer(annot_mat): - """ - Calculate cross outer product of input matrix. - - This functions is only used in `network_pearsonr_pairwise`. 
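# Hedged numeric check for network_pearsonr above: with an identity weight
# matrix (no off-diagonal network structure) the statistic should reduce to
# the ordinary Pearson correlation. The import path is the pre-refactor one.
import numpy as np
from scipy import stats as sstats
from netneurotools import stats

rng = np.random.default_rng(7)
x, y = rng.normal(size=(2, 50))
W = np.eye(50)                              # diagonal weights only

r_net = stats.network_pearsonr(x, y, W)
r_ref, _ = sstats.pearsonr(x, y)
assert np.isclose(r_net, r_ref)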
- - Parameters - ---------- - annot_mat : (N, D) array_like - Input matrix - - Returns - ------- - cross_outer : (N, N, D, D) numpy.ndarray - Cross outer product of `annot_mat` - """ - n_samp, n_feat = annot_mat.shape - cross_outer = np.empty((n_samp, n_samp, n_feat, n_feat), annot_mat.dtype) - for a in range(n_samp): - for b in range(n_samp): - for c in range(n_feat): - for d in range(n_feat): - cross_outer[a, b, c, d] = annot_mat[a, c] * annot_mat[b, d] - return cross_outer - - -if use_numba: - # ("float64[:,:,:,::1](float64[:,::1])") - _cross_outer = njit(_cross_outer) - - -def _multiply_sum(cross_outer, weight): - """ - Multiply and sum cross outer product. - - This functions is only used in `network_pearsonr_pairwise`. - - Parameters - ---------- - cross_outer : (N, N, D, D) array_like - Cross outer product of `annot_mat` - weight : (D, D) array_like - Weight matrix - - Returns - ------- - cross_outer_after : (N, N) numpy.ndarray - Result of multiplying and summing `cross_outer` - """ - n_samp, _, n_dim, _ = cross_outer.shape - cross_outer_after = np.empty((n_samp, n_samp), cross_outer.dtype) - for i in range(n_samp): - for j in range(n_samp): - curr_sum = 0.0 - for k in range(n_dim): - for l in range(n_dim): # noqa: E741 - curr_sum += weight[k, l] * cross_outer[i, j, k, l] - cross_outer_after[i, j] = curr_sum - return cross_outer_after - - -if use_numba: - # ("float64[:,::1](float64[:,:,:,::1],float64[:,::1])") - _multiply_sum = njit(_multiply_sum) - - -def network_pearsonr_pairwise(annot_mat, weight): - """ - Calculate pairwise network correlation between rows of `annot_mat`. - - .. warning:: - Test before use. - - Parameters - ---------- - annot_mat : (N, D) array_like - Input matrix - weight : (D, D) array_like - Weight matrix. Diagonal elements should be 1. - - Returns - ------- - corr_mat : (N, N) numpy.ndarray - Pairwise network correlation matrix - - Notes - ----- - This is a faster version of :meth:`netneurotools.stats.network_pearsonr` - for calculating pairwise network correlation between rows of `annot_mat`. - Check :meth:`netneurotools.stats.network_pearsonr` for details. - - See Also - -------- - netneurotools.stats.network_pearsonr - """ - annot_mat_demean = annot_mat - np.mean(annot_mat, axis=1, keepdims=True) - if use_numba: - cross_outer = _cross_outer(annot_mat_demean) - cross_outer_after = _multiply_sum(cross_outer, weight) - else: - # https://stackoverflow.com/questions/24839481/python-matrix-outer-product - cross_outer = np.einsum('ac,bd->abcd', annot_mat_demean, annot_mat_demean) - cross_outer_after = np.sum(np.multiply(cross_outer, weight), axis=(2, 3)) - # translating the two lines below in numba does not speed up much - lower = np.sqrt(np.diagonal(cross_outer_after)) - return cross_outer_after / np.einsum('i,j', lower, lower) - - -def _onehot_quadratic_form_broadcast(Q_star): - """ - Calculate one-hot quadratic form of input matrix. - - This functions is only used in `effective_resistance`. 
- - Parameters - ---------- - Q_star : (N, N) array_like - Input matrix - - Returns - ------- - R_eff : (N, N) numpy.ndarray - One-hot quadratic form of `Q_star` - """ - n = Q_star.shape[0] - R_eff = np.empty((n, n), Q_star.dtype) - for i in range(n): - for j in range(n): - R_eff[i, j] = Q_star[i, i] - Q_star[j, i] - Q_star[i, j] + Q_star[j, j] - return R_eff - - -if use_numba: - # ("float64[:,::1](float64[:,::1])") - _onehot_quadratic_form_broadcast = njit(_onehot_quadratic_form_broadcast) - - -def effective_resistance(W, directed=True): - """ - Calculate effective resistance matrix. - - The effective resistance between two nodes in a graph, often used in the context - of electrical networks, is a measure that stems from the inverse of the Laplacian - matrix of the graph. - - .. warning:: - Test before use. - - Parameters - ---------- - W : (N, N) array_like - Weight matrix. - directed : bool, optional - Whether the graph is directed. This is used to determine whether to turn on - the :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. When you are - using a symmetric weight matrix (while real-valued implying hermitian), you - can set this to False for better performance. Default: True - - Returns - ------- - R_eff : (N, N) numpy.ndarray - Effective resistance matrix - - Notes - ----- - The effective resistance between two nodes :math:`i` and :math:`j` is defined as - - .. math:: - R_{ij} = (e_i - e_j)^T Q^* (e_i - e_j) - - where :math:`Q^*` is the Moore-Penrose pseudoinverse of the Laplacian matrix - :math:`L` of the graph, and :math:`e_i` is the :math:`i`-th standard basis vector. - - References - ---------- - .. [1] Ellens, W., Spieksma, F. M., Van Mieghem, P., Jamakovic, A., & Kooij, - R. E. (2011). Effective graph resistance. Linear Algebra and Its Applications, - 435(10), 2491–2506. https://doi.org/10.1016/j.laa.2011.02.024 - - See Also - -------- - netneurotools.stats.network_polarisation - """ - L = _graph_laplacian(W) - Q_star = np.linalg.pinv(L, hermitian=not directed) - if use_numba: - R_eff = _onehot_quadratic_form_broadcast(Q_star) - else: - Q_star_diag = np.diag(Q_star) - R_eff = \ - Q_star_diag[:, np.newaxis] \ - - Q_star \ - - Q_star.T \ - + Q_star_diag[np.newaxis, :] - return R_eff - - -def _polariz_diff(vec): - """ - Calculate difference between positive and negative parts of a vector. - - This functions is only used in `network_polarisation`. - - Parameters - ---------- - vec : (N,) array_like - Input vector. Must have both positive and negative values. - - Returns - ------- - vec_diff : (N,) numpy.ndarray - Difference between positive and negative parts of `vec` - """ - # - vec_pos = np.maximum(vec, 0.0) - vec_pos /= np.max(vec_pos) - # - vec_neg = np.minimum(vec, 0.0) - vec_neg = np.abs(vec_neg) - vec_neg /= np.max(vec_neg) - return (vec_pos - vec_neg) - - -if use_numba: - _polariz_diff = njit(_polariz_diff) - - -def _quadratic_form(W, vec_left, vec_right, squared=False): - """ - Calculate quadratic form :math:`v_{left}^T W v_{right}`. - - Parameters - ---------- - W : (N, N) array_like - Input matrix. - vec_left : (N,) array_like - Left weight vector. - vec_right : (N,) array_like - Right weight vector. - squared : bool, optional - Whether to square the input weight matrix. 
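# Hedged numeric check of the effective-resistance definition documented
# above, computed directly from the Laplacian pseudoinverse for a 3-node path
# graph with unit-weight edges (so the end-to-end resistance should be 1 + 1 = 2).
import numpy as np

W = np.array([[0., 1., 0.],
              [1., 0., 1.],
              [0., 1., 0.]])                # path graph 0 - 1 - 2
L = np.diag(W.sum(axis=1)) - W              # graph Laplacian
Q_star = np.linalg.pinv(L, hermitian=True)  # Moore-Penrose pseudoinverse

# R_ij = Q*_ii - Q*_ij - Q*_ji + Q*_jj
R = np.diag(Q_star)[:, None] - Q_star - Q_star.T + np.diag(Q_star)[None, :]
assert np.isclose(R[0, 1], 1.0)             # adjacent nodes: one unit resistor
assert np.isclose(R[0, 2], 2.0)             # two unit resistors in series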
Default: False - - Returns - ------- - quadratic_form : float - Quadratic form from `W`, `vec_left`, and `vec_right` - """ - # [numpy] - - # (vec_left.T @ W @ vec_right)[0, 0] - # [numba] - # vec = np.ascontiguousarray(vec[np.newaxis, :]) - n = W.shape[0] - ret = 0.0 - for i in range(n): - for j in range(n): - if squared: - ret += vec_left[i] * vec_right[j] * W[i, j]**2 - else: - ret += vec_left[i] * vec_right[j] * W[i, j] - return ret - - -if use_numba: - _quadratic_form = njit(_quadratic_form) - - -def network_polarisation(vec, W, directed=True): - r""" - Calculate polarisation of a vector on a graph. - - Network polarisation is a measure of polizzartion taken into account all the - three factors below [1]_: - - - how extreme the opinions of the people are - - how much they organize into echo chambers, and - - how these echo chambers organize in the network - - .. warning:: - Test before use. - - Parameters - ---------- - vec : (N,) array_like - Polarization vector. Must have both positive and negative values. Will be - normalized between -1 and 1 internally. - W : (N, N) array_like - Weight matrix. - directed : bool, optional - Whether the graph is directed. This is used to determine whether to turn on - the :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. When you are - using a symmetric weight matrix (while real-valued implying hermitian), you - can set this to False for better performance. Default: True - - Returns - ------- - polariz : float - Polarization of `vec` on `W` - - Notes - ----- - The measure is based on the genralized Eucledian distance, defined as - - .. math:: - \delta_{G, o} = \sqrt{(o^+ - o^-)^T Q^* (o^+ - o^-)} - - where :math:`o^+` and :math:`o^-` are the positive and negative parts of the - polarization vector, and :math:`Q^*` is the Moore-Penrose pseudoinverse - of the Laplacian matrix :math:`L` of the graph. Check :func:`effective_resistance` - for similarity. - - References - ---------- - .. [1] Hohmann, M., Devriendt, K., & Coscia, M. (2023). Quantifying ideological - polarization on a network using generalized Euclidean distance. Science Advances, - 9(9), eabq2044. https://doi.org/10.1126/sciadv.abq2044 - - See Also - -------- - netneurotools.stats.effective_resistance - """ - L = _graph_laplacian(W) - Q_star = np.linalg.pinv(L, hermitian=not directed) - diff = _polariz_diff(vec) - if use_numba: - polariz_sq = _quadratic_form(Q_star, diff, diff, squared=False) - else: - polariz_sq = (diff.T @ Q_star @ diff) - return np.sqrt(polariz_sq) - - -def network_variance(vec, D): - r""" - Calculate variance of a vector on a graph. - - Network variance is a measure of variance taken into account the network - structure. - - .. warning:: - Test before use. - - Parameters - ---------- - vec : (N,) array_like - Input vector. Must be all positive. - Will be normalized internally as a probability distribution. - D : (N, N) array_like - Distance matrix. - - Returns - ------- - network_variance : float - Network variance of `vec` on `D` - - Notes - ----- - The network variance is defined as - - .. math:: - var(p) = \frac{1}{2} \sum_{i, j} p(i) p(j) d^2(i,j) - - where :math:`p` is the probability distribution of `vec`, and :math:`d(i,j)` - is the distance between node :math:`i` and :math:`j`. - - The distance matrix :math:`D` can make use of effective resistance or its - square root. - - Example using effective resistance as weight matrix - - .. code:: python - - R_eff = effective_resistance(W) - netvar = network_variance(vec, R_eff) - - References - ---------- - .. 
[1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022). - Variance and covariance of distributions on graphs. SIAM Review, 64(2), - 343–359. https://doi.org/10.1137/20M1361328 - - See Also - -------- - netneurotools.stats.network_covariance - """ - p = vec / np.sum(vec) - return 0.5 * (p.T @ np.multiply(D, D) @ p) - - -def network_variance_numba(vec, D): - """ - Numba version of :meth:`netneurotools.stats.network_variance`. - - Network variance is a measure of variance taken into account the network - structure. - - .. warning:: - Test before use. - - Parameters - ---------- - vec : (N,) array_like - Input vector. Must be all positive. - Will be normalized internally as a probability distribution. - D : (N, N) array_like - Distance matrix. - - Returns - ------- - network_variance : float - Network variance of `vec` on `D` - """ - p = vec / np.sum(vec) - return 0.5 * _quadratic_form(D, p, p, squared=True) - - -if use_numba: - network_variance_numba = njit(network_variance_numba) - - -def network_covariance(joint_pmat, D, calc_marginal=True): - r""" - Calculate covariance of a joint probability matrix on a graph. - - .. warning:: - Test before use. - - Parameters - ---------- - joint_pmat : (N, N) array_like - Joint probability matrix. Please make sure that it is valid. - D : (N, N) array_like - Distance matrix. - calc_marginal : bool, optional - Whether to calculate marginal variance. It will be marginally faster if - :code:`calc_marginal=False` (returning marginal variances as 0). Default: True - - Returns - ------- - network_covariance : float - Covariance of `joint_pmat` on `D` - var_p : float - Marginal variance of `joint_pmat` on `D`. - Will be 0 if :code:`calc_marginal=False` - var_q : float - Marginal variance of `joint_pmat` on `D`. - Will be 0 if :code:`calc_marginal=False` - - Notes - ----- - The network variance is defined as - - .. math:: - cov(P) = \frac{1}{2} \sum_{i, j} [p(i) q(j) - P(i,j)] d^2(i,j) - - where :math:`P` is the joint probability matrix, :math:`p` and :math:`q` - are the marginal probability distributions of `joint_pmat`, and :math:`d(i,j)` - is the distance between node :math:`i` and :math:`j`. - - Check :func:`network_variance` for usage. - - References - ---------- - .. [1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022). - Variance and covariance of distributions on graphs. SIAM Review, 64(2), - 343–359. https://doi.org/10.1137/20M1361328 - - See Also - -------- - netneurotools.stats.network_variance - """ - p = np.sum(joint_pmat, axis=1) - q = np.sum(joint_pmat, axis=0) - D_sq = np.multiply(D, D) - cov = p.T @ D_sq @ q - np.sum(np.multiply(joint_pmat, D_sq)) - if calc_marginal: - var_p = p.T @ D_sq @ p - var_q = q.T @ D_sq @ q - else: - var_p, var_q = 0, 0 - return 0.5 * cov, 0.5 * var_p, 0.5 * var_q - - -def network_covariance_numba(joint_pmat, D, calc_marginal=True): - """ - Numba version of :meth:`netneurotools.stats.network_covariance`. - - .. warning:: - Test before use. - - Parameters - ---------- - joint_pmat : (N, N) array_like - Joint probability matrix. Please make sure that it is valid. - D : (N, N) array_like - Distance matrix. - calc_marginal : bool, optional - Whether to calculate marginal variance. It will be marginally faster if - :code:`calc_marginal=False` (returning marginal variances as 0). Default: True - - Returns - ------- - network_covariance : float - Covariance of `joint_pmat` on `D` - var_p : float - Marginal variance of `joint_pmat` on `D`. 
-        Will be 0 if :code:`calc_marginal=False`
-    var_q : float
-        Marginal variance of `joint_pmat` on `D`.
-        Will be 0 if :code:`calc_marginal=False`
-    """
-    n = joint_pmat.shape[0]
-    p = np.sum(joint_pmat, axis=1)
-    q = np.sum(joint_pmat, axis=0)
-    cov = 0.0
-    var_p, var_q = 0.0, 0.0
-    for i in range(n):
-        for j in range(n):
-            cov += (p[i] * q[j] - joint_pmat[i, j]) * D[i, j]**2
-            if calc_marginal:
-                var_p += p[i] * p[j] * D[i, j]**2
-                var_q += q[i] * q[j] * D[i, j]**2
-    return 0.5 * cov, 0.5 * var_p, 0.5 * var_q
-
-
-if use_numba:
-    network_covariance_numba = njit(network_covariance_numba)
diff --git a/netneurotools/stats/__init__.py b/netneurotools/stats/__init__.py
new file mode 100644
index 0000000..e000548
--- /dev/null
+++ b/netneurotools/stats/__init__.py
@@ -0,0 +1,36 @@
+"""Functions for performing statistical operations."""
+
+
+from .correlation import (
+    efficient_pearsonr,
+    weighted_pearsonr,
+    make_correlated_xy
+)
+
+
+from .permutation_test import (
+    permtest_1samp,
+    permtest_rel,
+    permtest_pearsonr
+)
+
+
+from .regression import (
+    _add_constant,
+    residualize,
+    get_dominance_stats
+)
+
+
+# from .stats_utils import ()
+
+
+__all__ = [
+    # correlation
+    'efficient_pearsonr', 'weighted_pearsonr', 'make_correlated_xy',
+    # permutation_test
+    'permtest_1samp', 'permtest_rel', 'permtest_pearsonr',
+    # regression
+    '_add_constant', 'residualize', 'get_dominance_stats',
+    # stats_utils
+]
diff --git a/netneurotools/stats/correlation.py b/netneurotools/stats/correlation.py
new file mode 100644
index 0000000..38489fd
--- /dev/null
+++ b/netneurotools/stats/correlation.py
@@ -0,0 +1,189 @@
+"""Functions for calculating correlation."""
+
+import numpy as np
+import scipy.stats as sstats
+import scipy.special as sspecial
+from sklearn.utils.validation import check_random_state
+
+try:  # scipy >= 1.8.0
+    from scipy.stats._stats_py import _chk2_asarray
+except ImportError:  # scipy < 1.8.0
+    from scipy.stats.stats import _chk2_asarray
+
+
+def efficient_pearsonr(a, b, ddof=1, nan_policy='propagate'):
+    """
+    Compute correlation of matching columns in `a` and `b`.
+
+    Parameters
+    ----------
+    a,b : array_like
+        Sample observations. These arrays must have the same length and either
+        an equivalent number of columns or be broadcastable
+    ddof : int, optional
+        Degrees of freedom correction in the calculation of the standard
+        deviation. Default: 1
+    nan_policy : str, optional
+        Defines how to handle when input contains nan. 'propagate' returns nan,
+        'raise' throws an error, 'omit' performs the calculations ignoring nan
+        values. Default: 'propagate'
+
+    Returns
+    -------
+    corr : float or numpy.ndarray
+        Pearson's correlation coefficient between matching columns of inputs
+    pval : float or numpy.ndarray
+        Two-tailed p-values
+
+    Notes
+    -----
+    If either input contains nan and nan_policy is set to 'omit', both arrays
+    will be masked to omit the nan entries.
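+
+    As a rough sketch (illustrative values only, not part of the doctests),
+    the 'omit' policy amounts to correlating the complete pairs:
+
+    .. code:: python
+
+        import numpy as np
+        from netneurotools import stats
+
+        x = np.array([1., 2., np.nan, 4., 5.])
+        y = np.array([2., 4., 6., 8., np.nan])
+        # entries 2 and 4 are masked, so only three pairs contribute
+        r, p = stats.efficient_pearsonr(x, y, nan_policy='omit')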
+
+    Examples
+    --------
+    >>> from netneurotools import stats
+
+    Generate some not-very-correlated and some highly-correlated data:
+
+    >>> np.random.seed(12345678)  # set random seed for reproducible results
+    >>> x1, y1 = stats.make_correlated_xy(corr=0.1, size=100)
+    >>> x2, y2 = stats.make_correlated_xy(corr=0.8, size=100)
+
+    Calculate both correlations simultaneously:
+
+    >>> stats.efficient_pearsonr(np.c_[x1, x2], np.c_[y1, y2])
+    (array([0.10032565, 0.79961189]), array([3.20636135e-01, 1.97429944e-23]))
+    """
+    a, b, _ = _chk2_asarray(a, b, 0)
+    if len(a) != len(b):
+        raise ValueError('Provided arrays do not have same length')
+
+    if a.size == 0 or b.size == 0:
+        return np.nan, np.nan
+
+    if nan_policy not in ('propagate', 'raise', 'omit'):
+        raise ValueError(f'Value for nan_policy "{nan_policy}" not allowed')
+
+    a, b = a.reshape(len(a), -1), b.reshape(len(b), -1)
+    if (a.shape[1] != b.shape[1]):
+        a, b = np.broadcast_arrays(a, b)
+
+    mask = np.logical_or(np.isnan(a), np.isnan(b))
+    if nan_policy == 'raise' and np.any(mask):
+        raise ValueError('Input cannot contain NaN when nan_policy is "raise"')
+    elif nan_policy == 'omit':
+        # avoid making copies of the data, if possible
+        a = np.ma.masked_array(a, mask, copy=False, fill_value=np.nan)
+        b = np.ma.masked_array(b, mask, copy=False, fill_value=np.nan)
+
+    with np.errstate(invalid='ignore'):
+        corr = (sstats.zscore(a, ddof=ddof, nan_policy=nan_policy)
+                * sstats.zscore(b, ddof=ddof, nan_policy=nan_policy))
+
+    sumfunc, n_obs = np.sum, len(a)
+    if nan_policy == 'omit':
+        corr = corr.filled(np.nan)
+        sumfunc = np.nansum
+        n_obs = np.squeeze(np.sum(np.logical_not(np.isnan(corr)), axis=0))
+
+    corr = sumfunc(corr, axis=0) / (n_obs - 1)
+    corr = np.squeeze(np.clip(corr, -1, 1)) / 1
+
+    # taken from scipy.stats
+    ab = (n_obs / 2) - 1
+    prob = 2 * sspecial.betainc(ab, ab, 0.5 * (1 - np.abs(corr)))
+
+    return corr, prob
+
+
+def weighted_pearsonr():
+    """Calculate weighted Pearson correlation coefficient."""
+    pass
+
+
+def make_correlated_xy(corr=0.85, size=10000, seed=None, tol=0.001):
+    """
+    Generate random vectors that are correlated to approximately `corr`.
+
+    Parameters
+    ----------
+    corr : [-1, 1] float or (N, N) numpy.ndarray, optional
+        The approximate correlation desired. If a float is provided, two
+        vectors with the specified level of correlation will be generated. If
+        an array is provided, it is assumed to be a symmetrical correlation
+        matrix and ``len(corr)`` vectors with the specified levels of
+        correlation will be generated. Default: 0.85
+    size : int or tuple, optional
+        Desired size of the generated vectors. Default: 10000
+    seed : {int, np.random.RandomState instance, None}, optional
+        Seed for random number generation. Default: None
+    tol : [0, 1] float, optional
+        Tolerance of correlation between generated `vectors` and specified
+        `corr`. Default: 0.001
+
+    Returns
+    -------
+    vectors : numpy.ndarray
+        Random vectors of size `size` with correlation specified by `corr`
+
+    Examples
+    --------
+    >>> from netneurotools import stats
+
+    By default two vectors are generated with specified correlation
+
+    >>> x, y = stats.make_correlated_xy()
+    >>> np.corrcoef(x, y)  # doctest: +SKIP
+    array([[1.        , 0.85083661],
+           [0.85083661, 1.        ]])
+    >>> x, y = stats.make_correlated_xy(corr=0.2)
+    >>> np.corrcoef(x, y)  # doctest: +SKIP
+    array([[1.        , 0.20069953],
+           [0.20069953, 1.        ]])
+
+    You can also provide correlation matrices to generate more than two vectors
+    if desired.
Note that this makes it more difficult to ensure the actual + correlations are close to the desired values: + + >>> corr = [[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]] + >>> out = stats.make_correlated_xy(corr=corr) + >>> out.shape + (3, 10000) + >>> np.corrcoef(out) # doctest: +SKIP + array([[1. , 0.50965273, 0.30235686], + [0.50965273, 1. , 0.01089107], + [0.30235686, 0.01089107, 1. ]]) + """ + rs = check_random_state(seed) + + # no correlations outside [-1, 1] bounds + if np.any(np.abs(corr) > 1): + raise ValueError('Provided `corr` must (all) be in range [-1, 1].') + + # if we're given a single number, assume two vectors are desired + if isinstance(corr, (int, float)): + covs = np.ones((2, 2)) * 0.111 + covs[(0, 1), (1, 0)] *= corr + # if we're given a correlation matrix, assume `N` vectors are desired + elif isinstance(corr, (list, np.ndarray)): + corr = np.asarray(corr) + if corr.ndim != 2 or len(corr) != len(corr.T): + raise ValueError('If `corr` is a list or array, must be a 2D ' + 'square array, not {}'.format(corr.shape)) + if np.any(np.diag(corr) != 1): + raise ValueError('Diagonal of `corr` must be 1.') + covs = corr * 0.111 + means = [0] * len(covs) + + # generate the variables + count = 0 + while count < 500: + vectors = rs.multivariate_normal(mean=means, cov=covs, size=size).T + flat = vectors.reshape(len(vectors), -1) + # if diff between actual and desired correlations less than tol, break + if np.all(np.abs(np.corrcoef(flat) - (covs / 0.111)) < tol): + break + count += 1 + + return vectors diff --git a/netneurotools/stats/permutation_test.py b/netneurotools/stats/permutation_test.py new file mode 100644 index 0000000..147e4d1 --- /dev/null +++ b/netneurotools/stats/permutation_test.py @@ -0,0 +1,283 @@ +"""Functions for calculating permutation test.""" + +import numpy as np +from sklearn.utils.validation import check_random_state + +try: # scipy >= 1.8.0 + from scipy.stats._stats_py import _chk2_asarray +except ImportError: # scipy < 1.8.0 + from scipy.stats.stats import _chk2_asarray + +from .correlation import efficient_pearsonr + + +def permtest_1samp(a, popmean, axis=0, n_perm=1000, seed=0): + """ + Non-parametric equivalent of :py:func:`scipy.stats.ttest_1samp`. + + Generates two-tailed p-value for hypothesis of whether `a` differs from + `popmean` using permutation tests + + Parameters + ---------- + a : array_like + Sample observations + popmean : float or array_like + Expected valued in null hypothesis. If array_like then it must have the + same shape as `a` excluding the `axis` dimension + axis : int or None, optional + Axis along which to compute test. If None, compute over the whole array + of `a`. Default: 0 + n_perm : int, optional + Number of permutations to assess. Unless `a` is very small along `axis` + this will approximate a randomization test via Monte Carlo simulations. + Default: 1000 + seed : {int, np.random.RandomState instance, None}, optional + Seed for random number generation. Set to None for "randomness". + Default: 0 + + Returns + ------- + stat : float or numpy.ndarray + Difference from `popmean` + pvalue : float or numpy.ndarray + Non-parametric p-value + + Notes + ----- + Providing multiple values to `popmean` to run *independent* tests in + parallel is not currently supported. + + The lowest p-value that can be returned by this function is equal to 1 / + (`n_perm` + 1). 
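+
+    As a quick back-of-the-envelope check (illustrative only), the default
+    ``n_perm=1000`` therefore bounds the reported p-values at:
+
+    .. code:: python
+
+        n_perm = 1000
+        min_p = 1 / (n_perm + 1)  # ~0.000999, as in the examples below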
+ + Examples + -------- + >>> from netneurotools import stats + >>> np.random.seed(7654567) # set random seed for reproducible results + >>> rvs = np.random.normal(loc=5, scale=10, size=(50, 2)) + + Test if mean of random sample is equal to true mean, and different mean. We + reject the null hypothesis in the second case and don't reject it in the + first case. + + >>> stats.permtest_1samp(rvs, 5.0) + (array([-0.985602 , -0.05204969]), array([0.48551449, 0.95904096])) + >>> stats.permtest_1samp(rvs, 0.0) + (array([4.014398 , 4.94795031]), array([0.00699301, 0.000999 ])) + + Example using axis and non-scalar dimension for population mean + + >>> stats.permtest_1samp(rvs, [5.0, 0.0]) + (array([-0.985602 , 4.94795031]), array([0.48551449, 0.000999 ])) + >>> stats.permtest_1samp(rvs.T, [5.0, 0.0], axis=1) + (array([-0.985602 , 4.94795031]), array([0.51548452, 0.000999 ])) + """ + a, popmean, axis = _chk2_asarray(a, popmean, axis) + rs = check_random_state(seed) + + if a.size == 0: + return np.nan, np.nan + + # ensure popmean will broadcast to `a` correctly + if popmean.ndim != a.ndim: + popmean = np.expand_dims(popmean, axis=axis) + + # center `a` around `popmean` and calculate original mean + zeroed = a - popmean + true_mean = zeroed.mean(axis=axis) / 1 + abs_mean = np.abs(true_mean) + + # this for loop is not _the fastest_ but is memory efficient + # the broadcasting alt. would mean storing zeroed.size * n_perm in memory + permutations = np.ones(true_mean.shape) + for _ in range(n_perm): + flipped = zeroed * rs.choice([-1, 1], size=zeroed.shape) # sign flip + permutations += np.abs(flipped.mean(axis=axis)) >= abs_mean + + pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_mean + + return true_mean, pvals + + +def permtest_rel(a, b, axis=0, n_perm=1000, seed=0): + """ + Non-parametric equivalent of :py:func:`scipy.stats.ttest_rel`. + + Generates two-tailed p-value for hypothesis of whether related samples `a` + and `b` differ using permutation tests + + Parameters + ---------- + a, b : array_like + Sample observations. These arrays must have the same shape. + axis : int or None, optional + Axis along which to compute test. If None, compute over whole arrays + of `a` and `b`. Default: 0 + n_perm : int, optional + Number of permutations to assess. Unless `a` and `b` are very small + along `axis` this will approximate a randomization test via Monte + Carlo simulations. Default: 1000 + seed : {int, np.random.RandomState instance, None}, optional + Seed for random number generation. Set to None for "randomness". + Default: 0 + + Returns + ------- + stat : float or numpy.ndarray + Average difference between `a` and `b` + pvalue : float or numpy.ndarray + Non-parametric p-value + + Notes + ----- + The lowest p-value that can be returned by this function is equal to 1 / + (`n_perm` + 1). + + Examples + -------- + >>> from netneurotools import stats + + >>> np.random.seed(12345678) # set random seed for reproducible results + >>> rvs1 = np.random.normal(loc=5, scale=10, size=500) + >>> rvs2 = (np.random.normal(loc=5, scale=10, size=500) + ... + np.random.normal(scale=0.2, size=500)) + >>> stats.permtest_rel(rvs1, rvs2) # doctest: +SKIP + (-0.16506275161572695, 0.8021978021978022) + + >>> rvs3 = (np.random.normal(loc=8, scale=10, size=500) + ... 
+ np.random.normal(scale=0.2, size=500))
+    >>> stats.permtest_rel(rvs1, rvs3)  # doctest: +SKIP
+    (2.40533726097883, 0.000999000999000999)
+    """
+    a, b, axis = _chk2_asarray(a, b, axis)
+    rs = check_random_state(seed)
+
+    if a.shape[axis] != b.shape[axis]:
+        raise ValueError('Provided arrays do not have same length along axis')
+
+    if a.size == 0 or b.size == 0:
+        return np.nan, np.nan
+
+    # calculate original difference in means
+    ab = np.stack([a, b], axis=0)
+    if ab.ndim < 3:
+        ab = np.expand_dims(ab, axis=-1)
+    true_diff = np.squeeze(np.diff(ab, axis=0)).mean(axis=axis) / 1
+    abs_true = np.abs(true_diff)
+
+    # idx array
+    reidx = np.meshgrid(*[range(f) for f in ab.shape], indexing='ij')
+
+    permutations = np.ones(true_diff.shape)
+    for _ in range(n_perm):
+        # use this to re-index (i.e., swap along) the first axis of `ab`
+        swap = rs.random_sample(ab.shape[:-1]).argsort(axis=axis)
+        reidx[0] = np.repeat(swap[..., np.newaxis], ab.shape[-1], axis=-1)
+        # recompute difference between `a` and `b` (i.e., first axis of `ab`)
+        pdiff = np.squeeze(np.diff(ab[tuple(reidx)], axis=0)).mean(axis=axis)
+        permutations += np.abs(pdiff) >= abs_true
+
+    pvals = permutations / (n_perm + 1)  # + 1 in denom accounts for true_diff
+
+    return true_diff, pvals
+
+
+def permtest_pearsonr(a, b, axis=0, n_perm=1000, resamples=None, seed=0):
+    """
+    Non-parametric equivalent of :py:func:`scipy.stats.pearsonr`.
+
+    Generates two-tailed p-value for hypothesis of whether samples `a` and `b`
+    are correlated using permutation tests
+
+    Parameters
+    ----------
+    a,b : (N[, M]) array_like
+        Sample observations. These arrays must have the same length and either
+        an equivalent number of columns or be broadcastable
+    axis : int or None, optional
+        Axis along which to compute test. If None, compute over whole arrays
+        of `a` and `b`. Default: 0
+    n_perm : int, optional
+        Number of permutations to assess. Unless `a` and `b` are very small
+        along `axis` this will approximate a randomization test via Monte
+        Carlo simulations. Default: 1000
+    resamples : (N, P) array_like, optional
+        Resampling array used to shuffle `a` when generating null distribution
+        of correlations. This array must have the same length as `a` and `b`
+        and should have at least the same number of columns as `n_perm` (if it
+        has more, then only `n_perm` columns will be used). When not specified a
+        standard permutation is used to shuffle `a`. Default: None
+    seed : {int, np.random.RandomState instance, None}, optional
+        Seed for random number generation. Set to None for "randomness".
+        Default: 0
+
+    Returns
+    -------
+    corr : float or numpy.ndarray
+        Correlations
+    pvalue : float or numpy.ndarray
+        Non-parametric p-value
+
+    Notes
+    -----
+    The lowest p-value that can be returned by this function is equal to 1 /
+    (`n_perm` + 1).
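+
+    A minimal sketch of building a `resamples` array by hand (any array of
+    row indices with shape (N, n_perm) works; constrained nulls such as
+    spatial spins would be generated elsewhere):
+
+    .. code:: python
+
+        import numpy as np
+        from netneurotools import stats
+
+        x, y = stats.make_correlated_xy(corr=0.2, size=100)
+        rng = np.random.default_rng(1234)
+        perms = [rng.permutation(len(x)) for _ in range(1000)]
+        resamples = np.column_stack(perms)
+        corr, pval = stats.permtest_pearsonr(x, y, n_perm=1000,
+                                             resamples=resamples)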
+ + Examples + -------- + >>> from netneurotools import stats + + >>> np.random.seed(12345678) # set random seed for reproducible results + >>> x, y = stats.make_correlated_xy(corr=0.1, size=100) + >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP + (0.10032564626876286, 0.3046953046953047) + + >>> x, y = stats.make_correlated_xy(corr=0.5, size=100) + >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP + (0.500040365781984, 0.000999000999000999) + + Also works with multiple columns by either broadcasting the smaller array + to the larger: + + >>> z = x + np.random.normal(loc=1, size=100) + >>> stats.permtest_pearsonr(x, np.column_stack([y, z])) + (array([0.50004037, 0.25843187]), array([0.000999 , 0.01098901])) + + or by using matching columns in the two arrays (e.g., `x` and `y` vs + `a` and `b`): + + >>> a, b = stats.make_correlated_xy(corr=0.9, size=100) + >>> stats.permtest_pearsonr(np.column_stack([x, a]), np.column_stack([y, b])) + (array([0.50004037, 0.89927523]), array([0.000999, 0.000999])) + """ # noqa + a, b, axis = _chk2_asarray(a, b, axis) + rs = check_random_state(seed) + + if len(a) != len(b): + raise ValueError('Provided arrays do not have same length') + + if a.size == 0 or b.size == 0: + return np.nan, np.nan + + if resamples is not None: + if n_perm > resamples.shape[-1]: + raise ValueError('Number of permutations requested exceeds size ' + 'of resampling array.') + + # divide by one forces coercion to float if ndim = 0 + true_corr = efficient_pearsonr(a, b)[0] / 1 + abs_true = np.abs(true_corr) + + permutations = np.ones(true_corr.shape) + for perm in range(n_perm): + # permute `a` and determine whether correlations exceed original + if resamples is None: + ap = a[rs.permutation(len(a))] + else: + ap = a[resamples[:, perm]] + permutations += np.abs(efficient_pearsonr(ap, b)[0]) >= abs_true + + pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_corr + + return true_corr, pvals diff --git a/netneurotools/stats/regression.py b/netneurotools/stats/regression.py new file mode 100644 index 0000000..c6ac6e2 --- /dev/null +++ b/netneurotools/stats/regression.py @@ -0,0 +1,256 @@ +"""Functions for calculating regression.""" + +from itertools import combinations + +import numpy as np +from tqdm import tqdm +import scipy.stats as sstats +from joblib import Parallel, delayed +from sklearn.linear_model import LinearRegression +from sklearn.utils.validation import check_array + + +def _add_constant(data): + """ + Add a constant (i.e., intercept) term to `data`. + + Parameters + ---------- + data : (N, M) array_like + Samples by features data array + + Returns + ------- + data : (N, F) np.ndarray + Where `F` is `M + 1` + + Examples + -------- + >>> from netneurotools import stats + + >>> A = np.zeros((5, 5)) + >>> Ac = stats._add_constant(A) + >>> Ac + array([[0., 0., 0., 0., 0., 1.], + [0., 0., 0., 0., 0., 1.], + [0., 0., 0., 0., 0., 1.], + [0., 0., 0., 0., 0., 1.], + [0., 0., 0., 0., 0., 1.]]) + """ + data = check_array(data, ensure_2d=False) + return np.column_stack([data, np.ones(len(data))]) + + +def residualize(X, Y, Xc=None, Yc=None, normalize=True, add_intercept=True): + """ + Return residuals of regression equation from `Y ~ X`. + + Parameters + ---------- + X : (N[, R]) array_like + Coefficient matrix of `R` variables for `N` subjects + Y : (N[, F]) array_like + Dependent variable matrix of `F` variables for `N` subjects + Xc : (M[, R]) array_like, optional + Coefficient matrix of `R` variables for `M` subjects. 
If not specified + then `X` is used to estimate betas. Default: None + Yc : (M[, F]) array_like, optional + Dependent variable matrix of `F` variables for `M` subjects. If not + specified then `Y` is used to estimate betas. Default: None + normalize : bool, optional + Whether to normalize (i.e., z-score) residuals. Will use residuals from + `Yc ~ Xc` for generating mean and variance. Default: True + add_intercept : bool, optional + Whether to add intercept to `X` (and `Xc`, if provided). The intercept + will not be removed, just used in beta estimation. Default: True + + Returns + ------- + Yr : (N, F) numpy.ndarray + Residuals of `Y ~ X` + + Notes + ----- + If both `Xc` and `Yc` are provided, these are used to calculate betas which + are then applied to `X` and `Y`. + """ + if ((Yc is None and Xc is not None) or (Yc is not None and Xc is None)): + raise ValueError('If processing against a comparative group, you must ' + 'provide both `Xc` and `Yc`.') + + X, Y = np.asarray(X), np.asarray(Y) + + if Yc is None: + Xc, Yc = X.copy(), Y.copy() + else: + Xc, Yc = np.asarray(Xc), np.asarray(Yc) + + # add intercept to regressors if requested and calculate fit + if add_intercept: + X, Xc = _add_constant(X), _add_constant(Xc) + betas, *_ = np.linalg.lstsq(Xc, Yc, rcond=None) + + # remove intercept from regressors and betas for calculation of residuals + if add_intercept: + betas = betas[:-1] + X, Xc = X[:, :-1], Xc[:, :-1] + + # calculate residuals + Yr = Y - (X @ betas) + Ycr = Yc - (Xc @ betas) + + if normalize: + Yr = sstats.zmap(Yr, compare=Ycr) + + return Yr + + +def get_dominance_stats(X, y, use_adjusted_r_sq=True, verbose=False, n_jobs=1): + """ + Return the dominance analysis statistics for multilinear regression. + + This is a rewritten & simplified version of [DA1]_. It is briefly + tested against the original package, but still in early stages. + Please feel free to report any bugs. + + Warning: Still work-in-progress. Parameters might change! + + Parameters + ---------- + X : (N, M) array_like + Input data + y : (N,) array_like + Target values + use_adjusted_r_sq : bool, optional + Whether to use adjusted r squares. Default: True + verbose : bool, optional + Whether to print debug messages. Default: False + n_jobs : int, optional + The number of jobs to run in parallel. Default: 1 + + Returns + ------- + model_metrics : dict + The dominance metrics, currently containing `individual_dominance`, + `partial_dominance`, `total_dominance`, and `full_r_sq`. + model_r_sq : dict + Contains all model r squares + + Notes + ----- + Example usage + + .. code:: python + + from netneurotools.stats import get_dominance_stats + from sklearn.datasets import load_boston + X, y = load_boston(return_X_y=True) + model_metrics, model_r_sq = get_dominance_stats(X, y) + + To compare with [DA1]_, use `use_adjusted_r_sq=False` + + .. code:: python + + from dominance_analysis import Dominance_Datasets + from dominance_analysis import Dominance + boston_dataset=Dominance_Datasets.get_boston() + dominance_regression=Dominance(data=boston_dataset, + target='House_Price',objective=1) + incr_variable_rsquare=dominance_regression.incremental_rsquare() + dominance_regression.dominance_stats() + + References + ---------- + .. 
[DA1] https://github.com/dominance-analysis/dominance-analysis + + """ + # this helps to remove one element from a tuple + def remove_ret(tpl, elem): + lst = list(tpl) + lst.remove(elem) + return tuple(lst) + + # sklearn linear regression wrapper + def get_reg_r_sq(X, y, use_adjusted_r_sq=True): + lin_reg = LinearRegression() + lin_reg.fit(X, y) + yhat = lin_reg.predict(X) + SS_Residual = sum((y - yhat) ** 2) + SS_Total = sum((y - np.mean(y)) ** 2) + r_squared = 1 - (float(SS_Residual)) / SS_Total + adjusted_r_squared = 1 - (1 - r_squared) * \ + (len(y) - 1) / (len(y) - X.shape[1] - 1) + if use_adjusted_r_sq: + return adjusted_r_squared + else: + return r_squared + + # helper function to compute r_sq for a given idx_tuple + def compute_r_sq(idx_tuple): + return idx_tuple, get_reg_r_sq(X[:, idx_tuple], + y, + use_adjusted_r_sq=use_adjusted_r_sq) + + # generate all predictor combinations in list (num of predictors) of lists + n_predictor = X.shape[-1] + # n_comb_len_group = n_predictor - 1 + predictor_combs = [list(combinations(range(n_predictor), i)) + for i in range(1, n_predictor + 1)] + if verbose: + print(f"[Dominance analysis] Generated \ + {len([v for i in predictor_combs for v in i])} combinations") + + model_r_sq = dict() + results = Parallel(n_jobs=n_jobs)( + delayed(compute_r_sq)(idx_tuple) + for len_group in tqdm(predictor_combs, + desc='num-of-predictor loop', + disable=not verbose) + for idx_tuple in tqdm(len_group, + desc='insider loop', + disable=not verbose)) + + # extract r_sq from results + for idx_tuple, r_sq in results: + model_r_sq[idx_tuple] = r_sq + + if verbose: + print(f"[Dominance analysis] Acquired {len(model_r_sq)} r^2's") + + # getting all model metrics + model_metrics = dict([]) + + # individual dominance + individual_dominance = [] + for i_pred in range(n_predictor): + individual_dominance.append(model_r_sq[(i_pred,)]) + individual_dominance = np.array(individual_dominance).reshape(1, -1) + model_metrics["individual_dominance"] = individual_dominance + + # partial dominance + partial_dominance = [[] for _ in range(n_predictor - 1)] + for i_len in range(n_predictor - 1): + i_len_combs = list(combinations(range(n_predictor), i_len + 2)) + for j_node in range(n_predictor): + j_node_sel = [v for v in i_len_combs if j_node in v] + reduced_list = [remove_ret(comb, j_node) for comb in j_node_sel] + diff_values = [ + model_r_sq[j_node_sel[i]] - model_r_sq[reduced_list[i]] + for i in range(len(reduced_list))] + partial_dominance[i_len].append(np.mean(diff_values)) + + # save partial dominance + partial_dominance = np.array(partial_dominance) + model_metrics["partial_dominance"] = partial_dominance + # get total dominance + total_dominance = np.mean( + np.r_[individual_dominance, partial_dominance], axis=0) + # test and save total dominance + assert np.allclose(total_dominance.sum(), + model_r_sq[tuple(range(n_predictor))]), \ + "Sum of total dominance is not equal to full r square!" 
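+    # total dominance averages the individual (size-1) and partial (size >= 2)
+    # dominance levels for each predictor; these per-predictor averages should
+    # sum to the full-model R^2, which the assertion above checks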
+ model_metrics["total_dominance"] = total_dominance + # save full r^2 + model_metrics["full_r_sq"] = model_r_sq[tuple(range(n_predictor))] + + return model_metrics, model_r_sq diff --git a/netneurotools/stats/stats_utils.py b/netneurotools/stats/stats_utils.py new file mode 100644 index 0000000..9964f8e --- /dev/null +++ b/netneurotools/stats/stats_utils.py @@ -0,0 +1 @@ +"""Functions for supporting statistics.""" diff --git a/netneurotools/stats/tests/__init__.py b/netneurotools/stats/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/stats/tests/test_correlation.py b/netneurotools/stats/tests/test_correlation.py new file mode 100644 index 0000000..f5c8653 --- /dev/null +++ b/netneurotools/stats/tests/test_correlation.py @@ -0,0 +1,67 @@ +"""For testing netneurotools.stats.correlation functionality.""" + +import pytest +import numpy as np +from netneurotools import stats + + +@pytest.mark.parametrize('x, y, expected', [ + # basic one-dimensional input + (range(5), range(5), (1.0, 0.0)), + # broadcasting occurs regardless of input order + (np.stack([range(5), range(5, 0, -1)], 1), range(5), + ([1.0, -1.0], [0.0, 0.0])), + (range(5), np.stack([range(5), range(5, 0, -1)], 1), + ([1.0, -1.0], [0.0, 0.0])), + # correlation between matching columns + (np.stack([range(5), range(5, 0, -1)], 1), + np.stack([range(5), range(5, 0, -1)], 1), + ([1.0, 1.0], [0.0, 0.0])) +]) +def test_efficient_pearsonr(x, y, expected): + """Test efficient_pearsonr function.""" + assert np.allclose(stats.efficient_pearsonr(x, y), expected) + + +def test_efficient_pearsonr_errors(): + """Test efficient_pearsonr function errors.""" + with pytest.raises(ValueError): + stats.efficient_pearsonr(range(4), range(5)) + + assert all(np.isnan(a) for a in stats.efficient_pearsonr([], [])) + + +@pytest.mark.parametrize('corr, size, tol, seed', [ + (0.85, (1000,), 0.05, 1234), + (0.85, (1000, 1000), 0.05, 1234), + ([[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]], (1000,), 0.05, 1234) +]) +def test_make_correlated_xy(corr, size, tol, seed): + """Test make_correlated_xy function.""" + out = stats.make_correlated_xy(corr=corr, size=size, + tol=tol, seed=seed) + # ensure output is expected shape + assert out.shape[1:] == size + assert len(out) == len(corr) if hasattr(corr, '__len__') else 2 + + # check outputs are correlated within specified tolerance + realcorr = np.corrcoef(out.reshape(len(out), -1)) + if len(realcorr) == 2 and not hasattr(corr, '__len__'): + realcorr = realcorr[0, 1] + assert np.all(np.abs(realcorr - corr) < tol) + + # check that seed generates reproducible values + duplicate = stats.make_correlated_xy(corr=corr, size=size, + tol=tol, seed=seed) + assert np.allclose(out, duplicate) + + +@pytest.mark.parametrize('corr', [ + (1.5), (-1.5), # outside range of [-1, 1] + ([0.85]), ([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]), # not 2D / square array + ([[0.85]]), ([[1, 0.5], [0.5, 0.5]]) # diagonal not equal to 1 +]) +def test_make_correlated_xy_errors(corr): + """Test make_correlated_xy function errors.""" + with pytest.raises(ValueError): + stats.make_correlated_xy(corr) diff --git a/netneurotools/stats/tests/test_permutation.py b/netneurotools/stats/tests/test_permutation.py new file mode 100644 index 0000000..ae6a335 --- /dev/null +++ b/netneurotools/stats/tests/test_permutation.py @@ -0,0 +1,65 @@ +"""For testing netneurotools.stats.permutation_test functionality.""" + +import pytest +import numpy as np +from netneurotools import stats + + +@pytest.mark.xfail +def test_permtest_1samp(): + 
"""Test permutation test for one-sample t-test.""" + assert False + # n1, n2, n3 = 10, 15, 20 + # rs = np.random.RandomState(1234) + # rvn1 = rs.normal(loc=8, scale=10, size=(n1, n2, n3)) + + # t1, p1 = stats.permtest_1samp(rvn1, 1, axis=0) + + +def test_permtest_rel(): + """Test permutation test for paired samples.""" + dr, pr = -0.0005, 0.4175824175824176 + dpr = ([dr, -dr], [pr, pr]) + + rvs1 = np.linspace(1, 100, 100) + rvs2 = np.linspace(1.01, 99.989, 100) + rvs1_2D = np.array([rvs1, rvs2]) + rvs2_2D = np.array([rvs2, rvs1]) + + # the p-values in these two cases should be consistent + d, p = stats.permtest_rel(rvs1, rvs2, axis=0, seed=1234) + assert np.allclose([d, p], (dr, pr)) + d, p = stats.permtest_rel(rvs1_2D.T, rvs2_2D.T, axis=0, seed=1234) + assert np.allclose([d, p], dpr) + + # but the p-value will differ here because of _how_ we're drawing the + # random permutations... it would be nice if this was consistent, but as + # yet i don't have a great idea on how to make that happen without assuming + # a whole lot about the data + pr = 0.51248751 + tpr = ([dr, -dr], [pr, pr]) + d, p = stats.permtest_rel(rvs1_2D, rvs2_2D, axis=1, seed=1234) + assert np.allclose([d, p], tpr) + + +def test_permtest_pearsonr(): + """Test permutation test for Pearson correlation.""" + np.random.seed(12345678) + x, y = stats.make_correlated_xy(corr=0.1, size=100) + r, p = stats.permtest_pearsonr(x, y) + assert np.allclose([r, p], [0.10032564626876286, 0.3046953046953047]) + + x, y = stats.make_correlated_xy(corr=0.5, size=100) + r, p = stats.permtest_pearsonr(x, y) + assert np.allclose([r, p], [0.500040365781984, 0.000999000999000999]) + + z = x + np.random.normal(loc=1, size=100) + r, p = stats.permtest_pearsonr(x, np.column_stack([y, z])) + assert np.allclose(r, np.array([0.50004037, 0.25843187])) + assert np.allclose(p, np.array([0.000999, 0.01098901])) + + a, b = stats.make_correlated_xy(corr=0.9, size=100) + r, p = stats.permtest_pearsonr(np.column_stack([x, a]), + np.column_stack([y, b])) + assert np.allclose(r, np.array([0.50004037, 0.89927523])) + assert np.allclose(p, np.array([0.000999, 0.000999])) diff --git a/netneurotools/stats/tests/test_regression.py b/netneurotools/stats/tests/test_regression.py new file mode 100644 index 0000000..ce20638 --- /dev/null +++ b/netneurotools/stats/tests/test_regression.py @@ -0,0 +1,14 @@ +"""For testing netneurotools.stats.regression functionality.""" + +import numpy as np +from netneurotools import stats + + +def test_add_constant(): + """Test adding a constant to a 1D or 2D array.""" + # if provided a vector it will return a 2D array + assert stats._add_constant(np.random.rand(100)).shape == (100, 2) + + # if provided a 2D array it will return the same, extended by 1 column + out = stats._add_constant(np.random.rand(100, 100)) + assert out.shape == (100, 101) and np.all(out[:, -1] == 1) diff --git a/netneurotools/surface.py b/netneurotools/surface.py deleted file mode 100644 index e43ca16..0000000 --- a/netneurotools/surface.py +++ /dev/null @@ -1,189 +0,0 @@ -"""Functions for constructing graphs from surface meshes.""" - -import numpy as np -from scipy import sparse - - -def _get_edges(faces): - """ - Get set of edges from `faces`. 
- - Parameters - ---------- - faces : (F, 3) array_like - Set of indices creating triangular faces of a mesh - - Returns - ------- - edges : (F*3, 2) array_like - All edges in `faces` - """ - faces = np.asarray(faces) - edges = np.sort(faces[:, [0, 1, 1, 2, 2, 0]].reshape((-1, 2)), axis=1) - - return edges - - -def get_direct_edges(vertices, faces): - """ - Get (unique) direct edges and weights in mesh describes by inputs. - - Parameters - ---------- - vertices : (N, 3) array_like - Coordinates of `vertices` comprising mesh with `faces` - faces : (F, 3) array_like - Indices of `vertices` that compose triangular faces of mesh - - Returns - ------- - edges : (E, 2) array_like - Indices of `vertices` comprising direct edges (without duplicates) - weights : (E, 1) array_like - Distances between `edges` - - """ - edges = np.unique(_get_edges(faces), axis=0) - weights = np.linalg.norm(np.diff(vertices[edges], axis=1), axis=-1) - return edges, weights.squeeze() - - -def get_indirect_edges(vertices, faces): - """ - Get indirect edges and weights in mesh described by inputs. - - Indirect edges are between two vertices that participate in faces sharing - an edge - - Parameters - ---------- - vertices : (N, 3) array_like - Coordinates of `vertices` comprising mesh with `faces` - faces : (F, 3) array_like - Indices of `vertices` that compose triangular faces of mesh - - Returns - ------- - edges : (E, 2) array_like - Indices of `vertices` comprising indirect edges (without duplicates) - weights : (E, 1) array_like - Distances between `edges` on surface - - References - ---------- - https://github.com/mikedh/trimesh (MIT licensed) - - """ - # first generate the list of edges for the provided faces and the - # index for which face the edge is from (which is just the index of the - # face repeated thrice, since each face generates three direct edges) - edges = _get_edges(faces) - edges_face = np.repeat(np.arange(len(faces)), 3) - - # every edge appears twice in a watertight surface, so we'll first get the - # indices for each duplicate edge in `edges` (this should, assuming all - # goes well, have rows equal to len(edges) // 2) - order = np.lexsort(edges.T[::-1]) - edges_sorted = edges[order] - dupe = np.any(edges_sorted[1:] != edges_sorted[:-1], axis=1) - dupe_idx = np.append(0, np.nonzero(dupe)[0] + 1) - start_ok = np.diff(np.concatenate((dupe_idx, [len(edges_sorted)]))) == 2 - groups = np.tile(dupe_idx[start_ok].reshape(-1, 1), 2) - edge_groups = order[groups + np.arange(2)] - - # now, get the indices of the faces that participate in these duplicate - # edges, as well as the edges themselves - adjacency = edges_face[edge_groups] - nondegenerate = adjacency[:, 0] != adjacency[:, 1] - adjacency = np.sort(adjacency[nondegenerate], axis=1) - adjacency_edges = edges[edge_groups[:, 0][nondegenerate]] - - # the non-shared vertex index is the same shape as adjacency, holding - # vertex indices vs face indices - indirect_edges = np.zeros(adjacency.shape, dtype=np.int32) - 1 - - # loop through the two columns of adjacency - for i, fid in enumerate(adjacency.T): - # faces from the current column of adjacency - face = faces[fid] - # get index of vertex not included in shared edge - unshared = np.logical_not(np.logical_or( - face == adjacency_edges[:, 0].reshape(-1, 1), - face == adjacency_edges[:, 1].reshape(-1, 1))) - # each row should have one "uncontained" vertex; ignore degenerates - row_ok = unshared.sum(axis=1) == 1 - unshared[~row_ok, :] = False - indirect_edges[row_ok, i] = face[unshared] - - # get vertex 
coordinates of triangles pairs with shared edges, ordered - # such that the non-shared vertex is always _last_ among the trio - shared = np.sort(face[np.logical_not(unshared)].reshape(-1, 1, 2), axis=-1) - shared = np.repeat(shared, 2, axis=1) - triangles = np.concatenate((shared, indirect_edges[..., None]), axis=-1) - # `A.shape`: (3, N, 2) corresponding to (xyz coords, edges, triangle pairs) - A, B, V = vertices[triangles].transpose(2, 3, 0, 1) - - # calculate the xyz coordinates of the foot of each triangle, where the - # base is the shared edge - # that is, we're trying to calculate F in the equation `VF = VB - (w * BA)` - # where `VF`, `VB`, and `BA` are vectors, and `w = (AB * VB) / (AB ** 2)` - w = (np.sum((A - B) * (V - B), axis=0, keepdims=True) - / np.sum((A - B) ** 2, axis=0, keepdims=True)) - feet = B - (w * (B - A)) - # calculate coordinates of midpoint b/w the feet of each pair of triangles - midpoints = (np.sum(feet.transpose(1, 2, 0), axis=1) / 2)[:, None] - # calculate Euclidean distance between non-shared vertices and midpoints - # and add distances together for each pair of triangles - norms = np.linalg.norm(vertices[indirect_edges] - midpoints, axis=-1) - weights = np.sum(norms, axis=-1) - - # NOTE: weights won't be perfectly accurate for a small subset of triangle - # pairs where either triangle has angle >90 along the shared edge. in these - # the midpoint lies _outside_ the shared edge, so neighboring triangles - # would need to be taken into account. that said, this occurs in only a - # minority of cases and the difference tends to be in the ~0.001 mm range - return indirect_edges, weights - - -def make_surf_graph(vertices, faces, mask=None): - """ - Construct adjacency graph from `surf`. - - Parameters - ---------- - vertices : (N, 3) array_like - Coordinates of `vertices` comprising mesh with `faces` - faces : (F, 3) array_like - Indices of `vertices` that compose triangular faces of mesh - mask : (N,) array_like, optional (default None) - Boolean mask indicating which vertices should be removed from generated - graph. If not supplied, all vertices are used. 
- - Returns - ------- - graph : scipy.sparse.csr_matrix - Sparse matrix representing graph of `vertices` and `faces` - - Raises - ------ - ValueError : inconsistent number of vertices in `mask` and `vertices` - """ - if mask is not None and len(mask) != len(vertices): - raise ValueError('Supplied `mask` array has different number of ' - 'vertices than supplied `vertices`.') - - # get all (direct + indirect) edges from surface - direct_edges, direct_weights = get_direct_edges(vertices, faces) - indirect_edges, indirect_weights = get_indirect_edges(vertices, faces) - edges = np.vstack((direct_edges, indirect_edges)) - weights = np.hstack((direct_weights, indirect_weights)) - - # remove edges that include a vertex in `mask` - if mask is not None: - idx, = np.where(mask) - mask = ~np.any(np.isin(edges, idx), axis=1) - edges, weights = edges[mask], weights[mask] - - # construct our graph on which to calculate shortest paths - return sparse.csr_matrix((np.squeeze(weights), (edges[:, 0], edges[:, 1])), - shape=(len(vertices), len(vertices))) diff --git a/netneurotools/tests/test_civet.py b/netneurotools/tests/test_civet.py deleted file mode 100644 index 9a6a1ab..0000000 --- a/netneurotools/tests/test_civet.py +++ /dev/null @@ -1,30 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.civet functionality.""" - -import numpy as np -import pytest - -from netneurotools import civet, datasets - - -@pytest.fixture(scope='module') -def civet_surf(tmp_path_factory): - tmpdir = str(tmp_path_factory.getbasetemp()) - return datasets.fetch_civet(data_dir=tmpdir, verbose=0)['mid'] - - -def test_read_civet(civet_surf): - vertices, triangles = civet.read_civet(civet_surf.lh) - assert len(vertices) == 40962 - assert len(triangles) == 81920 - assert np.all(triangles.max(axis=0) < vertices.shape[0]) - - -def test_civet_to_freesurfer(): - brainmap = np.random.rand(81924) - out = civet.civet_to_freesurfer(brainmap) - out2 = civet.civet_to_freesurfer(brainmap, method='linear') - assert out.shape[0] == out2.shape[0] == 81924 - - with pytest.raises(ValueError): - civet.civet_to_freesurfer(np.random.rand(10)) diff --git a/netneurotools/tests/test_datasets.py b/netneurotools/tests/test_datasets.py deleted file mode 100644 index 0a5af12..0000000 --- a/netneurotools/tests/test_datasets.py +++ /dev/null @@ -1,204 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.datasets functionality.""" - -import os - -import numpy as np -import pytest - -from netneurotools import datasets -from netneurotools.datasets import utils - - -@pytest.mark.parametrize('corr, size, tol, seed', [ - (0.85, (1000,), 0.05, 1234), - (0.85, (1000, 1000), 0.05, 1234), - ([[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]], (1000,), 0.05, 1234) -]) -def test_make_correlated_xy(corr, size, tol, seed): - out = datasets.make_correlated_xy(corr=corr, size=size, - tol=tol, seed=seed) - # ensure output is expected shape - assert out.shape[1:] == size - assert len(out) == len(corr) if hasattr(corr, '__len__') else 2 - - # check outputs are correlated within specified tolerance - realcorr = np.corrcoef(out.reshape(len(out), -1)) - if len(realcorr) == 2 and not hasattr(corr, '__len__'): - realcorr = realcorr[0, 1] - assert np.all(np.abs(realcorr - corr) < tol) - - # check that seed generates reproducible values - duplicate = datasets.make_correlated_xy(corr=corr, size=size, - tol=tol, seed=seed) - assert np.allclose(out, duplicate) - - -@pytest.mark.parametrize('corr', [ - (1.5), (-1.5), # outside range of [-1, 1] - ([0.85]), ([[0.5, 0.5, 0.5], [0.5, 
0.5, 0.5]]), # not 2D / square array - ([[0.85]]), ([[1, 0.5], [0.5, 0.5]]) # diagonal not equal to 1 -]) -def test_make_correlated_xy_errors(corr): - with pytest.raises(ValueError): - datasets.make_correlated_xy(corr) - - -def test_fetch_conte69(tmpdir): - conte = datasets.fetch_conte69(data_dir=tmpdir, verbose=0) - assert all(hasattr(conte, k) for k in - ['midthickness', 'inflated', 'vinflated', 'info']) - - -def test_fetch_yerkes19(tmpdir): - conte = datasets.fetch_yerkes19(data_dir=tmpdir, verbose=0) - assert all(hasattr(conte, k) for k in - ['midthickness', 'inflated', 'vinflated']) - - -def test_fetch_pauli2018(tmpdir): - pauli = datasets.fetch_pauli2018(data_dir=tmpdir, verbose=0) - assert all(hasattr(pauli, k) and os.path.isfile(pauli[k]) for k in - ['probabilistic', 'deterministic', 'info']) - - -@pytest.mark.parametrize('version', [ - 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' -]) -def test_fetch_fsaverage(tmpdir, version): - fsaverage = datasets.fetch_fsaverage(version=version, data_dir=tmpdir, - verbose=0) - assert all(hasattr(fsaverage, k) - and len(fsaverage[k]) == 2 - and all(os.path.isfile(hemi) - for hemi in fsaverage[k]) for k in - ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere']) - - -@pytest.mark.parametrize('version, expected', [ - ('MNI152NLin2009aSym', [1, 1, 1, 1, 1]), - ('fsaverage', [2, 2, 2, 2, 2]), - ('fsaverage5', [2, 2, 2, 2, 2]), - ('fsaverage6', [2, 2, 2, 2, 2]), - ('fslr32k', [2, 2, 2, 2, 2]), - ('gcs', [2, 2, 2, 2, 6]) -]) -def test_fetch_cammoun2012(tmpdir, version, expected): - keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500'] - cammoun = datasets.fetch_cammoun2012(version, data_dir=tmpdir, verbose=0) - - # output has expected keys - assert all(hasattr(cammoun, k) for k in keys) - # and keys are expected lengths! 
- for k, e in zip(keys, expected): - out = getattr(cammoun, k) - if isinstance(out, (tuple, list)): - assert len(out) == e - else: - assert isinstance(out, str) and out.endswith('.nii.gz') - - if 'fsaverage' in version: - with pytest.warns(DeprecationWarning): - datasets.fetch_cammoun2012('surface', data_dir=tmpdir, verbose=0) - - -@pytest.mark.parametrize('dataset, expected', [ - ('celegans', ['conn', 'dist', 'labels', 'ref']), - ('drosophila', ['conn', 'coords', 'labels', 'networks', 'ref']), - ('human_func_scale033', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale060', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale125', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale250', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale500', ['conn', 'coords', 'labels', 'ref']), - ('human_struct_scale033', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale060', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale125', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale250', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale500', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('macaque_markov', ['conn', 'dist', 'labels', 'ref']), - ('macaque_modha', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('mouse', ['acronyms', 'conn', 'coords', 'dist', 'labels', 'ref']), - ('rat', ['conn', 'labels', 'ref']), -]) -def test_fetch_connectome(tmpdir, dataset, expected): - connectome = datasets.fetch_connectome(dataset, data_dir=tmpdir, verbose=0) - - for key in expected: - assert (key in connectome) - assert isinstance(connectome[key], str if key == 'ref' else np.ndarray) - - -@pytest.mark.parametrize('version', [ - 'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k' -]) -def test_fetch_schaefer2018(tmpdir, version): - keys = [ - '{}Parcels{}Networks'.format(p, n) - for p in range(100, 1001, 100) for n in [7, 17] - ] - schaefer = datasets.fetch_schaefer2018(version, data_dir=tmpdir, verbose=0) - - if version == 'fslr32k': - assert all(k in schaefer and os.path.isfile(schaefer[k]) for k in keys) - else: - assert all(k in schaefer - and len(schaefer[k]) == 2 - and all(os.path.isfile(hemi) for hemi in schaefer[k]) - for k in keys) - - -def test_fetch_hcp_standards(tmpdir): - hcp = datasets.fetch_hcp_standards(data_dir=tmpdir, verbose=0) - assert os.path.isdir(hcp) - - -def test_fetch_mmpall(tmpdir): - mmp = datasets.fetch_mmpall(data_dir=tmpdir, verbose=0) - assert len(mmp) == 2 - assert all(os.path.isfile(hemi) for hemi in mmp) - assert all(hasattr(mmp, attr) for attr in ('lh', 'rh')) - - -def test_fetch_voneconomo(tmpdir): - vek = datasets.fetch_voneconomo(data_dir=tmpdir, verbose=0) - assert all(hasattr(vek, k) and len(vek[k]) == 2 for k in ['gcs', 'ctab']) - assert isinstance(vek.get('info'), str) - - -@pytest.mark.parametrize('dset, expected', [ - ('atl-cammoun2012', ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k', - 'MNI152NLin2009aSym', 'gcs']), - ('tpl-conte69', ['url', 'md5']), - ('atl-pauli2018', ['url', 'md5', 'name']), - ('tpl-fsaverage', ['fsaverage' + f for f in ['', '3', '4', '5', '6']]), - ('atl-schaefer2018', ['fsaverage', 'fsaverage6', 'fsaverage6']) -]) -def test_get_dataset_info(dset, expected): - info = utils._get_dataset_info(dset) - if isinstance(info, dict): - assert all(k in info.keys() for k in expected) - elif isinstance(info, list): - for f in info: - assert all(k in f.keys() for k in expected) - else: - assert False - - with pytest.raises(KeyError): - 
utils._get_dataset_info('notvalid') - - -@pytest.mark.parametrize('version', [ - 'v1', 'v2' -]) -def test_fetch_civet(tmpdir, version): - civet = datasets.fetch_civet(version=version, data_dir=tmpdir, verbose=0) - for key in ('mid', 'white'): - assert key in civet - for hemi in ('lh', 'rh'): - assert hasattr(civet[key], hemi) - assert os.path.isfile(getattr(civet[key], hemi)) - - -def test_get_data_dir(tmpdir): - data_dir = utils._get_data_dir(tmpdir) - assert os.path.isdir(data_dir) diff --git a/netneurotools/tests/test_freesurfer.py b/netneurotools/tests/test_freesurfer.py deleted file mode 100644 index 53bf74e..0000000 --- a/netneurotools/tests/test_freesurfer.py +++ /dev/null @@ -1,82 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.freesurfer functionality.""" - -import numpy as np -import pytest - -from netneurotools import datasets, freesurfer - - -@pytest.fixture(scope='module') -def cammoun_surf(tmp_path_factory): - tmpdir = str(tmp_path_factory.getbasetemp()) - return datasets.fetch_cammoun2012('fsaverage5', data_dir=tmpdir, verbose=0) - - -@pytest.mark.parametrize('method', [ - 'average', 'surface', 'geodesic' -]) -@pytest.mark.parametrize('scale, parcels, n_right', [ - ('scale033', 68, 34), - ('scale060', 114, 57), - ('scale125', 219, 108), - ('scale250', 448, 223), - ('scale500', 1000, 501), -]) -def test_find_parcel_centroids(cammoun_surf, scale, parcels, n_right, method): - lh, rh = cammoun_surf[scale] - - coords, hemi = freesurfer.find_parcel_centroids(lhannot=lh, rhannot=rh, - method=method, - version='fsaverage5') - assert len(coords) == parcels - assert len(hemi) == parcels - assert np.sum(hemi) == n_right - - -@pytest.mark.parametrize('scale, parcels', [ - ('scale033', 68), - ('scale060', 114), - ('scale125', 219), - ('scale250', 448), - ('scale500', 1000), -]) -def test_project_reduce_vertices(cammoun_surf, scale, parcels): - # these functions are partners and should be tested in concert. - # we can test all the normal functionality and also ensure that "round - # trips" work as expected - - # generate "parcellated" data - data = np.random.rand(parcels) - lh, rh = cammoun_surf[scale] - - # do we get the expected number of vertices in our projection? - projected = freesurfer.parcels_to_vertices(data, rhannot=rh, lhannot=lh) - assert len(projected) == 20484 - - # does reduction return our input data, as expected? - reduced = freesurfer.vertices_to_parcels(projected, rhannot=rh, lhannot=lh) - assert np.allclose(data, reduced) - - # can we do this with multi-dimensional data, too? - data = np.random.rand(parcels, 2) - projected = freesurfer.parcels_to_vertices(data, rhannot=rh, lhannot=lh) - assert projected.shape == (20484, 2) - reduced = freesurfer.vertices_to_parcels(projected, rhannot=rh, lhannot=lh) - assert np.allclose(data, reduced) - - # what about int arrays as input? 
- data = np.random.choice(10, size=parcels) - projected = freesurfer.parcels_to_vertices(data, rhannot=rh, lhannot=lh) - reduced = freesurfer.vertices_to_parcels(projected, rhannot=rh, lhannot=lh) - assert np.allclose(reduced, data) - - # number of parcels != annotation spec - with pytest.raises(ValueError): - freesurfer.parcels_to_vertices(np.random.rand(parcels + 1), - rhannot=rh, lhannot=lh) - - # number of vertices != annotation spec - with pytest.raises(ValueError): - freesurfer.vertices_to_parcels(np.random.rand(20485), - rhannot=rh, lhannot=lh) diff --git a/netneurotools/tests/test_modularity.py b/netneurotools/tests/test_modularity.py deleted file mode 100644 index 4018ce0..0000000 --- a/netneurotools/tests/test_modularity.py +++ /dev/null @@ -1,44 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.modularity functionality.""" - -import numpy as np - -from netneurotools import modularity - -rs = np.random.RandomState(1234) - - -def test_dummyvar(): - # generate small example dummy variable code - out = modularity._dummyvar(np.array([1, 1, 2, 3, 3])) - assert np.all(out == np.array([[1, 0, 0], - [1, 0, 0], - [0, 1, 0], - [0, 0, 1], - [0, 0, 1]])) - - allones = np.array([1, 1, 1, 1, 1, 1, 1, 1]) - assert np.all(modularity._dummyvar(allones) == allones) - - -def test_zrand(): - # make the same two-group community assignments (with different labels) - label = np.ones((100, 1)) - X, Y = np.vstack((label, label * 2)), np.vstack((label * 2, label)) - # compare - assert modularity.zrand(X, Y) == modularity.zrand(X, Y[::-1]) - random = rs.choice([0, 1], size=X.shape) - assert modularity.zrand(X, Y) > modularity.zrand(X, random) - assert modularity.zrand(X, Y) == modularity.zrand(X[:, 0], Y[:, 0]) - - -def test_zrand_partitions(): - # make random communities - comm = rs.choice(range(6), size=(10, 100)) - all_diff = modularity._zrand_partitions(comm) - all_same = modularity._zrand_partitions(np.repeat(comm[:, [0]], 10, axis=1)) - - # partition of labels that are all the same should have higher average - # zrand and lower stdev zrand - assert np.nanmean(all_same) > np.nanmean(all_diff) - assert np.nanstd(all_same) < np.nanstd(all_diff) diff --git a/netneurotools/tests/test_plotting.py b/netneurotools/tests/test_plotting.py deleted file mode 100644 index de35811..0000000 --- a/netneurotools/tests/test_plotting.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.plotting functionality.""" - -import matplotlib.pyplot as plt -import numpy as np - -from netneurotools import datasets, plotting -import pytest - - -def test_grid_communities(): - comms = np.asarray([0, 0, 0, 0, 1, 1, 1, 1, 2, 2]) - # check that comms with / without 0 community label yields same output - assert np.allclose(plotting._grid_communities(comms), [0, 4, 8, 10]) - assert np.allclose(plotting._grid_communities(comms + 1), [0, 4, 8, 10]) - - -def test_sort_communities(): - data = np.arange(9).reshape(3, 3) - comms = np.asarray([0, 0, 2]) - # check that comms with / without 0 community label yields same output - assert np.allclose(plotting.sort_communities(data, comms), [1, 0, 2]) - assert np.allclose(plotting.sort_communities(data, comms + 1), [1, 0, 2]) - - -def test_plot_mod_heatmap(): - data = np.random.rand(100, 100) - comms = np.random.choice(4, size=(100,)) - ax = plotting.plot_mod_heatmap(data, comms) - assert isinstance(ax, plt.Axes) - - -@pytest.mark.filterwarnings('ignore') -def test_plot_fsvertex(): - surfer = pytest.importorskip('surfer') - - data = 
np.random.rand(20484) - brain = plotting.plot_fsvertex(data, subject_id='fsaverage5', - offscreen=True) - assert isinstance(brain, surfer.Brain) - - -@pytest.mark.filterwarnings('ignore') -def test_plot_fsaverage(): - surfer = pytest.importorskip('surfer') - - data = np.random.rand(68) - lhannot, rhannot = datasets.fetch_cammoun2012('fsaverage5')['scale033'] - brain = plotting.plot_fsaverage(data, lhannot=lhannot, rhannot=rhannot, - subject_id='fsaverage5', offscreen=True) - assert isinstance(brain, surfer.Brain) - - -def test_plot_point_brain(): - data = np.random.rand(100) - coords = np.random.rand(100, 3) - out = plotting.plot_point_brain(data, coords) - assert isinstance(out, plt.Figure) diff --git a/netneurotools/tests/test_stats.py b/netneurotools/tests/test_stats.py deleted file mode 100644 index 8730bfc..0000000 --- a/netneurotools/tests/test_stats.py +++ /dev/null @@ -1,172 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.stats functionality.""" - -import itertools -import numpy as np -import pytest - -from netneurotools import datasets, stats - - -@pytest.mark.xfail -def test_permtest_1samp(): - assert False - # n1, n2, n3 = 10, 15, 20 - # rs = np.random.RandomState(1234) - # rvn1 = rs.normal(loc=8, scale=10, size=(n1, n2, n3)) - - # t1, p1 = stats.permtest_1samp(rvn1, 1, axis=0) - - -def test_permtest_rel(): - dr, pr = -0.0005, 0.4175824175824176 - dpr = ([dr, -dr], [pr, pr]) - - rvs1 = np.linspace(1, 100, 100) - rvs2 = np.linspace(1.01, 99.989, 100) - rvs1_2D = np.array([rvs1, rvs2]) - rvs2_2D = np.array([rvs2, rvs1]) - - # the p-values in these two cases should be consistent - d, p = stats.permtest_rel(rvs1, rvs2, axis=0, seed=1234) - assert np.allclose([d, p], (dr, pr)) - d, p = stats.permtest_rel(rvs1_2D.T, rvs2_2D.T, axis=0, seed=1234) - assert np.allclose([d, p], dpr) - - # but the p-value will differ here because of _how_ we're drawing the - # random permutations... 
it would be nice if this was consistent, but as - # yet i don't have a great idea on how to make that happen without assuming - # a whole lot about the data - pr = 0.51248751 - tpr = ([dr, -dr], [pr, pr]) - d, p = stats.permtest_rel(rvs1_2D, rvs2_2D, axis=1, seed=1234) - assert np.allclose([d, p], tpr) - - -def test_permtest_pearsonr(): - np.random.seed(12345678) - x, y = datasets.make_correlated_xy(corr=0.1, size=100) - r, p = stats.permtest_pearsonr(x, y) - assert np.allclose([r, p], [0.10032564626876286, 0.3046953046953047]) - - x, y = datasets.make_correlated_xy(corr=0.5, size=100) - r, p = stats.permtest_pearsonr(x, y) - assert np.allclose([r, p], [0.500040365781984, 0.000999000999000999]) - - z = x + np.random.normal(loc=1, size=100) - r, p = stats.permtest_pearsonr(x, np.column_stack([y, z])) - assert np.allclose(r, np.array([0.50004037, 0.25843187])) - assert np.allclose(p, np.array([0.000999, 0.01098901])) - - a, b = datasets.make_correlated_xy(corr=0.9, size=100) - r, p = stats.permtest_pearsonr(np.column_stack([x, a]), - np.column_stack([y, b])) - assert np.allclose(r, np.array([0.50004037, 0.89927523])) - assert np.allclose(p, np.array([0.000999, 0.000999])) - - -@pytest.mark.parametrize('x, y, expected', [ - # basic one-dimensional input - (range(5), range(5), (1.0, 0.0)), - # broadcasting occurs regardless of input order - (np.stack([range(5), range(5, 0, -1)], 1), range(5), - ([1.0, -1.0], [0.0, 0.0])), - (range(5), np.stack([range(5), range(5, 0, -1)], 1), - ([1.0, -1.0], [0.0, 0.0])), - # correlation between matching columns - (np.stack([range(5), range(5, 0, -1)], 1), - np.stack([range(5), range(5, 0, -1)], 1), - ([1.0, 1.0], [0.0, 0.0])) -]) -def test_efficient_pearsonr(x, y, expected): - assert np.allclose(stats.efficient_pearsonr(x, y), expected) - - -def test_efficient_pearsonr_errors(): - with pytest.raises(ValueError): - stats.efficient_pearsonr(range(4), range(5)) - - assert all(np.isnan(a) for a in stats.efficient_pearsonr([], [])) - - -def test_gen_rotation(): - # make a few rotations (some same / different) - rout1, lout1 = stats._gen_rotation(seed=1234) - rout2, lout2 = stats._gen_rotation(seed=1234) - rout3, lout3 = stats._gen_rotation(seed=5678) - - # confirm consistency with the same seed - assert np.allclose(rout1, rout2) and np.allclose(lout1, lout2) - - # confirm inconsistency with different seeds - assert not np.allclose(rout1, rout3) and not np.allclose(lout1, lout3) - - # confirm reflection across L/R hemispheres as expected - # also confirm min/max never exceeds -1/1 - reflected = np.array([[1, -1, -1], [-1, 1, 1], [-1, 1, 1]]) - for r, l in zip([rout1, rout3], [lout1, lout3]): # noqa: E741 - assert np.allclose(r / l, reflected) - assert r.max() < 1 and r.min() > -1 and l.max() < 1 and l.min() > -1 - - -def _get_sphere_coords(s, t, r=1): - """Get coordinates at angles `s` and `t` a sphere of radius `r`.""" - # convert to radians - rad = np.pi / 180 - s, t = s * rad, t * rad - - # calculate new points - x = r * np.cos(s) * np.sin(t) - y = r * np.sin(s) * np.cos(t) - z = r * np.cos(t) - - return x, y, z - - -def test_gen_spinsamples(): - # grab a few points from a spherical surface and duplicate it for the - # "other hemisphere" - coords = [_get_sphere_coords(s, t, r=1) for s, t in - itertools.product(range(0, 360, 45), range(0, 360, 45))] - coords = np.vstack([coords, coords]) - hemi = np.hstack([np.zeros(len(coords) // 2), np.ones(len(coords) // 2)]) - - # generate "normal" test spins - spins, cost = stats.gen_spinsamples(coords, hemi, n_rotate=10, 
seed=1234, - return_cost=True) - assert spins.shape == spins.shape == (len(coords), 10) - - # confirm that `method` parameter functions as desired - for method in ['vasa', 'hungarian']: - spin_exact, cost_exact = stats.gen_spinsamples(coords, hemi, - n_rotate=10, seed=1234, - method=method, - return_cost=True) - assert spin_exact.shape == cost.shape == (len(coords), 10) - for s in spin_exact.T: - assert len(np.unique(s)) == len(s) - - # check that one hemisphere works - mask = hemi == 0 - spins, cost = stats.gen_spinsamples(coords[mask], hemi[mask], n_rotate=10, - seed=1234, return_cost=True) - assert spins.shape == cost.shape == (len(coords[mask]), 10) - - # confirm that check_duplicates will raise warnings - # since spins aren't exact permutations we need to use 4C4 with repeats - # and then perform one more rotation than that number (i.e., 35 + 1) - with pytest.warns(UserWarning): - i = [0, 1, -2, -1] # only grab a few coordinates - stats.gen_spinsamples(coords[i], hemi[i], n_rotate=36, seed=1234) - - # non-3D coords - with pytest.raises(ValueError): - stats.gen_spinsamples(coords[:, :2], hemi) - - # non-1D hemi - with pytest.raises(ValueError): - stats.gen_spinsamples(coords, np.column_stack([hemi, hemi])) - - # different length coords and hemi - with pytest.raises(ValueError): - stats.gen_spinsamples(coords, hemi[:-1]) diff --git a/netneurotools/tests/test_utils.py b/netneurotools/tests/test_utils.py deleted file mode 100644 index 1ac6b91..0000000 --- a/netneurotools/tests/test_utils.py +++ /dev/null @@ -1,47 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.utils functionality.""" - -import numpy as np -import pytest - -from netneurotools import datasets, utils - - -def test_add_constant(): - # if provided a vector it will return a 2D array - assert utils.add_constant(np.random.rand(100)).shape == (100, 2) - - # if provided a 2D array it will return the same, extended by 1 column - out = utils.add_constant(np.random.rand(100, 100)) - assert out.shape == (100, 101) and np.all(out[:, -1] == 1) - - -def test_add_triu(): - arr = np.arange(9).reshape(3, 3) - assert np.all(utils.get_triu(arr) == np.array([1, 2, 5])) - assert np.all(utils.get_triu(arr, k=0) == np.array([0, 1, 2, 4, 5, 8])) - - -@pytest.mark.parametrize('scale, expected', [ - ('scale033', 83), - ('scale060', 129), - ('scale125', 234), - ('scale250', 463), - ('scale500', 1015) -]) -def test_get_centroids(tmpdir, scale, expected): - # fetch test dataset - cammoun = datasets.fetch_cammoun2012('MNI152NLin2009aSym', data_dir=tmpdir, - verbose=0) - - ijk = utils.get_centroids(cammoun[scale]) - xyz = utils.get_centroids(cammoun[scale], image_space=True) - - # we get expected shape regardless of requested coordinate space - assert ijk.shape == xyz.shape == (expected, 3) - # ijk is all positive (i.e., cartesian) coordinates - assert np.all(ijk > 0) - - # requesting specific labels gives us a subset of the full `ijk` - lim = utils.get_centroids(cammoun[scale], labels=[1, 2, 3]) - assert np.all(lim == ijk[:3]) diff --git a/netneurotools/utils.py b/netneurotools/utils.py deleted file mode 100644 index 2d8839e..0000000 --- a/netneurotools/utils.py +++ /dev/null @@ -1,243 +0,0 @@ -# -*- coding: utf-8 -*- -"""Miscellaneous functions of various utility.""" - -import glob -import os -import subprocess - -import nibabel as nib -import numpy as np -from scipy import ndimage -from sklearn.utils.validation import check_array - - -def add_constant(data): - """ - Add a constant (i.e., intercept) term to `data`. 
- - Parameters - ---------- - data : (N, M) array_like - Samples by features data array - - Returns - ------- - data : (N, F) np.ndarray - Where `F` is `M + 1` - - Examples - -------- - >>> from netneurotools import utils - - >>> A = np.zeros((5, 5)) - >>> Ac = utils.add_constant(A) - >>> Ac - array([[0., 0., 0., 0., 0., 1.], - [0., 0., 0., 0., 0., 1.], - [0., 0., 0., 0., 0., 1.], - [0., 0., 0., 0., 0., 1.], - [0., 0., 0., 0., 0., 1.]]) - """ - data = check_array(data, ensure_2d=False) - return np.column_stack([data, np.ones(len(data))]) - - -def get_triu(data, k=1): - """ - Return vectorized version of upper triangle from `data`. - - Parameters - ---------- - data : (N, N) array_like - Input data - k : int, optional - Which diagonal to select from (where primary diagonal is 0). Default: 1 - - Returns - ------- - triu : (N * N-1 / 2) numpy.ndarray - Upper triangle of `data` - - Examples - -------- - >>> from netneurotools import utils - - >>> X = np.array([[1, 0.5, 0.25], [0.5, 1, 0.33], [0.25, 0.33, 1]]) - >>> tri = utils.get_triu(X) - >>> tri - array([0.5 , 0.25, 0.33]) - """ - return data[np.triu_indices(len(data), k=k)].copy() - - -def globpath(*args): - """ - Join `args` with :py:func:`os.path.join` and returns sorted glob output. - - Parameters - ---------- - args : str - Paths / `glob`-compatible regex strings - - Returns - ------- - files : list - Sorted list of files - """ - return sorted(glob.glob(os.path.join(*args))) - - -def rescale(data, low=0, high=1): - """ - Rescale `data` so it is within [`low`, `high`]. - - Parameters - ---------- - data : array_like - Input data array - low : float, optional - Lower bound for rescaling. Default: -1 - high : float, optional - Upper bound for rescaling. Default: 1 - - Returns - ------- - rescaled : np.ndarray - Rescaled data - """ - data = np.asarray(data) - rescaled = np.interp(data, (data.min(), data.max()), (low, high)) - - return rescaled - - -def run(cmd, env=None, return_proc=False, quiet=False): - """ - Run `cmd` via shell subprocess with provided environment `env`. - - Parameters - ---------- - cmd : str - Command to be run as single string - env : dict, optional - If provided, dictionary of key-value pairs to be added to base - environment when running `cmd`. Default: None - return_proc : bool, optional - Whether to return CompletedProcess object. Default: false - quiet : bool, optional - Whether to suppress stdout/stderr from subprocess. Default: False - - Returns - ------- - proc : subprocess.CompletedProcess - Process output - - Raises - ------ - subprocess.CalledProcessError - If subprocess does not exit cleanly - - Examples - -------- - >>> from netneurotools import utils - >>> p = utils.run('echo "hello world"', return_proc=True, quiet=True) - >>> p.returncode - 0 - >>> p.stdout # doctest: +SKIP - 'hello world\\n' - """ # noqa: D301 - merged_env = os.environ.copy() - if env is not None: - if not isinstance(env, dict): - raise TypeError('Provided `env` must be a dictionary, not {}' - .format(type(env))) - merged_env.update(env) - - opts = {} - if quiet: - opts = dict(stdout=subprocess.PIPE, stderr=subprocess.PIPE) - - proc = subprocess.run(cmd, env=merged_env, shell=True, check=True, - universal_newlines=True, **opts) - - if return_proc: - return proc - - -def check_fs_subjid(subject_id, subjects_dir=None): - """ - Check that `subject_id` exists in provided FreeSurfer `subjects_dir`. - - Parameters - ---------- - subject_id : str - FreeSurfer subject ID - subjects_dir : str, optional - Path to FreeSurfer subject directory. 
If not set, will inherit from - the environmental variable $SUBJECTS_DIR. Default: None - - Returns - ------- - subject_id : str - FreeSurfer subject ID, as provided - subjects_dir : str - Full filepath to `subjects_dir` - - Raises - ------ - FileNotFoundError - """ - # check inputs for subjects_dir and subject_id - if subjects_dir is None or not os.path.isdir(subjects_dir): - try: - subjects_dir = os.environ['SUBJECTS_DIR'] - except KeyError: - subjects_dir = os.getcwd() - else: - subjects_dir = os.path.abspath(subjects_dir) - - subjdir = os.path.join(subjects_dir, subject_id) - if not os.path.isdir(subjdir): - raise FileNotFoundError('Cannot find specified subject id {} in ' - 'provided subject directory {}.' - .format(subject_id, subjects_dir)) - - return subject_id, subjects_dir - - -def get_centroids(img, labels=None, image_space=False): - """ - Find centroids of `labels` in `img`. - - Parameters - ---------- - img : niimg-like object - 3D image containing integer label at each point - labels : array_like, optional - List of labels for which to find centroids. If not specified all - labels present in `img` will be used. Zero will be ignored as it is - considered "background." Default: None - image_space : bool, optional - Whether to return xyz (image space) coordinates for centroids based - on transformation in `img.affine`. Default: False - - Returns - ------- - centroids : (N, 3) np.ndarray - Coordinates of centroids for ROIs in input data - """ - from nilearn._utils import check_niimg_3d - - img = check_niimg_3d(img) - data = np.asarray(img.dataobj) - - if labels is None: - labels = np.trim_zeros(np.unique(data)) - - centroids = np.vstack(ndimage.center_of_mass(data, labels=data, - index=labels)) - - if image_space: - centroids = nib.affines.apply_affine(img.affine, centroids) - - return centroids diff --git a/pyproject.toml b/pyproject.toml index 9127969..653beee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,10 +37,15 @@ doc = [ "sphinx_rtd_theme", "sphinx-gallery" ] -plotting = [ +pysurfer = [ + "vtk", "mayavi", "pysurfer" ] +pyvista = [ + "vtk", + "pyvista" +] numba = [ "numba" ] @@ -61,9 +66,15 @@ test = [ requires = ["setuptools", "versioneer[toml]"] build-backend = "setuptools.build_meta" +[tool.setuptools] +include-package-data = true + [tool.setuptools.packages] find = {} +[tool.setuptools.package-data] +"*" = ["*.json", "*.bib"] + [tool.setuptools.dynamic] version = {attr = "netneurotools.__version__"} @@ -76,12 +87,7 @@ tag_prefix = "" parentdir_prefix = "" [tool.ruff] -select = ["E", "F", "B", "W", "D", "NPY"] -ignore = [ - "B905", # zip() without an explicit strict= parameter - # "W605", # Invalid escape sequence: latex - "NPY002", # Replace legacy `np.random` call with `np.random.Generator` -] + line-length = 88 exclude = [ "setup.py", @@ -92,18 +98,26 @@ exclude = [ ] target-version = "py38" -[tool.ruff.pydocstyle] +[tool.ruff.lint] +select = ["E", "F", "B", "W", "D", "NPY"] +ignore = [ + "B905", # zip() without an explicit strict= parameter + # "W605", # Invalid escape sequence: latex + "NPY002", # Replace legacy `np.random` call with `np.random.Generator` +] +preview = true + +[tool.ruff.lint.pydocstyle] convention = "numpy" -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "__init__.py" = ["D104", "F401"] -"netneurotools/tests/*" = ["B011", "D103"] +"test_*" = ["B011"] "examples/*" = ["E402", "D"] [tool.coverage.run] source = ["netneurotools"] omit = [ - "netneurotools/tests/*", "netneurotools/_version.py", ] diff --git 
a/resources/generate_atl-cammoun2012_surface.py b/resources/generate_atl-cammoun2012_surface.py
index 686b518..6edc121 100755
--- a/resources/generate_atl-cammoun2012_surface.py
+++ b/resources/generate_atl-cammoun2012_surface.py
@@ -116,7 +116,7 @@ def combine_cammoun_500(lhannot, rhannot, subject_id,
                              annot=None, quiet=quiet)
 
     # save ctab information from annotation file
-    vtx, ct, names = nib.freesurfer.read_annot(fn)
+    _, ct, names = nib.freesurfer.read_annot(fn)
     data = np.column_stack([[f.decode() for f in names], ct[:, :-1]])
     ctab = ctab.append(pd.DataFrame(data), ignore_index=True)
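
Note on the read_annot change in resources/generate_atl-cammoun2012_surface.py: nib.freesurfer.read_annot returns a (labels, ctab, names) tuple, and the script only uses the colour table and parcel names, so the unused per-vertex labels are now assigned to "_". A minimal sketch of what the call returns, assuming an annotation file is at hand ("lh.aparc.annot" below is a placeholder, not a file shipped with this repository):

    import nibabel as nib
    import numpy as np

    # read_annot returns three things:
    #   labels : (n_vertices,) parcel assignment for every surface vertex
    #   ctab   : (n_parcels, 5) colour table; the last column holds the
    #            encoded annotation value
    #   names  : list of parcel names as bytes
    labels, ctab, names = nib.freesurfer.read_annot("lh.aparc.annot")

    # the patched script keeps only the names and the colour columns,
    # dropping the encoded annotation value in the last column
    rows = np.column_stack([[n.decode() for n in names], ctab[:, :-1]])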
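
Note on the packaging changes in pyproject.toml: setting include-package-data and declaring "*.json" / "*.bib" under [tool.setuptools.package-data] makes setuptools bundle those metadata files into built wheels. How the files are read back at runtime is not part of this diff; the snippet below is only an illustrative sketch of reading bundled package data, with a placeholder file name and the importlib.resources.files API (Python 3.9+), not necessarily what netneurotools itself does.

    import json
    from importlib.resources import files  # available from Python 3.9

    # Read a JSON file that ships as package data inside the installed package.
    # The package path and file name here are assumed for illustration only.
    payload = files("netneurotools.datasets").joinpath("datasets.json").read_text()
    info = json.loads(payload)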