diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e4e5487..953b0fd 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -24,7 +24,7 @@ jobs: python -m pip install --upgrade pip python -m pip install ruff - name: Run style checks - run: ruff . + run: ruff check . run_tests: needs: check_style diff --git a/.gitignore b/.gitignore index 3bfe751..2a0ee24 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ docs/generated/ docs/auto_examples/ docs/modules/ +docs/sg_execution_times.rst # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/MANIFEST.in b/MANIFEST.in index 09d993a..8bd58d4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,5 @@ include README.rst LICENSE environment.yml requirements.txt -recursive-include netneurotools/data * include versioneer.py +include netneurotools/datasets/datasets.json +include netneurotools/datasets/references.json +include netneurotools/datasets/netneurotools.bib \ No newline at end of file diff --git a/dev_environment.yml b/dev_environment.yml deleted file mode 100644 index dd89823..0000000 --- a/dev_environment.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: netneurotools -channels: - - defaults - - conda-forge -dependencies: - - python>=3.6 - - flake8 - - matplotlib - - mayavi - - nibabel - - nilearn - - numba - - "numpy>=1.16" - - pandas - - pip - - "pytest>=3.6" - - pytest-cov - - scikit-learn - - "scipy>=1.4.0" - - "sphinx>=1.2" - - sphinx-gallery - - sphinx_rtd_theme - - versioneer - - pip: - - git+https://github.com/aestrivex/bctpy.git#egg=bctpy - - pysurfer diff --git a/docs/api.rst b/docs/api.rst index a5adc5c..601c92d 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -8,71 +8,102 @@ Python Reference API .. contents:: **List of modules** :local: -.. _ref_network: +.. _ref_datasets: -:mod:`netneurotools.networks` - Constructing networks ------------------------------------------------------ +:mod:`netneurotools.datasets` - Automatic dataset fetching +---------------------------------------------------------- -.. automodule:: netneurotools.networks +.. automodule:: netneurotools.datasets :no-members: :no-inherited-members: -.. currentmodule:: netneurotools.networks +.. currentmodule:: netneurotools.datasets + +To download templates .. autosummary:: :template: function.rst :toctree: generated/ - func_consensus - struct_consensus - threshold_network - binarize_network - match_length_degree_distribution - randmio_und - strength_preserving_rand_sa - strength_preserving_rand_sa_mse_opt - strength_preserving_rand_sa_dir -.. _ref_modularity: + fetch_fsaverage + fetch_hcp_standards + fetch_civet + fetch_conte69 + fetch_yerkes19 -:mod:`netneurotools.modularity` - Calculating network modularity ----------------------------------------------------------------- +To download atlases -.. automodule:: netneurotools.modularity - :no-members: - :no-inherited-members: +.. autosummary:: + :template: function.rst + :toctree: generated/ -.. currentmodule:: netneurotools.modularity + fetch_cammoun2012 + fetch_schaefer2018 + fetch_mmpall + fetch_pauli2018 + fetch_ye2020 + fetch_voneconomo + +To download project-related data .. 
autosummary:: :template: function.rst :toctree: generated/ - consensus_modularity - zrand - get_modularity - get_modularity_z - get_modularity_sig + fetch_vazquez_rodriguez2019 + fetch_mirchi2018 + fetch_hansen_manynetworks + fetch_hansen_receptors + fetch_hansen_genecognition + fetch_hansen_brainstem + fetch_shafiei_hcpmeg + fetch_suarez_mami + fetch_famous_gmat + fetch_neurosynth + -.. _ref_cluster: +.. _ref_network: -:mod:`netneurotools.cluster` - Working with clusters ----------------------------------------------------- +:mod:`netneurotools.networks` - Constructing networks +----------------------------------------------------- -.. automodule:: netneurotools.cluster +.. automodule:: netneurotools.networks :no-members: :no-inherited-members: -.. currentmodule:: netneurotools.cluster +.. currentmodule:: netneurotools.networks + +To construct consensus networks .. autosummary:: :template: function.rst :toctree: generated/ - find_consensus - match_assignments - reorder_assignments - match_cluster_labels + func_consensus + struct_consensus + +To randomize networks + +.. autosummary:: + :template: function.rst + :toctree: generated/ + + randmio_und + match_length_degree_distribution + strength_preserving_rand_sa + strength_preserving_rand_sa_mse_opt + strength_preserving_rand_sa_dir + +Convenient functions + +.. autosummary:: + :template: function.rst + :toctree: generated/ + + binarize_network + threshold_network + .. _ref_plotting: @@ -85,50 +116,42 @@ Python Reference API .. currentmodule:: netneurotools.plotting +PySurfer + .. autosummary:: :template: function.rst :toctree: generated/ - sort_communities - plot_mod_heatmap plot_conte69 plot_fslr plot_fsaverage plot_fsvertex - plot_point_brain -.. _ref_stats: +Pyvista -:mod:`netneurotools.stats` - General statistics functions ---------------------------------------------------------- +.. autosummary:: + :template: function.rst + :toctree: generated/ -.. automodule:: netneurotools.stats - :no-members: - :no-inherited-members: + pv_plot_surface -.. currentmodule:: netneurotools.stats +matplotlib .. autosummary:: :template: function.rst :toctree: generated/ - gen_spinsamples - residualize - get_mad_outliers - efficient_pearsonr - permtest_1samp - permtest_rel - permtest_pearsonr - get_dominance_stats - network_pearsonr - network_pearsonr_numba - network_pearsonr_pairwise - effective_resistance - network_polarisation - network_variance - network_variance_numba - network_covariance - network_covariance_numba + plot_point_brain + plot_mod_heatmap + +Fun color & colormap stuff + +.. autosummary:: + :template: function.rst + :toctree: generated/ + + available_cmaps + .. _ref_metrics: @@ -141,146 +164,172 @@ Python Reference API .. currentmodule:: netneurotools.metrics +Brain network metrics + .. autosummary:: :template: function.rst :toctree: generated/ - _binarize degrees_und degrees_dir distance_wei_floyd retrieve_shortest_path - communicability_bin - communicability_wei - rich_feeder_peripheral navigation_wu get_navigation_path_length - search_information + communicability_bin + communicability_wei path_transitivity - flow_graph + search_information mean_first_passage_time diffusion_efficiency resource_efficiency_bin + flow_graph + assortativity matching_ind_und - _graph_laplacian - -.. _ref_datasets: + rich_feeder_peripheral -:mod:`netneurotools.datasets` - Automatic dataset fetching ----------------------------------------------------------- +Network spreading -.. 
automodule:: netneurotools.datasets - :no-members: - :no-inherited-members: +.. autosummary:: + :template: function.rst + :toctree: generated/ -.. currentmodule:: netneurotools.datasets + simulate_atrophy -Functions to download atlases and templates +Statistical network metrics .. autosummary:: :template: function.rst :toctree: generated/ - fetch_cammoun2012 - fetch_civet - fetch_conte69 - fetch_fsaverage - fetch_pauli2018 - fetch_schaefer2018 - fetch_hcp_standards - fetch_voneconomo + network_pearsonr + network_pearsonr_numba + network_pearsonr_pairwise + effective_resistance + network_polarisation + network_variance + network_variance_numba + network_covariance + network_covariance_numba + -Functions to download real-world datasets +.. _ref_modularity: + +:mod:`netneurotools.modularity` - Calculating network modularity +---------------------------------------------------------------- + +.. automodule:: netneurotools.modularity + :no-members: + :no-inherited-members: + +.. currentmodule:: netneurotools.modularity .. autosummary:: :template: function.rst :toctree: generated/ - fetch_connectome - fetch_mirchi2018 - fetch_vazquez_rodriguez2019 + match_cluster_labels + match_assignments + reorder_assignments + find_consensus + consensus_modularity + zrand + get_modularity + get_modularity_z + get_modularity_sig + + +.. _ref_stats: -Functions to generate (pseudo-random) datasets +:mod:`netneurotools.stats` - General statistics functions +--------------------------------------------------------- + +.. automodule:: netneurotools.stats + :no-members: + :no-inherited-members: + +.. currentmodule:: netneurotools.stats + +Correlations .. autosummary:: :template: function.rst :toctree: generated/ - make_correlated_xy + efficient_pearsonr + weighted_pearsonr + make_correlated_xy -.. _ref_freesurfer: +Permutation tests -:mod:`netneurotools.freesurfer` - FreeSurfer compatibility functions --------------------------------------------------------------------- +.. autosummary:: + :template: function.rst + :toctree: generated/ -.. automodule:: netneurotools.freesurfer - :no-members: - :no-inherited-members: + permtest_1samp + permtest_rel + permtest_pearsonr -.. currentmodule:: netneurotools.freesurfer +Regressions .. autosummary:: :template: function.rst :toctree: generated/ - apply_prob_atlas - find_parcel_centroids - parcels_to_vertices - vertices_to_parcels - spin_data - spin_parcels + residualize + get_dominance_stats + -.. _ref_civet: +.. _ref_spatial: -:mod:`netneurotools.civet` - CIVET compatibility functions ----------------------------------------------------------- +:mod:`netneurotools.spatial` - Spatial statistics +------------------------------------------------- -.. automodule:: netneurotools.civet +.. automodule:: netneurotools.spatial :no-members: :no-inherited-members: -.. currentmodule:: netneurotools.civet +.. currentmodule:: netneurotools.spatial + +Calculating spatial statistics .. autosummary:: :template: function.rst :toctree: generated/ - read_civet - civet_to_freesurfer + morans_i + local_morans_i -.. _ref_utils: -:mod:`netneurotools.utils` - Miscellaneous, grab bag utilities --------------------------------------------------------------- +.. _ref_interface: -.. automodule:: netneurotools.utils +:mod:`netneurotools.interface` - Interface for external tools +------------------------------------------------------------- + +.. automodule:: netneurotools.interface :no-members: :no-inherited-members: -.. currentmodule:: netneurotools.utils +.. currentmodule:: netneurotools.interface .. 
autosummary:: :template: function.rst :toctree: generated/ - run - add_constant - get_triu - get_centroids -.. _ref_colors: +.. _ref_experimental: -:mod:`netneurotools.colors` - Useful colormaps --------------------------------------------------------------- +:mod:`netneurotools.experimental` - Functions in alpha stage +------------------------------------------------------------ -.. automodule:: netneurotools.colors +.. automodule:: netneurotools.experimental :no-members: :no-inherited-members: -.. currentmodule:: netneurotools.colors +.. currentmodule:: netneurotools.experimental .. autosummary:: :template: function.rst :toctree: generated/ - available_cmaps + diff --git a/docs/conf.py b/docs/conf.py index 37ee2e5..6ac2598 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -47,6 +47,7 @@ autodoc_default_options = {'members': True, 'inherited-members': True} numpydoc_show_class_members = False autoclass_content = "class" +napoleon_use_param = False # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 5984c5b..0000000 --- a/environment.yml +++ /dev/null @@ -1,15 +0,0 @@ -name: netneurotools -channels: - - defaults - - conda-forge -dependencies: - - python>=3.6 - - matplotlib - - nibabel - - nilearn - - "numpy>=1.16" - - pip - - scikit-learn - - "scipy>=1.4.0" - - pip: - - git+https://github.com/aestrivex/bctpy.git#egg=bctpy diff --git a/examples/plot_consensus_clustering.py b/examples/plot_consensus_clustering.py index 13faab1..69c337f 100644 --- a/examples/plot_consensus_clustering.py +++ b/examples/plot_consensus_clustering.py @@ -81,9 +81,9 @@ # We'll provide these different assignments to our consensus-finding algorithm # which will generate one final community assignment vector: -from netneurotools import cluster +from netneurotools import modularity -consensus = cluster.find_consensus(np.column_stack(ci), seed=1234) +consensus = modularity.find_consensus(np.column_stack(ci), seed=1234) plotting.plot_mod_heatmap(corr, consensus, cmap='viridis') ############################################################################### diff --git a/examples/plot_perm_pvals.py b/examples/plot_perm_pvals.py index 702d28a..6f87056 100644 --- a/examples/plot_perm_pvals.py +++ b/examples/plot_perm_pvals.py @@ -28,8 +28,8 @@ # We can use ``scipy.stats`` for a standard parametric test to assess whether # the array is different from zero: -from scipy import stats -print(stats.ttest_1samp(rvs, 0.0)) +import scipy.stats as sstats +print(sstats.ttest_1samp(rvs, 0.0)) ############################################################################### # And can do the same thing with permutations using ``netneurotools.stats``: @@ -88,7 +88,7 @@ # These two arrays shouldn't be meaningfully different, and we can test that # with a standard parametric test: -print(stats.ttest_rel(rvs1, rvs2)) +print(sstats.ttest_rel(rvs1, rvs2)) ############################################################################### # Or with a non-parametric permutation test: @@ -114,13 +114,12 @@ # # First, we'll generate two correlated variables: -from netneurotools import datasets -x, y = datasets.make_correlated_xy(corr=0.2, size=100) +x, y = nnstats.make_correlated_xy(corr=0.2, size=100) ############################################################################### # We can generate the Pearson correlation with the standard parametric p-value: -print(stats.pearsonr(x, y)) +print(sstats.pearsonr(x, y)) 
############################################################################### # Or use permutation testing to derive the p-value: @@ -132,7 +131,7 @@ # :func:`~.permtest_rel` apply here, so you can provide same-sized arrays and # correlations will only be calculated for paired columns: -a, b = datasets.make_correlated_xy(corr=0.9, size=100) +a, b = nnstats.make_correlated_xy(corr=0.9, size=100) arr1, arr2 = np.column_stack([x, a]), np.column_stack([y, b]) print(nnstats.permtest_pearsonr(arr1, arr2)) diff --git a/netneurotools/__init__.py b/netneurotools/__init__.py index 8163cc1..2ca326a 100644 --- a/netneurotools/__init__.py +++ b/netneurotools/__init__.py @@ -1,6 +1,7 @@ -__all__ = [ - '__version__', -] - -from . import _version -__version__ = _version.get_versions()['version'] + +from . import _version +__version__ = _version.get_versions()['version'] + +__all__ = [ + '__version__' +] diff --git a/netneurotools/civet.py b/netneurotools/civet.py deleted file mode 100644 index 5c247c0..0000000 --- a/netneurotools/civet.py +++ /dev/null @@ -1,104 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for working with CIVET data (ugh).""" - -import nibabel as nib -import numpy as np -from scipy.interpolate import griddata - -from .datasets import fetch_civet, fetch_fsaverage - -_MNI305to152 = np.array([[0.9975, -0.0073, 0.0176, -0.0429], - [0.0146, 1.0009, -0.0024, 1.5496], - [-0.0130, -0.0093, 0.9971, 1.1840], - [0.0000, 0.0000, 0.0000, 1.0000]]) - - -def read_civet(fname): - """ - Read a CIVET-style .obj geometry file. - - Parameters - ---------- - fname : str or os.PathLike - Filepath to .obj file - - Returns - ------- - vertices : (N, 3) - triangles : (T, 3) - """ - k, polygons = 0, [] - with open(fname, 'r') as src: - n_vert = int(src.readline().split()[6]) - vertices = np.zeros((n_vert, 3)) - for i, line in enumerate(src): - if i < n_vert: - vertices[i] = [float(i) for i in line.split()] - elif i >= (2 * n_vert) + 5: - if not line.strip(): - k = 1 - elif k == 1: - polygons.extend([int(i) for i in line.split()]) - - triangles = np.reshape(np.asarray(polygons), (-1, 3)) - - return vertices, triangles - - -def civet_to_freesurfer(brainmap, surface='mid', version='v1', - freesurfer='fsaverage6', method='nearest', - data_dir=None): - """ - Project `brainmap` in CIVET space to `freesurfer` fsaverage space. - - Uses a nearest-neighbor projection based on the geometry of the vertices - - Parameters - ---------- - brainmap : array_like - CIVET brainmap to be converted to freesurfer space - surface : {'white', 'mid'}, optional - Which CIVET surface to use for geometry of `brainmap`. Default: 'mid' - version : {'v1', 'v2'}, optional - Which CIVET version to use for geometry of `brainmap`. Default: 'v1' - freesurfer : str, optional - Which version of FreeSurfer space to project data to. Must be one of - {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6'}. - Default: 'fsaverage6' - method : {'nearest', 'linear'}, optional - What method of interpolation to use when projecting the data between - surfaces. Default: 'nearest' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. 
Default: None - - Returns - ------- - data : np.ndarray - Provided `brainmap` mapped to FreeSurfer - """ - brainmap = np.asarray(brainmap) - densities = (81924, 327684) - n_vert = brainmap.shape[0] - if n_vert not in densities: - raise ValueError('Unable to interpret `brainmap` space; provided ' - 'array must have length in {}. Received: {}' - .format(densities, n_vert)) - - n_vert = n_vert // 2 - icbm = fetch_civet(density='41k' if n_vert == 40962 else '164k', - version=version, data_dir=data_dir, verbose=0)[surface] - fsavg = fetch_fsaverage(version=freesurfer, data_dir=data_dir, verbose=0) - fsavg = fsavg['pial' if surface == 'mid' else surface] - - data = [] - for n, hemi in enumerate(('lh', 'rh')): - sl = slice(n_vert * n, n_vert * (n + 1)) - vert_cv, _ = read_civet(getattr(icbm, hemi)) - vert_fs = nib.affines.apply_affine( - _MNI305to152, nib.freesurfer.read_geometry(getattr(fsavg, hemi))[0] - ) - data.append(griddata(vert_cv, brainmap[sl], vert_fs, method=method)) - - return np.hstack(data) diff --git a/netneurotools/data/osf.json b/netneurotools/data/osf.json deleted file mode 100644 index 289ecbc..0000000 --- a/netneurotools/data/osf.json +++ /dev/null @@ -1,405 +0,0 @@ -{ - "atl-cammoun2012": { - "gcs": { - "url": [ - "mb37e", - "5ce6bb4423fec40017e82c5e" - ], - "md5": "266c4520af768e766328fb8e6648005d" - }, - "fsaverage": { - "url": [ - "mb37e", - "5ce6c30523fec40017e83439" - ], - "md5": "2a19eb4744c0ce6c243f721bd43ecff0" - }, - "fsaverage5": { - "url": [ - "mb37e", - "5e189a1c57341903868036dd" - ], - "md5": "2afb22e1887d47f1ca81c340fff7692b" - }, - "fsaverage6": { - "url": [ - "mb37e", - "5e189a1b5734190380804072" - ], - "md5": "1df743bff13316f67bd41d13ec691c97" - }, - "MNI152NLin2009aSym": { - "url": [ - "mb37e", - "5e2f4bf0e71ef800301880c2" - ], - "md5": "9da30bad22d732aa5f00a6d178d087c4" - }, - "fslr32k": { - "url": [ - "mb37e", - "5e2f4bf1e71ef80027189c56" - ], - "md5": "a5177319d5e0b8825a91d503ded1a59e" - } - }, - "atl-pauli2018": [ - { - "url": [ - "jkzwp", - "5b11fa3364f25a001973dce0" - ], - "md5": "62dd6ff405d3a8b89ee188cafa3a7f6a", - "name": "atl-pauli2018/atl-Pauli2018_space-MNI152NLin2009cAsym_hemi-both_probabilistic.nii.gz" - }, - { - "url": [ - "jkzwp", - "5b11fa2ff1f288000e625a7f" - ], - "md5": "5a5b6246921be08456304875447c68ed", - "name": "atl-pauli2018/atl-Pauli2018_space-MNI152NLin2009cAsym_hemi-both_deterministic.nii.gz" - }, - { - "url": [ - "mb37e", - "5c93b4f034062c001b1ef50d" - ], - "md5": "390a693abeb1a583151f30aa8798bab5", - "name": "atl-pauli2018/atl-Pauli2018_space-MNI152NLin2009cAsym_info.csv" - } - ], - "tpl-conte69": { - "url": [ - "fvuh8", - "5b198ec5ec24e20011b48548" - ], - "md5": "bd944e3f9f343e0e51e562b440960529" - }, - "tpl-yerkes19": { - "url": [ - "mb37e", - "60ae93d504e91a005f1761ab" - ], - "md5": "9ee4f1605fb690a85b04b61549d62925" - }, - "tpl-fsaverage": { - "fsaverage": { - "url": [ - "mb37e", - "5c82830a1d73810018bdacea" - ], - "md5": "1e82c52ed21d06d4e6e7341c725c5262" - }, - "fsaverage3": { - "url": [ - "mb37e", - "5d9f83b6f6b03e000e1ba285" - ], - "md5": "b4182495d341364e3f7c5b86284d8d20" - }, - "fsaverage4": { - "url": [ - "mb37e", - "5d9f83b7fcf91f00111c7473" - ], - "md5": "5a481421dc1286c7bd9b8a47db5fad0b" - }, - "fsaverage5": { - "url": [ - "mb37e", - "5d9f83b6f6b03e00101c932f" - ], - "md5": "cc75f7290c03970a8b8a06dfc215e925" - }, - "fsaverage6": { - "url": [ - "mb37e", - "5d9f83b7a7bc73000cea05f1" - ], - "md5": "8f75b95c0e47ae935d10745baefa2c49" - } - }, - "tpl-civet": { - "v1": { - "civet41k": { - "url": [ - "mb37e", - 
"601daffd84ecf800fe031868" - ], - "md5": "b27219c876464992e1b61da1c60d8d6e" - } - }, - "v2": { - "civet41k": { - "url": [ - "mb37e", - "601dafe77ad0a80119d9483c" - ], - "md5": "a47b015e471c6a800d236f107fda5b4a" - }, - "civet164k": { - "url": [ - "mb37e", - "601dafe87ad0a8011ad94938" - ], - "md5": "02537ea65d5366acd8de729022a34bab" - } - } - }, - "ds-connectomes": { - "celegans": { - "url": [ - "mb37e", - "5d9b8e4aa7bc73000be65508" - ], - "md5": "f35cd893bc1aff4e8184a528fcda14b9", - "keys": [ - "conn", - "dist", - "labels" - ] - }, - "drosophila": { - "url": [ - "mb37e", - "5d9b8e4aa7bc73000ce65d00" - ], - "md5": "6a67a4fc1b4f35b72c42cca4d0827249", - "keys": [ - "conn", - "coords", - "labels", - "networks" - ] - }, - "human_func_scale033": { - "url": [ - "mb37e", - "5d9b8e4afcf91f000f18f57b" - ], - "md5": "1988ab427d9bc0de075bbe600ce0a27f", - "keys": [ - "conn", - "coords", - "labels" - ] - }, - "human_func_scale060": { - "url": [ - "mb37e", - "5d9b8e4aa7bc73000de67117" - ], - "md5": "4191f5a2b0c5063dcba9935ea0ef0bfe", - "keys": [ - "conn", - "coords", - "labels" - ] - }, - "human_func_scale125": { - "url": [ - "mb37e", - "5d9b8e4b26eb50000e78c987" - ], - "md5": "533e11cf9fea67d536648c9ef939a5f5", - "keys": [ - "conn", - "coords", - "labels" - ] - }, - "human_func_scale250": { - "url": [ - "mb37e", - "5d9b8e4efcf91f0012190ba1" - ], - "md5": "4abc7324c2a9ae04ef6cf5555149b3f4", - "keys": [ - "conn", - "coords", - "labels" - ] - }, - "human_func_scale500": { - "url": [ - "mb37e", - "5d9b8e4ff6b03e000d18b5a1" - ], - "md5": "637c6057476b2508f15f244d528e156d", - "keys": [ - "conn", - "coords", - "labels" - ] - }, - "human_struct_scale033": { - "url": [ - "mb37e", - "5d9b8e4f26eb50000e78c993" - ], - "md5": "27a2101f2f04e0fc8de09a8248793235", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "human_struct_scale060": { - "url": [ - "mb37e", - "5d9b8e4da7bc73000be6550e" - ], - "md5": "9289265ab1bd0fa18611eeaf1afce745", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "human_struct_scale125": { - "url": [ - "mb37e", - "5d9b8e50f6b03e000e18aa37" - ], - "md5": "07e60b141809babe8c2645d93cd24984", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "human_struct_scale250": { - "url": [ - "mb37e", - "5d9b8e51fcf91f001118fdc2" - ], - "md5": "56f9ca8b4ecc63ef9aaf64a606755c09", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "human_struct_scale500": { - "url": [ - "mb37e", - "5d9b8e51a7bc73000ee65769" - ], - "md5": "94724e0446f8cb06207a4521ba1df20f", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "macaque_markov": { - "url": [ - "mb37e", - "5d9b8e56a7bc73000ce65d11" - ], - "md5": "5ce43182afc9c4f779db2c0306afb202", - "keys": [ - "conn", - "dist", - "labels" - ] - }, - "macaque_modha": { - "url": [ - "mb37e", - "5d9b8e5626eb50000d78abd0" - ], - "md5": "f467c62b2670feaf75c93d90d5ed5de6", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "mouse": { - "url": [ - "mb37e", - "5d9b8e5626eb50000e78c9a0" - ], - "md5": "dba5cbbb9e72c1cacda945086d77a125", - "keys": [ - "conn", - "coords", - "dist", - "labels", - "acronyms" - ] - }, - "rat": { - "url": [ - "mb37e", - "5d9b8e56f6b03e000f18d06f" - ], - "md5": "9e1f12ce4fa42082a76d62f89670f5d0", - "keys": [ - "conn", - "labels" - ] - } - }, - "ds-vazquez_rodriguez2019": { - "url": [ - "mb37e", - "5d9f5aa4f6b03e000e1b819e" - ], - "md5": "c710365a2cc5cddb8a2fbb5f6ae421a3" - }, - "atl-schaefer2018": { - "fsaverage": { - "url": [ - "mb37e", - "5dbc8d7dcfc96c000dc3581c" - ], - 
"md5": "74dfe4237efaccabf057897c49e8af94" - }, - "fsaverage5": { - "url": [ - "mb37e", - "5dbc8d7daf84c3000eebffb2" - ], - "md5": "45a8c784f1979eb33a119bdab912a51f" - }, - "fsaverage6": { - "url": [ - "mb37e", - "5dbc8d7bcfc96c000ec6dca2" - ], - "md5": "8738daccab4648c3e891a1c8d3a9ec1f" - }, - "fslr32k": { - "url": [ - "mb37e", - "5e3086e4af75930094bdd507" - ], - "md5": "d8378f33107ed5d98c27e8070ebb5aa2" - } - }, - "atl-mmpall": { - "fslr32k": { - "url": [ - "mb37e", - "6047bac259e910009b83114f" - ], - "md5": "fd641742685a239d9c3f60e19a280ca2" - } - }, - "atl-voneconomo_koskinas": { - "url": [ - "mb37e", - "5ed80005fabc45000d639900" - ], - "md5": "67085e2577d21dc3a742f4fcde6e3b18" - } -} diff --git a/netneurotools/datasets/__init__.py b/netneurotools/datasets/__init__.py index cfe50c1..0cd400e 100644 --- a/netneurotools/datasets/__init__.py +++ b/netneurotools/datasets/__init__.py @@ -1,16 +1,53 @@ -"""Functions for fetching and generating datasets.""" +"""Functions for handling datasets.""" + + +from .fetch_template import ( + fetch_fsaverage, fetch_hcp_standards, fetch_civet, + fetch_conte69, fetch_yerkes19 +) + + +from .fetch_atlas import ( + # cortical + fetch_cammoun2012, fetch_schaefer2018, fetch_mmpall, + # subcortical + fetch_pauli2018, fetch_ye2020, + # annotation + fetch_voneconomo +) + + +from .fetch_project import ( + # old projects + fetch_vazquez_rodriguez2019, fetch_mirchi2018, + # new projects + fetch_hansen_manynetworks, fetch_hansen_receptors, fetch_hansen_genecognition, + fetch_hansen_brainstem, fetch_shafiei_hcpmeg, fetch_suarez_mami, + # example data + fetch_famous_gmat, + # resources + fetch_neurosynth +) + +from .datasets_utils import ( + FREESURFER_IGNORE, _get_freesurfer_subjid +) + __all__ = [ - 'fetch_cammoun2012', 'fetch_pauli2018', 'fetch_fsaverage', 'fetch_conte69', - 'fetch_connectome', 'available_connectomes', 'fetch_vazquez_rodriguez2019', - 'fetch_mirchi2018', 'make_correlated_xy', 'fetch_schaefer2018', - 'fetch_hcp_standards', 'fetch_voneconomo', 'fetch_mmpall', 'fetch_civet' + # fetch_template + 'fetch_fsaverage', 'fetch_hcp_standards', 'fetch_civet', + 'fetch_conte69', 'fetch_yerkes19', + # fetch_atlas + 'fetch_cammoun2012', 'fetch_schaefer2018', 'fetch_mmpall', + 'fetch_pauli2018', 'fetch_ye2020', + 'fetch_voneconomo', + # fetch_project + 'fetch_vazquez_rodriguez2019', 'fetch_mirchi2018', + 'fetch_hansen_manynetworks', 'fetch_hansen_receptors', 'fetch_hansen_genecognition', + 'fetch_hansen_brainstem', 'fetch_shafiei_hcpmeg', 'fetch_suarez_mami', + 'fetch_famous_gmat', + 'fetch_neurosynth', + # datasets_utils + 'FREESURFER_IGNORE', '_get_freesurfer_subjid' ] - -from .fetchers import (fetch_cammoun2012, fetch_pauli2018, fetch_fsaverage, - fetch_conte69, fetch_yerkes19, fetch_connectome, - available_connectomes, fetch_vazquez_rodriguez2019, - fetch_schaefer2018, fetch_hcp_standards, - fetch_voneconomo, fetch_mmpall, fetch_civet) -from .generators import (make_correlated_xy) -from .mirchi import (fetch_mirchi2018) diff --git a/netneurotools/datasets/mirchi.py b/netneurotools/datasets/_mirchi2018.py similarity index 71% rename from netneurotools/datasets/mirchi.py rename to netneurotools/datasets/_mirchi2018.py index 190ae63..9addfea 100644 --- a/netneurotools/datasets/mirchi.py +++ b/netneurotools/datasets/_mirchi2018.py @@ -1,13 +1,9 @@ -# -*- coding: utf-8 -*- """Code for re-generating results from Mirchi et al., 2018 (SCAN).""" -import os from urllib.request import HTTPError, urlopen import numpy as np -from .utils import _get_data_dir - TIMESERIES = 
("https://s3.amazonaws.com/openneuro/ds000031/ds000031_R1.0.2" "/uncompressed/derivatives/sub-01/ses-{0}/" @@ -132,47 +128,3 @@ def _get_panas(data_dir=None, resume=True, verbose=1): measures[subscale] = measure.sum(axis=-1) return measures - - -def fetch_mirchi2018(data_dir=None, resume=True, verbose=1): - """ - Download (and creates) dataset for replicating Mirchi et al., 2018, SCAN. - - Parameters - ---------- - data_dir : str, optional - Directory to check for existing data files (if they exist) or to save - generated data files. Files should be named mirchi2018_fc.npy and - mirchi2018_panas.csv for the functional connectivity and behavioral - data, respectively. - - Returns - ------- - X : (73, 198135) numpy.ndarray - Functional connections from MyConnectome rsfMRI time series data - Y : (73, 13) numpy.ndarray - PANAS subscales from MyConnectome behavioral data - """ - data_dir = os.path.join(_get_data_dir(data_dir=data_dir), 'ds-mirchi2018') - os.makedirs(data_dir, exist_ok=True) - - X_fname = os.path.join(data_dir, 'myconnectome_fc.npy') - Y_fname = os.path.join(data_dir, 'myconnectome_panas.csv') - - if not os.path.exists(X_fname): - X = _get_fc(data_dir=data_dir, resume=resume, verbose=verbose) - np.save(X_fname, X, allow_pickle=False) - else: - X = np.load(X_fname, allow_pickle=False) - - if not os.path.exists(Y_fname): - Y = _get_panas(data_dir=data_dir, resume=resume, verbose=verbose) - np.savetxt(Y_fname, np.column_stack(list(Y.values())), - header=','.join(Y.keys()), delimiter=',', fmt='%i') - # convert dictionary to structured array before returning - Y = np.array([tuple(row) for row in np.column_stack(list(Y.values()))], - dtype=dict(names=list(Y.keys()), formats=['i8'] * len(Y))) - else: - Y = np.genfromtxt(Y_fname, delimiter=',', names=True, dtype=int) - - return X, Y diff --git a/netneurotools/datasets/datasets.json b/netneurotools/datasets/datasets.json new file mode 100644 index 0000000..e9fdd66 --- /dev/null +++ b/netneurotools/datasets/datasets.json @@ -0,0 +1,267 @@ +{ + "atl-cammoun2012": { + "gcs": { + "url-type": "osf", + "url": [ + "mb37e", + "5ce6bb4423fec40017e82c5e" + ], + "md5": "266c4520af768e766328fb8e6648005d" + }, + "fsaverage": { + "url-type": "osf", + "url": [ + "mb37e", + "5ce6c30523fec40017e83439" + ], + "md5": "2a19eb4744c0ce6c243f721bd43ecff0" + }, + "fsaverage5": { + "url-type": "osf", + "url": [ + "mb37e", + "5e189a1c57341903868036dd" + ], + "md5": "2afb22e1887d47f1ca81c340fff7692b" + }, + "fsaverage6": { + "url-type": "osf", + "url": [ + "mb37e", + "5e189a1b5734190380804072" + ], + "md5": "1df743bff13316f67bd41d13ec691c97" + }, + "MNI152NLin2009aSym": { + "url-type": "osf", + "url": [ + "mb37e", + "5e2f4bf0e71ef800301880c2" + ], + "md5": "9da30bad22d732aa5f00a6d178d087c4" + }, + "fslr32k": { + "url-type": "osf", + "url": [ + "mb37e", + "5e2f4bf1e71ef80027189c56" + ], + "md5": "a5177319d5e0b8825a91d503ded1a59e" + } + }, + "atl-pauli2018": { + "probabilistic": { + "url-type": "osf", + "url": [ + "jkzwp", + "5b11fa3364f25a001973dce0" + ], + "md5": "62dd6ff405d3a8b89ee188cafa3a7f6a", + "folder-name": "atl-pauli2018", + "file-name": "atl-Pauli2018_space-MNI152NLin2009cAsym_hemi-both_probabilistic.nii.gz" + }, + "deterministic": { + "url-type": "osf", + "url": [ + "jkzwp", + "5b11fa2ff1f288000e625a7f" + ], + "md5": "5a5b6246921be08456304875447c68ed", + "folder-name": "atl-pauli2018", + "file-name": "atl-Pauli2018_space-MNI152NLin2009cAsym_hemi-both_deterministic.nii.gz" + }, + "info": { + "url-type": "osf", + "url": [ + "mb37e", + 
"5c93b4f034062c001b1ef50d" + ], + "md5": "390a693abeb1a583151f30aa8798bab5", + "folder-name": "atl-pauli2018", + "file-name": "atl-Pauli2018_space-MNI152NLin2009cAsym_info.csv" + } + }, + "tpl-conte69": { + "url-type": "osf", + "url": [ + "fvuh8", + "5b198ec5ec24e20011b48548" + ], + "md5": "bd944e3f9f343e0e51e562b440960529" + }, + "tpl-yerkes19": { + "url-type": "osf", + "url": [ + "mb37e", + "60ae93d504e91a005f1761ab" + ], + "md5": "9ee4f1605fb690a85b04b61549d62925" + }, + "tpl-fsaverage": { + "fsaverage": { + "url-type": "osf", + "url": [ + "mb37e", + "5c82830a1d73810018bdacea" + ], + "md5": "1e82c52ed21d06d4e6e7341c725c5262" + }, + "fsaverage3": { + "url-type": "osf", + "url": [ + "mb37e", + "5d9f83b6f6b03e000e1ba285" + ], + "md5": "b4182495d341364e3f7c5b86284d8d20" + }, + "fsaverage4": { + "url-type": "osf", + "url": [ + "mb37e", + "5d9f83b7fcf91f00111c7473" + ], + "md5": "5a481421dc1286c7bd9b8a47db5fad0b" + }, + "fsaverage5": { + "url-type": "osf", + "url": [ + "mb37e", + "5d9f83b6f6b03e00101c932f" + ], + "md5": "cc75f7290c03970a8b8a06dfc215e925" + }, + "fsaverage6": { + "url-type": "osf", + "url": [ + "mb37e", + "5d9f83b7a7bc73000cea05f1" + ], + "md5": "8f75b95c0e47ae935d10745baefa2c49" + } + }, + "tpl-civet": { + "v1": { + "civet41k": { + "url-type": "osf", + "url": [ + "mb37e", + "601daffd84ecf800fe031868" + ], + "md5": "b27219c876464992e1b61da1c60d8d6e" + } + }, + "v2": { + "civet41k": { + "url-type": "osf", + "url": [ + "mb37e", + "601dafe77ad0a80119d9483c" + ], + "md5": "a47b015e471c6a800d236f107fda5b4a" + }, + "civet164k": { + "url-type": "osf", + "url": [ + "mb37e", + "601dafe87ad0a8011ad94938" + ], + "md5": "02537ea65d5366acd8de729022a34bab" + } + } + }, + "ds-famous_gmat": { + "url-type": "osf", + "url": [ + "mb37e", + "664683ca4664da9ebced6b70" + ], + "md5": "b803de1058579881a759f475704e9f35" + }, + "ds-vazquez_rodriguez2019": { + "url-type": "osf", + "url": [ + "mb37e", + "5d9f5aa4f6b03e000e1b819e" + ], + "md5": "c710365a2cc5cddb8a2fbb5f6ae421a3" + }, + "atl-schaefer2018": { + "fsaverage": { + "url-type": "osf", + "url": [ + "mb37e", + "5dbc8d7dcfc96c000dc3581c" + ], + "md5": "74dfe4237efaccabf057897c49e8af94" + }, + "fsaverage5": { + "url-type": "osf", + "url": [ + "mb37e", + "5dbc8d7daf84c3000eebffb2" + ], + "md5": "45a8c784f1979eb33a119bdab912a51f" + }, + "fsaverage6": { + "url-type": "osf", + "url": [ + "mb37e", + "5dbc8d7bcfc96c000ec6dca2" + ], + "md5": "8738daccab4648c3e891a1c8d3a9ec1f" + }, + "fslr32k": { + "url-type": "osf", + "url": [ + "mb37e", + "5e3086e4af75930094bdd507" + ], + "md5": "d8378f33107ed5d98c27e8070ebb5aa2" + } + }, + "atl-mmpall": { + "fslr32k": { + "url-type": "osf", + "url": [ + "mb37e", + "6047bac259e910009b83114f" + ], + "md5": "fd641742685a239d9c3f60e19a280ca2" + } + }, + "atl-voneconomo_koskinas": { + "url-type": "osf", + "url": [ + "mb37e", + "5ed80005fabc45000d639900" + ], + "md5": "67085e2577d21dc3a742f4fcde6e3b18" + }, + "tpl-hcp_standards": { + "standard_mesh_atlases": { + "url-type": "osf", + "url": [ + "mb37e", + "6643d2ab2eacc48a57097091" + ], + "md5": "806abac71f76b8dba8af467ef313c3f7", + "keys": [ + "fs_LR_32k", + "fsaverage", + "fsaverage5", + "fsaverage6", + "MNI152NLin2009cAsym" + ] + } + }, + "ds-hansen_manynetworks": { + "url-type": "github-release", + "url": [ + "netneurolab", + "hansen_many_networks", + "v1.0.0" + ], + "folder-name": "hansen_many_networks-1.0.0", + "md5": "9e503c759506293aa441054cfd206ccc" + } +} diff --git a/netneurotools/datasets/datasets_utils.py b/netneurotools/datasets/datasets_utils.py new file mode 
100644 index 0000000..82bd228 --- /dev/null +++ b/netneurotools/datasets/datasets_utils.py @@ -0,0 +1,291 @@ +"""Utilites for loading / creating datasets.""" + +import json +import os +from collections import namedtuple +import importlib.resources + + +SURFACE = namedtuple('Surface', ('lh', 'rh')) + +FREESURFER_IGNORE = [ + 'unknown', 'corpuscallosum', 'Background+FreeSurfer_Defined_Medial_Wall' +] + + +def _get_data_dir(data_dir=None): + """ + Get path to netneurotools data directory. + + Parameters + ---------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + + Returns + ------- + data_dir : str + Path to use as data directory + """ + if data_dir is None: + data_dir = os.environ.get('NNT_DATA', os.path.join('~', 'nnt-data')) + data_dir = os.path.expanduser(data_dir) + if not os.path.exists(data_dir): + os.makedirs(data_dir) + + return data_dir + + +def _decode_urls(data): + """ + Format `data` object with OSF API URL. + + Parameters + ---------- + data : object + If dict with a `url` key, will format OSF_API with relevant values + + Returns + ------- + data : object + Input data with all `url` dict keys formatted + """ + OSF_API = "https://files.osf.io/v1/resources/{}/providers/osfstorage/{}" + GITHUB_RELEASE_API = "https://github.com/{}/{}/archive/refs/tags/{}.tar.gz" + + if isinstance(data, str) or isinstance(data, list): + return data + elif 'url' in data: + if data['url-type'] == 'osf': + data['url'] = OSF_API.format(*data['url']) + elif data['url-type'] == 'github-release': + data['url'] = GITHUB_RELEASE_API.format(*data['url']) + else: + raise ValueError("URL type {} not recognized".format(data['url-type'])) + + for key, value in data.items(): + data[key] = _decode_urls(value) + + return data + + +def _load_resource_json(relative_path): + """ + Load JSON file from package resources. + + Parameters + ---------- + relative_path : str + Path to JSON file relative to package resources + + Returns + ------- + resource_json : dict + JSON file loaded as a dictionary + """ + # handling pkg_resources.resource_filename deprecation + if getattr(importlib.resources, 'files', None) is not None: + f_resource = importlib.resources.files("netneurotools") / relative_path + else: + from pkg_resources import resource_filename + f_resource = resource_filename('netneurotools', relative_path) + + with open(f_resource) as src: + resource_json = json.load(src) + + return resource_json + + +NNT_DATASETS = _load_resource_json('datasets/datasets.json') +NNT_DATASETS = _decode_urls(NNT_DATASETS) + + +def _get_dataset_info(name): + """ + Return url and MD5 checksum for dataset `name`. + + Parameters + ---------- + name : str + Name of dataset + + Returns + ------- + url : str + URL from which to download dataset + md5 : str + MD5 checksum for file downloade from `url` + """ + try: + return NNT_DATASETS[name] + except KeyError: + raise KeyError( + f"Provided dataset {name} is not valid. " + f"Must be one of: {sorted(NNT_DATASETS.keys())}" + ) from None + + +NNT_REFERENCES = _load_resource_json('datasets/references.json') + + +def _get_reference_info(name, verbose=1, return_dict=False): + """ + Return reference information for dataset `name`. 
+ + Parameters + ---------- + name : str + Name of dataset + + Returns + ------- + reference : str + Reference information for dataset + """ + try: + curr_refs = NNT_REFERENCES[name] + if verbose: + print("Please cite the following papers if you are using this function:") + for bib_category, bib_category_items in curr_refs.items(): + print(f" [{bib_category}]:") + for bib_item in bib_category_items: + print(f" {bib_item['citation']}") + + if return_dict: + return curr_refs + except KeyError: + raise KeyError( + f"Provided dataset {name} is not valid. " + f"Must be one of: {sorted(NNT_REFERENCES.keys())}" + ) from None + + +def _fill_reference_json(bib_file, json_file, overwrite=False, use_defaults=False): + """ + Fill in citation information for references in a JSON file. + + For internal use only. + + Parameters + ---------- + bib_file : str + Path to BibTeX file containing references + json_file : str + Path to JSON file containing references + overwrite : bool, optional + Whether to overwrite existing citation information. Default: False + use_defaults : bool, optional + Whether to use default paths for `bib_file` and `json_file`. Default: False + + Returns + ------- + None + """ + if use_defaults: + bib_file = \ + importlib.resources.files("netneurotools") / "datasets/netneurotools.bib" + json_file = \ + importlib.resources.files("netneurotools") / "datasets/references.json" + + from pybtex import PybtexEngine + engine = PybtexEngine() + + def _get_citation(key): + s = engine.format_from_file( + filename=bib_file, style="unsrt", + citations=[key], output_backend="plaintext" + ) + return s.strip("\n").replace("[1] ", "") + + with open(json_file) as src: + nnt_refs = json.load(src) + + for _, value in nnt_refs.items(): + for bib_category in value: + for bib_item in value[bib_category]: + if bib_item["bibkey"] not in ["", None]: + if bib_item["citation"] == "" or overwrite: + bib_item["citation"] = _get_citation(bib_item["bibkey"]) + + with open(json_file, "w") as dst: + json.dump(nnt_refs, dst, indent=4) + + +def _check_freesurfer_subjid(subject_id, subjects_dir=None): + """ + Check that `subject_id` exists in provided FreeSurfer `subjects_dir`. + + Parameters + ---------- + subject_id : str + FreeSurfer subject ID + subjects_dir : str, optional + Path to FreeSurfer subject directory. If not set, will inherit from + the environmental variable $SUBJECTS_DIR. Default: None + + Returns + ------- + subject_id : str + FreeSurfer subject ID, as provided + subjects_dir : str + Full filepath to `subjects_dir` + + Raises + ------ + FileNotFoundError + """ + # check inputs for subjects_dir and subject_id + if subjects_dir is None or not os.path.isdir(subjects_dir): + try: + subjects_dir = os.environ['SUBJECTS_DIR'] + except KeyError: + subjects_dir = os.getcwd() + else: + subjects_dir = os.path.abspath(subjects_dir) + + subjdir = os.path.join(subjects_dir, subject_id) + if not os.path.isdir(subjdir): + raise FileNotFoundError( + f'Cannot find specified subject id {subject_id} in ' + f'provided subject directory {subjects_dir}.' + ) + + return subject_id, subjects_dir + + +def _get_freesurfer_subjid(subject_id, subjects_dir=None): + """ + Get fsaverage version `subject_id`, fetching if required. + + Parameters + ---------- + subject_id : str + FreeSurfer subject ID + subjects_dir : str, optional + Path to FreeSurfer subject directory. If not set, will inherit from + the environmental variable $SUBJECTS_DIR. 
Default: None + + Returns + ------- + subject_id : str + FreeSurfer subject ID + subjects_dir : str + Path to subject directory with `subject_id` + """ + # check for FreeSurfer install w/fsaverage; otherwise, fetch required + try: + subject_id, subjects_dir = _check_freesurfer_subjid(subject_id, subjects_dir) + except FileNotFoundError: + if 'fsaverage' not in subject_id: + raise ValueError( + f'Provided subject {subject_id} does not exist in provided ' + f'subjects_dir {subjects_dir}' + ) from None + from .fetch_template import fetch_fsaverage + fetch_fsaverage(subject_id) + subjects_dir = os.path.join(_get_data_dir(), 'tpl-fsaverage') + subject_id, subjects_dir = _check_freesurfer_subjid(subject_id, subjects_dir) + + return subject_id, subjects_dir diff --git a/netneurotools/datasets/fetch_atlas.py b/netneurotools/datasets/fetch_atlas.py new file mode 100644 index 0000000..5df542c --- /dev/null +++ b/netneurotools/datasets/fetch_atlas.py @@ -0,0 +1,451 @@ +"""Functions for fetching atlas data.""" +import itertools +import warnings + +try: + # nilearn 0.10.3 + from nilearn.datasets._utils import fetch_files +except ImportError: + from nilearn.datasets.utils import _fetch_files as fetch_files + +from sklearn.utils import Bunch + +from .datasets_utils import ( + SURFACE, + _get_data_dir, _get_dataset_info, _get_reference_info +) + + +def fetch_cammoun2012( + version='MNI152NLin2009aSym', + data_dir=None, resume=True, verbose=1 + ): + """ + Download files for Cammoun et al., 2012 multiscale parcellation. + + This dataset contains + + If you used this data, please cite 1_. + + Parameters + ---------- + version : str, optional + Specifies which version of the dataset to download, where + 'MNI152NLin2009aSym' will return .nii.gz atlas files defined in MNI152 + space, 'fsaverageX' will return .annot files defined in fsaverageX + space (FreeSurfer 6.0.1), 'fslr32k' will return .label.gii files in + fs_LR_32k HCP space, and 'gcs' will return FreeSurfer-style .gcs + probabilistic atlas files for generating new, subject-specific + parcellations. Default: 'MNI152NLin2009aSym' + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['scale033', 'scale060', 'scale125', + 'scale250', 'scale500'], where corresponding values are lists of + filepaths to downloaded parcellation files. + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Notes + ----- + License: https://raw.githubusercontent.com/LTS5/cmp/master/COPYRIGHT + + References + ---------- + .. [1] Leila Cammoun, Xavier Gigandet, Djalel Meskaldji, Jean Philippe + Thiran, Olaf Sporns, Kim Q Do, Philippe Maeder, Reto Meuli, and Patric + Hagmann. Mapping the human connectome at multiple scales with diffusion + spectrum mri. Journal of neuroscience methods, 203(2):386\u2013397, + 2012. + """ + if version == 'surface': + warnings.warn('Providing `version="surface"` is deprecated and will ' + 'be removed in a future release. 
For consistent ' + 'behavior please use `version="fsaverage"` instead.', + DeprecationWarning, stacklevel=2) + version = 'fsaverage' + elif version == 'volume': + warnings.warn('Providing `version="volume"` is deprecated and will ' + 'be removed in a future release. For consistent ' + 'behavior please use `version="MNI152NLin2009aSym"` ' + 'instead.', + DeprecationWarning, stacklevel=2) + version = 'MNI152NLin2009aSym' + + versions = [ + 'gcs', 'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k', + 'MNI152NLin2009aSym' + ] + if version not in versions: + raise ValueError( + f'The version of Cammoun et al., 2012 parcellation ' + f'requested {version} does not exist. Must be one of {versions}' + ) + + dataset_name = 'atl-cammoun2012' + _get_reference_info(dataset_name, verbose=verbose) + + keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name)[version] + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{dataset_name}.tar.gz' + } + + # filenames differ based on selected version of dataset + if version == 'MNI152NLin2009aSym': + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-Cammoun2012_space-MNI152NLin2009aSym_res-{res[-3:]}' + f'_deterministic{suff}' + for res in keys for suff in ['.nii.gz'] + ] + [ + f'{dataset_name}/{version}/' + f'atl-Cammoun2012_space-MNI152NLin2009aSym_info.csv' + ] + elif version == 'fslr32k': + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-Cammoun2012_space-fslr32k_res-{res[-3:]}_hemi-{hemi}' + f'_deterministic{suff}' + for res in keys for hemi in ['L', 'R'] for suff in ['.label.gii'] + ] + elif version in ('fsaverage', 'fsaverage5', 'fsaverage6'): + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-Cammoun2012_space-{version}_res-{res[-3:]}_hemi-{hemi}' + f'_deterministic{suff}' + for res in keys for hemi in ['L', 'R'] for suff in ['.annot'] + ] + else: + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-Cammoun2012_res-{res[5:]}_hemi-{hemi}' + f'_probabilistic{suff}' + for res in keys[:-1] + ['scale500v1', 'scale500v2', 'scale500v3'] + for hemi in ['L', 'R'] for suff in ['.gcs', '.ctab'] + ] + _files = [(f, info['url'], opts) for f in _filenames] + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + + if version == 'MNI152NLin2009aSym': + keys += ['info'] + elif version in ('fslr32k', 'fsaverage', 'fsaverage5', 'fsaverage6'): + data = [SURFACE(*data[i:i + 2]) for i in range(0, len(data), 2)] + else: + data = [data[::2][i:i + 2] for i in range(0, len(data) // 2, 2)] + # deal with the fact that last scale is split into three files :sigh: + data = data[:-3] + [list(itertools.chain.from_iterable(data[-3:]))] + + return Bunch(**dict(zip(keys, data))) + + +def fetch_schaefer2018( + version='fsaverage', + data_dir=None, resume=True, verbose=1 + ): + """ + Download FreeSurfer .annot files for Schaefer et al., 2018 parcellation. + + This dataset contains + + If you used this data, please cite 1_. + + Parameters + ---------- + version : {'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'} + Specifies which surface annotation files should be matched to. Default: + 'fsaverage' + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys of format '{}Parcels{}Networks' where + corresponding values are the left/right hemisphere annotation files + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. 
If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Notes + ----- + License: https://github.com/ThomasYeoLab/CBIG/blob/master/LICENSE.md + + References + ---------- + .. [1] Alexander Schaefer, Ru Kong, Evan M Gordon, Timothy O Laumann, + Xi-Nian Zuo, Avram J Holmes, Simon B Eickhoff, and BT Thomas Yeo. + Local-global parcellation of the human cerebral cortex from intrinsic + functional connectivity mri. Cerebral cortex, 28(9):3095\u20133114, + 2018. + """ + versions = ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'] + if version not in versions: + raise ValueError( + f'The version of Schaefer et al., 2018 parcellation ' + f'requested "{version}" does not exist. Must be one of {versions}' + ) + + dataset_name = 'atl-schaefer2018' + _get_reference_info(dataset_name, verbose=verbose) + + keys = [ + f'{p}Parcels{n}Networks' + for p in range(100, 1001, 100) for n in [7, 17] + ] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name)[version] + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{dataset_name}.tar.gz' + } + + if version == 'fslr32k': + hemispheres, suffix = ['LR'], 'dlabel.nii' + else: + hemispheres, suffix = ['L', 'R'], 'annot' + + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-Schaefer2018_space-{version}_hemi-{hemi}_desc-{desc}' + f'_deterministic.{suffix}' + for desc in keys for hemi in hemispheres + ] + + _files = [(f, info['url'], opts) for f in _filenames] + + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + + if suffix == 'annot': + data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] + + return Bunch(**dict(zip(keys, data))) + + +def fetch_mmpall( + version='fslr32k', + data_dir=None, resume=True, verbose=1 + ): + """ + Download .label.gii files for Glasser et al., 2016 MMPAll atlas. + + This dataset contains + + If you used this data, please cite 1_. + + Parameters + ---------- + version : {'fslr32k'} + Specifies which surface annotation files should be matched to. Default: + 'fslr32k' + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Namedtuple with fields ('lh', 'rh') corresponding to filepaths to + left/right hemisphere parcellation files + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Notes + ----- + License: https://www.humanconnectome.org/study/hcp-young-adult/document/wu-minn-hcp-consortium-open-access-data-use-terms + + References + ---------- + .. [1] Matthew F Glasser, Timothy S Coalson, Emma C Robinson, Carl D Hacker, + John Harwell, Essa Yacoub, Kamil Ugurbil, Jesper Andersson, Christian F + Beckmann, Mark Jenkinson, and others. A multi-modal parcellation of + human cerebral cortex. Nature, 536(7615):171\u2013178, 2016. 
+ """ + versions = ['fslr32k'] + if version not in versions: + raise ValueError( + f'The version of Glasser et al., 2016 parcellation ' + f'requested "{version}" does not exist. Must be one of {versions}' + ) + + dataset_name = 'atl-mmpall' + _get_reference_info(dataset_name, verbose=verbose) + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name)[version] + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{dataset_name}.tar.gz' + } + + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-MMPAll_space-{version}_hemi-{hemi}_deterministic.label.gii' + for hemi in ['L', 'R'] + ] + _files = [(f, info['url'], opts) for f in _filenames] + + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + + return SURFACE(*data) + + +def fetch_pauli2018(data_dir=None, resume=True, verbose=1): + """ + Download files for Pauli et al., 2018 subcortical parcellation. + + This dataset contains + + If you used this data, please cite 1_. + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['probabilistic', 'deterministic'], + where corresponding values are filepaths to downloaded atlas files. + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Notes + ----- + License: CC-BY Attribution 4.0 International + + References + ---------- + .. [1] Wolfgang M Pauli, Amanda N Nili, and J Michael Tyszka. A + high-resolution probabilistic in vivo atlas of human subcortical brain + nuclei. Scientific data, 5(1):1\u201313, 2018. + """ + dataset_name = 'atl-pauli2018' + _get_reference_info(dataset_name, verbose=verbose) + + keys = ['probabilistic', 'deterministic', 'info'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name) + + _files = [] + for _, v in info.items(): + _f = f'{v["folder-name"]}/{v["file-name"]}' + _url = v['url'] + _opts = { + 'md5sum': v['md5'], + 'move': f'{v["folder-name"]}/{v["file-name"]}' + } + _files.append( + (_f, _url, _opts) + ) + + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + + return Bunch(**dict(zip(keys, data))) + + +def fetch_ye2020(): + """Fetch Ye et al., 2020 subcortical parcellation.""" + pass + + +def fetch_voneconomo(data_dir=None, url=None, resume=True, verbose=1): + """ + Fetch von-Economo Koskinas probabilistic FreeSurfer atlas. + + This dataset contains + + If you used this data, please cite 1_. + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['gcs', 'ctab', 'info'] + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Notes + ----- + License: CC-BY-NC-SA 4.0 + + References + ---------- + .. 
[1] Lianne H Scholtens, Marcel A de Reus, Siemon C de Lange, Ruben + Schmidt, and Martijn P van den Heuvel. An mri von economo\u2013koskinas + atlas. NeuroImage, 170:249\u2013256, 2018. + """ + dataset_name = 'atl-voneconomo_koskinas' + _get_reference_info(dataset_name, verbose=verbose) + + keys = ['gcs', 'ctab', 'info'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name) + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{dataset_name}.tar.gz' + } + + _filenames = [ + f'{dataset_name}/' + f'atl-vonEconomoKoskinas_hemi-{hemi}_probabilistic.{suff}' + for hemi in ['L', 'R'] for suff in ['gcs', 'ctab'] + ] + [ + f'{dataset_name}/atl-vonEconomoKoskinas_info.csv' + ] + _files = [(f, info['url'], opts) for f in _filenames] + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + + data = [SURFACE(*data[:-1:2])] + [SURFACE(*data[1:-1:2])] + [data[-1]] + + return Bunch(**dict(zip(keys, data))) diff --git a/netneurotools/datasets/fetch_project.py b/netneurotools/datasets/fetch_project.py new file mode 100644 index 0000000..ce3def1 --- /dev/null +++ b/netneurotools/datasets/fetch_project.py @@ -0,0 +1,346 @@ +"""Functions for fetching project data.""" +import os +from pathlib import Path +import numpy as np + +try: + # nilearn 0.10.3 + from nilearn.datasets._utils import fetch_files +except ImportError: + from nilearn.datasets.utils import _fetch_files as fetch_files + +from sklearn.utils import Bunch + +from .datasets_utils import ( + _get_data_dir, _get_dataset_info, _get_reference_info +) + +from ._mirchi2018 import _get_fc, _get_panas + + +def fetch_vazquez_rodriguez2019(data_dir=None, resume=True, verbose=1): + """ + Download files from Vazquez-Rodriguez et al., 2019, PNAS. + + This dataset contains one file: rsquared_gradient.csv, which contains + two columns: rsquared and gradient. + + If you used this data, please cite [1]_. + + Returns + ------- + data : :class:`sklearn.utils.Bunch` + Dictionary-like object with fetched data. + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. [1] Bertha V\u00e1zquez-Rodr\u00edguez, Laura E Su\u00e1rez, Ross D + Markello, Golia Shafiei, Casey Paquola, Patric Hagmann, Martijn P Van + Den Heuvel, Boris C Bernhardt, R Nathan Spreng, and Bratislav Misic. + Gradients of structure\u2013function tethering across neocortex. + Proceedings of the National Academy of Sciences, + 116(42):21219\u201321227, 2019. 
+    """
+    dataset_name = 'ds-vazquez_rodriguez2019'
+    _get_reference_info(dataset_name, verbose=verbose)
+
+    data_dir = _get_data_dir(data_dir=data_dir)
+    info = _get_dataset_info(dataset_name)
+    opts = {
+        'uncompress': True,
+        'md5sum': info['md5'],
+        'move': f'{dataset_name}.tar.gz'
+    }
+    fetched = fetch_files(
+        data_dir,
+        files=[(dataset_name, info['url'], opts)],
+        resume=resume, verbose=verbose
+    )
+    fetched = Path(fetched[0])
+
+    # load data
+    rsq, grad = np.loadtxt(
+        fetched / "rsquared_gradient.csv",
+        delimiter=',', skiprows=1
+    ).T
+    data = {
+        'rsquared': rsq,
+        'gradient': grad
+    }
+
+    return Bunch(**data)
+
+
+def fetch_mirchi2018(data_dir=None, resume=True, verbose=1):
+    """
+    Download (and create) the dataset for replicating Mirchi et al., 2018, SCAN.
+
+    Parameters
+    ----------
+    data_dir : str, optional
+        Directory to check for existing data files (if they exist) or to save
+        generated data files. Files should be named myconnectome_fc.npy and
+        myconnectome_panas.csv for the functional connectivity and behavioral
+        data, respectively.
+
+    Returns
+    -------
+    X : (73, 198135) numpy.ndarray
+        Functional connections from MyConnectome rsfMRI time series data
+    Y : (73, 13) numpy.ndarray
+        PANAS subscales from MyConnectome behavioral data
+    """
+    data_dir = os.path.join(_get_data_dir(data_dir=data_dir), 'ds-mirchi2018')
+    os.makedirs(data_dir, exist_ok=True)
+
+    X_fname = os.path.join(data_dir, 'myconnectome_fc.npy')
+    Y_fname = os.path.join(data_dir, 'myconnectome_panas.csv')
+
+    if not os.path.exists(X_fname):
+        X = _get_fc(data_dir=data_dir, resume=resume, verbose=verbose)
+        np.save(X_fname, X, allow_pickle=False)
+    else:
+        X = np.load(X_fname, allow_pickle=False)
+
+    if not os.path.exists(Y_fname):
+        Y = _get_panas(data_dir=data_dir, resume=resume, verbose=verbose)
+        np.savetxt(Y_fname, np.column_stack(list(Y.values())),
+                   header=','.join(Y.keys()), delimiter=',', fmt='%i')
+        # convert dictionary to structured array before returning
+        Y = np.array([tuple(row) for row in np.column_stack(list(Y.values()))],
+                     dtype=dict(names=list(Y.keys()), formats=['i8'] * len(Y)))
+    else:
+        Y = np.genfromtxt(Y_fname, delimiter=',', names=True, dtype=int)
+
+    return X, Y
+
+
+def fetch_hansen_manynetworks(data_dir=None, resume=True, verbose=1):
+    """
+    Download files from Hansen et al., 2023, PLOS Biology.
+
+    This dataset contains regional gene co-expression matrices for the
+    Cammoun033, Schaefer100, and Schaefer400 parcellations, along with a
+    functional co-activation matrix for the Cammoun033 parcellation.
+
+    If you used this data, please cite [1]_.
+
+    Returns
+    -------
+    filenames : :class:`sklearn.utils.Bunch`
+        Dictionary-like object with fetched data.
+
+    Other Parameters
+    ----------------
+    data_dir : str, optional
+        Path to use as data directory. If not specified, will check for
+        environmental variable 'NNT_DATA'; if that is not set, will use
+        `~/nnt-data` instead. Default: None
+    resume : bool, optional
+        Whether to attempt to resume partial download, if possible. Default: True
+    verbose : int, optional
+        Modifies verbosity of download, where higher numbers mean more updates.
+        Default: 1
+
+    References
+    ----------
+    .. 
[1] + """ + dataset_name = 'ds-hansen_manynetworks' + _get_reference_info(dataset_name, verbose=verbose) + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name) + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{dataset_name}/{dataset_name}.tar.gz' + } + # the download info["folder-name"].tar.gz was moved to + # {dataset_name}/{dataset_name}.tar.gz and uncompressed + # to keep the same structure as other datasets + fetched = fetch_files( + data_dir, + files=[(f'{dataset_name}/{info["folder-name"]}', info['url'], opts)], + resume=resume, verbose=verbose + ) + fetched = Path(fetched[0]) + + # load data + data = { + "cammoun033": { + "gene": fetched / "data/Cammoun033/gene_coexpression.npy", + "func": fetched / "data/Cammoun033/func_coactivation.npy", + }, + "schaefer100": { + "gene": fetched / "data/Schaefer100/gene_coexpression.npy", + }, + "schaefer400": { + "gene": fetched / "data/Schaefer400/gene_coexpression.npy", + } + } + + return Bunch(**data) + + +def fetch_hansen_receptors(): + """Download files from Hansen et al., 2022, Nature Neuroscience.""" + pass + + +def fetch_hansen_genecognition(): + """Download files from Hansen et al., 2021, Nature Human Behaviour.""" + pass + + +def fetch_hansen_brainstem(): + """Download files from Hansen et al., 2024.""" + pass + + +def fetch_shafiei_hcpmeg(): + """Download files from Shafiei et al., 2022 & Shafiei et al., 2023.""" + pass + + +def fetch_suarez_mami(): + """Download files from Suarez et al., 2022, eLife.""" + pass + + +def fetch_famous_gmat( + dataset, + data_dir=None, resume=True, verbose=1 + ): + """ + Download files from multi-species connectomes. + + This dataset contains + + If you used this data, please cite celegans [1]_, drosophila [2]_, human + [3]_, macaque_markov [4]_, macaque_modha [5]_, mouse [6]_, rat [7]_. + + Parameters + ---------- + dataset : str + Specifies which dataset to download. + + Returns + ------- + data : :class:`sklearn.utils.Bunch` + Dictionary-like object with, at a minimum, keys ['conn', 'labels', + 'ref'] providing connectivity / correlation matrix, region labels, and + relevant reference. Other possible keys include 'dist' (an array of + Euclidean distances between regions of 'conn'), 'coords' (an array of + xyz coordinates for regions of 'conn'), 'acronyms' (an array of + acronyms for regions of 'conn'), and 'networks' (an array of network + affiliations for regions of 'conn'). + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. [1] Lav R Varshney, Beth L Chen, Eric Paniagua, David H Hall, and Dmitri + B Chklovskii. Structural properties of the caenorhabditis elegans + neuronal network. PLoS computational biology, 7(2):e1001066, 2011. + .. [2] Ann-Shyn Chiang, Chih-Yung Lin, Chao-Chun Chuang, Hsiu-Ming Chang, + Chang-Huain Hsieh, Chang-Wei Yeh, Chi-Tin Shih, Jian-Jheng Wu, Guo-Tzau + Wang, Yung-Chang Chen, and others. Three-dimensional reconstruction of + brain-wide wiring networks in drosophila at single-cell resolution. + Current biology, 21(1):1\u201311, 2011. + .. 
[3] Alessandra Griffa, Yasser Alem\u00e1n-G\u00f3mez, and Patric Hagmann. + Structural and functional connectome from 70 young healthy adults [data + set]. Zenodo, 2019. + .. [4] Nikola T Markov, Maria Ercsey-Ravasz, Camille Lamy, Ana Rita Ribeiro + Gomes, Lo\u00efc Magrou, Pierre Misery, Pascale Giroud, Pascal Barone, + Colette Dehay, Zolt\u00e1n Toroczkai, and others. The role of long-range + connections on the specificity of the macaque interareal cortical + network. Proceedings of the National Academy of Sciences, + 110(13):5187\u20135192, 2013. + .. [5] Dharmendra S Modha and Raghavendra Singh. Network architecture of the + long-distance pathways in the macaque brain. Proceedings of the National + Academy of Sciences, 107(30):13485\u201313490, 2010. + .. [6] Mikail Rubinov, Rolf JF Ypma, Charles Watson, and Edward T Bullmore. + Wiring cost and topological participation of the mouse brain connectome. + Proceedings of the National Academy of Sciences, + 112(32):10032\u201310037, 2015. + .. [7] Mihail Bota, Olaf Sporns, and Larry W Swanson. Architecture of the + cerebral cortical association connectome underlying cognition. + Proceedings of the National Academy of Sciences, + 112(16):E2093\u2013E2101, 2015. + """ + available_connectomes = [ + 'celegans', + 'drosophila', + 'human_func_scale033', + 'human_func_scale060', + 'human_func_scale125', + 'human_func_scale250', + 'human_func_scale500', + 'human_struct_scale033', + 'human_struct_scale060', + 'human_struct_scale125', + 'human_struct_scale250', + 'human_struct_scale500', + 'macaque_markov', + 'macaque_modha', + 'mouse', + 'rat' + ] + + if dataset not in available_connectomes: + raise ValueError( + f'Provided dataset {dataset} not available; ' + f'must be one of {available_connectomes}' + ) + + base_dataset_name = 'ds-famous_gmat' + _get_reference_info(base_dataset_name, verbose=verbose) + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(base_dataset_name) + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{base_dataset_name}.tar.gz' + } + fetched = fetch_files( + data_dir, + files=[(base_dataset_name, info['url'], opts)], + resume=resume, verbose=verbose + ) + fetched = Path(fetched[0]) + + data = {} + for f in (fetched / dataset).glob("*.csv"): + try: + data[f.stem] = np.loadtxt(f, delimiter=',') + except ValueError: + data[f.stem] = np.loadtxt(f, delimiter=',', dtype=str) + + return Bunch(**data) + + +def fetch_neurosynth(): + """Download Neurosynth data.""" + pass diff --git a/netneurotools/datasets/fetch_template.py b/netneurotools/datasets/fetch_template.py new file mode 100644 index 0000000..fca331e --- /dev/null +++ b/netneurotools/datasets/fetch_template.py @@ -0,0 +1,409 @@ +"""Functions for fetching template data.""" + + +import json +from pathlib import Path +import os.path as op + +try: + # nilearn 0.10.3 + from nilearn.datasets._utils import fetch_files +except ImportError: + from nilearn.datasets.utils import _fetch_files as fetch_files + +from sklearn.utils import Bunch + +from .datasets_utils import ( + SURFACE, + _get_data_dir, _get_dataset_info, _get_reference_info, _check_freesurfer_subjid +) + + +def fetch_fsaverage( + version='fsaverage', + data_dir=None, resume=True, verbose=1 + ): + """ + Download files for fsaverage FreeSurfer template. + + This dataset contains + + If you used this data, please cite 1_, 2_, 3_. + + Parameters + ---------- + version : str, optional + One of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', + 'fsaverage6'}. 
Default: 'fsaverage' + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['surf'] where corresponding values + are length-2 lists downloaded template files (each list composed of + files for the left and right hemisphere). + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. [1] Anders M Dale, Bruce Fischl, and Martin I Sereno. Cortical + surface-based analysis: i. segmentation and surface reconstruction. + Neuroimage, 9(2):179\u2013194, 1999. + .. [2] Bruce Fischl, Martin I Sereno, and Anders M Dale. Cortical + surface-based analysis: ii: inflation, flattening, and a surface-based + coordinate system. Neuroimage, 9(2):195\u2013207, 1999. + .. [3] Bruce Fischl, Martin I Sereno, Roger BH Tootell, and Anders M Dale. + High-resolution intersubject averaging and a coordinate system for the + cortical surface. Human brain mapping, 8(4):272\u2013284, 1999. + """ + versions = [ + 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' + ] + if version not in versions: + raise ValueError( + f'The version of fsaverage requested {version} does not ' + f'exist. Must be one of {versions}' + ) + + dataset_name = 'tpl-fsaverage' + _get_reference_info(dataset_name, verbose=verbose) + + keys = ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name)[version] + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{dataset_name}.tar.gz' + } + + _filenames = [ + f"{version}/surf/{hemi}.{surf}" + for surf in keys for hemi in ['lh', 'rh'] + ] + + try: + # use local FreeSurfer data if available + data_dir = _check_freesurfer_subjid(version)[1] + data = [op.join(data_dir, f) for f in _filenames] + except FileNotFoundError: + _filenames = [f"{dataset_name}/{_}" for _ in _filenames] + _files = [(f, info['url'], opts) for f in _filenames] + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + + data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] + + return Bunch(**dict(zip(keys, data))) + + +def fetch_hcp_standards(data_dir=None, resume=True, verbose=1): + """ + Fetch HCP standard mesh atlases for converting between FreeSurfer and HCP. + + This dataset contains + + The original file was from 3_, but is no longer available. The archived + file is available from 4_. + + If you used this data, please cite 1_, 2_. + + Returns + ------- + standards : str + Filepath to standard_mesh_atlases directory + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. 
[1] David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna
+       Barch, Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen,
+       Maurizio Corbetta, Sandra W Curtiss, and others. The human connectome
+       project: a data acquisition perspective. Neuroimage,
+       62(4):2222\u20132231, 2012.
+    .. [2] Matthew F Glasser, Stamatios N Sotiropoulos, J Anthony Wilson,
+       Timothy S Coalson, Bruce Fischl, Jesper L Andersson, Junqian Xu, Saad
+       Jbabdi, Matthew Webster, Jonathan R Polimeni, and others. The minimal
+       preprocessing pipelines for the human connectome project. Neuroimage,
+       80:105\u2013124, 2013.
+    .. [3] http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip
+    .. [4] https://web.archive.org/web/20220121035833/http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip
+    """
+    dataset_name = 'tpl-hcp_standards'
+    _get_reference_info(dataset_name, verbose=verbose)
+
+    data_dir = _get_data_dir(data_dir=data_dir)
+    info = _get_dataset_info(dataset_name)["standard_mesh_atlases"]
+
+    opts = {
+        'uncompress': True,
+        'md5sum': info['md5'],
+        'move': f'{dataset_name}.tar.gz'
+    }
+    fetched = fetch_files(
+        data_dir,
+        files=[(f'{dataset_name}/standard_mesh_atlases', info['url'], opts)],
+        resume=resume, verbose=verbose
+    )
+    fetched = Path(fetched[0])
+
+    return fetched
+
+
+def fetch_civet(
+    density='41k', version='v1',
+    data_dir=None, resume=True, verbose=1
+    ):
+    """
+    Fetch CIVET surface files.
+
+    This dataset contains left and right hemisphere 'mid' and 'white' surface
+    geometry files (.obj) for the CIVET template in ICBM152 space.
+
+    If you used this data, please cite [1]_, [2]_, [3]_.
+
+    Parameters
+    ----------
+    density : {'41k', '164k'}, optional
+        Which density of the CIVET-space geometry files to fetch. The
+        high-resolution '164k' surface only exists for version 'v2'.
+    version : {'v1', 'v2'}, optional
+        Which version of the CIVET surfaces to use. Default: 'v1'
+
+    Returns
+    -------
+    filenames : :class:`sklearn.utils.Bunch`
+        Dictionary-like object with keys ['mid', 'white'] containing geometry
+        files for CIVET surface. Note for version 'v1' the 'mid' and 'white'
+        files are identical.
+
+    Other Parameters
+    ----------------
+    data_dir : str, optional
+        Path to use as data directory. If not specified, will check for
+        environmental variable 'NNT_DATA'; if that is not set, will use
+        `~/nnt-data` instead. Default: None
+    resume : bool, optional
+        Whether to attempt to resume partial download, if possible. Default: True
+    verbose : int, optional
+        Modifies verbosity of download, where higher numbers mean more updates.
+        Default: 1
+
+    Notes
+    -----
+    License: https://github.com/aces/CIVET_Full_Project/blob/master/LICENSE
+
+    References
+    ----------
+    .. [1] Oliver Lyttelton, Maxime Boucher, Steven Robbins, and Alan Evans. An
+       unbiased iterative group registration template for cortical surface
+       analysis. Neuroimage, 34(4):1535\u20131544, 2007.
+    .. [2] Vladimir S Fonov, Alan C Evans, Robert C McKinstry, C Robert Almli,
+       and DL Collins. Unbiased nonlinear average age-appropriate brain
+       templates from birth to adulthood. NeuroImage, 47:S102, 2009.
+    .. [3] Y Ad-Dab'bagh, O Lyttelton, J Muehlboeck, C Lepage, D Einarson, K
+       Mok, O Ivanov, R Vincent, J Lerch, and E Fombonne. The civet
+       image-processing environment: a fully automated comprehensive pipeline
+       for anatomical neuroimaging research. proceedings of the 12th annual
+       meeting of the organization for human brain mapping. Florence, Italy,
+       pages 2266, 2006.
+    """
+    densities = ['41k', '164k']
+    if density not in densities:
+        raise ValueError(
+            f'The density of CIVET requested "{density}" does not exist. 
' + f'Must be one of {densities}' + ) + versions = ['v1', 'v2'] + if version not in versions: + raise ValueError( + f'The version of CIVET requested "{version}" does not exist. ' + f'Must be one of {versions}' + ) + + if version == 'v1' and density == '164k': + raise ValueError('The "164k" density CIVET surface only exists for ' + 'version "v2"') + + dataset_name = 'tpl-civet' + _get_reference_info(dataset_name, verbose=verbose) + + keys = ['mid', 'white'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name)[version][f'civet{density}'] + + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{dataset_name}.tar.gz' + } + + _filenames = [ + f"{dataset_name}/{version}/civet{density}/" + f"tpl-civet_space-ICBM152_hemi-{hemi}_den-{density}_{surf}.obj" + for surf in keys for hemi in ['L', 'R'] + ] + _files = [(f, info['url'], opts) for f in _filenames] + + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + + data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] + + return Bunch(**dict(zip(keys, data))) + + +def fetch_conte69(data_dir=None, resume=True, verbose=1): + """ + Download files for Van Essen et al., 2012 Conte69 template. + + This dataset contains + + If you used this data, please cite 1_, 2_. + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['midthickness', 'inflated', + 'vinflated'], where corresponding values are lists of filepaths to + downloaded template files. + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. [1] David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna Barch, + Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, Maurizio + Corbetta, Sandra W Curtiss, and others. The human connectome project: a + data acquisition perspective. Neuroimage, 62(4):2222\u20132231, 2012. + .. [2] David C Van Essen, Matthew F Glasser, Donna L Dierker, John Harwell, + and Timothy Coalson. Parcellations and hemispheric asymmetries of human + cerebral cortex analyzed on surface-based atlases. Cerebral cortex, + 22(10):2241\u20132262, 2012. + .. 
[3] http://brainvis.wustl.edu/wiki/index.php//Caret:Atlases/Conte69_Atlas
+    """
+    dataset_name = 'tpl-conte69'
+    _get_reference_info(dataset_name, verbose=verbose)
+
+    keys = ['midthickness', 'inflated', 'vinflated']
+
+    data_dir = _get_data_dir(data_dir=data_dir)
+    info = _get_dataset_info(dataset_name)
+    opts = {
+        'uncompress': True,
+        'md5sum': info['md5'],
+        'move': f'{dataset_name}.tar.gz'
+    }
+
+    _filenames = [
+        f"{dataset_name}/tpl-conte69_space-MNI305_variant-fsLR32k_{res}.{hemi}.surf.gii"
+        for res in keys for hemi in ['L', 'R']
+    ] + [
+        f"{dataset_name}/template_description.json"
+    ]
+    _files = [(f, info['url'], opts) for f in _filenames]
+
+    data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose)
+
+    with open(data[-1], 'r') as src:
+        data[-1] = json.load(src)
+
+    # bundle hemispheres together
+    data = [SURFACE(*data[:-1][i:i + 2]) for i in range(0, 6, 2)] + [data[-1]]
+
+    return Bunch(**dict(zip(keys + ['info'], data)))
+
+
+def fetch_yerkes19(data_dir=None, resume=True, verbose=1):
+    """
+    Download files for Donahue et al., 2016 Yerkes19 template.
+
+    This dataset contains left and right hemisphere midthickness, inflated,
+    and very-inflated ('vinflated') surfaces for the Yerkes19 macaque template
+    in fsLR-32k space.
+
+    If you used this data, please cite [1]_.
+
+    Returns
+    -------
+    filenames : :class:`sklearn.utils.Bunch`
+        Dictionary-like object with keys ['midthickness', 'inflated',
+        'vinflated'], where corresponding values are lists of filepaths to
+        downloaded template files.
+
+    Other Parameters
+    ----------------
+    data_dir : str, optional
+        Path to use as data directory. If not specified, will check for
+        environmental variable 'NNT_DATA'; if that is not set, will use
+        `~/nnt-data` instead. Default: None
+    resume : bool, optional
+        Whether to attempt to resume partial download, if possible. Default: True
+    verbose : int, optional
+        Modifies verbosity of download, where higher numbers mean more updates.
+        Default: 1
+
+    References
+    ----------
+    .. [1] Chad J Donahue, Stamatios N Sotiropoulos, Saad Jbabdi, Moises
+       Hernandez-Fernandez, Timothy E Behrens, Tim B Dyrby, Timothy Coalson,
+       Henry Kennedy, Kenneth Knoblauch, David C Van Essen, and others. Using
+       diffusion tractography to predict cortical connection strength and
+       distance: a quantitative comparison with tracers in the monkey. Journal
+       of Neuroscience, 36(25):6758\u20136770, 2016.
+    .. 
[2] https://balsa.wustl.edu/reference/show/976nz + """ + dataset_name = 'tpl-yerkes19' + _get_reference_info(dataset_name, verbose=verbose) + + keys = ['midthickness', 'inflated', 'vinflated'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name) + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{dataset_name}.tar.gz' + } + _filenames = [ + f"{dataset_name}/tpl-yerkes19_space-fsLR32k_{res}.{hemi}.surf.gii" + for res in keys for hemi in ['L', 'R'] + + ] + _files = [(f, info['url'], opts) for f in _filenames] + + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + + # bundle hemispheres together + data = [SURFACE(*data[i:i + 2]) for i in range(0, 6, 2)] + + return Bunch(**dict(zip(keys + ['info'], data))) diff --git a/netneurotools/datasets/fetchers.py b/netneurotools/datasets/fetchers.py deleted file mode 100644 index b2fa95d..0000000 --- a/netneurotools/datasets/fetchers.py +++ /dev/null @@ -1,882 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for fetching datasets from the internet.""" - -from collections import namedtuple -import itertools -import json -import os.path as op -import warnings - -try: - # nilearn 0.10.3 - from nilearn.datasets._utils import fetch_files as _fetch_files -except ImportError: - from nilearn.datasets.utils import _fetch_files - -import numpy as np -from sklearn.utils import Bunch - -from .utils import _get_data_dir, _get_dataset_info -from ..utils import check_fs_subjid - -SURFACE = namedtuple('Surface', ('lh', 'rh')) - - -def fetch_cammoun2012(version='MNI152NLin2009aSym', data_dir=None, url=None, - resume=True, verbose=1): - """ - Download files for Cammoun et al., 2012 multiscale parcellation. - - Parameters - ---------- - version : str, optional - Specifies which version of the dataset to download, where - 'MNI152NLin2009aSym' will return .nii.gz atlas files defined in MNI152 - space, 'fsaverageX' will return .annot files defined in fsaverageX - space (FreeSurfer 6.0.1), 'fslr32k' will return .label.gii files in - fs_LR_32k HCP space, and 'gcs' will return FreeSurfer-style .gcs - probabilistic atlas files for generating new, subject-specific - parcellations. Default: 'MNI152NLin2009aSym' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['scale033', 'scale060', 'scale125', - 'scale250', 'scale500'], where corresponding values are lists of - filepaths to downloaded parcellation files. - - References - ---------- - Cammoun, L., Gigandet, X., Meskaldji, D., Thiran, J. P., Sporns, O., Do, K. - Q., Maeder, P., and Meuli, R., & Hagmann, P. (2012). Mapping the human - connectome at multiple scales with diffusion spectrum MRI. Journal of - Neuroscience Methods, 203(2), 386-397. - - Notes - ----- - License: https://raw.githubusercontent.com/LTS5/cmp/master/COPYRIGHT - """ - if version == 'surface': - warnings.warn('Providing `version="surface"` is deprecated and will ' - 'be removed in a future release. 
For consistent ' - 'behavior please use `version="fsaverage"` instead.', - DeprecationWarning, stacklevel=2) - version = 'fsaverage' - elif version == 'volume': - warnings.warn('Providing `version="volume"` is deprecated and will ' - 'be removed in a future release. For consistent ' - 'behavior please use `version="MNI152NLin2009aSym"` ' - 'instead.', - DeprecationWarning, stacklevel=2) - version = 'MNI152NLin2009aSym' - - versions = [ - 'gcs', 'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k', - 'MNI152NLin2009aSym' - ] - if version not in versions: - raise ValueError('The version of Cammoun et al., 2012 parcellation ' - 'requested "{}" does not exist. Must be one of {}' - .format(version, versions)) - - dataset_name = 'atl-cammoun2012' - keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version] - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - # filenames differ based on selected version of dataset - if version == 'MNI152NLin2009aSym': - filenames = [ - 'atl-Cammoun2012_space-MNI152NLin2009aSym_res-{}_deterministic{}' - .format(res[-3:], suff) for res in keys for suff in ['.nii.gz'] - ] + ['atl-Cammoun2012_space-MNI152NLin2009aSym_info.csv'] - elif version == 'fslr32k': - filenames = [ - 'atl-Cammoun2012_space-fslr32k_res-{}_hemi-{}_deterministic{}' - .format(res[-3:], hemi, suff) for res in keys - for hemi in ['L', 'R'] for suff in ['.label.gii'] - ] - elif version in ('fsaverage', 'fsaverage5', 'fsaverage6'): - filenames = [ - 'atl-Cammoun2012_space-{}_res-{}_hemi-{}_deterministic{}' - .format(version, res[-3:], hemi, suff) for res in keys - for hemi in ['L', 'R'] for suff in ['.annot'] - ] - else: - filenames = [ - 'atl-Cammoun2012_res-{}_hemi-{}_probabilistic{}' - .format(res[5:], hemi, suff) - for res in keys[:-1] + ['scale500v1', 'scale500v2', 'scale500v3'] - for hemi in ['L', 'R'] for suff in ['.gcs', '.ctab'] - ] - - files = [ - (op.join(dataset_name, version, f), url, opts) for f in filenames - ] - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - - if version == 'MNI152NLin2009aSym': - keys += ['info'] - elif version in ('fslr32k', 'fsaverage', 'fsaverage5', 'fsaverage6'): - data = [SURFACE(*data[i:i + 2]) for i in range(0, len(data), 2)] - else: - data = [data[::2][i:i + 2] for i in range(0, len(data) // 2, 2)] - # deal with the fact that last scale is split into three files :sigh: - data = data[:-3] + [list(itertools.chain.from_iterable(data[-3:]))] - - return Bunch(**dict(zip(keys, data))) - - -def fetch_conte69(data_dir=None, url=None, resume=True, verbose=1): - """ - Download files for Van Essen et al., 2012 Conte69 template. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. 
- Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['midthickness', 'inflated', - 'vinflated'], where corresponding values are lists of filepaths to - downloaded template files. - - References - ---------- - http://brainvis.wustl.edu/wiki/index.php//Caret:Atlases/Conte69_Atlas - - Van Essen, D. C., Glasser, M. F., Dierker, D. L., Harwell, J., & Coalson, - T. (2011). Parcellations and hemispheric asymmetries of human cerebral - cortex analyzed on surface-based atlases. Cerebral cortex, 22(10), - 2241-2262. - - Notes - ----- - License: ??? - """ - dataset_name = 'tpl-conte69' - keys = ['midthickness', 'inflated', 'vinflated'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - filenames = [ - 'tpl-conte69/tpl-conte69_space-MNI305_variant-fsLR32k_{}.{}.surf.gii' - .format(res, hemi) for res in keys for hemi in ['L', 'R'] - ] + ['tpl-conte69/template_description.json'] - - data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], - resume=resume, verbose=verbose) - - with open(data[-1], 'r') as src: - data[-1] = json.load(src) - - # bundle hemispheres together - data = [SURFACE(*data[:-1][i:i + 2]) for i in range(0, 6, 2)] + [data[-1]] - - return Bunch(**dict(zip(keys + ['info'], data))) - - -def fetch_yerkes19(data_dir=None, url=None, resume=None, verbose=1): - """ - Download files for Donahue et al., 2016 Yerkes19 template. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['midthickness', 'inflated', - 'vinflated'], where corresponding values are lists of filepaths to - downloaded template files. - - References - ---------- - https://balsa.wustl.edu/reference/show/976nz - - Donahue, C. J., Sotiropoulos, S. N., Jbabdi, S., Hernandez-Fernandez, M., - Behrens, T. E., Dyrby, T. B., ... & Glasser, M. F. (2016). Using diffusion - tractography to predict cortical connection strength and distance: a - quantitative comparison with tracers in the monkey. Journal of - Neuroscience, 36(25), 6758-6770. - - Notes - ----- - License: ??? 
- """ - dataset_name = 'tpl-yerkes19' - keys = ['midthickness', 'inflated', 'vinflated'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - filenames = [ - 'tpl-yerkes19/tpl-yerkes19_space-fsLR32k_{}.{}.surf.gii' - .format(res, hemi) for res in keys for hemi in ['L', 'R'] - ] - - data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], - resume=resume, verbose=verbose) - - # bundle hemispheres together - data = [SURFACE(*data[i:i + 2]) for i in range(0, 6, 2)] - - return Bunch(**dict(zip(keys + ['info'], data))) - - -def fetch_pauli2018(data_dir=None, url=None, resume=True, verbose=1): - """ - Download files for Pauli et al., 2018 subcortical parcellation. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['probabilistic', 'deterministic'], - where corresponding values are filepaths to downloaded atlas files. - - References - ---------- - Pauli, W. M., Nili, A. N., & Tyszka, J. M. (2018). A high-resolution - probabilistic in vivo atlas of human subcortical brain nuclei. Scientific - Data, 5, 180063. - - Notes - ----- - License: CC-BY Attribution 4.0 International - """ - dataset_name = 'atl-pauli2018' - keys = ['probabilistic', 'deterministic', 'info'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - - # format the query how _fetch_files() wants things and then download data - files = [ - (i['name'], i['url'], dict(md5sum=i['md5'], move=i['name'])) - for i in info - ] - - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - - return Bunch(**dict(zip(keys, data))) - - -def fetch_fsaverage(version='fsaverage', data_dir=None, url=None, resume=True, - verbose=1): - """ - Download files for fsaverage FreeSurfer template. - - Parameters - ---------- - version : str, optional - One of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', - 'fsaverage6'}. Default: 'fsaverage' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['surf'] where corresponding values - are length-2 lists downloaded template files (each list composed of - files for the left and right hemisphere). 
- """ - versions = [ - 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' - ] - if version not in versions: - raise ValueError('The version of fsaverage requested "{}" does not ' - 'exist. Must be one of {}'.format(version, versions)) - - dataset_name = 'tpl-fsaverage' - keys = ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version] - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - filenames = [ - op.join(version, 'surf', '{}.{}'.format(hemi, surf)) - for surf in keys for hemi in ['lh', 'rh'] - ] - - try: - data_dir = check_fs_subjid(version)[1] - data = [op.join(data_dir, f) for f in filenames] - except FileNotFoundError: - data = _fetch_files(data_dir, resume=resume, verbose=verbose, - files=[(op.join(dataset_name, f), url, opts) - for f in filenames]) - - data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] - - return Bunch(**dict(zip(keys, data))) - - -def available_connectomes(): - """ - List datasets available via :func:`~.fetch_connectome`. - - Returns - ------- - datasets : list of str - List of available datasets - """ - return sorted(_get_dataset_info('ds-connectomes').keys()) - - -def fetch_connectome(dataset, data_dir=None, url=None, resume=True, - verbose=1): - """ - Download files from multi-species connectomes. - - Parameters - ---------- - dataset : str - Specifies which dataset to download; must be one of the datasets listed - in :func:`netneurotools.datasets.available_connectomes()`. - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - data : :class:`sklearn.utils.Bunch` - Dictionary-like object with, at a minimum, keys ['conn', 'labels', - 'ref'] providing connectivity / correlation matrix, region labels, and - relevant reference. 
Other possible keys include 'dist' (an array of - Euclidean distances between regions of 'conn'), 'coords' (an array of - xyz coordinates for regions of 'conn'), 'acronyms' (an array of - acronyms for regions of 'conn'), and 'networks' (an array of network - affiliations for regions of 'conn') - - References - ---------- - See `ref` key of returned dictionary object for relevant dataset reference - """ - if dataset not in available_connectomes(): - raise ValueError('Provided dataset {} not available; must be one of {}' - .format(dataset, available_connectomes())) - - dataset_name = 'ds-connectomes' - - data_dir = op.join(_get_data_dir(data_dir=data_dir), dataset_name) - info = _get_dataset_info(dataset_name)[dataset] - if url is None: - url = info['url'] - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset) - } - - filenames = [ - op.join(dataset, '{}.csv'.format(fn)) for fn in info['keys'] - ] + [op.join(dataset, 'ref.txt')] - data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], - resume=resume, verbose=verbose) - - # load data - for n, arr in enumerate(data[:-1]): - try: - data[n] = np.loadtxt(arr, delimiter=',') - except ValueError: - data[n] = np.loadtxt(arr, delimiter=',', dtype=str) - with open(data[-1]) as src: - data[-1] = src.read().strip() - - return Bunch(**dict(zip(info['keys'] + ['ref'], data))) - - -def fetch_vazquez_rodriguez2019(data_dir=None, url=None, resume=True, - verbose=1): - """ - Download files from Vazquez-Rodriguez et al., 2019, PNAS. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - data : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['rsquared', 'gradient'] containing - 1000 values from - - References - ---------- - See `ref` key of returned dictionary object for relevant dataset reference - """ - dataset_name = 'ds-vazquez_rodriguez2019' - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - if url is None: - url = info['url'] - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - filenames = [ - op.join(dataset_name, 'rsquared_gradient.csv') - ] - data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], - resume=resume, verbose=verbose) - - # load data - rsq, grad = np.loadtxt(data[0], delimiter=',', skiprows=1).T - - return Bunch(rsquared=rsq, gradient=grad) - - -def fetch_schaefer2018(version='fsaverage', data_dir=None, url=None, - resume=True, verbose=1): - """ - Download FreeSurfer .annot files for Schaefer et al., 2018 parcellation. - - Parameters - ---------- - version : {'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'} - Specifies which surface annotation files should be matched to. Default: - 'fsaverage' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. 
Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys of format '{}Parcels{}Networks' where - corresponding values are the left/right hemisphere annotation files - - References - ---------- - Schaefer, A., Kong, R., Gordon, E. M., Laumann, T. O., Zuo, X. N., Holmes, - A. J., ... & Yeo, B. T. (2017). Local-global parcellation of the human - cerebral cortex from intrinsic functional connectivity MRI. Cerebral - Cortex, 28(9), 3095-3114. - - Notes - ----- - License: https://github.com/ThomasYeoLab/CBIG/blob/master/LICENSE.md - """ - versions = ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'] - if version not in versions: - raise ValueError('The version of Schaefer et al., 2018 parcellation ' - 'requested "{}" does not exist. Must be one of {}' - .format(version, versions)) - - dataset_name = 'atl-schaefer2018' - keys = [ - '{}Parcels{}Networks'.format(p, n) - for p in range(100, 1001, 100) for n in [7, 17] - ] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version] - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - if version == 'fslr32k': - hemispheres, suffix = ['LR'], 'dlabel.nii' - else: - hemispheres, suffix = ['L', 'R'], 'annot' - filenames = [ - 'atl-Schaefer2018_space-{}_hemi-{}_desc-{}_deterministic.{}' - .format(version, hemi, desc, suffix) - for desc in keys for hemi in hemispheres - ] - - files = [(op.join(dataset_name, version, f), url, opts) - for f in filenames] - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - - if suffix == 'annot': - data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] - - return Bunch(**dict(zip(keys, data))) - - -def fetch_hcp_standards(data_dir=None, url=None, resume=True, verbose=1): - """ - Fetch HCP standard mesh atlases for converting between FreeSurfer and HCP. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - standards : str - Filepath to standard_mesh_atlases directory - """ - if url is None: - url = 'https://web.archive.org/web/20220121035833/' + \ - 'http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip' - dataset_name = 'standard_mesh_atlases' - data_dir = _get_data_dir(data_dir=data_dir) - opts = { - 'uncompress': True, - 'move': '{}.zip'.format(dataset_name) - } - filenames = [ - 'L.sphere.32k_fs_LR.surf.gii', 'R.sphere.32k_fs_LR.surf.gii' - ] - files = [(op.join(dataset_name, f), url, opts) for f in filenames] - _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - - return op.join(data_dir, dataset_name) - - -def fetch_mmpall(version='fslr32k', data_dir=None, url=None, resume=True, - verbose=1): - """ - Download .label.gii files for Glasser et al., 2016 MMPAll atlas. 
- - Parameters - ---------- - version : {'fslr32k'} - Specifies which surface annotation files should be matched to. Default: - 'fslr32k' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Namedtuple with fields ('lh', 'rh') corresponding to filepaths to - left/right hemisphere parcellation files - - References - ---------- - Glasser, M. F., Coalson, T. S., Robinson, E. C., Hacker, C. D., Harwell, - J., Yacoub, E., ... & Van Essen, D. C. (2016). A multi-modal parcellation - of human cerebral cortex. Nature, 536(7615), 171-178. - - Notes - ----- - License: https://www.humanconnectome.org/study/hcp-young-adult/document/ - wu-minn-hcp-consortium-open-access-data-use-terms - """ - versions = ['fslr32k'] - if version not in versions: - raise ValueError('The version of Glasser et al., 2016 parcellation ' - 'requested "{}" does not exist. Must be one of {}' - .format(version, versions)) - - dataset_name = 'atl-mmpall' - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version] - if url is None: - url = info['url'] - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - hemispheres = ['L', 'R'] - filenames = [ - 'atl-MMPAll_space-{}_hemi-{}_deterministic.label.gii' - .format(version, hemi) for hemi in hemispheres - ] - - files = [(op.join(dataset_name, version, f), url, opts) for f in filenames] - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - - return SURFACE(*data) - - -def fetch_voneconomo(data_dir=None, url=None, resume=True, verbose=1): - """ - Fetch von-Economo Koskinas probabilistic FreeSurfer atlas. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['gcs', 'ctab', 'info'] - - References - ---------- - Scholtens, L. H., de Reus, M. A., de Lange, S. C., Schmidt, R., & van den - Heuvel, M. P. (2018). An MRI von Economo–Koskinas atlas. NeuroImage, 170, - 249-256. 
- - Notes - ----- - License: CC-BY-NC-SA 4.0 - """ - dataset_name = 'atl-voneconomo_koskinas' - keys = ['gcs', 'ctab', 'info'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - if url is None: - url = info['url'] - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - filenames = [ - 'atl-vonEconomoKoskinas_hemi-{}_probabilistic.{}'.format(hemi, suff) - for hemi in ['L', 'R'] for suff in ['gcs', 'ctab'] - ] + ['atl-vonEconomoKoskinas_info.csv'] - files = [(op.join(dataset_name, f), url, opts) for f in filenames] - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - data = [SURFACE(*data[:-1:2])] + [SURFACE(*data[1:-1:2])] + [data[-1]] - - return Bunch(**dict(zip(keys, data))) - - -def fetch_civet(density='41k', version='v1', data_dir=None, url=None, - resume=True, verbose=1): - """ - Fetch CIVET surface files. - - Parameters - ---------- - density : {'41k', '164k'}, optional - Which density of the CIVET-space geometry files to fetch. The - high-resolution '164k' surface only exists for version 'v2' - version : {'v1, 'v2'}, optional - Which version of the CIVET surfaces to use. Default: 'v2' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['mid', 'white'] containing geometry - files for CIVET surface. Note for version 'v1' the 'mid' and 'white' - files are identical. - - References - ---------- - Y. Ad-Dab’bagh, O. Lyttelton, J.-S. Muehlboeck, C. Lepage, D. Einarson, K. - Mok, O. Ivanov, R. Vincent, J. Lerch, E. Fombonne, A. C. Evans, The CIVET - image-processing environment: A fully automated comprehensive pipeline for - anatomical neuroimaging research. Proceedings of the 12th Annual Meeting of - the Organization for Human Brain Mapping (2006). - - Notes - ----- - License: https://github.com/aces/CIVET_Full_Project/blob/master/LICENSE - """ - densities = ['41k', '164k'] - if density not in densities: - raise ValueError('The density of CIVET requested "{}" does not exist. ' - 'Must be one of {}'.format(density, densities)) - versions = ['v1', 'v2'] - if version not in versions: - raise ValueError('The version of CIVET requested "{}" does not exist. 
' - 'Must be one of {}'.format(version, versions)) - - if version == 'v1' and density == '164k': - raise ValueError('The "164k" density CIVET surface only exists for ' - 'version "v2"') - - dataset_name = 'tpl-civet' - keys = ['mid', 'white'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version]['civet{}'.format(density)] - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - filenames = [ - op.join(dataset_name, version, 'civet{}'.format(density), - 'tpl-civet_space-ICBM152_hemi-{}_den-{}_{}.obj' - .format(hemi, density, surf)) - for surf in keys for hemi in ['L', 'R'] - ] - - data = _fetch_files(data_dir, resume=resume, verbose=verbose, - files=[(f, url, opts) for f in filenames]) - - data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] - - return Bunch(**dict(zip(keys, data))) diff --git a/netneurotools/datasets/generators.py b/netneurotools/datasets/generators.py deleted file mode 100644 index 42c4f56..0000000 --- a/netneurotools/datasets/generators.py +++ /dev/null @@ -1,93 +0,0 @@ - -# -*- coding: utf-8 -*- -"""Functions for making "random" datasets.""" - -import numpy as np -from sklearn.utils.validation import check_random_state - - -def make_correlated_xy(corr=0.85, size=10000, seed=None, tol=0.001): - """ - Generate random vectors that are correlated to approximately `corr`. - - Parameters - ---------- - corr : [-1, 1] float or (N, N) numpy.ndarray, optional - The approximate correlation desired. If a float is provided, two - vectors with the specified level of correlation will be generated. If - an array is provided, it is assumed to be a symmetrical correlation - matrix and ``len(corr)`` vectors with the specified levels of - correlation will be generated. Default: 0.85 - size : int or tuple, optional - Desired size of the generated vectors. Default: 1000 - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Default: None - tol : [0, 1] float, optional - Tolerance of correlation between generated `vectors` and specified - `corr`. Default: 0.001 - - Returns - ------- - vectors : numpy.ndarray - Random vectors of size `size` with correlation specified by `corr` - - Examples - -------- - >>> from netneurotools import datasets - - By default two vectors are generated with specified correlation - - >>> x, y = datasets.make_correlated_xy() - >>> np.corrcoef(x, y) # doctest: +SKIP - array([[1. , 0.85083661], - [0.85083661, 1. ]]) - >>> x, y = datasets.make_correlated_xy(corr=0.2) - >>> np.corrcoef(x, y) # doctest: +SKIP - array([[1. , 0.20069953], - [0.20069953, 1. ]]) - - You can also provide correlation matrices to generate more than two vectors - if desired. Note that this makes it more difficult to ensure the actual - correlations are close to the desired values: - - >>> corr = [[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]] - >>> out = datasets.make_correlated_xy(corr=corr) - >>> out.shape - (3, 10000) - >>> np.corrcoef(out) # doctest: +SKIP - array([[1. , 0.50965273, 0.30235686], - [0.50965273, 1. , 0.01089107], - [0.30235686, 0.01089107, 1. 
]]) - """ - rs = check_random_state(seed) - - # no correlations outside [-1, 1] bounds - if np.any(np.abs(corr) > 1): - raise ValueError('Provided `corr` must (all) be in range [-1, 1].') - - # if we're given a single number, assume two vectors are desired - if isinstance(corr, (int, float)): - covs = np.ones((2, 2)) * 0.111 - covs[(0, 1), (1, 0)] *= corr - # if we're given a correlation matrix, assume `N` vectors are desired - elif isinstance(corr, (list, np.ndarray)): - corr = np.asarray(corr) - if corr.ndim != 2 or len(corr) != len(corr.T): - raise ValueError('If `corr` is a list or array, must be a 2D ' - 'square array, not {}'.format(corr.shape)) - if np.any(np.diag(corr) != 1): - raise ValueError('Diagonal of `corr` must be 1.') - covs = corr * 0.111 - means = [0] * len(covs) - - # generate the variables - count = 0 - while count < 500: - vectors = rs.multivariate_normal(mean=means, cov=covs, size=size).T - flat = vectors.reshape(len(vectors), -1) - # if diff between actual and desired correlations less than tol, break - if np.all(np.abs(np.corrcoef(flat) - (covs / 0.111)) < tol): - break - count += 1 - - return vectors diff --git a/netneurotools/datasets/netneurotools.bib b/netneurotools/datasets/netneurotools.bib new file mode 100644 index 0000000..1e78958 --- /dev/null +++ b/netneurotools/datasets/netneurotools.bib @@ -0,0 +1,253 @@ +@article{cammoun2012mapping, + title={Mapping the human connectome at multiple scales with diffusion spectrum MRI}, + author={Cammoun, Leila and Gigandet, Xavier and Meskaldji, Djalel and Thiran, Jean Philippe and Sporns, Olaf and Do, Kim Q and Maeder, Philippe and Meuli, Reto and Hagmann, Patric}, + journal={Journal of neuroscience methods}, + volume={203}, + number={2}, + pages={386--397}, + year={2012}, + publisher={Elsevier} +} + +@article{pauli2018high, + title={A high-resolution probabilistic in vivo atlas of human subcortical brain nuclei}, + author={Pauli, Wolfgang M and Nili, Amanda N and Tyszka, J Michael}, + journal={Scientific data}, + volume={5}, + number={1}, + pages={1--13}, + year={2018}, + publisher={Nature Publishing Group} +} + +@article{van2012human, + title={The Human Connectome Project: a data acquisition perspective}, + author={Van Essen, David C and Ugurbil, Kamil and Auerbach, Edward and Barch, Deanna and Behrens, Timothy EJ and Bucholz, Richard and Chang, Acer and Chen, Liyong and Corbetta, Maurizio and Curtiss, Sandra W and others}, + journal={Neuroimage}, + volume={62}, + number={4}, + pages={2222--2231}, + year={2012}, + publisher={Elsevier} +} + +@article{van2012parcellations, + title={Parcellations and hemispheric asymmetries of human cerebral cortex analyzed on surface-based atlases}, + author={Van Essen, David C and Glasser, Matthew F and Dierker, Donna L and Harwell, John and Coalson, Timothy}, + journal={Cerebral cortex}, + volume={22}, + number={10}, + pages={2241--2262}, + year={2012}, + publisher={Oxford University Press} +} + +@article{glasser2013minimal, + title={The minimal preprocessing pipelines for the Human Connectome Project}, + author={Glasser, Matthew F and Sotiropoulos, Stamatios N and Wilson, J Anthony and Coalson, Timothy S and Fischl, Bruce and Andersson, Jesper L and Xu, Junqian and Jbabdi, Saad and Webster, Matthew and Polimeni, Jonathan R and others}, + journal={Neuroimage}, + volume={80}, + pages={105--124}, + year={2013}, + publisher={Elsevier} +} + +@article{donahue2016using, + title={Using diffusion tractography to predict cortical connection strength and distance: a quantitative comparison 
with tracers in the monkey}, + author={Donahue, Chad J and Sotiropoulos, Stamatios N and Jbabdi, Saad and Hernandez-Fernandez, Moises and Behrens, Timothy E and Dyrby, Tim B and Coalson, Timothy and Kennedy, Henry and Knoblauch, Kenneth and Van Essen, David C and others}, + journal={Journal of Neuroscience}, + volume={36}, + number={25}, + pages={6758--6770}, + year={2016}, + publisher={Soc Neuroscience} +} + +@article{dale1999cortical, + title={Cortical surface-based analysis: I. Segmentation and surface reconstruction}, + author={Dale, Anders M and Fischl, Bruce and Sereno, Martin I}, + journal={Neuroimage}, + volume={9}, + number={2}, + pages={179--194}, + year={1999}, + publisher={Elsevier} +} + +@article{fischl1999cortical, + title={Cortical surface-based analysis: II: inflation, flattening, and a surface-based coordinate system}, + author={Fischl, Bruce and Sereno, Martin I and Dale, Anders M}, + journal={Neuroimage}, + volume={9}, + number={2}, + pages={195--207}, + year={1999}, + publisher={Elsevier} +} + +@article{fischl1999high, + title={High-resolution intersubject averaging and a coordinate system for the cortical surface}, + author={Fischl, Bruce and Sereno, Martin I and Tootell, Roger BH and Dale, Anders M}, + journal={Human brain mapping}, + volume={8}, + number={4}, + pages={272--284}, + year={1999}, + publisher={Wiley Online Library} +} + +@article{lyttelton2007unbiased, + title={An unbiased iterative group registration template for cortical surface analysis}, + author={Lyttelton, Oliver and Boucher, Maxime and Robbins, Steven and Evans, Alan}, + journal={Neuroimage}, + volume={34}, + number={4}, + pages={1535--1544}, + year={2007}, + publisher={Elsevier} +} + +@article{fonov2009unbiased, + title={Unbiased nonlinear average age-appropriate brain templates from birth to adulthood}, + author={Fonov, Vladimir S and Evans, Alan C and McKinstry, Robert C and Almli, C Robert and Collins, DL}, + journal={NeuroImage}, + volume={47}, + pages={S102}, + year={2009}, + publisher={Elsevier} +} + +@article{ad2006civet, + title={The CIVET image-processing environment: A fully automated comprehensive pipeline for anatomical neuroimaging research. 
Proceedings of the 12th annual meeting of the organization for human brain mapping}, + author={Ad-Dab'bagh, Y and Lyttelton, O and Muehlboeck, J and Lepage, C and Einarson, D and Mok, K and Ivanov, O and Vincent, R and Lerch, J and Fombonne, E}, + journal={Florence, Italy}, + pages={2266}, + year={2006} +} + +@article{varshney2011structural, + title={Structural properties of the Caenorhabditis elegans neuronal network}, + author={Varshney, Lav R and Chen, Beth L and Paniagua, Eric and Hall, David H and Chklovskii, Dmitri B}, + journal={PLoS computational biology}, + volume={7}, + number={2}, + pages={e1001066}, + year={2011}, + publisher={Public Library of Science San Francisco, USA} +} + +@article{chiang2011three, + title={Three-dimensional reconstruction of brain-wide wiring networks in Drosophila at single-cell resolution}, + author={Chiang, Ann-Shyn and Lin, Chih-Yung and Chuang, Chao-Chun and Chang, Hsiu-Ming and Hsieh, Chang-Huain and Yeh, Chang-Wei and Shih, Chi-Tin and Wu, Jian-Jheng and Wang, Guo-Tzau and Chen, Yung-Chang and others}, + journal={Current biology}, + volume={21}, + number={1}, + pages={1--11}, + year={2011}, + publisher={Elsevier} +} + +@article{griffa2019lausanne, + title={Structural and functional connectome from 70 young healthy adults [data set]}, + author={Griffa, Alessandra and Alem{\'a}n-G{\'o}mez, Yasser and Hagmann, Patric}, + journal={Zenodo}, + year={2019} +} + +@article{markov2013role, + title={The role of long-range connections on the specificity of the macaque interareal cortical network}, + author={Markov, Nikola T and Ercsey-Ravasz, Maria and Lamy, Camille and Ribeiro Gomes, Ana Rita and Magrou, Lo{\"\i}c and Misery, Pierre and Giroud, Pascale and Barone, Pascal and Dehay, Colette and Toroczkai, Zolt{\'a}n and others}, + journal={Proceedings of the National Academy of Sciences}, + volume={110}, + number={13}, + pages={5187--5192}, + year={2013}, + publisher={National Acad Sciences} +} + +@article{modha2010network, + title={Network architecture of the long-distance pathways in the macaque brain}, + author={Modha, Dharmendra S and Singh, Raghavendra}, + journal={Proceedings of the National Academy of Sciences}, + volume={107}, + number={30}, + pages={13485--13490}, + year={2010}, + publisher={National Acad Sciences} +} + +@article{rubinov2015wiring, + title={Wiring cost and topological participation of the mouse brain connectome}, + author={Rubinov, Mikail and Ypma, Rolf JF and Watson, Charles and Bullmore, Edward T}, + journal={Proceedings of the National Academy of Sciences}, + volume={112}, + number={32}, + pages={10032--10037}, + year={2015}, + publisher={National Acad Sciences} +} + +@article{bota2015architecture, + title={Architecture of the cerebral cortical association connectome underlying cognition}, + author={Bota, Mihail and Sporns, Olaf and Swanson, Larry W}, + journal={Proceedings of the National Academy of Sciences}, + volume={112}, + number={16}, + pages={E2093--E2101}, + year={2015}, + publisher={National Acad Sciences} +} + +@article{vazquez2019gradients, + title={Gradients of structure--function tethering across neocortex}, + author={V{\'a}zquez-Rodr{\'\i}guez, Bertha and Su{\'a}rez, Laura E and Markello, Ross D and Shafiei, Golia and Paquola, Casey and Hagmann, Patric and Van Den Heuvel, Martijn P and Bernhardt, Boris C and Spreng, R Nathan and Misic, Bratislav}, + journal={Proceedings of the National Academy of Sciences}, + volume={116}, + number={42}, + pages={21219--21227}, + year={2019}, + publisher={National Acad Sciences} +} + 
+@article{hansen2023integrating, + title={Integrating multimodal and multiscale connectivity blueprints of the human cerebral cortex in health and disease}, + author={Hansen, Justine Y and Shafiei, Golia and Voigt, Katharina and Liang, Emma X and Cox, Sylvia ML and Leyton, Marco and Jamadar, Sharna D and Misic, Bratislav}, + journal={PLoS biology}, + volume={21}, + number={9}, + pages={e3002314}, + year={2023}, + publisher={Public Library of Science San Francisco, CA USA} +} + +@article{schaefer2018local, + title={Local-global parcellation of the human cerebral cortex from intrinsic functional connectivity MRI}, + author={Schaefer, Alexander and Kong, Ru and Gordon, Evan M and Laumann, Timothy O and Zuo, Xi-Nian and Holmes, Avram J and Eickhoff, Simon B and Yeo, BT Thomas}, + journal={Cerebral cortex}, + volume={28}, + number={9}, + pages={3095--3114}, + year={2018}, + publisher={Oxford University Press} +} + +@article{glasser2016multi, + title={A multi-modal parcellation of human cerebral cortex}, + author={Glasser, Matthew F and Coalson, Timothy S and Robinson, Emma C and Hacker, Carl D and Harwell, John and Yacoub, Essa and Ugurbil, Kamil and Andersson, Jesper and Beckmann, Christian F and Jenkinson, Mark and others}, + journal={Nature}, + volume={536}, + number={7615}, + pages={171--178}, + year={2016}, + publisher={Nature Publishing Group} +} + +@article{scholtens2018mri, + title={An mri von economo--koskinas atlas}, + author={Scholtens, Lianne H and de Reus, Marcel A and de Lange, Siemon C and Schmidt, Ruben and van den Heuvel, Martijn P}, + journal={NeuroImage}, + volume={170}, + pages={249--256}, + year={2018}, + publisher={Elsevier} +} diff --git a/netneurotools/datasets/references.json b/netneurotools/datasets/references.json new file mode 100644 index 0000000..b5214a9 --- /dev/null +++ b/netneurotools/datasets/references.json @@ -0,0 +1,220 @@ +{ + "atl-cammoun2012": { + "primary": [ + { + "citation": "Leila Cammoun, Xavier Gigandet, Djalel Meskaldji, Jean Philippe Thiran, Olaf Sporns, Kim Q Do, Philippe Maeder, Reto Meuli, and Patric Hagmann. Mapping the human connectome at multiple scales with diffusion spectrum mri. Journal of neuroscience methods, 203(2):386\u2013397, 2012.", + "bibkey": "cammoun2012mapping" + } + ] + }, + "atl-pauli2018": { + "primary": [ + { + "citation": "Wolfgang M Pauli, Amanda N Nili, and J Michael Tyszka. A high-resolution probabilistic in vivo atlas of human subcortical brain nuclei. Scientific data, 5(1):1\u201313, 2018.", + "bibkey": "pauli2018high" + } + ] + }, + "tpl-conte69": { + "primary": [ + { + "citation": "David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna Barch, Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, Maurizio Corbetta, Sandra W Curtiss, and others. The human connectome project: a data acquisition perspective. Neuroimage, 62(4):2222\u20132231, 2012.", + "bibkey": "van2012human" + }, + { + "citation": "David C Van Essen, Matthew F Glasser, Donna L Dierker, John Harwell, and Timothy Coalson. Parcellations and hemispheric asymmetries of human cerebral cortex analyzed on surface-based atlases. Cerebral cortex, 22(10):2241\u20132262, 2012.", + "bibkey": "van2012parcellations" + } + ] + }, + "tpl-yerkes19": { + "primary": [ + { + "citation": "Chad J Donahue, Stamatios N Sotiropoulos, Saad Jbabdi, Moises Hernandez-Fernandez, Timothy E Behrens, Tim B Dyrby, Timothy Coalson, Henry Kennedy, Kenneth Knoblauch, David C Van Essen, and others. 
Using diffusion tractography to predict cortical connection strength and distance: a quantitative comparison with tracers in the monkey. Journal of Neuroscience, 36(25):6758\u20136770, 2016.", + "bibkey": "donahue2016using" + } + ] + }, + "tpl-fsaverage": { + "primary": [ + { + "citation": "Anders M Dale, Bruce Fischl, and Martin I Sereno. Cortical surface-based analysis: i. segmentation and surface reconstruction. Neuroimage, 9(2):179\u2013194, 1999.", + "bibkey": "dale1999cortical" + }, + { + "citation": "Bruce Fischl, Martin I Sereno, and Anders M Dale. Cortical surface-based analysis: ii: inflation, flattening, and a surface-based coordinate system. Neuroimage, 9(2):195\u2013207, 1999.", + "bibkey": "fischl1999cortical" + }, + { + "citation": "Bruce Fischl, Martin I Sereno, Roger BH Tootell, and Anders M Dale. High-resolution intersubject averaging and a coordinate system for the cortical surface. Human brain mapping, 8(4):272\u2013284, 1999.", + "bibkey": "fischl1999high" + } + ] + }, + "tpl-civet": { + "primary": [ + { + "citation": "Oliver Lyttelton, Maxime Boucher, Steven Robbins, and Alan Evans. An unbiased iterative group registration template for cortical surface analysis. Neuroimage, 34(4):1535\u20131544, 2007.", + "bibkey": "lyttelton2007unbiased" + }, + { + "citation": "Vladimir S Fonov, Alan C Evans, Robert C McKinstry, C Robert Almli, and DL Collins. Unbiased nonlinear average age-appropriate brain templates from birth to adulthood. NeuroImage, 47:S102, 2009.", + "bibkey": "fonov2009unbiased" + }, + { + "citation": "Y Ad-Dab'bagh, O Lyttelton, J Muehlboeck, C Lepage, D Einarson, K Mok, O Ivanov, R Vincent, J Lerch, and E Fombonne. The civet image-processing environment: a fully automated comprehensive pipeline for anatomical neuroimaging research. proceedings of the 12th annual meeting of the organization for human brain mapping. Florence, Italy, pages 2266, 2006.", + "bibkey": "ad2006civet" + } + ] + }, + "ds-famous_gmat": { + "primary": [ + { + "citation": "", + "bibkey": "" + } + ], + "celegans": [ + { + "citation": "Lav R Varshney, Beth L Chen, Eric Paniagua, David H Hall, and Dmitri B Chklovskii. Structural properties of the caenorhabditis elegans neuronal network. PLoS computational biology, 7(2):e1001066, 2011.", + "bibkey": "varshney2011structural" + } + ], + "drosophila": [ + { + "citation": "Ann-Shyn Chiang, Chih-Yung Lin, Chao-Chun Chuang, Hsiu-Ming Chang, Chang-Huain Hsieh, Chang-Wei Yeh, Chi-Tin Shih, Jian-Jheng Wu, Guo-Tzau Wang, Yung-Chang Chen, and others. Three-dimensional reconstruction of brain-wide wiring networks in drosophila at single-cell resolution. Current biology, 21(1):1\u201311, 2011.", + "bibkey": "chiang2011three" + } + ], + "human": [ + { + "citation": "Alessandra Griffa, Yasser Alem\u00e1n-G\u00f3mez, and Patric Hagmann. Structural and functional connectome from 70 young healthy adults [data set]. Zenodo, 2019.", + "bibkey": "griffa2019lausanne" + } + ], + "macaque_markov": [ + { + "citation": "Nikola T Markov, Maria Ercsey-Ravasz, Camille Lamy, Ana Rita Ribeiro Gomes, Lo\u00efc Magrou, Pierre Misery, Pascale Giroud, Pascal Barone, Colette Dehay, Zolt\u00e1n Toroczkai, and others. The role of long-range connections on the specificity of the macaque interareal cortical network. Proceedings of the National Academy of Sciences, 110(13):5187\u20135192, 2013.", + "bibkey": "markov2013role" + } + ], + "macaque_modha": [ + { + "citation": "Dharmendra S Modha and Raghavendra Singh. 
Network architecture of the long-distance pathways in the macaque brain. Proceedings of the National Academy of Sciences, 107(30):13485\u201313490, 2010.", + "bibkey": "modha2010network" + } + ], + "mouse": [ + { + "citation": "Mikail Rubinov, Rolf JF Ypma, Charles Watson, and Edward T Bullmore. Wiring cost and topological participation of the mouse brain connectome. Proceedings of the National Academy of Sciences, 112(32):10032\u201310037, 2015.", + "bibkey": "rubinov2015wiring" + } + ], + "rat": [ + { + "citation": "Mihail Bota, Olaf Sporns, and Larry W Swanson. Architecture of the cerebral cortical association connectome underlying cognition. Proceedings of the National Academy of Sciences, 112(16):E2093\u2013E2101, 2015.", + "bibkey": "bota2015architecture" + } + ] + }, + "ds-vazquez_rodriguez2019": { + "primary": [ + { + "citation": "Bertha V\u00e1zquez-Rodr\u00edguez, Laura E Su\u00e1rez, Ross D Markello, Golia Shafiei, Casey Paquola, Patric Hagmann, Martijn P Van Den Heuvel, Boris C Bernhardt, R Nathan Spreng, and Bratislav Misic. Gradients of structure\u2013function tethering across neocortex. Proceedings of the National Academy of Sciences, 116(42):21219\u201321227, 2019.", + "bibkey": "vazquez2019gradients" + } + ] + }, + "atl-schaefer2018": { + "primary": [ + { + "citation": "Alexander Schaefer, Ru Kong, Evan M Gordon, Timothy O Laumann, Xi-Nian Zuo, Avram J Holmes, Simon B Eickhoff, and BT Thomas Yeo. Local-global parcellation of the human cerebral cortex from intrinsic functional connectivity mri. Cerebral cortex, 28(9):3095\u20133114, 2018.", + "bibkey": "schaefer2018local" + } + ] + }, + "atl-mmpall": { + "primary": [ + { + "citation": "Matthew F Glasser, Timothy S Coalson, Emma C Robinson, Carl D Hacker, John Harwell, Essa Yacoub, Kamil Ugurbil, Jesper Andersson, Christian F Beckmann, Mark Jenkinson, and others. A multi-modal parcellation of human cerebral cortex. Nature, 536(7615):171\u2013178, 2016.", + "bibkey": "glasser2016multi" + } + ] + }, + "atl-voneconomo_koskinas": { + "primary": [ + { + "citation": "Lianne H Scholtens, Marcel A de Reus, Siemon C de Lange, Ruben Schmidt, and Martijn P van den Heuvel. An mri von economo\u2013koskinas atlas. NeuroImage, 170:249\u2013256, 2018.", + "bibkey": "scholtens2018mri" + } + ] + }, + "tpl-hcp_standards": { + "primary": [ + { + "citation": "David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna Barch, Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, Maurizio Corbetta, Sandra W Curtiss, and others. The human connectome project: a data acquisition perspective. Neuroimage, 62(4):2222\u20132231, 2012.", + "bibkey": "van2012human" + }, + { + "citation": "Matthew F Glasser, Stamatios N Sotiropoulos, J Anthony Wilson, Timothy S Coalson, Bruce Fischl, Jesper L Andersson, Junqian Xu, Saad Jbabdi, Matthew Webster, Jonathan R Polimeni, and others. The minimal preprocessing pipelines for the human connectome project. Neuroimage, 80:105\u2013124, 2013.", + "bibkey": "glasser2013minimal" + } + ] + }, + "ds-hansen_manynetworks": { + "primary": [ + { + "citation": "Justine Y Hansen, Golia Shafiei, Katharina Voigt, Emma X Liang, Sylvia ML Cox, Marco Leyton, Sharna D Jamadar, and Bratislav Misic. Integrating multimodal and multiscale connectivity blueprints of the human cerebral cortex in health and disease. 
PLoS biology, 21(9):e3002314, 2023.", + "bibkey": "hansen2023integrating" + } + ], + "gene": [ + { + "citation": "", + "bibkey": "" + } + ], + "receptor": [ + { + "citation": "", + "bibkey": "" + } + ], + "larminar": [ + { + "citation": "", + "bibkey": "" + } + ], + "metabolic": [ + { + "citation": "", + "bibkey": "" + } + ], + "haemodynamic": [ + { + "citation": "", + "bibkey": "" + } + ], + "electrophysiological": [ + { + "citation": "", + "bibkey": "" + } + ], + "temporal": [ + { + "citation": "", + "bibkey": "" + } + ], + "cognitive": [ + { + "citation": "", + "bibkey": "" + } + ] + } +} \ No newline at end of file diff --git a/netneurotools/tests/__init__.py b/netneurotools/datasets/tests/__init__.py similarity index 100% rename from netneurotools/tests/__init__.py rename to netneurotools/datasets/tests/__init__.py diff --git a/netneurotools/datasets/tests/test_datasets_utils.py b/netneurotools/datasets/tests/test_datasets_utils.py new file mode 100644 index 0000000..9d5d85c --- /dev/null +++ b/netneurotools/datasets/tests/test_datasets_utils.py @@ -0,0 +1,35 @@ +"""For testing netneurotools.datasets.datasets_utils functionality.""" +import os + +import pytest + +from netneurotools.datasets import datasets_utils as utils + + +@pytest.mark.parametrize('dset, expected', [ + ('atl-cammoun2012', ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k', + 'MNI152NLin2009aSym', 'gcs']), + ('tpl-conte69', ['url', 'md5']), + ('atl-pauli2018', ['probabilistic', 'deterministic', 'info']), + ('tpl-fsaverage', ['fsaverage' + f for f in ['', '3', '4', '5', '6']]), + ('atl-schaefer2018', ['fsaverage', 'fsaverage6', 'fsaverage6']) +]) +def test_get_dataset_info(dset, expected): + """Test getting dataset info.""" + info = utils._get_dataset_info(dset) + if isinstance(info, dict): + assert all(k in info.keys() for k in expected) + elif isinstance(info, list): + for f in info: + assert all(k in f.keys() for k in expected) + else: + assert False + + with pytest.raises(KeyError): + utils._get_dataset_info('notvalid') + + +def test_get_data_dir(tmpdir): + """Test getting data directory.""" + data_dir = utils._get_data_dir(tmpdir) + assert os.path.isdir(data_dir) diff --git a/netneurotools/datasets/tests/test_fetch.py b/netneurotools/datasets/tests/test_fetch.py new file mode 100644 index 0000000..b55e58d --- /dev/null +++ b/netneurotools/datasets/tests/test_fetch.py @@ -0,0 +1,206 @@ +"""For testing netneurotools.datasets.fetch_* functionality.""" +import os +import pytest +from pathlib import Path +import numpy as np +from netneurotools import datasets + + +class TestFetchTemplate: + """Test fetching of template datasets.""" + + @pytest.mark.parametrize('version', [ + 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' + ]) + def test_fetch_fsaverage(self, tmpdir, version): + """Test fetching of fsaverage surfaces.""" + fsaverage = datasets.fetch_fsaverage( + version=version, data_dir=tmpdir, verbose=0 + ) + for k in ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere']: + assert hasattr(fsaverage, k) + assert len(fsaverage[k]) == 2 + assert all(os.path.isfile(hemi) for hemi in fsaverage[k]), fsaverage[k] + + def test_fetch_hcp_standards(self, tmpdir): + """Test fetching of HCP standard meshes.""" + hcp = datasets.fetch_hcp_standards(data_dir=tmpdir, verbose=0) + assert hcp.exists() + + @pytest.mark.parametrize('version', [ + 'v1', 'v2' + ]) + def test_fetch_civet(self, tmpdir, version): + """Test fetching of CIVET templates.""" + civet = datasets.fetch_civet(version=version, 
data_dir=tmpdir, verbose=0) + for key in ('mid', 'white'): + assert key in civet + for hemi in ('lh', 'rh'): + assert hasattr(civet[key], hemi) + assert os.path.isfile(getattr(civet[key], hemi)) + + def test_fetch_conte69(self, tmpdir): + """Test fetching of Conte69 surfaces.""" + conte = datasets.fetch_conte69(data_dir=tmpdir, verbose=0) + assert all(hasattr(conte, k) for k in + ['midthickness', 'inflated', 'vinflated', 'info']) + + def test_fetch_yerkes19(self, tmpdir): + """Test fetching of Yerkes19 surfaces.""" + yerkes19 = datasets.fetch_yerkes19(data_dir=tmpdir, verbose=0) + assert all(hasattr(yerkes19, k) for k in + ['midthickness', 'inflated', 'vinflated']) + + +class TestFetchAtlas: + """Test fetching of atlas datasets.""" + + @pytest.mark.parametrize('version, expected', [ + ('MNI152NLin2009aSym', [1, 1, 1, 1, 1]), + ('fsaverage', [2, 2, 2, 2, 2]), + ('fsaverage5', [2, 2, 2, 2, 2]), + ('fsaverage6', [2, 2, 2, 2, 2]), + ('fslr32k', [2, 2, 2, 2, 2]), + ('gcs', [2, 2, 2, 2, 6]) + ]) + def test_fetch_cammoun2012(self, tmpdir, version, expected): + """Test fetching of Cammoun2012 parcellations.""" + keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500'] + cammoun = datasets.fetch_cammoun2012(version, data_dir=tmpdir, verbose=0) + + # output has expected keys + assert all(hasattr(cammoun, k) for k in keys) + # and keys are expected lengths! + for k, e in zip(keys, expected): + out = getattr(cammoun, k) + if isinstance(out, (tuple, list)): + assert len(out) == e + else: + assert isinstance(out, str) and out.endswith('.nii.gz') + + if 'fsaverage' in version: + with pytest.warns(DeprecationWarning): + datasets.fetch_cammoun2012('surface', data_dir=tmpdir, verbose=0) + + @pytest.mark.parametrize('version', [ + 'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k' + ]) + def test_fetch_schaefer2018(self, tmpdir, version): + """Test fetching of Schaefer2018 parcellations.""" + keys = [ + f'{p}Parcels{n}Networks' + for p in range(100, 1001, 100) for n in [7, 17] + ] + schaefer = datasets.fetch_schaefer2018(version, data_dir=tmpdir, verbose=0) + + if version == 'fslr32k': + assert all(k in schaefer and os.path.isfile(schaefer[k]) for k in keys) + else: + for k in keys: + assert k in schaefer + assert len(schaefer[k]) == 2 + assert all(os.path.isfile(hemi) for hemi in schaefer[k]) + + def test_fetch_mmpall(self, tmpdir): + """Test fetching of MMPAll parcellations.""" + mmp = datasets.fetch_mmpall(data_dir=tmpdir, verbose=0) + assert len(mmp) == 2 + assert all(os.path.isfile(hemi) for hemi in mmp) + assert all(hasattr(mmp, attr) for attr in ('lh', 'rh')) + + def test_fetch_pauli2018(self, tmpdir): + """Test fetching of Pauli2018 parcellations.""" + pauli = datasets.fetch_pauli2018(data_dir=tmpdir, verbose=0) + assert all(hasattr(pauli, k) and os.path.isfile(pauli[k]) for k in + ['probabilistic', 'deterministic', 'info']) + + @pytest.mark.xfail + def test_fetch_ye2020(self, tmpdir): + """Test fetching of Ye2020 parcellations.""" + assert False + + def test_fetch_voneconomo(self, tmpdir): + """Test fetching of von Economo parcellations.""" + vek = datasets.fetch_voneconomo(data_dir=tmpdir, verbose=0) + assert all(hasattr(vek, k) and len(vek[k]) == 2 for k in ['gcs', 'ctab']) + assert isinstance(vek.get('info'), str) + + +class TestFetchProject: + """Test fetching of project datasets.""" + + def test_fetch_vazquez_rodriguez2019(self, tmpdir): + """Test fetching of Vazquez-Rodriguez2019 dataset.""" + vazquez = datasets.fetch_vazquez_rodriguez2019(data_dir=tmpdir, verbose=0) + for k in 
['rsquared', 'gradient']: + assert hasattr(vazquez, k) + assert isinstance(getattr(vazquez, k), np.ndarray) + + @pytest.mark.xfail + def test_fetch_mirchi2018(self, tmpdir): + """Test fetching of Mirchi2018 dataset.""" + assert False + + def test_fetch_hansen_manynetworks(self, tmpdir): + """Test fetching of Hansen et al., 2023 many-networks dataset.""" + hansen = datasets.fetch_hansen_manynetworks(data_dir=tmpdir, verbose=0) + assert "cammoun033" in hansen + assert "gene" in hansen["cammoun033"] + assert isinstance(hansen["cammoun033"]["gene"], Path) + + @pytest.mark.xfail + def test_fetch_hansen_receptors(self, tmpdir): + """Test fetching of Hansen et al., 2022 receptor dataset.""" + # hansen = datasets.fetch_hansen_receptors(data_dir=tmpdir, verbose=0) + assert False + + @pytest.mark.xfail + def test_fetch_hansen_genecognition(self, tmpdir): + """Test fetching of Hansen et al., 2021 gene-cognition dataset.""" + # hansen = datasets.fetch_hansen_genecognition(data_dir=tmpdir, verbose=0) + assert False + + @pytest.mark.xfail + def test_fetch_hansen_brainstem(self, tmpdir): + """Test fetching of Hansen et al., 2024 brainstem dataset.""" + # hansen = datasets.fetch_hansen_brainstem(data_dir=tmpdir, verbose=0) + assert False + + @pytest.mark.xfail + def test_fetch_shafiei_hcpmeg(self, tmpdir): + """Test fetching of Shafiei et al., 2022 & 2023 HCP-MEG dataset.""" + # shafiei = datasets.fetch_shafiei_hcpmeg(data_dir=tmpdir, verbose=0) + assert False + + @pytest.mark.xfail + def test_fetch_suarez_mami(self, tmpdir): + """Test fetching of Suarez et al., 2022 mami dataset.""" + # suarez = datasets.fetch_suarez_mami(data_dir=tmpdir, verbose=0) + assert False + + @pytest.mark.parametrize('dataset, expected', [ + ('celegans', ['conn', 'dist', 'labels', 'ref']), + ('drosophila', ['conn', 'coords', 'labels', 'networks', 'ref']), + ('human_func_scale033', ['conn', 'coords', 'labels', 'ref']), + ('human_func_scale060', ['conn', 'coords', 'labels', 'ref']), + ('human_func_scale125', ['conn', 'coords', 'labels', 'ref']), + ('human_func_scale250', ['conn', 'coords', 'labels', 'ref']), + ('human_func_scale500', ['conn', 'coords', 'labels', 'ref']), + ('human_struct_scale033', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('human_struct_scale060', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('human_struct_scale125', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('human_struct_scale250', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('human_struct_scale500', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('macaque_markov', ['conn', 'dist', 'labels', 'ref']), + ('macaque_modha', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('mouse', ['acronyms', 'conn', 'coords', 'dist', 'labels', 'ref']), + ('rat', ['conn', 'labels', 'ref']), + ]) + def test_fetch_famous_gmat(self, tmpdir, dataset, expected): + """Test fetching of famous G.mat datasets.""" + connectome = datasets.fetch_famous_gmat(dataset, data_dir=tmpdir, verbose=0) + + expected.remove("ref") + for key in expected: + assert (key in connectome) + assert isinstance(connectome[key], str if key == 'ref' else np.ndarray) diff --git a/netneurotools/datasets/utils.py b/netneurotools/datasets/utils.py deleted file mode 100644 index 4339c57..0000000 --- a/netneurotools/datasets/utils.py +++ /dev/null @@ -1,100 +0,0 @@ -# -*- coding: utf-8 -*- -"""Utilites for loading / creating datasets.""" - -import json -import os -import importlib.resources - -if getattr(importlib.resources, 'files', None) is not None: - _importlib_avail = True -else: - from 
pkg_resources import resource_filename - _importlib_avail = False - - -def _osfify_urls(data): - """ - Format `data` object with OSF API URL. - - Parameters - ---------- - data : object - If dict with a `url` key, will format OSF_API with relevant values - - Returns - ------- - data : object - Input data with all `url` dict keys formatted - """ - OSF_API = "https://files.osf.io/v1/resources/{}/providers/osfstorage/{}" - - if isinstance(data, str): - return data - elif 'url' in data: - data['url'] = OSF_API.format(*data['url']) - - try: - for key, value in data.items(): - data[key] = _osfify_urls(value) - except AttributeError: - for n, value in enumerate(data): - data[n] = _osfify_urls(value) - - return data - - -if _importlib_avail: - osf = importlib.resources.files("netneurotools") / "data/osf.json" -else: - osf = resource_filename('netneurotools', 'data/osf.json') - -with open(osf) as src: - OSF_RESOURCES = _osfify_urls(json.load(src)) - - -def _get_dataset_info(name): - """ - Return url and MD5 checksum for dataset `name`. - - Parameters - ---------- - name : str - Name of dataset - - Returns - ------- - url : str - URL from which to download dataset - md5 : str - MD5 checksum for file downloade from `url` - """ - try: - return OSF_RESOURCES[name] - except KeyError: - raise KeyError("Provided dataset '{}' is not valid. Must be one of: {}" - .format(name, sorted(OSF_RESOURCES.keys()))) from None - - -def _get_data_dir(data_dir=None): - """ - Get path to netneurotools data directory. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - - Returns - ------- - data_dir : str - Path to use as data directory - """ - if data_dir is None: - data_dir = os.environ.get('NNT_DATA', os.path.join('~', 'nnt-data')) - data_dir = os.path.expanduser(data_dir) - if not os.path.exists(data_dir): - os.makedirs(data_dir) - - return data_dir diff --git a/netneurotools/experimental/__init__.py b/netneurotools/experimental/__init__.py new file mode 100644 index 0000000..911c0f7 --- /dev/null +++ b/netneurotools/experimental/__init__.py @@ -0,0 +1,4 @@ +"""Functions in alpha stage.""" + + +__all__ = [] diff --git a/netneurotools/freesurfer.py b/netneurotools/freesurfer.py deleted file mode 100644 index 047590d..0000000 --- a/netneurotools/freesurfer.py +++ /dev/null @@ -1,662 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for working with FreeSurfer data and parcellations.""" - -import os -import os.path as op -import warnings - -from nibabel.freesurfer import read_annot, read_geometry -import numpy as np -from scipy import sparse -try: # scipy >= 1.8.0 - from scipy.ndimage._measurements import _stats, labeled_comprehension -except ImportError: # scipy < 1.8.0 - from scipy.ndimage.measurements import _stats, labeled_comprehension -from scipy.spatial.distance import cdist - -from .datasets import fetch_fsaverage -from .stats import gen_spinsamples -from .surface import make_surf_graph -from .utils import check_fs_subjid, run - -FSIGNORE = [ - 'unknown', 'corpuscallosum', 'Background+FreeSurfer_Defined_Medial_Wall' -] - - -def apply_prob_atlas(subject_id, gcs, hemi, *, orig='white', annot=None, - ctab=None, subjects_dir=None, use_cache=True, - quiet=False): - """ - Create an annotation file for `subject_id` by applying atlas in `gcs`. 
- - Runs subprocess calling FreeSurfer's "mris_ca_label" function; as such, - FreeSurfer must be installed and accesible on the local system path. - - Parameters - ---------- - subject_id : str - FreeSurfer subject ID - gcs : str - Filepath to .gcs file containing classifier array - hemi : {'lh', 'rh'} - Hemisphere corresponding to `gcs` file - orig : str, optional - Original surface to which to apply classifer. Default: 'white' - annot : str, optional - Path to output annotation file to generate. If set to None, the name is - created from the provided `hemi` and `gcs`. If provided as a - relative path, it is assumed to stem from `subjects_dir`/`subject_id`. - Default: None - ctab : str, optional - Path to colortable corresponding to `gcs`. Default: None - subjects_dir : str, optional - Path to FreeSurfer subject directory. If not set, will inherit from - the environmental variable $SUBJECTS_DIR. Default: None - use_cache : bool, optional - Whether to check for existence of `annot` in directory specified by - `{subjects_dir}/{subject_id}/label' and use that, if it exists. If - False, will create a new annot file. Default: True - quiet : bool, optional - Whether to restrict status messages. Default: False - - Returns - ------- - annot : str - Path to generated annotation file - """ - cmd = 'mris_ca_label {opts}{subject_id} {hemi} {hemi}.sphere.reg ' \ - '{gcs} {annot}' - - if hemi not in ['rh', 'lh']: - raise ValueError('Provided hemisphere designation `hemi` must be one ' - 'of \'rh\' or \'lh\'. Provided: {}'.format(hemi)) - if not op.isfile(gcs): - raise ValueError('Cannot find specified `gcs` file {}.'.format(gcs)) - - subject_id, subjects_dir = check_fs_subjid(subject_id, subjects_dir) - - # add all the options together, as specified - opts = '' - if ctab is not None and op.isfile(ctab): - opts += '-t {} '.format(ctab) - if orig is not None: - opts += '-orig {} '.format(orig) - if subjects_dir is not None: - opts += '-sdir {} '.format(subjects_dir) - else: - subjects_dir = os.environ['SUBJECTS_DIR'] - - # generate output filename - if annot is None: - base = '{}.{}.annot'.format(hemi, gcs[:-4]) - annot = op.join(subjects_dir, subject_id, 'label', base) - else: - # if not a full path, assume relative from subjects_dir/subject_id - if not annot.startswith(op.abspath(os.sep)): - annot = op.join(subjects_dir, subject_id, annot) - - # if annotation file doesn't exist or we explicitly want to make a new one - if not op.isfile(annot) or not use_cache: - run(cmd.format(opts=opts, subject_id=subject_id, hemi=hemi, - gcs=gcs, annot=annot), - quiet=quiet) - - return annot - - -def _decode_list(vals): - """List decoder.""" - return [val.decode() if hasattr(val, 'decode') else val for val in vals] - - -def find_parcel_centroids(*, lhannot, rhannot, method='surface', - version='fsaverage', surf='sphere', drop=None): - """ - Return vertex coords corresponding to centroids of parcels in annotations. - - Note that using any other `surf` besides the default of 'sphere' may result - in centroids that are not directly within the parcels themselves due to - sulcal folding patterns. - - Parameters - ---------- - {lh,rh}annot : str - Path to .annot file containing labels of parcels on the {left,right} - hemisphere. These must be specified as keyword arguments to avoid - accidental order switching. - method : {'average', 'surface', 'geodesic'}, optional - Method for calculation of parcel centroid. See Notes for more - information. 
Default: 'surface' - version : str, optional - Specifies which version of `fsaverage` provided annotation files - correspond to. Must be one of {'fsaverage', 'fsaverage3', 'fsaverage4', - 'fsaverage5', 'fsaverage6'}. Default: 'fsaverage' - surf : str, optional - Specifies which surface projection of fsaverage to use for finding - parcel centroids. Default: 'sphere' - drop : list, optional - Specifies regions in {lh,rh}annot for which the parcel centroid should - not be calculated. If not specified, centroids for parcels defined in - `netneurotools.freesurfer.FSIGNORE` are not calculated. Default: None - - Returns - ------- - centroids : (N, 3) numpy.ndarray - xyz coordinates of vertices closest to the centroid of each parcel - defined in `lhannot` and `rhannot` - hemiid : (N,) numpy.ndarray - Array denoting hemisphere designation of coordinates in `centroids`, - where `hemiid=0` denotes the left and `hemiid=1` the right hemisphere - - Notes - ----- - The following methods can be used for finding parcel centroids: - - 1. ``method='average'`` - - Uses the arithmetic mean of the coordinates for the vertices in each - parcel. Note that in this case the calculated centroids will not act - actually fall on the surface of `surf`. - - 2. ``method='surface'`` - - Calculates the 'average' coordinates and then finds the closest vertex - on `surf`, where closest is defined as the vertex with the minimum - Euclidean distance. - - 3. ``method='geodesic'`` - - Uses the coordinates of the vertex with the minimum average geodesic - distance to all other vertices in the parcel. Note that this is slightly - more time-consuming than the other two methods, especially for - high-resolution meshes. - """ - methods = ['average', 'surface', 'geodesic'] - if method not in methods: - raise ValueError('Provided method for centroid calculation {} is ' - 'invalid. Must be one of {}'.format(methods, methods)) - - if drop is None: - drop = FSIGNORE - drop = _decode_list(drop) - - surfaces = fetch_fsaverage(version)[surf] - - centroids, hemiid = [], [] - for n, (annot, surf) in enumerate(zip([lhannot, rhannot], surfaces)): - vertices, faces = read_geometry(surf) - labels, ctab, names = read_annot(annot) - names = _decode_list(names) - - for lab in np.unique(labels): - if names[lab] in drop: - continue - if method in ['average', 'surface']: - roi = np.atleast_2d(vertices[labels == lab].mean(axis=0)) - if method == 'surface': # find closest vertex on the sphere - roi = vertices[np.argmin(cdist(vertices, roi), axis=0)[0]] - elif method == 'geodesic': - inds, = np.where(labels == lab) - roi = _geodesic_parcel_centroid(vertices, faces, inds) - centroids.append(roi) - hemiid.append(n) - - return np.vstack(centroids), np.asarray(hemiid) - - -def _geodesic_parcel_centroid(vertices, faces, inds): - """ - Calculate parcel centroids based on surface distance. 
- - Parameters - ---------- - vertices : (N, 3) - Coordinates of vertices defining surface - faces : (F, 3) - Triangular faces defining surface - inds : (R,) - Indices of `vertices` that belong to parcel - - Returns - ------- - roi : (3,) numpy.ndarray - Vertex corresponding to centroid of parcel - """ - mask = np.ones(len(vertices), dtype=bool) - mask[inds] = False - mat = make_surf_graph(vertices, faces, mask=mask) - paths = sparse.csgraph.dijkstra(mat, directed=False, indices=inds)[:, inds] - - # the selected vertex is the one with the minimum average shortest path - # to the other vertices in the parcel - roi = vertices[inds[paths.mean(axis=1).argmin()]] - - return roi - - -def parcels_to_vertices(data, *, lhannot, rhannot, drop=None): - """ - Project parcellated `data` to vertices defined in annotation files. - - Assigns np.nan to all ROIs in `drop` - - Parameters - ---------- - data : (N,) numpy.ndarray - Parcellated data to be projected to vertices. Parcels should be ordered - by [left, right] hemisphere; ordering within hemisphere should - correspond to the provided annotation files. - {lh,rh}annot : str - Path to .annot file containing labels of parcels on the {left,right} - hemisphere. These must be specified as keyword arguments to avoid - accidental order switching. - drop : list, optional - Specifies regions in {lh,rh}annot that are not present in `data`. NaNs - will be inserted in place of the these regions in the returned data. If - not specified, parcels defined in `netneurotools.freesurfer.FSIGNORE` - are assumed to not be present. Default: None - - Returns - ------- - projected : numpy.ndarray - Vertex-level data - """ - if drop is None: - drop = FSIGNORE - drop = _decode_list(drop) - - data = np.vstack(data).astype(float) - - # check this so we're not unduly surprised by anything... - n_vert = expected = 0 - for a in [lhannot, rhannot]: - vn, _, names = read_annot(a) - n_vert += len(vn) - names = _decode_list(names) - expected += len(names) - len(set(drop) & set(names)) - if expected != len(data): - raise ValueError('Number of parcels in provided annotation files ' - 'differs from size of parcellated data array.\n' - ' EXPECTED: {} parcels\n' - ' RECEIVED: {} parcels' - .format(expected, len(data))) - - projected = np.zeros((n_vert, data.shape[-1]), dtype=data.dtype) - start = end = n_vert = 0 - for annot in [lhannot, rhannot]: - # read files and update end index for `data` - labels, ctab, names = read_annot(annot) - names = _decode_list(names) - todrop = set(names) & set(drop) - end += len(names) - len(todrop) # unknown and corpuscallosum - - # get indices of unknown and corpuscallosum and insert NaN values - inds = sorted([names.index(f) for f in todrop]) - inds = [f - n for n, f in enumerate(inds)] - currdata = np.insert(data[start:end], inds, np.nan, axis=0) - - # project to vertices and store - projected[n_vert:n_vert + len(labels), :] = currdata[labels] - start = end - n_vert += len(labels) - - return np.squeeze(projected) - - -def vertices_to_parcels(data, *, lhannot, rhannot, drop=None): - """ - Reduce vertex-level `data` to parcels defined in annotation files. - - Takes average of vertices within each parcel, excluding np.nan values - (i.e., np.nanmean). Assigns np.nan to parcels for which all vertices are - np.nan. 
- - Parameters - ---------- - data : (N,) numpy.ndarray - Vertex-level data to be reduced to parcels - {lh,rh}annot : str - Path to .annot file containing labels to parcels on the {left,right} - hemisphere - drop : list, optional - Specifies regions in {lh,rh}annot that should be removed from the - parcellated version of `data`. If not specified, vertices corresponding - to parcels defined in `netneurotools.freesurfer.FSIGNORE` will be - removed. Default: None - - Returns - ------- - reduced : numpy.ndarray - Parcellated `data`, without regions specified in `drop` - """ - if drop is None: - drop = FSIGNORE - drop = _decode_list(drop) - - data = np.vstack(data) - - n_parc = expected = 0 - for a in [lhannot, rhannot]: - vn, _, names = read_annot(a) - expected += len(vn) - names = _decode_list(names) - n_parc += len(names) - len(set(drop) & set(names)) - if expected != len(data): - raise ValueError('Number of vertices in provided annotation files ' - 'differs from size of vertex-level data array.\n' - ' EXPECTED: {} vertices\n' - ' RECEIVED: {} vertices' - .format(expected, len(data))) - - reduced = np.zeros((n_parc, data.shape[-1]), dtype=data.dtype) - start = end = n_parc = 0 - for annot in [lhannot, rhannot]: - # read files and update end index for `data` - labels, ctab, names = read_annot(annot) - names = _decode_list(names) - - indices = np.unique(labels) - end += len(labels) - - for idx in range(data.shape[-1]): - # get average of vertex-level data within parcels - # set all NaN values to 0 before calling `_stats` because we are - # returning sums, so the 0 values won't impact the sums (if we left - # the NaNs then all parcels with even one NaN entry would be NaN) - currdata = np.squeeze(data[start:end, idx]) - isna = np.isnan(currdata) - counts, sums = _stats(np.nan_to_num(currdata), labels, indices) - - # however, we do need to account for the NaN values in the counts - # so that our means are similar to what we'd get from e.g., - # np.nanmean here, our "sums" are the counts of NaN values in our - # parcels - _, nacounts = _stats(isna, labels, indices) - counts = (np.asanyarray(counts, dtype=float) - - np.asanyarray(nacounts, dtype=float)) - - with np.errstate(divide='ignore', invalid='ignore'): - currdata = sums / counts - - # get indices of unkown and corpuscallosum and delete from parcels - inds = sorted([names.index(f) for f in set(drop) & set(names)]) - currdata = np.delete(currdata, inds) - - # store parcellated data - reduced[n_parc:n_parc + len(names) - len(inds), idx] = currdata - - start = end - n_parc += len(names) - len(inds) - - return np.squeeze(reduced) - - -def _get_fsaverage_coords(version='fsaverage', surface='sphere'): - """ - Get vertex coordinates for specified `surface` of fsaverage `version`. - - Parameters - ---------- - version : str, optional - One of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', - 'fsaverage6'}. Default: 'fsaverage' - surface : str, optional - Surface for which to return vertex coordinates. 
Default: 'sphere' - - Returns - ------- - coords : (N, 3) numpy.ndarray - xyz coordinates of vertices for {left,right} hemisphere - hemiid : (N,) numpy.ndarray - Array denoting hemisphere designation of entries in `coords`, where - `hemiid=0` denotes the left and `hemiid=1` the right hemisphere - """ - # get coordinates and hemisphere designation for spin generation - lhsphere, rhsphere = fetch_fsaverage(version)[surface] - coords, hemi = [], [] - for n, sphere in enumerate([lhsphere, rhsphere]): - coords.append(read_geometry(sphere)[0]) - hemi.append(np.ones(len(coords[-1])) * n) - - return np.vstack(coords), np.hstack(hemi) - - -def _get_fsaverage_spins(version='fsaverage', spins=None, n_rotate=1000, - **kwargs): - """ - Generate spatial permutation resamples for fsaverage `version`. - - If `spins` are provided then performs checks to confirm they are valid - - Parameters - ---------- - version : str, optional - Specifies which version of `fsaverage` for which to generate spins. - Must be one of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', - 'fsaverage6'}. Default: 'fsaverage' - spins : array_like, optional - Pre-computed spins to use instead of generating them on the fly. If not - provided will use other provided parameters to create them. Default: - None - n_rotate : int, optional - Number of rotations to generate. Default: 1000 - return_cost : bool, optional - Whether to return cost array (specified as Euclidean distance) for each - coordinate for each rotation. Currently this option is not supported if - pre-computed `spins` are provided. Default: True - kwargs : key-value pairs - Keyword arguments passed to `netneurotools.stats.gen_spinsamples` - - Returns - ------- - spins : (N, S) numpy.ndarray - Resampling array - """ - if spins is None: - coords, hemiid = _get_fsaverage_coords(version, 'sphere') - spins = gen_spinsamples(coords, hemiid, n_rotate=n_rotate, - **kwargs) - if kwargs.get('return_cost'): - return spins - - spins = np.asarray(spins, dtype='int32') - if spins.shape[-1] != n_rotate: - warnings.warn('Shape of provided `spins` array does not match ' - 'number of rotations requested with `n_rotate`. ' - 'Ignoring specified `n_rotate` parameter and using ' - 'all provided `spins`.', stacklevel=2) - n_rotate = spins.shape[-1] - - return spins, None - - -def spin_data(data, *, lhannot, rhannot, version='fsaverage', n_rotate=1000, - spins=None, drop=None, verbose=False, **kwargs): - """ - Project parcellated `data` to surface, rotates, and re-parcellates. - - Projection to the surface uses `{lh,rh}annot` files. Rotation uses vertex - coordinates from the specified fsaverage `version` and relies on - :func:`netneurotools.stats.gen_spinsamples`. Re-parcellated data will not - be exactly identical to original values due to re-averaging process. - Parcels subsumed by regions in `drop` will be listed as NaN. - - Parameters - ---------- - data : (N,) numpy.ndarray - Parcellated data to be rotated. Parcels should be ordered by [left, - right] hemisphere; ordering within hemisphere should correspond to the - provided `{lh,rh}annot` annotation files. - {lh,rh}annot : str - Path to .annot file containing labels to parcels on the {left,right} - hemisphere - version : str, optional - Specifies which version of `fsaverage` provided annotation files - correspond to. Must be one of {'fsaverage', 'fsaverage3', 'fsaverage4', - 'fsaverage5', 'fsaverage6'}. Default: 'fsaverage' - n_rotate : int, optional - Number of rotations to generate. 
Default: 1000 - spins : array_like, optional - Pre-computed spins to use instead of generating them on the fly. If not - provided will use other provided parameters to create them. Default: - None - drop : list, optional - Specifies regions in {lh,rh}annot that are not present in `data`. NaNs - will be inserted in place of the these regions in the returned data. If - not specified, parcels defined in `netneurotools.freesurfer.FSIGNORE` - are assumed to not be present. Default: None - verbose : bool, optional - Whether to print occasional status messages. Default: False - kwargs : key-value pairs - Keyword arguments passed to `netneurotools.stats.gen_spinsamples` - - Returns - ------- - rotated : (N, `n_rotate`) numpy.ndarray - Rotated `data - cost : (N, `n_rotate`,) numpy.ndarray - Cost (specified as Euclidean distance) of re-assigning each coordinate - for every rotation in `spinsamples`. Only provided if `return_cost` is - True. - """ - if drop is None: - drop = FSIGNORE - - # get coordinates and hemisphere designation for spin generation - vertices = parcels_to_vertices(data, lhannot=lhannot, rhannot=rhannot, - drop=drop) - - # get spins + cost (if requested) - spins, cost = _get_fsaverage_spins(version=version, spins=spins, - n_rotate=n_rotate, - verbose=verbose, **kwargs) - if len(vertices) != len(spins): - raise ValueError('Provided annotation files have a different ' - 'number of vertices than the specified fsaverage ' - 'surface.\n ANNOTATION: {} vertices\n ' - 'FSAVERAGE: {} vertices' - .format(len(vertices), len(spins))) - - spun = np.zeros(data.shape + (n_rotate,)) - for n in range(n_rotate): - if verbose: - msg = f'Reducing vertices to parcels: {n:>5}/{n_rotate}' - print(msg, end='\b' * len(msg), flush=True) - spun[..., n] = vertices_to_parcels(vertices[spins[:, n]], - lhannot=lhannot, rhannot=rhannot, - drop=drop) - - if verbose: - print(' ' * len(msg) + '\b' * len(msg), end='', flush=True) - - if kwargs.get('return_cost'): - return spun, cost - - return spun - - -def spin_parcels(*, lhannot, rhannot, version='fsaverage', n_rotate=1000, - spins=None, drop=None, verbose=False, **kwargs): - """ - Rotate parcels in `{lh,rh}annot` and re-assigns based on maximum overlap. - - Vertex labels are rotated with :func:`netneurotools.stats.gen_spinsamples` - and a new label is assigned to each *parcel* based on the region maximally - overlapping with its boundaries. - - Parameters - ---------- - {lh,rh}annot : str - Path to .annot file containing labels to parcels on the {left,right} - hemisphere - version : str, optional - Specifies which version of `fsaverage` provided annotation files - correspond to. Must be one of {'fsaverage', 'fsaverage3', 'fsaverage4', - 'fsaverage5', 'fsaverage6'}. Default: 'fsaverage' - n_rotate : int, optional - Number of rotations to generate. Default: 1000 - spins : array_like, optional - Pre-computed spins to use instead of generating them on the fly. If not - provided will use other provided parameters to create them. Default: - None - drop : list, optional - Specifies regions in {lh,rh}annot that are not present in `data`. NaNs - will be inserted in place of the these regions in the returned data. If - not specified, parcels defined in `netneurotools.freesurfer.FSIGNORE` - are assumed to not be present. Default: None - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Default: None - verbose : bool, optional - Whether to print occasional status messages. 
Default: False - return_cost : bool, optional - Whether to return cost array (specified as Euclidean distance) for each - coordinate for each rotation. Default: True - kwargs : key-value pairs - Keyword arguments passed to `netneurotools.stats.gen_spinsamples` - - Returns - ------- - spinsamples : (N, `n_rotate`) numpy.ndarray - Resampling matrix to use in permuting data parcellated with labels from - {lh,rh}annot, where `N` is the number of parcels. Indices of -1 - indicate that the parcel was completely encompassed by regions in - `drop` and should be ignored. - cost : (N, `n_rotate`,) numpy.ndarray - Cost (specified as Euclidean distance) of re-assigning each coordinate - for every rotation in `spinsamples`. Only provided if `return_cost` is - True. - """ - - def overlap(vals): - """Return most common non-negative value in `vals`; -1 if all neg.""" - vals = np.asarray(vals) - vals, counts = np.unique(vals[vals > 0], return_counts=True) - try: - return vals[counts.argmax()] - except ValueError: - return -1 - - if drop is None: - drop = FSIGNORE - drop = _decode_list(drop) - - # get vertex-level labels (set drop labels to - values) - vertices, end = [], 0 - for n, annot in enumerate([lhannot, rhannot]): - labels, ctab, names = read_annot(annot) - names = _decode_list(names) - todrop = set(names) & set(drop) - inds = [names.index(f) - n for n, f in enumerate(todrop)] - labs = np.arange(len(names) - len(inds)) + (end - (len(inds) * n)) - insert = np.arange(-1, -(len(inds) + 1), -1) - vertices.append(np.insert(labs, inds, insert)[labels]) - end += len(names) - vertices = np.hstack(vertices) - labels = np.unique(vertices) - mask = labels > -1 - - # get spins + cost (if requested) - spins, cost = _get_fsaverage_spins(version=version, spins=spins, - n_rotate=n_rotate, verbose=verbose, - **kwargs) - if len(vertices) != len(spins): - raise ValueError('Provided annotation files have a different ' - 'number of vertices than the specified fsaverage ' - 'surface.\n ANNOTATION: {} vertices\n ' - 'FSAVERAGE: {} vertices' - .format(len(vertices), len(spins))) - - # spin and assign regions based on max overlap - regions = np.zeros((len(labels[mask]), n_rotate), dtype='int32') - for n in range(n_rotate): - if verbose: - msg = f'Calculating parcel overlap: {n:>5}/{n_rotate}' - print(msg, end='\b' * len(msg), flush=True) - regions[:, n] = labeled_comprehension(vertices[spins[:, n]], vertices, - labels, overlap, int, -1)[mask] - - if kwargs.get('return_cost'): - return regions, cost - - return regions diff --git a/netneurotools/interface/__init__.py b/netneurotools/interface/__init__.py new file mode 100644 index 0000000..1b474b1 --- /dev/null +++ b/netneurotools/interface/__init__.py @@ -0,0 +1,3 @@ +"""Functions for interfacing with common tools.""" + +__all__ = [] diff --git a/netneurotools/interface/freesurfer.py b/netneurotools/interface/freesurfer.py new file mode 100644 index 0000000..9efc02f --- /dev/null +++ b/netneurotools/interface/freesurfer.py @@ -0,0 +1 @@ +"""Functions for working with FreeSurfer data and parcellations.""" diff --git a/netneurotools/interface/tests/__init__.py b/netneurotools/interface/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/interface/tests/test_freesurfer.py b/netneurotools/interface/tests/test_freesurfer.py new file mode 100644 index 0000000..fcdcd23 --- /dev/null +++ b/netneurotools/interface/tests/test_freesurfer.py @@ -0,0 +1 @@ +"""For testing netneurotools.interface.freesurfer functionality.""" diff --git 
a/netneurotools/metrics/__init__.py b/netneurotools/metrics/__init__.py new file mode 100644 index 0000000..51d8181 --- /dev/null +++ b/netneurotools/metrics/__init__.py @@ -0,0 +1,66 @@ +"""Magics on networks.""" + + +from .bct import ( + # routing + degrees_und, degrees_dir, + distance_wei_floyd, retrieve_shortest_path, + navigation_wu, get_navigation_path_length, + # diffusion + communicability_bin, communicability_wei, + path_transitivity, search_information, + mean_first_passage_time, diffusion_efficiency, + resource_efficiency_bin, flow_graph, + # other + assortativity, + matching_ind_und, + rich_feeder_peripheral +) + + +from .metrics_utils import ( + _fast_binarize, + _graph_laplacian, +) + + +from .spreading import ( + simulate_atrophy +) + + +from .statistical import ( + network_pearsonr, + network_pearsonr_numba, + network_pearsonr_pairwise, + effective_resistance, + network_polarisation, + network_variance, + network_variance_numba, + network_covariance, + network_covariance_numba +) + + +__all__ = [ + # bct + 'degrees_und', 'degrees_dir', + 'distance_wei_floyd', 'retrieve_shortest_path', + 'navigation_wu', 'get_navigation_path_length', + 'communicability_bin', 'communicability_wei', + 'path_transitivity', 'search_information', + 'mean_first_passage_time', 'diffusion_efficiency', + 'resource_efficiency_bin', 'flow_graph', + 'assortativity', 'matching_ind_und', + 'rich_feeder_peripheral', + # metrics_utils + '_fast_binarize', '_graph_laplacian', + # spreading + 'simulate_atrophy', + # statistical + 'network_pearsonr', 'network_pearsonr_numba', + 'network_pearsonr_pairwise', 'effective_resistance', + 'network_polarisation', 'network_variance', + 'network_variance_numba', 'network_covariance', + 'network_covariance_numba' +] diff --git a/netneurotools/metrics.py b/netneurotools/metrics/bct.py similarity index 95% rename from netneurotools/metrics.py rename to netneurotools/metrics/bct.py index 2b8abf2..3996350 100644 --- a/netneurotools/metrics.py +++ b/netneurotools/metrics/bct.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- """ -Functions for calculating network metrics. +Functions for calculating brain connectivity metrics. Uses naming conventions adopted from the Brain Connectivity Toolbox (https://sites.google.com/site/bctnet/). @@ -18,26 +17,7 @@ except ImportError: use_numba = False - -def _binarize(W): - """ - Binarize a matrix. - - Parameters - ---------- - W : (N, N) array_like - Matrix to be binarized - - Returns - ------- - binarized : (N, N) numpy.ndarray - Binarized matrix - """ - return (W > 0) * 1 - - -if use_numba: - _binarize = njit(_binarize) +from .metrics_utils import _fast_binarize def degrees_und(W): @@ -56,7 +36,7 @@ def degrees_und(W): deg : (N,) numpy.ndarray Degree of each node in `W` """ - return np.sum(_binarize(W), axis=0) + return np.sum(_fast_binarize(W), axis=0) def degrees_dir(W): @@ -78,7 +58,7 @@ def degrees_dir(W): deg : (N,) numpy.ndarray Degree (in-degree + out-degree) of each node in `W` """ - W_bin = _binarize(W) + W_bin = _fast_binarize(W) deg_in = np.sum(W_bin, axis=0) deg_out = np.sum(W_bin, axis=1) deg = deg_in + deg_out @@ -165,179 +145,6 @@ def retrieve_shortest_path(s, t, p_mat): retrieve_shortest_path = njit(retrieve_shortest_path) -def communicability_bin(adjacency, normalize=False): - """ - Compute the communicability of pairs of nodes in `adjacency`. 
- - Parameters - ---------- - adjacency : (N, N) array_like - Unweighted, direct/undirected connection weight/length array - normalize : bool, optional - Whether to normalize `adjacency` by largest eigenvalue prior to - calculation of communicability metric. Default: False - - Returns - ------- - comm : (N, N) numpy.ndarray - Symmetric array representing communicability of nodes {i, j} - - References - ---------- - Estrada, E., & Hatano, N. (2008). Communicability in complex networks. - Physical Review E, 77(3), 036111. - - Examples - -------- - >>> from netneurotools import metrics - - >>> A = np.array([[1, 0, 1], [0, 1, 1], [1, 0, 1]]) - >>> Q = metrics.communicability_bin(A) - >>> Q - array([[4.19452805, 0. , 3.19452805], - [1.47624622, 2.71828183, 3.19452805], - [3.19452805, 0. , 4.19452805]]) - """ - if not np.any(np.logical_or(adjacency == 0, adjacency == 1)): - raise ValueError('Provided adjancecy matrix must be unweighted.') - - # normalize by largest eigenvalue to prevent communicability metric from - # "blowing up" - if normalize: - norm = np.linalg.eigvals(adjacency).max() - adjacency = adjacency / norm - - return scipy.sparse.linalg.expm(adjacency) - - -def communicability_wei(adjacency): - """ - Compute the communicability of pairs of nodes in `adjacency`. - - Parameters - ---------- - adjacency : (N, N) array_like - Weighted, direct/undirected connection weight/length array - - Returns - ------- - cmc : (N, N) numpy.ndarray - Symmetric array representing communicability of nodes {i, j} - - References - ---------- - Crofts, J. J., & Higham, D. J. (2009). A weighted communicability measure - applied to complex brain networks. Journal of the Royal Society Interface, - 6(33), 411-414. - - Examples - -------- - >>> from netneurotools import metrics - - >>> A = np.array([[2, 0, 3], [0, 2, 1], [0.5, 0, 1]]) - >>> Q = metrics.communicability_wei(A) - >>> Q - array([[0. , 0. , 1.93581903], - [0.07810379, 0. , 0.94712177], - [0.32263651, 0. , 0. ]]) - """ - # negative square root of nodal degrees - row_sum = adjacency.sum(1) - neg_sqrt = np.power(row_sum, -0.5) - square_sqrt = np.diag(neg_sqrt) - - # normalize input matrix - for_expm = square_sqrt @ adjacency @ square_sqrt - - # calculate matrix exponential of normalized matrix - cmc = scipy.sparse.linalg.expm(for_expm) - cmc[np.diag_indices_from(cmc)] = 0 - - return cmc - - -def rich_feeder_peripheral(x, sc, stat='median'): - """ - Calculate connectivity values in rich, feeder, and peripheral edges. - - Parameters - ---------- - x : (N, N) numpy.ndarray - Symmetric correlation or connectivity matrix - sc : (N, N) numpy.ndarray - Binary structural connectivity matrix - stat : {'mean', 'median'}, optional - Statistic to use over rich/feeder/peripheral links. Default: 'median' - - Returns - ------- - rfp : (3, k) numpy.ndarray - Array of median rich (0), feeder (1), and peripheral (2) - values, defined by `x`. `k` is the maximum degree defined on `sc`. - pvals : (3, k) numpy.ndarray - p-value for each link, computed using Welch's t-test. - Rich links are compared against non-rich links. Feeder links are - compared against peripheral links. Peripheral links are compared - against feeder links. T-test is one-sided. - - Notes - ----- - This code was written by Justine Hansen who promises to fix and even - optimize the code should any issues arise, provided you let her know. 
- """ - stats = ['mean', 'median'] - if stat not in stats: - raise ValueError(f'Provided stat {stat} not valid.\ - Must be one of {stats}') - - nnodes = len(sc) - mask = np.triu(np.ones(nnodes), 1) > 0 - node_degree = degrees_und(sc) - k = np.max(node_degree).astype(np.int64) - rfp_label = np.zeros((len(sc[mask]), k)) - - for degthresh in range(k): # for each degree threshold - hub_idx = np.where(node_degree >= degthresh) # find the hubs - hub = np.zeros([nnodes, 1]) - hub[hub_idx, :] = 1 - - rfp = np.zeros([nnodes, nnodes]) # for each link, define rfp - for edge1 in range(nnodes): - for edge2 in range(nnodes): - if hub[edge1] + hub[edge2] == 2: - rfp[edge1, edge2] = 1 # rich - if hub[edge1] + hub[edge2] == 1: - rfp[edge1, edge2] = 2 # feeder - if hub[edge1] + hub[edge2] == 0: - rfp[edge1, edge2] = 3 # peripheral - rfp_label[:, degthresh] = rfp[mask] - - rfp = np.zeros([3, k]) - pvals = np.zeros([3, k]) - for degthresh in range(k): - - redfunc = np.median if stat == 'median' else np.mean - for linktype in range(3): - rfp[linktype, degthresh] = redfunc(x[mask][rfp_label[:, degthresh] - == linktype + 1]) - - # p-value (one-sided Welch's t-test) - _, pvals[0, degthresh] = ttest_ind( - x[mask][rfp_label[:, degthresh] == 1], - x[mask][rfp_label[:, degthresh] != 1], - equal_var=False, alternative='greater') - _, pvals[1, degthresh] = ttest_ind( - x[mask][rfp_label[:, degthresh] == 2], - x[mask][rfp_label[:, degthresh] == 3], - equal_var=False, alternative='greater') - _, pvals[2, degthresh] = ttest_ind( - x[mask][rfp_label[:, degthresh] == 3], - x[mask][rfp_label[:, degthresh] == 2], - equal_var=False, alternative='greater') - - return rfp, pvals - - def navigation_wu(nav_dist_mat, sc_mat): """ Compute network navigation. @@ -461,20 +268,170 @@ def get_navigation_path_length(nav_paths, alt_dist_mat): `pl_dis = get_navigation_path_length(nav_paths, D)` D is Euclidean distance between node centroids. - See Also - -------- - netneurotools.metrics.navigation_wu - """ - nav_path_len = np.zeros_like(alt_dist_mat) - for nav_item in nav_paths: - i, j, _, hop, path = nav_item - if hop != -1: - nav_path_len[i, j] = np.sum( - [alt_dist_mat[path[_], path[_ + 1]] for _ in range(hop)] - ) - else: - nav_path_len[i, j] = np.inf - return nav_path_len + See Also + -------- + netneurotools.metrics.navigation_wu + """ + nav_path_len = np.zeros_like(alt_dist_mat) + for nav_item in nav_paths: + i, j, _, hop, path = nav_item + if hop != -1: + nav_path_len[i, j] = np.sum( + [alt_dist_mat[path[_], path[_ + 1]] for _ in range(hop)] + ) + else: + nav_path_len[i, j] = np.inf + return nav_path_len + + +def communicability_bin(adjacency, normalize=False): + """ + Compute the communicability of pairs of nodes in `adjacency`. + + Parameters + ---------- + adjacency : (N, N) array_like + Unweighted, direct/undirected connection weight/length array + normalize : bool, optional + Whether to normalize `adjacency` by largest eigenvalue prior to + calculation of communicability metric. Default: False + + Returns + ------- + comm : (N, N) numpy.ndarray + Symmetric array representing communicability of nodes {i, j} + + References + ---------- + Estrada, E., & Hatano, N. (2008). Communicability in complex networks. + Physical Review E, 77(3), 036111. + + Examples + -------- + >>> from netneurotools import metrics + + >>> A = np.array([[1, 0, 1], [0, 1, 1], [1, 0, 1]]) + >>> Q = metrics.communicability_bin(A) + >>> Q + array([[4.19452805, 0. , 3.19452805], + [1.47624622, 2.71828183, 3.19452805], + [3.19452805, 0. 
, 4.19452805]]) + """ + if not np.any(np.logical_or(adjacency == 0, adjacency == 1)): + raise ValueError('Provided adjancecy matrix must be unweighted.') + + # normalize by largest eigenvalue to prevent communicability metric from + # "blowing up" + if normalize: + norm = np.linalg.eigvals(adjacency).max() + adjacency = adjacency / norm + + return scipy.sparse.linalg.expm(adjacency) + + +def communicability_wei(adjacency): + """ + Compute the communicability of pairs of nodes in `adjacency`. + + Parameters + ---------- + adjacency : (N, N) array_like + Weighted, direct/undirected connection weight/length array + + Returns + ------- + cmc : (N, N) numpy.ndarray + Symmetric array representing communicability of nodes {i, j} + + References + ---------- + Crofts, J. J., & Higham, D. J. (2009). A weighted communicability measure + applied to complex brain networks. Journal of the Royal Society Interface, + 6(33), 411-414. + + Examples + -------- + >>> from netneurotools import metrics + + >>> A = np.array([[2, 0, 3], [0, 2, 1], [0.5, 0, 1]]) + >>> Q = metrics.communicability_wei(A) + >>> Q + array([[0. , 0. , 1.93581903], + [0.07810379, 0. , 0.94712177], + [0.32263651, 0. , 0. ]]) + """ + # negative square root of nodal degrees + row_sum = adjacency.sum(1) + neg_sqrt = np.power(row_sum, -0.5) + square_sqrt = np.diag(neg_sqrt) + + # normalize input matrix + for_expm = square_sqrt @ adjacency @ square_sqrt + + # calculate matrix exponential of normalized matrix + cmc = scipy.sparse.linalg.expm(for_expm) + cmc[np.diag_indices_from(cmc)] = 0 + + return cmc + + +def path_transitivity(D): + """ + Calculate path transitivity. + + This function implements path transitivity, calculating the density of + local detours (triangles) that are available along the shortest paths + between all pairs of nodes. + + This function is adapted and optimized from the Brain Connectivity Toolbox. + + .. warning:: + Test before use. + + Parameters + ---------- + D : (N, N) ndarray + Weight or connection length matrix. Length matrix is recommended and + transform should have been applied. + + Returns + ------- + T_mat : (N, N) ndarray + Path transitivity matrix + + References + ---------- + .. [1] Goñi, J., Van Den Heuvel, M. P., Avena-Koenigsberger, + A., Velez de Mendizabal, N., Betzel, R. F., Griffa, A., ... & + Sporns, O. (2014). Resting-brain functional connectivity predicted + by analytic measures of network communication. Proceedings of the + National Academy of Sciences, 111(2), 833-838. + """ + n = len(D) + m = np.zeros((n, n)) + T_mat = np.zeros((n, n)) + + deg_wu = np.sum(D, axis=0) + + for i in range(n - 1): + for j in range(i + 1, n): + sig_and = np.logical_and(D[i, :], D[j, :]) + m[i, j] = np.dot(D[i, :] + D[j, :], sig_and) \ + / (deg_wu[i] + deg_wu[j] - 2 * D[i, j]) + m += m.transpose() + + _, p_mat = distance_wei_floyd(D) + + for i in range(n - 1): + for j in range(i + 1, n): + path = retrieve_shortest_path(i, j, p_mat) + K = len(path) + T_mat[i, j] = 2 \ + * sum([m[i, j] for i, j in itertools.combinations(path, 2)]) \ + / (K * (K - 1)) + T_mat += T_mat.transpose() + + return T_mat def search_information(W, D, has_memory=False): @@ -580,116 +537,6 @@ def search_information(W, D, has_memory=False): return SI -def path_transitivity(D): - """ - Calculate path transitivity. - - This function implements path transitivity, calculating the density of - local detours (triangles) that are available along the shortest paths - between all pairs of nodes. 
- - This function is adapted and optimized from the Brain Connectivity Toolbox. - - .. warning:: - Test before use. - - Parameters - ---------- - D : (N, N) ndarray - Weight or connection length matrix. Length matrix is recommended and - transform should have been applied. - - Returns - ------- - T_mat : (N, N) ndarray - Path transitivity matrix - - References - ---------- - .. [1] Goñi, J., Van Den Heuvel, M. P., Avena-Koenigsberger, - A., Velez de Mendizabal, N., Betzel, R. F., Griffa, A., ... & - Sporns, O. (2014). Resting-brain functional connectivity predicted - by analytic measures of network communication. Proceedings of the - National Academy of Sciences, 111(2), 833-838. - """ - n = len(D) - m = np.zeros((n, n)) - T_mat = np.zeros((n, n)) - - deg_wu = np.sum(D, axis=0) - - for i in range(n - 1): - for j in range(i + 1, n): - sig_and = np.logical_and(D[i, :], D[j, :]) - m[i, j] = np.dot(D[i, :] + D[j, :], sig_and) \ - / (deg_wu[i] + deg_wu[j] - 2 * D[i, j]) - m += m.transpose() - - _, p_mat = distance_wei_floyd(D) - - for i in range(n - 1): - for j in range(i + 1, n): - path = retrieve_shortest_path(i, j, p_mat) - K = len(path) - T_mat[i, j] = 2 \ - * sum([m[i, j] for i, j in itertools.combinations(path, 2)]) \ - / (K * (K - 1)) - T_mat += T_mat.transpose() - - return T_mat - - -def flow_graph(W, r=None, t=1): - """ - Calculate flow graph. - - This function implements flow graph, instantiates a continuous - time random walk on network. Waiting time for walkers at each - node are distributed as Poisson with rate parameter r. - This function returns the flow graph at time t. - - .. warning:: - Test before use. - - Parameters - ---------- - W : (N, N) ndarray - Symmetric adjacency matrix. - r : (N,) or (N, 1) ndarray, optional - Rate parameter. Will be set to np.ones((N, 1)) if not specified. - Default: None - t : int, optional - Markov time. Default: 1 - - Returns - ------- - dyn : (N, N) ndarray - flow graph at time T - - References - ---------- - .. [1] Lambiotte, R., Sinatra, R., Delvenne, J. C., Evans, T. S., - Barahona, M., & Latora, V. (2011). Flow graphs: Interweaving - dynamics and structure. Physical Review E, 84(1), 017102. - .. [2] https://github.com/brain-networks/local_scfc/blob/main/fcn/fcn_flow_graph.m - """ - if r is None: - r = np.ones((W.shape[0], 1)) - else: - if r.ndim == 1: - r = r[:, None] - deg_wu = np.sum(W, axis=0, keepdims=True) # (1, N) - deg_rate = np.sum(deg_wu / r, axis=0, keepdims=True) # (N, N) => (1, N) - ps = deg_wu / (deg_rate * r) # (1, N) / (N, N) => (N, N) - laplacian = np.diagflat(r) - np.multiply(np.divide(W, deg_wu), r) # elementwise - dyn = np.multiply( - deg_rate * scipy.sparse.linalg.expm(-t * laplacian), - ps - ) # elementwise - dyn = (dyn + dyn.T) / 2 - return dyn - - def mean_first_passage_time(W, tol=1e-3): """ Calculate mean first passage time. @@ -824,7 +671,7 @@ def resource_efficiency_bin(W_bin, lambda_prob=0.5): morphospace of communication efficiency in complex networks. PLoS One, 8(3), e58070. """ - W_bin = _binarize(W_bin) + W_bin = _fast_binarize(W_bin) if not (0 < lambda_prob < 1): raise ValueError("lambda_prob must be between 0 and 1.") @@ -871,6 +718,62 @@ def resource_efficiency_bin(W_bin, lambda_prob=0.5): return E_res, prob_spl +def flow_graph(W, r=None, t=1): + """ + Calculate flow graph. + + This function implements flow graph, instantiates a continuous + time random walk on network. Waiting time for walkers at each + node are distributed as Poisson with rate parameter r. 
+ This function returns the flow graph at time t. + + .. warning:: + Test before use. + + Parameters + ---------- + W : (N, N) ndarray + Symmetric adjacency matrix. + r : (N,) or (N, 1) ndarray, optional + Rate parameter. Will be set to np.ones((N, 1)) if not specified. + Default: None + t : int, optional + Markov time. Default: 1 + + Returns + ------- + dyn : (N, N) ndarray + flow graph at time T + + References + ---------- + .. [1] Lambiotte, R., Sinatra, R., Delvenne, J. C., Evans, T. S., + Barahona, M., & Latora, V. (2011). Flow graphs: Interweaving + dynamics and structure. Physical Review E, 84(1), 017102. + .. [2] https://github.com/brain-networks/local_scfc/blob/main/fcn/fcn_flow_graph.m + """ + if r is None: + r = np.ones((W.shape[0], 1)) + else: + if r.ndim == 1: + r = r[:, None] + deg_wu = np.sum(W, axis=0, keepdims=True) # (1, N) + deg_rate = np.sum(deg_wu / r, axis=0, keepdims=True) # (N, N) => (1, N) + ps = deg_wu / (deg_rate * r) # (1, N) / (N, N) => (N, N) + laplacian = np.diagflat(r) - np.multiply(np.divide(W, deg_wu), r) # elementwise + dyn = np.multiply( + deg_rate * scipy.sparse.linalg.expm(-t * laplacian), + ps + ) # elementwise + dyn = (dyn + dyn.T) / 2 + return dyn + + +def assortativity(W, r=None): + """Calculate assortativity.""" + pass + + def matching_ind_und(W): """ Calculate undirected matching index. @@ -928,37 +831,83 @@ def matching_ind_und(W): return M0 -def _graph_laplacian(W): - r""" - Compute the graph Laplacian of a weighted adjacency matrix. - - Graph Laplacian is defined as the degree matrix minus the adjacency - matrix :math:`L = D - W`, where :math:`D` is the degree matrix and - is defined as :math:`D_{ii} = \sum_j W_{ij}`. - - The graph Laplacian matrix :math:`L` has the form of - - .. math:: - L = \begin{bmatrix} - d_1 & -w_{12} & \cdots & -w_{1n} \\ - -w_{21} & d_2 & \cdots & -w_{2n} \\ - \vdots & \vdots & \ddots & \vdots \\ - -w_{n1} & -w_{n2} & \cdots & d_n - \end{bmatrix} +def rich_feeder_peripheral(x, sc, stat='median'): + """ + Calculate connectivity values in rich, feeder, and peripheral edges. Parameters ---------- - W : (N, N) array_like - Weighted, directed/undirected connection weight/length array + x : (N, N) numpy.ndarray + Symmetric correlation or connectivity matrix + sc : (N, N) numpy.ndarray + Binary structural connectivity matrix + stat : {'mean', 'median'}, optional + Statistic to use over rich/feeder/peripheral links. Default: 'median' Returns ------- - L : (N, N) numpy.ndarray - Graph Laplacian of `W` + rfp : (3, k) numpy.ndarray + Array of median rich (0), feeder (1), and peripheral (2) + values, defined by `x`. `k` is the maximum degree defined on `sc`. + pvals : (3, k) numpy.ndarray + p-value for each link, computed using Welch's t-test. + Rich links are compared against non-rich links. Feeder links are + compared against peripheral links. Peripheral links are compared + against feeder links. T-test is one-sided. + + Notes + ----- + This code was written by Justine Hansen who promises to fix and even + optimize the code should any issues arise, provided you let her know. 
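+
+    A minimal, illustrative sketch of the expected call (``fc`` and ``sc``
+    below are random placeholders, not data shipped with the package):
+
+    .. code:: python
+
+        import numpy as np
+        from netneurotools import metrics
+
+        rng = np.random.default_rng(1234)
+        fc = np.corrcoef(rng.standard_normal((90, 200)))  # symmetric (N, N) matrix
+        sc = (rng.random((90, 90)) < 0.1).astype(int)     # ~10% density
+        sc = np.triu(sc, 1) + np.triu(sc, 1).T            # binary, symmetric, no self-loops
+        rfp, pvals = metrics.rich_feeder_peripheral(fc, sc, stat='median')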
""" - D = np.diag(np.sum(W, axis=0)) - return D - W + stats = ['mean', 'median'] + if stat not in stats: + raise ValueError(f'Provided stat {stat} not valid.\ + Must be one of {stats}') + nnodes = len(sc) + mask = np.triu(np.ones(nnodes), 1) > 0 + node_degree = degrees_und(sc) + k = np.max(node_degree).astype(np.int64) + rfp_label = np.zeros((len(sc[mask]), k)) -if use_numba: - _graph_laplacian = njit(_graph_laplacian) # ("float64[:,::1](float64[:,::1])") + for degthresh in range(k): # for each degree threshold + hub_idx = np.where(node_degree >= degthresh) # find the hubs + hub = np.zeros([nnodes, 1]) + hub[hub_idx, :] = 1 + + rfp = np.zeros([nnodes, nnodes]) # for each link, define rfp + for edge1 in range(nnodes): + for edge2 in range(nnodes): + if hub[edge1] + hub[edge2] == 2: + rfp[edge1, edge2] = 1 # rich + if hub[edge1] + hub[edge2] == 1: + rfp[edge1, edge2] = 2 # feeder + if hub[edge1] + hub[edge2] == 0: + rfp[edge1, edge2] = 3 # peripheral + rfp_label[:, degthresh] = rfp[mask] + + rfp = np.zeros([3, k]) + pvals = np.zeros([3, k]) + for degthresh in range(k): + + redfunc = np.median if stat == 'median' else np.mean + for linktype in range(3): + rfp[linktype, degthresh] = redfunc(x[mask][rfp_label[:, degthresh] + == linktype + 1]) + + # p-value (one-sided Welch's t-test) + _, pvals[0, degthresh] = ttest_ind( + x[mask][rfp_label[:, degthresh] == 1], + x[mask][rfp_label[:, degthresh] != 1], + equal_var=False, alternative='greater') + _, pvals[1, degthresh] = ttest_ind( + x[mask][rfp_label[:, degthresh] == 2], + x[mask][rfp_label[:, degthresh] == 3], + equal_var=False, alternative='greater') + _, pvals[2, degthresh] = ttest_ind( + x[mask][rfp_label[:, degthresh] == 3], + x[mask][rfp_label[:, degthresh] == 2], + equal_var=False, alternative='greater') + + return rfp, pvals diff --git a/netneurotools/metrics/communication.py b/netneurotools/metrics/communication.py new file mode 100644 index 0000000..6d126b9 --- /dev/null +++ b/netneurotools/metrics/communication.py @@ -0,0 +1 @@ +"""Functions for calculating network communication metrics.""" diff --git a/netneurotools/metrics/control.py b/netneurotools/metrics/control.py new file mode 100644 index 0000000..b1e4b1c --- /dev/null +++ b/netneurotools/metrics/control.py @@ -0,0 +1 @@ +"""Functions for calculating network control metrics.""" diff --git a/netneurotools/metrics/metrics_utils.py b/netneurotools/metrics/metrics_utils.py new file mode 100644 index 0000000..b32f632 --- /dev/null +++ b/netneurotools/metrics/metrics_utils.py @@ -0,0 +1,66 @@ +"""Functions for supporting network metrics.""" + +import numpy as np + +try: + from numba import njit + use_numba = True +except ImportError: + use_numba = False + + +def _fast_binarize(W): + """ + Binarize a matrix. + + Parameters + ---------- + W : (N, N) array_like + Matrix to be binarized + + Returns + ------- + binarized : (N, N) numpy.ndarray + Binarized matrix + """ + return (W > 0) * 1 + + +if use_numba: + _fast_binarize = njit(_fast_binarize) + + +def _graph_laplacian(W): + r""" + Compute the graph Laplacian of a weighted adjacency matrix. + + Graph Laplacian is defined as the degree matrix minus the adjacency + matrix :math:`L = D - W`, where :math:`D` is the degree matrix and + is defined as :math:`D_{ii} = \sum_j W_{ij}`. + + The graph Laplacian matrix :math:`L` has the form of + + .. 
math:: + L = \begin{bmatrix} + d_1 & -w_{12} & \cdots & -w_{1n} \\ + -w_{21} & d_2 & \cdots & -w_{2n} \\ + \vdots & \vdots & \ddots & \vdots \\ + -w_{n1} & -w_{n2} & \cdots & d_n + \end{bmatrix} + + Parameters + ---------- + W : (N, N) array_like + Weighted, directed/undirected connection weight/length array + + Returns + ------- + L : (N, N) numpy.ndarray + Graph Laplacian of `W` + """ + D = np.diag(np.sum(W, axis=0)) + return D - W + + +if use_numba: + _graph_laplacian = njit(_graph_laplacian) # ("float64[:,::1](float64[:,::1])") diff --git a/netneurotools/metrics/spreading.py b/netneurotools/metrics/spreading.py new file mode 100644 index 0000000..8a5fb79 --- /dev/null +++ b/netneurotools/metrics/spreading.py @@ -0,0 +1,6 @@ +"""Functions for calculating network spreading models.""" + + +def simulate_atrophy(): + """Simulate atrophy in a network.""" + pass diff --git a/netneurotools/metrics/statistical.py b/netneurotools/metrics/statistical.py new file mode 100644 index 0000000..db1bed8 --- /dev/null +++ b/netneurotools/metrics/statistical.py @@ -0,0 +1,661 @@ +"""Functions for calculating statistical network metrics.""" + +import numpy as np + +try: + from numba import njit + use_numba = True +except ImportError: + use_numba = False + +from .metrics_utils import _graph_laplacian + + +def network_pearsonr(annot1, annot2, weight): + r""" + Calculate pearson correlation between two annotation vectors. + + .. warning:: + Test before use. + + Parameters + ---------- + annot1 : (N,) array_like + First annotation vector, demean will be applied. + annot2 : (N,) array_like + Second annotation vector, demean will be applied. + weight : (N, N) array_like + Weight matrix. Diagonal elements should be 1. + + Returns + ------- + corr : float + Network correlation between `annot1` and `annot2` + + Notes + ----- + If Pearson correlation is represented as + + .. math:: + \rho_{x,y} = \dfrac{ + \mathrm{sum}(I \times (\hat{x} \otimes \hat{y})) + }{ + \sigma_x \sigma_y + } + + The network correlation is defined analogously as + + .. math:: + \rho_{x,y,G} = \dfrac{ + \mathrm{sum}(W \times (\hat{x} \otimes \hat{y})) + }{ + \sigma_{x,W} \sigma_{y,W} + } + + where :math:`\hat{x}` and :math:`\hat{y}` are the demeaned annotation vectors, + + The weight matrix :math:`W` is used to represent the network structure. + It is usually in the form of :math:`W = \\exp(-kL)` where :math:`L` is the + length matrix and :math:`k` is a decay parameter. + + Example using shortest path length as weight + + .. code:: python + + spl, _ = distance_wei_floyd(D) # input should be distance matrix + spl_wei = 1 / np.exp(spl) + netcorr = network_pearsonr(annot1, annot2, spl_wei) + + Example using (inverse) effective resistance as weight + + .. code:: python + + R_eff = effective_resistance(W) + R_eff_norm = R_eff / np.max(R_eff) + W = 1 / R_eff_norm + W = W / np.max(W) + np.fill_diagonal(W, 1.0) + netcorr = network_pearsonr(annot1, annot2, W) + + References + ---------- + .. [1] Coscia, M. (2021). Pearson correlations on complex networks. + Journal of Complex Networks, 9(6), cnab036. 
+ https://doi.org/10.1093/comnet/cnab036 + + + See Also + -------- + netneurotools.stats.network_pearsonr_pairwise + """ + annot1 = annot1 - np.mean(annot1) + annot2 = annot2 - np.mean(annot2) + upper = np.sum(np.multiply(weight, np.outer(annot1, annot2))) + lower1 = np.sum(np.multiply(weight, np.outer(annot1, annot1))) + lower2 = np.sum(np.multiply(weight, np.outer(annot2, annot2))) + return upper / np.sqrt(lower1) / np.sqrt(lower2) + + +def network_pearsonr_numba(annot1, annot2, weight): + """ + Numba version of :meth:`netneurotools.stats.network_pearsonr`. + + .. warning:: + Test before use. + + Parameters + ---------- + annot1 : (N,) array_like + First annotation vector, demean will be applied. + annot2 : (N,) array_like + Second annotation vector, demean will be applied. + weight : (N, N) array_like + Weight matrix. Diagonal elements should be 1. + + Returns + ------- + corr : float + Network correlation between `annot1` and `annot2` + """ + n = annot1.shape[0] + annot1 = annot1 - np.mean(annot1) + annot2 = annot2 - np.mean(annot2) + upper, lower1, lower2 = 0.0, 0.0, 0.0 + for i in range(n): + for j in range(n): + upper += annot1[i] * annot2[j] * weight[i, j] + lower1 += annot1[i] * annot1[j] * weight[i, j] + lower2 += annot2[i] * annot2[j] * weight[i, j] + return upper / np.sqrt(lower1) / np.sqrt(lower2) + + +if use_numba: + network_pearsonr_numba = njit(network_pearsonr_numba) + + +def _cross_outer(annot_mat): + """ + Calculate cross outer product of input matrix. + + This functions is only used in `network_pearsonr_pairwise`. + + Parameters + ---------- + annot_mat : (N, D) array_like + Input matrix + + Returns + ------- + cross_outer : (N, N, D, D) numpy.ndarray + Cross outer product of `annot_mat` + """ + n_samp, n_feat = annot_mat.shape + cross_outer = np.empty((n_samp, n_samp, n_feat, n_feat), annot_mat.dtype) + for a in range(n_samp): + for b in range(n_samp): + for c in range(n_feat): + for d in range(n_feat): + cross_outer[a, b, c, d] = annot_mat[a, c] * annot_mat[b, d] + return cross_outer + + +if use_numba: + # ("float64[:,:,:,::1](float64[:,::1])") + _cross_outer = njit(_cross_outer) + + +def _multiply_sum(cross_outer, weight): + """ + Multiply and sum cross outer product. + + This functions is only used in `network_pearsonr_pairwise`. + + Parameters + ---------- + cross_outer : (N, N, D, D) array_like + Cross outer product of `annot_mat` + weight : (D, D) array_like + Weight matrix + + Returns + ------- + cross_outer_after : (N, N) numpy.ndarray + Result of multiplying and summing `cross_outer` + """ + n_samp, _, n_dim, _ = cross_outer.shape + cross_outer_after = np.empty((n_samp, n_samp), cross_outer.dtype) + for i in range(n_samp): + for j in range(n_samp): + curr_sum = 0.0 + for k in range(n_dim): + for l in range(n_dim): # noqa: E741 + curr_sum += weight[k, l] * cross_outer[i, j, k, l] + cross_outer_after[i, j] = curr_sum + return cross_outer_after + + +if use_numba: + # ("float64[:,::1](float64[:,:,:,::1],float64[:,::1])") + _multiply_sum = njit(_multiply_sum) + + +def network_pearsonr_pairwise(annot_mat, weight): + """ + Calculate pairwise network correlation between rows of `annot_mat`. + + .. warning:: + Test before use. + + Parameters + ---------- + annot_mat : (N, D) array_like + Input matrix + weight : (D, D) array_like + Weight matrix. Diagonal elements should be 1. 
+ + Returns + ------- + corr_mat : (N, N) numpy.ndarray + Pairwise network correlation matrix + + Notes + ----- + This is a faster version of :meth:`netneurotools.stats.network_pearsonr` + for calculating pairwise network correlation between rows of `annot_mat`. + Check :meth:`netneurotools.stats.network_pearsonr` for details. + + See Also + -------- + netneurotools.stats.network_pearsonr + """ + annot_mat_demean = annot_mat - np.mean(annot_mat, axis=1, keepdims=True) + if use_numba: + cross_outer = _cross_outer(annot_mat_demean) + cross_outer_after = _multiply_sum(cross_outer, weight) + else: + # https://stackoverflow.com/questions/24839481/python-matrix-outer-product + cross_outer = np.einsum('ac,bd->abcd', annot_mat_demean, annot_mat_demean) + cross_outer_after = np.sum(np.multiply(cross_outer, weight), axis=(2, 3)) + # translating the two lines below in numba does not speed up much + lower = np.sqrt(np.diagonal(cross_outer_after)) + return cross_outer_after / np.einsum('i,j', lower, lower) + + +def _onehot_quadratic_form_broadcast(Q_star): + """ + Calculate one-hot quadratic form of input matrix. + + This functions is only used in `effective_resistance`. + + Parameters + ---------- + Q_star : (N, N) array_like + Input matrix + + Returns + ------- + R_eff : (N, N) numpy.ndarray + One-hot quadratic form of `Q_star` + """ + n = Q_star.shape[0] + R_eff = np.empty((n, n), Q_star.dtype) + for i in range(n): + for j in range(n): + R_eff[i, j] = Q_star[i, i] - Q_star[j, i] - Q_star[i, j] + Q_star[j, j] + return R_eff + + +if use_numba: + # ("float64[:,::1](float64[:,::1])") + _onehot_quadratic_form_broadcast = njit(_onehot_quadratic_form_broadcast) + + +def effective_resistance(W, directed=True): + """ + Calculate effective resistance matrix. + + The effective resistance between two nodes in a graph, often used in the context + of electrical networks, is a measure that stems from the inverse of the Laplacian + matrix of the graph. + + .. warning:: + Test before use. + + Parameters + ---------- + W : (N, N) array_like + Weight matrix. + directed : bool, optional + Whether the graph is directed. This is used to determine whether to turn on + the :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. When you are + using a symmetric weight matrix (while real-valued implying hermitian), you + can set this to False for better performance. Default: True + + Returns + ------- + R_eff : (N, N) numpy.ndarray + Effective resistance matrix + + Notes + ----- + The effective resistance between two nodes :math:`i` and :math:`j` is defined as + + .. math:: + R_{ij} = (e_i - e_j)^T Q^* (e_i - e_j) + + where :math:`Q^*` is the Moore-Penrose pseudoinverse of the Laplacian matrix + :math:`L` of the graph, and :math:`e_i` is the :math:`i`-th standard basis vector. + + References + ---------- + .. [1] Ellens, W., Spieksma, F. M., Van Mieghem, P., Jamakovic, A., & Kooij, + R. E. (2011). Effective graph resistance. Linear Algebra and Its Applications, + 435(10), 2491–2506. https://doi.org/10.1016/j.laa.2011.02.024 + + See Also + -------- + netneurotools.stats.network_polarisation + """ + L = _graph_laplacian(W) + Q_star = np.linalg.pinv(L, hermitian=not directed) + if use_numba: + R_eff = _onehot_quadratic_form_broadcast(Q_star) + else: + Q_star_diag = np.diag(Q_star) + R_eff = \ + Q_star_diag[:, np.newaxis] \ + - Q_star \ + - Q_star.T \ + + Q_star_diag[np.newaxis, :] + return R_eff + + +def _polariz_diff(vec): + """ + Calculate difference between positive and negative parts of a vector. 
+ + This functions is only used in `network_polarisation`. + + Parameters + ---------- + vec : (N,) array_like + Input vector. Must have both positive and negative values. + + Returns + ------- + vec_diff : (N,) numpy.ndarray + Difference between positive and negative parts of `vec` + """ + # + vec_pos = np.maximum(vec, 0.0) + vec_pos /= np.max(vec_pos) + # + vec_neg = np.minimum(vec, 0.0) + vec_neg = np.abs(vec_neg) + vec_neg /= np.max(vec_neg) + return (vec_pos - vec_neg) + + +if use_numba: + _polariz_diff = njit(_polariz_diff) + + +def _quadratic_form(W, vec_left, vec_right, squared=False): + """ + Calculate quadratic form :math:`v_{left}^T W v_{right}`. + + Parameters + ---------- + W : (N, N) array_like + Input matrix. + vec_left : (N,) array_like + Left weight vector. + vec_right : (N,) array_like + Right weight vector. + squared : bool, optional + Whether to square the input weight matrix. Default: False + + Returns + ------- + quadratic_form : float + Quadratic form from `W`, `vec_left`, and `vec_right` + """ + # [numpy] + + # (vec_left.T @ W @ vec_right)[0, 0] + # [numba] + # vec = np.ascontiguousarray(vec[np.newaxis, :]) + n = W.shape[0] + ret = 0.0 + for i in range(n): + for j in range(n): + if squared: + ret += vec_left[i] * vec_right[j] * W[i, j]**2 + else: + ret += vec_left[i] * vec_right[j] * W[i, j] + return ret + + +if use_numba: + _quadratic_form = njit(_quadratic_form) + + +def network_polarisation(vec, W, directed=True): + r""" + Calculate polarisation of a vector on a graph. + + Network polarisation is a measure of polizzartion taken into account all the + three factors below [1]_: + + - how extreme the opinions of the people are + - how much they organize into echo chambers, and + - how these echo chambers organize in the network + + .. warning:: + Test before use. + + Parameters + ---------- + vec : (N,) array_like + Polarization vector. Must have both positive and negative values. Will be + normalized between -1 and 1 internally. + W : (N, N) array_like + Weight matrix. + directed : bool, optional + Whether the graph is directed. This is used to determine whether to turn on + the :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. When you are + using a symmetric weight matrix (while real-valued implying hermitian), you + can set this to False for better performance. Default: True + + Returns + ------- + polariz : float + Polarization of `vec` on `W` + + Notes + ----- + The measure is based on the genralized Eucledian distance, defined as + + .. math:: + \delta_{G, o} = \sqrt{(o^+ - o^-)^T Q^* (o^+ - o^-)} + + where :math:`o^+` and :math:`o^-` are the positive and negative parts of the + polarization vector, and :math:`Q^*` is the Moore-Penrose pseudoinverse + of the Laplacian matrix :math:`L` of the graph. Check :func:`effective_resistance` + for similarity. + + References + ---------- + .. [1] Hohmann, M., Devriendt, K., & Coscia, M. (2023). Quantifying ideological + polarization on a network using generalized Euclidean distance. Science Advances, + 9(9), eabq2044. https://doi.org/10.1126/sciadv.abq2044 + + See Also + -------- + netneurotools.stats.effective_resistance + """ + L = _graph_laplacian(W) + Q_star = np.linalg.pinv(L, hermitian=not directed) + diff = _polariz_diff(vec) + if use_numba: + polariz_sq = _quadratic_form(Q_star, diff, diff, squared=False) + else: + polariz_sq = (diff.T @ Q_star @ diff) + return np.sqrt(polariz_sq) + + +def network_variance(vec, D): + r""" + Calculate variance of a vector on a graph. 
+ + Network variance is a measure of variance taken into account the network + structure. + + .. warning:: + Test before use. + + Parameters + ---------- + vec : (N,) array_like + Input vector. Must be all positive. + Will be normalized internally as a probability distribution. + D : (N, N) array_like + Distance matrix. + + Returns + ------- + network_variance : float + Network variance of `vec` on `D` + + Notes + ----- + The network variance is defined as + + .. math:: + var(p) = \frac{1}{2} \sum_{i, j} p(i) p(j) d^2(i,j) + + where :math:`p` is the probability distribution of `vec`, and :math:`d(i,j)` + is the distance between node :math:`i` and :math:`j`. + + The distance matrix :math:`D` can make use of effective resistance or its + square root. + + Example using effective resistance as weight matrix + + .. code:: python + + R_eff = effective_resistance(W) + netvar = network_variance(vec, R_eff) + + References + ---------- + .. [1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022). + Variance and covariance of distributions on graphs. SIAM Review, 64(2), + 343–359. https://doi.org/10.1137/20M1361328 + + See Also + -------- + netneurotools.stats.network_covariance + """ + p = vec / np.sum(vec) + return 0.5 * (p.T @ np.multiply(D, D) @ p) + + +def network_variance_numba(vec, D): + """ + Numba version of :meth:`netneurotools.stats.network_variance`. + + Network variance is a measure of variance taken into account the network + structure. + + .. warning:: + Test before use. + + Parameters + ---------- + vec : (N,) array_like + Input vector. Must be all positive. + Will be normalized internally as a probability distribution. + D : (N, N) array_like + Distance matrix. + + Returns + ------- + network_variance : float + Network variance of `vec` on `D` + """ + p = vec / np.sum(vec) + return 0.5 * _quadratic_form(D, p, p, squared=True) + + +if use_numba: + network_variance_numba = njit(network_variance_numba) + + +def network_covariance(joint_pmat, D, calc_marginal=True): + r""" + Calculate covariance of a joint probability matrix on a graph. + + .. warning:: + Test before use. + + Parameters + ---------- + joint_pmat : (N, N) array_like + Joint probability matrix. Please make sure that it is valid. + D : (N, N) array_like + Distance matrix. + calc_marginal : bool, optional + Whether to calculate marginal variance. It will be marginally faster if + :code:`calc_marginal=False` (returning marginal variances as 0). Default: True + + Returns + ------- + network_covariance : float + Covariance of `joint_pmat` on `D` + var_p : float + Marginal variance of `joint_pmat` on `D`. + Will be 0 if :code:`calc_marginal=False` + var_q : float + Marginal variance of `joint_pmat` on `D`. + Will be 0 if :code:`calc_marginal=False` + + Notes + ----- + The network variance is defined as + + .. math:: + cov(P) = \frac{1}{2} \sum_{i, j} [p(i) q(j) - P(i,j)] d^2(i,j) + + where :math:`P` is the joint probability matrix, :math:`p` and :math:`q` + are the marginal probability distributions of `joint_pmat`, and :math:`d(i,j)` + is the distance between node :math:`i` and :math:`j`. + + Check :func:`network_variance` for usage. + + References + ---------- + .. [1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022). + Variance and covariance of distributions on graphs. SIAM Review, 64(2), + 343–359. 
https://doi.org/10.1137/20M1361328 + + See Also + -------- + netneurotools.stats.network_variance + """ + p = np.sum(joint_pmat, axis=1) + q = np.sum(joint_pmat, axis=0) + D_sq = np.multiply(D, D) + cov = p.T @ D_sq @ q - np.sum(np.multiply(joint_pmat, D_sq)) + if calc_marginal: + var_p = p.T @ D_sq @ p + var_q = q.T @ D_sq @ q + else: + var_p, var_q = 0, 0 + return 0.5 * cov, 0.5 * var_p, 0.5 * var_q + + +def network_covariance_numba(joint_pmat, D, calc_marginal=True): + """ + Numba version of :meth:`netneurotools.stats.network_covariance`. + + .. warning:: + Test before use. + + Parameters + ---------- + joint_pmat : (N, N) array_like + Joint probability matrix. Please make sure that it is valid. + D : (N, N) array_like + Distance matrix. + calc_marginal : bool, optional + Whether to calculate marginal variance. It will be marginally faster if + :code:`calc_marginal=False` (returning marginal variances as 0). Default: True + + Returns + ------- + network_covariance : float + Covariance of `joint_pmat` on `D` + var_p : float + Marginal variance of `joint_pmat` on `D`. + Will be 0 if :code:`calc_marginal=False` + var_q : float + Marginal variance of `joint_pmat` on `D`. + Will be 0 if :code:`calc_marginal=False` + """ + n = joint_pmat.shape[0] + p = np.sum(joint_pmat, axis=1) + q = np.sum(joint_pmat, axis=0) + cov = 0.0 + var_p, var_q = 0.0, 0.0 + for i in range(n): + for j in range(n): + cov += (p[i] * q[j] - joint_pmat[i, j]) * D[i, j]**2 + if calc_marginal: + var_p += p[i] * p[j] * D[i, j]**2 + var_q += q[i] * q[j] * D[i, j]**2 + return 0.5 * cov, 0.5 * var_p, 0.5 * var_q + + +if use_numba: + network_covariance_numba = njit(network_covariance_numba) diff --git a/netneurotools/metrics/tests/__init__.py b/netneurotools/metrics/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/tests/test_metrics.py b/netneurotools/metrics/tests/test_bct.py similarity index 72% rename from netneurotools/tests/test_metrics.py rename to netneurotools/metrics/tests/test_bct.py index 253da0f..f83ab6a 100644 --- a/netneurotools/tests/test_metrics.py +++ b/netneurotools/metrics/tests/test_bct.py @@ -1,15 +1,15 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.metrics functionality.""" +"""For testing netneurotools.metrics.bct functionality.""" -import numpy as np import pytest +import numpy as np from netneurotools import metrics rs = np.random.RandomState(1234) -def test_communicability(): +def test_communicability_bin(): + """Test communicability_bin function.""" comm = metrics.communicability_bin(rs.choice([0, 1], size=(100, 100))) assert comm.shape == (100, 100) @@ -18,6 +18,7 @@ def test_communicability(): def test_communicability_wei(): + """Test communicability_wei function.""" comm = metrics.communicability_wei(rs.rand(100, 100)) assert comm.shape == (100, 100) assert np.allclose(np.diag(comm), 0) diff --git a/netneurotools/metrics/tests/test_communication.py b/netneurotools/metrics/tests/test_communication.py new file mode 100644 index 0000000..dd066f8 --- /dev/null +++ b/netneurotools/metrics/tests/test_communication.py @@ -0,0 +1 @@ +"""For testing netneurotools.metrics.communication functionality.""" diff --git a/netneurotools/metrics/tests/test_control.py b/netneurotools/metrics/tests/test_control.py new file mode 100644 index 0000000..28ad7c2 --- /dev/null +++ b/netneurotools/metrics/tests/test_control.py @@ -0,0 +1 @@ +"""For testing netneurotools.metrics.control functionality.""" diff --git a/netneurotools/metrics/tests/test_spreading.py 
b/netneurotools/metrics/tests/test_spreading.py new file mode 100644 index 0000000..216c638 --- /dev/null +++ b/netneurotools/metrics/tests/test_spreading.py @@ -0,0 +1 @@ +"""For testing netneurotools.metrics.spreading functionality.""" diff --git a/netneurotools/metrics/tests/test_statistical.py b/netneurotools/metrics/tests/test_statistical.py new file mode 100644 index 0000000..73caf2d --- /dev/null +++ b/netneurotools/metrics/tests/test_statistical.py @@ -0,0 +1 @@ +"""For testing netneurotools.metrics.statistical functionality.""" diff --git a/netneurotools/modularity.py b/netneurotools/modularity.py deleted file mode 100644 index 1831dd9..0000000 --- a/netneurotools/modularity.py +++ /dev/null @@ -1,316 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for working with network modularity.""" - -import bct -import numpy as np -from sklearn.utils.validation import check_random_state -from . import cluster - -try: - from numba import njit, prange - use_numba = True -except ImportError: - prange = range - use_numba = False - - -def consensus_modularity(adjacency, gamma=1, B='modularity', - repeats=250, null_func=np.mean, seed=None): - """ - Find community assignments from `adjacency` through consensus. - - Performs `repeats` iterations of community detection on `adjacency` and - then uses consensus clustering on the resulting community assignments. - - Parameters - ---------- - adjacency : (N, N) array_like - Adjacency matrix (weighted/non-weighted) on which to perform consensus - community detection. - gamma : float, optional - Resolution parameter for modularity maximization. Default: 1 - B : str or (N, N) array_like, optional - Null model to use for consensus clustering. If `str`, must be one of - ['modularity', 'potts', 'negative_sym', 'negative_asym']. Default: - 'modularity' - repeats : int, optional - Number of times to repeat Louvain algorithm clustering. Default: 250 - null_func : callable, optional - Function used to generate null model when performing consensus-based - clustering. Must accept a 2D array as input and return a single value. - Default: `np.mean` - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Default: None - - Returns - ------- - consensus : (N,) np.ndarray - Consensus-derived community assignments - Q_all : array_like - Optimized modularity over all `repeats` community assignments - zrand_all : array_like - z-Rand score over all pairs of `repeats` community assignment vectors - - References - ---------- - Bassett, D. S., Porter, M. A., Wymbs, N. F., Grafton, S. T., Carlson, - J. M., & Mucha, P. J. (2013). Robust detection of dynamic community - structure in networks. Chaos: An Interdisciplinary Journal of Nonlinear - Science, 23(1), 013142. - """ - # generate community partitions `repeat` times - comms, Q_all = zip(*[bct.community_louvain(adjacency, gamma=gamma, B=B) - for i in range(repeats)]) - comms = np.column_stack(comms) - - # find consensus cluster assignments across all partitoning solutions - consensus = cluster.find_consensus(comms, null_func=null_func, seed=seed) - - # get z-rand statistics for partition similarity (n.b. can take a while) - zrand_all = _zrand_partitions(comms) - - return consensus, np.array(Q_all), zrand_all - - -def _dummyvar(labels): - """ - Generate dummy-coded array from provided community assignment `labels`. 
- - Parameters - ---------- - labels : (N,) array_like - Labels assigning `N` samples to `G` groups - - Returns - ------- - ci : (N, G) numpy.ndarray - Dummy-coded array where 1 indicates that a sample belongs to a group - """ - comms = np.unique(labels) - - ci = np.zeros((len(labels), len(comms))) - for n, grp in enumerate(comms): - ci[:, n] = labels == grp - - return ci - - -def zrand(X, Y): - """ - Calculate the z-Rand index of two community assignments. - - Parameters - ---------- - X, Y : (n, 1) array_like - Community assignment vectors to compare - - Returns - ------- - z_rand : float - Z-rand index - - References - ---------- - Amanda L. Traud, Eric D. Kelsic, Peter J. Mucha, and Mason A. Porter. - (2011). Comparing Community Structure to Characteristics in Online - Collegiate Social Networks. SIAM Review, 53, 526-543. - """ - if X.ndim > 1 or Y.ndim > 1: - if X.shape[-1] > 1 or Y.shape[-1] > 1: - raise ValueError('X and Y must have only one-dimension each. ' - 'Please check inputs.') - - Xf = X.flatten() - Yf = Y.flatten() - - n = len(Xf) - indx, indy = _dummyvar(Xf), _dummyvar(Yf) - Xa = indx.dot(indx.T) - Ya = indy.dot(indy.T) - - M = n * (n - 1) / 2 - M1 = Xa.nonzero()[0].size / 2 - M2 = Ya.nonzero()[0].size / 2 - - wab = np.logical_and(Xa, Ya).nonzero()[0].size / 2 - - mod = n * (n**2 - 3 * n - 2) - C1 = mod - (8 * (n + 1) * M1) + (4 * np.power(indx.sum(0), 3).sum()) - C2 = mod - (8 * (n + 1) * M2) + (4 * np.power(indy.sum(0), 3).sum()) - - a = M / 16 - b = ((4 * M1 - 2 * M)**2) * ((4 * M2 - 2 * M)**2) / (256 * (M**2)) - c = C1 * C2 / (16 * n * (n - 1) * (n - 2)) - d = ((((4 * M1 - 2 * M)**2) - (4 * C1) - (4 * M)) - * (((4 * M2 - 2 * M)**2) - (4 * C2) - (4 * M)) - / (64 * n * (n - 1) * (n - 2) * (n - 3))) - - sigw2 = a - b + c + d - # catch any negatives - if sigw2 < 0: - return 0 - z_rand = (wab - ((M1 * M2) / M)) / np.sqrt(sigw2) - - return z_rand - - -def _zrand_partitions(communities): - """ - Calculate z-Rand for all pairs of assignments in `communities`. - - Iterates through every pair of community assignment vectors in - `communities` and calculates the z-Rand score to assess their similarity. - - Parameters - ---------- - communities : (S, R) array_like - Community assignments for `S` samples over `R` partitions - - Returns - ------- - all_zrand : array_like - z-Rand score over all pairs of `R` partitions of community assignments - """ - n_partitions = communities.shape[-1] - all_zrand = np.zeros(int(n_partitions * (n_partitions - 1) / 2)) - - for c1 in prange(n_partitions): - for c2 in prange(c1 + 1, n_partitions): - idx = int((c1 * n_partitions) + c2 - ((c1 + 1) * (c1 + 2) // 2)) - all_zrand[idx] = zrand(communities[:, c1], communities[:, c2]) - - return all_zrand - - -if use_numba: - _dummyvar = njit(_dummyvar) - zrand = njit(zrand) - _zrand_partitions = njit(_zrand_partitions, parallel=True) - - -def get_modularity(adjacency, comm, gamma=1): - """ - Calculate modularity contribution for each community in `comm`. - - Parameters - ---------- - adjacency : (N, N) array_like - Adjacency (e.g., correlation) matrix - comm : (N,) array_like - Community assignment vector splitting `N` subjects into `G` groups - gamma : float, optional - Resolution parameter used in original modularity maximization. 
- Default: 1 - - Returns - ------- - comm_q : (G,) ndarray - Relative modularity for each community - - See Also - -------- - netneurotools.modularity.get_modularity_z - netneurotools.modularity.get_modularity_sig - """ - adjacency, comm = np.asarray(adjacency), np.asarray(comm) - s = adjacency.sum() - B = adjacency - (gamma * np.outer(adjacency.sum(axis=1), - adjacency.sum(axis=0)) / s) - - # find modularity contribution of each community - communities = np.unique(comm) - comm_q = np.empty(shape=communities.size) - for n, ci in enumerate(communities): - inds = comm == ci - comm_q[n] = B[np.ix_(inds, inds)].sum() / s - - return comm_q - - -def get_modularity_z(adjacency, comm, gamma=1, n_perm=10000, seed=None): - """ - Calculate average z-score of community assignments by permutation. - - Parameters - ---------- - adjacency : (N, N) array_like - Adjacency (correlation) matrix - comm : (N,) array_like - Community assignment vector splitting `N` subjects into `G` groups - gamma : float, optional - Resolution parameter used in original modularity maximization. - Default: 1 - n_perm : int, optional - Number of permutations. Default: 10000 - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Default: None - - Returns - ------- - q_z : float - Average Z-score of modularity of communities - - See Also - -------- - netneurotools.modularity.get_modularity - netneurotools.modularity.get_modularity_sig - """ - rs = check_random_state(seed) - - real_qs = get_modularity(adjacency, comm, gamma) - simu_qs = np.empty(shape=(np.unique(comm).size, n_perm)) - for perm in range(n_perm): - simu_qs[:, perm] = get_modularity(adjacency, - rs.permutation(comm), - gamma) - - # avoid instances where dist.std(1) == 0 - std = simu_qs.std(axis=1) - if std == 0: - return np.mean(real_qs - simu_qs.mean(axis=1)) - else: - return np.mean((real_qs - simu_qs.mean(axis=1)) / std) - - -def get_modularity_sig(adjacency, comm, gamma=1, n_perm=10000, alpha=0.01, - seed=None): - """ - Calculate significance of community assignments in `comm` by permutation. - - Parameters - ---------- - adjacency : (N, N) array_like - Adjacency (correlation) matrix - comm : (N,) array_like - Community assignment vector - gamma : float - Resolution parameter used in original modularity maximization - n_perm : int, optional - Number of permutations to test against. Default: 10000 - alpha : (0,1) float, optional - Alpha level to assess significance. Default: 0.01 - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. 
Default: None - - Returns - ------- - ndarray - Significance of each community in `comm` (boolean) - - See Also - -------- - netneurotools.modularity.get_modularity_z - netneurotools.modularity.get_modularity_sig - """ - rs = check_random_state(seed) - - real_qs = get_modularity(adjacency, comm, gamma) - simu_qs = np.empty(shape=(np.unique(comm).size, n_perm)) - for perm in range(n_perm): - simu_qs[:, perm] = get_modularity(adjacency, - rs.permutation(comm), - gamma) - - q_sig = real_qs > np.percentile(simu_qs, 100 * (1 - alpha), axis=1) - - return q_sig diff --git a/netneurotools/modularity/__init__.py b/netneurotools/modularity/__init__.py new file mode 100644 index 0000000..2fe84ad --- /dev/null +++ b/netneurotools/modularity/__init__.py @@ -0,0 +1,25 @@ +"""Functions for working with network modularity.""" + + +from .modules import ( + match_cluster_labels, + match_assignments, + reorder_assignments, + find_consensus, + consensus_modularity, + _dummyvar, + zrand, + _zrand_partitions, + get_modularity, + get_modularity_z, + get_modularity_sig, +) + + +__all__ = [ + # modules + 'match_cluster_labels', 'match_assignments', 'reorder_assignments', + 'find_consensus', 'consensus_modularity', '_dummyvar', 'zrand', + '_zrand_partitions', 'get_modularity', 'get_modularity_z', + 'get_modularity_sig', +] diff --git a/netneurotools/cluster.py b/netneurotools/modularity/modules.py similarity index 56% rename from netneurotools/cluster.py rename to netneurotools/modularity/modules.py index 4b46a9d..120e984 100644 --- a/netneurotools/cluster.py +++ b/netneurotools/modularity/modules.py @@ -1,11 +1,17 @@ -# -*- coding: utf-8 -*- -"""Functions for clustering and working with cluster solutions.""" +"""Functions for working with network modules.""" import bct import numpy as np +from sklearn.utils.validation import check_random_state from scipy import optimize from scipy.cluster import hierarchy -from sklearn.utils.validation import check_random_state + +try: + from numba import njit, prange + use_numba = True +except ImportError: + prange = range + use_numba = False def _get_relabels(c1, c2): @@ -64,14 +70,14 @@ def match_cluster_labels(source, target): Examples -------- - >>> from netneurotools import cluster + >>> from netneurotools import modularity When cluster labels are perfectly matched but e.g., inverted the function will find a perfect mapping: >>> a = np.array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0]) >>> b = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1]) - >>> cluster.match_cluster_labels(a, b) + >>> modularity.match_cluster_labels(a, b) array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1]) However, the mapping will work even when cluster assignments between the @@ -80,13 +86,13 @@ def match_cluster_labels(source, target): >>> a = np.array([0, 0, 0, 2, 2, 2, 2, 1, 1, 1]) >>> b = np.array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0]) - >>> cluster.match_cluster_labels(a, b) + >>> modularity.match_cluster_labels(a, b) array([1, 1, 1, 0, 0, 0, 0, 2, 2, 2]) If the source assignment has fewer clusters than the target the returned values may be discontinuous: - >>> cluster.match_cluster_labels(b, a) + >>> modularity.match_cluster_labels(b, a) array([0, 0, 0, 2, 2, 2, 2, 2, 2, 2]) """ # try and match the source to target @@ -137,7 +143,7 @@ def match_assignments(assignments, target=None, seed=None): Examples -------- - >>> from netneurotools import cluster + >>> from netneurotools import modularity First we can construct a matrix of `N` samples clustered `M` times (in this case, `M` is three) . 
Since cluster labels are generally arbitrary we can @@ -157,7 +163,7 @@ def match_assignments(assignments, target=None, seed=None): of the columns will be randomly picked as the "target" solution, we provide a `seed` to ensure reproducibility in the selection: - >>> cluster.match_assignments(assignments, seed=1234) + >>> modularity.match_assignments(assignments, seed=1234) array([[1, 1, 1], [1, 1, 1], [1, 1, 1], @@ -179,7 +185,7 @@ def match_assignments(assignments, target=None, seed=None): ... [1, 2, 0], ... [1, 1, 2], ... [1, 1, 2]]) - >>> cluster.match_assignments(assignments) + >>> modularity.match_assignments(assignments) array([[0, 0, 0], [0, 0, 0], [0, 0, 0], @@ -362,3 +368,305 @@ def find_consensus(assignments, null_func=np.mean, return_agreement=False, return consensus.astype(int), agreement * (agreement > threshold) return consensus.astype(int) + + +def consensus_modularity(adjacency, gamma=1, B='modularity', + repeats=250, null_func=np.mean, seed=None): + """ + Find community assignments from `adjacency` through consensus. + + Performs `repeats` iterations of community detection on `adjacency` and + then uses consensus clustering on the resulting community assignments. + + Parameters + ---------- + adjacency : (N, N) array_like + Adjacency matrix (weighted/non-weighted) on which to perform consensus + community detection. + gamma : float, optional + Resolution parameter for modularity maximization. Default: 1 + B : str or (N, N) array_like, optional + Null model to use for consensus clustering. If `str`, must be one of + ['modularity', 'potts', 'negative_sym', 'negative_asym']. Default: + 'modularity' + repeats : int, optional + Number of times to repeat Louvain algorithm clustering. Default: 250 + null_func : callable, optional + Function used to generate null model when performing consensus-based + clustering. Must accept a 2D array as input and return a single value. + Default: `np.mean` + seed : {int, np.random.RandomState instance, None}, optional + Seed for random number generation. Default: None + + Returns + ------- + consensus : (N,) np.ndarray + Consensus-derived community assignments + Q_all : array_like + Optimized modularity over all `repeats` community assignments + zrand_all : array_like + z-Rand score over all pairs of `repeats` community assignment vectors + + References + ---------- + Bassett, D. S., Porter, M. A., Wymbs, N. F., Grafton, S. T., Carlson, + J. M., & Mucha, P. J. (2013). Robust detection of dynamic community + structure in networks. Chaos: An Interdisciplinary Journal of Nonlinear + Science, 23(1), 013142. + """ + # generate community partitions `repeat` times + comms, Q_all = zip(*[bct.community_louvain(adjacency, gamma=gamma, B=B) + for i in range(repeats)]) + comms = np.column_stack(comms) + + # find consensus cluster assignments across all partitoning solutions + consensus = find_consensus(comms, null_func=null_func, seed=seed) + + # get z-rand statistics for partition similarity (n.b. can take a while) + zrand_all = _zrand_partitions(comms) + + return consensus, np.array(Q_all), zrand_all + + +def _dummyvar(labels): + """ + Generate dummy-coded array from provided community assignment `labels`. 
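+
+    For example, labels ``[1, 1, 2, 3, 3]`` are expanded to the dummy-coded
+    array ``[[1, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 0, 1]]`` (the
+    same case exercised in the accompanying tests).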
+ + Parameters + ---------- + labels : (N,) array_like + Labels assigning `N` samples to `G` groups + + Returns + ------- + ci : (N, G) numpy.ndarray + Dummy-coded array where 1 indicates that a sample belongs to a group + """ + comms = np.unique(labels) + + ci = np.zeros((len(labels), len(comms))) + for n, grp in enumerate(comms): + ci[:, n] = labels == grp + + return ci + + +def zrand(X, Y): + """ + Calculate the z-Rand index of two community assignments. + + Parameters + ---------- + X, Y : (n, 1) array_like + Community assignment vectors to compare + + Returns + ------- + z_rand : float + Z-rand index + + References + ---------- + Amanda L. Traud, Eric D. Kelsic, Peter J. Mucha, and Mason A. Porter. + (2011). Comparing Community Structure to Characteristics in Online + Collegiate Social Networks. SIAM Review, 53, 526-543. + """ + if X.ndim > 1 or Y.ndim > 1: + if X.shape[-1] > 1 or Y.shape[-1] > 1: + raise ValueError('X and Y must have only one-dimension each. ' + 'Please check inputs.') + + Xf = X.flatten() + Yf = Y.flatten() + + n = len(Xf) + indx, indy = _dummyvar(Xf), _dummyvar(Yf) + Xa = indx.dot(indx.T) + Ya = indy.dot(indy.T) + + M = n * (n - 1) / 2 + M1 = Xa.nonzero()[0].size / 2 + M2 = Ya.nonzero()[0].size / 2 + + wab = np.logical_and(Xa, Ya).nonzero()[0].size / 2 + + mod = n * (n**2 - 3 * n - 2) + C1 = mod - (8 * (n + 1) * M1) + (4 * np.power(indx.sum(0), 3).sum()) + C2 = mod - (8 * (n + 1) * M2) + (4 * np.power(indy.sum(0), 3).sum()) + + a = M / 16 + b = ((4 * M1 - 2 * M)**2) * ((4 * M2 - 2 * M)**2) / (256 * (M**2)) + c = C1 * C2 / (16 * n * (n - 1) * (n - 2)) + d = ((((4 * M1 - 2 * M)**2) - (4 * C1) - (4 * M)) + * (((4 * M2 - 2 * M)**2) - (4 * C2) - (4 * M)) + / (64 * n * (n - 1) * (n - 2) * (n - 3))) + + sigw2 = a - b + c + d + # catch any negatives + if sigw2 < 0: + return 0 + z_rand = (wab - ((M1 * M2) / M)) / np.sqrt(sigw2) + + return z_rand + + +def _zrand_partitions(communities): + """ + Calculate z-Rand for all pairs of assignments in `communities`. + + Iterates through every pair of community assignment vectors in + `communities` and calculates the z-Rand score to assess their similarity. + + Parameters + ---------- + communities : (S, R) array_like + Community assignments for `S` samples over `R` partitions + + Returns + ------- + all_zrand : array_like + z-Rand score over all pairs of `R` partitions of community assignments + """ + n_partitions = communities.shape[-1] + all_zrand = np.zeros(int(n_partitions * (n_partitions - 1) / 2)) + + for c1 in prange(n_partitions): + for c2 in prange(c1 + 1, n_partitions): + idx = int((c1 * n_partitions) + c2 - ((c1 + 1) * (c1 + 2) // 2)) + all_zrand[idx] = zrand(communities[:, c1], communities[:, c2]) + + return all_zrand + + +if use_numba: + _dummyvar = njit(_dummyvar) + zrand = njit(zrand) + _zrand_partitions = njit(_zrand_partitions, parallel=True) + + +def get_modularity(adjacency, comm, gamma=1): + """ + Calculate modularity contribution for each community in `comm`. + + Parameters + ---------- + adjacency : (N, N) array_like + Adjacency (e.g., correlation) matrix + comm : (N,) array_like + Community assignment vector splitting `N` subjects into `G` groups + gamma : float, optional + Resolution parameter used in original modularity maximization. 
+ Default: 1 + + Returns + ------- + comm_q : (G,) ndarray + Relative modularity for each community + + See Also + -------- + netneurotools.modularity.get_modularity_z + netneurotools.modularity.get_modularity_sig + """ + adjacency, comm = np.asarray(adjacency), np.asarray(comm) + s = adjacency.sum() + B = adjacency - (gamma * np.outer(adjacency.sum(axis=1), + adjacency.sum(axis=0)) / s) + + # find modularity contribution of each community + communities = np.unique(comm) + comm_q = np.empty(shape=communities.size) + for n, ci in enumerate(communities): + inds = comm == ci + comm_q[n] = B[np.ix_(inds, inds)].sum() / s + + return comm_q + + +def get_modularity_z(adjacency, comm, gamma=1, n_perm=10000, seed=None): + """ + Calculate average z-score of community assignments by permutation. + + Parameters + ---------- + adjacency : (N, N) array_like + Adjacency (correlation) matrix + comm : (N,) array_like + Community assignment vector splitting `N` subjects into `G` groups + gamma : float, optional + Resolution parameter used in original modularity maximization. + Default: 1 + n_perm : int, optional + Number of permutations. Default: 10000 + seed : {int, np.random.RandomState instance, None}, optional + Seed for random number generation. Default: None + + Returns + ------- + q_z : float + Average Z-score of modularity of communities + + See Also + -------- + netneurotools.modularity.get_modularity + netneurotools.modularity.get_modularity_sig + """ + rs = check_random_state(seed) + + real_qs = get_modularity(adjacency, comm, gamma) + simu_qs = np.empty(shape=(np.unique(comm).size, n_perm)) + for perm in range(n_perm): + simu_qs[:, perm] = get_modularity(adjacency, + rs.permutation(comm), + gamma) + + # avoid instances where dist.std(1) == 0 + std = simu_qs.std(axis=1) + if std == 0: + return np.mean(real_qs - simu_qs.mean(axis=1)) + else: + return np.mean((real_qs - simu_qs.mean(axis=1)) / std) + + +def get_modularity_sig(adjacency, comm, gamma=1, n_perm=10000, alpha=0.01, + seed=None): + """ + Calculate significance of community assignments in `comm` by permutation. + + Parameters + ---------- + adjacency : (N, N) array_like + Adjacency (correlation) matrix + comm : (N,) array_like + Community assignment vector + gamma : float + Resolution parameter used in original modularity maximization + n_perm : int, optional + Number of permutations to test against. Default: 10000 + alpha : (0,1) float, optional + Alpha level to assess significance. Default: 0.01 + seed : {int, np.random.RandomState instance, None}, optional + Seed for random number generation. 
Default: None + + Returns + ------- + ndarray + Significance of each community in `comm` (boolean) + + See Also + -------- + netneurotools.modularity.get_modularity_z + netneurotools.modularity.get_modularity_sig + """ + rs = check_random_state(seed) + + real_qs = get_modularity(adjacency, comm, gamma) + simu_qs = np.empty(shape=(np.unique(comm).size, n_perm)) + for perm in range(n_perm): + simu_qs[:, perm] = get_modularity(adjacency, + rs.permutation(comm), + gamma) + + q_sig = real_qs > np.percentile(simu_qs, 100 * (1 - alpha), axis=1) + + return q_sig diff --git a/netneurotools/modularity/tests/__init__.py b/netneurotools/modularity/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/tests/test_cluster.py b/netneurotools/modularity/tests/test_modules.py similarity index 58% rename from netneurotools/tests/test_cluster.py rename to netneurotools/modularity/tests/test_modules.py index 59b9f8a..64248b9 100644 --- a/netneurotools/tests/test_cluster.py +++ b/netneurotools/modularity/tests/test_modules.py @@ -1,12 +1,13 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.cluster functionality.""" +"""For testing netneurotools.modularity.modules functionality.""" import bct -import numpy as np import pytest +import numpy as np from sklearn.cluster import k_means, spectral_clustering -from netneurotools import cluster +from netneurotools import modularity + +rs = np.random.RandomState(1234) @pytest.mark.parametrize('c1, c2, out', [ @@ -28,10 +29,12 @@ np.array([1, 1, 1, 3, 3, 3, 2, 2, 2])) ]) def test_match_cluster_labels(c1, c2, out): - assert np.all(cluster.match_cluster_labels(c1, c2) == out) + """Test matching of cluster labels.""" + assert np.all(modularity.match_cluster_labels(c1, c2) == out) def test_match_assignments(): + """Test matching of clustering assignments.""" # generate some random data to be clustered (must be symmetric) rs = np.random.RandomState(1234) data = rs.rand(100, 100) @@ -48,7 +51,7 @@ def test_match_assignments(): # match labels and assert that we got perfect matches (this is not 100% # guaranteed with spectral clustering but it is...pretty likely) - matched = cluster.match_assignments(assignments, seed=rs) + matched = modularity.match_assignments(assignments, seed=rs) assert np.all(matched[:, [0]] == matched) # check that we didn't _actually_ change cluster assignments with matching; @@ -58,6 +61,7 @@ def test_match_assignments(): def test_reorder_assignments(): + """Test re-ordering of clustering assignments.""" # generate a bunch of ~random(ish) clustering assignments that have a bit # of consistency but aren't all identical rs = np.random.RandomState(1234) @@ -72,11 +76,11 @@ def test_reorder_assignments(): # (we're re-labelling the matrix but k-means does not provide stable # clustering assignments so we shouldn't get identical assignments even # after "matching") - reordered, idx = cluster.reorder_assignments(assignments, seed=1234) + reordered, idx = modularity.reorder_assignments(assignments, seed=1234) assert not np.all(reordered[:, [0]] == reordered) # make sure that the returned idx does exactly what it's supposed to - matched = cluster.match_assignments(assignments, seed=1234)[idx] + matched = modularity.match_assignments(assignments, seed=1234)[idx] assert np.all(matched == reordered) @@ -87,4 +91,44 @@ def test_reorder_assignments(): np.array([1, 1, 1, 2, 2, 2, 3, 3, 3])) ]) def test_find_consensus(assignments, clusters): - assert np.all(cluster.find_consensus(assignments) == clusters) + """Test finding 
consensus clustering.""" + assert np.all(modularity.find_consensus(assignments) == clusters) + + +def test_dummyvar(): + """Test generation of dummy variables.""" + # generate small example dummy variable code + out = modularity._dummyvar(np.array([1, 1, 2, 3, 3])) + assert np.all(out == np.array([[1, 0, 0], + [1, 0, 0], + [0, 1, 0], + [0, 0, 1], + [0, 0, 1]])) + + allones = np.array([1, 1, 1, 1, 1, 1, 1, 1]) + assert np.all(modularity._dummyvar(allones) == allones) + + +def test_zrand(): + """Test calculation of zrand.""" + # make the same two-group community assignments (with different labels) + label = np.ones((100, 1)) + X, Y = np.vstack((label, label * 2)), np.vstack((label * 2, label)) + # compare + assert modularity.zrand(X, Y) == modularity.zrand(X, Y[::-1]) + random = rs.choice([0, 1], size=X.shape) + assert modularity.zrand(X, Y) > modularity.zrand(X, random) + assert modularity.zrand(X, Y) == modularity.zrand(X[:, 0], Y[:, 0]) + + +def test_zrand_partitions(): + """Test calculation of zrand for partitions.""" + # make random communities + comm = rs.choice(range(6), size=(10, 100)) + all_diff = modularity._zrand_partitions(comm) + all_same = modularity._zrand_partitions(np.repeat(comm[:, [0]], 10, axis=1)) + + # partition of labels that are all the same should have higher average + # zrand and lower stdev zrand + assert np.nanmean(all_same) > np.nanmean(all_diff) + assert np.nanstd(all_same) < np.nanstd(all_diff) diff --git a/netneurotools/networks/__init__.py b/netneurotools/networks/__init__.py new file mode 100644 index 0000000..fb7d82a --- /dev/null +++ b/netneurotools/networks/__init__.py @@ -0,0 +1,33 @@ +"""Functions for constucting networks.""" + + +from .consensus import ( + func_consensus, struct_consensus +) + + +from .randomize import ( + randmio_und, + match_length_degree_distribution, + strength_preserving_rand_sa, + strength_preserving_rand_sa_mse_opt, + strength_preserving_rand_sa_dir +) + + +from .networks_utils import ( + binarize_network, threshold_network, get_triu +) + + +__all__ = [ + # consensus + 'func_consensus', 'struct_consensus', + # generative + # randomize + 'randmio_und', 'match_length_degree_distribution', + 'strength_preserving_rand_sa', 'strength_preserving_rand_sa_mse_opt', + 'strength_preserving_rand_sa_dir', + # networks_utils + 'binarize_network', 'threshold_network', 'get_triu' +] diff --git a/netneurotools/networks/consensus.py b/netneurotools/networks/consensus.py new file mode 100644 index 0000000..cc48baf --- /dev/null +++ b/netneurotools/networks/consensus.py @@ -0,0 +1,294 @@ +"""Functions for generating consensus networks.""" + +import numpy as np +from sklearn.utils.validation import ( + check_random_state, check_array, check_consistent_length +) + + +def func_consensus(data, n_boot=1000, ci=95, seed=None): + """ + Calculate thresholded group consensus functional connectivity graph. + + This function concatenates all time series in `data` and computes a group + correlation matrix based on this extended time series. It then generates + length `T` bootstrapped samples from the concatenated matrix and estimates + confidence intervals for all correlations. Correlations whose sign is + consistent across bootstraps are retained; inconsistent correlations are + set to zero. + + If `n_boot` is set to 0 or None a simple, group-averaged functional + connectivity matrix is estimated, instead. 
+ + Parameters + ---------- + data : (N, T, S) array_like (or a list of S arrays, each shaped as (N, T)) + Pre-processed functional time series, where `N` is the number of nodes, + `T` is the number of volumes in the time series, and `S` is the number + of subjects. + n_boot : int, optional + Number of bootstraps for which to generate correlation. Default: 1000 + ci : (0, 100) float, optional + Confidence interval for which to assess the reliability of correlations + with bootstraps. Default: 95 + seed : int, optional + Random seed. Default: None + + Returns + ------- + consensus : (N, N) numpy.ndarray + Thresholded, group-level correlation matrix + + References + ---------- + Mišić, B., Betzel, R. F., Nematzadeh, A., Goni, J., Griffa, A., Hagmann, + P., Flammini, A., Ahn, Y.-Y., & Sporns, O. (2015). Cooperative and + competitive spreading dynamics on the human connectome. Neuron, 86(6), + 1518-1529. + """ + # check inputs + rs = check_random_state(seed) + if ci > 100 or ci < 0: + raise ValueError("`ci` must be between 0 and 100.") + + # group-average functional connectivity matrix desired instead of bootstrap + if n_boot == 0 or n_boot is None: + if isinstance(data, list): + corrs = [np.corrcoef(sub) for sub in data] + else: + corrs = [np.corrcoef(data[..., sub]) for sub in + range(data.shape[-1])] + return np.nanmean(corrs, axis=0) + + if isinstance(data, list): + collapsed_data = np.hstack(data) + nsample = int(collapsed_data.shape[-1] / len(data)) + else: + collapsed_data = data.reshape((len(data), -1), order='F') + nsample = data.shape[1] + + consensus = np.corrcoef(collapsed_data) + + # only keep the upper triangle for the bootstraps to save on memory usage + triu_inds = np.triu_indices_from(consensus, k=1) + bootstrapped_corrmat = np.zeros((len(triu_inds[0]), n_boot)) + + # generate `n_boot` bootstrap correlation matrices by sampling `t` time + # points from the concatenated time series + for boot in range(n_boot): + inds = rs.randint(collapsed_data.shape[-1], size=nsample) + bootstrapped_corrmat[..., boot] = \ + np.corrcoef(collapsed_data[:, inds])[triu_inds] + + # extract the CIs from the bootstrapped correlation matrices + # we don't need the input anymore so overwrite it + bootstrapped_ci = np.percentile(bootstrapped_corrmat, [100 - ci, ci], + axis=-1, overwrite_input=True) + + # remove unreliable (i.e., CI zero-crossing) correlations + # if the signs of the bootstrapped confidence intervals are different + # (i.e., their signs sum to 0), then we want to remove them + # so, take the logical not of the CI (CI = 0 ---> True) and create a mask + # then, set all connections from the consensus array inside the mask to 0 + remove_inds = np.logical_not(np.sign(bootstrapped_ci).sum(axis=0)) + mask = np.zeros_like(consensus, dtype=bool) + mask[triu_inds] = remove_inds + consensus[mask + mask.T] = 0 + + return consensus + + +def _ecdf(data): + """ + Estimate empirical cumulative distribution function of `data`. + + Taken directly from StackOverflow. See original answer at + https://stackoverflow.com/questions/33345780. 
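As a small worked illustration of this helper (note that the function body also prepends a zero probability, per its "match MATLAB" comment); the expected outputs are shown as comments, and the private import path is simply the module this diff creates:

import numpy as np
from netneurotools.networks.consensus import _ecdf

prob, quantiles = _ecdf(np.array([1, 1, 2, 3]))
# prob      -> array([0.  , 0.5 , 0.75, 1.  ])
# quantiles -> array([1, 1, 2, 3])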
+ + Parameters + ---------- + data : array_like + + Returns + ------- + prob : numpy.ndarray + Cumulative probability + quantiles : numpy.darray + Quantiles + """ + sample = np.atleast_1d(data) + + # find the unique values and their corresponding counts + quantiles, counts = np.unique(sample, return_counts=True) + + # take the cumulative sum of the counts and divide by the sample size to + # get the cumulative probabilities between 0 and 1 + prob = np.cumsum(counts).astype(float) / sample.size + + # match MATLAB + prob, quantiles = np.append([0], prob), np.append(quantiles[0], quantiles) + + return prob, quantiles + + +def struct_consensus(data, distance, hemiid, + conn_num_inter=None, + conn_num_intra=None, + weighted=False): + """ + Calculate distance-dependent group consensus structural connectivity graph. + + Takes as input a weighted stack of connectivity matrices with dimensions + (N, N, S) where `N` is the number of nodes and `S` is the number of + matrices or subjects. The matrices must be weighted, and ideally with + continuous weights (e.g. fractional anisotropy rather than streamline + count). The second input is a pairwise distance matrix, where distance(i,j) + is the Euclidean distance between nodes i and j. The final input is an + (N, 1) vector which labels nodes as belonging to the right (`hemiid==0`) or + left (`hemiid=1`) hemisphere (note that these values can be flipped as long + as `hemiid` contains only values of 0 and 1). + + This function estimates the average edge length distribution and builds + a group-averaged connectivity matrix that approximates this distribution + with density equal to the mean density across subjects. + + The algorithm works as follows: + + 1. Estimate the cumulative edge length distribution, + 2. Divide the distribution into M length bins, one for each edge that will + be added to the group-average matrix, and + 3. Within each bin, select the edge that is most consistently expressed + expressed across subjects, breaking ties according to average edge + weight (which is why the input matrix `data` must be weighted). + + The algorithm works separately on within/between hemisphere links. + M is the sum of `conn_num_inter` and `conn_num_intra`, if provided. + Otherwise, M is estimated from the data. + + Parameters + ---------- + data : (N, N, S) array_like + Weighted connectivity matrices (i.e., fractional anisotropy), where `N` + is nodes and `S` is subjects + distance : (N, N) array_like + Array where `distance[i, j]` is the Euclidean distance between nodes + `i` and `j` + hemiid : (N, 1) array_like + Hemisphere designation for `N` nodes where a value of 0/1 indicates + node `N_{i}` is in the right/left hemisphere, respectively + conn_num_inter : int, optional + Number of inter-hemispheric connections to include in the consensus + matrix. If `None`, the number of inter-hemispheric connections will be + estimated from the data. Default = `None`. + conn_num_intra : int, optional + Number of intra-hemispheric connections to include in the consensus + matrix. If `None`, the number of intra-hemispheric connections will be + estimated from the data. Default = `None`. + weighted : bool + Flag indicating whether or not to return a weighted consensus map. If + `True`, the consensus will be multiplied by the mean of `data`. + + Returns + ------- + consensus : (N, N) numpy.ndarray + Binary (default) or mean-weighted group-level connectivity matrix + + References + ---------- + Betzel, R. F., Griffa, A., Hagmann, P., & Mišić, B. (2018). 
Distance- + dependent consensus thresholds for generating group-representative + structural brain networks. Network Neuroscience, 1-22. + """ + # confirm input shapes are as expected + check_consistent_length(data, distance, hemiid) + try: + hemiid = check_array(hemiid, ensure_2d=True) + except ValueError: + raise ValueError('Provided hemiid must be a 2D array. Reshape your ' + 'data using array.reshape(-1, 1) and try again.') from None + + num_node, _, num_sub = data.shape # info on connectivity matrices + pos_data = data > 0 # location of + values in matrix + pos_data_count = pos_data.sum(axis=2) # num sub with + values at each node + + with np.errstate(divide='ignore', invalid='ignore'): + average_weights = data.sum(axis=2) / pos_data_count + + # empty array to hold inter/intra hemispheric connections + consensus = np.zeros((num_node, num_node, 2)) + + for conn_type in range(2): # iterate through inter/intra hemisphere conn + if conn_type == 0: # get inter hemisphere edges + inter_hemi = (hemiid == 0) @ (hemiid == 1).T + keep_conn = np.logical_or(inter_hemi, inter_hemi.T) + else: # get intra hemisphere edges + right_hemi = (hemiid == 0) @ (hemiid == 0).T + left_hemi = (hemiid == 1) @ (hemiid == 1).T + keep_conn = np.logical_or(right_hemi @ right_hemi.T, + left_hemi @ left_hemi.T) + + # mask the distance array for only those edges we want to examine + full_dist_conn = distance * keep_conn + upper_dist_conn = np.atleast_3d(np.triu(full_dist_conn)) + + # generate array of weighted (by distance), positive edges across subs + pos_dist = pos_data * upper_dist_conn + pos_dist = pos_dist[np.nonzero(pos_dist)] + + # determine average # of positive edges across subs + # we will use this to bin the edge weights + if conn_type == 0: + if conn_num_inter is None: + avg_conn_num = len(pos_dist) / num_sub + else: + avg_conn_num = conn_num_inter + else: + if conn_num_intra is None: + avg_conn_num = len(pos_dist) / num_sub + else: + avg_conn_num = conn_num_intra + + # estimate empirical CDF of weighted, positive edges across subs + cumprob, quantiles = _ecdf(pos_dist) + cumprob = np.round(cumprob * avg_conn_num).astype(int) + + # empty array to hold group-average matrix for current connection type + # (i.e., inter/intra hemispheric connections) + group_conn_type = np.zeros((num_node, num_node)) + + # iterate through bins (for edge weights) + for n in range(1, int(avg_conn_num) + 1): + # get current quantile of interest + curr_quant = quantiles[np.logical_and(cumprob >= (n - 1), + cumprob < n)] + if curr_quant.size == 0: + continue + + # find edges in distance connectivity matrix w/i current quantile + mask = np.logical_and(full_dist_conn >= curr_quant.min(), + full_dist_conn <= curr_quant.max()) + i, j = np.where(np.triu(mask)) # indices of edges of interest + + c = pos_data_count[i, j] # get num sub with + values at edges + w = average_weights[i, j] # get averaged weight of edges + + # find locations of edges most commonly represented across subs + indmax = np.argwhere(c == c.max()) + + # determine index of most frequent edge; break ties with higher + # weighted edge + if indmax.size == 1: # only one edge found + group_conn_type[i[indmax], j[indmax]] = 1 + else: # multiple edges found + indmax = indmax[np.argmax(w[indmax])] + group_conn_type[i[indmax], j[indmax]] = 1 + + consensus[:, :, conn_type] = group_conn_type + + # collapse across hemispheric connections types and make symmetrical array + consensus = consensus.sum(axis=2) + consensus = np.logical_or(consensus, consensus.T).astype(int) + + if 
weighted: + consensus = consensus * np.mean(data, axis=2) + return consensus diff --git a/netneurotools/networks/generative.py b/netneurotools/networks/generative.py new file mode 100644 index 0000000..7a7bff4 --- /dev/null +++ b/netneurotools/networks/generative.py @@ -0,0 +1 @@ +"""Functions for generative network models.""" diff --git a/netneurotools/networks/networks_utils.py b/netneurotools/networks/networks_utils.py new file mode 100644 index 0000000..5085e55 --- /dev/null +++ b/netneurotools/networks/networks_utils.py @@ -0,0 +1,132 @@ +"""Functions for supporting network constuction.""" + +import numpy as np +from scipy.sparse import csgraph + + +def get_triu(data, k=1): + """ + Return vectorized version of upper triangle from `data`. + + Parameters + ---------- + data : (N, N) array_like + Input data + k : int, optional + Which diagonal to select from (where primary diagonal is 0). Default: 1 + + Returns + ------- + triu : (N * N-1 / 2) numpy.ndarray + Upper triangle of `data` + + Examples + -------- + >>> from netneurotools import networks + + >>> X = np.array([[1, 0.5, 0.25], [0.5, 1, 0.33], [0.25, 0.33, 1]]) + >>> tri = networks.get_triu(X) + >>> tri + array([0.5 , 0.25, 0.33]) + """ + return data[np.triu_indices(len(data), k=k)].copy() + + +def binarize_network(network, retain=10, keep_diag=False): + """ + Keep top `retain` % of connections in `network` and binarizes. + + Uses the upper triangle for determining connection percentage, which may + result in disconnected nodes. If this behavior is not desired see + :py:func:`netneurotools.networks.threshold_network`. + + Parameters + ---------- + network : (N, N) array_like + Input graph + retain : [0, 100] float, optional + Percent connections to retain. Default: 10 + keep_diag : bool, optional + Whether to keep the diagonal instead of setting it to 0. Default: False + + Returns + ------- + binarized : (N, N) numpy.ndarray + Binarized, thresholded graph + + See Also + -------- + netneurotools.networks.threshold_network + """ + if retain < 0 or retain > 100: + raise ValueError( + f'Value provided for `retain` is outside [0, 100]: {retain}' + ) + + prctile = 100 - retain + triu = get_triu(network) + thresh = np.percentile(triu, prctile, axis=0, keepdims=True) + binarized = np.array(network > thresh, dtype=int) + + if not keep_diag: + binarized[np.diag_indices(len(binarized))] = 0 + + return binarized + + +def threshold_network(network, retain=10): + """ + Keep top `retain` % of connections in `network` and binarizes. + + Uses a minimum spanning tree to ensure that no nodes are disconnected from + the resulting thresholded graph + + Parameters + ---------- + network : (N, N) array_like + Input graph + retain : [0, 100] float, optional + Percent connections to retain. Default: 10 + + Returns + ------- + thresholded : (N, N) numpy.ndarray + Binarized, thresholded graph + + See Also + -------- + netneurotools.networks.binarize_network + """ + if retain < 0 or retain > 100: + raise ValueError( + f'Value provided for `retain` must be a percent ' + f'in range [0, 100]. 
Provided: {retain}' + ) + + # get number of nodes in graph and invert weights (MINIMUM spanning tree) + nodes = len(network) + graph = np.triu(network * -1) + + # find MST and count # of edges in graph + mst = csgraph.minimum_spanning_tree(graph).todense() + mst_edges = np.sum(mst != 0) + + # determine # of remaining edges and ensure we're not over the limit + remain = int((retain / 100) * ((nodes * (nodes - 1)) / 2)) - mst_edges + if remain < 0: + raise ValueError( + f'Minimum spanning tree with {mst_edges} edges exceeds desired ' + f'connection density of {retain}% ({remain + mst_edges} edges). Cannot ' + f'proceed with graph creation.' + ) + + # zero out edges already in MST and then get indices of next best edges + graph -= mst + inds = get_triu(graph).argsort()[:remain] + inds = tuple(e[inds] for e in np.triu_indices_from(graph, k=1)) + + # add edges to MST, symmetrize, and convert to binary matrix + mst[inds] = graph[inds] + mst = np.array((mst + mst.T) != 0, dtype=int) + + return mst diff --git a/netneurotools/networks.py b/netneurotools/networks/randomize.py similarity index 61% rename from netneurotools/networks.py rename to netneurotools/networks/randomize.py index 1803f8c..a91f9ec 100644 --- a/netneurotools/networks.py +++ b/netneurotools/networks/randomize.py @@ -1,14 +1,11 @@ -# -*- coding: utf-8 -*- -"""Functions for generating group-level networks from individual measurements.""" +"""Functions for generating randomized networks.""" import bct import numpy as np from tqdm import tqdm -from scipy.sparse import csgraph -from sklearn.utils.validation import (check_random_state, check_array, - check_consistent_length) - -from . import utils +from sklearn.utils.validation import ( + check_random_state +) try: from numba import njit @@ -17,388 +14,91 @@ use_numba = False -def func_consensus(data, n_boot=1000, ci=95, seed=None): - """ - Calculate thresholded group consensus functional connectivity graph. - - This function concatenates all time series in `data` and computes a group - correlation matrix based on this extended time series. It then generates - length `T` bootstrapped samples from the concatenated matrix and estimates - confidence intervals for all correlations. Correlations whose sign is - consistent across bootstraps are retained; inconsistent correlations are - set to zero. - - If `n_boot` is set to 0 or None a simple, group-averaged functional - connectivity matrix is estimated, instead. - - Parameters - ---------- - data : (N, T, S) array_like (or a list of S arrays, each shaped as (N, T)) - Pre-processed functional time series, where `N` is the number of nodes, - `T` is the number of volumes in the time series, and `S` is the number - of subjects. - n_boot : int, optional - Number of bootstraps for which to generate correlation. Default: 1000 - ci : (0, 100) float, optional - Confidence interval for which to assess the reliability of correlations - with bootstraps. Default: 95 - seed : int, optional - Random seed. Default: None - - Returns - ------- - consensus : (N, N) numpy.ndarray - Thresholded, group-level correlation matrix - - References - ---------- - Mišić, B., Betzel, R. F., Nematzadeh, A., Goni, J., Griffa, A., Hagmann, - P., Flammini, A., Ahn, Y.-Y., & Sporns, O. (2015). Cooperative and - competitive spreading dynamics on the human connectome. Neuron, 86(6), - 1518-1529. 
- """ - # check inputs - rs = check_random_state(seed) - if ci > 100 or ci < 0: - raise ValueError("`ci` must be between 0 and 100.") - - # group-average functional connectivity matrix desired instead of bootstrap - if n_boot == 0 or n_boot is None: - if isinstance(data, list): - corrs = [np.corrcoef(sub) for sub in data] - else: - corrs = [np.corrcoef(data[..., sub]) for sub in - range(data.shape[-1])] - return np.nanmean(corrs, axis=0) - - if isinstance(data, list): - collapsed_data = np.hstack(data) - nsample = int(collapsed_data.shape[-1] / len(data)) - else: - collapsed_data = data.reshape((len(data), -1), order='F') - nsample = data.shape[1] - - consensus = np.corrcoef(collapsed_data) - - # only keep the upper triangle for the bootstraps to save on memory usage - triu_inds = np.triu_indices_from(consensus, k=1) - bootstrapped_corrmat = np.zeros((len(triu_inds[0]), n_boot)) - - # generate `n_boot` bootstrap correlation matrices by sampling `t` time - # points from the concatenated time series - for boot in range(n_boot): - inds = rs.randint(collapsed_data.shape[-1], size=nsample) - bootstrapped_corrmat[..., boot] = \ - np.corrcoef(collapsed_data[:, inds])[triu_inds] - - # extract the CIs from the bootstrapped correlation matrices - # we don't need the input anymore so overwrite it - bootstrapped_ci = np.percentile(bootstrapped_corrmat, [100 - ci, ci], - axis=-1, overwrite_input=True) - - # remove unreliable (i.e., CI zero-crossing) correlations - # if the signs of the bootstrapped confidence intervals are different - # (i.e., their signs sum to 0), then we want to remove them - # so, take the logical not of the CI (CI = 0 ---> True) and create a mask - # then, set all connections from the consensus array inside the mask to 0 - remove_inds = np.logical_not(np.sign(bootstrapped_ci).sum(axis=0)) - mask = np.zeros_like(consensus, dtype=bool) - mask[triu_inds] = remove_inds - consensus[mask + mask.T] = 0 - - return consensus - - -def _ecdf(data): - """ - Estimate empirical cumulative distribution function of `data`. - - Taken directly from StackOverflow. See original answer at - https://stackoverflow.com/questions/33345780. - - Parameters - ---------- - data : array_like - - Returns - ------- - prob : numpy.ndarray - Cumulative probability - quantiles : numpy.darray - Quantiles - """ - sample = np.atleast_1d(data) - - # find the unique values and their corresponding counts - quantiles, counts = np.unique(sample, return_counts=True) - - # take the cumulative sum of the counts and divide by the sample size to - # get the cumulative probabilities between 0 and 1 - prob = np.cumsum(counts).astype(float) / sample.size - - # match MATLAB - prob, quantiles = np.append([0], prob), np.append(quantiles[0], quantiles) - - return prob, quantiles - - -def struct_consensus(data, distance, hemiid, - conn_num_inter=None, - conn_num_intra=None, - weighted=False): - """ - Calculate distance-dependent group consensus structural connectivity graph. - - Takes as input a weighted stack of connectivity matrices with dimensions - (N, N, S) where `N` is the number of nodes and `S` is the number of - matrices or subjects. The matrices must be weighted, and ideally with - continuous weights (e.g. fractional anisotropy rather than streamline - count). The second input is a pairwise distance matrix, where distance(i,j) - is the Euclidean distance between nodes i and j. 
The final input is an - (N, 1) vector which labels nodes as belonging to the right (`hemiid==0`) or - left (`hemiid=1`) hemisphere (note that these values can be flipped as long - as `hemiid` contains only values of 0 and 1). - - This function estimates the average edge length distribution and builds - a group-averaged connectivity matrix that approximates this distribution - with density equal to the mean density across subjects. - - The algorithm works as follows: - - 1. Estimate the cumulative edge length distribution, - 2. Divide the distribution into M length bins, one for each edge that will - be added to the group-average matrix, and - 3. Within each bin, select the edge that is most consistently expressed - expressed across subjects, breaking ties according to average edge - weight (which is why the input matrix `data` must be weighted). - - The algorithm works separately on within/between hemisphere links. - M is the sum of `conn_num_inter` and `conn_num_intra`, if provided. - Otherwise, M is estimated from the data. - - Parameters - ---------- - data : (N, N, S) array_like - Weighted connectivity matrices (i.e., fractional anisotropy), where `N` - is nodes and `S` is subjects - distance : (N, N) array_like - Array where `distance[i, j]` is the Euclidean distance between nodes - `i` and `j` - hemiid : (N, 1) array_like - Hemisphere designation for `N` nodes where a value of 0/1 indicates - node `N_{i}` is in the right/left hemisphere, respectively - conn_num_inter : int, optional - Number of inter-hemispheric connections to include in the consensus - matrix. If `None`, the number of inter-hemispheric connections will be - estimated from the data. Default = `None`. - conn_num_intra : int, optional - Number of intra-hemispheric connections to include in the consensus - matrix. If `None`, the number of intra-hemispheric connections will be - estimated from the data. Default = `None`. - weighted : bool - Flag indicating whether or not to return a weighted consensus map. If - `True`, the consensus will be multiplied by the mean of `data`. - - Returns - ------- - consensus : (N, N) numpy.ndarray - Binary (default) or mean-weighted group-level connectivity matrix - - References - ---------- - Betzel, R. F., Griffa, A., Hagmann, P., & Mišić, B. (2018). Distance- - dependent consensus thresholds for generating group-representative - structural brain networks. Network Neuroscience, 1-22. +def randmio_und(W, itr): """ - # confirm input shapes are as expected - check_consistent_length(data, distance, hemiid) - try: - hemiid = check_array(hemiid, ensure_2d=True) - except ValueError: - raise ValueError('Provided hemiid must be a 2D array. 
Reshape your ' - 'data using array.reshape(-1, 1) and try again.') from None - - num_node, _, num_sub = data.shape # info on connectivity matrices - pos_data = data > 0 # location of + values in matrix - pos_data_count = pos_data.sum(axis=2) # num sub with + values at each node - - with np.errstate(divide='ignore', invalid='ignore'): - average_weights = data.sum(axis=2) / pos_data_count - - # empty array to hold inter/intra hemispheric connections - consensus = np.zeros((num_node, num_node, 2)) - - for conn_type in range(2): # iterate through inter/intra hemisphere conn - if conn_type == 0: # get inter hemisphere edges - inter_hemi = (hemiid == 0) @ (hemiid == 1).T - keep_conn = np.logical_or(inter_hemi, inter_hemi.T) - else: # get intra hemisphere edges - right_hemi = (hemiid == 0) @ (hemiid == 0).T - left_hemi = (hemiid == 1) @ (hemiid == 1).T - keep_conn = np.logical_or(right_hemi @ right_hemi.T, - left_hemi @ left_hemi.T) - - # mask the distance array for only those edges we want to examine - full_dist_conn = distance * keep_conn - upper_dist_conn = np.atleast_3d(np.triu(full_dist_conn)) - - # generate array of weighted (by distance), positive edges across subs - pos_dist = pos_data * upper_dist_conn - pos_dist = pos_dist[np.nonzero(pos_dist)] - - # determine average # of positive edges across subs - # we will use this to bin the edge weights - if conn_type == 0: - if conn_num_inter is None: - avg_conn_num = len(pos_dist) / num_sub - else: - avg_conn_num = conn_num_inter - else: - if conn_num_intra is None: - avg_conn_num = len(pos_dist) / num_sub - else: - avg_conn_num = conn_num_intra - - # estimate empirical CDF of weighted, positive edges across subs - cumprob, quantiles = _ecdf(pos_dist) - cumprob = np.round(cumprob * avg_conn_num).astype(int) - - # empty array to hold group-average matrix for current connection type - # (i.e., inter/intra hemispheric connections) - group_conn_type = np.zeros((num_node, num_node)) - - # iterate through bins (for edge weights) - for n in range(1, int(avg_conn_num) + 1): - # get current quantile of interest - curr_quant = quantiles[np.logical_and(cumprob >= (n - 1), - cumprob < n)] - if curr_quant.size == 0: - continue - - # find edges in distance connectivity matrix w/i current quantile - mask = np.logical_and(full_dist_conn >= curr_quant.min(), - full_dist_conn <= curr_quant.max()) - i, j = np.where(np.triu(mask)) # indices of edges of interest - - c = pos_data_count[i, j] # get num sub with + values at edges - w = average_weights[i, j] # get averaged weight of edges - - # find locations of edges most commonly represented across subs - indmax = np.argwhere(c == c.max()) - - # determine index of most frequent edge; break ties with higher - # weighted edge - if indmax.size == 1: # only one edge found - group_conn_type[i[indmax], j[indmax]] = 1 - else: # multiple edges found - indmax = indmax[np.argmax(w[indmax])] - group_conn_type[i[indmax], j[indmax]] = 1 - - consensus[:, :, conn_type] = group_conn_type - - # collapse across hemispheric connections types and make symmetrical array - consensus = consensus.sum(axis=2) - consensus = np.logical_or(consensus, consensus.T).astype(int) - - if weighted: - consensus = consensus * np.mean(data, axis=2) - return consensus - + Optimized version of randmio_und. -def binarize_network(network, retain=10, keep_diag=False): - """ - Keep top `retain` % of connections in `network` and binarizes. + This function randomizes an undirected network, while preserving the + degree distribution. 
The function does not preserve the strength + distribution in weighted networks. - Uses the upper triangle for determining connection percentage, which may - result in disconnected nodes. If this behavior is not desired see - :py:func:`netneurotools.networks.threshold_network`. + This function is significantly faster if numba is enabled, because + the main overhead is `np.random.randint`, see `here `_ Parameters ---------- - network : (N, N) array_like - Input graph - retain : [0, 100] float, optional - Percent connections to retain. Default: 10 - keep_diag : bool, optional - Whether to keep the diagonal instead of setting it to 0. Default: False + W : (N, N) array-like + Undirected binary/weighted connection matrix + itr : int + rewiring parameter. Each edge is rewired approximately itr times. Returns ------- - binarized : (N, N) numpy.ndarray - Binarized, thresholded graph - - See Also - -------- - netneurotools.networks.threshold_network - """ - if retain < 0 or retain > 100: - raise ValueError('Value provided for `retain` is outside [0, 100]: {}' - .format(retain)) - - prctile = 100 - retain - triu = utils.get_triu(network) - thresh = np.percentile(triu, prctile, axis=0, keepdims=True) - binarized = np.array(network > thresh, dtype=int) - - if not keep_diag: - binarized[np.diag_indices(len(binarized))] = 0 - - return binarized - - -def threshold_network(network, retain=10): - """ - Keep top `retain` % of connections in `network` and binarizes. - - Uses a minimum spanning tree to ensure that no nodes are disconnected from - the resulting thresholded graph + W : (N, N) array-like + Randomized network + eff : int + number of actual rewirings carried out + """ # noqa: E501 + W = W.copy() + n = len(W) + i, j = np.where(np.triu(W > 0, 1)) + k = len(i) + itr *= k - Parameters - ---------- - network : (N, N) array_like - Input graph - retain : [0, 100] float, optional - Percent connections to retain. Default: 10 + # maximum number of rewiring attempts per iteration + max_attempts = np.round(n * k / (n * (n - 1))) + # actual number of successful rewirings + eff = 0 - Returns - ------- - thresholded : (N, N) numpy.ndarray - Binarized, thresholded graph + for _ in range(int(itr)): + att = 0 + while att <= max_attempts: # while not rewired + while True: + e1, e2 = np.random.randint(k), np.random.randint(k) + while e1 == e2: + e2 = np.random.randint(k) + a, b = i[e1], j[e1] + c, d = i[e2], j[e2] - See Also - -------- - netneurotools.networks.binarize_network - """ - if retain < 0 or retain > 100: - raise ValueError('Value provided for `retain` must be a percent ' - 'in range [0, 100]. 
Provided: {}'.format(retain)) + if a != c and a != d and b != c and b != d: + break # all 4 vertices must be different - # get number of nodes in graph and invert weights (MINIMUM spanning tree) - nodes = len(network) - graph = np.triu(network * -1) + # flip edge c-d with 50% probability + # to explore all potential rewirings + if np.random.random() > .5: + i[e2], j[e2] = d, c + c, d = d, c - # find MST and count # of edges in graph - mst = csgraph.minimum_spanning_tree(graph).todense() - mst_edges = np.sum(mst != 0) + # rewiring condition + # not flipped + # a--b a b + # TO X + # c--d c d + # if flipped + # a--b a--b a b + # TO TO X + # c--d d--c d c + if not (W[a, d] or W[c, b]): + W[a, d] = W[a, b] + W[a, b] = 0 + W[d, a] = W[b, a] + W[b, a] = 0 + W[c, b] = W[c, d] + W[c, d] = 0 + W[b, c] = W[d, c] + W[d, c] = 0 - # determine # of remaining edges and ensure we're not over the limit - remain = int((retain / 100) * ((nodes * (nodes - 1)) / 2)) - mst_edges - if remain < 0: - raise ValueError('Minimum spanning tree with {} edges exceeds desired ' - 'connection density of {}% ({} edges). Cannot ' - 'proceed with graph creation.' - .format(mst_edges, retain, remain + mst_edges)) + j[e1] = d + j[e2] = b # reassign edge indices + eff += 1 + break + att += 1 - # zero out edges already in MST and then get indices of next best edges - graph -= mst - inds = utils.get_triu(graph).argsort()[:remain] - inds = tuple(e[inds] for e in np.triu_indices_from(graph, k=1)) + return W, eff - # add edges to MST, symmetrize, and convert to binary matrix - mst[inds] = graph[inds] - mst = np.array((mst + mst.T) != 0, dtype=int) - return mst +if use_numba: + randmio_und = njit(randmio_und) def match_length_degree_distribution(W, D, nbins=10, nswap=1000, @@ -546,7 +246,7 @@ def match_length_degree_distribution(W, D, nbins=10, nswap=1000, if len(np.where(B != 0)[0]) != len(np.where(newB != 0)[0]): print( f"ERROR --- number of edges changed, \ - B:{len(np.where(B!=0)[0])}, newB:{len(np.where(newB!=0)[0])}") + B:{len(np.where(B != 0)[0])}, newB:{len(np.where(newB != 0)[0])}") # check that the degree of the nodes it's the same for i in range(N): if np.sum(B[i]) != np.sum(newB[i]): @@ -578,93 +278,6 @@ def match_length_degree_distribution(W, D, nbins=10, nswap=1000, return newB, newW, nr -def randmio_und(W, itr): - """ - Optimized version of randmio_und. - - This function randomizes an undirected network, while preserving the - degree distribution. The function does not preserve the strength - distribution in weighted networks. - - This function is significantly faster if numba is enabled, because - the main overhead is `np.random.randint`, see `here `_ - - Parameters - ---------- - W : (N, N) array-like - Undirected binary/weighted connection matrix - itr : int - rewiring parameter. Each edge is rewired approximately itr times. 
- - Returns - ------- - W : (N, N) array-like - Randomized network - eff : int - number of actual rewirings carried out - """ # noqa: E501 - W = W.copy() - n = len(W) - i, j = np.where(np.triu(W > 0, 1)) - k = len(i) - itr *= k - - # maximum number of rewiring attempts per iteration - max_attempts = np.round(n * k / (n * (n - 1))) - # actual number of successful rewirings - eff = 0 - - for _ in range(int(itr)): - att = 0 - while att <= max_attempts: # while not rewired - while True: - e1, e2 = np.random.randint(k), np.random.randint(k) - while e1 == e2: - e2 = np.random.randint(k) - a, b = i[e1], j[e1] - c, d = i[e2], j[e2] - - if a != c and a != d and b != c and b != d: - break # all 4 vertices must be different - - # flip edge c-d with 50% probability - # to explore all potential rewirings - if np.random.random() > .5: - i[e2], j[e2] = d, c - c, d = d, c - - # rewiring condition - # not flipped - # a--b a b - # TO X - # c--d c d - # if flipped - # a--b a--b a b - # TO TO X - # c--d d--c d c - if not (W[a, d] or W[c, b]): - W[a, d] = W[a, b] - W[a, b] = 0 - W[d, a] = W[b, a] - W[b, a] = 0 - W[c, b] = W[c, d] - W[c, d] = 0 - W[b, c] = W[d, c] - W[d, c] = 0 - - j[e1] = d - j[e2] = b # reassign edge indices - eff += 1 - break - att += 1 - - return W, eff - - -if use_numba: - randmio_und = njit(randmio_und) - - def strength_preserving_rand_sa(A, rewiring_iter=10, nstage=100, niter=10000, temp=1000, frac=0.5, @@ -762,11 +375,11 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, rs = check_random_state(seed) n = A.shape[0] - s = np.sum(A, axis=1) #strengths of A + s = np.sum(A, axis=1) # strengths of A - #Maslov & Sneppen rewiring + # Maslov & Sneppen rewiring if R is None: - #ensuring connectedness if the original network is connected + # ensuring connectedness if the original network is connected if connected is None: connected = False if bct.number_of_components(A) > 1 else True if connected: @@ -776,10 +389,10 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, else: B = R.copy() - u, v = np.triu(B, k=1).nonzero() #upper triangle indices - wts = np.triu(B, k=1)[(u, v)] #upper triangle values + u, v = np.triu(B, k=1).nonzero() # upper triangle indices + wts = np.triu(B, k=1)[(u, v)] # upper triangle values m = len(wts) - sb = np.sum(B, axis=1) #strengths of B + sb = np.sum(B, axis=1) # strengths of B if energy_func is not None: energy = energy_func(s, sb) @@ -809,7 +422,7 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, naccept = 0 for _ in range(niter): - #permutation + # permutation e1 = rs.randint(m) e2 = rs.randint(m) @@ -838,9 +451,9 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, "Received: {}.".format(energy_type)) raise ValueError(msg) - #permutation acceptance criterion + # permutation acceptance criterion if (energy_prime < energy or - rs.rand() < np.exp(-(energy_prime - energy)/temp)): + rs.rand() < np.exp(-(energy_prime - energy) / temp)): sb = sb_prime.copy() wts[[e1, e2]] = wts[[e2, e1]] energy = energy_prime @@ -849,13 +462,13 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, wtsmin = wts.copy() naccept = naccept + 1 - #temperature update - temp = temp*frac + # temperature update + temp = temp * frac if verbose: print('\nstage {:d}, temp {:.5f}, best energy {:.5f}, ' 'frac of accepted moves {:.3f}'.format(istage, temp, energymin, - naccept/niter)) + naccept / niter)) B = np.zeros((n, n)) B[(u, v)] = wtsmin @@ -947,11 +560,11 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, rs = check_random_state(seed) n = A.shape[0] - s = np.sum(A, 
axis=1) #strengths of A + s = np.sum(A, axis=1) # strengths of A - #Maslov & Sneppen rewiring + # Maslov & Sneppen rewiring if R is None: - #ensuring connectedness if the original network is connected + # ensuring connectedness if the original network is connected if connected is None: connected = False if bct.number_of_components(A) > 1 else True if connected: @@ -961,10 +574,10 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, else: B = R.copy() - u, v = np.triu(B, k=1).nonzero() #upper triangle indices - wts = np.triu(B, k=1)[(u, v)] #upper triangle values + u, v = np.triu(B, k=1).nonzero() # upper triangle indices + wts = np.triu(B, k=1)[(u, v)] # upper triangle values m = len(wts) - sb = np.sum(B, axis=1) #strengths of B + sb = np.sum(B, axis=1) # strengths of B energy = np.mean((s - sb)**2) @@ -980,7 +593,7 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, rs.rand(niter) ): - #permutation + # permutation a, b, c, d = u[e1], v[e1], u[e2], v[e2] wts_change = wts[e1] - wts[e2] delta_energy = (2 * wts_change * @@ -990,10 +603,10 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, (s[c] - sb[c]) - (s[d] - sb[d]) ) - )/n + ) / n - #permutation acceptance criterion - if (delta_energy < 0 or prob < np.e**(-(delta_energy)/temp)): + # permutation acceptance criterion + if (delta_energy < 0 or prob < np.e**(-(delta_energy) / temp)): sb[[a, b]] -= wts_change sb[[c, d]] += wts_change @@ -1006,13 +619,13 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, wtsmin = wts.copy() naccept = naccept + 1 - #temperature update - temp = temp*frac + # temperature update + temp = temp * frac if verbose: print('\nstage {:d}, temp {:.5f}, best energy {:.5f}, ' 'frac of accepted moves {:.3f}'.format(istage, temp, energymin, - naccept/niter)) + naccept / niter)) B = np.zeros((n, n)) B[(u, v)] = wtsmin @@ -1114,20 +727,20 @@ def strength_preserving_rand_sa_dir(A, rewiring_iter=10, rs = check_random_state(seed) n = A.shape[0] - s_in = np.sum(A, axis=0) #in-strengths of A - s_out = np.sum(A, axis=1) #out-strengths of A + s_in = np.sum(A, axis=0) # in-strengths of A + s_out = np.sum(A, axis=1) # out-strengths of A - #Maslov & Sneppen rewiring + # Maslov & Sneppen rewiring if connected: B = bct.randmio_dir_connected(A, rewiring_iter, seed=seed)[0] else: B = bct.randmio_dir(A, rewiring_iter, seed=seed)[0] - u, v = B.nonzero() #nonzero indices of B - wts = B[(u, v)] #nonzero values of B + u, v = B.nonzero() # nonzero indices of B + wts = B[(u, v)] # nonzero values of B m = len(wts) - sb_in = np.sum(B, axis=0) #in-strengths of B - sb_out = np.sum(B, axis=1) #out-strengths of B + sb_in = np.sum(B, axis=0) # in-strengths of B + sb_out = np.sum(B, axis=1) # out-strengths of B if energy_func is not None: energy = energy_func(s_in, sb_in) + energy_func(s_out, sb_out) @@ -1136,7 +749,7 @@ def strength_preserving_rand_sa_dir(A, rewiring_iter=10, elif energy_type == 'max': energy = np.max(np.abs(s_in - sb_in)) + np.max(np.abs(s_out - sb_out)) elif energy_type == 'mae': - energy= np.mean(np.abs(s_in - sb_in)) + np.mean(np.abs(s_out - sb_out)) + energy = np.mean(np.abs(s_in - sb_in)) + np.mean(np.abs(s_out - sb_out)) elif energy_type == 'mse': energy = np.mean((s_in - sb_in)**2) + np.mean((s_out - sb_out)**2) elif energy_type == 'rmse': @@ -1158,7 +771,7 @@ def strength_preserving_rand_sa_dir(A, rewiring_iter=10, naccept = 0 for _ in range(niter): - #permutation + # permutation e1 = rs.randint(m) e2 = rs.randint(m) @@ -1196,9 +809,9 @@ def strength_preserving_rand_sa_dir(A, 
rewiring_iter=10, "Received: {}.".format(energy_type)) raise ValueError(msg) - #permutation acceptance criterion + # permutation acceptance criterion if (energy_prime < energy or - rs.rand() < np.exp(-(energy_prime - energy)/temp)): + rs.rand() < np.exp(-(energy_prime - energy) / temp)): sb_in = sb_prime_in.copy() sb_out = sb_prime_out.copy() wts[[e1, e2]] = wts[[e2, e1]] @@ -1208,13 +821,13 @@ def strength_preserving_rand_sa_dir(A, rewiring_iter=10, wtsmin = wts.copy() naccept = naccept + 1 - #temperature update - temp = temp*frac + # temperature update + temp = temp * frac if verbose: print('\nstage {:d}, temp {:.5f}, best energy {:.5f}, ' 'frac of accepted moves {:.3f}'.format(istage, temp, energymin, - naccept/niter)) + naccept / niter)) B = np.zeros((n, n)) B[(u, v)] = wtsmin diff --git a/netneurotools/networks/tests/__init__.py b/netneurotools/networks/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/networks/tests/test_consensus.py b/netneurotools/networks/tests/test_consensus.py new file mode 100644 index 0000000..2c0fc23 --- /dev/null +++ b/netneurotools/networks/tests/test_consensus.py @@ -0,0 +1 @@ +"""For testing netneurotools.networks.consensus functionality.""" diff --git a/netneurotools/networks/tests/test_generative.py b/netneurotools/networks/tests/test_generative.py new file mode 100644 index 0000000..223d4bb --- /dev/null +++ b/netneurotools/networks/tests/test_generative.py @@ -0,0 +1 @@ +"""For testing netneurotools.networks.generative functionality.""" diff --git a/netneurotools/networks/tests/test_networks_utils.py b/netneurotools/networks/tests/test_networks_utils.py new file mode 100644 index 0000000..7bbcfef --- /dev/null +++ b/netneurotools/networks/tests/test_networks_utils.py @@ -0,0 +1,12 @@ +"""For testing netneurotools.networks.networks_utils functionality.""" + +import numpy as np + +from netneurotools import networks + + +def test_get_triu(): + """Test that get_triu returns correct values.""" + arr = np.arange(9).reshape(3, 3) + assert np.all(networks.get_triu(arr) == np.array([1, 2, 5])) + assert np.all(networks.get_triu(arr, k=0) == np.array([0, 1, 2, 4, 5, 8])) diff --git a/netneurotools/networks/tests/test_randomize.py b/netneurotools/networks/tests/test_randomize.py new file mode 100644 index 0000000..c2a4be2 --- /dev/null +++ b/netneurotools/networks/tests/test_randomize.py @@ -0,0 +1 @@ +"""For testing netneurotools.networks.randomize functionality.""" diff --git a/netneurotools/plotting/__init__.py b/netneurotools/plotting/__init__.py new file mode 100644 index 0000000..2124fe3 --- /dev/null +++ b/netneurotools/plotting/__init__.py @@ -0,0 +1,34 @@ +"""Functions for making pretty plots and whatnot.""" + + +from .pysurfer_plotters import ( + plot_conte69, plot_fslr, plot_fsaverage, plot_fsvertex +) + + +from .pyvista_plotters import ( + pv_plot_surface +) + + +from .mpl_plotters import ( + _grid_communities, _sort_communities, + plot_point_brain, plot_mod_heatmap, +) + + +from .color_utils import ( + available_cmaps +) + +__all__ = [ + # pysurfer_plotters + 'plot_conte69', 'plot_fslr', 'plot_fsaverage', 'plot_fsvertex', + # pyvista_plotters + 'pv_plot_surface', + # mpl_plotters + '_grid_communities', '_sort_communities', + 'plot_point_brain', 'plot_mod_heatmap', + # color_utils + 'available_cmaps' +] diff --git a/netneurotools/colors.py b/netneurotools/plotting/color_utils.py similarity index 97% rename from netneurotools/colors.py rename to netneurotools/plotting/color_utils.py index cf0b7d8..f9fb3d0 100644 
--- a/netneurotools/colors.py +++ b/netneurotools/plotting/color_utils.py @@ -1,6 +1,6 @@ -# -*- coding: utf-8 -*- -"""Useful colormaps.""" +"""Functions for working with colors and colormaps.""" +import matplotlib from matplotlib.colors import LinearSegmentedColormap, ListedColormap __all__ = ['parula', 'justine', 'dinosaur'] @@ -94,10 +94,8 @@ def available_cmaps(): def _register_cmaps(): """Register all colormaps in module so they are accessible via matplotlib.""" - from matplotlib.cm import register_cmap - for cmap in __all__: - register_cmap(cmap, globals()[cmap]) + matplotlib.colormaps.register(globals()[cmap], name=cmap) _register_cmaps() diff --git a/netneurotools/plotting/mpl_plotters.py b/netneurotools/plotting/mpl_plotters.py new file mode 100644 index 0000000..66dcd9b --- /dev/null +++ b/netneurotools/plotting/mpl_plotters.py @@ -0,0 +1,287 @@ +"""Functions for matplotlib-based plotting.""" + +from typing import Iterable +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches + + +def _grid_communities(communities): + """ + Generate boundaries of `communities`. + + Parameters + ---------- + communities : array_like + Community assignment vector + + Returns + ------- + bounds : list + Boundaries of communities + """ + communities = np.asarray(communities) + if 0 in communities: + communities = communities + 1 + + comm = communities[np.argsort(communities)] + bounds = [] + for i in np.unique(comm): + ind = np.where(comm == i) + if len(ind) > 0: + bounds.append(np.min(ind)) + + bounds.append(len(communities)) + + return bounds + + +def _sort_communities(consensus, communities): + """ + Sort `communities` in `consensus` according to strength. + + Parameters + ---------- + consensus : array_like + Correlation matrix + communities : array_like + Community assignments for `consensus` + + Returns + ------- + inds : np.ndarray + Index array for sorting `consensus` + """ + communities = np.asarray(communities) + if 0 in communities: + communities = communities + 1 + + bounds = _grid_communities(communities) + inds = np.argsort(communities) + + for n, f in enumerate(bounds[:-1]): + i = inds[f:bounds[n + 1]] + cco = i[consensus[np.ix_(i, i)].mean(axis=1).argsort()[::-1]] + inds[f:bounds[n + 1]] = cco + + return inds + + +def plot_mod_heatmap(data, communities, *, inds=None, edgecolor='black', + ax=None, figsize=(6.4, 4.8), xlabels=None, ylabels=None, + xlabelrotation=90, ylabelrotation=0, cbar=True, + square=True, xticklabels=None, yticklabels=None, + mask_diagonal=True, **kwargs): + """ + Plot `data` as heatmap with borders drawn around `communities`. + + Parameters + ---------- + data : (N, N) array_like + Correlation matrix + communities : (N,) array_like + Community assignments for `data` + inds : (N,) array_like, optional + Index array for sorting `data` within `communities`. If None, these + will be generated from `data`. Default: None + edgecolor : str, optional + Color for lines demarcating community boundaries. Default: 'black' + ax : matplotlib.axes.Axes, optional + Axis on which to plot the heatmap. If none provided, a new figure and + axis will be created. Default: None + figsize : tuple, optional + Size of figure to create if `ax` is not provided. Default: (20, 20) + {x,y}labels : list, optional + List of labels on {x,y}-axis for each community in `communities`. The + number of labels should match the number of unique communities. + Default: None + {x,y}labelrotation : float, optional + Angle of the rotation of the labels. 
Available only if `{x,y}labels` + provided. Default : xlabelrotation: 90, ylabelrotation: 0 + square : bool, optional + Setting the matrix with equal aspect. Default: True + {x,y}ticklabels : list, optional + Incompatible with `{x,y}labels`. List of labels for each entry (not + community) in `data`. Default: None + cbar : bool, optional + Whether to plot colorbar. Default: True + mask_diagonal : bool, optional + Whether to mask the diagonal in the plotted heatmap. Default: True + kwargs : key-value mapping + Keyword arguments for `plt.pcolormesh()` + + Returns + ------- + ax : matplotlib.axes.Axes + Axis object containing plot + """ + for t, label in zip([xticklabels, yticklabels], [xlabels, ylabels]): + if t is not None and label is not None: + raise ValueError('Cannot set both {x,y}labels and {x,y}ticklabels') + + # get indices for sorting consensus + if inds is None: + inds = _sort_communities(data, communities) + + if ax is None: + _, ax = plt.subplots(1, 1, figsize=figsize) + + # plot data re-ordered based on community and node strength + if mask_diagonal: + plot_data = np.ma.masked_where(np.eye(len(data)), + data[np.ix_(inds, inds)]) + else: + plot_data = data[np.ix_(inds, inds)] + + coll = ax.pcolormesh(plot_data, edgecolor='none', **kwargs) + ax.set(xlim=(0, plot_data.shape[1]), ylim=(0, plot_data.shape[0])) + + # set equal aspect + if square: + ax.set_aspect('equal') + + for side in ['top', 'right', 'left', 'bottom']: + ax.spines[side].set_visible(False) + + # invert the y-axis so it looks "as expected" + ax.invert_yaxis() + + # plot the colorbar + if cbar: + cb = ax.figure.colorbar(coll) + if kwargs.get('rasterized', False): + cb.solids.set_rasterized(True) + + # draw borders around communities + bounds = _grid_communities(communities) + bounds[0] += 0.2 + bounds[-1] -= 0.2 + for n, edge in enumerate(np.diff(bounds)): + ax.add_patch(mpatches.Rectangle((bounds[n], bounds[n]), + edge, edge, fill=False, linewidth=2, + edgecolor=edgecolor)) + + if xlabels is not None or ylabels is not None: + # find the tick locations + initloc = _grid_communities(communities) + tickloc = [] + for loc in range(len(initloc) - 1): + tickloc.append(np.mean((initloc[loc], initloc[loc + 1]))) + + if xlabels is not None: + # make sure number of labels match the number of ticks + if len(tickloc) != len(xlabels): + raise ValueError('Number of labels do not match the number of ' + 'unique communities.') + else: + ax.set_xticks(tickloc) + ax.set_xticklabels(labels=xlabels, rotation=xlabelrotation) + ax.tick_params(left=False, bottom=False) + if ylabels is not None: + # make sure number of labels match the number of ticks + if len(tickloc) != len(ylabels): + raise ValueError('Number of labels do not match the number of ' + 'unique communities.') + else: + ax.set_yticks(tickloc) + ax.set_yticklabels(labels=ylabels, rotation=ylabelrotation) + ax.tick_params(left=False, bottom=False) + + if xticklabels is not None: + labels_ind = [xticklabels[i] for i in inds] + ax.set_xticks(np.arange(len(labels_ind)) + 0.5) + ax.set_xticklabels(labels_ind, rotation=90) + if yticklabels is not None: + labels_ind = [yticklabels[i] for i in inds] + ax.set_yticks(np.arange(len(labels_ind)) + 0.5) + ax.set_yticklabels(labels_ind) + + return ax + + +def plot_point_brain(data, coords, views=None, views_orientation='vertical', + views_size=(4, 2.4), cbar=False, robust=True, size=50, + **kwargs): + """ + Plot `data` as a cloud of points in 3D space based on specified `coords`. 
+ + Parameters + ---------- + data : (N,) array_like + Data for an `N` node parcellation; determines color of points + coords : (N, 3) array_like + x, y, z coordinates for `N` node parcellation + views : list, optional + List specifying which views to use. Can be any of {'sagittal', 'sag', + 'coronal', 'cor', 'axial', 'ax'}. If not specified will use 'sagittal' + and 'axial'. Default: None + views_orientation: str, optional + Orientation of the views. Can be either 'vertical' or 'horizontal'. + Default: 'vertical'. + views_size : tuple, optional + Figure size of each view. Default: (4, 2.4) + cbar : bool, optional + Whether to also show colorbar. Default: False + robust : bool, optional + Whether to use robust calculation of `vmin` and `vmax` for color scale. + size : int, optional + Size of points on plot. Default: 50 + **kwargs + Key-value pairs passed to `matplotlib.axes.Axis.scatter` + + Returns + ------- + fig : :class:`matplotlib.figure.Figure` + """ + _views = dict(sagittal=(0, 180), sag=(0, 180), + axial=(90, 180), ax=(90, 180), + coronal=(0, 90), cor=(0, 90)) + + x, y, z = coords[:, 0], coords[:, 1], coords[:, 2] + + if views is None: + views = [_views[f] for f in ['sagittal', 'axial']] + else: + if not isinstance(views, Iterable) or isinstance(views, str): + views = [views] + views = [_views[f] for f in views] + + if views_orientation == 'vertical': + ncols, nrows = 1, len(views) + elif views_orientation == 'horizontal': + ncols, nrows = len(views), 1 + figsize = (ncols * views_size[0], nrows * views_size[1]) + + # create figure and axes (3d projections) + fig, axes = plt.subplots(ncols=ncols, nrows=nrows, + figsize=figsize, + subplot_kw=dict(projection='3d')) + + opts = dict(linewidth=0.5, edgecolor='gray', cmap='viridis') + if robust: + vmin, vmax = np.percentile(data, [2.5, 97.5]) + opts.update(dict(vmin=vmin, vmax=vmax)) + opts.update(kwargs) + + # iterate through saggital/axial views and plot, rotating as needed + for n, view in enumerate(views): + # if only one view then axes is not a list! + ax = axes[n] if len(views) > 1 else axes + # make the actual scatterplot and update the view / aspect ratios + col = ax.scatter(x, y, z, c=data, s=size, **opts) + ax.view_init(*view) + ax.axis('off') + scaling = np.array([ax.get_xlim(), + ax.get_ylim(), + ax.get_zlim()]) + ax.set_box_aspect(tuple(scaling[:, 1] - scaling[:, 0])) + + fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0, wspace=0) + + # add colorbar to axes + if cbar: + cbar = fig.colorbar(col, ax=axes.flatten(), + drawedges=False, shrink=0.7) + cbar.outline.set_linewidth(0) + + return fig diff --git a/netneurotools/plotting.py b/netneurotools/plotting/pysurfer_plotters.py similarity index 61% rename from netneurotools/plotting.py rename to netneurotools/plotting/pysurfer_plotters.py index 3886547..50e830b 100644 --- a/netneurotools/plotting.py +++ b/netneurotools/plotting/pysurfer_plotters.py @@ -1,209 +1,10 @@ -# -*- coding: utf-8 -*- -"""Functions for making pretty plots and whatnot.""" +"""Functions for pysurfer-based plotting.""" import os -from typing import Iterable - -import matplotlib.patches as patches -import matplotlib.pyplot as plt -from mpl_toolkits.mplot3d import Axes3D # noqa -import nibabel as nib import numpy as np +import nibabel as nib -from .freesurfer import FSIGNORE, _decode_list - - -def _grid_communities(communities): - """ - Generate boundaries of `communities`. 
- - Parameters - ---------- - communities : array_like - Community assignment vector - - Returns - ------- - bounds : list - Boundaries of communities - """ - communities = np.asarray(communities) - if 0 in communities: - communities = communities + 1 - - comm = communities[np.argsort(communities)] - bounds = [] - for i in np.unique(comm): - ind = np.where(comm == i) - if len(ind) > 0: - bounds.append(np.min(ind)) - - bounds.append(len(communities)) - - return bounds - - -def sort_communities(consensus, communities): - """ - Sort `communities` in `consensus` according to strength. - - Parameters - ---------- - consensus : array_like - Correlation matrix - communities : array_like - Community assignments for `consensus` - - Returns - ------- - inds : np.ndarray - Index array for sorting `consensus` - """ - communities = np.asarray(communities) - if 0 in communities: - communities = communities + 1 - - bounds = _grid_communities(communities) - inds = np.argsort(communities) - - for n, f in enumerate(bounds[:-1]): - i = inds[f:bounds[n + 1]] - cco = i[consensus[np.ix_(i, i)].mean(axis=1).argsort()[::-1]] - inds[f:bounds[n + 1]] = cco - - return inds - - -def plot_mod_heatmap(data, communities, *, inds=None, edgecolor='black', - ax=None, figsize=(6.4, 4.8), xlabels=None, ylabels=None, - xlabelrotation=90, ylabelrotation=0, cbar=True, - square=True, xticklabels=None, yticklabels=None, - mask_diagonal=True, **kwargs): - """ - Plot `data` as heatmap with borders drawn around `communities`. - - Parameters - ---------- - data : (N, N) array_like - Correlation matrix - communities : (N,) array_like - Community assignments for `data` - inds : (N,) array_like, optional - Index array for sorting `data` within `communities`. If None, these - will be generated from `data`. Default: None - edgecolor : str, optional - Color for lines demarcating community boundaries. Default: 'black' - ax : matplotlib.axes.Axes, optional - Axis on which to plot the heatmap. If none provided, a new figure and - axis will be created. Default: None - figsize : tuple, optional - Size of figure to create if `ax` is not provided. Default: (20, 20) - {x,y}labels : list, optional - List of labels on {x,y}-axis for each community in `communities`. The - number of labels should match the number of unique communities. - Default: None - {x,y}labelrotation : float, optional - Angle of the rotation of the labels. Available only if `{x,y}labels` - provided. Default : xlabelrotation: 90, ylabelrotation: 0 - square : bool, optional - Setting the matrix with equal aspect. Default: True - {x,y}ticklabels : list, optional - Incompatible with `{x,y}labels`. List of labels for each entry (not - community) in `data`. Default: None - cbar : bool, optional - Whether to plot colorbar. Default: True - mask_diagonal : bool, optional - Whether to mask the diagonal in the plotted heatmap. 
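# Small demonstration of the community-boundary helpers used by
# plot_mod_heatmap (exposed as the private _grid_communities /
# _sort_communities in this refactor); the values mirror the new test_mpl.py.
import numpy as np
from netneurotools import plotting

comms = np.asarray([0, 0, 0, 0, 1, 1, 1, 1, 2, 2])
# boundaries mark where each community block starts, plus the total length;
# 0- and 1-based community labels give the same result
assert np.allclose(plotting._grid_communities(comms), [0, 4, 8, 10])
assert np.allclose(plotting._grid_communities(comms + 1), [0, 4, 8, 10])

# within each community, nodes are re-ordered by descending mean connectivity
data = np.arange(9).reshape(3, 3)
assert np.allclose(plotting._sort_communities(data, np.asarray([0, 0, 2])), [1, 0, 2])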
Default: True - kwargs : key-value mapping - Keyword arguments for `plt.pcolormesh()` - - Returns - ------- - ax : matplotlib.axes.Axes - Axis object containing plot - """ - for t, label in zip([xticklabels, yticklabels], [xlabels, ylabels]): - if t is not None and label is not None: - raise ValueError('Cannot set both {x,y}labels and {x,y}ticklabels') - - # get indices for sorting consensus - if inds is None: - inds = sort_communities(data, communities) - - if ax is None: - fig, ax = plt.subplots(1, 1, figsize=figsize) - - # plot data re-ordered based on community and node strength - if mask_diagonal: - plot_data = np.ma.masked_where(np.eye(len(data)), - data[np.ix_(inds, inds)]) - else: - plot_data = data[np.ix_(inds, inds)] - - coll = ax.pcolormesh(plot_data, edgecolor='none', **kwargs) - ax.set(xlim=(0, plot_data.shape[1]), ylim=(0, plot_data.shape[0])) - - # set equal aspect - if square: - ax.set_aspect('equal') - - for side in ['top', 'right', 'left', 'bottom']: - ax.spines[side].set_visible(False) - - # invert the y-axis so it looks "as expected" - ax.invert_yaxis() - - # plot the colorbar - if cbar: - cb = ax.figure.colorbar(coll) - if kwargs.get('rasterized', False): - cb.solids.set_rasterized(True) - - # draw borders around communities - bounds = _grid_communities(communities) - bounds[0] += 0.2 - bounds[-1] -= 0.2 - for n, edge in enumerate(np.diff(bounds)): - ax.add_patch(patches.Rectangle((bounds[n], bounds[n]), - edge, edge, fill=False, linewidth=2, - edgecolor=edgecolor)) - - if xlabels is not None or ylabels is not None: - # find the tick locations - initloc = _grid_communities(communities) - tickloc = [] - for loc in range(len(initloc) - 1): - tickloc.append(np.mean((initloc[loc], initloc[loc + 1]))) - - if xlabels is not None: - # make sure number of labels match the number of ticks - if len(tickloc) != len(xlabels): - raise ValueError('Number of labels do not match the number of ' - 'unique communities.') - else: - ax.set_xticks(tickloc) - ax.set_xticklabels(labels=xlabels, rotation=xlabelrotation) - ax.tick_params(left=False, bottom=False) - if ylabels is not None: - # make sure number of labels match the number of ticks - if len(tickloc) != len(ylabels): - raise ValueError('Number of labels do not match the number of ' - 'unique communities.') - else: - ax.set_yticks(tickloc) - ax.set_yticklabels(labels=ylabels, rotation=ylabelrotation) - ax.tick_params(left=False, bottom=False) - - if xticklabels is not None: - labels_ind = [xticklabels[i] for i in inds] - ax.set_xticks(np.arange(len(labels_ind)) + 0.5) - ax.set_xticklabels(labels_ind, rotation=90) - if yticklabels is not None: - labels_ind = [yticklabels[i] for i in inds] - ax.set_yticks(np.arange(len(labels_ind)) + 0.5) - ax.set_yticklabels(labels_ind) - - return ax +from ..datasets import FREESURFER_IGNORE, _get_freesurfer_subjid def plot_conte69(data, lhlabel, rhlabel, surf='midthickness', @@ -319,7 +120,7 @@ def plot_fslr(data, lhlabel, rhlabel, surf_atlas='conte69', scene : mayavi.Scene Scene object containing plot """ - from .datasets import fetch_conte69, fetch_yerkes19 + from ..datasets import fetch_conte69, fetch_yerkes19 try: from mayavi import mlab except ImportError: @@ -388,44 +189,6 @@ def plot_fslr(data, lhlabel, rhlabel, surf_atlas='conte69', return lhplot, rhplot -def _get_fs_subjid(subject_id, subjects_dir=None): - """ - Get fsaverage version `subject_id`, fetching if required. 
- - Parameters - ---------- - subject_id : str - FreeSurfer subject ID - subjects_dir : str, optional - Path to FreeSurfer subject directory. If not set, will inherit from - the environmental variable $SUBJECTS_DIR. Default: None - - Returns - ------- - subject_id : str - FreeSurfer subject ID - subjects_dir : str - Path to subject directory with `subject_id` - """ - from netneurotools.utils import check_fs_subjid - - # check for FreeSurfer install w/fsaverage; otherwise, fetch required - try: - subject_id, subjects_dir = check_fs_subjid(subject_id, subjects_dir) - except FileNotFoundError: - if 'fsaverage' not in subject_id: - raise ValueError('Provided subject {} does not exist in provided ' - 'subjects_dir {}' - .format(subject_id, subjects_dir)) from None - from netneurotools.datasets import fetch_fsaverage - from netneurotools.datasets.utils import _get_data_dir - fetch_fsaverage(subject_id) - subjects_dir = os.path.join(_get_data_dir(), 'tpl-fsaverage') - subject_id, subjects_dir = check_fs_subjid(subject_id, subjects_dir) - - return subject_id, subjects_dir - - def plot_fsaverage(data, *, lhannot, rhannot, order='lr', mask=None, noplot=None, subject_id='fsaverage', subjects_dir=None, vmin=None, vmax=None, **kwargs): @@ -503,7 +266,11 @@ def plot_fsaverage(data, *, lhannot, rhannot, order='lr', mask=None, ... rhannot=schaefer.rh) # doctest: +SKIP """ - subject_id, subjects_dir = _get_fs_subjid(subject_id, subjects_dir) + def _decode_list(vals): + """List decoder.""" + return [val.decode() if hasattr(val, 'decode') else val for val in vals] + + subject_id, subjects_dir = _get_freesurfer_subjid(subject_id, subjects_dir) # cast data to float (required for NaNs) data = np.asarray(data, dtype='float') @@ -521,7 +288,7 @@ def plot_fsaverage(data, *, lhannot, rhannot, order='lr', mask=None, vmax = np.nanpercentile(data, 97.5) # parcels that should not be included in parcellation - drop = FSIGNORE.copy() + drop = FREESURFER_IGNORE.copy() if noplot is not None: if isinstance(noplot, str): noplot = [noplot] @@ -533,7 +300,7 @@ def plot_fsaverage(data, *, lhannot, rhannot, order='lr', mask=None, # loads annotation data for hemisphere, including vertex `labels`! if not annot.startswith(os.path.abspath(os.sep)): annot = os.path.join(subjects_dir, subject_id, 'label', annot) - labels, ctab, names = nib.freesurfer.read_annot(annot) + labels, _, names = nib.freesurfer.read_annot(annot) names = _decode_list(names) # get appropriate data, accounting for hemispheric asymmetry @@ -637,7 +404,7 @@ def plot_fsvertex(data, *, order='lr', surf='pial', views='lat', raise ImportError('Cannot use plot_fsaverage() if pysurfer is not ' 'installed. Please install pysurfer and try again.') from None - subject_id, subjects_dir = _get_fs_subjid(subject_id, subjects_dir) + subject_id, subjects_dir = _get_freesurfer_subjid(subject_id, subjects_dir) # cast data to float (required for NaNs) data = np.asarray(data, dtype='float') @@ -712,91 +479,3 @@ def plot_fsvertex(data, *, order='lr', surf='pial', views='lat', surf[n].render() return brain - - -def plot_point_brain(data, coords, views=None, views_orientation='vertical', - views_size=(4, 2.4), cbar=False, robust=True, size=50, - **kwargs): - """ - Plot `data` as a cloud of points in 3D space based on specified `coords`. 
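# Hedged usage sketch for plot_fsaverage after the move above, following the
# same pattern as the new pysurfer test: one value per Cammoun scale-033
# parcel plotted on fsaverage5. Requires pysurfer/mayavi; files are fetched
# on demand by the datasets module.
import numpy as np
from netneurotools import datasets, plotting

lhannot, rhannot = datasets.fetch_cammoun2012('fsaverage5')['scale033']
data = np.random.rand(68)                             # 68 cortical parcels at scale 033
brain = plotting.plot_fsaverage(data, lhannot=lhannot, rhannot=rhannot,
                                subject_id='fsaverage5')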
- - Parameters - ---------- - data : (N,) array_like - Data for an `N` node parcellation; determines color of points - coords : (N, 3) array_like - x, y, z coordinates for `N` node parcellation - views : list, optional - List specifying which views to use. Can be any of {'sagittal', 'sag', - 'coronal', 'cor', 'axial', 'ax'}. If not specified will use 'sagittal' - and 'axial'. Default: None - views_orientation: str, optional - Orientation of the views. Can be either 'vertical' or 'horizontal'. - Default: 'vertical'. - views_size : tuple, optional - Figure size of each view. Default: (4, 2.4) - cbar : bool, optional - Whether to also show colorbar. Default: False - robust : bool, optional - Whether to use robust calculation of `vmin` and `vmax` for color scale. - size : int, optional - Size of points on plot. Default: 50 - **kwargs - Key-value pairs passed to `matplotlib.axes.Axis.scatter` - - Returns - ------- - fig : :class:`matplotlib.figure.Figure` - """ - _views = dict(sagittal=(0, 180), sag=(0, 180), - axial=(90, 180), ax=(90, 180), - coronal=(0, 90), cor=(0, 90)) - - x, y, z = coords[:, 0], coords[:, 1], coords[:, 2] - - if views is None: - views = [_views[f] for f in ['sagittal', 'axial']] - else: - if not isinstance(views, Iterable) or isinstance(views, str): - views = [views] - views = [_views[f] for f in views] - - if views_orientation == 'vertical': - ncols, nrows = 1, len(views) - elif views_orientation == 'horizontal': - ncols, nrows = len(views), 1 - figsize = (ncols * views_size[0], nrows * views_size[1]) - - # create figure and axes (3d projections) - fig, axes = plt.subplots(ncols=ncols, nrows=nrows, - figsize=figsize, - subplot_kw=dict(projection='3d')) - - opts = dict(linewidth=0.5, edgecolor='gray', cmap='viridis') - if robust: - vmin, vmax = np.percentile(data, [2.5, 97.5]) - opts.update(dict(vmin=vmin, vmax=vmax)) - opts.update(kwargs) - - # iterate through saggital/axial views and plot, rotating as needed - for n, view in enumerate(views): - # if only one view then axes is not a list! 
- ax = axes[n] if len(views) > 1 else axes - # make the actual scatterplot and update the view / aspect ratios - col = ax.scatter(x, y, z, c=data, s=size, **opts) - ax.view_init(*view) - ax.axis('off') - scaling = np.array([ax.get_xlim(), - ax.get_ylim(), - ax.get_zlim()]) - ax.set_box_aspect(tuple(scaling[:, 1] - scaling[:, 0])) - - fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0, wspace=0) - - # add colorbar to axes - if cbar: - cbar = fig.colorbar(col, ax=axes.flatten(), - drawedges=False, shrink=0.7) - cbar.outline.set_linewidth(0) - - return fig diff --git a/netneurotools/plotting/pyvista_plotters.py b/netneurotools/plotting/pyvista_plotters.py new file mode 100644 index 0000000..7b17565 --- /dev/null +++ b/netneurotools/plotting/pyvista_plotters.py @@ -0,0 +1,6 @@ +"""Functions for pyvista-based plotting.""" + + +def pv_plot_surface(): + """Plot a surface using PyVista.""" + pass diff --git a/netneurotools/plotting/tests/__init__.py b/netneurotools/plotting/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/plotting/tests/test_color_utils.py b/netneurotools/plotting/tests/test_color_utils.py new file mode 100644 index 0000000..3bd55c1 --- /dev/null +++ b/netneurotools/plotting/tests/test_color_utils.py @@ -0,0 +1,10 @@ +"""For testing netneurotools.plotting.color_utils functionality.""" + + +def test_register_cmaps(): + """Test registering colormaps.""" + import matplotlib + if "justine" in matplotlib.colormaps: + assert True + else: + assert False diff --git a/netneurotools/plotting/tests/test_mpl.py b/netneurotools/plotting/tests/test_mpl.py new file mode 100644 index 0000000..1d7d79a --- /dev/null +++ b/netneurotools/plotting/tests/test_mpl.py @@ -0,0 +1,38 @@ +"""For testing netneurotools.plotting.mpl_plotters functionality.""" + +import numpy as np +import matplotlib.pyplot as plt +from netneurotools import plotting + + +def test_grid_communities(): + """Test _grid_communities function.""" + comms = np.asarray([0, 0, 0, 0, 1, 1, 1, 1, 2, 2]) + # check that comms with / without 0 community label yields same output + assert np.allclose(plotting._grid_communities(comms), [0, 4, 8, 10]) + assert np.allclose(plotting._grid_communities(comms + 1), [0, 4, 8, 10]) + + +def test_sort_communities(): + """Test sort_communities function.""" + data = np.arange(9).reshape(3, 3) + comms = np.asarray([0, 0, 2]) + # check that comms with / without 0 community label yields same output + assert np.allclose(plotting._sort_communities(data, comms), [1, 0, 2]) + assert np.allclose(plotting._sort_communities(data, comms + 1), [1, 0, 2]) + + +def test_plot_mod_heatmap(): + """Test plot_mod_heatmap function.""" + data = np.random.rand(100, 100) + comms = np.random.choice(4, size=(100,)) + ax = plotting.plot_mod_heatmap(data, comms) + assert isinstance(ax, plt.Axes) + + +def test_plot_point_brain(): + """Test plot_point_brain function.""" + data = np.random.rand(100) + coords = np.random.rand(100, 3) + out = plotting.plot_point_brain(data, coords) + assert isinstance(out, plt.Figure) diff --git a/netneurotools/plotting/tests/test_pysurfer.py b/netneurotools/plotting/tests/test_pysurfer.py new file mode 100644 index 0000000..3133b3c --- /dev/null +++ b/netneurotools/plotting/tests/test_pysurfer.py @@ -0,0 +1,28 @@ +"""For testing netneurotools.plotting.pysurfer_plotters functionality.""" + +import pytest +import numpy as np +from netneurotools import datasets, plotting + + +@pytest.mark.filterwarnings('ignore') +def test_plot_fsvertex(): + """Test 
plotting on a freesurfer vertex.""" + surfer = pytest.importorskip('surfer') + + data = np.random.rand(20484) + brain = plotting.plot_fsvertex(data, subject_id='fsaverage5', + offscreen=True) + assert isinstance(brain, surfer.Brain) + + +@pytest.mark.filterwarnings('ignore') +def test_plot_fsaverage(): + """Test plotting on a freesurfer average brain.""" + surfer = pytest.importorskip('surfer') + + data = np.random.rand(68) + lhannot, rhannot = datasets.fetch_cammoun2012('fsaverage5')['scale033'] + brain = plotting.plot_fsaverage(data, lhannot=lhannot, rhannot=rhannot, + subject_id='fsaverage5', offscreen=True) + assert isinstance(brain, surfer.Brain) diff --git a/netneurotools/plotting/tests/test_pyvista.py b/netneurotools/plotting/tests/test_pyvista.py new file mode 100644 index 0000000..0b87931 --- /dev/null +++ b/netneurotools/plotting/tests/test_pyvista.py @@ -0,0 +1 @@ +"""For testing netneurotools.plotting.pyvista_plotters functionality.""" diff --git a/netneurotools/spatial/__init__.py b/netneurotools/spatial/__init__.py new file mode 100644 index 0000000..a958655 --- /dev/null +++ b/netneurotools/spatial/__init__.py @@ -0,0 +1,12 @@ +"""Functions for handling spatial brain data.""" + + +from .spatial_stats import ( + morans_i, local_morans_i +) + + +__all__ = [ + # spatial_stats + 'morans_i', 'local_morans_i' +] diff --git a/netneurotools/spatial/gaussian_random_field.py b/netneurotools/spatial/gaussian_random_field.py new file mode 100644 index 0000000..7b40565 --- /dev/null +++ b/netneurotools/spatial/gaussian_random_field.py @@ -0,0 +1 @@ +"""Functions for working with Gaussian random fields.""" diff --git a/netneurotools/spatial/spatial_stats.py b/netneurotools/spatial/spatial_stats.py new file mode 100644 index 0000000..54baddf --- /dev/null +++ b/netneurotools/spatial/spatial_stats.py @@ -0,0 +1,11 @@ +"""Functions for calculating spatial statistics.""" + + +def morans_i(): + """Calculate Moran's I for spatial autocorrelation.""" + pass + + +def local_morans_i(): + """Calculate local Moran's I for spatial autocorrelation.""" + pass diff --git a/netneurotools/spatial/tests/__init__.py b/netneurotools/spatial/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/spatial/tests/test_grf.py b/netneurotools/spatial/tests/test_grf.py new file mode 100644 index 0000000..962bdb9 --- /dev/null +++ b/netneurotools/spatial/tests/test_grf.py @@ -0,0 +1 @@ +"""For testing netneurotools.spatial.gaussian_random_field functionality.""" diff --git a/netneurotools/spatial/tests/test_spatialstats.py b/netneurotools/spatial/tests/test_spatialstats.py new file mode 100644 index 0000000..fa9c7f6 --- /dev/null +++ b/netneurotools/spatial/tests/test_spatialstats.py @@ -0,0 +1 @@ +"""For testing netneurotools.spatial.spatial_stats functionality.""" diff --git a/netneurotools/stats.py b/netneurotools/stats.py deleted file mode 100644 index 952995b..0000000 --- a/netneurotools/stats.py +++ /dev/null @@ -1,1593 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for performing statistical preprocessing and analyses.""" - -import warnings - -import numpy as np -from tqdm import tqdm -from itertools import combinations -from scipy import optimize, spatial, special, stats as sstats -try: # scipy >= 1.8.0 - from scipy.stats._stats_py import _chk2_asarray -except ImportError: # scipy < 1.8.0 - from scipy.stats.stats import _chk2_asarray -from sklearn.utils.validation import check_random_state -from sklearn.linear_model import LinearRegression -from joblib import Parallel, 
delayed - - -from . import utils -from .metrics import _graph_laplacian - -try: - from numba import njit - use_numba = True -except ImportError: - use_numba = False - - -def residualize(X, Y, Xc=None, Yc=None, normalize=True, add_intercept=True): - """ - Return residuals of regression equation from `Y ~ X`. - - Parameters - ---------- - X : (N[, R]) array_like - Coefficient matrix of `R` variables for `N` subjects - Y : (N[, F]) array_like - Dependent variable matrix of `F` variables for `N` subjects - Xc : (M[, R]) array_like, optional - Coefficient matrix of `R` variables for `M` subjects. If not specified - then `X` is used to estimate betas. Default: None - Yc : (M[, F]) array_like, optional - Dependent variable matrix of `F` variables for `M` subjects. If not - specified then `Y` is used to estimate betas. Default: None - normalize : bool, optional - Whether to normalize (i.e., z-score) residuals. Will use residuals from - `Yc ~ Xc` for generating mean and variance. Default: True - add_intercept : bool, optional - Whether to add intercept to `X` (and `Xc`, if provided). The intercept - will not be removed, just used in beta estimation. Default: True - - Returns - ------- - Yr : (N, F) numpy.ndarray - Residuals of `Y ~ X` - - Notes - ----- - If both `Xc` and `Yc` are provided, these are used to calculate betas which - are then applied to `X` and `Y`. - """ - if ((Yc is None and Xc is not None) or (Yc is not None and Xc is None)): - raise ValueError('If processing against a comparative group, you must ' - 'provide both `Xc` and `Yc`.') - - X, Y = np.asarray(X), np.asarray(Y) - - if Yc is None: - Xc, Yc = X.copy(), Y.copy() - else: - Xc, Yc = np.asarray(Xc), np.asarray(Yc) - - # add intercept to regressors if requested and calculate fit - if add_intercept: - X, Xc = utils.add_constant(X), utils.add_constant(Xc) - betas, *rest = np.linalg.lstsq(Xc, Yc, rcond=None) - - # remove intercept from regressors and betas for calculation of residuals - if add_intercept: - betas = betas[:-1] - X, Xc = X[:, :-1], Xc[:, :-1] - - # calculate residuals - Yr = Y - (X @ betas) - Ycr = Yc - (Xc @ betas) - - if normalize: - Yr = sstats.zmap(Yr, compare=Ycr) - - return Yr - - -def get_mad_outliers(data, thresh=3.5): - """ - Determine which samples in `data` are outliers. - - Uses the Median Absolute Deviation for determining whether datapoints are - outliers - - Parameters - ---------- - data : (N, M) array_like - Data array where `N` is samples and `M` is features - thresh : float, optional - Modified z-score. Observations with a modified z-score (based on the - median absolute deviation) greater than this value will be classified - as outliers. Default: 3.5 - - Returns - ------- - outliers : (N,) numpy.ndarray - Boolean array where True indicates an outlier - - Notes - ----- - Taken directly from https://stackoverflow.com/a/22357811 - - References - ---------- - Boris Iglewicz and David Hoaglin (1993), "Volume 16: How to Detect and - Handle Outliers", The ASQC Basic References in Quality Control: Statistical - Techniques, Edward F. Mykytka, Ph.D., Editor. 
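# Hedged sketch of the residualize() routine shown above. The
# `netneurotools.stats` import path is the pre-refactor one (this patch
# removes stats.py), so the function may live elsewhere afterwards; the
# covariate/feature shapes below are arbitrary illustrations.
import numpy as np
from netneurotools import stats

rng = np.random.default_rng(42)
X = rng.normal(size=(100, 2))                         # covariates (e.g., age, motion)
Y = X @ np.array([[0.5, -0.2], [0.3, 0.8]]) + rng.normal(size=(100, 2))

Yr = stats.residualize(X, Y)                          # residuals of Y ~ X, z-scored
assert Yr.shape == (100, 2)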
- - Examples - -------- - >>> from netneurotools import stats - - Create array with three samples of four features each: - - >>> X = np.array([[0, 5, 10, 15], [1, 4, 11, 16], [100, 100, 100, 100]]) - >>> X - array([[ 0, 5, 10, 15], - [ 1, 4, 11, 16], - [100, 100, 100, 100]]) - - Determine which sample(s) is outlier: - - >>> outliers = stats.get_mad_outliers(X) - >>> outliers - array([False, False, True]) - """ - data = np.asarray(data) - - if data.ndim == 1: - data = np.vstack(data) - if data.ndim > 2: - data = data.reshape(len(data), -1) - - median = np.nanmedian(data, axis=0) - diff = np.nansum((data - median)**2, axis=-1) - diff = np.sqrt(diff) - med_abs_deviation = np.median(diff) - - modified_z_score = 0.6745 * diff / med_abs_deviation - - return modified_z_score > thresh - - -def permtest_1samp(a, popmean, axis=0, n_perm=1000, seed=0): - """ - Non-parametric equivalent of :py:func:`scipy.stats.ttest_1samp`. - - Generates two-tailed p-value for hypothesis of whether `a` differs from - `popmean` using permutation tests - - Parameters - ---------- - a : array_like - Sample observations - popmean : float or array_like - Expected valued in null hypothesis. If array_like then it must have the - same shape as `a` excluding the `axis` dimension - axis : int or None, optional - Axis along which to compute test. If None, compute over the whole array - of `a`. Default: 0 - n_perm : int, optional - Number of permutations to assess. Unless `a` is very small along `axis` - this will approximate a randomization test via Monte Carlo simulations. - Default: 1000 - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Set to None for "randomness". - Default: 0 - - Returns - ------- - stat : float or numpy.ndarray - Difference from `popmean` - pvalue : float or numpy.ndarray - Non-parametric p-value - - Notes - ----- - Providing multiple values to `popmean` to run *independent* tests in - parallel is not currently supported. - - The lowest p-value that can be returned by this function is equal to 1 / - (`n_perm` + 1). - - Examples - -------- - >>> from netneurotools import stats - >>> np.random.seed(7654567) # set random seed for reproducible results - >>> rvs = np.random.normal(loc=5, scale=10, size=(50, 2)) - - Test if mean of random sample is equal to true mean, and different mean. We - reject the null hypothesis in the second case and don't reject it in the - first case. - - >>> stats.permtest_1samp(rvs, 5.0) - (array([-0.985602 , -0.05204969]), array([0.48551449, 0.95904096])) - >>> stats.permtest_1samp(rvs, 0.0) - (array([4.014398 , 4.94795031]), array([0.00699301, 0.000999 ])) - - Example using axis and non-scalar dimension for population mean - - >>> stats.permtest_1samp(rvs, [5.0, 0.0]) - (array([-0.985602 , 4.94795031]), array([0.48551449, 0.000999 ])) - >>> stats.permtest_1samp(rvs.T, [5.0, 0.0], axis=1) - (array([-0.985602 , 4.94795031]), array([0.51548452, 0.000999 ])) - """ - a, popmean, axis = _chk2_asarray(a, popmean, axis) - rs = check_random_state(seed) - - if a.size == 0: - return np.nan, np.nan - - # ensure popmean will broadcast to `a` correctly - if popmean.ndim != a.ndim: - popmean = np.expand_dims(popmean, axis=axis) - - # center `a` around `popmean` and calculate original mean - zeroed = a - popmean - true_mean = zeroed.mean(axis=axis) / 1 - abs_mean = np.abs(true_mean) - - # this for loop is not _the fastest_ but is memory efficient - # the broadcasting alt. 
would mean storing zeroed.size * n_perm in memory - permutations = np.ones(true_mean.shape) - for _ in range(n_perm): - flipped = zeroed * rs.choice([-1, 1], size=zeroed.shape) # sign flip - permutations += np.abs(flipped.mean(axis=axis)) >= abs_mean - - pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_mean - - return true_mean, pvals - - -def permtest_rel(a, b, axis=0, n_perm=1000, seed=0): - """ - Non-parametric equivalent of :py:func:`scipy.stats.ttest_rel`. - - Generates two-tailed p-value for hypothesis of whether related samples `a` - and `b` differ using permutation tests - - Parameters - ---------- - a, b : array_like - Sample observations. These arrays must have the same shape. - axis : int or None, optional - Axis along which to compute test. If None, compute over whole arrays - of `a` and `b`. Default: 0 - n_perm : int, optional - Number of permutations to assess. Unless `a` and `b` are very small - along `axis` this will approximate a randomization test via Monte - Carlo simulations. Default: 1000 - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Set to None for "randomness". - Default: 0 - - Returns - ------- - stat : float or numpy.ndarray - Average difference between `a` and `b` - pvalue : float or numpy.ndarray - Non-parametric p-value - - Notes - ----- - The lowest p-value that can be returned by this function is equal to 1 / - (`n_perm` + 1). - - Examples - -------- - >>> from netneurotools import stats - - >>> np.random.seed(12345678) # set random seed for reproducible results - >>> rvs1 = np.random.normal(loc=5, scale=10, size=500) - >>> rvs2 = (np.random.normal(loc=5, scale=10, size=500) - ... + np.random.normal(scale=0.2, size=500)) - >>> stats.permtest_rel(rvs1, rvs2) # doctest: +SKIP - (-0.16506275161572695, 0.8021978021978022) - - >>> rvs3 = (np.random.normal(loc=8, scale=10, size=500) - ... + np.random.normal(scale=0.2, size=500)) - >>> stats.permtest_rel(rvs1, rvs3) # doctest: +SKIP - (2.40533726097883, 0.000999000999000999) - """ - a, b, axis = _chk2_asarray(a, b, axis) - rs = check_random_state(seed) - - if a.shape[axis] != b.shape[axis]: - raise ValueError('Provided arrays do not have same length along axis') - - if a.size == 0 or b.size == 0: - return np.nan, np.nan - - # calculate original difference in means - ab = np.stack([a, b], axis=0) - if ab.ndim < 3: - ab = np.expand_dims(ab, axis=-1) - true_diff = np.squeeze(np.diff(ab, axis=0)).mean(axis=axis) / 1 - abs_true = np.abs(true_diff) - - # idx array - reidx = np.meshgrid(*[range(f) for f in ab.shape], indexing='ij') - - permutations = np.ones(true_diff.shape) - for _ in range(n_perm): - # use this to re-index (i.e., swap along) the first axis of `ab` - swap = rs.random_sample(ab.shape[:-1]).argsort(axis=axis) - reidx[0] = np.repeat(swap[..., np.newaxis], ab.shape[-1], axis=-1) - # recompute difference between `a` and `b` (i.e., first axis of `ab`) - pdiff = np.squeeze(np.diff(ab[tuple(reidx)], axis=0)).mean(axis=axis) - permutations += np.abs(pdiff) >= abs_true - - pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_diff - - return true_diff, pvals - - -def permtest_pearsonr(a, b, axis=0, n_perm=1000, resamples=None, seed=0): - """ - Non-parametric equivalent of :py:func:`scipy.stats.pearsonr`. - - Generates two-tailed p-value for hypothesis of whether samples `a` and `b` - are correlated using permutation tests - - Parameters - ---------- - a,b : (N[, M]) array_like - Sample observations. 
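# Hedged sketch of the sign-flip null used by permtest_1samp above: center the
# sample on the hypothesised mean, flip signs at random, and count how often
# the permuted mean is at least as extreme as the observed one. The sample
# size and effect size are arbitrary illustrations.
import numpy as np

rng = np.random.RandomState(0)
sample = rng.normal(loc=0.3, scale=1.0, size=50)
zeroed = sample - 0.0                     # center on the null mean (popmean = 0)
true_mean = zeroed.mean()

n_perm, exceed = 1000, 1                  # start at 1 so the observed stat counts
for _ in range(n_perm):
    flipped = zeroed * rng.choice([-1, 1], size=zeroed.shape)
    exceed += np.abs(flipped.mean()) >= np.abs(true_mean)

pval = exceed / (n_perm + 1)              # smallest attainable p is 1 / (n_perm + 1)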
These arrays must have the same length and either - an equivalent number of columns or be broadcastable - axis : int or None, optional - Axis along which to compute test. If None, compute over whole arrays - of `a` and `b`. Default: 0 - n_perm : int, optional - Number of permutations to assess. Unless `a` and `b` are very small - along `axis` this will approximate a randomization test via Monte - Carlo simulations. Default: 1000 - resamples : (N, P) array_like, optional - Resampling array used to shuffle `a` when generating null distribution - of correlations. This array must have the same length as `a` and `b` - and should have at least the same number of columns as `n_perm` (if it - has more then only `n_perm` columns will be used. When not specified a - standard permutation is used to shuffle `a`. Default: None - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Set to None for "randomness". - Default: 0 - - Returns - ------- - corr : float or numpyndarray - Correlations - pvalue : float or numpy.ndarray - Non-parametric p-value - - Notes - ----- - The lowest p-value that can be returned by this function is equal to 1 / - (`n_perm` + 1). - - Examples - -------- - >>> from netneurotools import datasets, stats - - >>> np.random.seed(12345678) # set random seed for reproducible results - >>> x, y = datasets.make_correlated_xy(corr=0.1, size=100) - >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP - (0.10032564626876286, 0.3046953046953047) - - >>> x, y = datasets.make_correlated_xy(corr=0.5, size=100) - >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP - (0.500040365781984, 0.000999000999000999) - - Also works with multiple columns by either broadcasting the smaller array - to the larger: - - >>> z = x + np.random.normal(loc=1, size=100) - >>> stats.permtest_pearsonr(x, np.column_stack([y, z])) - (array([0.50004037, 0.25843187]), array([0.000999 , 0.01098901])) - - or by using matching columns in the two arrays (e.g., `x` and `y` vs - `a` and `b`): - - >>> a, b = datasets.make_correlated_xy(corr=0.9, size=100) - >>> stats.permtest_pearsonr(np.column_stack([x, a]), np.column_stack([y, b])) - (array([0.50004037, 0.89927523]), array([0.000999, 0.000999])) - """ # noqa - a, b, axis = _chk2_asarray(a, b, axis) - rs = check_random_state(seed) - - if len(a) != len(b): - raise ValueError('Provided arrays do not have same length') - - if a.size == 0 or b.size == 0: - return np.nan, np.nan - - if resamples is not None: - if n_perm > resamples.shape[-1]: - raise ValueError('Number of permutations requested exceeds size ' - 'of resampling array.') - - # divide by one forces coercion to float if ndim = 0 - true_corr = efficient_pearsonr(a, b)[0] / 1 - abs_true = np.abs(true_corr) - - permutations = np.ones(true_corr.shape) - for perm in range(n_perm): - # permute `a` and determine whether correlations exceed original - if resamples is None: - ap = a[rs.permutation(len(a))] - else: - ap = a[resamples[:, perm]] - permutations += np.abs(efficient_pearsonr(ap, b)[0]) >= abs_true - - pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_corr - - return true_corr, pvals - - -def efficient_pearsonr(a, b, ddof=1, nan_policy='propagate'): - """ - Compute correlation of matching columns in `a` and `b`. - - Parameters - ---------- - a,b : array_like - Sample observations. 
These arrays must have the same length and either - an equivalent number of columns or be broadcastable - ddof : int, optional - Degrees of freedom correction in the calculation of the standard - deviation. Default: 1 - nan_policy : bool, optional - Defines how to handle when input contains nan. 'propagate' returns nan, - 'raise' throws an error, 'omit' performs the calculations ignoring nan - values. Default: 'propagate' - - Returns - ------- - corr : float or numpy.ndarray - Pearson's correlation coefficient between matching columns of inputs - pval : float or numpy.ndarray - Two-tailed p-values - - Notes - ----- - If either input contains nan and nan_policy is set to 'omit', both arrays - will be masked to omit the nan entries. - - Examples - -------- - >>> from netneurotools import datasets, stats - - Generate some not-very-correlated and some highly-correlated data: - - >>> np.random.seed(12345678) # set random seed for reproducible results - >>> x1, y1 = datasets.make_correlated_xy(corr=0.1, size=100) - >>> x2, y2 = datasets.make_correlated_xy(corr=0.8, size=100) - - Calculate both correlations simultaneously: - - >>> stats.efficient_pearsonr(np.c_[x1, x2], np.c_[y1, y2]) - (array([0.10032565, 0.79961189]), array([3.20636135e-01, 1.97429944e-23])) - """ - a, b, axis = _chk2_asarray(a, b, 0) - if len(a) != len(b): - raise ValueError('Provided arrays do not have same length') - - if a.size == 0 or b.size == 0: - return np.nan, np.nan - - if nan_policy not in ('propagate', 'raise', 'omit'): - raise ValueError(f'Value for nan_policy "{nan_policy}" not allowed') - - a, b = a.reshape(len(a), -1), b.reshape(len(b), -1) - if (a.shape[1] != b.shape[1]): - a, b = np.broadcast_arrays(a, b) - - mask = np.logical_or(np.isnan(a), np.isnan(b)) - if nan_policy == 'raise' and np.any(mask): - raise ValueError('Input cannot contain NaN when nan_policy is "omit"') - elif nan_policy == 'omit': - # avoid making copies of the data, if possible - a = np.ma.masked_array(a, mask, copy=False, fill_value=np.nan) - b = np.ma.masked_array(b, mask, copy=False, fill_value=np.nan) - - with np.errstate(invalid='ignore'): - corr = (sstats.zscore(a, ddof=ddof, nan_policy=nan_policy) - * sstats.zscore(b, ddof=ddof, nan_policy=nan_policy)) - - sumfunc, n_obs = np.sum, len(a) - if nan_policy == 'omit': - corr = corr.filled(np.nan) - sumfunc = np.nansum - n_obs = np.squeeze(np.sum(np.logical_not(np.isnan(corr)), axis=0)) - - corr = sumfunc(corr, axis=0) / (n_obs - 1) - corr = np.squeeze(np.clip(corr, -1, 1)) / 1 - - # taken from scipy.stats - ab = (n_obs / 2) - 1 - prob = 2 * special.btdtr(ab, ab, 0.5 * (1 - np.abs(corr))) - - return corr, prob - - -def _gen_rotation(seed=None): - """ - Generate random matrix for rotating spherical coordinates. 
- - Parameters - ---------- - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation - - Returns - ------- - rotate_{l,r} : (3, 3) numpy.ndarray - Rotations for left and right hemisphere coordinates, respectively - """ - rs = check_random_state(seed) - - # for reflecting across Y-Z plane - reflect = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]]) - - # generate rotation for left - rotate_l, temp = np.linalg.qr(rs.normal(size=(3, 3))) - rotate_l = rotate_l @ np.diag(np.sign(np.diag(temp))) - if np.linalg.det(rotate_l) < 0: - rotate_l[:, 0] = -rotate_l[:, 0] - - # reflect the left rotation across Y-Z plane - rotate_r = reflect @ rotate_l @ reflect - - return rotate_l, rotate_r - - -def gen_spinsamples(coords, hemiid, n_rotate=1000, check_duplicates=True, - method='original', exact=False, seed=None, verbose=False, - return_cost=False): - """ - Return a resampling array for `coords` obtained from rotations / spins. - - Using the method initially proposed in [ST1]_ (and later modified + updated - based on findings in [ST2]_ and [ST3]_), this function applies random - rotations to the user-supplied `coords` in order to generate a resampling - array that preserves its spatial embedding. Rotations are generated for one - hemisphere and mirrored for the other (see `hemiid` for more information). - - Due to irregular sampling of `coords` and the randomness of the rotations - it is possible that some "rotations" may resample with replacement (i.e., - will not be a true permutation). The likelihood of this can be reduced by - either increasing the sampling density of `coords` or changing the - ``method`` parameter (see Notes for more information on the latter). - - Parameters - ---------- - coords : (N, 3) array_like - X, Y, Z coordinates of `N` nodes/parcels/regions/vertices defined on a - sphere - hemiid : (N,) array_like - Array denoting hemisphere designation of coordinates in `coords`, where - values should be {0, 1} denoting the different hemispheres. Rotations - are generated for one hemisphere and mirrored across the y-axis for the - other hemisphere. - n_rotate : int, optional - Number of rotations to generate. Default: 1000 - check_duplicates : bool, optional - Whether to check for and attempt to avoid duplicate resamplings. A - warnings will be raised if duplicates cannot be avoided. Setting to - True may increase the runtime of this function! Default: True - method : {'original', 'vasa', 'hungarian'}, optional - Method by which to match non- and rotated coordinates. Specifying - 'original' will use the method described in [ST1]_. Specfying 'vasa' - will use the method described in [ST4]_. Specfying 'hungarian' will use - the Hungarian algorithm to minimize the global cost of reassignment - (will dramatically increase runtime). Default: 'original' - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Default: None - verbose : bool, optional - Whether to print occasional status messages. Default: False - return_cost : bool, optional - Whether to return cost array (specified as Euclidean distance) for each - coordinate for each rotation Default: True - - Returns - ------- - spinsamples : (N, `n_rotate`) numpy.ndarray - Resampling matrix to use in permuting data based on supplied `coords`. - cost : (N, `n_rotate`,) numpy.ndarray - Cost (specified as Euclidean distance) of re-assigning each coordinate - for every rotation in `spinsamples`. Only provided if `return_cost` is - True. 
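# Hedged end-to-end sketch tying gen_spinsamples to the `resamples` argument
# of permtest_pearsonr (both part of the stats module removed in this patch,
# so the import path is the pre-refactor one). The coordinates are random
# stand-ins; real usage would pass parcel centroids on a spherical surface.
import numpy as np
from netneurotools import stats

rng = np.random.RandomState(1)
coords = rng.normal(size=(100, 3))
coords /= np.linalg.norm(coords, axis=1, keepdims=True)   # project onto unit sphere
hemi = (coords[:, 0] > 0).astype(int)                      # toy hemisphere labels

spins = stats.gen_spinsamples(coords, hemi, n_rotate=100, seed=1)
x, y = rng.normal(size=(2, 100))
corr, pval = stats.permtest_pearsonr(x, y, n_perm=100, resamples=spins)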
- - Notes - ----- - By default, this function uses the minimum Euclidean distance between the - original coordinates and the new, rotated coordinates to generate a - resampling array after each spin. Unfortunately, this can (with some - frequency) lead to multiple coordinates being re-assigned the same value: - - >>> from netneurotools import stats as nnstats - >>> coords = [[0, 0, 1], [1, 0, 0], [0, 0, 1], [1, 0, 0]] - >>> hemi = [0, 0, 1, 1] - >>> nnstats.gen_spinsamples(coords, hemi, n_rotate=1, seed=1, - ... method='original', check_duplicates=False) - array([[0], - [0], - [2], - [3]]) - - While this is reasonable in most circumstances, if you feel incredibly - strongly about having a perfect "permutation" (i.e., all indices appear - once and exactly once in the resampling), you can set the ``method`` - parameter to either 'vasa' or 'hungarian': - - >>> nnstats.gen_spinsamples(coords, hemi, n_rotate=1, seed=1, - ... method='vasa', check_duplicates=False) - array([[1], - [0], - [2], - [3]]) - >>> nnstats.gen_spinsamples(coords, hemi, n_rotate=1, seed=1, - ... method='hungarian', check_duplicates=False) - array([[0], - [1], - [2], - [3]]) - - Note that setting this parameter may increase the runtime of the function - (especially for `method='hungarian'`). Refer to [ST1]_ for information on - why the default (i.e., ``exact`` set to False) suffices in most cases. - - For the original MATLAB implementation of this function refer to [ST5]_. - - References - ---------- - .. [ST1] Alexander-Bloch, A., Shou, H., Liu, S., Satterthwaite, T. D., - Glahn, D. C., Shinohara, R. T., Vandekar, S. N., & Raznahan, A. (2018). - On testing for spatial correspondence between maps of human brain - structure and function. NeuroImage, 178, 540-51. - - .. [ST2] Blaser, R., & Fryzlewicz, P. (2016). Random Rotation Ensembles. - Journal of Machine Learning Research, 17(4), 1–26. - - .. [ST3] Lefèvre, J., Pepe, A., Muscato, J., De Guio, F., Girard, N., - Auzias, G., & Germanaud, D. (2018). SPANOL (SPectral ANalysis of Lobes): - A Spectral Clustering Framework for Individual and Group Parcellation of - Cortical Surfaces in Lobes. Frontiers in Neuroscience, 12, 354. - - .. [ST4] Váša, F., Seidlitz, J., Romero-Garcia, R., Whitaker, K. J., - Rosenthal, G., Vértes, P. E., ... & Jones, P. B. (2018). Adolescent - tuning of association cortex in human structural brain networks. - Cerebral Cortex, 28(1), 281-294. - - .. [ST5] https://github.com/spin-test/spin-test - """ - methods = ['original', 'vasa', 'hungarian'] - if method not in methods: - raise ValueError('Provided method "{}" invalid. Must be one of {}.' - .format(method, methods)) - - if exact: - warnings.warn('The `exact` parameter will no longer be supported in ' - 'an upcoming release. 
Please use the `method` parameter ' - 'instead.', DeprecationWarning, stacklevel=3) - if exact == 'vasa' and method == 'original': - method = 'vasa' - elif exact and method == 'original': - method = 'hungarian' - - seed = check_random_state(seed) - - coords = np.asanyarray(coords) - hemiid = np.squeeze(np.asanyarray(hemiid, dtype='int8')) - - # check supplied coordinate shape - if coords.shape[-1] != 3 or coords.squeeze().ndim != 2: - raise ValueError('Provided `coords` must be of shape (N, 3), not {}' - .format(coords.shape)) - - # ensure hemisphere designation array is correct - if hemiid.ndim != 1: - raise ValueError('Provided `hemiid` array must be one-dimensional.') - if len(coords) != len(hemiid): - raise ValueError('Provided `coords` and `hemiid` must have the same ' - 'length. Provided lengths: coords = {}, hemiid = {}' - .format(len(coords), len(hemiid))) - if np.max(hemiid) > 1 or np.min(hemiid) < 0: - raise ValueError('Hemiid must have values in {0, 1} denoting left and ' - 'right hemisphere coordinates, respectively. ' - + 'Provided array contains values: {}' - .format(np.unique(hemiid))) - - # empty array to store resampling indices - spinsamples = np.zeros((len(coords), n_rotate), dtype=int) - cost = np.zeros((len(coords), n_rotate)) - inds = np.arange(len(coords), dtype=int) - - # generate rotations and resampling array! - msg, warned = '', False - for n in range(n_rotate): - count, duplicated = 0, True - - if verbose: - msg = 'Generating spin {:>5} of {:>5}'.format(n, n_rotate) - print(msg, end='\r', flush=True) - - while duplicated and count < 500: - count, duplicated = count + 1, False - resampled = np.zeros(len(coords), dtype='int32') - - # rotate each hemisphere separately - for h, rot in enumerate(_gen_rotation(seed=seed)): - hinds = (hemiid == h) - coor = coords[hinds] - if len(coor) == 0: - continue - - # if we need an "exact" mapping (i.e., each node needs to be - # assigned EXACTLY once) then we have to calculate the full - # distance matrix which is a nightmare with respect to memory - # for anything that isn't parcellated data. - # that is, don't do this with vertex coordinates! - if method == 'vasa': - dist = spatial.distance_matrix(coor, coor @ rot) - # min of max a la Vasa et al., 2018 - col = np.zeros(len(coor), dtype='int32') - for _ in range(len(dist)): - # find parcel whose closest neighbor is farthest away - # overall; assign to that - row = dist.min(axis=1).argmax() - col[row] = dist[row].argmin() - cost[inds[hinds][row], n] = dist[row, col[row]] - # set to -inf and inf so they can't be assigned again - dist[row] = -np.inf - dist[:, col[row]] = np.inf - # optimization of total cost using Hungarian algorithm. 
this - # may result in certain parcels having higher cost than with - # `method='vasa'` but should always result in the total cost - # being lower #tradeoffs - elif method == 'hungarian': - dist = spatial.distance_matrix(coor, coor @ rot) - row, col = optimize.linear_sum_assignment(dist) - cost[hinds, n] = dist[row, col] - # if nodes can be assigned multiple targets, we can simply use - # the absolute minimum of the distances (no optimization - # required) which is _much_ lighter on memory - # huge thanks to https://stackoverflow.com/a/47779290 for this - # memory-efficient method - elif method == 'original': - dist, col = spatial.cKDTree(coor @ rot).query(coor, 1) - cost[hinds, n] = dist - - resampled[hinds] = inds[hinds][col] - - # if we want to check for duplicates ensure that we don't have any - if check_duplicates: - if np.any(np.all(resampled[:, None] == spinsamples[:, :n], 0)): - duplicated = True - # if our "spin" is identical to the input then that's no good - elif np.all(resampled == inds): - duplicated = True - - # if we broke out because we tried 500 rotations and couldn't generate - # a new one, warn that we're using duplicate rotations and give up. - # this should only be triggered if check_duplicates is set to True - if count == 500 and not warned: - warnings.warn( - 'Duplicate rotations used. Check resampling array ' - 'to determine real number of unique permutations.', stacklevel=2) - warned = True - - spinsamples[:, n] = resampled - - if verbose: - print(' ' * len(msg) + '\b' * len(msg), end='', flush=True) - - if return_cost: - return spinsamples, cost - - return spinsamples - - -def get_dominance_stats(X, y, use_adjusted_r_sq=True, verbose=False, n_jobs=1): - """ - Return the dominance analysis statistics for multilinear regression. - - This is a rewritten & simplified version of [DA1]_. It is briefly - tested against the original package, but still in early stages. - Please feel free to report any bugs. - - Warning: Still work-in-progress. Parameters might change! - - Parameters - ---------- - X : (N, M) array_like - Input data - y : (N,) array_like - Target values - use_adjusted_r_sq : bool, optional - Whether to use adjusted r squares. Default: True - verbose : bool, optional - Whether to print debug messages. Default: False - n_jobs : int, optional - The number of jobs to run in parallel. Default: 1 - - Returns - ------- - model_metrics : dict - The dominance metrics, currently containing `individual_dominance`, - `partial_dominance`, `total_dominance`, and `full_r_sq`. - model_r_sq : dict - Contains all model r squares - - Notes - ----- - Example usage - - .. code:: python - - from netneurotools.stats import get_dominance_stats - from sklearn.datasets import load_boston - X, y = load_boston(return_X_y=True) - model_metrics, model_r_sq = get_dominance_stats(X, y) - - To compare with [DA1]_, use `use_adjusted_r_sq=False` - - .. code:: python - - from dominance_analysis import Dominance_Datasets - from dominance_analysis import Dominance - boston_dataset=Dominance_Datasets.get_boston() - dominance_regression=Dominance(data=boston_dataset, - target='House_Price',objective=1) - incr_variable_rsquare=dominance_regression.incremental_rsquare() - dominance_regression.dominance_stats() - - References - ---------- - .. 
[DA1] https://github.com/dominance-analysis/dominance-analysis - - """ - # this helps to remove one element from a tuple - def remove_ret(tpl, elem): - lst = list(tpl) - lst.remove(elem) - return tuple(lst) - - # sklearn linear regression wrapper - def get_reg_r_sq(X, y, use_adjusted_r_sq=True): - lin_reg = LinearRegression() - lin_reg.fit(X, y) - yhat = lin_reg.predict(X) - SS_Residual = sum((y - yhat) ** 2) - SS_Total = sum((y - np.mean(y)) ** 2) - r_squared = 1 - (float(SS_Residual)) / SS_Total - adjusted_r_squared = 1 - (1 - r_squared) * \ - (len(y) - 1) / (len(y) - X.shape[1] - 1) - if use_adjusted_r_sq: - return adjusted_r_squared - else: - return r_squared - - # helper function to compute r_sq for a given idx_tuple - def compute_r_sq(idx_tuple): - return idx_tuple, get_reg_r_sq(X[:, idx_tuple], - y, - use_adjusted_r_sq=use_adjusted_r_sq) - - # generate all predictor combinations in list (num of predictors) of lists - n_predictor = X.shape[-1] - # n_comb_len_group = n_predictor - 1 - predictor_combs = [list(combinations(range(n_predictor), i)) - for i in range(1, n_predictor + 1)] - if verbose: - print(f"[Dominance analysis] Generated \ - {len([v for i in predictor_combs for v in i])} combinations") - - model_r_sq = dict() - results = Parallel(n_jobs=n_jobs)( - delayed(compute_r_sq)(idx_tuple) - for len_group in tqdm(predictor_combs, - desc='num-of-predictor loop', - disable=not verbose) - for idx_tuple in tqdm(len_group, - desc='insider loop', - disable=not verbose)) - - # extract r_sq from results - for idx_tuple, r_sq in results: - model_r_sq[idx_tuple] = r_sq - - if verbose: - print(f"[Dominance analysis] Acquired {len(model_r_sq)} r^2's") - - # getting all model metrics - model_metrics = dict([]) - - # individual dominance - individual_dominance = [] - for i_pred in range(n_predictor): - individual_dominance.append(model_r_sq[(i_pred,)]) - individual_dominance = np.array(individual_dominance).reshape(1, -1) - model_metrics["individual_dominance"] = individual_dominance - - # partial dominance - partial_dominance = [[] for _ in range(n_predictor - 1)] - for i_len in range(n_predictor - 1): - i_len_combs = list(combinations(range(n_predictor), i_len + 2)) - for j_node in range(n_predictor): - j_node_sel = [v for v in i_len_combs if j_node in v] - reduced_list = [remove_ret(comb, j_node) for comb in j_node_sel] - diff_values = [ - model_r_sq[j_node_sel[i]] - model_r_sq[reduced_list[i]] - for i in range(len(reduced_list))] - partial_dominance[i_len].append(np.mean(diff_values)) - - # save partial dominance - partial_dominance = np.array(partial_dominance) - model_metrics["partial_dominance"] = partial_dominance - # get total dominance - total_dominance = np.mean( - np.r_[individual_dominance, partial_dominance], axis=0) - # test and save total dominance - assert np.allclose(total_dominance.sum(), - model_r_sq[tuple(range(n_predictor))]), \ - "Sum of total dominance is not equal to full r square!" - model_metrics["total_dominance"] = total_dominance - # save full r^2 - model_metrics["full_r_sq"] = model_r_sq[tuple(range(n_predictor))] - - return model_metrics, model_r_sq - - -def network_pearsonr(annot1, annot2, weight): - r""" - Calculate pearson correlation between two annotation vectors. - - .. warning:: - Test before use. - - Parameters - ---------- - annot1 : (N,) array_like - First annotation vector, demean will be applied. - annot2 : (N,) array_like - Second annotation vector, demean will be applied. - weight : (N, N) array_like - Weight matrix. Diagonal elements should be 1. 
- - Returns - ------- - corr : float - Network correlation between `annot1` and `annot2` - - Notes - ----- - If Pearson correlation is represented as - - .. math:: - \rho_{x,y} = \dfrac{ - \mathrm{sum}(I \times (\hat{x} \otimes \hat{y})) - }{ - \sigma_x \sigma_y - } - - The network correlation is defined analogously as - - .. math:: - \rho_{x,y,G} = \dfrac{ - \mathrm{sum}(W \times (\hat{x} \otimes \hat{y})) - }{ - \sigma_{x,W} \sigma_{y,W} - } - - where :math:`\hat{x}` and :math:`\hat{y}` are the demeaned annotation vectors, - - The weight matrix :math:`W` is used to represent the network structure. - It is usually in the form of :math:`W = \\exp(-kL)` where :math:`L` is the - length matrix and :math:`k` is a decay parameter. - - Example using shortest path length as weight - - .. code:: python - - spl, _ = distance_wei_floyd(D) # input should be distance matrix - spl_wei = 1 / np.exp(spl) - netcorr = network_pearsonr(annot1, annot2, spl_wei) - - Example using (inverse) effective resistance as weight - - .. code:: python - - R_eff = effective_resistance(W) - R_eff_norm = R_eff / np.max(R_eff) - W = 1 / R_eff_norm - W = W / np.max(W) - np.fill_diagonal(W, 1.0) - netcorr = network_pearsonr(annot1, annot2, W) - - References - ---------- - .. [1] Coscia, M. (2021). Pearson correlations on complex networks. - Journal of Complex Networks, 9(6), cnab036. - https://doi.org/10.1093/comnet/cnab036 - - - See Also - -------- - netneurotools.stats.network_pearsonr_pairwise - """ - annot1 = annot1 - np.mean(annot1) - annot2 = annot2 - np.mean(annot2) - upper = np.sum(np.multiply(weight, np.outer(annot1, annot2))) - lower1 = np.sum(np.multiply(weight, np.outer(annot1, annot1))) - lower2 = np.sum(np.multiply(weight, np.outer(annot2, annot2))) - return upper / np.sqrt(lower1) / np.sqrt(lower2) - - -def network_pearsonr_numba(annot1, annot2, weight): - """ - Numba version of :meth:`netneurotools.stats.network_pearsonr`. - - .. warning:: - Test before use. - - Parameters - ---------- - annot1 : (N,) array_like - First annotation vector, demean will be applied. - annot2 : (N,) array_like - Second annotation vector, demean will be applied. - weight : (N, N) array_like - Weight matrix. Diagonal elements should be 1. - - Returns - ------- - corr : float - Network correlation between `annot1` and `annot2` - """ - n = annot1.shape[0] - annot1 = annot1 - np.mean(annot1) - annot2 = annot2 - np.mean(annot2) - upper, lower1, lower2 = 0.0, 0.0, 0.0 - for i in range(n): - for j in range(n): - upper += annot1[i] * annot2[j] * weight[i, j] - lower1 += annot1[i] * annot1[j] * weight[i, j] - lower2 += annot2[i] * annot2[j] * weight[i, j] - return upper / np.sqrt(lower1) / np.sqrt(lower2) - - -if use_numba: - network_pearsonr_numba = njit(network_pearsonr_numba) - - -def _cross_outer(annot_mat): - """ - Calculate cross outer product of input matrix. - - This functions is only used in `network_pearsonr_pairwise`. 
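# Hedged numeric check for network_pearsonr above: with an identity weight
# matrix (no off-diagonal network structure) the statistic should reduce to
# the ordinary Pearson correlation. The import path is the pre-refactor one.
import numpy as np
from scipy import stats as sstats
from netneurotools import stats

rng = np.random.default_rng(7)
x, y = rng.normal(size=(2, 50))
W = np.eye(50)                              # diagonal weights only

r_net = stats.network_pearsonr(x, y, W)
r_ref, _ = sstats.pearsonr(x, y)
assert np.isclose(r_net, r_ref)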
- - Parameters - ---------- - annot_mat : (N, D) array_like - Input matrix - - Returns - ------- - cross_outer : (N, N, D, D) numpy.ndarray - Cross outer product of `annot_mat` - """ - n_samp, n_feat = annot_mat.shape - cross_outer = np.empty((n_samp, n_samp, n_feat, n_feat), annot_mat.dtype) - for a in range(n_samp): - for b in range(n_samp): - for c in range(n_feat): - for d in range(n_feat): - cross_outer[a, b, c, d] = annot_mat[a, c] * annot_mat[b, d] - return cross_outer - - -if use_numba: - # ("float64[:,:,:,::1](float64[:,::1])") - _cross_outer = njit(_cross_outer) - - -def _multiply_sum(cross_outer, weight): - """ - Multiply and sum cross outer product. - - This functions is only used in `network_pearsonr_pairwise`. - - Parameters - ---------- - cross_outer : (N, N, D, D) array_like - Cross outer product of `annot_mat` - weight : (D, D) array_like - Weight matrix - - Returns - ------- - cross_outer_after : (N, N) numpy.ndarray - Result of multiplying and summing `cross_outer` - """ - n_samp, _, n_dim, _ = cross_outer.shape - cross_outer_after = np.empty((n_samp, n_samp), cross_outer.dtype) - for i in range(n_samp): - for j in range(n_samp): - curr_sum = 0.0 - for k in range(n_dim): - for l in range(n_dim): # noqa: E741 - curr_sum += weight[k, l] * cross_outer[i, j, k, l] - cross_outer_after[i, j] = curr_sum - return cross_outer_after - - -if use_numba: - # ("float64[:,::1](float64[:,:,:,::1],float64[:,::1])") - _multiply_sum = njit(_multiply_sum) - - -def network_pearsonr_pairwise(annot_mat, weight): - """ - Calculate pairwise network correlation between rows of `annot_mat`. - - .. warning:: - Test before use. - - Parameters - ---------- - annot_mat : (N, D) array_like - Input matrix - weight : (D, D) array_like - Weight matrix. Diagonal elements should be 1. - - Returns - ------- - corr_mat : (N, N) numpy.ndarray - Pairwise network correlation matrix - - Notes - ----- - This is a faster version of :meth:`netneurotools.stats.network_pearsonr` - for calculating pairwise network correlation between rows of `annot_mat`. - Check :meth:`netneurotools.stats.network_pearsonr` for details. - - See Also - -------- - netneurotools.stats.network_pearsonr - """ - annot_mat_demean = annot_mat - np.mean(annot_mat, axis=1, keepdims=True) - if use_numba: - cross_outer = _cross_outer(annot_mat_demean) - cross_outer_after = _multiply_sum(cross_outer, weight) - else: - # https://stackoverflow.com/questions/24839481/python-matrix-outer-product - cross_outer = np.einsum('ac,bd->abcd', annot_mat_demean, annot_mat_demean) - cross_outer_after = np.sum(np.multiply(cross_outer, weight), axis=(2, 3)) - # translating the two lines below in numba does not speed up much - lower = np.sqrt(np.diagonal(cross_outer_after)) - return cross_outer_after / np.einsum('i,j', lower, lower) - - -def _onehot_quadratic_form_broadcast(Q_star): - """ - Calculate one-hot quadratic form of input matrix. - - This functions is only used in `effective_resistance`. 
- - Parameters - ---------- - Q_star : (N, N) array_like - Input matrix - - Returns - ------- - R_eff : (N, N) numpy.ndarray - One-hot quadratic form of `Q_star` - """ - n = Q_star.shape[0] - R_eff = np.empty((n, n), Q_star.dtype) - for i in range(n): - for j in range(n): - R_eff[i, j] = Q_star[i, i] - Q_star[j, i] - Q_star[i, j] + Q_star[j, j] - return R_eff - - -if use_numba: - # ("float64[:,::1](float64[:,::1])") - _onehot_quadratic_form_broadcast = njit(_onehot_quadratic_form_broadcast) - - -def effective_resistance(W, directed=True): - """ - Calculate effective resistance matrix. - - The effective resistance between two nodes in a graph, often used in the context - of electrical networks, is a measure that stems from the inverse of the Laplacian - matrix of the graph. - - .. warning:: - Test before use. - - Parameters - ---------- - W : (N, N) array_like - Weight matrix. - directed : bool, optional - Whether the graph is directed. This is used to determine whether to turn on - the :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. When you are - using a symmetric weight matrix (while real-valued implying hermitian), you - can set this to False for better performance. Default: True - - Returns - ------- - R_eff : (N, N) numpy.ndarray - Effective resistance matrix - - Notes - ----- - The effective resistance between two nodes :math:`i` and :math:`j` is defined as - - .. math:: - R_{ij} = (e_i - e_j)^T Q^* (e_i - e_j) - - where :math:`Q^*` is the Moore-Penrose pseudoinverse of the Laplacian matrix - :math:`L` of the graph, and :math:`e_i` is the :math:`i`-th standard basis vector. - - References - ---------- - .. [1] Ellens, W., Spieksma, F. M., Van Mieghem, P., Jamakovic, A., & Kooij, - R. E. (2011). Effective graph resistance. Linear Algebra and Its Applications, - 435(10), 2491–2506. https://doi.org/10.1016/j.laa.2011.02.024 - - See Also - -------- - netneurotools.stats.network_polarisation - """ - L = _graph_laplacian(W) - Q_star = np.linalg.pinv(L, hermitian=not directed) - if use_numba: - R_eff = _onehot_quadratic_form_broadcast(Q_star) - else: - Q_star_diag = np.diag(Q_star) - R_eff = \ - Q_star_diag[:, np.newaxis] \ - - Q_star \ - - Q_star.T \ - + Q_star_diag[np.newaxis, :] - return R_eff - - -def _polariz_diff(vec): - """ - Calculate difference between positive and negative parts of a vector. - - This functions is only used in `network_polarisation`. - - Parameters - ---------- - vec : (N,) array_like - Input vector. Must have both positive and negative values. - - Returns - ------- - vec_diff : (N,) numpy.ndarray - Difference between positive and negative parts of `vec` - """ - # - vec_pos = np.maximum(vec, 0.0) - vec_pos /= np.max(vec_pos) - # - vec_neg = np.minimum(vec, 0.0) - vec_neg = np.abs(vec_neg) - vec_neg /= np.max(vec_neg) - return (vec_pos - vec_neg) - - -if use_numba: - _polariz_diff = njit(_polariz_diff) - - -def _quadratic_form(W, vec_left, vec_right, squared=False): - """ - Calculate quadratic form :math:`v_{left}^T W v_{right}`. - - Parameters - ---------- - W : (N, N) array_like - Input matrix. - vec_left : (N,) array_like - Left weight vector. - vec_right : (N,) array_like - Right weight vector. - squared : bool, optional - Whether to square the input weight matrix. 
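# Hedged numeric check of the effective-resistance definition documented
# above, computed directly from the Laplacian pseudoinverse for a 3-node path
# graph with unit-weight edges (so the end-to-end resistance should be 1 + 1 = 2).
import numpy as np

W = np.array([[0., 1., 0.],
              [1., 0., 1.],
              [0., 1., 0.]])                # path graph 0 - 1 - 2
L = np.diag(W.sum(axis=1)) - W              # graph Laplacian
Q_star = np.linalg.pinv(L, hermitian=True)  # Moore-Penrose pseudoinverse

# R_ij = Q*_ii - Q*_ij - Q*_ji + Q*_jj
R = np.diag(Q_star)[:, None] - Q_star - Q_star.T + np.diag(Q_star)[None, :]
assert np.isclose(R[0, 1], 1.0)             # adjacent nodes: one unit resistor
assert np.isclose(R[0, 2], 2.0)             # two unit resistors in series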
Default: False - - Returns - ------- - quadratic_form : float - Quadratic form from `W`, `vec_left`, and `vec_right` - """ - # [numpy] - - # (vec_left.T @ W @ vec_right)[0, 0] - # [numba] - # vec = np.ascontiguousarray(vec[np.newaxis, :]) - n = W.shape[0] - ret = 0.0 - for i in range(n): - for j in range(n): - if squared: - ret += vec_left[i] * vec_right[j] * W[i, j]**2 - else: - ret += vec_left[i] * vec_right[j] * W[i, j] - return ret - - -if use_numba: - _quadratic_form = njit(_quadratic_form) - - -def network_polarisation(vec, W, directed=True): - r""" - Calculate polarisation of a vector on a graph. - - Network polarisation is a measure of polizzartion taken into account all the - three factors below [1]_: - - - how extreme the opinions of the people are - - how much they organize into echo chambers, and - - how these echo chambers organize in the network - - .. warning:: - Test before use. - - Parameters - ---------- - vec : (N,) array_like - Polarization vector. Must have both positive and negative values. Will be - normalized between -1 and 1 internally. - W : (N, N) array_like - Weight matrix. - directed : bool, optional - Whether the graph is directed. This is used to determine whether to turn on - the :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. When you are - using a symmetric weight matrix (while real-valued implying hermitian), you - can set this to False for better performance. Default: True - - Returns - ------- - polariz : float - Polarization of `vec` on `W` - - Notes - ----- - The measure is based on the genralized Eucledian distance, defined as - - .. math:: - \delta_{G, o} = \sqrt{(o^+ - o^-)^T Q^* (o^+ - o^-)} - - where :math:`o^+` and :math:`o^-` are the positive and negative parts of the - polarization vector, and :math:`Q^*` is the Moore-Penrose pseudoinverse - of the Laplacian matrix :math:`L` of the graph. Check :func:`effective_resistance` - for similarity. - - References - ---------- - .. [1] Hohmann, M., Devriendt, K., & Coscia, M. (2023). Quantifying ideological - polarization on a network using generalized Euclidean distance. Science Advances, - 9(9), eabq2044. https://doi.org/10.1126/sciadv.abq2044 - - See Also - -------- - netneurotools.stats.effective_resistance - """ - L = _graph_laplacian(W) - Q_star = np.linalg.pinv(L, hermitian=not directed) - diff = _polariz_diff(vec) - if use_numba: - polariz_sq = _quadratic_form(Q_star, diff, diff, squared=False) - else: - polariz_sq = (diff.T @ Q_star @ diff) - return np.sqrt(polariz_sq) - - -def network_variance(vec, D): - r""" - Calculate variance of a vector on a graph. - - Network variance is a measure of variance taken into account the network - structure. - - .. warning:: - Test before use. - - Parameters - ---------- - vec : (N,) array_like - Input vector. Must be all positive. - Will be normalized internally as a probability distribution. - D : (N, N) array_like - Distance matrix. - - Returns - ------- - network_variance : float - Network variance of `vec` on `D` - - Notes - ----- - The network variance is defined as - - .. math:: - var(p) = \frac{1}{2} \sum_{i, j} p(i) p(j) d^2(i,j) - - where :math:`p` is the probability distribution of `vec`, and :math:`d(i,j)` - is the distance between node :math:`i` and :math:`j`. - - The distance matrix :math:`D` can make use of effective resistance or its - square root. - - Example using effective resistance as weight matrix - - .. code:: python - - R_eff = effective_resistance(W) - netvar = network_variance(vec, R_eff) - - References - ---------- - .. 
[1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022). - Variance and covariance of distributions on graphs. SIAM Review, 64(2), - 343–359. https://doi.org/10.1137/20M1361328 - - See Also - -------- - netneurotools.stats.network_covariance - """ - p = vec / np.sum(vec) - return 0.5 * (p.T @ np.multiply(D, D) @ p) - - -def network_variance_numba(vec, D): - """ - Numba version of :meth:`netneurotools.stats.network_variance`. - - Network variance is a measure of variance taken into account the network - structure. - - .. warning:: - Test before use. - - Parameters - ---------- - vec : (N,) array_like - Input vector. Must be all positive. - Will be normalized internally as a probability distribution. - D : (N, N) array_like - Distance matrix. - - Returns - ------- - network_variance : float - Network variance of `vec` on `D` - """ - p = vec / np.sum(vec) - return 0.5 * _quadratic_form(D, p, p, squared=True) - - -if use_numba: - network_variance_numba = njit(network_variance_numba) - - -def network_covariance(joint_pmat, D, calc_marginal=True): - r""" - Calculate covariance of a joint probability matrix on a graph. - - .. warning:: - Test before use. - - Parameters - ---------- - joint_pmat : (N, N) array_like - Joint probability matrix. Please make sure that it is valid. - D : (N, N) array_like - Distance matrix. - calc_marginal : bool, optional - Whether to calculate marginal variance. It will be marginally faster if - :code:`calc_marginal=False` (returning marginal variances as 0). Default: True - - Returns - ------- - network_covariance : float - Covariance of `joint_pmat` on `D` - var_p : float - Marginal variance of `joint_pmat` on `D`. - Will be 0 if :code:`calc_marginal=False` - var_q : float - Marginal variance of `joint_pmat` on `D`. - Will be 0 if :code:`calc_marginal=False` - - Notes - ----- - The network variance is defined as - - .. math:: - cov(P) = \frac{1}{2} \sum_{i, j} [p(i) q(j) - P(i,j)] d^2(i,j) - - where :math:`P` is the joint probability matrix, :math:`p` and :math:`q` - are the marginal probability distributions of `joint_pmat`, and :math:`d(i,j)` - is the distance between node :math:`i` and :math:`j`. - - Check :func:`network_variance` for usage. - - References - ---------- - .. [1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022). - Variance and covariance of distributions on graphs. SIAM Review, 64(2), - 343–359. https://doi.org/10.1137/20M1361328 - - See Also - -------- - netneurotools.stats.network_variance - """ - p = np.sum(joint_pmat, axis=1) - q = np.sum(joint_pmat, axis=0) - D_sq = np.multiply(D, D) - cov = p.T @ D_sq @ q - np.sum(np.multiply(joint_pmat, D_sq)) - if calc_marginal: - var_p = p.T @ D_sq @ p - var_q = q.T @ D_sq @ q - else: - var_p, var_q = 0, 0 - return 0.5 * cov, 0.5 * var_p, 0.5 * var_q - - -def network_covariance_numba(joint_pmat, D, calc_marginal=True): - """ - Numba version of :meth:`netneurotools.stats.network_covariance`. - - .. warning:: - Test before use. - - Parameters - ---------- - joint_pmat : (N, N) array_like - Joint probability matrix. Please make sure that it is valid. - D : (N, N) array_like - Distance matrix. - calc_marginal : bool, optional - Whether to calculate marginal variance. It will be marginally faster if - :code:`calc_marginal=False` (returning marginal variances as 0). Default: True - - Returns - ------- - network_covariance : float - Covariance of `joint_pmat` on `D` - var_p : float - Marginal variance of `joint_pmat` on `D`. 
-        Will be 0 if :code:`calc_marginal=False`
-    var_q : float
-        Marginal variance of `joint_pmat` on `D`.
-        Will be 0 if :code:`calc_marginal=False`
-    """
-    n = joint_pmat.shape[0]
-    p = np.sum(joint_pmat, axis=1)
-    q = np.sum(joint_pmat, axis=0)
-    cov = 0.0
-    var_p, var_q = 0.0, 0.0
-    for i in range(n):
-        for j in range(n):
-            cov += (p[i] * q[j] - joint_pmat[i, j]) * D[i, j]**2
-            if calc_marginal:
-                var_p += p[i] * p[j] * D[i, j]**2
-                var_q += q[i] * q[j] * D[i, j]**2
-    return 0.5 * cov, 0.5 * var_p, 0.5 * var_q
-
-
-if use_numba:
-    network_covariance_numba = njit(network_covariance_numba)
diff --git a/netneurotools/stats/__init__.py b/netneurotools/stats/__init__.py
new file mode 100644
index 0000000..e000548
--- /dev/null
+++ b/netneurotools/stats/__init__.py
@@ -0,0 +1,36 @@
+"""Functions for performing statistical operations."""
+
+
+from .correlation import (
+    efficient_pearsonr,
+    weighted_pearsonr,
+    make_correlated_xy
+)
+
+
+from .permutation_test import (
+    permtest_1samp,
+    permtest_rel,
+    permtest_pearsonr
+)
+
+
+from .regression import (
+    _add_constant,
+    residualize,
+    get_dominance_stats
+)
+
+
+# from .stats_utils import ()
+
+
+__all__ = [
+    # correlation
+    'efficient_pearsonr', 'weighted_pearsonr', 'make_correlated_xy',
+    # permutation_test
+    'permtest_1samp', 'permtest_rel', 'permtest_pearsonr',
+    # regression
+    '_add_constant', 'residualize', 'get_dominance_stats',
+    # stats_utils
+]
diff --git a/netneurotools/stats/correlation.py b/netneurotools/stats/correlation.py
new file mode 100644
index 0000000..38489fd
--- /dev/null
+++ b/netneurotools/stats/correlation.py
@@ -0,0 +1,189 @@
+"""Functions for calculating correlation."""
+
+import numpy as np
+import scipy.stats as sstats
+import scipy.special as sspecial
+from sklearn.utils.validation import check_random_state
+
+try:  # scipy >= 1.8.0
+    from scipy.stats._stats_py import _chk2_asarray
+except ImportError:  # scipy < 1.8.0
+    from scipy.stats.stats import _chk2_asarray
+
+
+def efficient_pearsonr(a, b, ddof=1, nan_policy='propagate'):
+    """
+    Compute correlation of matching columns in `a` and `b`.
+
+    Parameters
+    ----------
+    a,b : array_like
+        Sample observations. These arrays must have the same length and either
+        an equivalent number of columns or be broadcastable
+    ddof : int, optional
+        Degrees of freedom correction in the calculation of the standard
+        deviation. Default: 1
+    nan_policy : str, optional
+        Defines how to handle when input contains nan. 'propagate' returns nan,
+        'raise' throws an error, 'omit' performs the calculations ignoring nan
+        values. Default: 'propagate'
+
+    Returns
+    -------
+    corr : float or numpy.ndarray
+        Pearson's correlation coefficient between matching columns of inputs
+    pval : float or numpy.ndarray
+        Two-tailed p-values
+
+    Notes
+    -----
+    If either input contains nan and nan_policy is set to 'omit', both arrays
+    will be masked to omit the nan entries.
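+
+    As a rough sketch (illustrative values only, not part of the doctests),
+    the 'omit' policy amounts to correlating the complete pairs:
+
+    .. code:: python
+
+        import numpy as np
+        from netneurotools import stats
+
+        x = np.array([1., 2., np.nan, 4., 5.])
+        y = np.array([2., 4., 6., 8., np.nan])
+        # entries 2 and 4 are masked, so only three pairs contribute
+        r, p = stats.efficient_pearsonr(x, y, nan_policy='omit')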
+
+    Examples
+    --------
+    >>> from netneurotools import stats
+
+    Generate some not-very-correlated and some highly-correlated data:
+
+    >>> np.random.seed(12345678)  # set random seed for reproducible results
+    >>> x1, y1 = stats.make_correlated_xy(corr=0.1, size=100)
+    >>> x2, y2 = stats.make_correlated_xy(corr=0.8, size=100)
+
+    Calculate both correlations simultaneously:
+
+    >>> stats.efficient_pearsonr(np.c_[x1, x2], np.c_[y1, y2])
+    (array([0.10032565, 0.79961189]), array([3.20636135e-01, 1.97429944e-23]))
+    """
+    a, b, _ = _chk2_asarray(a, b, 0)
+    if len(a) != len(b):
+        raise ValueError('Provided arrays do not have same length')
+
+    if a.size == 0 or b.size == 0:
+        return np.nan, np.nan
+
+    if nan_policy not in ('propagate', 'raise', 'omit'):
+        raise ValueError(f'Value for nan_policy "{nan_policy}" not allowed')
+
+    a, b = a.reshape(len(a), -1), b.reshape(len(b), -1)
+    if (a.shape[1] != b.shape[1]):
+        a, b = np.broadcast_arrays(a, b)
+
+    mask = np.logical_or(np.isnan(a), np.isnan(b))
+    if nan_policy == 'raise' and np.any(mask):
+        raise ValueError('Input cannot contain NaN when nan_policy is "raise"')
+    elif nan_policy == 'omit':
+        # avoid making copies of the data, if possible
+        a = np.ma.masked_array(a, mask, copy=False, fill_value=np.nan)
+        b = np.ma.masked_array(b, mask, copy=False, fill_value=np.nan)
+
+    with np.errstate(invalid='ignore'):
+        corr = (sstats.zscore(a, ddof=ddof, nan_policy=nan_policy)
+                * sstats.zscore(b, ddof=ddof, nan_policy=nan_policy))
+
+    sumfunc, n_obs = np.sum, len(a)
+    if nan_policy == 'omit':
+        corr = corr.filled(np.nan)
+        sumfunc = np.nansum
+        n_obs = np.squeeze(np.sum(np.logical_not(np.isnan(corr)), axis=0))
+
+    corr = sumfunc(corr, axis=0) / (n_obs - 1)
+    corr = np.squeeze(np.clip(corr, -1, 1)) / 1
+
+    # taken from scipy.stats
+    ab = (n_obs / 2) - 1
+    prob = 2 * sspecial.betainc(ab, ab, 0.5 * (1 - np.abs(corr)))
+
+    return corr, prob
+
+
+def weighted_pearsonr():
+    """Calculate weighted Pearson correlation coefficient."""
+    pass
+
+
+def make_correlated_xy(corr=0.85, size=10000, seed=None, tol=0.001):
+    """
+    Generate random vectors that are correlated to approximately `corr`.
+
+    Parameters
+    ----------
+    corr : [-1, 1] float or (N, N) numpy.ndarray, optional
+        The approximate correlation desired. If a float is provided, two
+        vectors with the specified level of correlation will be generated. If
+        an array is provided, it is assumed to be a symmetrical correlation
+        matrix and ``len(corr)`` vectors with the specified levels of
+        correlation will be generated. Default: 0.85
+    size : int or tuple, optional
+        Desired size of the generated vectors. Default: 10000
+    seed : {int, np.random.RandomState instance, None}, optional
+        Seed for random number generation. Default: None
+    tol : [0, 1] float, optional
+        Tolerance of correlation between generated `vectors` and specified
+        `corr`. Default: 0.001
+
+    Returns
+    -------
+    vectors : numpy.ndarray
+        Random vectors of size `size` with correlation specified by `corr`
+
+    Examples
+    --------
+    >>> from netneurotools import stats
+
+    By default two vectors are generated with specified correlation
+
+    >>> x, y = stats.make_correlated_xy()
+    >>> np.corrcoef(x, y)  # doctest: +SKIP
+    array([[1.        , 0.85083661],
+           [0.85083661, 1.        ]])
+    >>> x, y = stats.make_correlated_xy(corr=0.2)
+    >>> np.corrcoef(x, y)  # doctest: +SKIP
+    array([[1.        , 0.20069953],
+           [0.20069953, 1.        ]])
+
+    You can also provide correlation matrices to generate more than two vectors
+    if desired.
Note that this makes it more difficult to ensure the actual + correlations are close to the desired values: + + >>> corr = [[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]] + >>> out = stats.make_correlated_xy(corr=corr) + >>> out.shape + (3, 10000) + >>> np.corrcoef(out) # doctest: +SKIP + array([[1. , 0.50965273, 0.30235686], + [0.50965273, 1. , 0.01089107], + [0.30235686, 0.01089107, 1. ]]) + """ + rs = check_random_state(seed) + + # no correlations outside [-1, 1] bounds + if np.any(np.abs(corr) > 1): + raise ValueError('Provided `corr` must (all) be in range [-1, 1].') + + # if we're given a single number, assume two vectors are desired + if isinstance(corr, (int, float)): + covs = np.ones((2, 2)) * 0.111 + covs[(0, 1), (1, 0)] *= corr + # if we're given a correlation matrix, assume `N` vectors are desired + elif isinstance(corr, (list, np.ndarray)): + corr = np.asarray(corr) + if corr.ndim != 2 or len(corr) != len(corr.T): + raise ValueError('If `corr` is a list or array, must be a 2D ' + 'square array, not {}'.format(corr.shape)) + if np.any(np.diag(corr) != 1): + raise ValueError('Diagonal of `corr` must be 1.') + covs = corr * 0.111 + means = [0] * len(covs) + + # generate the variables + count = 0 + while count < 500: + vectors = rs.multivariate_normal(mean=means, cov=covs, size=size).T + flat = vectors.reshape(len(vectors), -1) + # if diff between actual and desired correlations less than tol, break + if np.all(np.abs(np.corrcoef(flat) - (covs / 0.111)) < tol): + break + count += 1 + + return vectors diff --git a/netneurotools/stats/permutation_test.py b/netneurotools/stats/permutation_test.py new file mode 100644 index 0000000..147e4d1 --- /dev/null +++ b/netneurotools/stats/permutation_test.py @@ -0,0 +1,283 @@ +"""Functions for calculating permutation test.""" + +import numpy as np +from sklearn.utils.validation import check_random_state + +try: # scipy >= 1.8.0 + from scipy.stats._stats_py import _chk2_asarray +except ImportError: # scipy < 1.8.0 + from scipy.stats.stats import _chk2_asarray + +from .correlation import efficient_pearsonr + + +def permtest_1samp(a, popmean, axis=0, n_perm=1000, seed=0): + """ + Non-parametric equivalent of :py:func:`scipy.stats.ttest_1samp`. + + Generates two-tailed p-value for hypothesis of whether `a` differs from + `popmean` using permutation tests + + Parameters + ---------- + a : array_like + Sample observations + popmean : float or array_like + Expected valued in null hypothesis. If array_like then it must have the + same shape as `a` excluding the `axis` dimension + axis : int or None, optional + Axis along which to compute test. If None, compute over the whole array + of `a`. Default: 0 + n_perm : int, optional + Number of permutations to assess. Unless `a` is very small along `axis` + this will approximate a randomization test via Monte Carlo simulations. + Default: 1000 + seed : {int, np.random.RandomState instance, None}, optional + Seed for random number generation. Set to None for "randomness". + Default: 0 + + Returns + ------- + stat : float or numpy.ndarray + Difference from `popmean` + pvalue : float or numpy.ndarray + Non-parametric p-value + + Notes + ----- + Providing multiple values to `popmean` to run *independent* tests in + parallel is not currently supported. + + The lowest p-value that can be returned by this function is equal to 1 / + (`n_perm` + 1). 
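+
+    As a quick back-of-the-envelope check (illustrative only), the default
+    ``n_perm=1000`` therefore bounds the reported p-values at:
+
+    .. code:: python
+
+        n_perm = 1000
+        min_p = 1 / (n_perm + 1)  # ~0.000999, as in the examples below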
+ + Examples + -------- + >>> from netneurotools import stats + >>> np.random.seed(7654567) # set random seed for reproducible results + >>> rvs = np.random.normal(loc=5, scale=10, size=(50, 2)) + + Test if mean of random sample is equal to true mean, and different mean. We + reject the null hypothesis in the second case and don't reject it in the + first case. + + >>> stats.permtest_1samp(rvs, 5.0) + (array([-0.985602 , -0.05204969]), array([0.48551449, 0.95904096])) + >>> stats.permtest_1samp(rvs, 0.0) + (array([4.014398 , 4.94795031]), array([0.00699301, 0.000999 ])) + + Example using axis and non-scalar dimension for population mean + + >>> stats.permtest_1samp(rvs, [5.0, 0.0]) + (array([-0.985602 , 4.94795031]), array([0.48551449, 0.000999 ])) + >>> stats.permtest_1samp(rvs.T, [5.0, 0.0], axis=1) + (array([-0.985602 , 4.94795031]), array([0.51548452, 0.000999 ])) + """ + a, popmean, axis = _chk2_asarray(a, popmean, axis) + rs = check_random_state(seed) + + if a.size == 0: + return np.nan, np.nan + + # ensure popmean will broadcast to `a` correctly + if popmean.ndim != a.ndim: + popmean = np.expand_dims(popmean, axis=axis) + + # center `a` around `popmean` and calculate original mean + zeroed = a - popmean + true_mean = zeroed.mean(axis=axis) / 1 + abs_mean = np.abs(true_mean) + + # this for loop is not _the fastest_ but is memory efficient + # the broadcasting alt. would mean storing zeroed.size * n_perm in memory + permutations = np.ones(true_mean.shape) + for _ in range(n_perm): + flipped = zeroed * rs.choice([-1, 1], size=zeroed.shape) # sign flip + permutations += np.abs(flipped.mean(axis=axis)) >= abs_mean + + pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_mean + + return true_mean, pvals + + +def permtest_rel(a, b, axis=0, n_perm=1000, seed=0): + """ + Non-parametric equivalent of :py:func:`scipy.stats.ttest_rel`. + + Generates two-tailed p-value for hypothesis of whether related samples `a` + and `b` differ using permutation tests + + Parameters + ---------- + a, b : array_like + Sample observations. These arrays must have the same shape. + axis : int or None, optional + Axis along which to compute test. If None, compute over whole arrays + of `a` and `b`. Default: 0 + n_perm : int, optional + Number of permutations to assess. Unless `a` and `b` are very small + along `axis` this will approximate a randomization test via Monte + Carlo simulations. Default: 1000 + seed : {int, np.random.RandomState instance, None}, optional + Seed for random number generation. Set to None for "randomness". + Default: 0 + + Returns + ------- + stat : float or numpy.ndarray + Average difference between `a` and `b` + pvalue : float or numpy.ndarray + Non-parametric p-value + + Notes + ----- + The lowest p-value that can be returned by this function is equal to 1 / + (`n_perm` + 1). + + Examples + -------- + >>> from netneurotools import stats + + >>> np.random.seed(12345678) # set random seed for reproducible results + >>> rvs1 = np.random.normal(loc=5, scale=10, size=500) + >>> rvs2 = (np.random.normal(loc=5, scale=10, size=500) + ... + np.random.normal(scale=0.2, size=500)) + >>> stats.permtest_rel(rvs1, rvs2) # doctest: +SKIP + (-0.16506275161572695, 0.8021978021978022) + + >>> rvs3 = (np.random.normal(loc=8, scale=10, size=500) + ... 
+ np.random.normal(scale=0.2, size=500))
+    >>> stats.permtest_rel(rvs1, rvs3)  # doctest: +SKIP
+    (2.40533726097883, 0.000999000999000999)
+    """
+    a, b, axis = _chk2_asarray(a, b, axis)
+    rs = check_random_state(seed)
+
+    if a.shape[axis] != b.shape[axis]:
+        raise ValueError('Provided arrays do not have same length along axis')
+
+    if a.size == 0 or b.size == 0:
+        return np.nan, np.nan
+
+    # calculate original difference in means
+    ab = np.stack([a, b], axis=0)
+    if ab.ndim < 3:
+        ab = np.expand_dims(ab, axis=-1)
+    true_diff = np.squeeze(np.diff(ab, axis=0)).mean(axis=axis) / 1
+    abs_true = np.abs(true_diff)
+
+    # idx array
+    reidx = np.meshgrid(*[range(f) for f in ab.shape], indexing='ij')
+
+    permutations = np.ones(true_diff.shape)
+    for _ in range(n_perm):
+        # use this to re-index (i.e., swap along) the first axis of `ab`
+        swap = rs.random_sample(ab.shape[:-1]).argsort(axis=axis)
+        reidx[0] = np.repeat(swap[..., np.newaxis], ab.shape[-1], axis=-1)
+        # recompute difference between `a` and `b` (i.e., first axis of `ab`)
+        pdiff = np.squeeze(np.diff(ab[tuple(reidx)], axis=0)).mean(axis=axis)
+        permutations += np.abs(pdiff) >= abs_true
+
+    pvals = permutations / (n_perm + 1)  # + 1 in denom accounts for true_diff
+
+    return true_diff, pvals
+
+
+def permtest_pearsonr(a, b, axis=0, n_perm=1000, resamples=None, seed=0):
+    """
+    Non-parametric equivalent of :py:func:`scipy.stats.pearsonr`.
+
+    Generates two-tailed p-value for hypothesis of whether samples `a` and `b`
+    are correlated using permutation tests
+
+    Parameters
+    ----------
+    a,b : (N[, M]) array_like
+        Sample observations. These arrays must have the same length and either
+        an equivalent number of columns or be broadcastable
+    axis : int or None, optional
+        Axis along which to compute test. If None, compute over whole arrays
+        of `a` and `b`. Default: 0
+    n_perm : int, optional
+        Number of permutations to assess. Unless `a` and `b` are very small
+        along `axis` this will approximate a randomization test via Monte
+        Carlo simulations. Default: 1000
+    resamples : (N, P) array_like, optional
+        Resampling array used to shuffle `a` when generating null distribution
+        of correlations. This array must have the same length as `a` and `b`
+        and should have at least the same number of columns as `n_perm` (if it
+        has more, then only `n_perm` columns will be used). When not specified a
+        standard permutation is used to shuffle `a`. Default: None
+    seed : {int, np.random.RandomState instance, None}, optional
+        Seed for random number generation. Set to None for "randomness".
+        Default: 0
+
+    Returns
+    -------
+    corr : float or numpy.ndarray
+        Correlations
+    pvalue : float or numpy.ndarray
+        Non-parametric p-value
+
+    Notes
+    -----
+    The lowest p-value that can be returned by this function is equal to 1 /
+    (`n_perm` + 1).
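+
+    A minimal sketch of building a `resamples` array by hand (any array of
+    row indices with shape (N, n_perm) works; constrained nulls such as
+    spatial spins would be generated elsewhere):
+
+    .. code:: python
+
+        import numpy as np
+        from netneurotools import stats
+
+        x, y = stats.make_correlated_xy(corr=0.2, size=100)
+        rng = np.random.default_rng(1234)
+        perms = [rng.permutation(len(x)) for _ in range(1000)]
+        resamples = np.column_stack(perms)
+        corr, pval = stats.permtest_pearsonr(x, y, n_perm=1000,
+                                             resamples=resamples)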
+ + Examples + -------- + >>> from netneurotools import stats + + >>> np.random.seed(12345678) # set random seed for reproducible results + >>> x, y = stats.make_correlated_xy(corr=0.1, size=100) + >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP + (0.10032564626876286, 0.3046953046953047) + + >>> x, y = stats.make_correlated_xy(corr=0.5, size=100) + >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP + (0.500040365781984, 0.000999000999000999) + + Also works with multiple columns by either broadcasting the smaller array + to the larger: + + >>> z = x + np.random.normal(loc=1, size=100) + >>> stats.permtest_pearsonr(x, np.column_stack([y, z])) + (array([0.50004037, 0.25843187]), array([0.000999 , 0.01098901])) + + or by using matching columns in the two arrays (e.g., `x` and `y` vs + `a` and `b`): + + >>> a, b = stats.make_correlated_xy(corr=0.9, size=100) + >>> stats.permtest_pearsonr(np.column_stack([x, a]), np.column_stack([y, b])) + (array([0.50004037, 0.89927523]), array([0.000999, 0.000999])) + """ # noqa + a, b, axis = _chk2_asarray(a, b, axis) + rs = check_random_state(seed) + + if len(a) != len(b): + raise ValueError('Provided arrays do not have same length') + + if a.size == 0 or b.size == 0: + return np.nan, np.nan + + if resamples is not None: + if n_perm > resamples.shape[-1]: + raise ValueError('Number of permutations requested exceeds size ' + 'of resampling array.') + + # divide by one forces coercion to float if ndim = 0 + true_corr = efficient_pearsonr(a, b)[0] / 1 + abs_true = np.abs(true_corr) + + permutations = np.ones(true_corr.shape) + for perm in range(n_perm): + # permute `a` and determine whether correlations exceed original + if resamples is None: + ap = a[rs.permutation(len(a))] + else: + ap = a[resamples[:, perm]] + permutations += np.abs(efficient_pearsonr(ap, b)[0]) >= abs_true + + pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_corr + + return true_corr, pvals diff --git a/netneurotools/stats/regression.py b/netneurotools/stats/regression.py new file mode 100644 index 0000000..c6ac6e2 --- /dev/null +++ b/netneurotools/stats/regression.py @@ -0,0 +1,256 @@ +"""Functions for calculating regression.""" + +from itertools import combinations + +import numpy as np +from tqdm import tqdm +import scipy.stats as sstats +from joblib import Parallel, delayed +from sklearn.linear_model import LinearRegression +from sklearn.utils.validation import check_array + + +def _add_constant(data): + """ + Add a constant (i.e., intercept) term to `data`. + + Parameters + ---------- + data : (N, M) array_like + Samples by features data array + + Returns + ------- + data : (N, F) np.ndarray + Where `F` is `M + 1` + + Examples + -------- + >>> from netneurotools import stats + + >>> A = np.zeros((5, 5)) + >>> Ac = stats._add_constant(A) + >>> Ac + array([[0., 0., 0., 0., 0., 1.], + [0., 0., 0., 0., 0., 1.], + [0., 0., 0., 0., 0., 1.], + [0., 0., 0., 0., 0., 1.], + [0., 0., 0., 0., 0., 1.]]) + """ + data = check_array(data, ensure_2d=False) + return np.column_stack([data, np.ones(len(data))]) + + +def residualize(X, Y, Xc=None, Yc=None, normalize=True, add_intercept=True): + """ + Return residuals of regression equation from `Y ~ X`. + + Parameters + ---------- + X : (N[, R]) array_like + Coefficient matrix of `R` variables for `N` subjects + Y : (N[, F]) array_like + Dependent variable matrix of `F` variables for `N` subjects + Xc : (M[, R]) array_like, optional + Coefficient matrix of `R` variables for `M` subjects. 
If not specified + then `X` is used to estimate betas. Default: None + Yc : (M[, F]) array_like, optional + Dependent variable matrix of `F` variables for `M` subjects. If not + specified then `Y` is used to estimate betas. Default: None + normalize : bool, optional + Whether to normalize (i.e., z-score) residuals. Will use residuals from + `Yc ~ Xc` for generating mean and variance. Default: True + add_intercept : bool, optional + Whether to add intercept to `X` (and `Xc`, if provided). The intercept + will not be removed, just used in beta estimation. Default: True + + Returns + ------- + Yr : (N, F) numpy.ndarray + Residuals of `Y ~ X` + + Notes + ----- + If both `Xc` and `Yc` are provided, these are used to calculate betas which + are then applied to `X` and `Y`. + """ + if ((Yc is None and Xc is not None) or (Yc is not None and Xc is None)): + raise ValueError('If processing against a comparative group, you must ' + 'provide both `Xc` and `Yc`.') + + X, Y = np.asarray(X), np.asarray(Y) + + if Yc is None: + Xc, Yc = X.copy(), Y.copy() + else: + Xc, Yc = np.asarray(Xc), np.asarray(Yc) + + # add intercept to regressors if requested and calculate fit + if add_intercept: + X, Xc = _add_constant(X), _add_constant(Xc) + betas, *_ = np.linalg.lstsq(Xc, Yc, rcond=None) + + # remove intercept from regressors and betas for calculation of residuals + if add_intercept: + betas = betas[:-1] + X, Xc = X[:, :-1], Xc[:, :-1] + + # calculate residuals + Yr = Y - (X @ betas) + Ycr = Yc - (Xc @ betas) + + if normalize: + Yr = sstats.zmap(Yr, compare=Ycr) + + return Yr + + +def get_dominance_stats(X, y, use_adjusted_r_sq=True, verbose=False, n_jobs=1): + """ + Return the dominance analysis statistics for multilinear regression. + + This is a rewritten & simplified version of [DA1]_. It is briefly + tested against the original package, but still in early stages. + Please feel free to report any bugs. + + Warning: Still work-in-progress. Parameters might change! + + Parameters + ---------- + X : (N, M) array_like + Input data + y : (N,) array_like + Target values + use_adjusted_r_sq : bool, optional + Whether to use adjusted r squares. Default: True + verbose : bool, optional + Whether to print debug messages. Default: False + n_jobs : int, optional + The number of jobs to run in parallel. Default: 1 + + Returns + ------- + model_metrics : dict + The dominance metrics, currently containing `individual_dominance`, + `partial_dominance`, `total_dominance`, and `full_r_sq`. + model_r_sq : dict + Contains all model r squares + + Notes + ----- + Example usage + + .. code:: python + + from netneurotools.stats import get_dominance_stats + from sklearn.datasets import load_boston + X, y = load_boston(return_X_y=True) + model_metrics, model_r_sq = get_dominance_stats(X, y) + + To compare with [DA1]_, use `use_adjusted_r_sq=False` + + .. code:: python + + from dominance_analysis import Dominance_Datasets + from dominance_analysis import Dominance + boston_dataset=Dominance_Datasets.get_boston() + dominance_regression=Dominance(data=boston_dataset, + target='House_Price',objective=1) + incr_variable_rsquare=dominance_regression.incremental_rsquare() + dominance_regression.dominance_stats() + + References + ---------- + .. 
[DA1] https://github.com/dominance-analysis/dominance-analysis + + """ + # this helps to remove one element from a tuple + def remove_ret(tpl, elem): + lst = list(tpl) + lst.remove(elem) + return tuple(lst) + + # sklearn linear regression wrapper + def get_reg_r_sq(X, y, use_adjusted_r_sq=True): + lin_reg = LinearRegression() + lin_reg.fit(X, y) + yhat = lin_reg.predict(X) + SS_Residual = sum((y - yhat) ** 2) + SS_Total = sum((y - np.mean(y)) ** 2) + r_squared = 1 - (float(SS_Residual)) / SS_Total + adjusted_r_squared = 1 - (1 - r_squared) * \ + (len(y) - 1) / (len(y) - X.shape[1] - 1) + if use_adjusted_r_sq: + return adjusted_r_squared + else: + return r_squared + + # helper function to compute r_sq for a given idx_tuple + def compute_r_sq(idx_tuple): + return idx_tuple, get_reg_r_sq(X[:, idx_tuple], + y, + use_adjusted_r_sq=use_adjusted_r_sq) + + # generate all predictor combinations in list (num of predictors) of lists + n_predictor = X.shape[-1] + # n_comb_len_group = n_predictor - 1 + predictor_combs = [list(combinations(range(n_predictor), i)) + for i in range(1, n_predictor + 1)] + if verbose: + print(f"[Dominance analysis] Generated \ + {len([v for i in predictor_combs for v in i])} combinations") + + model_r_sq = dict() + results = Parallel(n_jobs=n_jobs)( + delayed(compute_r_sq)(idx_tuple) + for len_group in tqdm(predictor_combs, + desc='num-of-predictor loop', + disable=not verbose) + for idx_tuple in tqdm(len_group, + desc='insider loop', + disable=not verbose)) + + # extract r_sq from results + for idx_tuple, r_sq in results: + model_r_sq[idx_tuple] = r_sq + + if verbose: + print(f"[Dominance analysis] Acquired {len(model_r_sq)} r^2's") + + # getting all model metrics + model_metrics = dict([]) + + # individual dominance + individual_dominance = [] + for i_pred in range(n_predictor): + individual_dominance.append(model_r_sq[(i_pred,)]) + individual_dominance = np.array(individual_dominance).reshape(1, -1) + model_metrics["individual_dominance"] = individual_dominance + + # partial dominance + partial_dominance = [[] for _ in range(n_predictor - 1)] + for i_len in range(n_predictor - 1): + i_len_combs = list(combinations(range(n_predictor), i_len + 2)) + for j_node in range(n_predictor): + j_node_sel = [v for v in i_len_combs if j_node in v] + reduced_list = [remove_ret(comb, j_node) for comb in j_node_sel] + diff_values = [ + model_r_sq[j_node_sel[i]] - model_r_sq[reduced_list[i]] + for i in range(len(reduced_list))] + partial_dominance[i_len].append(np.mean(diff_values)) + + # save partial dominance + partial_dominance = np.array(partial_dominance) + model_metrics["partial_dominance"] = partial_dominance + # get total dominance + total_dominance = np.mean( + np.r_[individual_dominance, partial_dominance], axis=0) + # test and save total dominance + assert np.allclose(total_dominance.sum(), + model_r_sq[tuple(range(n_predictor))]), \ + "Sum of total dominance is not equal to full r square!" 
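+    # total dominance averages the individual (size-1) and partial (size >= 2)
+    # dominance levels for each predictor; these per-predictor averages should
+    # sum to the full-model R^2, which the assertion above checks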
+ model_metrics["total_dominance"] = total_dominance + # save full r^2 + model_metrics["full_r_sq"] = model_r_sq[tuple(range(n_predictor))] + + return model_metrics, model_r_sq diff --git a/netneurotools/stats/stats_utils.py b/netneurotools/stats/stats_utils.py new file mode 100644 index 0000000..9964f8e --- /dev/null +++ b/netneurotools/stats/stats_utils.py @@ -0,0 +1 @@ +"""Functions for supporting statistics.""" diff --git a/netneurotools/stats/tests/__init__.py b/netneurotools/stats/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/stats/tests/test_correlation.py b/netneurotools/stats/tests/test_correlation.py new file mode 100644 index 0000000..f5c8653 --- /dev/null +++ b/netneurotools/stats/tests/test_correlation.py @@ -0,0 +1,67 @@ +"""For testing netneurotools.stats.correlation functionality.""" + +import pytest +import numpy as np +from netneurotools import stats + + +@pytest.mark.parametrize('x, y, expected', [ + # basic one-dimensional input + (range(5), range(5), (1.0, 0.0)), + # broadcasting occurs regardless of input order + (np.stack([range(5), range(5, 0, -1)], 1), range(5), + ([1.0, -1.0], [0.0, 0.0])), + (range(5), np.stack([range(5), range(5, 0, -1)], 1), + ([1.0, -1.0], [0.0, 0.0])), + # correlation between matching columns + (np.stack([range(5), range(5, 0, -1)], 1), + np.stack([range(5), range(5, 0, -1)], 1), + ([1.0, 1.0], [0.0, 0.0])) +]) +def test_efficient_pearsonr(x, y, expected): + """Test efficient_pearsonr function.""" + assert np.allclose(stats.efficient_pearsonr(x, y), expected) + + +def test_efficient_pearsonr_errors(): + """Test efficient_pearsonr function errors.""" + with pytest.raises(ValueError): + stats.efficient_pearsonr(range(4), range(5)) + + assert all(np.isnan(a) for a in stats.efficient_pearsonr([], [])) + + +@pytest.mark.parametrize('corr, size, tol, seed', [ + (0.85, (1000,), 0.05, 1234), + (0.85, (1000, 1000), 0.05, 1234), + ([[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]], (1000,), 0.05, 1234) +]) +def test_make_correlated_xy(corr, size, tol, seed): + """Test make_correlated_xy function.""" + out = stats.make_correlated_xy(corr=corr, size=size, + tol=tol, seed=seed) + # ensure output is expected shape + assert out.shape[1:] == size + assert len(out) == len(corr) if hasattr(corr, '__len__') else 2 + + # check outputs are correlated within specified tolerance + realcorr = np.corrcoef(out.reshape(len(out), -1)) + if len(realcorr) == 2 and not hasattr(corr, '__len__'): + realcorr = realcorr[0, 1] + assert np.all(np.abs(realcorr - corr) < tol) + + # check that seed generates reproducible values + duplicate = stats.make_correlated_xy(corr=corr, size=size, + tol=tol, seed=seed) + assert np.allclose(out, duplicate) + + +@pytest.mark.parametrize('corr', [ + (1.5), (-1.5), # outside range of [-1, 1] + ([0.85]), ([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]), # not 2D / square array + ([[0.85]]), ([[1, 0.5], [0.5, 0.5]]) # diagonal not equal to 1 +]) +def test_make_correlated_xy_errors(corr): + """Test make_correlated_xy function errors.""" + with pytest.raises(ValueError): + stats.make_correlated_xy(corr) diff --git a/netneurotools/stats/tests/test_permutation.py b/netneurotools/stats/tests/test_permutation.py new file mode 100644 index 0000000..ae6a335 --- /dev/null +++ b/netneurotools/stats/tests/test_permutation.py @@ -0,0 +1,65 @@ +"""For testing netneurotools.stats.permutation_test functionality.""" + +import pytest +import numpy as np +from netneurotools import stats + + +@pytest.mark.xfail +def test_permtest_1samp(): + 
"""Test permutation test for one-sample t-test.""" + assert False + # n1, n2, n3 = 10, 15, 20 + # rs = np.random.RandomState(1234) + # rvn1 = rs.normal(loc=8, scale=10, size=(n1, n2, n3)) + + # t1, p1 = stats.permtest_1samp(rvn1, 1, axis=0) + + +def test_permtest_rel(): + """Test permutation test for paired samples.""" + dr, pr = -0.0005, 0.4175824175824176 + dpr = ([dr, -dr], [pr, pr]) + + rvs1 = np.linspace(1, 100, 100) + rvs2 = np.linspace(1.01, 99.989, 100) + rvs1_2D = np.array([rvs1, rvs2]) + rvs2_2D = np.array([rvs2, rvs1]) + + # the p-values in these two cases should be consistent + d, p = stats.permtest_rel(rvs1, rvs2, axis=0, seed=1234) + assert np.allclose([d, p], (dr, pr)) + d, p = stats.permtest_rel(rvs1_2D.T, rvs2_2D.T, axis=0, seed=1234) + assert np.allclose([d, p], dpr) + + # but the p-value will differ here because of _how_ we're drawing the + # random permutations... it would be nice if this was consistent, but as + # yet i don't have a great idea on how to make that happen without assuming + # a whole lot about the data + pr = 0.51248751 + tpr = ([dr, -dr], [pr, pr]) + d, p = stats.permtest_rel(rvs1_2D, rvs2_2D, axis=1, seed=1234) + assert np.allclose([d, p], tpr) + + +def test_permtest_pearsonr(): + """Test permutation test for Pearson correlation.""" + np.random.seed(12345678) + x, y = stats.make_correlated_xy(corr=0.1, size=100) + r, p = stats.permtest_pearsonr(x, y) + assert np.allclose([r, p], [0.10032564626876286, 0.3046953046953047]) + + x, y = stats.make_correlated_xy(corr=0.5, size=100) + r, p = stats.permtest_pearsonr(x, y) + assert np.allclose([r, p], [0.500040365781984, 0.000999000999000999]) + + z = x + np.random.normal(loc=1, size=100) + r, p = stats.permtest_pearsonr(x, np.column_stack([y, z])) + assert np.allclose(r, np.array([0.50004037, 0.25843187])) + assert np.allclose(p, np.array([0.000999, 0.01098901])) + + a, b = stats.make_correlated_xy(corr=0.9, size=100) + r, p = stats.permtest_pearsonr(np.column_stack([x, a]), + np.column_stack([y, b])) + assert np.allclose(r, np.array([0.50004037, 0.89927523])) + assert np.allclose(p, np.array([0.000999, 0.000999])) diff --git a/netneurotools/stats/tests/test_regression.py b/netneurotools/stats/tests/test_regression.py new file mode 100644 index 0000000..ce20638 --- /dev/null +++ b/netneurotools/stats/tests/test_regression.py @@ -0,0 +1,14 @@ +"""For testing netneurotools.stats.regression functionality.""" + +import numpy as np +from netneurotools import stats + + +def test_add_constant(): + """Test adding a constant to a 1D or 2D array.""" + # if provided a vector it will return a 2D array + assert stats._add_constant(np.random.rand(100)).shape == (100, 2) + + # if provided a 2D array it will return the same, extended by 1 column + out = stats._add_constant(np.random.rand(100, 100)) + assert out.shape == (100, 101) and np.all(out[:, -1] == 1) diff --git a/netneurotools/surface.py b/netneurotools/surface.py deleted file mode 100644 index e43ca16..0000000 --- a/netneurotools/surface.py +++ /dev/null @@ -1,189 +0,0 @@ -"""Functions for constructing graphs from surface meshes.""" - -import numpy as np -from scipy import sparse - - -def _get_edges(faces): - """ - Get set of edges from `faces`. 
- - Parameters - ---------- - faces : (F, 3) array_like - Set of indices creating triangular faces of a mesh - - Returns - ------- - edges : (F*3, 2) array_like - All edges in `faces` - """ - faces = np.asarray(faces) - edges = np.sort(faces[:, [0, 1, 1, 2, 2, 0]].reshape((-1, 2)), axis=1) - - return edges - - -def get_direct_edges(vertices, faces): - """ - Get (unique) direct edges and weights in mesh describes by inputs. - - Parameters - ---------- - vertices : (N, 3) array_like - Coordinates of `vertices` comprising mesh with `faces` - faces : (F, 3) array_like - Indices of `vertices` that compose triangular faces of mesh - - Returns - ------- - edges : (E, 2) array_like - Indices of `vertices` comprising direct edges (without duplicates) - weights : (E, 1) array_like - Distances between `edges` - - """ - edges = np.unique(_get_edges(faces), axis=0) - weights = np.linalg.norm(np.diff(vertices[edges], axis=1), axis=-1) - return edges, weights.squeeze() - - -def get_indirect_edges(vertices, faces): - """ - Get indirect edges and weights in mesh described by inputs. - - Indirect edges are between two vertices that participate in faces sharing - an edge - - Parameters - ---------- - vertices : (N, 3) array_like - Coordinates of `vertices` comprising mesh with `faces` - faces : (F, 3) array_like - Indices of `vertices` that compose triangular faces of mesh - - Returns - ------- - edges : (E, 2) array_like - Indices of `vertices` comprising indirect edges (without duplicates) - weights : (E, 1) array_like - Distances between `edges` on surface - - References - ---------- - https://github.com/mikedh/trimesh (MIT licensed) - - """ - # first generate the list of edges for the provided faces and the - # index for which face the edge is from (which is just the index of the - # face repeated thrice, since each face generates three direct edges) - edges = _get_edges(faces) - edges_face = np.repeat(np.arange(len(faces)), 3) - - # every edge appears twice in a watertight surface, so we'll first get the - # indices for each duplicate edge in `edges` (this should, assuming all - # goes well, have rows equal to len(edges) // 2) - order = np.lexsort(edges.T[::-1]) - edges_sorted = edges[order] - dupe = np.any(edges_sorted[1:] != edges_sorted[:-1], axis=1) - dupe_idx = np.append(0, np.nonzero(dupe)[0] + 1) - start_ok = np.diff(np.concatenate((dupe_idx, [len(edges_sorted)]))) == 2 - groups = np.tile(dupe_idx[start_ok].reshape(-1, 1), 2) - edge_groups = order[groups + np.arange(2)] - - # now, get the indices of the faces that participate in these duplicate - # edges, as well as the edges themselves - adjacency = edges_face[edge_groups] - nondegenerate = adjacency[:, 0] != adjacency[:, 1] - adjacency = np.sort(adjacency[nondegenerate], axis=1) - adjacency_edges = edges[edge_groups[:, 0][nondegenerate]] - - # the non-shared vertex index is the same shape as adjacency, holding - # vertex indices vs face indices - indirect_edges = np.zeros(adjacency.shape, dtype=np.int32) - 1 - - # loop through the two columns of adjacency - for i, fid in enumerate(adjacency.T): - # faces from the current column of adjacency - face = faces[fid] - # get index of vertex not included in shared edge - unshared = np.logical_not(np.logical_or( - face == adjacency_edges[:, 0].reshape(-1, 1), - face == adjacency_edges[:, 1].reshape(-1, 1))) - # each row should have one "uncontained" vertex; ignore degenerates - row_ok = unshared.sum(axis=1) == 1 - unshared[~row_ok, :] = False - indirect_edges[row_ok, i] = face[unshared] - - # get vertex 
coordinates of triangles pairs with shared edges, ordered - # such that the non-shared vertex is always _last_ among the trio - shared = np.sort(face[np.logical_not(unshared)].reshape(-1, 1, 2), axis=-1) - shared = np.repeat(shared, 2, axis=1) - triangles = np.concatenate((shared, indirect_edges[..., None]), axis=-1) - # `A.shape`: (3, N, 2) corresponding to (xyz coords, edges, triangle pairs) - A, B, V = vertices[triangles].transpose(2, 3, 0, 1) - - # calculate the xyz coordinates of the foot of each triangle, where the - # base is the shared edge - # that is, we're trying to calculate F in the equation `VF = VB - (w * BA)` - # where `VF`, `VB`, and `BA` are vectors, and `w = (AB * VB) / (AB ** 2)` - w = (np.sum((A - B) * (V - B), axis=0, keepdims=True) - / np.sum((A - B) ** 2, axis=0, keepdims=True)) - feet = B - (w * (B - A)) - # calculate coordinates of midpoint b/w the feet of each pair of triangles - midpoints = (np.sum(feet.transpose(1, 2, 0), axis=1) / 2)[:, None] - # calculate Euclidean distance between non-shared vertices and midpoints - # and add distances together for each pair of triangles - norms = np.linalg.norm(vertices[indirect_edges] - midpoints, axis=-1) - weights = np.sum(norms, axis=-1) - - # NOTE: weights won't be perfectly accurate for a small subset of triangle - # pairs where either triangle has angle >90 along the shared edge. in these - # the midpoint lies _outside_ the shared edge, so neighboring triangles - # would need to be taken into account. that said, this occurs in only a - # minority of cases and the difference tends to be in the ~0.001 mm range - return indirect_edges, weights - - -def make_surf_graph(vertices, faces, mask=None): - """ - Construct adjacency graph from `surf`. - - Parameters - ---------- - vertices : (N, 3) array_like - Coordinates of `vertices` comprising mesh with `faces` - faces : (F, 3) array_like - Indices of `vertices` that compose triangular faces of mesh - mask : (N,) array_like, optional (default None) - Boolean mask indicating which vertices should be removed from generated - graph. If not supplied, all vertices are used. 
- - Returns - ------- - graph : scipy.sparse.csr_matrix - Sparse matrix representing graph of `vertices` and `faces` - - Raises - ------ - ValueError : inconsistent number of vertices in `mask` and `vertices` - """ - if mask is not None and len(mask) != len(vertices): - raise ValueError('Supplied `mask` array has different number of ' - 'vertices than supplied `vertices`.') - - # get all (direct + indirect) edges from surface - direct_edges, direct_weights = get_direct_edges(vertices, faces) - indirect_edges, indirect_weights = get_indirect_edges(vertices, faces) - edges = np.vstack((direct_edges, indirect_edges)) - weights = np.hstack((direct_weights, indirect_weights)) - - # remove edges that include a vertex in `mask` - if mask is not None: - idx, = np.where(mask) - mask = ~np.any(np.isin(edges, idx), axis=1) - edges, weights = edges[mask], weights[mask] - - # construct our graph on which to calculate shortest paths - return sparse.csr_matrix((np.squeeze(weights), (edges[:, 0], edges[:, 1])), - shape=(len(vertices), len(vertices))) diff --git a/netneurotools/tests/test_civet.py b/netneurotools/tests/test_civet.py deleted file mode 100644 index 9a6a1ab..0000000 --- a/netneurotools/tests/test_civet.py +++ /dev/null @@ -1,30 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.civet functionality.""" - -import numpy as np -import pytest - -from netneurotools import civet, datasets - - -@pytest.fixture(scope='module') -def civet_surf(tmp_path_factory): - tmpdir = str(tmp_path_factory.getbasetemp()) - return datasets.fetch_civet(data_dir=tmpdir, verbose=0)['mid'] - - -def test_read_civet(civet_surf): - vertices, triangles = civet.read_civet(civet_surf.lh) - assert len(vertices) == 40962 - assert len(triangles) == 81920 - assert np.all(triangles.max(axis=0) < vertices.shape[0]) - - -def test_civet_to_freesurfer(): - brainmap = np.random.rand(81924) - out = civet.civet_to_freesurfer(brainmap) - out2 = civet.civet_to_freesurfer(brainmap, method='linear') - assert out.shape[0] == out2.shape[0] == 81924 - - with pytest.raises(ValueError): - civet.civet_to_freesurfer(np.random.rand(10)) diff --git a/netneurotools/tests/test_datasets.py b/netneurotools/tests/test_datasets.py deleted file mode 100644 index 0a5af12..0000000 --- a/netneurotools/tests/test_datasets.py +++ /dev/null @@ -1,204 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.datasets functionality.""" - -import os - -import numpy as np -import pytest - -from netneurotools import datasets -from netneurotools.datasets import utils - - -@pytest.mark.parametrize('corr, size, tol, seed', [ - (0.85, (1000,), 0.05, 1234), - (0.85, (1000, 1000), 0.05, 1234), - ([[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]], (1000,), 0.05, 1234) -]) -def test_make_correlated_xy(corr, size, tol, seed): - out = datasets.make_correlated_xy(corr=corr, size=size, - tol=tol, seed=seed) - # ensure output is expected shape - assert out.shape[1:] == size - assert len(out) == len(corr) if hasattr(corr, '__len__') else 2 - - # check outputs are correlated within specified tolerance - realcorr = np.corrcoef(out.reshape(len(out), -1)) - if len(realcorr) == 2 and not hasattr(corr, '__len__'): - realcorr = realcorr[0, 1] - assert np.all(np.abs(realcorr - corr) < tol) - - # check that seed generates reproducible values - duplicate = datasets.make_correlated_xy(corr=corr, size=size, - tol=tol, seed=seed) - assert np.allclose(out, duplicate) - - -@pytest.mark.parametrize('corr', [ - (1.5), (-1.5), # outside range of [-1, 1] - ([0.85]), ([[0.5, 0.5, 0.5], [0.5, 
0.5, 0.5]]), # not 2D / square array - ([[0.85]]), ([[1, 0.5], [0.5, 0.5]]) # diagonal not equal to 1 -]) -def test_make_correlated_xy_errors(corr): - with pytest.raises(ValueError): - datasets.make_correlated_xy(corr) - - -def test_fetch_conte69(tmpdir): - conte = datasets.fetch_conte69(data_dir=tmpdir, verbose=0) - assert all(hasattr(conte, k) for k in - ['midthickness', 'inflated', 'vinflated', 'info']) - - -def test_fetch_yerkes19(tmpdir): - conte = datasets.fetch_yerkes19(data_dir=tmpdir, verbose=0) - assert all(hasattr(conte, k) for k in - ['midthickness', 'inflated', 'vinflated']) - - -def test_fetch_pauli2018(tmpdir): - pauli = datasets.fetch_pauli2018(data_dir=tmpdir, verbose=0) - assert all(hasattr(pauli, k) and os.path.isfile(pauli[k]) for k in - ['probabilistic', 'deterministic', 'info']) - - -@pytest.mark.parametrize('version', [ - 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' -]) -def test_fetch_fsaverage(tmpdir, version): - fsaverage = datasets.fetch_fsaverage(version=version, data_dir=tmpdir, - verbose=0) - assert all(hasattr(fsaverage, k) - and len(fsaverage[k]) == 2 - and all(os.path.isfile(hemi) - for hemi in fsaverage[k]) for k in - ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere']) - - -@pytest.mark.parametrize('version, expected', [ - ('MNI152NLin2009aSym', [1, 1, 1, 1, 1]), - ('fsaverage', [2, 2, 2, 2, 2]), - ('fsaverage5', [2, 2, 2, 2, 2]), - ('fsaverage6', [2, 2, 2, 2, 2]), - ('fslr32k', [2, 2, 2, 2, 2]), - ('gcs', [2, 2, 2, 2, 6]) -]) -def test_fetch_cammoun2012(tmpdir, version, expected): - keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500'] - cammoun = datasets.fetch_cammoun2012(version, data_dir=tmpdir, verbose=0) - - # output has expected keys - assert all(hasattr(cammoun, k) for k in keys) - # and keys are expected lengths! 
- for k, e in zip(keys, expected): - out = getattr(cammoun, k) - if isinstance(out, (tuple, list)): - assert len(out) == e - else: - assert isinstance(out, str) and out.endswith('.nii.gz') - - if 'fsaverage' in version: - with pytest.warns(DeprecationWarning): - datasets.fetch_cammoun2012('surface', data_dir=tmpdir, verbose=0) - - -@pytest.mark.parametrize('dataset, expected', [ - ('celegans', ['conn', 'dist', 'labels', 'ref']), - ('drosophila', ['conn', 'coords', 'labels', 'networks', 'ref']), - ('human_func_scale033', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale060', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale125', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale250', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale500', ['conn', 'coords', 'labels', 'ref']), - ('human_struct_scale033', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale060', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale125', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale250', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale500', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('macaque_markov', ['conn', 'dist', 'labels', 'ref']), - ('macaque_modha', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('mouse', ['acronyms', 'conn', 'coords', 'dist', 'labels', 'ref']), - ('rat', ['conn', 'labels', 'ref']), -]) -def test_fetch_connectome(tmpdir, dataset, expected): - connectome = datasets.fetch_connectome(dataset, data_dir=tmpdir, verbose=0) - - for key in expected: - assert (key in connectome) - assert isinstance(connectome[key], str if key == 'ref' else np.ndarray) - - -@pytest.mark.parametrize('version', [ - 'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k' -]) -def test_fetch_schaefer2018(tmpdir, version): - keys = [ - '{}Parcels{}Networks'.format(p, n) - for p in range(100, 1001, 100) for n in [7, 17] - ] - schaefer = datasets.fetch_schaefer2018(version, data_dir=tmpdir, verbose=0) - - if version == 'fslr32k': - assert all(k in schaefer and os.path.isfile(schaefer[k]) for k in keys) - else: - assert all(k in schaefer - and len(schaefer[k]) == 2 - and all(os.path.isfile(hemi) for hemi in schaefer[k]) - for k in keys) - - -def test_fetch_hcp_standards(tmpdir): - hcp = datasets.fetch_hcp_standards(data_dir=tmpdir, verbose=0) - assert os.path.isdir(hcp) - - -def test_fetch_mmpall(tmpdir): - mmp = datasets.fetch_mmpall(data_dir=tmpdir, verbose=0) - assert len(mmp) == 2 - assert all(os.path.isfile(hemi) for hemi in mmp) - assert all(hasattr(mmp, attr) for attr in ('lh', 'rh')) - - -def test_fetch_voneconomo(tmpdir): - vek = datasets.fetch_voneconomo(data_dir=tmpdir, verbose=0) - assert all(hasattr(vek, k) and len(vek[k]) == 2 for k in ['gcs', 'ctab']) - assert isinstance(vek.get('info'), str) - - -@pytest.mark.parametrize('dset, expected', [ - ('atl-cammoun2012', ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k', - 'MNI152NLin2009aSym', 'gcs']), - ('tpl-conte69', ['url', 'md5']), - ('atl-pauli2018', ['url', 'md5', 'name']), - ('tpl-fsaverage', ['fsaverage' + f for f in ['', '3', '4', '5', '6']]), - ('atl-schaefer2018', ['fsaverage', 'fsaverage6', 'fsaverage6']) -]) -def test_get_dataset_info(dset, expected): - info = utils._get_dataset_info(dset) - if isinstance(info, dict): - assert all(k in info.keys() for k in expected) - elif isinstance(info, list): - for f in info: - assert all(k in f.keys() for k in expected) - else: - assert False - - with pytest.raises(KeyError): - 
utils._get_dataset_info('notvalid') - - -@pytest.mark.parametrize('version', [ - 'v1', 'v2' -]) -def test_fetch_civet(tmpdir, version): - civet = datasets.fetch_civet(version=version, data_dir=tmpdir, verbose=0) - for key in ('mid', 'white'): - assert key in civet - for hemi in ('lh', 'rh'): - assert hasattr(civet[key], hemi) - assert os.path.isfile(getattr(civet[key], hemi)) - - -def test_get_data_dir(tmpdir): - data_dir = utils._get_data_dir(tmpdir) - assert os.path.isdir(data_dir) diff --git a/netneurotools/tests/test_freesurfer.py b/netneurotools/tests/test_freesurfer.py deleted file mode 100644 index 53bf74e..0000000 --- a/netneurotools/tests/test_freesurfer.py +++ /dev/null @@ -1,82 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.freesurfer functionality.""" - -import numpy as np -import pytest - -from netneurotools import datasets, freesurfer - - -@pytest.fixture(scope='module') -def cammoun_surf(tmp_path_factory): - tmpdir = str(tmp_path_factory.getbasetemp()) - return datasets.fetch_cammoun2012('fsaverage5', data_dir=tmpdir, verbose=0) - - -@pytest.mark.parametrize('method', [ - 'average', 'surface', 'geodesic' -]) -@pytest.mark.parametrize('scale, parcels, n_right', [ - ('scale033', 68, 34), - ('scale060', 114, 57), - ('scale125', 219, 108), - ('scale250', 448, 223), - ('scale500', 1000, 501), -]) -def test_find_parcel_centroids(cammoun_surf, scale, parcels, n_right, method): - lh, rh = cammoun_surf[scale] - - coords, hemi = freesurfer.find_parcel_centroids(lhannot=lh, rhannot=rh, - method=method, - version='fsaverage5') - assert len(coords) == parcels - assert len(hemi) == parcels - assert np.sum(hemi) == n_right - - -@pytest.mark.parametrize('scale, parcels', [ - ('scale033', 68), - ('scale060', 114), - ('scale125', 219), - ('scale250', 448), - ('scale500', 1000), -]) -def test_project_reduce_vertices(cammoun_surf, scale, parcels): - # these functions are partners and should be tested in concert. - # we can test all the normal functionality and also ensure that "round - # trips" work as expected - - # generate "parcellated" data - data = np.random.rand(parcels) - lh, rh = cammoun_surf[scale] - - # do we get the expected number of vertices in our projection? - projected = freesurfer.parcels_to_vertices(data, rhannot=rh, lhannot=lh) - assert len(projected) == 20484 - - # does reduction return our input data, as expected? - reduced = freesurfer.vertices_to_parcels(projected, rhannot=rh, lhannot=lh) - assert np.allclose(data, reduced) - - # can we do this with multi-dimensional data, too? - data = np.random.rand(parcels, 2) - projected = freesurfer.parcels_to_vertices(data, rhannot=rh, lhannot=lh) - assert projected.shape == (20484, 2) - reduced = freesurfer.vertices_to_parcels(projected, rhannot=rh, lhannot=lh) - assert np.allclose(data, reduced) - - # what about int arrays as input? 
- data = np.random.choice(10, size=parcels) - projected = freesurfer.parcels_to_vertices(data, rhannot=rh, lhannot=lh) - reduced = freesurfer.vertices_to_parcels(projected, rhannot=rh, lhannot=lh) - assert np.allclose(reduced, data) - - # number of parcels != annotation spec - with pytest.raises(ValueError): - freesurfer.parcels_to_vertices(np.random.rand(parcels + 1), - rhannot=rh, lhannot=lh) - - # number of vertices != annotation spec - with pytest.raises(ValueError): - freesurfer.vertices_to_parcels(np.random.rand(20485), - rhannot=rh, lhannot=lh) diff --git a/netneurotools/tests/test_modularity.py b/netneurotools/tests/test_modularity.py deleted file mode 100644 index 4018ce0..0000000 --- a/netneurotools/tests/test_modularity.py +++ /dev/null @@ -1,44 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.modularity functionality.""" - -import numpy as np - -from netneurotools import modularity - -rs = np.random.RandomState(1234) - - -def test_dummyvar(): - # generate small example dummy variable code - out = modularity._dummyvar(np.array([1, 1, 2, 3, 3])) - assert np.all(out == np.array([[1, 0, 0], - [1, 0, 0], - [0, 1, 0], - [0, 0, 1], - [0, 0, 1]])) - - allones = np.array([1, 1, 1, 1, 1, 1, 1, 1]) - assert np.all(modularity._dummyvar(allones) == allones) - - -def test_zrand(): - # make the same two-group community assignments (with different labels) - label = np.ones((100, 1)) - X, Y = np.vstack((label, label * 2)), np.vstack((label * 2, label)) - # compare - assert modularity.zrand(X, Y) == modularity.zrand(X, Y[::-1]) - random = rs.choice([0, 1], size=X.shape) - assert modularity.zrand(X, Y) > modularity.zrand(X, random) - assert modularity.zrand(X, Y) == modularity.zrand(X[:, 0], Y[:, 0]) - - -def test_zrand_partitions(): - # make random communities - comm = rs.choice(range(6), size=(10, 100)) - all_diff = modularity._zrand_partitions(comm) - all_same = modularity._zrand_partitions(np.repeat(comm[:, [0]], 10, axis=1)) - - # partition of labels that are all the same should have higher average - # zrand and lower stdev zrand - assert np.nanmean(all_same) > np.nanmean(all_diff) - assert np.nanstd(all_same) < np.nanstd(all_diff) diff --git a/netneurotools/tests/test_plotting.py b/netneurotools/tests/test_plotting.py deleted file mode 100644 index de35811..0000000 --- a/netneurotools/tests/test_plotting.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.plotting functionality.""" - -import matplotlib.pyplot as plt -import numpy as np - -from netneurotools import datasets, plotting -import pytest - - -def test_grid_communities(): - comms = np.asarray([0, 0, 0, 0, 1, 1, 1, 1, 2, 2]) - # check that comms with / without 0 community label yields same output - assert np.allclose(plotting._grid_communities(comms), [0, 4, 8, 10]) - assert np.allclose(plotting._grid_communities(comms + 1), [0, 4, 8, 10]) - - -def test_sort_communities(): - data = np.arange(9).reshape(3, 3) - comms = np.asarray([0, 0, 2]) - # check that comms with / without 0 community label yields same output - assert np.allclose(plotting.sort_communities(data, comms), [1, 0, 2]) - assert np.allclose(plotting.sort_communities(data, comms + 1), [1, 0, 2]) - - -def test_plot_mod_heatmap(): - data = np.random.rand(100, 100) - comms = np.random.choice(4, size=(100,)) - ax = plotting.plot_mod_heatmap(data, comms) - assert isinstance(ax, plt.Axes) - - -@pytest.mark.filterwarnings('ignore') -def test_plot_fsvertex(): - surfer = pytest.importorskip('surfer') - - data = 
np.random.rand(20484) - brain = plotting.plot_fsvertex(data, subject_id='fsaverage5', - offscreen=True) - assert isinstance(brain, surfer.Brain) - - -@pytest.mark.filterwarnings('ignore') -def test_plot_fsaverage(): - surfer = pytest.importorskip('surfer') - - data = np.random.rand(68) - lhannot, rhannot = datasets.fetch_cammoun2012('fsaverage5')['scale033'] - brain = plotting.plot_fsaverage(data, lhannot=lhannot, rhannot=rhannot, - subject_id='fsaverage5', offscreen=True) - assert isinstance(brain, surfer.Brain) - - -def test_plot_point_brain(): - data = np.random.rand(100) - coords = np.random.rand(100, 3) - out = plotting.plot_point_brain(data, coords) - assert isinstance(out, plt.Figure) diff --git a/netneurotools/tests/test_stats.py b/netneurotools/tests/test_stats.py deleted file mode 100644 index 8730bfc..0000000 --- a/netneurotools/tests/test_stats.py +++ /dev/null @@ -1,172 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.stats functionality.""" - -import itertools -import numpy as np -import pytest - -from netneurotools import datasets, stats - - -@pytest.mark.xfail -def test_permtest_1samp(): - assert False - # n1, n2, n3 = 10, 15, 20 - # rs = np.random.RandomState(1234) - # rvn1 = rs.normal(loc=8, scale=10, size=(n1, n2, n3)) - - # t1, p1 = stats.permtest_1samp(rvn1, 1, axis=0) - - -def test_permtest_rel(): - dr, pr = -0.0005, 0.4175824175824176 - dpr = ([dr, -dr], [pr, pr]) - - rvs1 = np.linspace(1, 100, 100) - rvs2 = np.linspace(1.01, 99.989, 100) - rvs1_2D = np.array([rvs1, rvs2]) - rvs2_2D = np.array([rvs2, rvs1]) - - # the p-values in these two cases should be consistent - d, p = stats.permtest_rel(rvs1, rvs2, axis=0, seed=1234) - assert np.allclose([d, p], (dr, pr)) - d, p = stats.permtest_rel(rvs1_2D.T, rvs2_2D.T, axis=0, seed=1234) - assert np.allclose([d, p], dpr) - - # but the p-value will differ here because of _how_ we're drawing the - # random permutations... 
it would be nice if this was consistent, but as - # yet i don't have a great idea on how to make that happen without assuming - # a whole lot about the data - pr = 0.51248751 - tpr = ([dr, -dr], [pr, pr]) - d, p = stats.permtest_rel(rvs1_2D, rvs2_2D, axis=1, seed=1234) - assert np.allclose([d, p], tpr) - - -def test_permtest_pearsonr(): - np.random.seed(12345678) - x, y = datasets.make_correlated_xy(corr=0.1, size=100) - r, p = stats.permtest_pearsonr(x, y) - assert np.allclose([r, p], [0.10032564626876286, 0.3046953046953047]) - - x, y = datasets.make_correlated_xy(corr=0.5, size=100) - r, p = stats.permtest_pearsonr(x, y) - assert np.allclose([r, p], [0.500040365781984, 0.000999000999000999]) - - z = x + np.random.normal(loc=1, size=100) - r, p = stats.permtest_pearsonr(x, np.column_stack([y, z])) - assert np.allclose(r, np.array([0.50004037, 0.25843187])) - assert np.allclose(p, np.array([0.000999, 0.01098901])) - - a, b = datasets.make_correlated_xy(corr=0.9, size=100) - r, p = stats.permtest_pearsonr(np.column_stack([x, a]), - np.column_stack([y, b])) - assert np.allclose(r, np.array([0.50004037, 0.89927523])) - assert np.allclose(p, np.array([0.000999, 0.000999])) - - -@pytest.mark.parametrize('x, y, expected', [ - # basic one-dimensional input - (range(5), range(5), (1.0, 0.0)), - # broadcasting occurs regardless of input order - (np.stack([range(5), range(5, 0, -1)], 1), range(5), - ([1.0, -1.0], [0.0, 0.0])), - (range(5), np.stack([range(5), range(5, 0, -1)], 1), - ([1.0, -1.0], [0.0, 0.0])), - # correlation between matching columns - (np.stack([range(5), range(5, 0, -1)], 1), - np.stack([range(5), range(5, 0, -1)], 1), - ([1.0, 1.0], [0.0, 0.0])) -]) -def test_efficient_pearsonr(x, y, expected): - assert np.allclose(stats.efficient_pearsonr(x, y), expected) - - -def test_efficient_pearsonr_errors(): - with pytest.raises(ValueError): - stats.efficient_pearsonr(range(4), range(5)) - - assert all(np.isnan(a) for a in stats.efficient_pearsonr([], [])) - - -def test_gen_rotation(): - # make a few rotations (some same / different) - rout1, lout1 = stats._gen_rotation(seed=1234) - rout2, lout2 = stats._gen_rotation(seed=1234) - rout3, lout3 = stats._gen_rotation(seed=5678) - - # confirm consistency with the same seed - assert np.allclose(rout1, rout2) and np.allclose(lout1, lout2) - - # confirm inconsistency with different seeds - assert not np.allclose(rout1, rout3) and not np.allclose(lout1, lout3) - - # confirm reflection across L/R hemispheres as expected - # also confirm min/max never exceeds -1/1 - reflected = np.array([[1, -1, -1], [-1, 1, 1], [-1, 1, 1]]) - for r, l in zip([rout1, rout3], [lout1, lout3]): # noqa: E741 - assert np.allclose(r / l, reflected) - assert r.max() < 1 and r.min() > -1 and l.max() < 1 and l.min() > -1 - - -def _get_sphere_coords(s, t, r=1): - """Get coordinates at angles `s` and `t` a sphere of radius `r`.""" - # convert to radians - rad = np.pi / 180 - s, t = s * rad, t * rad - - # calculate new points - x = r * np.cos(s) * np.sin(t) - y = r * np.sin(s) * np.cos(t) - z = r * np.cos(t) - - return x, y, z - - -def test_gen_spinsamples(): - # grab a few points from a spherical surface and duplicate it for the - # "other hemisphere" - coords = [_get_sphere_coords(s, t, r=1) for s, t in - itertools.product(range(0, 360, 45), range(0, 360, 45))] - coords = np.vstack([coords, coords]) - hemi = np.hstack([np.zeros(len(coords) // 2), np.ones(len(coords) // 2)]) - - # generate "normal" test spins - spins, cost = stats.gen_spinsamples(coords, hemi, n_rotate=10, 
seed=1234, - return_cost=True) - assert spins.shape == spins.shape == (len(coords), 10) - - # confirm that `method` parameter functions as desired - for method in ['vasa', 'hungarian']: - spin_exact, cost_exact = stats.gen_spinsamples(coords, hemi, - n_rotate=10, seed=1234, - method=method, - return_cost=True) - assert spin_exact.shape == cost.shape == (len(coords), 10) - for s in spin_exact.T: - assert len(np.unique(s)) == len(s) - - # check that one hemisphere works - mask = hemi == 0 - spins, cost = stats.gen_spinsamples(coords[mask], hemi[mask], n_rotate=10, - seed=1234, return_cost=True) - assert spins.shape == cost.shape == (len(coords[mask]), 10) - - # confirm that check_duplicates will raise warnings - # since spins aren't exact permutations we need to use 4C4 with repeats - # and then perform one more rotation than that number (i.e., 35 + 1) - with pytest.warns(UserWarning): - i = [0, 1, -2, -1] # only grab a few coordinates - stats.gen_spinsamples(coords[i], hemi[i], n_rotate=36, seed=1234) - - # non-3D coords - with pytest.raises(ValueError): - stats.gen_spinsamples(coords[:, :2], hemi) - - # non-1D hemi - with pytest.raises(ValueError): - stats.gen_spinsamples(coords, np.column_stack([hemi, hemi])) - - # different length coords and hemi - with pytest.raises(ValueError): - stats.gen_spinsamples(coords, hemi[:-1]) diff --git a/netneurotools/tests/test_utils.py b/netneurotools/tests/test_utils.py deleted file mode 100644 index 1ac6b91..0000000 --- a/netneurotools/tests/test_utils.py +++ /dev/null @@ -1,47 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.utils functionality.""" - -import numpy as np -import pytest - -from netneurotools import datasets, utils - - -def test_add_constant(): - # if provided a vector it will return a 2D array - assert utils.add_constant(np.random.rand(100)).shape == (100, 2) - - # if provided a 2D array it will return the same, extended by 1 column - out = utils.add_constant(np.random.rand(100, 100)) - assert out.shape == (100, 101) and np.all(out[:, -1] == 1) - - -def test_add_triu(): - arr = np.arange(9).reshape(3, 3) - assert np.all(utils.get_triu(arr) == np.array([1, 2, 5])) - assert np.all(utils.get_triu(arr, k=0) == np.array([0, 1, 2, 4, 5, 8])) - - -@pytest.mark.parametrize('scale, expected', [ - ('scale033', 83), - ('scale060', 129), - ('scale125', 234), - ('scale250', 463), - ('scale500', 1015) -]) -def test_get_centroids(tmpdir, scale, expected): - # fetch test dataset - cammoun = datasets.fetch_cammoun2012('MNI152NLin2009aSym', data_dir=tmpdir, - verbose=0) - - ijk = utils.get_centroids(cammoun[scale]) - xyz = utils.get_centroids(cammoun[scale], image_space=True) - - # we get expected shape regardless of requested coordinate space - assert ijk.shape == xyz.shape == (expected, 3) - # ijk is all positive (i.e., cartesian) coordinates - assert np.all(ijk > 0) - - # requesting specific labels gives us a subset of the full `ijk` - lim = utils.get_centroids(cammoun[scale], labels=[1, 2, 3]) - assert np.all(lim == ijk[:3]) diff --git a/netneurotools/utils.py b/netneurotools/utils.py deleted file mode 100644 index 2d8839e..0000000 --- a/netneurotools/utils.py +++ /dev/null @@ -1,243 +0,0 @@ -# -*- coding: utf-8 -*- -"""Miscellaneous functions of various utility.""" - -import glob -import os -import subprocess - -import nibabel as nib -import numpy as np -from scipy import ndimage -from sklearn.utils.validation import check_array - - -def add_constant(data): - """ - Add a constant (i.e., intercept) term to `data`. 
- - Parameters - ---------- - data : (N, M) array_like - Samples by features data array - - Returns - ------- - data : (N, F) np.ndarray - Where `F` is `M + 1` - - Examples - -------- - >>> from netneurotools import utils - - >>> A = np.zeros((5, 5)) - >>> Ac = utils.add_constant(A) - >>> Ac - array([[0., 0., 0., 0., 0., 1.], - [0., 0., 0., 0., 0., 1.], - [0., 0., 0., 0., 0., 1.], - [0., 0., 0., 0., 0., 1.], - [0., 0., 0., 0., 0., 1.]]) - """ - data = check_array(data, ensure_2d=False) - return np.column_stack([data, np.ones(len(data))]) - - -def get_triu(data, k=1): - """ - Return vectorized version of upper triangle from `data`. - - Parameters - ---------- - data : (N, N) array_like - Input data - k : int, optional - Which diagonal to select from (where primary diagonal is 0). Default: 1 - - Returns - ------- - triu : (N * N-1 / 2) numpy.ndarray - Upper triangle of `data` - - Examples - -------- - >>> from netneurotools import utils - - >>> X = np.array([[1, 0.5, 0.25], [0.5, 1, 0.33], [0.25, 0.33, 1]]) - >>> tri = utils.get_triu(X) - >>> tri - array([0.5 , 0.25, 0.33]) - """ - return data[np.triu_indices(len(data), k=k)].copy() - - -def globpath(*args): - """ - Join `args` with :py:func:`os.path.join` and returns sorted glob output. - - Parameters - ---------- - args : str - Paths / `glob`-compatible regex strings - - Returns - ------- - files : list - Sorted list of files - """ - return sorted(glob.glob(os.path.join(*args))) - - -def rescale(data, low=0, high=1): - """ - Rescale `data` so it is within [`low`, `high`]. - - Parameters - ---------- - data : array_like - Input data array - low : float, optional - Lower bound for rescaling. Default: -1 - high : float, optional - Upper bound for rescaling. Default: 1 - - Returns - ------- - rescaled : np.ndarray - Rescaled data - """ - data = np.asarray(data) - rescaled = np.interp(data, (data.min(), data.max()), (low, high)) - - return rescaled - - -def run(cmd, env=None, return_proc=False, quiet=False): - """ - Run `cmd` via shell subprocess with provided environment `env`. - - Parameters - ---------- - cmd : str - Command to be run as single string - env : dict, optional - If provided, dictionary of key-value pairs to be added to base - environment when running `cmd`. Default: None - return_proc : bool, optional - Whether to return CompletedProcess object. Default: false - quiet : bool, optional - Whether to suppress stdout/stderr from subprocess. Default: False - - Returns - ------- - proc : subprocess.CompletedProcess - Process output - - Raises - ------ - subprocess.CalledProcessError - If subprocess does not exit cleanly - - Examples - -------- - >>> from netneurotools import utils - >>> p = utils.run('echo "hello world"', return_proc=True, quiet=True) - >>> p.returncode - 0 - >>> p.stdout # doctest: +SKIP - 'hello world\\n' - """ # noqa: D301 - merged_env = os.environ.copy() - if env is not None: - if not isinstance(env, dict): - raise TypeError('Provided `env` must be a dictionary, not {}' - .format(type(env))) - merged_env.update(env) - - opts = {} - if quiet: - opts = dict(stdout=subprocess.PIPE, stderr=subprocess.PIPE) - - proc = subprocess.run(cmd, env=merged_env, shell=True, check=True, - universal_newlines=True, **opts) - - if return_proc: - return proc - - -def check_fs_subjid(subject_id, subjects_dir=None): - """ - Check that `subject_id` exists in provided FreeSurfer `subjects_dir`. - - Parameters - ---------- - subject_id : str - FreeSurfer subject ID - subjects_dir : str, optional - Path to FreeSurfer subject directory. 
If not set, will inherit from - the environmental variable $SUBJECTS_DIR. Default: None - - Returns - ------- - subject_id : str - FreeSurfer subject ID, as provided - subjects_dir : str - Full filepath to `subjects_dir` - - Raises - ------ - FileNotFoundError - """ - # check inputs for subjects_dir and subject_id - if subjects_dir is None or not os.path.isdir(subjects_dir): - try: - subjects_dir = os.environ['SUBJECTS_DIR'] - except KeyError: - subjects_dir = os.getcwd() - else: - subjects_dir = os.path.abspath(subjects_dir) - - subjdir = os.path.join(subjects_dir, subject_id) - if not os.path.isdir(subjdir): - raise FileNotFoundError('Cannot find specified subject id {} in ' - 'provided subject directory {}.' - .format(subject_id, subjects_dir)) - - return subject_id, subjects_dir - - -def get_centroids(img, labels=None, image_space=False): - """ - Find centroids of `labels` in `img`. - - Parameters - ---------- - img : niimg-like object - 3D image containing integer label at each point - labels : array_like, optional - List of labels for which to find centroids. If not specified all - labels present in `img` will be used. Zero will be ignored as it is - considered "background." Default: None - image_space : bool, optional - Whether to return xyz (image space) coordinates for centroids based - on transformation in `img.affine`. Default: False - - Returns - ------- - centroids : (N, 3) np.ndarray - Coordinates of centroids for ROIs in input data - """ - from nilearn._utils import check_niimg_3d - - img = check_niimg_3d(img) - data = np.asarray(img.dataobj) - - if labels is None: - labels = np.trim_zeros(np.unique(data)) - - centroids = np.vstack(ndimage.center_of_mass(data, labels=data, - index=labels)) - - if image_space: - centroids = nib.affines.apply_affine(img.affine, centroids) - - return centroids diff --git a/pyproject.toml b/pyproject.toml index 9127969..653beee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,10 +37,15 @@ doc = [ "sphinx_rtd_theme", "sphinx-gallery" ] -plotting = [ +pysurfer = [ + "vtk", "mayavi", "pysurfer" ] +pyvista = [ + "vtk", + "pyvista" +] numba = [ "numba" ] @@ -61,9 +66,15 @@ test = [ requires = ["setuptools", "versioneer[toml]"] build-backend = "setuptools.build_meta" +[tool.setuptools] +include-package-data = true + [tool.setuptools.packages] find = {} +[tool.setuptools.package-data] +"*" = ["*.json", "*.bib"] + [tool.setuptools.dynamic] version = {attr = "netneurotools.__version__"} @@ -76,12 +87,7 @@ tag_prefix = "" parentdir_prefix = "" [tool.ruff] -select = ["E", "F", "B", "W", "D", "NPY"] -ignore = [ - "B905", # zip() without an explicit strict= parameter - # "W605", # Invalid escape sequence: latex - "NPY002", # Replace legacy `np.random` call with `np.random.Generator` -] + line-length = 88 exclude = [ "setup.py", @@ -92,18 +98,26 @@ exclude = [ ] target-version = "py38" -[tool.ruff.pydocstyle] +[tool.ruff.lint] +select = ["E", "F", "B", "W", "D", "NPY"] +ignore = [ + "B905", # zip() without an explicit strict= parameter + # "W605", # Invalid escape sequence: latex + "NPY002", # Replace legacy `np.random` call with `np.random.Generator` +] +preview = true + +[tool.ruff.lint.pydocstyle] convention = "numpy" -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "__init__.py" = ["D104", "F401"] -"netneurotools/tests/*" = ["B011", "D103"] +"test_*" = ["B011"] "examples/*" = ["E402", "D"] [tool.coverage.run] source = ["netneurotools"] omit = [ - "netneurotools/tests/*", "netneurotools/_version.py", ] diff --git 
a/resources/generate_atl-cammoun2012_surface.py b/resources/generate_atl-cammoun2012_surface.py
index 686b518..6edc121 100755
--- a/resources/generate_atl-cammoun2012_surface.py
+++ b/resources/generate_atl-cammoun2012_surface.py
@@ -116,7 +116,7 @@ def combine_cammoun_500(lhannot, rhannot, subject_id,
                              annot=None, quiet=quiet)
 
     # save ctab information from annotation file
-    vtx, ct, names = nib.freesurfer.read_annot(fn)
+    _, ct, names = nib.freesurfer.read_annot(fn)
     data = np.column_stack([[f.decode() for f in names], ct[:, :-1]])
     ctab = ctab.append(pd.DataFrame(data), ignore_index=True)
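
Note on the read_annot change in resources/generate_atl-cammoun2012_surface.py: nib.freesurfer.read_annot returns a (labels, ctab, names) tuple, and the script only uses the colour table and parcel names, so the unused per-vertex labels are now assigned to "_". A minimal sketch of what the call returns, assuming an annotation file is at hand ("lh.aparc.annot" below is a placeholder, not a file shipped with this repository):

    import nibabel as nib
    import numpy as np

    # read_annot returns three things:
    #   labels : (n_vertices,) parcel assignment for every surface vertex
    #   ctab   : (n_parcels, 5) colour table; the last column holds the
    #            encoded annotation value
    #   names  : list of parcel names as bytes
    labels, ctab, names = nib.freesurfer.read_annot("lh.aparc.annot")

    # the patched script keeps only the names and the colour columns,
    # dropping the encoded annotation value in the last column
    rows = np.column_stack([[n.decode() for n in names], ctab[:, :-1]])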
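
Note on the packaging changes in pyproject.toml: setting include-package-data and declaring "*.json" / "*.bib" under [tool.setuptools.package-data] makes setuptools bundle those metadata files into built wheels. How the files are read back at runtime is not part of this diff; the snippet below is only an illustrative sketch of reading bundled package data, with a placeholder file name and the importlib.resources.files API (Python 3.9+), not necessarily what netneurotools itself does.

    import json
    from importlib.resources import files  # available from Python 3.9

    # Read a JSON file that ships as package data inside the installed package.
    # The package path and file name here are assumed for illustration only.
    payload = files("netneurotools.datasets").joinpath("datasets.json").read_text()
    info = json.loads(payload)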