neuropsychology · danibene · Aug 22, 2023 · Aug 8, 2023 · Aug 8, 2023 · Aug 8, 2023
diff --git a/data/fantasia/download_fantasia.py b/data/fantasia/download_fantasia.py
@@ -13,9 +13,24 @@
 import numpy as np
 import wfdb
 import os
+from pathlib import Path
 
+from neurokit2.data import download_zip
 
-files = os.listdir("./fantasia-database-1.0.0/")
+database_path = "./fantasia-database-1.0.0/"
+
+# Download and unzip the database if its folder is not already next to this script
+if not os.path.exists(database_path):
+    url = "https://physionet.org/static/published-projects/fantasia/fantasia-database-1.0.0.zip"
+    download_successful = download_zip(url, database_path)  # False when the download failed
+    if not download_successful:
+        raise ValueError(
+            "NeuroKit error: download of Fantasia database failed. "
+            "Please download it manually from https://physionet.org/content/fantasia/1.0.0/ "
+            "and unzip it in the same folder as this script."
+        )
+
+files = os.listdir(database_path)
 files = [s.replace('.dat', '') for s in files if ".dat" in s]
 
 dfs_ecg = []
@@ -24,7 +39,7 @@
 
 for i, participant in enumerate(files):
 
-    data, info = wfdb.rdsamp("./fantasia-database-1.0.0/" + participant)
+    data, info = wfdb.rdsamp(str(Path(database_path, participant)))
 
     # Get signal
     data = pd.DataFrame(data, columns=info["sig_name"])
@@ -35,7 +50,7 @@
     data["Database"] = "Fantasia"
 
     # Get annotations
-    anno = wfdb.rdann("./fantasia-database-1.0.0/" + participant, 'ecg')
+    anno = wfdb.rdann(str(Path(database_path, participant)), 'ecg')
     anno = anno.sample[np.where(np.array(anno.symbol) == "N")[0]]
     anno = pd.DataFrame({"Rpeaks": anno})
     anno["Participant"] = "Fantasia_" + participant
@@ -49,4 +64,4 @@
 
 # Save
 df_ecg = pd.concat(dfs_ecg).to_csv("ECGs.csv", index=False)
-dfs_rpeaks = pd.concat(dfs_rpeaks).to_csv("Rpeaks.csv", index=False)
+df_rpeaks = pd.concat(dfs_rpeaks).to_csv("Rpeaks.csv", index=False)
diff --git a/data/lemon/download_lemon.py b/data/lemon/download_lemon.py
@@ -3,12 +3,6 @@
 
 https://ftp.gwdg.de/pub/misc/MPI-Leipzig_Mind-Brain-Body-LEMON/EEG_MPILMBB_LEMON/EEG_Preprocessed_BIDS_ID/EEG_Preprocessed/
 
-Steps:
-    1. Download the ZIP database from https://physionet.org/content/nstdb/1.0.0/
-    2. Open it with a zip-opener (WinZip, 7zip).
-    3. Extract the folder of the same name (named 'mit-bih-noise-stress-test-database-1.0.0') to the same folder as this script.
-    4. Run this script.
-
 Credits:
     pycrostates package by Mathieu Scheltienne and Victor Férat
 """

diff --git a/data/mit_arrhythmia/download_mit_arrhythmia.py b/data/mit_arrhythmia/download_mit_arrhythmia.py
@@ -14,9 +14,22 @@
 import numpy as np
 import wfdb
 import os
+from neurokit2.data import download_zip
 
-data_files = ["mit-bih-arrhythmia-database-1.0.0/" + file for file in os.listdir("mit-bih-arrhythmia-database-1.0.0") if ".dat" in file]
+database_path = "./mit-bih-arrhythmia-database-1.0.0/"
 
+# Download and unzip the database if its folder is not already next to this script
+if not os.path.exists(database_path):
+    url = "https://physionet.org/static/published-projects/mitdb/mit-bih-arrhythmia-database-1.0.0.zip"
+    download_successful = download_zip(url, database_path)  # False when the download failed
+    if not download_successful:
+        raise ValueError(
+            "NeuroKit error: download of MIT-Arrhythmia database failed. "
+            "Please download it manually from https://physionet.org/content/mitdb/1.0.0/ "
+            "and unzip it in the same folder as this script."
+        )
+
+data_files = [database_path + file for file in os.listdir(database_path) if ".dat" in file]
 
 def read_file(file, participant):
     """Utility function
@@ -55,9 +68,9 @@ def read_file(file, participant):
     dfs_rpeaks.append(anno)
 
     # Store additional recording if available
-    if "x_" + file.replace("mit-bih-arrhythmia-database-1.0.0/", "") in os.listdir("mit-bih-arrhythmia-database-1.0.0/x_mitdb/"):
+    if "x_" + file.replace(database_path, "") in os.listdir(database_path + "x_mitdb/"):
         print("  - Additional recording detected.")
-        data, anno = read_file("mit-bih-arrhythmia-database-1.0.0/x_mitdb/" + "x_" + file.replace("mit-bih-arrhythmia-database-1.0.0/", ""), participant)
+        data, anno = read_file(database_path + "x_mitdb/" + "x_" + file.replace(database_path, ""), participant)
         # Store with the rest
         dfs_ecg.append(data)
         dfs_rpeaks.append(anno)

diff --git a/neurokit2/data/__init__.py b/neurokit2/data/__init__.py
@@ -5,5 +5,6 @@
 from .read_bitalino import read_bitalino
 from .read_video import read_video
 from .write_csv import write_csv
+from .database import download_zip
 
-__all__ = ["read_acqknowledge", "read_bitalino", "read_video", "data", "write_csv"]
+__all__ = ["read_acqknowledge", "read_bitalino", "read_video", "data", "write_csv", "download_zip"]
diff --git a/neurokit2/data/data.py b/neurokit2/data/data.py
@@ -173,8 +173,7 @@ def data(dataset="bio_eventrelated_100hz"):
 
     dataset = dataset.lower()
 
-    # TODO: change this path back to "master"
-    path = "https://raw.githubusercontent.com/neuropsychology/NeuroKit/dev/data/"
+    path = "https://raw.githubusercontent.com/neuropsychology/NeuroKit/master/data/"
 
     # Signals as vectors =======================
     if dataset in ["eeg", "eeg_150hz", "eeg.txt"]:

diff --git a/neurokit2/data/database.py b/neurokit2/data/database.py
@@ -0,0 +1,79 @@
+import requests
+import zipfile
+from pathlib import Path
+
+
+def download_zip(url, destination_path, timeout=60):
+    """Download a ZIP file from a URL and extract it to a destination directory.
+
+    The first top-level entry of the archive is taken to be its root folder. It is
+    extracted next to ``destination_path`` and renamed to match it, and the downloaded
+    archive is deleted afterwards.
+
+    Parameters
+    ----------
+    url : str
+        The URL of the ZIP file to download.
+    destination_path : str, Path
+        The path to which the ZIP file will be extracted. A trailing ``.zip`` suffix is
+        dropped when naming the extracted folder.
+    timeout : float
+        Number of seconds to wait for the server to respond before giving up.
+
+    Returns
+    -------
+    bool
+        True if the ZIP file was downloaded and extracted successfully, False otherwise.
+
+    """
+    destination_path = Path(destination_path)
+    destination_directory = destination_path.parent
+
+    # The archive is saved as "<folder>.zip" next to the folder it will be extracted to
+    if destination_path.suffix == ".zip":
+        zip_filepath = destination_path
+        # Drop the suffix so that the folder name cannot collide with the archive itself
+        new_folder_path = destination_directory / destination_path.stem
+    else:
+        zip_filepath = destination_directory / (destination_path.name + ".zip")
+        new_folder_path = destination_directory / destination_path.name
+
+    # Create the destination directory if it does not exist
+    destination_directory.mkdir(parents=True, exist_ok=True)
+
+    # Download the ZIP file (connection problems count as a failed download)
+    try:
+        response = requests.get(url, stream=True, timeout=timeout)
+    except requests.RequestException:
+        return False
+
+    with response:
+        if response.status_code != 200:
+            return False
+
+        try:
+            # Stream to disk so that large databases are never held in memory at once
+            with zip_filepath.open("wb") as zip_file:
+                for chunk in response.iter_content(chunk_size=1024 * 1024):
+                    zip_file.write(chunk)
+
+            # Extract the ZIP file
+            with zipfile.ZipFile(zip_filepath, "r") as zip_ref:
+                names = zip_ref.namelist()
+                if not names:
+                    return False
+                extracted_folder_name = Path(names[0]).parts[0]
+                zip_ref.extractall(destination_directory)
+        except (requests.RequestException, zipfile.BadZipFile):
+            return False
+        finally:
+            # Clean up the downloaded archive, whether or not extraction succeeded
+            if zip_filepath.exists():
+                zip_filepath.unlink()
+
+    # Rename the extracted folder to the desired name (nothing to do if it already matches)
+    extracted_folder_path = destination_directory / extracted_folder_name
+    if extracted_folder_path != new_folder_path:
+        extracted_folder_path.rename(new_folder_path)
+
+    return True