Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] download zip #882

Merged
merged 28 commits into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
ed20882
add function to download zip files
danibene Aug 8, 2023
d068a88
change path to data to master branch
danibene Aug 8, 2023
8f57246
download zip if expected folder is not there
danibene Aug 8, 2023
61a3547
ensure that destination_directory is Path
danibene Aug 8, 2023
e8c0a0c
delete comments that seem to be copying mistake
danibene Aug 8, 2023
ec5ceb1
allow to specify zip filename
danibene Aug 9, 2023
50340da
update init
danibene Aug 11, 2023
7427cf3
refactor for just one arg for destination path
danibene Aug 11, 2023
d353f9e
set database_path as variable
danibene Aug 11, 2023
2e34a64
Revert "refactor for just one arg for destination path"
danibene Aug 11, 2023
d6c2d58
use pathlib
danibene Aug 11, 2023
d14357d
correct download url
danibene Aug 11, 2023
41840d6
allow for destination_path as optional arg
danibene Aug 11, 2023
2302082
rename extracted folder to the desired name
danibene Aug 11, 2023
7050137
rename output for consistency
danibene Aug 11, 2023
1c42eb6
fix renaming
danibene Aug 11, 2023
9f978c4
auto download for mit arrhythmia
danibene Aug 11, 2023
394e32d
wip auto download ludb (not tested)
danibene Aug 11, 2023
9bbdd2c
Revert "wip auto download ludb (not tested)"
danibene Aug 11, 2023
0a2e20f
import neurokit2 as nk
danibene Aug 11, 2023
5750b79
import neurokit2 as nk
danibene Aug 11, 2023
66724c5
add option to not unzip file
danibene Aug 11, 2023
437f219
import pathlib for iconoclasticism and consistency
danibene Aug 11, 2023
71a2516
make destination_path optional arg
danibene Aug 11, 2023
69e62d8
refactor to add download_from_url()
danibene Aug 16, 2023
4f704ae
Merge branch 'dev' into feature/auto_download_data
DominiqueMakowski Aug 22, 2023
ec4c058
Update neurokit2/data/database.py
DominiqueMakowski Aug 22, 2023
4b3ac76
Update neurokit2/data/database.py
DominiqueMakowski Aug 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions data/fantasia/download_fantasia.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,24 @@
import numpy as np
import wfdb
import os
from pathlib import Path

from neurokit2.data import download_zip
danibene marked this conversation as resolved.
Show resolved Hide resolved

files = os.listdir("./fantasia-database-1.0.0/")
database_path = "./fantasia-database-1.0.0/"

# Check if expected folder exists
if not os.path.exists(database_path):
url = "https://physionet.org/static/published-projects/fantasia/fantasia-database-1.0.0.zip"
download_successful = download_zip(url, database_path)
if not download_successful:
raise ValueError(
"NeuroKit error: download of Fantasia database failed. "
"Please download it manually from https://physionet.org/content/fantasia/1.0.0/ "
"and unzip it in the same folder as this script."
)

files = os.listdir(database_path)
files = [s.replace('.dat', '') for s in files if ".dat" in s]

dfs_ecg = []
Expand All @@ -24,7 +39,7 @@

for i, participant in enumerate(files):

data, info = wfdb.rdsamp("./fantasia-database-1.0.0/" + participant)
data, info = wfdb.rdsamp(str(Path(database_path, participant)))

# Get signal
data = pd.DataFrame(data, columns=info["sig_name"])
Expand All @@ -35,7 +50,7 @@
data["Database"] = "Fantasia"

# Get annotations
anno = wfdb.rdann("./fantasia-database-1.0.0/" + participant, 'ecg')
anno = wfdb.rdann(str(Path(database_path, participant)), 'ecg')
anno = anno.sample[np.where(np.array(anno.symbol) == "N")[0]]
anno = pd.DataFrame({"Rpeaks": anno})
anno["Participant"] = "Fantasia_" + participant
Expand All @@ -49,4 +64,4 @@

# Save
df_ecg = pd.concat(dfs_ecg).to_csv("ECGs.csv", index=False)
dfs_rpeaks = pd.concat(dfs_rpeaks).to_csv("Rpeaks.csv", index=False)
df_rpeaks = pd.concat(dfs_rpeaks).to_csv("Rpeaks.csv", index=False)
6 changes: 0 additions & 6 deletions data/lemon/download_lemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,6 @@

https://ftp.gwdg.de/pub/misc/MPI-Leipzig_Mind-Brain-Body-LEMON/EEG_MPILMBB_LEMON/EEG_Preprocessed_BIDS_ID/EEG_Preprocessed/

Steps:
1. Download the ZIP database from https://physionet.org/content/nstdb/1.0.0/
2. Open it with a zip-opener (WinZip, 7zip).
3. Extract the folder of the same name (named 'mit-bih-noise-stress-test-database-1.0.0') to the same folder as this script.
4. Run this script.

Credits:
pycrostates package by Mathieu Scheltienne and Victor Férat
"""
Expand Down
19 changes: 16 additions & 3 deletions data/mit_arrhythmia/download_mit_arrhythmia.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,22 @@
import numpy as np
import wfdb
import os
from neurokit2.data import download_zip

data_files = ["mit-bih-arrhythmia-database-1.0.0/" + file for file in os.listdir("mit-bih-arrhythmia-database-1.0.0") if ".dat" in file]
database_path = "./mit-bih-arrhythmia-database-1.0.0/"

# Check if expected folder exists
if not os.path.exists(database_path):
url = "https://physionet.org/static/published-projects/mitdb/mit-bih-arrhythmia-database-1.0.0.zip"
download_successful = download_zip(url, database_path)
if not download_successful:
raise ValueError(
"NeuroKit error: download of MIT-Arrhythmia database failed. "
"Please download it manually from https://alpha.physionet.org/content/mitdb/1.0.0/ "
"and unzip it in the same folder as this script."
)

data_files = [database_path + file for file in os.listdir(database_path) if ".dat" in file]

def read_file(file, participant):
"""Utility function
Expand Down Expand Up @@ -55,9 +68,9 @@ def read_file(file, participant):
dfs_rpeaks.append(anno)

# Store additional recording if available
if "x_" + file.replace("mit-bih-arrhythmia-database-1.0.0/", "") in os.listdir("mit-bih-arrhythmia-database-1.0.0/x_mitdb/"):
if "x_" + file.replace(database_path, "") in os.listdir(database_path + "x_mitdb/"):
print(" - Additional recording detected.")
data, anno = read_file("mit-bih-arrhythmia-database-1.0.0/x_mitdb/" + "x_" + file.replace("mit-bih-arrhythmia-database-1.0.0/", ""), participant)
data, anno = read_file(database_path + "/x_mitdb/" + "x_" + file.replace(database_path, ""), participant)
# Store with the rest
dfs_ecg.append(data)
dfs_rpeaks.append(anno)
Expand Down
3 changes: 2 additions & 1 deletion neurokit2/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
from .read_bitalino import read_bitalino
from .read_video import read_video
from .write_csv import write_csv
from .database import download_zip

__all__ = ["read_acqknowledge", "read_bitalino", "read_video", "data", "write_csv"]
__all__ = ["read_acqknowledge", "read_bitalino", "read_video", "data", "write_csv", "download_zip"]
3 changes: 1 addition & 2 deletions neurokit2/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,7 @@ def data(dataset="bio_eventrelated_100hz"):

dataset = dataset.lower()

# TODO: change this path back to "master"
path = "https://raw.githubusercontent.com/neuropsychology/NeuroKit/dev/data/"
path = "https://raw.githubusercontent.com/neuropsychology/NeuroKit/master/data/"

# Signals as vectors =======================
if dataset in ["eeg", "eeg_150hz", "eeg.txt"]:
Expand Down
53 changes: 53 additions & 0 deletions neurokit2/data/database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import requests
import zipfile
from pathlib import Path
danibene marked this conversation as resolved.
Show resolved Hide resolved

def download_zip(url, destination_path):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we make this somewhat more generic — something like download_from_url() with an unzip=True argument that, if the content is detected to be a zipped file, automatically unzips it?

That way we could use the function for both zipped and non-zipped content (although I'm not sure there would be such a need).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I just realized you meant downloading any file from a URL, not just a zip. Sure, I can do that too (though I feel we should then still have download_zip() call download_from_url(), since the function has zip-specific functionality such as removing the original zip file). I have to go now, but I can probably get back to it in a few days.

"""Download a ZIP file from a URL and extract it to a destination directory.

DominiqueMakowski marked this conversation as resolved.
Show resolved Hide resolved
Parameters:
-----------
url : str
The URL of the ZIP file to download.
destination_path : str, Path
The path to which the ZIP file will be extracted.

Returns:
--------
bool
True if the ZIP file was downloaded and extracted successfully, False otherwise.
"""
# Ensure that the destination path is a Path object ending with ".zip"
zip_filepath = Path(destination_path)
if zip_filepath.suffix != ".zip":
zip_filepath = Path(zip_filepath.parent, zip_filepath.name + ".zip")

# Create the destination directory if it does not exist
destination_directory = Path(destination_path).parent
Path(destination_directory).mkdir(parents=True, exist_ok=True)

# Download the ZIP file
response = requests.get(url)

if response.status_code == 200:
with zip_filepath.open("wb") as zip_file:
zip_file.write(response.content)

# Extract the ZIP file
with zipfile.ZipFile(zip_filepath, "r") as zip_ref:
extracted_folder_name = Path(zip_ref.namelist()[0]).parts[0]

# Extract the contents
zip_ref.extractall(destination_directory)

# Rename the extracted folder to the desired name
extracted_folder_path = destination_directory / extracted_folder_name
new_folder_path = destination_directory / Path(destination_path).name
extracted_folder_path.rename(new_folder_path)

# Clean up by removing the downloaded ZIP file
zip_filepath.unlink()

return True
else:
return False
Loading