Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Seriesgrouping #684

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion heudiconv/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
set_readonly,
treat_infofile,
write_config,
has_deprecated_seriesid,
)

if TYPE_CHECKING:
Expand Down Expand Up @@ -172,7 +173,8 @@ def prep_conversion(
# if conversion table(s) do not exist -- we need to prepare them
# (the *prepare* stage in https://github.com/nipy/heudiconv/issues/134)
# if overwrite - recalculate this anyways
reuse_conversion_table = op.exists(edit_file)
# MD: a check for the deprecated series_id is added here to avoid reusing the conversion table
reuse_conversion_table = op.exists(edit_file) and not has_deprecated_seriesid(edit_file)
# We also might need to redo it if changes in the heuristic file
# detected
# ref: https://github.com/nipy/heudiconv/issues/84#issuecomment-330048609
Expand Down
13 changes: 9 additions & 4 deletions heudiconv/dicoms.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from typing import TYPE_CHECKING, Any, Dict, List, NamedTuple, Optional, Union, overload
from unittest.mock import patch
import warnings
import hashlib

import pydicom as dcm

Expand Down Expand Up @@ -114,7 +115,7 @@ def create_seqinfo(mw: dw.Wrapper, series_files: list[str], series_id: str) -> S

def validate_dicom(
fl: str, dcmfilter: Optional[Callable[[dcm.dataset.Dataset], Any]]
) -> Optional[tuple[dw.Wrapper, tuple[int, str], Optional[str]]]:
) -> Optional[tuple[dw.Wrapper, tuple[int, str, str], Optional[str]]]:
"""
Parse DICOM attributes. Returns None if not valid.
"""
Expand All @@ -135,7 +136,7 @@ def validate_dicom(
try:
protocol_name = mw.dcm_data.ProtocolName
assert isinstance(protocol_name, str)
series_id = (int(mw.dcm_data.SeriesNumber), protocol_name)
series_id = (int(mw.dcm_data.SeriesNumber), protocol_name, mw.dcm_data.SeriesInstanceUID)
except AttributeError as e:
lgr.warning('Ignoring %s since not quite a "normal" DICOM: %s', fl, e)
return None
Expand All @@ -159,10 +160,13 @@ def validate_dicom(
class SeriesID(NamedTuple):
series_number: int
protocol_name: str
series_uid: str
file_studyUID: Optional[str] = None

def __str__(self) -> str:
s = f"{self.series_number}-{self.protocol_name}"
len_hash_hex = 8
suid_hex = hashlib.md5(self.series_uid.encode('utf-8')).hexdigest()[:len_hash_hex]
s = f"{self.series_number}-{self.protocol_name}-{suid_hex}"
if self.file_studyUID is not None:
s += f"-{self.file_studyUID}"
return s
Expand Down Expand Up @@ -285,7 +289,7 @@ def group_dicoms_into_seqinfos(
removeidx.append(idx)
continue
mw, series_id_, file_studyUID = mwinfo
series_id = SeriesID(series_id_[0], series_id_[1])
series_id = SeriesID(series_id_[0], series_id_[1], series_id_[2])
if per_studyUID:
series_id = series_id._replace(file_studyUID=file_studyUID)

Expand Down Expand Up @@ -320,6 +324,7 @@ def group_dicoms_into_seqinfos(
series_id = SeriesID(
mwgroup[idx].dcm_data.SeriesNumber,
mwgroup[idx].dcm_data.ProtocolName,
mwgroup[idx].dcm_data.SeriesInstanceUID,
)
if per_studyUID:
series_id = series_id._replace(file_studyUID=file_studyUID)
Expand Down
14 changes: 4 additions & 10 deletions heudiconv/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,15 +278,9 @@ def infotoids(
)
lgr.info("Study session for %r", study_session_info)

if study_session_info in study_sessions:
if grouping != "all":
# MG - should this blow up to mimic -d invocation?
lgr.warning(
"Existing study session with the same values (%r)."
" Skipping DICOMS %s",
study_session_info,
seqinfo.values(),
)
continue
if grouping != "all":
assert (study_session_info not in study_sessions), (
f"Existing study session {study_session_info} "
f"already in analyzed sessions {study_sessions.keys()}")
study_sessions[study_session_info] = seqinfo
return study_sessions
36 changes: 36 additions & 0 deletions heudiconv/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import stat
from subprocess import check_output
import sys
import string
import tempfile
from time import sleep
from types import ModuleType
Expand Down Expand Up @@ -222,6 +223,41 @@
return data


def is_deprecated_seriesid(series_id: str, len_hash_hex: int = 8) -> bool:
"""Return True if series_id is in deprecated format.
A deprecated series ID is in the format "series_number-protocol_name".
A non-deprecated series ID is in the format "series_number-protocol_name-seriesUID_hash_hex"
"""
# Check at least two '-' in the series_id
if series_id.count('-') <= 1:
return True

Check warning on line 233 in heudiconv/utils.py

View check run for this annotation

Codecov / codecov/patch

heudiconv/utils.py#L233

Added line #L233 was not covered by tests
# Check the first part of the series_id is a number
series_number = series_id.split('-')[0]
if not series_number.isdigit():
return True

Check warning on line 237 in heudiconv/utils.py

View check run for this annotation

Codecov / codecov/patch

heudiconv/utils.py#L237

Added line #L237 was not covered by tests
# Check the last part of the series_id is a hash in hexadecimal format of length len_hash_hex
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Check the last part of the series_id is a hash in hexadecimal format of length len_hash_hex
# Check the last part of the series_id is a hash in hexadecimal format of length len_hash_hex
# Note that protocol_name could contain -

is that correct and that is why [-1]?

hash_hex = series_id.split('-')[-1]
hex_digits = set(string.hexdigits)
if len(hash_hex) != len_hash_hex or not all(c in hex_digits for c in hash_hex):
return True

Check warning on line 242 in heudiconv/utils.py

View check run for this annotation

Codecov / codecov/patch

heudiconv/utils.py#L242

Added line #L242 was not covered by tests
# If all previous tests passed, then the series_id is not deprecated
return False
Comment on lines +241 to +244
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if len(hash_hex) != len_hash_hex or not all(c in hex_digits for c in hash_hex):
return True
# If all previous tests passed, then the series_id is not deprecated
return False
return len(hash_hex) != len_hash_hex or not all(c in hex_digits for c in hash_hex)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or may be better to use re (might need an import)

Suggested change
if len(hash_hex) != len_hash_hex or not all(c in hex_digits for c in hash_hex):
return True
# If all previous tests passed, then the series_id is not deprecated
return False
return re.match(f"[{string.hexdigits}]{{{len_hash_hex}}}$")

this function needs a unittest which would be trivial to do

Comment on lines +232 to +244
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

overall can be

Suggested change
if series_id.count('-') <= 1:
return True
# Check the first part of the series_id is a number
series_number = series_id.split('-')[0]
if not series_number.isdigit():
return True
# Check the last part of the series_id is a hash in hexadecimal format of length len_hash_hex
hash_hex = series_id.split('-')[-1]
hex_digits = set(string.hexdigits)
if len(hash_hex) != len_hash_hex or not all(c in hex_digits for c in hash_hex):
return True
# If all previous tests passed, then the series_id is not deprecated
return False
return not re.match(f"[{string.digits}]+-.+-[{string.hexdigits}]{{{len_hash_hex}}}$")

to just match regex... again -- just needs a unittest



def has_deprecated_seriesid(info_file: str) -> bool:
"""Return True if any series_id in the info_file is in deprecated format."""
info = read_config(info_file)
series_ids = [series_id for sublist in info.values() for series_id in sublist]
if any(is_deprecated_seriesid(series_id) for series_id in series_ids):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could be shorter

Suggested change
if any(is_deprecated_seriesid(series_id) for series_id in series_ids):
if any(map(is_deprecated_seriesid, series_ids)):

but it is ok either way.

lgr.warning(

Check warning on line 252 in heudiconv/utils.py

View check run for this annotation

Codecov / codecov/patch

heudiconv/utils.py#L252

Added line #L252 was not covered by tests
"Found deprecated series identifier in info file in the format"
"<series num>-<protocol name> instead of <series num>-<protocol name>-<UID hash>"
Comment on lines +253 to +254
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"Found deprecated series identifier in info file in the format"
"<series num>-<protocol name> instead of <series num>-<protocol name>-<UID hash>"
"Found deprecated series identifier in info file in the format "
"<series num>-<protocol name> instead of <series num>-<protocol name>-<UID hash>. "

"The existing conversion table will be ignored."
)
return True

Check warning on line 257 in heudiconv/utils.py

View check run for this annotation

Codecov / codecov/patch

heudiconv/utils.py#L257

Added line #L257 was not covered by tests
return False


def assure_no_file_exists(path: str | Path) -> None:
"""Check if file or symlink (git-annex?) exists, and if so -- remove"""
if os.path.lexists(path):
Expand Down