From f39f62a879dcc668351f4c3cbdba3068cb1d35f6 Mon Sep 17 00:00:00 2001 From: raluca-san Date: Sun, 4 Aug 2019 16:52:10 +0200 Subject: [PATCH] #12 create dataframe of all files from disk --- A_01_read_files_info.py | 83 +++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/A_01_read_files_info.py b/A_01_read_files_info.py index 6b7b1b8..b30fc59 100644 --- a/A_01_read_files_info.py +++ b/A_01_read_files_info.py @@ -8,57 +8,66 @@ import pandas as pd import B_01_ResampleSegmentations as ResizerClass + def create_paths(rootdir): list_all_ct_series = [] for subdir, dirs, files in os.walk(rootdir): - path, foldername = os.path.split(subdir) - for file in sorted(files): - try: - dcm_file = os.path.join(subdir, file) - ds = pydicom.read_file(dcm_file) - except Exception: - # not dicom file so continue until you find one - continue - path_img_folder = dcm_file - source_series_instance_uid = ds.SeriesInstanceUID - source_study_instance_uid = ds.StudyInstanceUID - source_series_number = ds.SeriesNumber - try: - path_reference_segm = ds.ReferencedImageSequence[0].ReferencedSOPInstanceUID - path_reference_src = ds.SourceImageSequence[0].ReferencedSOPInstanceUID - segment_label = ds.SegmentLabel - except Exception: - path_reference_segm = None - path_reference_src = None + if not len(files) > 1: + continue + else: + for file in sorted(files): + try: + dcm_file = os.path.join(subdir, file) + ds = pydicom.read_file(dcm_file) + except Exception: + # not dicom file so continue until you find one + continue + path_img_folder = dcm_file + source_series_instance_uid = ds.SeriesInstanceUID + source_study_instance_uid = ds.StudyInstanceUID + source_series_number = ds.SeriesNumber - # if the ct series is not found in the dictionary, add it - result = next((item for item in list_all_ct_series if - item["SeriesInstanceNumberUID"] == source_series_instance_uid), None) + try: + path_reference_segm = ds.ReferencedImageSequence[0].ReferencedSOPInstanceUID + path_reference_src = ds.SourceImageSequence[0].ReferencedSOPInstanceUID + lesion_number = ds.ReferencedImageSequence[0].ReferencedSegmentNumber + segment_label = ds.SegmentLabel + except AttributeError: + path_reference_segm = None + path_reference_src = None + lesion_number = None + segment_label = None - if result is None: # that means that that the img is not yet in the dictionary - dict_series_folder = { - "PathSeries": path_img_folder, - "SegmentLabel": segment_label, - "ReferenceSourceImgSeriesInstanceUID": path_reference_src, - "ReferenceSegmentationImgSeriesInstanceUID": path_reference_segm, - "SeriesNumber": source_series_number, - "SeriesInstanceNumberUID": source_series_instance_uid, - "StudyInstanceUID": source_study_instance_uid, - } - list_all_ct_series.append(dict_series_folder) + # if the ct series is not found in the dictionary, add it + result = next((item for item in list_all_ct_series if + item["SeriesInstanceNumberUID"] == source_series_instance_uid), None) - df_paths_mapping = pd.DataFrame(list_all_ct_series) - return df_paths_mapping + if result is None: # that means that that the img is not yet in the dictionary + dict_series_folder = { + "PathSeries": path_img_folder, + "SegmentLabel": segment_label, + "LesionNumber": lesion_number, + "ReferenceSourceImgSeriesInstanceUID": path_reference_src, + "ReferenceSegmentationImgSeriesInstanceUID": path_reference_segm, + "SeriesNumber": source_series_number, + "SeriesInstanceNumberUID": source_series_instance_uid, + "StudyInstanceUID": source_study_instance_uid, + } + list_all_ct_series.append(dict_series_folder) + df_paths_mapping = pd.DataFrame(list_all_ct_series) + return df_paths_mapping -if __name__ == '__main__': - rootdir = r"C:\tmp_patients\Pat_MAV_BE_B03\Study_0" # start with single patient folder, then load all the folders in the memory with glob +if __name__ == '__main__': + # start with single patient folder, then load all the folders in the memory with glob + rootdir = r"C:\tmp_patients\Pat_MAV_BE_B01_\Study_0" folder_path_new_resized_images = r" " flag_resize_only_segmentations = 'Y' flag_match_with_patient_studyID = 'N' flag_extract_max_size = 'N' df_paths_mapping = create_paths(rootdir) + print('Success') # now that we have the paths, we should read the folders into simpleitk objects # we need a loop for all the tumors and the ablations