From b5b8e5aaedbe1a6b2cf58148db3460b53103c623 Mon Sep 17 00:00:00 2001 From: Miha Mirt Date: Sun, 15 Dec 2024 11:58:34 +0100 Subject: [PATCH 1/2] TCX Manipulation and tests refactoring, fix examples --- README.md | 31 +++++--- examples/calculate_training_load.py | 3 +- examples/convert_tcx_to_csv.py | 5 +- examples/dead_end_extraction.py | 3 +- examples/draw_map_with_identified_hills.py | 3 +- .../draw_map_with_identified_intervals.py | 3 +- examples/extract_data_inside_area.py | 3 +- examples/hill_data_extraction.py | 3 +- examples/identify_interruptions.py | 3 +- examples/integral_metrics_extraction.py | 3 +- examples/interval_data_extraction.py | 3 +- examples/missing_elevation_data_extraction.py | 3 +- examples/read_all_files.py | 5 +- examples/read_folder.py | 3 +- examples/read_tcx_file.py | 3 +- examples/weather_data_extraction.py | 3 +- sport_activities_features/tcx_manipulation.py | 75 +++++++++++-------- tests/data/README.md | 2 +- tests/test_area_identification.py | 3 +- tests/test_dead_end_identification.py | 3 +- tests/test_different_activities.py | 19 +++-- tests/test_file_manipulation.py | 5 +- tests/test_hill_identification.py | 3 +- tests/test_interruptions.py | 3 +- tests/test_interval_identification.py | 3 +- tests/test_tcx_file.py | 3 +- tests/test_training_loads.py | 6 +- tests/test_training_metrics.py | 5 +- tests/test_weather.py | 3 +- 29 files changed, 134 insertions(+), 79 deletions(-) diff --git a/README.md b/README.md index 2833c0f..427ae0e 100644 --- a/README.md +++ b/README.md @@ -154,10 +154,11 @@ from sport_activities_features.tcx_manipulation import TCXFile # Class for reading TCX files tcx_file=TCXFile() -data = tcx_file.read_one_file("path_to_the_file") # Represents data as dictionary of lists +tcx_exercise = tcx_file.read_one_file("path_to_the_file") +data = tcx_file.extract_activity_data(tcx_exercise) # Represents data as dictionary of lists # Alternative choice -data = tcx_file.read_one_file("path_to_the_file", numpy_array= True) # Represents data as dictionary of numpy.arrays +data = tcx_file.extract_activity_data(tcx_exercise, numpy_array= True) # Represents data as dictionary of numpy.arrays ``` @@ -185,7 +186,8 @@ from sport_activities_features.plot_data import PlotData # Read TCX file tcx_file = TCXFile() -activity = tcx_file.read_one_file("path_to_the_file") +tcx_exercise = tcx_file.read_one_file("path_to_the_file") +activity = tcx_file.extract_activity_data(tcx_exercise) # Detect hills in data Hill = HillIdentification(activity['altitudes'], 30) @@ -211,7 +213,8 @@ from sport_activities_features.tcx_manipulation import TCXFile # Reading the TCX file tcx_file = TCXFile() -activity = tcx_file.read_one_file("path_to_the_data") +tcx_exercise = tcx_file.read_one_file("path_to_the_file") +activity = tcx_file.extract_activity_data(tcx_exercise) # Identifying the intervals in the activity by power Intervals = IntervalIdentificationByPower(activity["distances"], activity["timestamps"], activity["altitudes"], 70) @@ -231,7 +234,8 @@ from sport_activities_features import TCXFile # Read TCX file tcx_file = TCXFile() -tcx_data = tcx_file.read_one_file("path_to_file") +tcx_exercise = tcx_file.read_one_file("path_to_the_file") +tcx_data = tcx_file.extract_activity_data(tcx_exercise) # Configure visual crossing api key visual_crossing_api_key = "weather_api_key" # https://www.visualcrossing.com/weather-api @@ -285,8 +289,8 @@ from sport_activities_features.tcx_manipulation import TCXFile # Read TCX file tcx_file = TCXFile() - -integral_metrics = tcx_file.extract_integral_metrics("path_to_the_file") +tcx_exercise = tcx_file.read_one_file("path_to_the_file") +integral_metrics = tcx_file.extract_integral_metrics(tcx_exercise) print(integral_metrics) @@ -299,7 +303,9 @@ from sport_activities_features.tcx_manipulation import TCXFile #read TCX file tcx_file = TCXFile() -tcx_data = tcx_file.read_one_file("path_to_the_file") +tcx_exercise = tcx_file.read_one_file("path_to_the_file") +tcx_data = tcx_file.extract_activity_data(tcx_exercise) + #configure visual crossing api key visual_crossing_api_key = "API_KEY" # https://www.visualcrossing.com/weather-api @@ -351,7 +357,8 @@ from sport_activities_features.tcx_manipulation import TCXFile # Reading the TCX file. tcx_file = TCXFile() -activity = tcx_file.read_one_file('path_to_the_data') +tcx_exercise = tcx_file.read_one_file("path_to_the_file") +activity = tcx_file.extract_activity_data(tcx_exercise) # Converting the read data to arrays. positions = np.array([*activity['positions']]) @@ -381,7 +388,8 @@ Identify interruption events from a TCX or GPX file. # read TCX file (also works with GPX files) tcx_file = TCXFile() -tcx_data = tcx_file.read_one_file("path_to_the_data") +tcx_exercise = tcx_file.read_one_file("path_to_the_file") +tcx_data = tcx_file.extract_activity_data(tcx_exercise) """ Time interval = time before and after the start of an event @@ -434,7 +442,8 @@ from sport_activities_features import ElevationIdentification from sport_activities_features import TCXFile tcx_file = TCXFile() -tcx_data = tcx_file.read_one_file('path_to_file') +tcx_exercise = tcx_file.read_one_file("path_to_the_file") +tcx_data = tcx_file.extract_activity_data(tcx_exercise) elevations = ElevationIdentification(tcx_data['positions']) """Adds tcx_data['elevation'] = eg. [124, 21, 412] for each position""" diff --git a/examples/calculate_training_load.py b/examples/calculate_training_load.py index 64d9e27..c148a4d 100644 --- a/examples/calculate_training_load.py +++ b/examples/calculate_training_load.py @@ -11,7 +11,8 @@ # Reading a TCX file. tcx_file = TCXFile() -activity = tcx_file.read_one_file('../datasets/15.tcx') +tcx_exercise = tcx_file.read_one_file('../datasets/15.tcx') +activity = tcx_file.extract_activity_data(tcx_exercise) timestamps = activity['timestamps'] heart_rates = activity['heartrates'] diff --git a/examples/convert_tcx_to_csv.py b/examples/convert_tcx_to_csv.py index 765ab2e..9776019 100644 --- a/examples/convert_tcx_to_csv.py +++ b/examples/convert_tcx_to_csv.py @@ -12,8 +12,9 @@ output_file = 'path_to_output_file' # Read TCX file -data = tcx_file.read_one_file( - input_file, +tcx_exercise = tcx_file.read_one_file(input_file) +data = tcx_file.extract_activity_data( + tcx_exercise, ) # Represents data as dictionary of lists # Convert dictionary of lists to pandas DataFrame diff --git a/examples/dead_end_extraction.py b/examples/dead_end_extraction.py index fa20ad4..0da5e55 100644 --- a/examples/dead_end_extraction.py +++ b/examples/dead_end_extraction.py @@ -8,7 +8,8 @@ # Reading the TCX file. tcx_file = TCXFile() -activity = tcx_file.read_one_file('path_to_the_file') +tcx_exercise = tcx_file.read_one_file('path_to_the_file') +activity = tcx_file.extract_activity_data(tcx_exercise) # Converting the read data to the array. positions = np.array([*activity['positions']]) diff --git a/examples/draw_map_with_identified_hills.py b/examples/draw_map_with_identified_hills.py index 340c2d4..1c604a4 100644 --- a/examples/draw_map_with_identified_hills.py +++ b/examples/draw_map_with_identified_hills.py @@ -6,7 +6,8 @@ # read TCX file tcx_file = TCXFile() -data = tcx_file.read_one_file('path_to_the_data') +tcx_exercise = tcx_file.read_one_file('path_to_the_data') +data = tcx_file.extract_activity_data(tcx_exercise) # detect hills in data Hill = HillIdentification(data['altitudes'], 30) diff --git a/examples/draw_map_with_identified_intervals.py b/examples/draw_map_with_identified_intervals.py index 94d1345..1f43fdb 100644 --- a/examples/draw_map_with_identified_intervals.py +++ b/examples/draw_map_with_identified_intervals.py @@ -8,6 +8,7 @@ # Reading the TCX file tcx_file = TCXFile() +tcx_exercise = tcx_file.read_one_file('path_to_the_data') ( activity_type, positions, @@ -17,7 +18,7 @@ timestamps, heartrates, speeds, -) = tcx_file.read_one_file('path_to_the_data').values() +) = tcx_file.extract_activity_data(tcx_exercise).values() # Identifying the intervals in the activity by power and drawing the map Intervals = IntervalIdentificationByPower(distances, timestamps, altitudes, 70) diff --git a/examples/extract_data_inside_area.py b/examples/extract_data_inside_area.py index d5b1ef0..1d7c8c0 100644 --- a/examples/extract_data_inside_area.py +++ b/examples/extract_data_inside_area.py @@ -16,7 +16,8 @@ # Reading all files in filder. for file in all_files: print('\rProgress: ', int(progress), '%', end='') - activity = tcx_file.read_one_file(file) + tcx_exercise = tcx_file.read_one_file(file) + activity = tcx_file.extract_activity_data(tcx_exercise) # Converting the read data to arrays. positions = np.array([*activity['positions']]) diff --git a/examples/hill_data_extraction.py b/examples/hill_data_extraction.py index 9fd7884..f006bb8 100644 --- a/examples/hill_data_extraction.py +++ b/examples/hill_data_extraction.py @@ -4,7 +4,8 @@ # read TCX file tcx_file = TCXFile() -activity = tcx_file.read_one_file('path_to_the_data') +tcx_exercise = tcx_file.read_one_file('path_to_the_data') +activity = tcx_file.extract_activity_data(tcx_exercise) # detect hills in data Hill = HillIdentification(activity['altitudes'], 30) diff --git a/examples/identify_interruptions.py b/examples/identify_interruptions.py index 4c454de..e19e525 100644 --- a/examples/identify_interruptions.py +++ b/examples/identify_interruptions.py @@ -9,7 +9,8 @@ # read TCX file (also works with GPX files) tcx_file = TCXFile() -tcx_data = tcx_file.read_one_file('path_to_the_data') +tcx_exercise = tcx_file.read_one_file('path_to_the_data') +tcx_data = tcx_file.extract_activity_data(tcx_exercise) """ Time interval = time before and after the start of an event diff --git a/examples/integral_metrics_extraction.py b/examples/integral_metrics_extraction.py index a653dfe..2a1b632 100644 --- a/examples/integral_metrics_extraction.py +++ b/examples/integral_metrics_extraction.py @@ -3,6 +3,7 @@ # read TCX file tcx_file = TCXFile() # extract integral metrics ans store it in dictionary -integral_metrics = tcx_file.extract_integral_metrics('path_to_the_file') +tcx_exercise = tcx_file.read_one_file('path_to_the_file') +integral_metrics = tcx_file.extract_integral_metrics(tcx_exercise) print(integral_metrics) diff --git a/examples/interval_data_extraction.py b/examples/interval_data_extraction.py index a49e3dc..3c2ed64 100644 --- a/examples/interval_data_extraction.py +++ b/examples/interval_data_extraction.py @@ -9,7 +9,8 @@ # Reading the TCX file tcx_file = TCXFile() -activity = tcx_file.read_one_file('path_to_the_data') +tcx_exercise = tcx_file.read_one_file('path_to_the_data') +activity = tcx_file.extract_activity_data(tcx_exercise) # Identifying the intervals in the activity by power Intervals = IntervalIdentificationByPower( diff --git a/examples/missing_elevation_data_extraction.py b/examples/missing_elevation_data_extraction.py index bdd2a22..54c3a1b 100644 --- a/examples/missing_elevation_data_extraction.py +++ b/examples/missing_elevation_data_extraction.py @@ -1,7 +1,8 @@ from sport_activities_features import ElevationIdentification, TCXFile tcx_file = TCXFile() -tcx_data = tcx_file.read_one_file('path_to_the_data') +tcx_exercise = tcx_file.read_one_file('path_to_the_data') +tcx_data = tcx_file.extract_activity_data(tcx_exercise) elevations = ElevationIdentification(tcx_data['positions']) """Adds tcx_data['elevation'] = eg. [124, 21, 412] for each position""" diff --git a/examples/read_all_files.py b/examples/read_all_files.py index 3d06c9c..81c86cf 100644 --- a/examples/read_all_files.py +++ b/examples/read_all_files.py @@ -15,9 +15,10 @@ # Extracting the data of all files activities = [] for file in all_files: + tcx_exercise = tcx_file.read_one_file(file) activity = {'ID': os.path.splitext(os.path.split(file)[-1])[0]} - activity.update(tcx_file.read_one_file(file)) - activity.update(tcx_file.extract_integral_metrics(file)) + activity.update(tcx_file.extract_activity_data(tcx_exercise)) + activity.update(tcx_file.extract_integral_metrics(tcx_exercise)) # Hills Hill = HillIdentification(activity['altitudes'], 30) diff --git a/examples/read_folder.py b/examples/read_folder.py index 32d8c46..59f9d89 100644 --- a/examples/read_folder.py +++ b/examples/read_folder.py @@ -6,5 +6,6 @@ # iterate through files and print total distance of activities for i in range(len(all_files)): - activity = tcx_file.read_one_file(all_files[i]) + tcx_exercise = tcx_file.read_one_file(all_files[i]) + activity = tcx_file.extract_activity_data(tcx_exercise) print('total distance: ', activity['total_distance'] / 1000) diff --git a/examples/read_tcx_file.py b/examples/read_tcx_file.py index 2eb14f2..39b225d 100644 --- a/examples/read_tcx_file.py +++ b/examples/read_tcx_file.py @@ -2,4 +2,5 @@ # read TCX file tcx_file = TCXFile() -data = tcx_file.read_one_file('path_to_the_file') +tcx_exercise = tcx_file.read_one_file('path_to_the_file') +data = tcx_file.extract_activity_data(tcx_exercise) diff --git a/examples/weather_data_extraction.py b/examples/weather_data_extraction.py index c59272d..3790a66 100644 --- a/examples/weather_data_extraction.py +++ b/examples/weather_data_extraction.py @@ -2,7 +2,8 @@ # Read TCX file tcx_file = TCXFile() -tcx_data = tcx_file.read_one_file('path_to_the_file') +tcx_exercise = tcx_file.read_one_file('path_to_the_file') +tcx_data = tcx_file.extract_activity_data(tcx_exercise) # Configure visual crossing api key # https://www.visualcrossing.com/weather-api diff --git a/sport_activities_features/tcx_manipulation.py b/sport_activities_features/tcx_manipulation.py index 7764368..fc015b7 100644 --- a/sport_activities_features/tcx_manipulation.py +++ b/sport_activities_features/tcx_manipulation.py @@ -3,7 +3,7 @@ import numpy as np from tcx2gpx import tcx2gpx -from tcxreader.tcxreader import TCXReader, TCXTrackPoint +from tcxreader.tcxreader import TCXReader, TCXTrackPoint, TCXExercise from sport_activities_features.file_manipulation import FileManipulation @@ -31,13 +31,29 @@ def read_directory(self, directory_name: str) -> list: self.all_files.append(file) return self.all_files - def read_one_file(self, filename: str, numpy_array=False) -> dict: - """Method for parsing one TCX file using the TCXReader.\n + def read_one_file(self, filename: str) -> dict: + """Method for reading a TCXExercise object using the TCXReader.\n Args: filename (str): name of the TCX file to be read + Returns: + tcx (TCXExercise): + TCXExercise object with all the data from the file. + + Note: + In the case of missing value in raw data, we assign None. + """ + tcx = TCXReader().read(filename) + return tcx + + + def extract_activity_data(self, tcx: TCXExercise, numpy_array = False) -> dict: + """Method for parsing one TCX file using the TCXReader.\n + Args: + tcx (TCXExercise): + TCXExercise object to be read numpy_array (bool): - if set to True dictionary lists are transformed into numpy arrays + if True, dictionary lists are transformed into numpy arrays Returns: dict: { @@ -54,8 +70,6 @@ def read_one_file(self, filename: str, numpy_array=False) -> dict: Note: In the case of missing value in raw data, we assign None. """ - tcx = TCXReader().read(filename) - # handling missing data - should be improved in original tcxparser try: activity_type = tcx.activity_type @@ -119,12 +133,12 @@ def read_one_file(self, filename: str, numpy_array=False) -> dict: 'start_time': tcx.start_time, } return activity - - def extract_integral_metrics(self, filename: str) -> dict: - """Method for parsing one TCX file and extracting integral metrics.\n + + def extract_integral_metrics(self, tcx_exercise: TCXExercise) -> dict: + """Method for extracting integral metrics from a TCXExercise.\n Args: - filename (str): - name of the TCX file to be read + tcx_exercise (TCXExercise): + TCXExercise object to be read Returns: dict: { @@ -149,101 +163,100 @@ def extract_integral_metrics(self, filename: str) -> dict: 'speed_max': speed_max }. """ - tcx = TCXReader().read(filename) # handling missing data in raw files try: - activity_type = tcx.activity_type + activity_type = tcx_exercise.activity_type except BaseException: activity_type = None try: - distance = tcx.distance + distance = tcx_exercise.distance except BaseException: distance = None try: - duration = tcx.duration + duration = tcx_exercise.duration except BaseException: duration = None try: - calories = tcx.calories + calories = tcx_exercise.calories except BaseException: calories = None try: - hr_avg = tcx.hr_avg + hr_avg = tcx_exercise.hr_avg except BaseException: hr_avg = None try: - hr_max = tcx.hr_max + hr_max = tcx_exercise.hr_max except BaseException: hr_max = None try: - hr_min = tcx.hr_min + hr_min = tcx_exercise.hr_min except BaseException: hr_min = None try: - altitude_avg = tcx.altitude_avg + altitude_avg = tcx_exercise.altitude_avg except BaseException: altitude_avg = None try: - altitude_max = tcx.altitude_max + altitude_max = tcx_exercise.altitude_max except BaseException: altitude_max = None try: - altitude_min = tcx.altitude_min + altitude_min = tcx_exercise.altitude_min except BaseException: altitude_min = None try: - ascent = tcx.ascent + ascent = tcx_exercise.ascent except BaseException: ascent = None try: - descent = tcx.descent + descent = tcx_exercise.descent except BaseException: descent = None try: - steps = tcx.lx_ext['Steps'] + steps = tcx_exercise.lx_ext['Steps'] except BaseException: steps = None try: - cadence_avg = tcx.cadence_avg + cadence_avg = tcx_exercise.cadence_avg except BaseException: cadence_avg = None try: - cadence_max = tcx.cadence_max + cadence_max = tcx_exercise.cadence_max except BaseException: cadence_max = None try: - watts_avg = tcx.tpx_ext_stats['Watts']['avg'] + watts_avg = tcx_exercise.tpx_ext_stats['Watts']['avg'] except BaseException: watts_avg = None try: - watts_max = tcx.tpx_ext_stats['Watts']['max'] + watts_max = tcx_exercise.tpx_ext_stats['Watts']['max'] except BaseException: watts_max = None try: - speed_avg = tcx.avg_speed + speed_avg = tcx_exercise.avg_speed except BaseException: speed_avg = None try: - speed_max = tcx.max_speed + speed_max = tcx_exercise.max_speed except BaseException: speed_max = None diff --git a/tests/data/README.md b/tests/data/README.md index 13f4163..939fc56 100644 --- a/tests/data/README.md +++ b/tests/data/README.md @@ -10,7 +10,7 @@ UTF8 formatted Gpx files testing (Thanks Luka Koprivc for donating these test fi - cross-country-skiing_activity_1.tcx: an activity that represents cross-country skiing activity written in the tcx file -- nodes-test.temp: pickled array of position nodes retrieved from dictionary TcxFile.read_one_file(filename)['positions'] +- nodes-test.temp: pickled array of position nodes retrieved from dictionary TCXFile.extract_activity_data(tcx_exercise)['positions'] - pool_swim-activity_1.tcx: an activity that represents pool swim activity written in the tcx file diff --git a/tests/test_area_identification.py b/tests/test_area_identification.py index df193b7..5bc09a5 100644 --- a/tests/test_area_identification.py +++ b/tests/test_area_identification.py @@ -11,7 +11,8 @@ class TestAreaIdentification(TestCase): def setUp(self): filename = os.path.join(os.path.dirname(__file__), 'data', '15.tcx') tcx_file = TCXFile() - self.activity = tcx_file.read_one_file(filename) + tcx_exercise = tcx_file.read_one_file(filename) + self.activity = tcx_file.extract_activity_data(tcx_exercise) # Converting the read data to arrays. positions = np.array([*self.activity['positions']]) diff --git a/tests/test_dead_end_identification.py b/tests/test_dead_end_identification.py index 6ca696b..5978a4c 100644 --- a/tests/test_dead_end_identification.py +++ b/tests/test_dead_end_identification.py @@ -14,7 +14,8 @@ def setUp(self): # Reading the TCX file. filename = os.path.join(os.path.dirname(__file__), 'data', '15.tcx') tcx_file = TCXFile() - self.activity = tcx_file.read_one_file(filename) + tcx_exercise = tcx_file.read_one_file(filename) + self.activity = tcx_file.extract_activity_data(tcx_exercise) self.positions = np.array([*self.activity['positions']]) self.distances = np.array([*self.activity['distances']]) diff --git a/tests/test_different_activities.py b/tests/test_different_activities.py index 7c320ba..d558086 100644 --- a/tests/test_different_activities.py +++ b/tests/test_different_activities.py @@ -8,7 +8,9 @@ class TestTCXFile(TestCase): def setUp(self): filename = os.path.join(os.path.dirname(__file__), 'data', '15.tcx') self.tcx_file = TCXFile() - self.data = self.tcx_file.read_one_file(filename) + tcx_exercise = self.tcx_file.read_one_file(filename) + self.data = self.tcx_file.extract_activity_data(tcx_exercise) + def test_total_distance(self): self.assertAlmostEqual(self.data['total_distance'], 116366.98, 2) @@ -29,7 +31,8 @@ def setUp(self): os.path.dirname(__file__), 'data', 'sup_activity_1.tcx', ) self.tcx_file = TCXFile() - self.data = self.tcx_file.extract_integral_metrics(filename) + tcx_exercise = self.tcx_file.read_one_file(filename) + self.data = self.tcx_file.extract_integral_metrics(tcx_exercise) def test_total_steps(self): assert self.data['steps'] == 491 @@ -43,7 +46,8 @@ def setUp(self): os.path.dirname(__file__), 'data', 'swimming_activity_1.tcx', ) self.tcx_file = TCXFile() - self.data = self.tcx_file.extract_integral_metrics(filename) + tcx_exercise = self.tcx_file.read_one_file(filename) + self.data = self.tcx_file.extract_integral_metrics(tcx_exercise) def test_total_calories(self): assert self.data['calories'] == 284 @@ -62,7 +66,8 @@ def setUp(self): 'cross-country-skiing_activity_1.tcx', ) self.tcx_file = TCXFile() - self.data = self.tcx_file.extract_integral_metrics(filename) + tcx_exercise = self.tcx_file.read_one_file(filename) + self.data = self.tcx_file.extract_integral_metrics(tcx_exercise) def test_total_calories(self): assert self.data['calories'] == 532 @@ -79,7 +84,8 @@ def setUp(self): os.path.dirname(__file__), 'data', 'walking_activity_1.tcx', ) self.tcx_file = TCXFile() - self.data = self.tcx_file.extract_integral_metrics(filename) + tcx_exercise = self.tcx_file.read_one_file(filename) + self.data = self.tcx_file.extract_integral_metrics(tcx_exercise) def test_total_calories(self): assert self.data['calories'] == 329 @@ -96,7 +102,8 @@ def setUp(self): os.path.dirname(__file__), 'data', 'pool_swim-activity_1.tcx', ) self.tcx_file = TCXFile() - self.data = self.tcx_file.extract_integral_metrics(filename) + tcx_exercise = self.tcx_file.read_one_file(filename) + self.data = self.tcx_file.extract_integral_metrics(tcx_exercise) def test_total_calories(self): assert self.data['calories'] == 329 diff --git a/tests/test_file_manipulation.py b/tests/test_file_manipulation.py index 51ce6d7..3b8a9e3 100644 --- a/tests/test_file_manipulation.py +++ b/tests/test_file_manipulation.py @@ -8,8 +8,9 @@ class TestTCXFile(TestCase): def setUp(self): filename = os.path.join(os.path.dirname(__file__), 'data', '15.tcx') self.tcx_file: TCXFile = TCXFile() - self.data_with_missing = self.tcx_file.read_one_file(filename) - self.data_without_missing = self.tcx_file.read_one_file(filename) + tcx_exercise = self.tcx_file.read_one_file(filename) + self.data_with_missing = self.tcx_file.extract_activity_data(tcx_exercise) + self.data_without_missing = self.tcx_file.extract_activity_data(tcx_exercise) self.tcx_file.linear_fill_missing_values( self.data_without_missing, 'heartrates', 15, ) diff --git a/tests/test_hill_identification.py b/tests/test_hill_identification.py index 42570f6..9f0f3e9 100644 --- a/tests/test_hill_identification.py +++ b/tests/test_hill_identification.py @@ -13,7 +13,8 @@ class TestHillIdentification(TestCase): def setUp(self): filename = os.path.join(os.path.dirname(__file__), "data", "15.tcx") tcx_file = TCXFile() - self.activity = tcx_file.read_one_file(filename) + tcx_exercise = tcx_file.read_one_file(filename) + self.activity = tcx_file.extract_activity_data(tcx_exercise) self.hill = HillIdentification( self.activity["altitudes"], self.activity["distances"], 30 ) diff --git a/tests/test_interruptions.py b/tests/test_interruptions.py index 8bc4a16..4c8f7a1 100644 --- a/tests/test_interruptions.py +++ b/tests/test_interruptions.py @@ -9,7 +9,8 @@ class TestInterruptionProcessor(TestCase): def setUp(self): filename = os.path.join(os.path.dirname(__file__), 'data', '15.tcx') tcx_file = TCXFile() - tcx = tcx_file.read_one_file(filename) + tcx_exercise = tcx_file.read_one_file(filename) + tcx = tcx_file.extract_activity_data(tcx_exercise) interruptionProcessor = InterruptionProcessor( time_interval=60, diff --git a/tests/test_interval_identification.py b/tests/test_interval_identification.py index c9eec9f..5955c6f 100644 --- a/tests/test_interval_identification.py +++ b/tests/test_interval_identification.py @@ -12,7 +12,8 @@ class TestHillIdentification(TestCase): def setUp(self): filename = os.path.join(os.path.dirname(__file__), 'data', '2.tcx') tcx_file = TCXFile() - self.activity = tcx_file.read_one_file(filename) + tcx_exercise = tcx_file.read_one_file(filename) + self.activity = tcx_file.extract_activity_data(tcx_exercise) # Identifying the intervals in the activity by power IntervalsPower = IntervalIdentificationByPower( diff --git a/tests/test_tcx_file.py b/tests/test_tcx_file.py index b520639..0c9e237 100644 --- a/tests/test_tcx_file.py +++ b/tests/test_tcx_file.py @@ -8,7 +8,8 @@ class TestTCXFile(TestCase): def setUp(self): filename = os.path.join(os.path.dirname(__file__), 'data', '15.tcx') self.tcx_file = TCXFile() - self.data = self.tcx_file.read_one_file(filename) + tcx_exercise = self.tcx_file.read_one_file(filename) + self.data = self.tcx_file.extract_activity_data(tcx_exercise) def test_total_distance(self): self.assertAlmostEqual(self.data['total_distance'], 116366.98, 2) diff --git a/tests/test_training_loads.py b/tests/test_training_loads.py index 1f92fe9..2375fa8 100644 --- a/tests/test_training_loads.py +++ b/tests/test_training_loads.py @@ -61,7 +61,8 @@ def setUp(self): """Setting up the test.""" filename = os.path.join(os.path.dirname(__file__), 'data', '15.tcx') tcx_file = TCXFile() - activity = tcx_file.read_one_file(filename) + tcx_exercise = tcx_file.read_one_file(filename) + activity = tcx_file.extract_activity_data(tcx_exercise) timestamps = activity['timestamps'] heart_rates = activity['heartrates'] self.__edwards = EdwardsTRIMP(heart_rates, timestamps, 200) @@ -86,7 +87,8 @@ def setUp(self): """Setting up the test.""" filename = os.path.join(os.path.dirname(__file__), 'data', '15.tcx') tcx_file = TCXFile() - activity = tcx_file.read_one_file(filename) + tcx_exercise = tcx_file.read_one_file(filename) + activity = tcx_file.extract_activity_data(tcx_exercise) timestamps = activity['timestamps'] heart_rates = activity['heartrates'] self.__lucia = LuciaTRIMP(heart_rates, timestamps, VT1=160, VT2=180) diff --git a/tests/test_training_metrics.py b/tests/test_training_metrics.py index 5ba2b2e..4ac9245 100644 --- a/tests/test_training_metrics.py +++ b/tests/test_training_metrics.py @@ -10,8 +10,9 @@ class TestTrainingMetrics(TestCase): def setUp(self): filename = os.path.join(os.path.dirname(__file__), 'data', '11.tcx') self.tcx_file = TCXFile() - self.data = self.tcx_file.extract_integral_metrics(filename) - self.raw_data = TCXReader().read(filename) + tcx_exercise = self.tcx_file.read_one_file(filename) + self.data = self.tcx_file.extract_integral_metrics(tcx_exercise) + self.raw_data = tcx_exercise def test_functional_threshold_power(self): tm_instance = TrainingMetrics() diff --git a/tests/test_weather.py b/tests/test_weather.py index 3d03235..8664a54 100644 --- a/tests/test_weather.py +++ b/tests/test_weather.py @@ -14,7 +14,8 @@ def setUp(self): 'weather_test.temp', ) tcx_file = TCXFile() - self.data = tcx_file.read_one_file(filename) + tcx_exercise = tcx_file.read_one_file(filename) + self.data = tcx_file.extract_activity_data(tcx_exercise) self.weather = None with open(weather_external_data, 'rb') as input: self.weather = pickle.load(input) From 2f5bc2e0b88af37131c4196faf57d8f0a00bae85 Mon Sep 17 00:00:00 2001 From: Miha Mirt Date: Sun, 15 Dec 2024 13:00:52 +0100 Subject: [PATCH 2/2] GPX Manipulation and tests refactoring, fix examples --- README.md | 5 +- examples/convert_gpx_to_csv.py | 5 +- examples/read_gpx_file.py | 3 +- sport_activities_features/gpx_manipulation.py | 191 +++++++++++++++--- tests/test_gpx_file.py | 6 +- 5 files changed, 175 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 427ae0e..9c6b9b5 100644 --- a/README.md +++ b/README.md @@ -170,10 +170,11 @@ from sport_activities_features.gpx_manipulation import GPXFile gpx_file=GPXFile() # Read the file and generate a dictionary with -data = gpx_file.read_one_file("path_to_the_file") # Represents data as dictionary of lists +gpx_exercise = gpx_file.read_one_file("path_to_the_file") +data = gpx_file.extract_activity_data(gpx_exercise) # Represents data as dictionary of lists # Alternative choice -data = gpx_file.read_one_file("path_to_the_file", numpy_array= True) # Represents data as dictionary of numpy.arrays +data = gpx_file.extract_activity_data(gpx_exercise, numpy_array= True) # Represents data as dictionary of numpy.arrays ``` diff --git a/examples/convert_gpx_to_csv.py b/examples/convert_gpx_to_csv.py index 2272798..ac402d8 100644 --- a/examples/convert_gpx_to_csv.py +++ b/examples/convert_gpx_to_csv.py @@ -12,8 +12,9 @@ output_file = 'path_to_output_file' # Read GPX file -data = gpx_file.read_one_file( - input_file, +gpx_exercise = gpx_file.read_one_file(input_file) +data = gpx_file.extract_activity_data( + gpx_exercise, ) # Represents data as dictionary of lists # Convert dictionary of lists to pandas DataFrame diff --git a/examples/read_gpx_file.py b/examples/read_gpx_file.py index ac091b5..ef5c3ab 100644 --- a/examples/read_gpx_file.py +++ b/examples/read_gpx_file.py @@ -2,4 +2,5 @@ # read GPX file gpx_file = GPXFile() -data = gpx_file.read_one_file('path_to_the_file') +gpx_exercise = gpx_file.read_one_file('path_to_the_file') +data = gpx_file.extract_activity_data(gpx_exercise) diff --git a/sport_activities_features/gpx_manipulation.py b/sport_activities_features/gpx_manipulation.py index 5a35cda..a56d9f2 100644 --- a/sport_activities_features/gpx_manipulation.py +++ b/sport_activities_features/gpx_manipulation.py @@ -3,6 +3,7 @@ import geopy import gpxpy +import gpxpy.gpx import numpy as np from sport_activities_features.file_manipulation import FileManipulation @@ -107,14 +108,115 @@ def from_GPX( self.cadence = cadence self.watts = watts +class GPXExercise: + """Class for storing exercise data from a GPX file.\n + Args: + raw_data (GPX): + raw data from the GPX file. + trackpoints (list): + list of GPXTrackPoint objects. + activity_type (str): + type of the activity (e.g. Biking). + distance (float): + total distance of exercise in meters. + duration (float): + duration of exercise in seconds. + calories (int): + calories burned during the exercise. + hr_avg (float): + average heart rate during the exercise. + hr_max (int): + maximum heart rate during the exercise. + hr_min (int): + minimum heart rate during the exercise. + altitude_avg (float): + average altitude in meters. + altitude_max (float): + maximum altitude in meters. + altitude_min (float): + minimum altitude in meters. + ascent (float): + total ascent in meters. + descent (float): + total descent in meters. + """ -class GPXFile(FileManipulation): - """Class for reading GPX files.""" + def __init__(self, raw_data: gpxpy.gpx.GPX, trackpoints: list = None) -> None: + """Initialisation method for GPXExercise class.\n + Args: + raw_data (GPX): + raw data from the GPX file. + trackpoints (list): + list of GPXTrackPoint objects. + """ + if trackpoints is None: + trackpoints = [] + self.trackpoints = trackpoints + self.raw_data = raw_data + self._calculate_values() + + def _calculate_values(self): + gpx = self.raw_data + try: + self.activity_type = gpx.tracks[0].type + except BaseException: + self.activity_type = None - def __init__(self) -> None: - """Initialisation method for GPXFile class.""" - self.all_files = [] + try: + self.distance = gpx['total_distance'] + except BaseException: + self.distance = None + + try: + self.duration = gpx['timestamps'][-1] - gpx['timestamps'][0] + except BaseException: + self.duration = None + + try: + self.calories = None + except BaseException: + self.calories = None + + try: + self.hr_avg = sum(gpx['heartrates']) / len(gpx['heartrates']) + except BaseException: + self.hr_avg = None + + try: + self.hr_max = max(gpx['heartrates']) + except BaseException: + self.hr_max = None + + try: + self.hr_min = min(gpx['heartrates']) + except BaseException: + self.hr_min = None + + try: + self.altitude_avg = sum(gpx['altitudes']) / len(gpx['altitudes']) + except BaseException: + self.altitude_avg = None + + try: + self.altitude_max = max(gpx['altitudes']) + except BaseException: + self.altitude_max = None + + try: + self.altitude_min = min(gpx['altitudes']) + except BaseException: + self.altitude_min = None + try: + self.ascent = self.__ascent(gpx['altitudes']) + except BaseException: + self.ascent = None + + try: + self.descent = self.__descent(gpx['altitudes']) + except BaseException: + self.descent = None + def __ascent(self, altitudes: list) -> int: """Method for calculating the total ascent from a list of altitudes.\n Args: @@ -143,6 +245,15 @@ def __descent(self, altitudes: list) -> int: descent += altitudes[index - 1] - altitude return descent + +class GPXFile(FileManipulation): + """Class for reading GPX files.""" + + def __init__(self) -> None: + """Initialisation method for GPXFile class.""" + self.all_files = [] + + def read_directory(self, directory_name: str) -> list: """Method for finding all GPX files in a directory.\n Args: @@ -158,13 +269,11 @@ def read_directory(self, directory_name: str) -> list: self.all_files.append(file) return self.all_files - def read_one_file(self, filename, numpy_array=False): + def read_one_file(self, filename: str) -> GPXExercise: """Method for parsing one GPX file.\n Args: filename (str): name of the TCX file to be read - numpy_array (bool): - if set to true dictionary lists are transformed into numpy arrays Returns: activity (dict): { @@ -183,7 +292,7 @@ def read_one_file(self, filename, numpy_array=False): """ NAMESPACE = '{http://www.garmin.com/xmlschemas/TrackPointExtension/v1}' points = [] - gpx = None + gpx = None try: gpx_file = open(filename, encoding='utf-8') gpx = gpxpy.parse(gpx_file) @@ -238,9 +347,36 @@ def read_one_file(self, filename, numpy_array=False): previous_point = trackpoint gpx_file.close() + gpx_exercise = GPXExercise(gpx,points) + return gpx_exercise + + + def extract_activity_data(self, gpx: GPXExercise, numpy_array = False) -> dict: + """Method for parsing one GPX file.\n + Args: + gpx (GPXExercise): + GPXExercise object to be read + numpy_array (bool): + if True, dictionary lists are transformed into numpy arrays + Returns: + activity (dict): + { + 'activity_type': activity_type, + 'positions': positions, + 'altitudes': altitudes, + 'distances': distances, + 'total_distance': total_distance, + 'timestamps': timestamps, + 'heartrates': heartrates, + 'speeds': speeds + } + + Note: + In the case of missing value in raw data, we assign None. + """ # handling missing data - should be improved in original try: - activity_type = gpx.tracks[0].type + activity_type = gpx.raw_data.tracks[0].type except BaseException: activity_type = None @@ -250,8 +386,8 @@ def read_one_file(self, filename, numpy_array=False): timestamps = [] heartrates = [] speeds = [] - trackpoint: TCXTrackPoint - for trackpoint in points: + trackpoint: GPXTrackPoint + for trackpoint in gpx.trackpoints: positions.append((trackpoint.latitude, trackpoint.longitude)) altitudes.append(trackpoint.elevation) distances.append(trackpoint.distance) @@ -284,8 +420,8 @@ def read_one_file(self, filename, numpy_array=False): } return activity - - def extract_integral_metrics(self, filename) -> dict: + + def extract_integral_metrics(self, gpx_exercise: GPXExercise) -> dict: """Method for parsing one GPX file and extracting integral metrics.\n Args: filename (str): @@ -306,67 +442,66 @@ def extract_integral_metrics(self, filename) -> dict: "ascent": ascent, "descent": descent, }. - """ - gpx = self.read_one_file(filename) + """ # handling missing data in raw files try: - activity_type = gpx.tracks[0].type + activity_type = gpx_exercise.activity_type except BaseException: activity_type = None try: - distance = gpx['total_distance'] + distance = gpx_exercise.distance except BaseException: distance = None try: - duration = gpx['timestamps'][-1] - gpx['timestamps'][0] + duration = gpx_exercise.duration except BaseException: duration = None try: - calories = None + calories = gpx_exercise.calories except BaseException: calories = None try: - hr_avg = sum(gpx['heartrates']) / len(gpx['heartrates']) + hr_avg = gpx_exercise.hr_avg except BaseException: hr_avg = None try: - hr_max = max(gpx['heartrates']) + hr_max = gpx_exercise.hr_max except BaseException: hr_max = None try: - hr_min = min(gpx['heartrates']) + hr_min = gpx_exercise.hr_min except BaseException: hr_min = None try: - altitude_avg = sum(gpx['altitudes']) / len(gpx['altitudes']) + altitude_avg = gpx_exercise.altitude_avg except BaseException: altitude_avg = None try: - altitude_max = max(gpx['altitudes']) + altitude_max = gpx_exercise.altitude_max except BaseException: altitude_max = None try: - altitude_min = min(gpx['altitudes']) + altitude_min = gpx_exercise.altitude_min except BaseException: altitude_min = None try: - ascent = self.__ascent(gpx['altitudes']) + ascent = gpx_exercise.ascent except BaseException: ascent = None try: - descent = self.__descent(gpx['altitudes']) + descent = gpx_exercise.descent except BaseException: descent = None diff --git a/tests/test_gpx_file.py b/tests/test_gpx_file.py index 32f657e..3320834 100644 --- a/tests/test_gpx_file.py +++ b/tests/test_gpx_file.py @@ -10,7 +10,8 @@ def setUp(self): os.path.dirname(__file__), 'data', 'riderx3.gpx', ) self.gpx_file = GPXFile() - self.data = self.gpx_file.read_one_file(filename) + gpx_exercise = self.gpx_file.read_one_file(filename) + self.data = self.gpx_file.extract_activity_data(gpx_exercise) def test_total_distance(self): self.assertAlmostEqual(self.data['total_distance'], 5774.703, 2) @@ -39,7 +40,8 @@ def test_utf8_formatting(self): ] for index, f in enumerate(utf8_filenames): filename = os.path.join(os.path.dirname(__file__), 'data', f) - data = self.gpx_file.read_one_file(filename) + gpx_exercise = self.gpx_file.read_one_file(filename) + data = self.gpx_file.extract_activity_data(gpx_exercise) self.assertAlmostEqual( data['total_distance'], utf8_distances[index], places=5, )