From 8c88c05aa55d8550b9c928bbe4f85d47373bae3d Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Thu, 26 Sep 2024 14:03:34 +0200 Subject: [PATCH 01/13] Fix missing --- audbcards/core/templates/datacard_tables.j2 | 1 + 1 file changed, 1 insertion(+) diff --git a/audbcards/core/templates/datacard_tables.j2 b/audbcards/core/templates/datacard_tables.j2 index 0b11de5..c2a7fea 100644 --- a/audbcards/core/templates/datacard_tables.j2 +++ b/audbcards/core/templates/datacard_tables.j2 @@ -41,6 +41,7 @@ Tables {% for column in row %}

{{ column }}

{% endfor %} + {% endif %} {% endfor %} From d3d7db391d31aa5fabd29c472298abe7eee24e23 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Thu, 26 Sep 2024 14:11:21 +0200 Subject: [PATCH 02/13] Add Dataset.tables_rows --- audbcards/core/dataset.py | 41 ++++++++++++++++++--- audbcards/core/templates/datacard_tables.j2 | 1 + tests/test_dataset.py | 6 +++ 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/audbcards/core/dataset.py b/audbcards/core/dataset.py index 36137df..e805e66 100644 --- a/audbcards/core/dataset.py +++ b/audbcards/core/dataset.py @@ -541,12 +541,7 @@ def tables_preview(self) -> typing.Dict[str, typing.List[typing.List[str]]]: """ preview = {} for table in list(self.header): - df = audb.load_table( - self.name, - table, - version=self.version, - verbose=False, - ) + df = self._tables[table] df = df.reset_index() header = [df.columns.tolist()] body = df.head(5).astype("string").values.tolist() @@ -555,6 +550,26 @@ def tables_preview(self) -> typing.Dict[str, typing.List[typing.List[str]]]: preview[table] = header + body return preview + @functools.cached_property + def tables_rows(self) -> typing.Dict[str, int]: + """Number of rows for each table of the dataset. + + Returns: + dictionary with table IDs as keys + and number of rows as values + + Examples: + >>> ds = Dataset("emodb", "1.4.1") + >>> ds.tables_rows["speaker"] + 10 + + """ + rows = {} + for table in list(self.header): + df = self._tables[table] + rows[table] = len(df) + return rows + @functools.cached_property def tables_table(self) -> typing.List[str]: """Tables of the dataset.""" @@ -751,6 +766,20 @@ def _segments(self) -> pd.MultiIndex: index = audformat.utils.union([index, df.index]) return index + @functools.cached_property + def _tables(self) -> typing.Dict[str, pd.DataFrame]: + """Dataframes of tables in the dataset.""" + tables = {} + for table in list(self.header): + df = audb.load_table( + self.name, + table, + version=self.version, + verbose=False, + ) + tables[table] = df + return tables + @staticmethod def _map_iso_languages(languages: typing.List[str]) -> typing.List[str]: r"""Calculate ISO languages for a list of languages. diff --git a/audbcards/core/templates/datacard_tables.j2 b/audbcards/core/templates/datacard_tables.j2 index c2a7fea..4d2e0a8 100644 --- a/audbcards/core/templates/datacard_tables.j2 +++ b/audbcards/core/templates/datacard_tables.j2 @@ -44,6 +44,7 @@ Tables {% endif %} {% endfor %} +

{{ tables_rows[row[0]] }} rows

diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 38d559c..b5168e8 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -205,6 +205,12 @@ def test_dataset(audb_cache, tmpdir, repository, db, request): expected_tables = list(db) assert dataset.tables == expected_tables + # tables_rows + expected_tables_rows = {} + for table_id in list(db): + expected_tables_rows[table_id] = len(db[table_id]) + assert dataset.tables_rows == expected_tables_rows + # tables_table expected_tables_table = [["ID", "Type", "Columns"]] for table_id in list(db): From c5027f271535ddbde241e123ea5b5f5070ef3861 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Thu, 26 Sep 2024 14:52:06 +0200 Subject: [PATCH 03/13] Fix expected test templates --- .../rendered_templates/medium_db.rst | 27 +++++++++++++------ .../rendered_templates/minimal_db.rst | 4 ++- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/tests/test_data/rendered_templates/medium_db.rst b/tests/test_data/rendered_templates/medium_db.rst index 5abaa7a..8745331 100644 --- a/tests/test_data/rendered_templates/medium_db.rst +++ b/tests/test_data/rendered_templates/medium_db.rst @@ -73,10 +73,13 @@ Tables

data/f0.wav

0

- + +

data/f1.wav

1

- + +

2 rows

+ @@ -104,22 +107,27 @@ Tables

0 days 00:00:00

0 days 00:00:00.500000

neutral

- + +

data/f0.wav

0 days 00:00:00.500000

0 days 00:00:01

neutral

- + +

data/f1.wav

0 days 00:00:00

0 days 00:02:30

happy

- + +

data/f1.wav

0 days 00:02:30

0 days 00:05:01

angry

- + +

4 rows

+ @@ -145,11 +153,14 @@ Tables

0

23

female

- + +

1

49

male

- + +

2 rows

+ diff --git a/tests/test_data/rendered_templates/minimal_db.rst b/tests/test_data/rendered_templates/minimal_db.rst index 97b956a..fd9bfc4 100644 --- a/tests/test_data/rendered_templates/minimal_db.rst +++ b/tests/test_data/rendered_templates/minimal_db.rst @@ -58,7 +58,9 @@ Tables

f0.wav

0

- + +

1 rows

+ From c0a8cb02802cb0a70bfae101a0a9be42c0003db9 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Mon, 21 Oct 2024 13:56:45 +0200 Subject: [PATCH 04/13] Add Dataset.tables_columns --- audbcards/core/dataset.py | 20 +++++++++++++++++++ audbcards/core/templates/datacard_tables.j2 | 2 +- .../rendered_templates/medium_db.rst | 6 +++--- .../rendered_templates/minimal_db.rst | 2 +- tests/test_dataset.py | 6 ++++++ 5 files changed, 31 insertions(+), 5 deletions(-) diff --git a/audbcards/core/dataset.py b/audbcards/core/dataset.py index e805e66..cfba4c9 100644 --- a/audbcards/core/dataset.py +++ b/audbcards/core/dataset.py @@ -510,6 +510,26 @@ def tables(self) -> typing.List[str]: tables = list(db) return tables + @functools.cached_property + def tables_columns(self) -> typing.Dict[str, int]: + """Number of columns for each table of the dataset. + + Returns: + dictionary with table IDs as keys + and number of columns as values + + Examples: + >>> ds = Dataset("emodb", "1.4.1") + >>> ds.tables_columns["speaker"] + 3 + + """ + columns = {} + for table in list(self.header): + df = self._tables[table] + columns[table] = len(df.columns) + return columns + @functools.cached_property def tables_preview(self) -> typing.Dict[str, typing.List[typing.List[str]]]: """Table preview for each table of the dataset. diff --git a/audbcards/core/templates/datacard_tables.j2 b/audbcards/core/templates/datacard_tables.j2 index 4d2e0a8..06404ae 100644 --- a/audbcards/core/templates/datacard_tables.j2 +++ b/audbcards/core/templates/datacard_tables.j2 @@ -44,7 +44,7 @@ Tables {% endif %} {% endfor %} -

{{ tables_rows[row[0]] }} rows

+

{{ tables_rows[row[0]] }} {% if tables_rows[row[0]] == 1 %}row{% else %}rows{% endif %} x {{ tables_columns[row[0]] }} {% if tables_columns[row[0]] == 1 %}column{% else %}columns{% endif %}

diff --git a/tests/test_data/rendered_templates/medium_db.rst b/tests/test_data/rendered_templates/medium_db.rst index 8745331..0585e47 100644 --- a/tests/test_data/rendered_templates/medium_db.rst +++ b/tests/test_data/rendered_templates/medium_db.rst @@ -78,7 +78,7 @@ Tables

data/f1.wav

1

-

2 rows

+

2 rows x 1 column

@@ -126,7 +126,7 @@ Tables

0 days 00:05:01

angry

-

4 rows

+

4 rows x 1 column

@@ -159,7 +159,7 @@ Tables

49

male

-

2 rows

+

2 rows x 2 columns

diff --git a/tests/test_data/rendered_templates/minimal_db.rst b/tests/test_data/rendered_templates/minimal_db.rst index fd9bfc4..152a6ee 100644 --- a/tests/test_data/rendered_templates/minimal_db.rst +++ b/tests/test_data/rendered_templates/minimal_db.rst @@ -59,7 +59,7 @@ Tables

f0.wav

0

-

1 rows

+

1 row x 1 column

diff --git a/tests/test_dataset.py b/tests/test_dataset.py index b5168e8..b06982a 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -205,6 +205,12 @@ def test_dataset(audb_cache, tmpdir, repository, db, request): expected_tables = list(db) assert dataset.tables == expected_tables + # tables_columns + expected_tables_columns = {} + for table_id in list(db): + expected_tables_columns[table_id] = len(db[table_id].columns) + assert dataset.tables_columns == expected_tables_columns + # tables_rows expected_tables_rows = {} for table_id in list(db): From ebda2dfbd485895d4238f7dd915af6c9c8d56e85 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Mon, 21 Oct 2024 15:01:09 +0200 Subject: [PATCH 05/13] Make font smaller --- audbcards/core/templates/datacard_tables.j2 | 2 +- audbcards/sphinx/table-preview.css | 4 ++++ tests/test_data/rendered_templates/medium_db.rst | 6 +++--- tests/test_data/rendered_templates/minimal_db.rst | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/audbcards/core/templates/datacard_tables.j2 b/audbcards/core/templates/datacard_tables.j2 index 06404ae..5fc9807 100644 --- a/audbcards/core/templates/datacard_tables.j2 +++ b/audbcards/core/templates/datacard_tables.j2 @@ -44,7 +44,7 @@ Tables {% endif %} {% endfor %} -

{{ tables_rows[row[0]] }} {% if tables_rows[row[0]] == 1 %}row{% else %}rows{% endif %} x {{ tables_columns[row[0]] }} {% if tables_columns[row[0]] == 1 %}column{% else %}columns{% endif %}

+

{{ tables_rows[row[0]] }} {% if tables_rows[row[0]] == 1 %}row{% else %}rows{% endif %} x {{ tables_columns[row[0]] }} {% if tables_columns[row[0]] == 1 %}column{% else %}columns{% endif %}

diff --git a/audbcards/sphinx/table-preview.css b/audbcards/sphinx/table-preview.css index ce82007..ba4019f 100644 --- a/audbcards/sphinx/table-preview.css +++ b/audbcards/sphinx/table-preview.css @@ -34,6 +34,10 @@ table.preview td { border-top: none; border-bottom: none; } +table.preview td p.table-statistic { + /* Make "N rows x M columns" smaller */ + font-size: 90%; +} table.clickable td:not(.expanded-row-content), table.clickable th { /* Allow to center cell copntent with `margin: auto` */ diff --git a/tests/test_data/rendered_templates/medium_db.rst b/tests/test_data/rendered_templates/medium_db.rst index 0585e47..0dd4912 100644 --- a/tests/test_data/rendered_templates/medium_db.rst +++ b/tests/test_data/rendered_templates/medium_db.rst @@ -78,7 +78,7 @@ Tables

data/f1.wav

1

-

2 rows x 1 column

+

2 rows x 1 column

@@ -126,7 +126,7 @@ Tables

0 days 00:05:01

angry

-

4 rows x 1 column

+

4 rows x 1 column

@@ -159,7 +159,7 @@ Tables

49

male

-

2 rows x 2 columns

+

2 rows x 2 columns

diff --git a/tests/test_data/rendered_templates/minimal_db.rst b/tests/test_data/rendered_templates/minimal_db.rst index 152a6ee..248991e 100644 --- a/tests/test_data/rendered_templates/minimal_db.rst +++ b/tests/test_data/rendered_templates/minimal_db.rst @@ -59,7 +59,7 @@ Tables

f0.wav

0

-

1 row x 1 column

+

1 row x 1 column

From b88da6bff5084411e19ce2d3ad3f397ceade95c4 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Mon, 21 Oct 2024 15:12:38 +0200 Subject: [PATCH 06/13] Improve caching of table stats --- audbcards/core/dataset.py | 59 ++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/audbcards/core/dataset.py b/audbcards/core/dataset.py index cfba4c9..b4807ff 100644 --- a/audbcards/core/dataset.py +++ b/audbcards/core/dataset.py @@ -524,11 +524,7 @@ def tables_columns(self) -> typing.Dict[str, int]: 3 """ - columns = {} - for table in list(self.header): - df = self._tables[table] - columns[table] = len(df.columns) - return columns + return {table: stats["columns"] for table, stats in self._tables_stats.items()} @functools.cached_property def tables_preview(self) -> typing.Dict[str, typing.List[typing.List[str]]]: @@ -559,16 +555,7 @@ def tables_preview(self) -> typing.Dict[str, typing.List[typing.List[str]]]: | 11 | 26 | male | deu | """ - preview = {} - for table in list(self.header): - df = self._tables[table] - df = df.reset_index() - header = [df.columns.tolist()] - body = df.head(5).astype("string").values.tolist() - # Remove unwanted chars and limit length of each entry - body = [[self._parse_text(column) for column in row] for row in body] - preview[table] = header + body - return preview + return {table: stats["preview"] for table, stats in self._tables_stats.items()} @functools.cached_property def tables_rows(self) -> typing.Dict[str, int]: @@ -584,11 +571,7 @@ def tables_rows(self) -> typing.Dict[str, int]: 10 """ - rows = {} - for table in list(self.header): - df = self._tables[table] - rows[table] = len(df) - return rows + return {table: stats["rows"] for table, stats in self._tables_stats.items()} @functools.cached_property def tables_table(self) -> typing.List[str]: @@ -787,9 +770,17 @@ def _segments(self) -> pd.MultiIndex: return index @functools.cached_property - def _tables(self) -> typing.Dict[str, pd.DataFrame]: - """Dataframes of tables in the dataset.""" - tables = {} + def _tables_stats(self) -> typing.Dict[str, dict]: + """Table information of tables in the dataset. + + It returns a dict per table, containing: + + * ``"columns"``: number of table columns + * ``"rows"``: number of table rows + * ``"preview"``: preview of table + + """ + stats = {} for table in list(self.header): df = audb.load_table( self.name, @@ -797,8 +788,26 @@ def _tables(self) -> typing.Dict[str, pd.DataFrame]: version=self.version, verbose=False, ) - tables[table] = df - return tables + + columns = len(df.columns) + + rows = len(df) + + # Table preview + df = df.reset_index() + header = [df.columns.tolist()] + body = df.head(5).astype("string").values.tolist() + # Remove unwanted chars and limit length of each entry + body = [[self._parse_text(column) for column in row] for row in body] + preview = header + body + + stats[table] = { + "columns": columns, + "rows": rows, + "preview": preview, + } + + return stats @staticmethod def _map_iso_languages(languages: typing.List[str]) -> typing.List[str]: From 928f5f96fa4066a74c174eeb188c72769288a50f Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Mon, 21 Oct 2024 15:32:39 +0200 Subject: [PATCH 07/13] Avoid loops in test --- tests/test_dataset.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/test_dataset.py b/tests/test_dataset.py index b06982a..26aa15a 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -206,15 +206,11 @@ def test_dataset(audb_cache, tmpdir, repository, db, request): assert dataset.tables == expected_tables # tables_columns - expected_tables_columns = {} - for table_id in list(db): - expected_tables_columns[table_id] = len(db[table_id].columns) + expected_tables_columns = {table: len(db[table].df.columns) for table in list(db)} assert dataset.tables_columns == expected_tables_columns # tables_rows - expected_tables_rows = {} - for table_id in list(db): - expected_tables_rows[table_id] = len(db[table_id]) + expected_tables_rows = {table: len(db[table].df) for table in list(db)} assert dataset.tables_rows == expected_tables_rows # tables_table From 2d2ce1aadc24f8118ff0d7b8561e49e3e7b459ac Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Mon, 21 Oct 2024 15:39:51 +0200 Subject: [PATCH 08/13] Avoid loop in test, specify results --- tests/test_dataset.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 26aa15a..70e5be4 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -206,11 +206,19 @@ def test_dataset(audb_cache, tmpdir, repository, db, request): assert dataset.tables == expected_tables # tables_columns - expected_tables_columns = {table: len(db[table].df.columns) for table in list(db)} + expected_tables_columns = { + "files": 1, + "segments": 1, + "speaker": 2, + } assert dataset.tables_columns == expected_tables_columns # tables_rows - expected_tables_rows = {table: len(db[table].df) for table in list(db)} + expected_tables_rows = { + "files": 2, + "segments": 4, + "speaker": 2, + } assert dataset.tables_rows == expected_tables_rows # tables_table From 55eb86f659740b44a806c23c32bdce69b21d7302 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Mon, 21 Oct 2024 15:55:46 +0200 Subject: [PATCH 09/13] Include columns and rows in cached table props --- audbcards/core/dataset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/audbcards/core/dataset.py b/audbcards/core/dataset.py index b4807ff..ab282e9 100644 --- a/audbcards/core/dataset.py +++ b/audbcards/core/dataset.py @@ -22,7 +22,9 @@ class _Dataset: _table_related_cached_properties = [ "segment_durations", "segments", + "tables_columns", "tables_preview", + "tables_rows", ] """Cached properties relying on table data. From b421caf3a3ea09c737e4165907e0f45171573ed5 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Mon, 21 Oct 2024 15:58:03 +0200 Subject: [PATCH 10/13] Improve docstring of _tables_stats() --- audbcards/core/dataset.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/audbcards/core/dataset.py b/audbcards/core/dataset.py index ab282e9..0485c78 100644 --- a/audbcards/core/dataset.py +++ b/audbcards/core/dataset.py @@ -775,11 +775,18 @@ def _segments(self) -> pd.MultiIndex: def _tables_stats(self) -> typing.Dict[str, dict]: """Table information of tables in the dataset. - It returns a dict per table, containing: + Caches table information to improve performance + of multiple table-related properties. + This property computes and stores statistics for all tables, + reducing repeated computations. + It significantly improves performance + when accessing multiple table properties frequently. - * ``"columns"``: number of table columns - * ``"rows"``: number of table rows - * ``"preview"``: preview of table + Returns: + A dictionary with table names as keys and dictionaries containing: + - "columns": number of columns + - "rows": number of rows + - "preview": table preview (header + first 5 rows) """ stats = {} From 607909a7cadca1550dedbc2035b35997c43c7335 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Mon, 21 Oct 2024 16:03:06 +0200 Subject: [PATCH 11/13] Simplify _tables_stats() --- audbcards/core/dataset.py | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/audbcards/core/dataset.py b/audbcards/core/dataset.py index 0485c78..5290b80 100644 --- a/audbcards/core/dataset.py +++ b/audbcards/core/dataset.py @@ -557,7 +557,16 @@ def tables_preview(self) -> typing.Dict[str, typing.List[typing.List[str]]]: | 11 | 26 | male | deu | """ - return {table: stats["preview"] for table, stats in self._tables_stats.items()} + preview = {} + for table, stats in self._tables_stats.items(): + df = stats["preview"] + df = df.reset_index() + header = [df.columns.tolist()] + body = df.astype("string").values.tolist() + # Remove unwanted chars and limit length of each entry + body = [[self._parse_text(column) for column in row] for row in body] + preview[table] = header + body + return preview @functools.cached_property def tables_rows(self) -> typing.Dict[str, int]: @@ -786,7 +795,7 @@ def _tables_stats(self) -> typing.Dict[str, dict]: A dictionary with table names as keys and dictionaries containing: - "columns": number of columns - "rows": number of rows - - "preview": table preview (header + first 5 rows) + - "preview": dataframe preview (first 5 rows) """ stats = {} @@ -797,25 +806,11 @@ def _tables_stats(self) -> typing.Dict[str, dict]: version=self.version, verbose=False, ) - - columns = len(df.columns) - - rows = len(df) - - # Table preview - df = df.reset_index() - header = [df.columns.tolist()] - body = df.head(5).astype("string").values.tolist() - # Remove unwanted chars and limit length of each entry - body = [[self._parse_text(column) for column in row] for row in body] - preview = header + body - stats[table] = { - "columns": columns, - "rows": rows, - "preview": preview, + "columns": len(df.columns), + "rows": len(df), + "preview": df.head(5), } - return stats @staticmethod From c0628eedd8453379ca83bb6e2ccbac9b915363ca Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Mon, 21 Oct 2024 16:20:34 +0200 Subject: [PATCH 12/13] Extend tests with edge cases --- tests/test_dataset.py | 96 ++++++++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 37 deletions(-) diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 70e5be4..86e212a 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -7,7 +7,6 @@ import audb import audeer -import audformat import audiofile import audbcards @@ -50,12 +49,59 @@ def test_dataset_property_scope(tmpdir, db, request): @pytest.mark.parametrize( - "db", + "db, expected_schemes_table, expected_tables_table, " + "expected_tables_columns, expected_tables_rows, " + "expected_segment_durations", [ - "medium_db", + ( + "bare_db", + [[]], + [["ID", "Type", "Columns"]], + {}, + {}, + [], + ), + ( + "minimal_db", + [[]], + [["ID", "Type", "Columns"], ["files", "filewise", "speaker"]], + {"files": 1}, + {"files": 1}, + [], + ), + ( + "medium_db", + [ + ["ID", "Dtype", "Min", "Labels", "Mappings"], + ["age", "int", 0, "", ""], + ["emotion", "str", "", "angry, happy, neutral", ""], + ["gender", "str", "", "female, male", ""], + ["speaker", "int", "", "0, 1", "age, gender"], + ], + [ + ["ID", "Type", "Columns"], + ["files", "filewise", "speaker"], + ["segments", "segmented", "emotion"], + ["speaker", "misc", "age, gender"], + ], + {"files": 1, "segments": 1, "speaker": 2}, + {"files": 2, "segments": 4, "speaker": 2}, + [0.5, 0.5, 150, 151], + ), ], ) -def test_dataset(audb_cache, tmpdir, repository, db, request): +def test_dataset( + audb_cache, + tmpdir, + repository, + request, + db, + expected_schemes_table, + expected_tables_table, + expected_tables_columns, + expected_tables_rows, + expected_segment_durations, +): r"""Test audbcards.Dataset object and all its properties.""" db = request.getfixturevalue(db) @@ -115,7 +161,7 @@ def test_dataset(audb_cache, tmpdir, repository, db, request): # duration expected_duration = db.files_duration(db.files).sum() - assert dataset.duration == expected_duration + assert dataset.duration == pd.to_timedelta(expected_duration) # files expected_files = len(db.files) @@ -175,17 +221,9 @@ def test_dataset(audb_cache, tmpdir, repository, db, request): assert dataset.schemes == expected_schemes # schemes_table - expected_schemes_table = [ - ["ID", "Dtype", "Min", "Labels", "Mappings"], - ["age", "int", 0, "", ""], - ["emotion", "str", "", "angry, happy, neutral", ""], - ["gender", "str", "", "female, male", ""], - ["speaker", "int", "", "0, 1", "age, gender"], - ] assert dataset.schemes_table == expected_schemes_table # segment_durations - expected_segment_durations = [0.5, 0.5, 150, 151] assert dataset.segment_durations == expected_segment_durations # segments @@ -194,11 +232,14 @@ def test_dataset(audb_cache, tmpdir, repository, db, request): # short_description max_desc_length = 150 - expected_description = ( - db.description - if (len(db.description) < max_desc_length) - else f"{db.description[:max_desc_length - 3]}..." - ) + if db.description is None: + expected_description = "" + else: + expected_description = ( + db.description + if (len(db.description) < max_desc_length) + else f"{db.description[:max_desc_length - 3]}..." + ) assert dataset.short_description == expected_description # tables @@ -206,31 +247,12 @@ def test_dataset(audb_cache, tmpdir, repository, db, request): assert dataset.tables == expected_tables # tables_columns - expected_tables_columns = { - "files": 1, - "segments": 1, - "speaker": 2, - } assert dataset.tables_columns == expected_tables_columns # tables_rows - expected_tables_rows = { - "files": 2, - "segments": 4, - "speaker": 2, - } assert dataset.tables_rows == expected_tables_rows # tables_table - expected_tables_table = [["ID", "Type", "Columns"]] - for table_id in list(db): - table = db[table_id] - if isinstance(table, audformat.MiscTable): - table_type = "misc" - else: - table_type = table.type - columns = ", ".join(list(table.columns)) - expected_tables_table.append([table_id, table_type, columns]) assert dataset.tables_table == expected_tables_table # version From d167379a86322a78a7762529e2c7bdfcad37f125 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Mon, 21 Oct 2024 16:44:57 +0200 Subject: [PATCH 13/13] Simplify description test --- tests/test_dataset.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 86e212a..d73bdfa 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -49,12 +49,17 @@ def test_dataset_property_scope(tmpdir, db, request): @pytest.mark.parametrize( - "db, expected_schemes_table, expected_tables_table, " - "expected_tables_columns, expected_tables_rows, " + "db, " + "expected_description, " + "expected_schemes_table, " + "expected_tables_table, " + "expected_tables_columns, " + "expected_tables_rows, " "expected_segment_durations", [ ( "bare_db", + "", [[]], [["ID", "Type", "Columns"]], {}, @@ -63,6 +68,7 @@ def test_dataset_property_scope(tmpdir, db, request): ), ( "minimal_db", + "Minimal database.", [[]], [["ID", "Type", "Columns"], ["files", "filewise", "speaker"]], {"files": 1}, @@ -71,6 +77,7 @@ def test_dataset_property_scope(tmpdir, db, request): ), ( "medium_db", + "Medium database. | Some description |.", [ ["ID", "Dtype", "Min", "Labels", "Mappings"], ["age", "int", 0, "", ""], @@ -96,6 +103,7 @@ def test_dataset( repository, request, db, + expected_description, expected_schemes_table, expected_tables_table, expected_tables_columns, @@ -231,15 +239,6 @@ def test_dataset( assert dataset.segments == expected_segments # short_description - max_desc_length = 150 - if db.description is None: - expected_description = "" - else: - expected_description = ( - db.description - if (len(db.description) < max_desc_length) - else f"{db.description[:max_desc_length - 3]}..." - ) assert dataset.short_description == expected_description # tables