Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Dataset.tables_columns and tables_rows #113

Merged
merged 13 commits into from
Oct 22, 2024
61 changes: 55 additions & 6 deletions audbcards/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,26 @@ def tables(self) -> typing.List[str]:
tables = list(db)
return tables

@functools.cached_property
hagenw marked this conversation as resolved.
Show resolved Hide resolved
ChristianGeng marked this conversation as resolved.
Show resolved Hide resolved
def tables_columns(self) -> typing.Dict[str, int]:
    """Number of columns for each table of the dataset.

    The tables are looked up in ``self._tables``
    using the table IDs provided by ``self.header``.
    Columns are counted with ``len(df.columns)``,
    so the index of a table is not included in the count.

    Returns:
        dictionary with table IDs as keys
        and number of columns as values

    Examples:
        >>> ds = Dataset("emodb", "1.4.1")
        >>> ds.tables_columns["speaker"]
        3

    """
    # Iterate the header directly,
    # there is no need to copy the table IDs into a list first
    return {table: len(self._tables[table].columns) for table in self.header}
hagenw marked this conversation as resolved.
Show resolved Hide resolved

@functools.cached_property
def tables_preview(self) -> typing.Dict[str, typing.List[typing.List[str]]]:
"""Table preview for each table of the dataset.
Expand Down Expand Up @@ -541,12 +561,7 @@ def tables_preview(self) -> typing.Dict[str, typing.List[typing.List[str]]]:
"""
preview = {}
for table in list(self.header):
df = audb.load_table(
self.name,
table,
version=self.version,
verbose=False,
)
df = self._tables[table]
df = df.reset_index()
header = [df.columns.tolist()]
body = df.head(5).astype("string").values.tolist()
Expand All @@ -555,6 +570,26 @@ def tables_preview(self) -> typing.Dict[str, typing.List[typing.List[str]]]:
preview[table] = header + body
return preview

@functools.cached_property
def tables_rows(self) -> typing.Dict[str, int]:
    """Number of rows for each table of the dataset.

    The tables are looked up in ``self._tables``
    using the table IDs provided by ``self.header``,
    and the rows of each table are counted with ``len(df)``.

    Returns:
        dictionary with table IDs as keys
        and number of rows as values

    Examples:
        >>> ds = Dataset("emodb", "1.4.1")
        >>> ds.tables_rows["speaker"]
        10

    """
    # Iterate the header directly,
    # there is no need to copy the table IDs into a list first
    return {table: len(self._tables[table]) for table in self.header}
hagenw marked this conversation as resolved.
Show resolved Hide resolved

@functools.cached_property
def tables_table(self) -> typing.List[str]:
"""Tables of the dataset."""
Expand Down Expand Up @@ -751,6 +786,20 @@ def _segments(self) -> pd.MultiIndex:
index = audformat.utils.union([index, df.index])
return index

@functools.cached_property
def _tables(self) -> typing.Dict[str, pd.DataFrame]:
    """Dataframes of tables in the dataset.

    Every table listed in ``self.header``
    is loaded once with ``audb.load_table()``
    for the dataset ``self.name`` in version ``self.version``.
    Properties that need table content
    can read the dataframes from here
    instead of requesting each table again.

    Returns:
        dictionary with table IDs as keys
        and the loaded tables as values

    """
    return {
        table: audb.load_table(
            self.name,
            table,
            version=self.version,
            verbose=False,
        )
        for table in self.header
    }

@staticmethod
def _map_iso_languages(languages: typing.List[str]) -> typing.List[str]:
r"""Calculate ISO languages for a list of languages.
Expand Down
2 changes: 2 additions & 0 deletions audbcards/core/templates/datacard_tables.j2
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,10 @@ Tables
{% for column in row %}
<td><p>{{ column }}</p></td>
{% endfor %}
</tr>
{% endif %}
{% endfor %}
<tr><td><p>{{ tables_rows[row[0]] }} {% if tables_rows[row[0]] == 1 %}row{% else %}rows{% endif %} x {{ tables_columns[row[0]] }} {% if tables_columns[row[0]] == 1 %}column{% else %}columns{% endif %}</p></td></tr>
</tbody>
</table>

Expand Down
27 changes: 19 additions & 8 deletions tests/test_data/rendered_templates/medium_db.rst
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,13 @@ Tables
<tr>
<td><p>data/f0.wav</p></td>
<td><p>0</p></td>
<tr>
</tr>
<tr>
<td><p>data/f1.wav</p></td>
<td><p>1</p></td>
</tbody>
</tr>
<tr><td><p>2 rows x 1 column</p></td></tr>
</tbody>
</table>


Expand Down Expand Up @@ -104,22 +107,27 @@ Tables
<td><p>0 days 00:00:00</p></td>
<td><p>0 days 00:00:00.500000</p></td>
<td><p>neutral</p></td>
<tr>
</tr>
<tr>
<td><p>data/f0.wav</p></td>
<td><p>0 days 00:00:00.500000</p></td>
<td><p>0 days 00:00:01</p></td>
<td><p>neutral</p></td>
<tr>
</tr>
<tr>
<td><p>data/f1.wav</p></td>
<td><p>0 days 00:00:00</p></td>
<td><p>0 days 00:02:30</p></td>
<td><p>happy</p></td>
<tr>
</tr>
<tr>
<td><p>data/f1.wav</p></td>
<td><p>0 days 00:02:30</p></td>
<td><p>0 days 00:05:01</p></td>
<td><p>angry</p></td>
</tbody>
</tr>
<tr><td><p>4 rows x 1 column</p></td></tr>
</tbody>
</table>


Expand All @@ -145,11 +153,14 @@ Tables
<td><p>0</p></td>
<td><p>23</p></td>
<td><p>female</p></td>
<tr>
</tr>
<tr>
<td><p>1</p></td>
<td><p>49</p></td>
<td><p>male</p></td>
</tbody>
</tr>
<tr><td><p>2 rows x 2 columns</p></td></tr>
</tbody>
</table>


Expand Down
4 changes: 3 additions & 1 deletion tests/test_data/rendered_templates/minimal_db.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ Tables
<tr>
<td><p>f0.wav</p></td>
<td><p>0</p></td>
</tbody>
</tr>
<tr><td><p>1 row x 1 column</p></td></tr>
</tbody>
</table>


Expand Down
12 changes: 12 additions & 0 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,18 @@ def test_dataset(audb_cache, tmpdir, repository, db, request):
expected_tables = list(db)
assert dataset.tables == expected_tables

# tables_columns
expected_tables_columns = {}
for table_id in list(db):
expected_tables_columns[table_id] = len(db[table_id].columns)
hagenw marked this conversation as resolved.
Show resolved Hide resolved
assert dataset.tables_columns == expected_tables_columns
hagenw marked this conversation as resolved.
Show resolved Hide resolved

# tables_rows
expected_tables_rows = {}
for table_id in list(db):
expected_tables_rows[table_id] = len(db[table_id])
hagenw marked this conversation as resolved.
Show resolved Hide resolved
assert dataset.tables_rows == expected_tables_rows

# tables_table
expected_tables_table = [["ID", "Type", "Columns"]]
for table_id in list(db):
Expand Down