Handle MP3 files with soundfile (#145)

* Handle MP3 files with soundfile
* Depend on soundfile>=0.11.0
* Increase version of audiofile in benchmark
* Mention that sox is only optionally needed
* Depend on soundfile>=0.12.1
audeering · Jan 22, 2024 · 15355bd · 15355bd
1 parent be5b555
commit 15355bd
Show file tree

Hide file tree

Showing 27 changed files with 98 additions and 105 deletions.
diff --git a/audiofile/core/info.py b/audiofile/core/info.py
@@ -123,7 +123,7 @@ def duration(file: str, sloppy=False) -> float:
     the duration in seconds
     matches the one in samples.
     To achieve this it first decodes files to WAV
-    if needed, e.g. MP3 files.
+    if needed, e.g. MP4 files.
     If you have different decoders
     on different machines,
     results might differ.
@@ -189,7 +189,7 @@ def duration(file: str, sloppy=False) -> float:
 def samples(file: str) -> int:
     """Number of samples in audio file.
 
-    Audio files that are not WAV, FLAC, or OGG
+    Audio files that are not WAV, FLAC, MP3, or OGG
     are first converted to WAV,
     before counting the samples.
 

diff --git a/audiofile/core/io.py b/audiofile/core/io.py
@@ -27,7 +27,7 @@ def convert_to_wav(
 ) -> str:
     """Convert any audio/video file to WAV.
 
-    It uses soundfile for reading WAV, FLAC, OGG files,
+    It uses soundfile for reading WAV, FLAC, MP3, OGG files,
     and sox or ffmpeg for reading all other files.
     If ``duration`` and/or ``offset`` are specified
     the resulting WAV file
@@ -133,7 +133,7 @@ def read(
 ) -> typing.Tuple[np.array, int]:
     """Read audio file.
 
-    It uses :func:`soundfile.read` for WAV, FLAC, and OGG files.
+    It uses :func:`soundfile.read` for WAV, FLAC, MP3, and OGG files.
     All other audio files are
     first converted to WAV by sox or ffmpeg.
 

diff --git a/audiofile/core/utils.py b/audiofile/core/utils.py
@@ -14,7 +14,7 @@
 }
 r"""Maximum number of channels per format."""
 
-SNDFORMATS = ['wav', 'flac', 'ogg']
+SNDFORMATS = ['wav', 'flac', 'mp3', 'ogg']
 r"""File formats handled by soundfile"""
 
 

diff --git a/docs/benchmark.rst b/docs/benchmark.rst
@@ -31,7 +31,7 @@ Python packages
 The following Python packages are benchmarked against each other:
 
 * audioread_ 3.0.1
-* :mod:`audiofile` 1.3.2
+* :mod:`audiofile` 1.4.0
 * librosa_ 0.10.1
 * pedalboard_ 0.8.7
 * scipy_ 1.11.4
@@ -92,54 +92,32 @@ and will ask for a sudo password
 to install missing apt packages.
 
 
-WAV, FLAC, OGG
---------------
-
 Reading files
-^^^^^^^^^^^^^
+-------------
 
-audioread_ has been removed from the results
-as it was the slowest library.
-scipy_ is only meant for reading WAV files,
-and only included in this figure.
+audioread_ uses mad_ when reading MP3 files
+and ffmpeg_ in all other cases.
+scipy_ is only meant for reading WAV files.
 
-.. image:: ./benchmark/results/benchmark_wav-flac-ogg_read.png
+.. image:: ./benchmark/results/benchmark_wav-flac-ogg-mp3-mp4_read.png
 
-Results for :mod:`audiofile`, soundfile_ and librosa_ are similar here
-as all of them use soundfile_ under the hood to read the data.
+Results for :mod:`audiofile`, soundfile_ and librosa_ are similar
+for WAV, FLAC, MP3, OGG
+as all of them use soundfile_ under the hood
+to read the data.
 
 Accessing metadata
-^^^^^^^^^^^^^^^^^^
+------------------
 
 audioread_ (ffmpeg) and sox_ have been removed from the results
-as they take around 0.17s and 0.035s per file
-for WAV, FLAC, and OGG.
-
-.. image:: ./benchmark/results/benchmark_wav-flac-ogg_info.png
-
-
-MP3, MP4
---------
-
-Reading files
-^^^^^^^^^^^^^
+as they take around 0.003s and 0.012s per file.
+For MP3 files audioread_ uses the mad_ library.
 
-soundfile_ does not support
-reading MP3 and MP4 files,
-audioread_ (mad) and pedalboard_ only MP3 files.
 
-.. image:: ./benchmark/results/benchmark_mp3-mp4_read.png
+.. image:: ./benchmark/results/benchmark_wav-flac-ogg-mp3_info.png
 
-Accessing metadata
-^^^^^^^^^^^^^^^^^^
 
-soundfile_ does not support
-accessing MP3 and MP4 metadata.
-sox_, audioread_ (mad) and pedalboard_ only for MP3 files.
-
-.. image:: ./benchmark/results/benchmark_mp3-mp4_info.png
-
-For MP3 and MP4 files,
+For MP4 files,
 :mod:`audiofile` is not very fast in accessing the metadata.
 The main focus was not speed,
 but consistent results for number of samples and duration.
@@ -150,6 +128,11 @@ as argument to :func:`audiofile.duration`.
 This tries to read the duration from the header of the file
 and is shown as audiofile (sloppy)
 in the figure.
+soundfile_ and sox_ do not support
+accessing MP4 metadata.
+pedalboard_ supports it only on macOS.
+
+.. image:: ./benchmark/results/benchmark_mp4_info.png
 
 
 .. _audioread: https://github.com/beetbox/audioread/

diff --git a/docs/benchmark/benchmark_info.py b/docs/benchmark/benchmark_info.py
@@ -72,21 +72,22 @@ def __len__(self):
         )
 
     for lib in libs:
+
+        # MP4 and MP3 are not supported by all libraries
+        if (
+                lib in ['soundfile', 'sox', 'pedalboard']
+                and args.ext == 'mp4'
+        ):
+            continue
+        if lib == 'ar_ffmpeg' and args.ext == 'mp3':  # too slow
+            continue
+        if lib == 'ar_mad' and args.ext != 'mp3':
+            continue
+
         print(f"Benchmark metadata {args.ext} with {lib}")
         for root, dirs, fnames in audio_walk:
             for audio_dir in dirs:
 
-                # MP4 and MP3 is not supported by all libraries
-                if (
-                        lib in ['soundfile', 'sox', 'pedalboard']
-                        and args.ext == 'mp4'
-                ):
-                    continue
-                if lib in ['soundfile'] and args.ext == 'mp3':
-                    continue
-                if lib == 'ar_mad' and args.ext != 'mp3':
-                    continue
-
                 duration = int(audio_dir)
                 dataset = AudioFolder(
                     os.path.join(root, audio_dir),
@@ -102,9 +103,16 @@ def __len__(self):
                     info['duration']
 
                 end = time.time()
+
+                # Store ar_ffmpeg and ar_mad as audioread
+                if lib in ['ar_ffmpeg', 'ar_mad']:
+                    lib_name = 'audioread'
+                else:
+                    lib_name = lib
+
                 store.append(
                     ext=args.ext,
-                    lib=lib,
+                    lib=lib_name,
                     duration=duration,
                     time=float(end - start) / len(dataset),
                 )

diff --git a/docs/benchmark/benchmark_read.py b/docs/benchmark/benchmark_read.py
@@ -78,20 +78,23 @@ def __len__(self):
         )
 
     for lib in libs:
+
+        # Not all libraries support all file formats
+        if lib == 'scipy' and args.ext != 'wav':
+            continue
+        if lib == 'ar_ffmpeg' and args.ext == 'mp3':  # too slow
+            continue
+        if lib == 'ar_mad' and args.ext != 'mp3':
+            continue
+        if lib == 'soundfile' and args.ext == 'mp4':
+            continue
+        if lib == 'pedalboard' and args.ext == 'mp4':
+            continue
+
         print(f"Benchmark read {args.ext} with {lib}")
         for root, dirs, fnames in audio_walk:
             for audio_dir in dirs:
 
-                # Not all libraries support all file formats
-                if lib == 'scipy' and args.ext != 'wav':
-                    continue
-                if lib == 'ar_mad' and args.ext != 'mp3':
-                    continue
-                if lib == 'soundfile' and args.ext in ['mp3', 'mp4']:
-                    continue
-                if lib == 'pedalboard' and args.ext == 'mp4':
-                    continue
-
                 duration = int(audio_dir)
                 dataset = AudioFolder(
                     os.path.join(root, audio_dir),
@@ -106,9 +109,16 @@ def __len__(self):
                     np.max(audio)
 
                 end = time.time()
+
+                # Store ar_ffmpeg and ar_mad as audioread
+                if lib in ['ar_ffmpeg', 'ar_mad']:
+                    lib_name = 'audioread'
+                else:
+                    lib_name = lib
+
                 store.append(
                     ext=args.ext,
-                    lib=lib,
+                    lib=lib_name,
                     duration=duration,
                     time=float(end - start) / len(dataset),
                 )

diff --git a/docs/benchmark/plot.py b/docs/benchmark/plot.py
@@ -8,8 +8,7 @@
 MAPPINGS = {  # library name mappings
     'audiofile': 'audiofile',
     'audiofile_sloppy': 'audiofile (sloppy)',
-    'ar_ffmpeg': 'audioread (ffmpeg)',
-    'ar_mad': 'audioread (mad)',
+    'audioread': 'audioread',
     'librosa': 'librosa',
     'pedalboard': 'pedalboard',
     'scipy': 'scipy',
@@ -32,51 +31,46 @@
 
     df['lib'] = df['lib'].map(MAPPINGS)
 
-    for exts in [['wav', 'flac', 'ogg'], ['mp3', 'mp4']]:
+    if package == 'read':
+        extensions = [['wav', 'flac', 'ogg', 'mp3', 'mp4']]  # single graph
+    else:
+        extensions = [['wav', 'flac', 'ogg', 'mp3'], ['mp4']]
+
+    for exts in extensions:
 
         y = df[df['ext'].isin(exts)]
 
         # Define what to show in each figure
-        if 'wav' in exts and package == 'read':
+        if package == 'read':
             lib_order = [
                 MAPPINGS['audiofile'],
                 MAPPINGS['soundfile'],
                 MAPPINGS['librosa'],
-                MAPPINGS['ar_ffmpeg'],
                 MAPPINGS['pedalboard'],
+                MAPPINGS['audioread'],
                 MAPPINGS['scipy'],
             ]
-            height = 5.6
-            aspect = 1.2
-        elif 'wav' in exts and package == 'info':
+            height = 5.8
+            aspect = 1.0
+        elif package == 'info' and 'wav' in exts:
             lib_order = [
                 MAPPINGS['audiofile'],
                 MAPPINGS['soundfile'],
                 MAPPINGS['pedalboard'],
+                MAPPINGS['audioread'],
             ]
-            height = 3.36
-            aspect = 2.0
-        elif 'mp3' in exts and package == 'read':
-            lib_order = [
-                MAPPINGS['audiofile'],
-                MAPPINGS['librosa'],
-                MAPPINGS['ar_ffmpeg'],
-                MAPPINGS['ar_mad'],
-                MAPPINGS['pedalboard'],
-            ]
-            height = 3.36
-            aspect = 2.0
-        elif 'mp3' in exts and package == 'info':
+            # Remove audioread for WAV, FLAC, OGG
+            y = y[~((y['ext'] != 'mp3') & (y['lib'] == 'audioread'))]
+            height = 4.0
+            aspect = 1.6
+        elif package == 'info' and 'mp4' in exts:
             lib_order = [
                 MAPPINGS['audiofile'],
                 MAPPINGS['audiofile_sloppy'],
-                MAPPINGS['ar_ffmpeg'],
-                MAPPINGS['ar_mad'],
-                MAPPINGS['pedalboard'],
-                MAPPINGS['sox'],
+                MAPPINGS['audioread'],
             ]
-            height = 3.7
-            aspect = 1.82
+            height = 1.4
+            aspect = 4.8
 
         fig = plt.figure()
 
@@ -87,8 +81,7 @@
             MAPPINGS['soundfile']: '#db8548',
             MAPPINGS['librosa']: '#c34c4d',
             MAPPINGS['scipy']: '#8174b8',
-            MAPPINGS['ar_mad']: '#94785e',
-            MAPPINGS['ar_ffmpeg']: '#94785e',
+            MAPPINGS['audioread']: '#94785e',
             MAPPINGS['sox']: '#db8cc5',
             MAPPINGS['pedalboard']: '#5dab64',
         }

diff --git a/docs/benchmark/results/benchmark_info_flac.pickle b/docs/benchmark/results/benchmark_info_flac.pickle
diff --git a/docs/benchmark/results/benchmark_info_mp3.pickle b/docs/benchmark/results/benchmark_info_mp3.pickle
diff --git a/docs/benchmark/results/benchmark_info_mp4.pickle b/docs/benchmark/results/benchmark_info_mp4.pickle
diff --git a/docs/benchmark/results/benchmark_info_ogg.pickle b/docs/benchmark/results/benchmark_info_ogg.pickle
diff --git a/docs/benchmark/results/benchmark_info_wav.pickle b/docs/benchmark/results/benchmark_info_wav.pickle
diff --git a/docs/benchmark/results/benchmark_mp3-mp4_info.png b/docs/benchmark/results/benchmark_mp3-mp4_info.png
diff --git a/docs/benchmark/results/benchmark_mp3-mp4_read.png b/docs/benchmark/results/benchmark_mp3-mp4_read.png
diff --git a/docs/benchmark/results/benchmark_mp4_info.png b/docs/benchmark/results/benchmark_mp4_info.png
diff --git a/docs/benchmark/results/benchmark_read_flac.pickle b/docs/benchmark/results/benchmark_read_flac.pickle
diff --git a/docs/benchmark/results/benchmark_read_mp3.pickle b/docs/benchmark/results/benchmark_read_mp3.pickle
diff --git a/docs/benchmark/results/benchmark_read_mp4.pickle b/docs/benchmark/results/benchmark_read_mp4.pickle
diff --git a/docs/benchmark/results/benchmark_read_ogg.pickle b/docs/benchmark/results/benchmark_read_ogg.pickle
diff --git a/docs/benchmark/results/benchmark_read_wav.pickle b/docs/benchmark/results/benchmark_read_wav.pickle
diff --git a/docs/benchmark/results/benchmark_wav-flac-ogg-mp3-mp4_read.png b/docs/benchmark/results/benchmark_wav-flac-ogg-mp3-mp4_read.png
diff --git a/docs/benchmark/results/benchmark_wav-flac-ogg-mp3_info.png b/docs/benchmark/results/benchmark_wav-flac-ogg-mp3_info.png
diff --git a/docs/benchmark/results/benchmark_wav-flac-ogg_info.png b/docs/benchmark/results/benchmark_wav-flac-ogg_info.png
diff --git a/docs/benchmark/results/benchmark_wav-flac-ogg_read.png b/docs/benchmark/results/benchmark_wav-flac-ogg_read.png
diff --git a/docs/installation.rst b/docs/installation.rst
@@ -1,18 +1,17 @@
 Installation
 ============
 
-:mod:`audiofile` supports WAV, FLAC, OGG out of the box.
+:mod:`audiofile` supports WAV, FLAC, MP3, OGG out of the box.
 In order to handle other audio formats,
-please make sure ffmpeg_
-and mediainfo_
+please make sure ffmpeg_,
+mediainfo_,
+and optionally sox_
 are installed on your system.
-For faster access of MP3 files,
-please install sox_ with MP3 bindings as well,
 e.g.
 
 .. code-block:: bash
 
-    $ sudo apt-get install ffmpeg mediainfo sox libsox-fmt-mp3
+    $ sudo apt-get install ffmpeg mediainfo sox
 
 To install :mod:`audiofile` run:
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -34,7 +34,7 @@ dependencies = [
     'audeer',
     'audmath >=1.2.1',
     'numpy',
-    'soundfile',
+    'soundfile >=0.12.1',  # MP3 support with bundled libsndfile
 ]
 # Get version dynamically from git
 # (needs setuptools_scm tools config below)

diff --git a/tests/test_audiofile.py b/tests/test_audiofile.py
@@ -190,7 +190,7 @@ def test_read(tmpdir, duration, offset):
 )
 @pytest.mark.parametrize(
     'empty_file',
-    ('bin', 'mp3', 'wav'),
+    ('bin', 'mp4', 'wav'),
     indirect=True,
 )
 def test_empty_file(tmpdir, convert, empty_file):
@@ -216,7 +216,7 @@ def test_empty_file(tmpdir, convert, empty_file):
 
 @pytest.mark.parametrize(
     'empty_file',
-    ('bin', 'mp3'),
+    ('bin', 'mp4'),
     indirect=True,
 )
 def test_missing_binaries(tmpdir, hide_system_path, empty_file):
@@ -244,7 +244,7 @@ def test_missing_binaries(tmpdir, hide_system_path, empty_file):
 
 @pytest.mark.parametrize(
     'ext',
-    ('bin', 'mp3', 'wav'),
+    ('bin', 'mp4', 'wav'),
 )
 def test_missing_file(tmpdir, ext):
     missing_file = f'missing_file.{ext}'
@@ -269,7 +269,7 @@ def test_missing_file(tmpdir, ext):
 
 @pytest.mark.parametrize(
     'non_audio_file',
-    ('bin', 'mp3', 'wav'),
+    ('bin', 'mp4', 'wav'),
     indirect=True,
 )
 def test_broken_file(tmpdir, non_audio_file):