
Kapre 0.3.5 (#122)
* Feat/tflite compatibility (#118)

* update tensorflow requirements to have optional gpu version

* Update README.md

* Update README.md

* update CI

* tflite compatible STFT layer

* tflite compatible STFT layer

* magnitude layer works out shape for itself.

* tflite phase support

* Approximate phase and tests

* undo some black modifications

* undo some black modifications

* undo some black modifications

* undo some black modifications

* remove batch index

* right padding only for stft, improved unit test

* update doc string

* update doc string

* remove extra requirement

* remove extra requirement

* add extra req

* remove extra req

* remove extra req

* remove extra req

* remove extra req

* remove extra req

* remove extra req

* remove extra req

* update readme

* update readme

* update docstring

* update docstrings

* tflite compatible layers as a subpackage

* Wrap comment in """

* Revert to simple modules

* Don't need to reinitialise

* Fix formatting

* Revert some `black` changes

* Docstring update

* docstring update

Co-authored-by: Paul Kendrick <[email protected]>

* magnitude_to_decibel support for float16+float64 (#120)

* magnitude_to_decibel support for float16+float64

The function kapre.backend.magnitude_to_decibel with the default
arguments throws a TensorFlow type error if `x` is not `float32`.

We can fix this by casting `amin` to `x`'s dtype.

* Tests for magnitude_to_decibel dtype

Floating point errors with `float16` require us to assert
with a larger NumPy `rtol`, but otherwise it works.

* add _tflite to the doc

* update doc setup

* add doc utility script

* fix install requirement for docs

* rtd

* re-travis

Co-authored-by: Paul Kendrick <[email protected]>
Co-authored-by: Paul Kendrick <[email protected]>
Co-authored-by: James Mishra <[email protected]>
4 people authored Mar 18, 2021
1 parent f41eb4b commit 7643354
Showing 20 changed files with 760 additions and 49 deletions.
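A note on the "Approximate phase" commits above: at the time of this PR, TFLite's builtin op set had no `atan2`, so a TFLite-compatible phase layer has to approximate it from supported ops. The sketch below shows one common bounded-argument polynomial scheme in NumPy. This is an illustration under that assumption, not necessarily the exact approximation used in the PR; `approx_atan` and `approx_atan2` are hypothetical helper names.

```python
import numpy as np

def approx_atan(z):
    # Polynomial approximation of atan, valid for |z| <= 1
    # (max error on that interval is roughly 0.005 rad).
    return z * (np.pi / 4 + 0.273 * (1.0 - np.abs(z)))

def approx_atan2(y, x):
    # Quadrant-aware atan2 built from elementwise ops only,
    # keeping the argument of approx_atan inside [-1, 1].
    abs_y, abs_x = np.abs(y), np.abs(x)
    swap = abs_y > abs_x                    # use atan(x/y) when |y| > |x|
    num = np.where(swap, x, y)
    den = np.where(swap, y, x)
    den = np.where(den == 0, 1.0, den)      # guard atan2(0, 0)
    a = approx_atan(num / den)
    a = np.where(swap, np.sign(y) * np.pi / 2 - a, a)
    # shift quadrant II/III results back into (-pi, pi]
    a = np.where((x < 0) & ~swap, a + np.pi * np.where(y >= 0, 1.0, -1.0), a)
    return a
```

Against `np.arctan2` on a grid, the maximum error of this sketch stays below 0.01 rad, which is usually acceptable for phase features.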
2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
# Kapre
Keras Audio Preprocessors - compute STFT, ISTFT, Melspectrogram, and others on GPU real-time.

Tested on Python 3.6 and 3.7

## Why Kapre?
7 changes: 6 additions & 1 deletion docs/_static/css/custom.css
@@ -1,3 +1,8 @@
div.wy-nav-content {
-max-width: 1200px;
+max-width: 1000px;
}

code.literal {
color: #404040 !important;
background-color: #fbfbfb !important;
}
21 changes: 21 additions & 0 deletions docs/conf.py
@@ -15,6 +15,7 @@
import sys
sys.path.insert(0, os.path.abspath('../'))
import sphinx_rtd_theme

autodoc_mock_imports = ['tensorflow', 'librosa', 'numpy']
autodoc_member_order = 'bysource'

@@ -40,8 +41,18 @@
"sphinx.ext.napoleon",
# "sphinx.ext.autosummary",
"sphinx.ext.viewcode", # source linkage
"sphinxcontrib.inlinesyntaxhighlight" # inline code highlight
]

# https://stackoverflow.com/questions/21591107/sphinx-inline-code-highlight
# use language set by highlight directive if no language is set by role
inline_highlight_respect_highlight = True
# use language set by highlight directive if no role is set
inline_highlight_literals = True

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"

# autosummary_generate = True

# autoapi_type = 'python'
@@ -68,4 +79,14 @@
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

html_css_files = [
'css/custom.css',
]


def setup(app):
app.add_stylesheet("css/custom.css")


master_doc = 'index'

1 change: 1 addition & 0 deletions docs/index.rst
@@ -92,6 +92,7 @@ Visit `github.com/keunwoochoi/kapre <https://github.com/keunwoochoi/kapre>`_ and
signal
composed
backend
time_frequency_tflite

.. toctree::
:hidden:
4 changes: 4 additions & 0 deletions docs/release_note.rst
@@ -1,6 +1,10 @@
Release Note
^^^^^^^^^^^^

* 18 March 2021
- 0.3.5
- Add `kapre.time_frequency_tflite`, which uses TFLite for faster CPU inference.

* 29 Sep 2020
- 0.3.4
- Fix a bug in `kapre.backend.get_window_fn()`. Previously, it only correctly worked with `None` input and an error was raised when non-default value was set for `window_name` in any layer.
3 changes: 2 additions & 1 deletion docs/requirements.txt
@@ -1,3 +1,4 @@
sphinx!=1.3.1
sphinx_rtd_theme
-sphinxcontrib-napoleon
+sphinxcontrib-napoleon
+sphinxcontrib-inlinesyntaxhighlight
5 changes: 5 additions & 0 deletions docs/time_frequency_tflite.rst
@@ -0,0 +1,5 @@
time_frequency_tflite
^^^^^^^^^^^^^^^^^^^^^

.. automodule:: kapre.time_frequency_tflite
:members:
3 changes: 2 additions & 1 deletion kapre/__init__.py
@@ -1,8 +1,9 @@
-__version__ = '0.3.4'
+__version__ = '0.3.5'
VERSION = __version__

from . import composed
from . import backend

from .signal import *
from .time_frequency import *
from .time_frequency_tflite import *
8 changes: 6 additions & 2 deletions kapre/augmentation.py
@@ -49,7 +49,9 @@ class ChannelSwap(Layer):
"""

def __init__(
-self, data_format='default', **kwargs,
+self,
+data_format='default',
+**kwargs,
):
backend.validate_data_format_str(data_format)

@@ -94,6 +96,8 @@ def call(self, x, training=None):
def get_config(self):
config = super(ChannelSwap, self).get_config()
config.update(
-{'data_format': self.data_format,}
+{
+'data_format': self.data_format,
+}
)
return config
1 change: 1 addition & 0 deletions kapre/backend.py
@@ -110,6 +110,7 @@ def _log10(x):
if amin is None:
amin = 1e-5

+amin = tf.cast(amin, dtype=x.dtype)
log_spec = 10.0 * _log10(tf.math.maximum(x, amin))
log_spec = log_spec - 10.0 * _log10(tf.math.maximum(amin, ref_value))

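The one added line above is the whole fix from #120: `amin` is cast to the input's dtype before the log ops, so `float16` and `float64` spectrograms no longer hit a type error against a `float32` constant. A simplified NumPy mirror of the function (an illustrative approximation of kapre's TF implementation, not its exact code):

```python
import numpy as np

def magnitude_to_decibel(x, ref_value=1.0, amin=1e-5, dynamic_range=80.0):
    # Cast amin to x's dtype -- the fix; without it a float16/float64
    # input meets a float32 constant and TF raises a type error.
    amin = np.asarray(amin, dtype=x.dtype)
    log_spec = 10.0 * np.log10(np.maximum(x, amin))
    log_spec = log_spec - 10.0 * np.log10(np.maximum(amin, ref_value))
    # keep at most `dynamic_range` dB below the maximum
    return np.maximum(log_spec, log_spec.max() - dynamic_range)
```

As the test commit above notes, `float16` results match only to a coarser `rtol`, since the logs are computed in half precision.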
6 changes: 3 additions & 3 deletions kapre/composed.py
@@ -13,6 +13,9 @@
"""
+from tensorflow import keras
+from tensorflow.keras import Sequential, Model

from .time_frequency import (
STFT,
InverseSTFT,
@@ -23,9 +26,6 @@
ConcatenateFrequencyMap,
)
from . import backend

-from tensorflow import keras
-from tensorflow.keras import Sequential, Model
from .backend import _CH_FIRST_STR, _CH_LAST_STR, _CH_DEFAULT_STR


44 changes: 29 additions & 15 deletions kapre/signal.py
@@ -5,8 +5,9 @@
"""
import tensorflow as tf
from tensorflow.keras.layers import Layer
-from . import backend
from tensorflow.keras import backend as K

+from . import backend
from .backend import _CH_FIRST_STR, _CH_LAST_STR, _CH_DEFAULT_STR


@@ -23,7 +24,7 @@ class Frame(Layer):
pad_end (bool): whether to pad at the end of the signal if there would be an otherwise-discarded partial frame
pad_value (int or float): value to use in the padding
data_format (str): `channels_first`, `channels_last`, or `default`
-**kwargs:
+**kwargs: optional keyword args for `tf.keras.layers.Layer()`
Example:
::
@@ -63,9 +64,8 @@ def call(self, x):
x (`Tensor`): batch audio signal in the 1D format specified at initialization.
Returns:
-(`Tensor`): A framed tensor. The shape is
-(batch, time (frames), frame_length, channel) if `channels_last` and
-(batch, channel, time (frames), frame_length) if `channels_first`.
+(`Tensor`): A framed tensor. The shape is (batch, time (frames), frame_length, channel) if `channels_last`,
+or (batch, channel, time (frames), frame_length) if `channels_first`.
"""
return tf.signal.frame(
x,
@@ -104,7 +104,7 @@ class Energy(Layer):
pad_end (bool): whether to pad at the end of the signal if there would be an otherwise-discarded partial frame
pad_value (int or float): value to use in the padding
data_format (str): `channels_first`, `channels_last`, or `default`
-**kwargs:
+**kwargs: optional keyword args for `tf.keras.layers.Layer()`
Example:
::
@@ -154,9 +154,8 @@ def call(self, x):
x (`Tensor`): batch audio signal in the 1D format specified at initialization.
Returns:
-(`Tensor`): A framed tensor. The shape is
-(batch, time (frames), channel) if `channels_last`, and
-(batch, channel, time (frames)) if `channels_first`.
+(`Tensor`): A framed tensor. The shape is (batch, time (frames), channel) if `channels_last`, or
+(batch, channel, time (frames)) if `channels_first`.
"""
frames = tf.signal.frame(
x,
@@ -200,6 +199,7 @@ class MuLawEncoding(Layer):
Args:
quantization_channels (positive int): Number of channels. For 8-bit encoding, use 256.
**kwargs: optional keyword args for `tf.keras.layers.Layer()`
Note:
Mu-law encoding was originally developed to increase signal-to-noise ratio of signal during transmission.
@@ -219,7 +219,9 @@ class MuLawEncoding(Layer):
"""

def __init__(
-self, quantization_channels, **kwargs,
+self,
+quantization_channels,
+**kwargs,
):
super(MuLawEncoding, self).__init__(**kwargs)
self.quantization_channels = quantization_channels
@@ -238,7 +240,9 @@ def call(self, x):
def get_config(self):
config = super(MuLawEncoding, self).get_config()
config.update(
-{'quantization_channels': self.quantization_channels,}
+{
+'quantization_channels': self.quantization_channels,
+}
)

return config
@@ -251,6 +255,7 @@ class MuLawDecoding(Layer):
Args:
quantization_channels (positive int): Number of channels. For 8-bit encoding, use 256.
**kwargs: optional keyword args for `tf.keras.layers.Layer()`
Example:
::
@@ -263,7 +268,9 @@
"""

def __init__(
-self, quantization_channels, **kwargs,
+self,
+quantization_channels,
+**kwargs,
):
super(MuLawDecoding, self).__init__(**kwargs)
self.quantization_channels = quantization_channels
@@ -282,7 +289,9 @@ def call(self, x):
def get_config(self):
config = super(MuLawDecoding, self).get_config()
config.update(
-{'quantization_channels': self.quantization_channels,}
+{
+'quantization_channels': self.quantization_channels,
+}
)

return config
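`MuLawEncoding` and `MuLawDecoding` apply the standard mu-law companding transform (with `quantization_channels=256` for 8-bit encoding). The sketch below shows the textbook formulas in NumPy; `mu_law_encode` and `mu_law_decode` are illustrative helper names, not kapre's exact implementation.

```python
import numpy as np

def mu_law_encode(x, quantization_channels=256):
    # Compand a [-1, 1] float signal, then quantize to integer channels.
    mu = quantization_channels - 1.0
    compressed = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
    return ((compressed + 1.0) / 2.0 * mu + 0.5).astype(np.int32)

def mu_law_decode(y, quantization_channels=256):
    # Invert the quantization, then expand back to [-1, 1].
    mu = quantization_channels - 1.0
    compressed = 2.0 * (y.astype(np.float64) / mu) - 1.0
    return np.sign(compressed) * np.expm1(np.abs(compressed) * np.log1p(mu)) / mu
```

A round trip through encode and decode reconstructs the signal to within roughly one quantization step, with finer resolution near zero — the point of mu-law companding.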
@@ -303,6 +312,11 @@ class LogmelToMFCC(Layer):
As long as all of your data in training / inference / deployment is consistent (i.e., do not
mix librosa and kapre MFCC), it'll be fine!
Args:
n_mfccs (int): Number of MFCC
data_format (str): `channels_first`, `channels_last`, or `default`
**kwargs: optional keyword args for `tf.keras.layers.Layer()`
Example:
::
@@ -336,8 +350,8 @@ def call(self, log_melgrams):
and `(b, ch, time, mel)` if `channels_first`.
Returns:
-(float `Tensor`): MFCCs. `(batch, time, n_mfccs, ch)` if `channels_last`
-and `(batch, ch, time, n_mfccs)` if `channels_first`.
+(float `Tensor`):
+MFCCs. `(batch, time, n_mfccs, ch)` if `channels_last`, `(batch, ch, time, n_mfccs)` if `channels_first`.
"""
if self.permutation is not None: # reshape so that last channel == mel
log_melgrams = K.permute_dimensions(log_melgrams, pattern=self.permutation)
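The `Frame` layer in this file delegates to `tf.signal.frame`. Its `pad_end=False` behaviour on a single 1-D channel can be sketched in NumPy (a simplified illustration; `frame_signal` is a hypothetical helper, and the real layer also handles batch and channel axes):

```python
import numpy as np

def frame_signal(x, frame_length, hop_length):
    # Slice a 1-D signal into overlapping frames, dropping any trailing
    # partial frame -- the pad_end=False behaviour of tf.signal.frame.
    n_frames = 1 + (len(x) - frame_length) // hop_length
    starts = hop_length * np.arange(n_frames)[:, None]   # (n_frames, 1)
    offsets = np.arange(frame_length)[None, :]           # (1, frame_length)
    return x[starts + offsets]                           # fancy indexing
```

With `pad_end=True`, the layer instead pads with `pad_value` so the final partial frame is kept.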
