Experimental Feature - Accelerated NumPy

gavincyi · gavincyi · commit e57fabd8dfb4 · 2023-07-05T10:23:21.000+01:00
diff --git a/.gitignore b/.gitignore
@@ -18,3 +18,4 @@ dist/
 doc/build/
 doc/source/**/generated/
 arch/univariate/recursions.c
+**/.DS_Store
diff --git a/arch/covariance/kernel.py b/arch/covariance/kernel.py
@@ -4,13 +4,14 @@
 from functools import cached_property
 from typing import SupportsInt, cast
 
-import numpy as np
 from pandas import DataFrame, Index
 from pandas.util._decorators import Substitution
 
+from arch.experimental import numpy as np
 from arch.typing import ArrayLike, Float64Array
 from arch.utility.array import AbstractDocStringInheritor, ensure1d, ensure2d
 
+
 __all__ = [
     "Bartlett",
     "Parzen",
@@ -398,6 +399,7 @@ def cov(self) -> CovarianceEstimate:
         sr = x.T @ x / df
         w = self.kernel_weights
         num_weights = w.shape[0]
+        x = np.asarray(self._x)
         oss = np.zeros((k, k))
         for i in range(1, num_weights):
             oss += w[i] * (x[i:].T @ x[:-i]) / df
diff --git a/arch/experimental/__init__.py b/arch/experimental/__init__.py
@@ -0,0 +1,10 @@
+from .engine import (
+    backend,
+    set_backend,
+    use_backend,
+    NumpyEngine,
+    LinAlgEngine,
+    numpy,
+    linalg,
+    fori_loop,
+)
diff --git a/arch/experimental/engine.py b/arch/experimental/engine.py
@@ -0,0 +1,172 @@
+from contextlib import contextmanager
+from typing import Any
+
+_BACKEND_ENGINE = "numpy"  # active backend name; rebound by set_backend/use_backend
+
+
+def backend() -> str:
+    return _BACKEND_ENGINE  # name of the currently selected backend engine
+
+
+def set_backend(library_name):
+    """
+    Set backend engine.
+
+    The function sets the backend engine in global level.
+
+    Parameters
+    ----------
+    library_name : str
+        Library name. Default is `numpy`. Options are `numpy`, `tensorflow`,
+        `cupy` and `jax`.
+    """
+    assert library_name.lower() in ["numpy", "tensorflow", "cupy", "jax"], (
+        "Only `numpy`, `tensorflow`, `cupy` and `jax` are supported, but not "
+        f"{library_name}"
+    )
+    global _BACKEND_ENGINE
+    _BACKEND_ENGINE = library_name
+
+
+@contextmanager
+def use_backend(library_name="numpy"):
+    """
+    NumPy engine selection.
+
+    The function is a context manager to enable users to switch to a
+    specific library as a replacement of NumPy in CPU.
+
+    Parameters
+    ----------
+    library_name : str
+        Library name. Default is `numpy`. Options are `numpy`, `tensorflow`,
+        `cupy` and `jax`.
+    """
+    assert library_name.lower() in ["numpy", "tensorflow", "cupy", "jax"], (
+        "Only `numpy`, `tensorflow`, `cupy` and `jax` are supported, but not "
+        f"{library_name}"
+    )
+    global _BACKEND_ENGINE
+    _original = _BACKEND_ENGINE
+    try:
+        _BACKEND_ENGINE = library_name
+        if _BACKEND_ENGINE == "tensorflow":
+            import tensorflow.experimental.numpy as np
+
+            np.experimental_enable_numpy_behavior()
+        yield
+    finally:
+        _BACKEND_ENGINE = _original
+
+
+class NumpyEngine:
+    """
+    NumPy engine.
+    """
+
+    @property
+    def name(self):
+        """
+        Get engine name.
+        """
+        global _BACKEND_ENGINE
+        return _BACKEND_ENGINE
+
+    def __getattribute__(self, __name: str) -> Any:
+        global _BACKEND_ENGINE
+        try:
+            if _BACKEND_ENGINE == "numpy":
+                import numpy as anp
+            elif _BACKEND_ENGINE == "tensorflow":
+                import tensorflow.experimental.numpy as anp
+            elif _BACKEND_ENGINE == "cupy":
+                import cupy as anp
+            elif _BACKEND_ENGINE == "jax":
+                import jax.numpy as anp
+            else:
+                raise ValueError(f"Cannot recognize backend {_BACKEND_ENGINE}")
+        except ImportError:
+            raise ImportError(
+                "Library `numpy` cannot be imported from backend engine "
+                f"{_BACKEND_ENGINE}. Please make sure to install the library "
+                f"via `pip install {_BACKEND_ENGINE}`."
+            )
+
+        try:
+            return getattr(anp, __name)
+        except AttributeError:
+            raise AttributeError(
+                "Cannot get attribute / function from numpy library in "
+                f"backend engine {_BACKEND_ENGINE}"
+            )
+
+
+class LinAlgEngine:
+    """
+    Linear algebra engine.
+    """
+
+    @property
+    def name(self):
+        """
+        Get engine name.
+        """
+        global _BACKEND_ENGINE
+        return _BACKEND_ENGINE
+
+    def __getattribute__(self, __name: str) -> Any:
+        global _BACKEND_ENGINE
+        try:
+            if _BACKEND_ENGINE == "numpy":
+                import numpy.linalg as alinalg
+            elif _BACKEND_ENGINE == "tensorflow":
+                import tensorflow.linalg as alinalg
+            elif _BACKEND_ENGINE == "cupy":
+                import cupy.linalg as alinalg
+            elif _BACKEND_ENGINE == "jax":
+                import jax.numpy.linalg as alinalg
+            else:
+                raise ValueError(f"Cannot recognize backend {_BACKEND_ENGINE}")
+        except ImportError:
+            raise ImportError(
+                "Library `linalg` cannot be imported from backend engine "
+                f"{_BACKEND_ENGINE}. Please make sure to install the library "
+                f"via `pip install {_BACKEND_ENGINE}`."
+            )
+
+        try:
+            return getattr(alinalg, __name)
+        except AttributeError:
+            raise AttributeError(
+                "Cannot get attribute / function from linalg library in "
+                f"backend engine {_BACKEND_ENGINE}"
+            )
+
+
+def fori_loop(lower, upper, body_fun, init_val=None):
+    global _BACKEND_ENGINE
+    if _BACKEND_ENGINE in ["numpy", "cupy"]:
+        val = init_val
+        for i in range(lower, upper):
+            val = body_fun(i, val)
+        return val
+    elif _BACKEND_ENGINE == "jax":
+        import jax.lax
+
+        return jax.lax.fori_loop(lower, upper, body_fun, init_val)
+    elif _BACKEND_ENGINE == "tensorflow":
+        import tensorflow as tf
+
+        i = tf.constant(lower)
+        while_condition = lambda i: tf.less(i, upper)
+
+        def body(i, val):
+            return [tf.add(i, 1), body_fun(val)]
+
+        return tf.while_loop(while_condition, body, [i, init_val])
+
+    raise ImportError(f"Cannot recognize backend {_BACKEND_ENGINE}")
+
+
+numpy = NumpyEngine()  # shared proxy instance for the backend's NumPy-like module
+linalg = LinAlgEngine()  # shared proxy instance for the backend's linalg module
diff --git a/arch/unitroot/unitroot.py b/arch/unitroot/unitroot.py
@@ -37,7 +37,7 @@
     squeeze,
     sum as npsum,
 )
-from numpy.linalg import LinAlgError, inv, lstsq, matrix_rank, pinv, qr, solve
+from numpy.linalg import LinAlgError, inv, lstsq, matrix_rank, pinv, solve
 from pandas import DataFrame
 from scipy.stats import norm
 from statsmodels.iolib.summary import Summary
@@ -86,6 +86,8 @@
     invalid_length_doc,
 )
 from arch.utility.timeseries import add_trend
+from arch.experimental import numpy as anp, linalg as alinalg
+
 
 __all__ = [
     "ADF",
@@ -337,8 +339,13 @@ def _autolag_ols(
                 max_lags=maxlag, lag=max(exog_rank - startlag, 0)
             )
         )
-    q, r = qr(exog)
-    qpy = q.T @ endog
+
+    endog = anp.asarray(endog)
+    exog = anp.asarray(exog)
+    q, r = alinalg.qr(exog)
+    # Convert it to 2-d so as to adapt to linalg.solve input format for all
+    # engines
+    qpy = (q.T @ endog)[:, anp.newaxis]
     ypy = endog.T @ endog
     xpx = exog.T @ exog
 
@@ -347,12 +354,12 @@ def _autolag_ols(
     nobs = float(endog.shape[0])
     tstat[0] = inf
     for i in range(startlag, startlag + maxlag + 1):
-        b = solve(r[:i, :i], qpy[:i])
-        sigma2[i - startlag] = squeeze(ypy - b.T @ xpx[:i, :i] @ b) / nobs
+        b = alinalg.solve(r[:i, :i], qpy[:i])
+        sigma2[i - startlag] = anp.squeeze(ypy - b.T @ xpx[:i, :i] @ b) / nobs
         if lower_method == "t-stat" and i > startlag:
-            xpxi = inv(xpx[:i, :i])
-            stderr = sqrt(sigma2[i - startlag] * xpxi[-1, -1])
-            tstat[i - startlag] = squeeze(b[-1]) / stderr
+            xpxi = alinalg.inv(xpx[:i, :i])
+            stderr = anp.sqrt(sigma2[i - startlag] * xpxi[-1, -1])
+            tstat[i - startlag] = anp.squeeze(b[-1]) / stderr
 
     return _select_best_ic(method, nobs, sigma2, tstat)
 
diff --git a/doc/source/experimental/accelerated_numpy.rst b/doc/source/experimental/accelerated_numpy.rst
@@ -0,0 +1,68 @@
+.. module:: arch.experimental.engine
+   :noindex:
+.. currentmodule:: arch.experimental.engine
+
+Accelerated NumPy
+=================
+
+This feature allows users to choose an alternative NumPy-like engine
+to run on CPU and GPU. Currently, the following engines are supported in
+CPU and GPU runtimes:
+
+
+* `JAX <https://jax.readthedocs.io/en/latest/index.html#>`_
+
+* `TensorFlow <https://www.tensorflow.org/guide/tf_numpy>`_
+
+* `CuPy <https://docs.cupy.dev/en/stable/index.html>`_
+
+There are two ways users can switch the backend engine.
+
+1. Context Manager
+
+Users can use function ``use_backend`` in a ``with`` statement to temporarily
+switch the NumPy engine.
+
+In the example below, assume that the ``data`` object is a time series stored in a
+NumPy array. The covariance estimation (NeweyWest) is computed in TensorFlow.
+Since the output is a TensorFlow Tensor, the last line converts the long term
+covariance from a Tensor to a NumPy array.
+
+.. code-block:: python
+
+    import numpy as np
+
+    from arch.experimental import use_backend
+    from arch.covariance.kernel import NeweyWest
+
+    with use_backend("tensorflow"):
+        cov = NeweyWest(data).cov
+
+    long_term_cov = np.asarray(cov.long_term)
+
+2. Global
+
+Users can also configure the backend engine at the global level with the
+function ``set_backend``.
+
+.. code-block:: python
+
+    from arch.experimental import set_backend
+
+    set_backend("tensorflow")
+
+    # Output is already in TensorFlow Tensor type
+    long_term_cov = NeweyWest(data).cov.long_term
+
+For further examples, please refer to the example
+`notebook <experimental_accelerated_numpy.ipynb>`_.
+
+Configure
+---------
+
+.. autosummary::
+   :toctree: generated/
+
+    use_backend
+    set_backend
+
diff --git a/doc/source/index.rst b/doc/source/index.rst
@@ -35,6 +35,7 @@ routines relevant for the analysis of financial data.
     Multiple Comparison Problems <multiple-comparison/multiple-comparisons>
     Unit Root Tests and Cointegration Analysis <unitroot/unitroot>
     Long-run Covariance Estimation <covariance/covariance>
+    Experimental features <experimental/accelerated_numpy>
     API Reference <api>
     Change Log <changes>
 
diff --git a/examples/experimental_accelerated_numpy.ipynb b/examples/experimental_accelerated_numpy.ipynb